Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
Merge pull request #296 from gnes-ai/feat-add-healthcheck
Browse files Browse the repository at this point in the history
feat(service): add healthcheck for arbitrary service
  • Loading branch information
mergify[bot] authored Sep 26, 2019
2 parents b5efdba + 0367334 commit 5127395
Show file tree
Hide file tree
Showing 16 changed files with 257 additions and 46 deletions.
2 changes: 2 additions & 0 deletions Dockerfiles/alpine.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,6 @@ RUN apk add --no-cache \

WORKDIR /

ENV GNES_VCS_VERSION=$VCS_REF

ENTRYPOINT ["gnes"]
2 changes: 2 additions & 0 deletions Dockerfiles/buster.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,6 @@ RUN ln -s locale.h /usr/include/xlocale.h && \

WORKDIR /

ENV GNES_VCS_VERSION=$VCS_REF

ENTRYPOINT ["gnes"]
2 changes: 2 additions & 0 deletions Dockerfiles/full.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ RUN pip --no-cache-dir install .[all] \

WORKDIR /

ENV GNES_VCS_VERSION=$VCS_REF

ENTRYPOINT ["gnes"]
2 changes: 2 additions & 0 deletions Dockerfiles/ubuntu18.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@ RUN pip3 install . --no-cache-dir --compile && \

WORKDIR /

ENV GNES_VCS_VERSION=$VCS_REF

ENTRYPOINT ["gnes"]
2 changes: 1 addition & 1 deletion docker-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ do
printf "i will build $IMAGE_TAG based on $DOCKER_FILE\n"

docker build --network host --build-arg BUILD_DATE=`date -u +"%Y-%m-%dT%H:%M:%SZ"` \
--build-arg VCS_REF=${DRONE_COMMIT_REF} \
--build-arg VCS_REF=`git rev-parse --short HEAD` \
--rm --target $TARGET -t $IMAGE_TAG -f $DOCKER_FILE .


Expand Down
40 changes: 40 additions & 0 deletions docs/chapter/enviromentvars.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Environment Variables

There are a couple of environment variables that GNES respects at runtime.

## `GNES_PROFILING`

Set to any non-empty string to turn on service-level time profiling for GNES.

Default is disabled.

## `GNES_PROFILING_MEM`

Set to any non-empty string to turn on service-level memory profiling for GNES. Warning: memory profiling can significantly degrade performance.

Default is disabled.

## `GNES_WARN_UNNAMED_COMPONENT`

Set to `0` to turn off warnings such as `this object is not named ("name" is not found under "gnes_config" in YAML config), i will call it "BaseRouter-51ce94cc". naming the object is important as it provides an unique identifier when serializing/deserializing this object.`

Set to `1` to enable it.

Default is enabled.

## `GNES_VCS_VERSION`

Git version of GNES. This is used when `--check_version` is turned on. For the official GNES Docker image, `GNES_VCS_VERSION` is automatically set to the git version during the build.

Default is the git HEAD version at the time the Docker image was built. Outside such an image it is not set.

## `GNES_CONTROL_PORT`

Control port of the microservice. Useful when doing health check via `gnes healthcheck`.

Default is not set. A random port will be used.

## `GNES_CONTRIB_MODULE`

(*deprecated*) Paths of the third-party components. See the examples in GNES hub for the latest usage.

1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Highlights

chapter/troubleshooting.md
chapter/protobuf-dev.md
chapter/enviromentvars.md

.. toctree::
:maxdepth: 2
Expand Down
2 changes: 1 addition & 1 deletion gnes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@

# do not change this line manually
# this is managed by shell/make-proto.sh and updated on every execution
__proto_version__ = '0.0.8'
__proto_version__ = '0.0.10'
19 changes: 19 additions & 0 deletions gnes/cli/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,25 @@ def client(args):
'see "gnes client --help" for details')


def healthcheck(args):
    """Probe the control port of a GNES service and exit with its health status.

    A ``STATUS`` control request is sent to ``tcp://<args.host>:<args.port>`` at
    most ``args.retries`` times, pausing one second between attempts. The
    process exits with code 0 as soon as a non-empty reply comes back, and with
    code 1 when every attempt went unanswered (or ``args.retries`` is not
    positive).

    :param args: parsed CLI namespace providing ``host``, ``port``, ``retries``
        and ``timeout`` (the latter is forwarded to ``send_ctrl_message``)
    """
    from ..service.base import send_ctrl_message
    from ..proto import gnes_pb2, add_version
    import sys
    import time

    ctrl_addr = 'tcp://%s:%d' % (args.host, args.port)
    msg = gnes_pb2.Message()
    add_version(msg.envelope)
    msg.request.control.command = gnes_pb2.Request.ControlRequest.STATUS

    for j in range(args.retries):
        r = send_ctrl_message(ctrl_addr, msg, timeout=args.timeout)
        if r:
            print('%s returns %s' % (ctrl_addr, r))
            # sys.exit is used instead of the ``exit`` builtin: the latter is
            # injected by the ``site`` module and is not guaranteed to exist.
            sys.exit(0)
        if j + 1 < args.retries:
            print('%s is not responding, retry (%d/%d) in 1s' % (ctrl_addr, j + 1, args.retries))
            time.sleep(1)
        else:
            # last attempt failed: report it and fail right away instead of
            # announcing (and waiting for) a retry that never happens
            print('%s is not responding, giving up after %d attempts' % (ctrl_addr, args.retries))
    sys.exit(1)


def _client_http(args):
from ..client.http import HttpClient
HttpClient(args).start()
Expand Down
21 changes: 20 additions & 1 deletion gnes/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def set_composer_flask_parser(parser=None):
def set_service_parser(parser=None):
from ..service.base import SocketType, BaseService, ParallelType
import random
import os
if not parser:
parser = set_base_parser()
min_port, max_port = 49152, 65536
Expand All @@ -157,7 +158,8 @@ def set_service_parser(parser=None):
parser.add_argument('--socket_out', type=SocketType.from_string, choices=list(SocketType),
default=SocketType.PUSH_BIND,
help='socket type for output port')
parser.add_argument('--port_ctrl', type=int, default=random.randrange(min_port, max_port),
parser.add_argument('--port_ctrl', type=int,
default=int(os.environ.get('GNES_CONTROL_PORT', random.randrange(min_port, max_port))),
help='port for controlling the service, default a random port between [49152, 65536]')
parser.add_argument('--timeout', type=int, default=-1,
help='timeout (ms) of all communication, -1 for waiting forever')
Expand Down Expand Up @@ -241,6 +243,21 @@ def set_preprocessor_parser(parser=None):
return parser


def set_healthcheck_parser(parser=None):
    """Register the ``healthcheck`` command-line options on a parser.

    :param parser: parser to extend; when it is missing (falsy) a fresh one is
        obtained from ``set_base_parser()``
    :return: the parser carrying ``--host``, ``--port``, ``--timeout`` and
        ``--retries``
    """
    parser = parser or set_base_parser()

    # (flag, keyword arguments) pairs, registered in declaration order
    option_specs = (
        ('--host', dict(type=str, default='127.0.0.1',
                        help='host address of the checked service')),
        ('--port', dict(type=int, required=True,
                        help='control port of the checked service')),
        ('--timeout', dict(type=int, default=1000,
                           help='timeout (ms) of one check, -1 for waiting forever')),
        ('--retries', dict(type=int, default=3,
                           help='max number of tried health checks before exit 1')),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    return parser


def set_router_parser(parser=None):
if not parser:
parser = set_base_parser()
Expand Down Expand Up @@ -411,4 +428,6 @@ def get_main_parser():
# others
set_composer_flask_parser(
sp.add_parser('compose', help='start a GNES Board to visualize YAML configs', formatter_class=adf))
set_healthcheck_parser(
sp.add_parser('healthcheck', help='do health check on any GNES microservice', formatter_class=adf))
return parser
22 changes: 21 additions & 1 deletion gnes/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.

import ctypes
import os
import random
from typing import List, Iterator, Tuple
from typing import Optional
Expand All @@ -25,7 +26,8 @@
from . import gnes_pb2
from ..helper import batch_iterator, default_logger

__all__ = ['RequestGenerator', 'send_message', 'recv_message', 'blob2array', 'array2blob', 'gnes_pb2', 'add_route']
__all__ = ['RequestGenerator', 'send_message', 'recv_message',
'blob2array', 'array2blob', 'gnes_pb2', 'add_route', 'add_version']


class RequestGenerator:
Expand Down Expand Up @@ -114,6 +116,13 @@ def add_route(evlp: 'gnes_pb2.Envelope', name: str, identity: str):
r.service_identity = identity


def add_version(evlp: 'gnes_pb2.Envelope'):
    """Stamp an envelope with the local version information.

    Fills ``gnes_version`` and ``proto_version`` from the package constants and
    ``vcs_version`` from the ``GNES_VCS_VERSION`` environment variable (an empty
    string when the variable is unset).

    :param evlp: the envelope to modify in place
    """
    from .. import __version__ as pkg_version, __proto_version__ as proto_version

    vcs_ref = os.getenv('GNES_VCS_VERSION', '')
    evlp.gnes_version = pkg_version
    evlp.proto_version = proto_version
    evlp.vcs_version = vcs_ref


def merge_routes(msg: 'gnes_pb2.Message', prev_msgs: List['gnes_pb2.Message']):
# take unique routes by service identity
routes = {(r.service + r.service_identity): r for m in prev_msgs for r in m.envelope.routes}
Expand Down Expand Up @@ -145,6 +154,17 @@ def check_msg_version(msg: 'gnes_pb2.Message'):
'incoming message has protobuf version %s, whereas local protobuf version %s' % (
msg.envelope.proto_version, __proto_version__))

if hasattr(msg.envelope, 'vcs_version'):
if not msg.envelope.vcs_version or not os.environ.get('GNES_VCS_VERSION'):
default_logger.warning('incoming message contains empty "vcs_version", '
'you may ignore it in debug/unittest mode, '
'or if you run gnes OUTSIDE docker container where GNES_VCS_VERSION is unset'
'otherwise please check if frontend service set correct version')
elif os.environ.get('GNES_VCS_VERSION') != msg.envelope.vcs_version:
raise AttributeError('mismatched vcs version! '
'incoming message has vcs_version %s, whereas local environment vcs_version is %s' % (
msg.envelope.vcs_version, os.environ.get('GNES_VCS_VERSION')))

if not hasattr(msg.envelope, 'proto_version') and not hasattr(msg.envelope, 'gnes_version'):
raise AttributeError('version_check=True locally, '
'but incoming message contains no version info in its envelope. '
Expand Down
3 changes: 3 additions & 0 deletions gnes/proto/gnes.proto
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ message Envelope {
string gnes_version = 7;

string proto_version = 8;

string vcs_version = 9;
}

message Message {
Expand Down Expand Up @@ -162,6 +164,7 @@ message Response {
SUCCESS = 0;
ERROR = 1;
PENDING = 2;
READY = 3;
}

message TrainResponse {
Expand Down
Loading

0 comments on commit 5127395

Please sign in to comment.