Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
Merge pull request #276 from gnes-ai/fix-docker
Browse files Browse the repository at this point in the history
fix(docker): reduce the size of built image
  • Loading branch information
mergify[bot] authored Sep 23, 2019
2 parents 1e3517e + c5347a5 commit ea69135
Show file tree
Hide file tree
Showing 12 changed files with 83 additions and 67 deletions.
3 changes: 2 additions & 1 deletion Dockerfiles/alpine.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ LABEL maintainer="[email protected]" \

WORKDIR /gnes/

ADD . ./
ADD setup.py MANIFEST.in requirements.txt README.md ./
ADD gnes ./gnes/

RUN apk add --no-cache \
--virtual=.build-dependencies \
Expand Down
3 changes: 2 additions & 1 deletion Dockerfiles/buster.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \

WORKDIR /gnes/

ADD . ./
ADD setup.py MANIFEST.in requirements.txt README.md ./
ADD gnes ./gnes/

RUN ln -s locale.h /usr/include/xlocale.h && \
pip install . --no-cache-dir --compile && \
Expand Down
6 changes: 3 additions & 3 deletions Dockerfiles/full.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ RUN pip --no-cache-dir install -r requirements_tmp.txt

FROM dependency as base

ADD . ./

ADD setup.py MANIFEST.in requirements.txt README.md ./
ADD gnes ./gnes/

RUN pip --no-cache-dir install .[all] \
&& rm -rf /tmp/*
&& rm -rf /tmp/* && rm -rf /gnes

WORKDIR /

Expand Down
3 changes: 2 additions & 1 deletion Dockerfiles/ubuntu18.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
ln -s /usr/bin/python3 python && \
apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*

ADD . ./
ADD setup.py MANIFEST.in requirements.txt README.md ./
ADD gnes ./gnes/

RUN pip3 install . --no-cache-dir --compile && \
rm -rf /tmp/* && rm -rf /gnes
Expand Down
9 changes: 5 additions & 4 deletions gnes/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,10 @@ def set_service_parser(parser=None):
parser.add_argument('--check_version', action=ActionNoYes, default=True,
help='comparing the GNES and proto version of incoming message with local setup, '
'mismatch raise an exception')
parser.add_argument('--identity', type=str, default=str(uuid.uuid4()).split('-')[0],
help='identity of the service, by default a random uuid string')
parser.add_argument('--identity', type=str, default='',
help='identity of the service, empty by default')
parser.add_argument('--route_table', action=ActionNoYes, default=True,
help='showing a route table with time cost after receiving the result')
return parser


Expand Down Expand Up @@ -307,8 +309,7 @@ def set_frontend_parser(parser=None):
read_only=True)
parser.add_argument('--max_concurrency', type=int, default=10,
help='maximum concurrent connections allowed')
parser.add_argument('--route_table', action=ActionNoYes, default=True,
help='showing a route table with time cost after receiving the result')

return parser


Expand Down
4 changes: 2 additions & 2 deletions gnes/client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ def __init__(self, args):
self.ctx.setsockopt(zmq.LINGER, 0)
self.receiver, recv_addr = build_socket(
self.ctx, self.args.host_in, self.args.port_in,
self.args.socket_in, getattr(self, 'identity', None))
self.args.socket_in, self.args.identity)
self.sender, send_addr = build_socket(self.ctx, self.args.host_out,
self.args.port_out,
self.args.socket_out,
getattr(self, 'identity', None))
self.args.identity)
self.logger.info(
'input %s:%s\t output %s:%s' %
(self.args.host_in, colored(self.args.port_in, 'yellow'),
Expand Down
44 changes: 43 additions & 1 deletion gnes/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ def progressbar(i, prefix="", suffix="", count=100, size=60):
x = int(size * i / count)
sys.stdout.write(
"%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), _i,
(step + 1) * count, suffix))
(step + 1) * count, suffix))
sys.stdout.flush()


def get_first_available_gpu():
try:
import GPUtil
Expand Down Expand Up @@ -585,6 +586,47 @@ def _path_import(absolute_path):
return module, spec


def make_route_table(routes, exclude_frontend: bool = False):
    """Render a plain-text breakdown of the time spent on each route.

    The table has three sections separated by a dashed rule: a header, one
    row per route (slowest first), and a summary with the ``system``,
    ``total``, ``job`` and ``parallel`` figures.

    :param routes: indexable, sliceable sequence of route records, each
        exposing ``service``, ``start_time`` and ``end_time``
    :param exclude_frontend: when true, the first route is taken as the
        frontend: its own start/end span defines the total duration and it
        is left out of the per-route rows. Otherwise the total spans from
        the first route's start to the last route's end.
    :return: the formatted table as a single string
    """
    if not routes:
        # nothing was recorded; still return a well-formed (empty) table
        total_duration = 0
    elif exclude_frontend:
        total_duration = get_duration(routes[0].start_time, routes[0].end_time)
    else:
        total_duration = get_duration(routes[0].start_time, routes[-1].end_time)

    first_job = 1 if exclude_frontend else 0
    route_time = [(r.service, get_duration(r.start_time, r.end_time))
                  for r in routes[first_job:]]
    sum_duration = sum(duration for _, duration in route_time)

    def get_percent(duration):
        # get_duration clamps negative spans to 0 and returns -1 for missing
        # timestamps, so the total may be non-positive; report 0% in that
        # case instead of dividing by zero
        if total_duration > 0:
            return duration / total_duration * 100
        return 0

    def get_table_str(time_table):
        ordered = sorted(time_table, key=lambda x: x[1], reverse=True)
        return '\n'.join(
            '%40s\t%3.3fs\t%3d%%' % (name, duration, get_percent(duration))
            for name, duration in ordered)

    summary = [('system', total_duration - sum_duration),
               ('total', total_duration),
               ('job', sum_duration),
               ('parallel', max(sum_duration - total_duration, 0))]
    route_table = ('\n%s\n' % ('-' * 80)).join(
        ['%40s\t%-6s\t%3s' % ('Breakdown', 'Time', 'Percent'),
         get_table_str(route_time),
         get_table_str(summary)])
    return route_table


def get_duration(start_time, end_time):
    """Return the non-negative elapsed time between two timestamps, in seconds.

    Both arguments must expose integer ``seconds`` and ``nanos`` attributes
    (presumably protobuf ``Timestamp`` messages -- confirm against callers).

    The difference is taken in whole nanoseconds and converted to seconds
    with a single division, so no manual borrow between the two fields is
    required and the result is rounded only once.

    :param start_time: timestamp at which the interval begins
    :param end_time: timestamp at which the interval ends
    :return: ``-1`` if either timestamp is missing (falsy); otherwise the
        elapsed seconds as a float, clamped to ``0.0`` when ``end_time``
        is earlier than ``start_time``
    """
    if not start_time or not end_time:
        # sentinel kept as-is for backward compatibility with existing callers
        return -1
    elapsed_nanos = ((end_time.seconds - start_time.seconds) * 1000000000
                     + (end_time.nanos - start_time.nanos))
    return max(elapsed_nanos / 1e9, 0.0)


profile_logger = set_logger('PROFILE')
default_logger = set_logger('GNES')
profiling = time_profile
Expand Down
6 changes: 3 additions & 3 deletions gnes/indexer/chunk/faiss.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,16 @@ def post_init(self):
@BCI.update_helper_indexer
def add(self, keys: List[Tuple[int, Any]], vectors: np.ndarray, weights: List[float], *args, **kwargs):
if len(vectors) != len(keys):
raise ValueError("vectors length should be equal to doc_ids")
raise ValueError('vectors length should be equal to doc_ids')

if vectors.dtype != np.float32:
raise ValueError("vectors should be ndarray of float32")
raise ValueError('vectors should be ndarray of float32')

self._faiss_index.add(vectors)

def query(self, keys: np.ndarray, top_k: int, *args, **kwargs) -> List[List[Tuple]]:
if keys.dtype != np.float32:
raise ValueError("vectors should be ndarray of float32")
raise ValueError('vectors should be ndarray of float32')

score, ids = self._faiss_index.search(keys, top_k)
ret = []
Expand Down
2 changes: 1 addition & 1 deletion gnes/proto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def add_route(evlp: 'gnes_pb2.Envelope', name: str, identity: str):

def merge_routes(msg: 'gnes_pb2.Message', prev_msgs: List['gnes_pb2.Message']):
# take unique routes, keyed by service name plus service identity
routes = {r.service_identity: r for m in prev_msgs for r in m.envelope.routes}
routes = {(r.service + r.service_identity): r for m in prev_msgs for r in m.envelope.routes}
msg.envelope.ClearField('routes')
msg.envelope.routes.extend(sorted(routes.values(), key=lambda x: (x.start_time.seconds, x.start_time.nanos)))

Expand Down
10 changes: 6 additions & 4 deletions gnes/service/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

from ..base import TrainableBase, T
from ..cli.parser import resolve_yaml_path
from ..helper import set_logger, PathImporter, TimeContext
from ..helper import set_logger, PathImporter, TimeContext, make_route_table
from ..proto import gnes_pb2, add_route, send_message, recv_message, router2str


Expand Down Expand Up @@ -129,7 +129,6 @@ def build_socket(ctx: 'zmq.Context', host: str, port: int, socket_type: 'SocketT
sock.setsockopt(zmq.SUBSCRIBE, identity.encode('ascii') if identity else b'')
# sock.setsockopt(zmq.SUBSCRIBE, b'')


# Note: the following is very dangerous for pub-sub sockets
sock.setsockopt(zmq.RCVHWM, 10)
sock.setsockopt(zmq.RCVBUF, 10 * 1024 * 1024) # limit of network buffer 100M
Expand Down Expand Up @@ -361,16 +360,19 @@ def _hook_add_route(self, msg: 'gnes_pb2.Message', *args, **kwargs):
@handler.register_hook(hook_type='post')
def _hook_update_route_timestamp(self, msg: 'gnes_pb2.Message', *args, **kwargs):
    """Post hook: stamp the end time on the most recent route of ``msg``.

    When ``self.args.route_table`` is enabled, the route string and the
    per-route timing table are also written to the service logger.
    """
    routes = msg.envelope.routes
    routes[-1].end_time.GetCurrentTime()
    if not self.args.route_table:
        return
    self.logger.info('route: %s' % router2str(msg))
    self.logger.info('route table: \n%s' % make_route_table(routes))

@zmqd.context()
def _run(self, ctx):
ctx.setsockopt(zmq.LINGER, 0)
self.handler.service_context = self
self.logger.info('bind sockets...')
in_sock, _ = build_socket(ctx, self.args.host_in, self.args.port_in, self.args.socket_in,
getattr(self, 'identity', None))
self.args.identity)
out_sock, _ = build_socket(ctx, self.args.host_out, self.args.port_out, self.args.socket_out,
getattr(self, 'identity', None))
self.args.identity)
ctrl_sock, ctrl_addr = build_socket(ctx, self.default_host, self.args.port_ctrl, SocketType.PAIR_BIND)

self.logger.info(
Expand Down
42 changes: 2 additions & 40 deletions gnes/service/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from .. import __version__, __proto_version__
from ..client.base import ZmqClient
from ..helper import set_logger
from ..helper import set_logger, make_route_table
from ..proto import gnes_pb2_grpc, gnes_pb2, router2str, add_route


Expand Down Expand Up @@ -79,48 +79,10 @@ def remove_envelope(self, m: 'gnes_pb2.Message'):
m.envelope.routes[0].end_time.GetCurrentTime()
if self.args.route_table:
self.logger.info('route: %s' % router2str(m))
route_time = []
k = m.envelope.routes[0]
total_duration = self.get_duration(k.start_time, k.end_time)

sum_duration = 0
for k in m.envelope.routes[1:]:
if k.first_start_time and k.last_end_time:
d = self.get_duration(k.first_start_time, k.last_end_time)
else:
d = self.get_duration(k.start_time, k.end_time)

route_time.append((k.service, d))
sum_duration += d

def get_table_str(time_table):
return '\n'.join(
['%40s\t%3.3fs\t%3d%%' % (k[0], k[1], k[1] / total_duration * 100) for k in
sorted(time_table, key=lambda x: x[1], reverse=True)])

summary = [('system', total_duration - sum_duration),
('total', total_duration),
('job', sum_duration)]

route_table = ('\n%s\n' % ('-' * 80)).join(
['%40s\t%-6s\t%3s' % ('Breakdown', 'Time', 'Percent'), get_table_str(route_time),
get_table_str(summary)])
self.logger.info('route table: \n%s' % route_table)
self.logger.info('route table: \n%s' % make_route_table(m.envelope.routes, exclude_frontend=True))

return resp

@staticmethod
def get_duration(start_time, end_time):
d_s = end_time.seconds - start_time.seconds
d_n = end_time.nanos - start_time.nanos
if d_s < 0 and d_n > 0:
d_s = max(d_s + 1, 0)
d_n = max(d_n - 1e9, 0)
elif d_s > 0 and d_n < 0:
d_s = max(d_s - 1, 0)
d_n = max(d_n + 1e9, 0)
return max(d_s + d_n / 1e9, 0)

def Call(self, request, context):
with self.zmq_context as zmq_client:
zmq_client.send_message(self.add_envelope(request, zmq_client), self.args.timeout)
Expand Down
18 changes: 12 additions & 6 deletions tests/test_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,41 +457,47 @@ def test_multimap_multireduce(self):
'--yaml_path', self.publish_router_yaml,
'--socket_in', str(SocketType.SUB_CONNECT),
'--socket_out', str(SocketType.PUB_BIND),
'--port_in', str(p1.port_out)
'--port_in', str(p1.port_out),
'--identity', ''
])
p22 = set_router_parser().parse_args([
'--yaml_path', self.publish_router_yaml,
'--socket_in', str(SocketType.SUB_CONNECT),
'--socket_out', str(SocketType.PUB_BIND),
'--port_in', str(p1.port_out)
'--port_in', str(p1.port_out),
'--identity', ''
])
r311 = set_router_parser().parse_args([
'--socket_in', str(SocketType.SUB_CONNECT),
'--socket_out', str(SocketType.PUSH_CONNECT),
'--port_in', str(p21.port_out),
'--port_out', str(r41.port_in),
'--yaml_path', 'BaseRouter'
'--yaml_path', 'BaseRouter',
'--identity', ''
])
r312 = set_router_parser().parse_args([
'--socket_in', str(SocketType.SUB_CONNECT),
'--socket_out', str(SocketType.PUSH_CONNECT),
'--port_in', str(p21.port_out),
'--port_out', str(r41.port_in),
'--yaml_path', 'BaseRouter'
'--yaml_path', 'BaseRouter',
'--identity', ''
])
r321 = set_router_parser().parse_args([
'--socket_in', str(SocketType.SUB_CONNECT),
'--socket_out', str(SocketType.PUSH_CONNECT),
'--port_in', str(p22.port_out),
'--port_out', str(r42.port_in),
'--yaml_path', 'BaseRouter'
'--yaml_path', 'BaseRouter',
'--identity', ''
])
r322 = set_router_parser().parse_args([
'--socket_in', str(SocketType.SUB_CONNECT),
'--socket_out', str(SocketType.PUSH_CONNECT),
'--port_in', str(p22.port_out),
'--port_out', str(r42.port_in),
'--yaml_path', 'BaseRouter'
'--yaml_path', 'BaseRouter',
'--identity', ''
])

c_args = _set_client_parser().parse_args([
Expand Down

0 comments on commit ea69135

Please sign in to comment.