From 6a368335c802d2d0f1c5916d395c66ac46993814 Mon Sep 17 00:00:00 2001 From: hanhxiao Date: Wed, 9 Oct 2019 13:47:48 +0800 Subject: [PATCH] fix(cli): show more detailed version info in cli --- gnes/cli/parser.py | 7 +++++-- gnes/client/cli.py | 2 +- gnes/preprocessor/text/split.py | 2 ++ tests/test_gnes_flow.py | 9 +++++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/gnes/cli/parser.py b/gnes/cli/parser.py index 65ee0b28..f67b9ca2 100644 --- a/gnes/cli/parser.py +++ b/gnes/cli/parser.py @@ -77,8 +77,9 @@ def resolve_yaml_path(path): def set_base_parser(): - from .. import __version__ + from .. import __version__, __proto_version__ from termcolor import colored + import os # create the top-level parser parser = argparse.ArgumentParser( description='%s, a cloud-native semantic search system ' @@ -88,7 +89,9 @@ def set_base_parser(): colored('GNES v%s: Generic Neural Elastic Search' % __version__, 'green'), colored('https://gnes.ai', 'cyan', attrs=['underline'])), formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) + parser.add_argument('-v', '--version', action='version', + version='%(prog)s' + ': %s\nprotobuf: %s\nvcs_version: %s' % + (__version__, __proto_version__, os.environ.get('GNES_VCS_VERSION', 'unknown'))) parser.add_argument('--verbose', action='store_true', default=False, help='turn on detailed logging for debug') return parser diff --git a/gnes/client/cli.py b/gnes/client/cli.py index 6e701416..53236554 100644 --- a/gnes/client/cli.py +++ b/gnes/client/cli.py @@ -87,7 +87,7 @@ def query_callback(self, req: 'gnes_pb2.Request', resp: 'gnes_pb2.Response'): @property def bytes_generator(self) -> Generator[bytes, None, None]: - if self.bytes_generator: + if self._bytes_generator: return self._bytes_generator else: raise ValueError('bytes_generator is empty or not set') diff --git a/gnes/preprocessor/text/split.py b/gnes/preprocessor/text/split.py index 5e89838e..808935b0 100644 --- a/gnes/preprocessor/text/split.py +++ b/gnes/preprocessor/text/split.py @@ -46,6 +46,8 @@ def apply(self, doc: 'gnes_pb2.Document') -> None: ret = [(m.group(0), m.start(), m.end()) for m in re.finditer(r'[^{0}]+[{0}]'.format(self.deliminator), doc.raw_text)] + if not ret: + ret = [(doc.raw_text, 0, len(doc.raw_text))] for ci, (r, s, e) in enumerate(ret): f = ''.join(filter(lambda x: x in string.printable, r)) f = re.sub('\n+', ' ', f).strip() diff --git a/tests/test_gnes_flow.py b/tests/test_gnes_flow.py index 49ddd4ff..6fa9dcaf 100644 --- a/tests/test_gnes_flow.py +++ b/tests/test_gnes_flow.py @@ -22,7 +22,8 @@ def setUp(self): self.indexer1_bin = os.path.join(self.test_dir, 'my_faiss_indexer.bin') self.indexer2_bin = os.path.join(self.test_dir, 'my_fulltext_indexer.bin') self.encoder_bin = os.path.join(self.test_dir, 'my_transformer.bin') - + if os.path.exists(self.test_dir): + self.tearDown() os.mkdir(self.test_dir) os.environ['TEST_WORKDIR'] = self.test_dir @@ -100,7 +101,7 @@ def _test_index_flow(self): for k in [self.indexer1_bin, self.indexer2_bin, self.encoder_bin]: self.assertFalse(os.path.exists(k)) - flow = (Flow(check_version=False, route_table=True) + flow = (Flow(check_version=False, route_table=False) .add(gfs.Preprocessor, name='prep', yaml_path='SentSplitPreprocessor') .add(gfs.Encoder, yaml_path='yaml/flow-transformer.yml') .add(gfs.Indexer, name='vec_idx', yaml_path='yaml/flow-vecindex.yml') @@ -110,13 +111,13 @@ def _test_index_flow(self): num_part=2, service_in=['vec_idx', 'doc_idx'])) with flow.build(backend='thread') as f: - f.index(txt_file=self.test_file, batch_size=4) + f.index(txt_file=self.test_file, batch_size=20) for k in [self.indexer1_bin, self.indexer2_bin, self.encoder_bin]: self.assertTrue(os.path.exists(k)) def _test_query_flow(self): - flow = (Flow(check_version=False, route_table=True) + flow = (Flow(check_version=False, route_table=False) .add(gfs.Preprocessor, name='prep', yaml_path='SentSplitPreprocessor') .add(gfs.Encoder, yaml_path='yaml/flow-transformer.yml') .add(gfs.Indexer, name='vec_idx', yaml_path='yaml/flow-vecindex.yml')