diff --git a/.github/demo-poem-index.svg b/.github/demo-poem-index.svg new file mode 100644 index 00000000..408cd4da --- /dev/null +++ b/.github/demo-poem-index.svg @@ -0,0 +1,131 @@ + \ No newline at end of file diff --git a/README.md b/README.md index 719d557b..138a302d 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,8 @@ Either way, if you end up reading the following message after `$ gnes` or `$ doc - [🐣 Preliminaries](#-preliminaries) * [Microservice](#microservice) * [Runtime](#runtime) +- [Demo for the impatient](#demo-for-the-impatient) + * [Semantic poem search in 3 minutes or less](#building-a-semantic-poem-search-engine-in-3-minutes-or-less) - [Build your first GNES app on local machine](#build-your-first-gnes-app-on-local-machine) - [Scale your GNES app to the cloud](#scale-your-gnes-app-to-the-cloud) - [Customize GNES on your need](#customize-gnes-to-your-need) @@ -247,6 +249,20 @@ Okay, now that we have a bunch of apps, what are we expecting them to do? In a t 💡 The key to understand GNES is to know *which runtime requires what microservices, and each microservice does what*. +### Demo for the impatient + +#### Building a semantic poem search engine in 3 minutes or less + +For the impatient, we present a complete demo using GNES that enables semantic indexing and querying of poems. + +Please check out [this repository for details](https://github.com/gnes-ai/demo-poems-ir) and follow the instructions to reproduce. + +
+ + + +
+ ### Build your first GNES app on local machine Let's start with a typical indexing procedure by writing a YAML config (see the left column of the table): diff --git a/gnes/cli/parser.py b/gnes/cli/parser.py index 7dab8625..93b64176 100644 --- a/gnes/cli/parser.py +++ b/gnes/cli/parser.py @@ -137,8 +137,8 @@ def set_service_parser(parser=None): help='port for controlling the service, default a random port between [49152, 65536]') parser.add_argument('--timeout', type=int, default=-1, help='timeout (ms) of all communication, -1 for waiting forever') - parser.add_argument('--dump_interval', type=int, default=5, - help='serialize the service to a file every n seconds') + parser.add_argument('--dump_interval', type=int, default=-1, + help='serialize the service to a file every n seconds, -1 means --read_only') parser.add_argument('--read_only', action='store_true', default=False, help='do not allow the service to modify the model, ' 'dump_interval will be ignored') diff --git a/gnes/indexer/doc/leveldb.py b/gnes/indexer/doc/leveldb.py index d06e3325..2ae70428 100644 --- a/gnes/indexer/doc/leveldb.py +++ b/gnes/indexer/doc/leveldb.py @@ -66,21 +66,6 @@ def query(self, keys: List[int], *args, **kwargs) -> List['gnes_pb2.Document']: res.append(self._NOT_FOUND) return res - def update_counter(self, docs: List['gnes_pb2.Document'], *args, **kwargs): - self._num_doc += len(docs) - self._num_chunks += sum(list(map(lambda x: len(x.chunks), docs))) - - @property - def num_doc(self): - return self._num_doc - - @property - def num_chunks(self): - return self._num_chunks - - @property - def num_chunks_avg(self): - return self._num_chunks / self._num_doc def close(self): super().close() diff --git a/tests/test_annoyindexer.py b/tests/test_annoyindexer.py index 66fe533b..3dde7494 100644 --- a/tests/test_annoyindexer.py +++ b/tests/test_annoyindexer.py @@ -4,6 +4,7 @@ import numpy as np from gnes.indexer.chunk.annoy import AnnoyIndexer +from gnes.indexer.chunk.numpy import 
NumpyIndexer class TestAnnoyIndexer(unittest.TestCase): @@ -27,3 +28,19 @@ def test_search(self): a.close() a.dump() a.dump_yaml() + + def test_numpy_indexer(self): + a = NumpyIndexer() + a.add(list(zip(list(range(10)), list(range(10)))), self.toy_data, [1.] * 10) + self.assertEqual(a.num_chunks, 10) + self.assertEqual(a.num_docs, 10) + top_1 = [i[0][0] for i in a.query(self.toy_data, top_k=1)] + self.assertEqual(top_1, list(range(10))) + a.close() + a.dump() + a.dump_yaml() + b = NumpyIndexer.load_yaml(a.yaml_full_path) + self.assertEqual(b.num_chunks, 10) + self.assertEqual(b.num_docs, 10) + top_1 = [i[0][0] for i in b.query(self.toy_data, top_k=1)] + self.assertEqual(top_1, list(range(10)))