diff --git a/.github/demo-poem-index.svg b/.github/demo-poem-index.svg new file mode 100644 index 00000000..408cd4da --- /dev/null +++ b/.github/demo-poem-index.svg @@ -0,0 +1,131 @@ + + + + + + + + + + + + + + + demo-poems-ir git:(master) demo-poems-ir git:(master) m demo-poems-ir git:(master) ma demo-poems-ir git:(master) mak demo-poems-ir git:(master) make demo-poems-ir git:(master) make demo-poems-ir git:(master) make i demo-poems-ir git:(master) make in demo-poems-ir git:(master) make ind demo-poems-ir git:(master) make inde demo-poems-ir git:(master) make index demo-poems-ir git:(master) make index docker stack rm my-gnesNothing found in stack: my-gnesrm -rf .cache && mkdir -p .cachedocker stack deploy --compose-file demo-poem-index.yml my-gnesCreating network my-gnes_defaultCreating service my-gnes_Frontend00Creating service my-gnes_Preprocessor10Creating service my-gnes_Encoder20Creating service my-gnes_Indexer30Creating service my-gnes_Indexer21Creating service my-gnes_Router40demo-poems-ir git:(master) make c demo-poems-ir git:(master) make cl demo-poems-ir git:(master) make cli demo-poems-ir git:(master) make clie demo-poems-ir git:(master) make clien demo-poems-ir git:(master) make client demo-poems-ir git:(master) make client_ demo-poems-ir git:(master) make client_i demo-poems-ir git:(master) make client_in demo-poems-ir git:(master) make client_index demo-poems-ir git:(master) make client_index demo-poems-ir git:(master) make client_index unset https_proxy && unset http_proxy && docker run --rm --network host -v /Users/hanhxiao/Documents/demo-poems-ir/data:/data/ gnes/demo-poem:client --mode index --batch_size 4 --txt_file /data/kaggle_poem_dataset.csvI:MyClient:[bas:__i: 70]:setting up channel...I:MyClient:[bas:__i: 78]:waiting channel to be ready...I:MyClient:[bas:__i: 80]:making stub...C:MyClient:[bas:__i: 82]:ready!index [ ] 0% elapsed: 0.0s speed: 0.0 bytes/s 0.0 batch/s left: 0.9s index [====== ] 33% elapsed: 12.8s speed: 237.8 bytes/s 0.1 
batch/s left: 25.6s index [============= ] 67% elapsed: 22.3s speed: 193.9 bytes/s 0.1 batch/s left: 11.1s index [====================] 100% elapsed: 27.5s speed: 146.2 bytes/s 0.1 batch/s left: 0.0s t: 0.0sdone!demo-poems-ir git:(master) + \ No newline at end of file diff --git a/README.md b/README.md index 719d557b..138a302d 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,8 @@ Either way, if you end up reading the following message after `$ gnes` or `$ doc - [🐣 Preliminaries](#-preliminaries) * [Microservice](#microservice) * [Runtime](#runtime) +- [Demo for the impatient](#demo-for-the-impatient) + * [Semantic poem search in 3 minutes or less](#building-a-semantic-poem-search-engine-in-3-minutes-or-less) - [Build your first GNES app on local machine](#build-your-first-gnes-app-on-local-machine) - [Scale your GNES app to the cloud](#scale-your-gnes-app-to-the-cloud) - [Customize GNES on your need](#customize-gnes-to-your-need) @@ -247,6 +249,20 @@ Okay, now that we have a bunch of apps, what are we expecting them to do? In a t 💡 The key to understand GNES is to know *which runtime requires what microservices, and each microservice does what*. +### Demo for the impatient + +#### Building a semantic poem search engine in 3 minutes or less + +For the impatient, we present a complete demo using GNES that enables semantic indexing and querying of poems. + +Please check out [this repository for details](https://github.com/gnes-ai/demo-poems-ir) and follow the instructions to reproduce. + +

+ +query + +

+ ### Build your first GNES app on local machine Let's start with a typical indexing procedure by writing a YAML config (see the left column of the table): diff --git a/gnes/cli/parser.py b/gnes/cli/parser.py index 7dab8625..93b64176 100644 --- a/gnes/cli/parser.py +++ b/gnes/cli/parser.py @@ -137,8 +137,8 @@ def set_service_parser(parser=None): help='port for controlling the service, default a random port between [49152, 65536]') parser.add_argument('--timeout', type=int, default=-1, help='timeout (ms) of all communication, -1 for waiting forever') - parser.add_argument('--dump_interval', type=int, default=5, - help='serialize the service to a file every n seconds') + parser.add_argument('--dump_interval', type=int, default=-1, + help='serialize the service to a file every n seconds, -1 means --read_only') parser.add_argument('--read_only', action='store_true', default=False, help='do not allow the service to modify the model, ' 'dump_interval will be ignored') diff --git a/gnes/indexer/doc/leveldb.py b/gnes/indexer/doc/leveldb.py index d06e3325..2ae70428 100644 --- a/gnes/indexer/doc/leveldb.py +++ b/gnes/indexer/doc/leveldb.py @@ -66,21 +66,6 @@ def query(self, keys: List[int], *args, **kwargs) -> List['gnes_pb2.Document']: res.append(self._NOT_FOUND) return res - def update_counter(self, docs: List['gnes_pb2.Document'], *args, **kwargs): - self._num_doc += len(docs) - self._num_chunks += sum(list(map(lambda x: len(x.chunks), docs))) - - @property - def num_doc(self): - return self._num_doc - - @property - def num_chunks(self): - return self._num_chunks - - @property - def num_chunks_avg(self): - return self._num_chunks / self._num_doc def close(self): super().close() diff --git a/tests/test_annoyindexer.py b/tests/test_annoyindexer.py index 66fe533b..3dde7494 100644 --- a/tests/test_annoyindexer.py +++ b/tests/test_annoyindexer.py @@ -4,6 +4,7 @@ import numpy as np from gnes.indexer.chunk.annoy import AnnoyIndexer +from gnes.indexer.chunk.numpy import 
NumpyIndexer class TestAnnoyIndexer(unittest.TestCase): @@ -27,3 +28,19 @@ def test_search(self): a.close() a.dump() a.dump_yaml() + + def test_numpy_indexer(self): + a = NumpyIndexer() + a.add(list(zip(list(range(10)), list(range(10)))), self.toy_data, [1.] * 10) + self.assertEqual(a.num_chunks, 10) + self.assertEqual(a.num_docs, 10) + top_1 = [i[0][0] for i in a.query(self.toy_data, top_k=1)] + self.assertEqual(top_1, list(range(10))) + a.close() + a.dump() + a.dump_yaml() + b = NumpyIndexer.load_yaml(a.yaml_full_path) + self.assertEqual(b.num_chunks, 10) + self.assertEqual(b.num_docs, 10) + top_1 = [i[0][0] for i in b.query(self.toy_data, top_k=1)] + self.assertEqual(top_1, list(range(10)))