From 9ad21b91eb0567f5938c3fe172623ca5319631f2 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sun, 20 Sep 2020 13:52:42 +0800 Subject: [PATCH 01/20] =?UTF-8?q?=F0=9F=90=9B=20Fix=20generator=20bug.=20[?= =?UTF-8?q?#393]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/generators.py | 9 ++++++++ tests/test_generator.py | 47 ++++++++++++++++++++++++++++++---------- tests/test_tokenizers.py | 4 ++-- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/kashgari/generators.py b/kashgari/generators.py index d390b213..7e036198 100644 --- a/kashgari/generators.py +++ b/kashgari/generators.py @@ -100,6 +100,15 @@ def __iter__(self) -> Iterator: max_position=self.max_position) yield x_tensor, y_tensor batch_x, batch_y = [], [] + if batch_x: + x_tensor = self.text_processor.transform(batch_x, + seq_length=self.seq_length, + max_position=self.max_position, + segment=self.segment) + y_tensor = self.label_processor.transform(batch_y, + seq_length=self.seq_length, + max_position=self.max_position) + yield x_tensor, y_tensor def take(self, batch_count: int = None) -> Any: """ diff --git a/tests/test_generator.py b/tests/test_generator.py index 0aa9b29f..1956ba80 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -32,17 +32,42 @@ def test_batch_generator(self): text_processor.build_vocab_generator([corpus_gen]) label_processor.build_vocab_generator([corpus_gen]) - batch_dataset1 = BatchDataSet(corpus_gen, - text_processor=text_processor, - label_processor=label_processor, - segment=False, - seq_length=None, - max_position=100, - batch_size=12) - - duplicate_len = len(batch_dataset1) - assert len(list(batch_dataset1.take(duplicate_len))) == duplicate_len - assert len(list(batch_dataset1.take(1))) == 1 + batch_dataset = BatchDataSet(corpus_gen, + text_processor=text_processor, + label_processor=label_processor, + segment=False, + seq_length=None, + max_position=100, + batch_size=12) + + duplicate_len = len(batch_dataset) + assert len(list(batch_dataset.take(duplicate_len))) == duplicate_len + assert len(list(batch_dataset.take(1))) == 1 + + def test_huge_batch_size(self): + x, y = [['this', 'is', 'Jack', 'Ma']], [['O', 'O', 'B', 'I']] + + text_processor = SequenceProcessor() + label_processor = SequenceProcessor(build_vocab_from_labels=True, min_count=1) + + corpus_gen = CorpusGenerator(x, y) + + text_processor.build_vocab_generator([corpus_gen]) + label_processor.build_vocab_generator([corpus_gen]) + + batch_dataset = BatchDataSet(corpus_gen, + text_processor=text_processor, + label_processor=label_processor, + segment=False, + seq_length=None, + max_position=100, + batch_size=512) + + for x_b, y_b in batch_dataset.take(1): + print(y_b.shape) + duplicate_len = len(batch_dataset) + assert len(list(batch_dataset.take(duplicate_len))) == duplicate_len + assert len(list(batch_dataset.take(1))) == 1 if __name__ == '__main__': diff --git a/tests/test_tokenizers.py b/tests/test_tokenizers.py index c71b38f9..fee5ffd0 100644 --- a/tests/test_tokenizers.py +++ b/tests/test_tokenizers.py @@ -17,12 +17,12 @@ class TestUtils(unittest.TestCase): def test_jieba_tokenizer(self): - os.system("pip uninstall -y jieba") + os.system("pip3 uninstall -y jieba") with self.assertRaises(ModuleNotFoundError): _ = JiebaTokenizer() - os.system("pip install jieba") + os.system("pip3 install jieba") t = JiebaTokenizer() assert ['你好', '世界', '!', ' ', 'Hello', ' ', 'World'] == t.tokenize('你好世界! Hello World') From 8b1d83c7192ea9f3d6ccaf1b74b8fcb28d71b309 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sun, 20 Sep 2020 13:52:52 +0800 Subject: [PATCH 02/20] =?UTF-8?q?=F0=9F=94=87=20Remove=20logs.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/tasks/abs_task_model.py | 1 - kashgari/tasks/labeling/abc_model.py | 1 - 2 files changed, 2 deletions(-) diff --git a/kashgari/tasks/abs_task_model.py b/kashgari/tasks/abs_task_model.py index 567856f4..b09623e8 100644 --- a/kashgari/tasks/abs_task_model.py +++ b/kashgari/tasks/abs_task_model.py @@ -106,7 +106,6 @@ def load_model(cls, model_path: str) -> Union["ABCLabelingModel", "ABCClassifica tf_model_str = json.dumps(model_config['tf_model']) - print(tf_model_str) model.tf_model = tf.keras.models.model_from_json(tf_model_str, custom_objects=kashgari.custom_objects) diff --git a/kashgari/tasks/labeling/abc_model.py b/kashgari/tasks/labeling/abc_model.py index 329b6c2a..1fe84747 100644 --- a/kashgari/tasks/labeling/abc_model.py +++ b/kashgari/tasks/labeling/abc_model.py @@ -265,7 +265,6 @@ def predict(self, else: seq_length = None - print(self.crf_layer) tensor = self.text_processor.transform(x_data, segment=self.embedding.segment, seq_length=seq_length, From f5206154922cb053e4238ca04a8c25fd6d6b792e Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sat, 24 Oct 2020 11:22:08 +0800 Subject: [PATCH 03/20] =?UTF-8?q?=E2=9C=A8=20Save=20model=20using=20save?= =?UTF-8?q?=5Fmodel=20format.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/tasks/abs_task_model.py | 28 ++++++++++++------- .../tasks/classification/bi_lstm_model.py | 25 ----------------- 2 files changed, 18 insertions(+), 35 deletions(-) diff --git a/kashgari/tasks/abs_task_model.py b/kashgari/tasks/abs_task_model.py index b09623e8..c425bf9b 100644 --- a/kashgari/tasks/abs_task_model.py +++ b/kashgari/tasks/abs_task_model.py @@ -30,14 +30,13 @@ class ABCTaskModel(ABC): def __init__(self) -> None: - self.embedding: ABCEmbedding + self.tf_model: tf.keras.Model = None + self.embedding: ABCEmbedding = None self.hyper_parameters: Dict[str, Any] self.sequence_length: int self.text_processor: ABCProcessor self.label_processor: ABCProcessor - self.tf_model: tf.keras.Model - def to_dict(self) -> Dict[str, Any]: model_json_str = self.tf_model.to_json() @@ -76,11 +75,12 @@ def default_hyper_parameters(cls) -> Dict[str, Dict[str, Any]]: """ raise NotImplementedError - def save(self, model_path: str) -> str: + def save(self, model_path: str, h5_weight: bool = False) -> str: """ Save model Args: - model_path: + model_path: target model path + h5_weight: whether using original h5 format or new saved_model format """ pathlib.Path(model_path).mkdir(exist_ok=True, parents=True) model_path = os.path.abspath(model_path) @@ -88,9 +88,12 @@ def save(self, model_path: str) -> str: with open(os.path.join(model_path, 'model_config.json'), 'w') as f: f.write(json.dumps(self.to_dict(), indent=2, ensure_ascii=False)) f.close() - - self.embedding.embed_model.save_weights(os.path.join(model_path, 'embed_model_weights.h5')) - self.tf_model.save_weights(os.path.join(model_path, 'model_weights.h5')) # type: ignore + if h5_weight: + self.embedding.embed_model.save_weights(os.path.join(model_path, 'embed_model_weights.h5')) + self.tf_model.save_weights(os.path.join(model_path, 'model_weights.h5')) # type: ignore + else: + self.embedding.embed_model.save(os.path.join(model_path, 'embed_model')) + self.tf_model.save(os.path.join(model_path, 'full_model')) logger.info('model saved to {}'.format(os.path.abspath(model_path))) return model_path @@ -112,8 +115,13 @@ def load_model(cls, model_path: str) -> Union["ABCLabelingModel", "ABCClassifica if isinstance(model.tf_model.layers[-1], KConditionalRandomField): model.layer_crf = model.tf_model.layers[-1] - model.tf_model.load_weights(os.path.join(model_path, 'model_weights.h5')) - model.embedding.embed_model.load_weights(os.path.join(model_path, 'embed_model_weights.h5')) + h5_model_path = os.path.join(model_path, 'model_weights.h5') + if os.path.exists(h5_model_path): + model.tf_model.load_weights(h5_model_path) + model.embedding.embed_model.load_weights(os.path.join(model_path, 'embed_model_weights.h5')) + else: + model.tf_model = tf.keras.models.load_model(os.path.join(model_path, 'embed_model')) + model.tf_model = tf.keras.models.load_model(os.path.join(model_path, 'full_model')) return model @abstractmethod diff --git a/kashgari/tasks/classification/bi_lstm_model.py b/kashgari/tasks/classification/bi_lstm_model.py index 4aeaa71c..f0efb82f 100644 --- a/kashgari/tasks/classification/bi_lstm_model.py +++ b/kashgari/tasks/classification/bi_lstm_model.py @@ -46,28 +46,3 @@ def build_model_arc(self) -> None: tensor = layer(tensor) self.tf_model: keras.Model = keras.Model(embed_model.inputs, tensor) - - -if __name__ == "__main__": - import logging - - logging.basicConfig(level='DEBUG') - - from kashgari.embeddings import WordEmbedding - - w2v_path = '/Users/brikerman/Desktop/nlp/language_models/w2v/sgns.weibo.bigram-char' - w2v = WordEmbedding(w2v_path, w2v_kwargs={'limit': 10000}) - - from kashgari.corpus import SMP2018ECDTCorpus - - x, y = SMP2018ECDTCorpus.load_data() - - model = BiLSTM_Model(embedding=w2v) - model.fit(x, y) - - # 或者集成 CorpusGenerator 实现自己的数据迭代器 - # train_gen = CorpusGenerator() - # model.fit_generator(train_gen=train_gen, - # valid_gen=valid_gen, - # batch_size=batch_size, - # epochs=epochs) From 5329b37585b6890a827c15edfac0a16ac7c77866 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Sat, 24 Oct 2020 11:57:54 +0800 Subject: [PATCH 04/20] =?UTF-8?q?=F0=9F=90=9B=20Fix=20corpus=20cutter=20bu?= =?UTF-8?q?g.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/corpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kashgari/corpus.py b/kashgari/corpus.py index 24e63fcf..75dd1c73 100644 --- a/kashgari/corpus.py +++ b/kashgari/corpus.py @@ -176,7 +176,7 @@ def load_data(cls, raise ModuleNotFoundError( "please install jieba, `$ pip install jieba`") x_data = [list(jieba.cut(item)) for item in df['query'].to_list()] - elif 'char': + elif cutter == 'char': x_data = [list(item) for item in df['query'].to_list()] y_data = df['label'].to_list() From 5e81950f580628e5284343ad561ca2c189e5fd79 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 12:01:34 +0800 Subject: [PATCH 05/20] =?UTF-8?q?=E2=9C=A8=20Add=20convert=20to=20saved=20?= =?UTF-8?q?model=20for=20tf-serving.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/processors/__init__.py | 2 + kashgari/processors/abc_processor.py | 2 + kashgari/processors/sequence_processor.py | 5 ++ kashgari/processors/tools.py | 34 ++++++++++++ kashgari/tasks/abs_task_model.py | 25 +++------ kashgari/tasks/classification/bi_gru_model.py | 4 -- kashgari/utils/__init__.py | 1 + kashgari/utils/model.py | 52 +++++++++++++++++++ 8 files changed, 102 insertions(+), 23 deletions(-) create mode 100644 kashgari/processors/tools.py create mode 100644 kashgari/utils/model.py diff --git a/kashgari/processors/__init__.py b/kashgari/processors/__init__.py index 103ff4b4..f860b873 100644 --- a/kashgari/processors/__init__.py +++ b/kashgari/processors/__init__.py @@ -11,5 +11,7 @@ from .class_processor import ClassificationProcessor from .sequence_processor import SequenceProcessor +from .tools import load_processors_from_model + if __name__ == "__main__": pass diff --git a/kashgari/processors/abc_processor.py b/kashgari/processors/abc_processor.py index c8fe8df0..b6c856cc 100644 --- a/kashgari/processors/abc_processor.py +++ b/kashgari/processors/abc_processor.py @@ -42,6 +42,8 @@ def __init__(self, **kwargs: Any) -> None: self.token_bos: str = kwargs.get('token_bos', '[CLS]') # type: ignore self.token_eos: str = kwargs.get('token_eos', '[SEP]') # type: ignore + self._sequence_length_from_saved_model: Optional[int] = None + @property def vocab_size(self) -> int: return len(self.vocab2idx) diff --git a/kashgari/processors/sequence_processor.py b/kashgari/processors/sequence_processor.py index 6b1f63b6..020434ee 100644 --- a/kashgari/processors/sequence_processor.py +++ b/kashgari/processors/sequence_processor.py @@ -106,6 +106,11 @@ def transform(self, max_position: int = None, segment: bool = False) -> np.ndarray: seq_length_from = "" + + # An ugly patch for tf-serving use case. + if seq_length is None and self._sequence_length_from_saved_model is not None: + seq_length = self._sequence_length_from_saved_model + if seq_length is None: seq_length_from = "max length of the samples" seq_length = max([len(i) for i in samples]) + 2 diff --git a/kashgari/processors/tools.py b/kashgari/processors/tools.py new file mode 100644 index 00000000..48d8498b --- /dev/null +++ b/kashgari/processors/tools.py @@ -0,0 +1,34 @@ +# encoding: utf-8 + +# author: BrikerMan +# contact: eliyar917@gmail.com +# blog: https://eliyar.biz + +# file: tools.py +# time: 11:24 上午 + +import json +import os +from typing import Tuple + +from kashgari.processors.abc_processor import ABCProcessor +from kashgari.utils.serialize import load_data_object + + +def load_processors_from_model(model_path: str) -> Tuple[ABCProcessor, ABCProcessor]: + with open(os.path.join(model_path, 'model_config.json'), 'r') as f: + model_config = json.loads(f.read()) + text_processor: ABCProcessor = load_data_object(model_config['text_processor']) + label_processor: ABCProcessor = load_data_object(model_config['label_processor']) + + sequence_length_from_saved_model = model_config['config'].get('sequence_length', None) + text_processor._sequence_length_from_saved_model = sequence_length_from_saved_model + label_processor._sequence_length_from_saved_model = sequence_length_from_saved_model + + return text_processor, label_processor + + +if __name__ == "__main__": + text_processor, label_processor = load_processors_from_model('/Users/brikerman/Desktop/tf-serving/1603683152') + x = text_processor.transform([list('我想你了')]) + print(x.tolist()) diff --git a/kashgari/tasks/abs_task_model.py b/kashgari/tasks/abs_task_model.py index c425bf9b..b40c9bc4 100644 --- a/kashgari/tasks/abs_task_model.py +++ b/kashgari/tasks/abs_task_model.py @@ -47,6 +47,7 @@ def to_dict(self) -> Dict[str, Any]: '__module__': self.__class__.__module__, 'config': { 'hyper_parameters': self.hyper_parameters, # type: ignore + 'sequence_length': self.sequence_length # type: ignore }, 'embedding': self.embedding.to_dict(), # type: ignore 'text_processor': self.text_processor.to_dict(), @@ -88,12 +89,9 @@ def save(self, model_path: str, h5_weight: bool = False) -> str: with open(os.path.join(model_path, 'model_config.json'), 'w') as f: f.write(json.dumps(self.to_dict(), indent=2, ensure_ascii=False)) f.close() - if h5_weight: - self.embedding.embed_model.save_weights(os.path.join(model_path, 'embed_model_weights.h5')) - self.tf_model.save_weights(os.path.join(model_path, 'model_weights.h5')) # type: ignore - else: - self.embedding.embed_model.save(os.path.join(model_path, 'embed_model')) - self.tf_model.save(os.path.join(model_path, 'full_model')) + + self.embedding.embed_model.save_weights(os.path.join(model_path, 'embed_model_weights.h5')) + self.tf_model.save_weights(os.path.join(model_path, 'model_weights.h5')) # type: ignore logger.info('model saved to {}'.format(os.path.abspath(model_path))) return model_path @@ -115,13 +113,8 @@ def load_model(cls, model_path: str) -> Union["ABCLabelingModel", "ABCClassifica if isinstance(model.tf_model.layers[-1], KConditionalRandomField): model.layer_crf = model.tf_model.layers[-1] - h5_model_path = os.path.join(model_path, 'model_weights.h5') - if os.path.exists(h5_model_path): - model.tf_model.load_weights(h5_model_path) - model.embedding.embed_model.load_weights(os.path.join(model_path, 'embed_model_weights.h5')) - else: - model.tf_model = tf.keras.models.load_model(os.path.join(model_path, 'embed_model')) - model.tf_model = tf.keras.models.load_model(os.path.join(model_path, 'full_model')) + model.tf_model.load_weights(os.path.join(model_path, 'model_weights.h5')) + model.embedding.embed_model.load_weights(os.path.join(model_path, 'embed_model_weights.h5')) return model @abstractmethod @@ -129,9 +122,3 @@ def build_model(self, x_data: Any, y_data: Any) -> None: raise NotImplementedError - - -if __name__ == "__main__": - path = '/var/folders/x3/_dg9_drj42l_cc70tsqkpqrw0000gn/T/1590915853.4571211' - m = ABCTaskModel.load_model(path) - m.tf_model.summary() diff --git a/kashgari/tasks/classification/bi_gru_model.py b/kashgari/tasks/classification/bi_gru_model.py index 48692a85..ccfd67a7 100644 --- a/kashgari/tasks/classification/bi_gru_model.py +++ b/kashgari/tasks/classification/bi_gru_model.py @@ -44,7 +44,3 @@ def build_model_arc(self) -> None: tensor = layer(tensor) self.tf_model = keras.Model(embed_model.inputs, tensor) - - -if __name__ == "__main__": - pass diff --git a/kashgari/utils/__init__.py b/kashgari/utils/__init__.py index e15cc02b..ccacb99d 100644 --- a/kashgari/utils/__init__.py +++ b/kashgari/utils/__init__.py @@ -17,6 +17,7 @@ from .data import unison_shuffled_copies from .multi_label import MultiLabelBinarizer from .serialize import load_data_object +from .model import convert_to_saved_model if TYPE_CHECKING: from kashgari.tasks.labeling import ABCLabelingModel diff --git a/kashgari/utils/model.py b/kashgari/utils/model.py new file mode 100644 index 00000000..8241ee12 --- /dev/null +++ b/kashgari/utils/model.py @@ -0,0 +1,52 @@ +# encoding: utf-8 + +# author: BrikerMan +# contact: eliyar917@gmail.com +# blog: https://eliyar.biz + +# file: model.py +# time: 10:57 上午 + +import json +import os +import pathlib +import time +from typing import Union + +from kashgari.tasks.abs_task_model import ABCTaskModel + + +def convert_to_saved_model(model: ABCTaskModel, + model_path: str, + version: Union[str, int] = None, + signatures=None, + options=None): + """ + Export model for tensorflow serving + Args: + model: Target model. + model_path: The path to which the SavedModel will be stored. + version: The model version code, default timestamp + signatures: Signatures to save with the SavedModel. Applicable to the + 'tf' format only. Please see the `signatures` argument in + `tf.saved_model.save` for details. + options: Optional `tf.saved_model.SaveOptions` object that specifies + options for saving to SavedModel. + + """ + if not isinstance(model, ABCTaskModel): + raise ValueError("Only supports the classification model and labeling model") + if version is None: + version = round(time.time()) + export_path = os.path.join(model_path, str(version)) + + pathlib.Path(export_path).mkdir(exist_ok=True, parents=True) + model.tf_model.save(export_path, save_format='tf', signatures=signatures, options=options) + + with open(os.path.join(export_path, 'model_config.json'), 'w') as f: + f.write(json.dumps(model.to_dict(), indent=2, ensure_ascii=True)) + f.close() + + +if __name__ == "__main__": + pass From e7f72aa7cec13625495979182d670c8aaf5eae18 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 12:53:54 +0800 Subject: [PATCH 06/20] =?UTF-8?q?=F0=9F=93=9D=20Add=20tf-serving=20documen?= =?UTF-8?q?t.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/advance-use/tensorflow-serving.md | 121 +++++++++++++++++++++++++ docs/index.rst | 6 ++ 2 files changed, 127 insertions(+) create mode 100644 docs/advance-use/tensorflow-serving.md diff --git a/docs/advance-use/tensorflow-serving.md b/docs/advance-use/tensorflow-serving.md new file mode 100644 index 00000000..d25ec32e --- /dev/null +++ b/docs/advance-use/tensorflow-serving.md @@ -0,0 +1,121 @@ +# Tensorflow Serving + +```python +from kashgari.tasks.classification import BiGRU_Model +from kashgari.corpus import SMP2018ECDTCorpus +from kashgari import utils + +train_x, train_y = SMP2018ECDTCorpus.load_data() + +model = BiGRU_Model() +model.fit(train_x, train_y) + +# Save model +utils.convert_to_saved_model(model, + model_path="saved_model/bgru", + version=1) +``` + +Then run tensorflow-serving. + +```bash +docker run -t --rm -p 8501:8501 -v "/saved_model:/models/" -e MODEL_NAME=bgru tensorflow/serving +``` + +Load processor from model, then predict with serving. + +We need to check model input keys first. + +```python +import requests +res = requests.get("http://localhost:8501/v1/models/bgru/metadata") +inputs = res.json()['metadata']['signature_def']['signature_def']['serving_default']['inputs'] +input_sample_keys = list(inputs.keys()) +print(input_sample_keys) +# ['Input-Token', 'Input-Segment'] +``` + +If we have only one input key, aka we are not using BERT like embedding, + we need to pass json in this format to predict endpoint. + +```json +{ + "instances": [ + [2, 1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [2, 9, 41, 459, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ] +} +``` + +Here is the code. + +```python +import requests +import numpy as np +from kashgari.processors import load_processors_from_model + +text_processor, label_processor = load_processors_from_model('/Users/brikerman/Desktop/tf-serving/1603683152') + +samples = [ + ['hello', 'world'], + ['你', '好', '世', '界'] +] +tensor = text_processor.transform(samples) + +instances = [i.tolist() for i in tensor] + +# predict +r = requests.post("http://localhost:8501/v1/models/bgru:predict", json={"instances": instances}) +predictions = r.json()['predictions'] + +# Convert result back to labels +labels = label_processor.inverse_transform(np.array(predictions).argmax(-1)) +print(labels) +``` + +If we are using Bert, then we need to handle multi input fields, + for example we get this two keys `['Input-Token', 'Input-Segment']` from metadata endpoint. + Then we need to pass a json in this format. + +```json +[ + { + "Input-Token": [2, 1, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "Input-Segment": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + }, + { + "Input-Token": [2, 9, 41, 459, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0], + "Input-Segment": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + } +] + +``` + +Here is the code. + +```python +import requests +import numpy as np +from kashgari.processors import load_processors_from_model + +text_processor, label_processor = load_processors_from_model('/Users/brikerman/Desktop/tf-serving/1603683152') + +samples = [ + ['hello', 'world'], + ['你', '好', '世', '界'] +] +tensor = text_processor.transform(samples) + +instances = [{ + "Input-Token": i.tolist(), + "Input-Segment": np.zeros(i.shape).tolist() +} for i in tensor] + +# predict +r = requests.post("http://localhost:8501/v1/models/bgru:predict", json={"instances": instances}) +predictions = r.json()['predictions'] + +# Convert result back to labels +labels = label_processor.inverse_transform(np.array(predictions).argmax(-1)) +print(labels) +``` diff --git a/docs/index.rst b/docs/index.rst index 59af1eae..85887819 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,6 +24,12 @@ embeddings/bert-embedding.rst embeddings/transformer-embedding.rst +.. toctree:: + :maxdepth: 2 + :caption: Advanced Use Cases + + advance-use/tensorflow-serving.md + .. toctree:: :maxdepth: 3 :caption: API From b8df1ad1ecd364105ba294ddc78d35ccbc974442 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 13:04:54 +0800 Subject: [PATCH 07/20] =?UTF-8?q?=F0=9F=9A=A8=20Fix=20compiler=20/=20linte?= =?UTF-8?q?r=20warnings.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/utils/model.py | 6 +++--- tests/test_classification/test_bi_lstm_model.py | 16 +++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/kashgari/utils/model.py b/kashgari/utils/model.py index 8241ee12..cdcde206 100644 --- a/kashgari/utils/model.py +++ b/kashgari/utils/model.py @@ -11,7 +11,7 @@ import os import pathlib import time -from typing import Union +from typing import Union, Any from kashgari.tasks.abs_task_model import ABCTaskModel @@ -19,8 +19,8 @@ def convert_to_saved_model(model: ABCTaskModel, model_path: str, version: Union[str, int] = None, - signatures=None, - options=None): + signatures: Any = None, + options: Any = None) -> None: """ Export model for tensorflow serving Args: diff --git a/tests/test_classification/test_bi_lstm_model.py b/tests/test_classification/test_bi_lstm_model.py index 87b66f66..5a2b8fba 100644 --- a/tests/test_classification/test_bi_lstm_model.py +++ b/tests/test_classification/test_bi_lstm_model.py @@ -8,16 +8,14 @@ # time: 1:57 下午 import os +import tempfile import time import unittest -import tempfile -import numpy as np - -from tests.test_macros import TestMacros from kashgari.corpus import SMP2018ECDTCorpus from kashgari.embeddings import WordEmbedding from kashgari.tasks.classification import BiLSTM_Model +from tests.test_macros import TestMacros class TestBiLSTM_Model(unittest.TestCase): @@ -63,6 +61,15 @@ def test_basic_use(self): # Make sure use sigmoid as activation function assert new_model.tf_model.layers[-1].activation.__name__ == 'softmax' + # TF Serving Test + from kashgari.utils import convert_to_saved_model + convert_to_saved_model(new_model, + os.path.join(model_path, 'serving'), + version=1) + + from kashgari.processors import load_processors_from_model + _ = load_processors_from_model(os.path.join(model_path, 'serving', '1')) + def test_multi_label(self): corpus = TestMacros.jigsaw_mini_corpus model = self.TASK_MODEL_CLASS(sequence_length=20, multi_label=True) @@ -112,6 +119,5 @@ def test_with_word_embedding(self): _ = new_model.predict(valid_x[:20]) - if __name__ == '__main__': unittest.main() From 30085da5082490933e7c1b274008c624bf1e9a6c Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 13:16:32 +0800 Subject: [PATCH 08/20] =?UTF-8?q?=F0=9F=9A=A7=20Work=20in=20progress.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 24 ++++++++++++++++++++++++ requirements.dev.txt | 1 + 2 files changed, 25 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5137986e..931d605c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,6 +7,30 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + group: [ 1, 2, 3, 4, 5 ] + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Install deps + run: | + python -m pip install --upgrade pip + pip install -r requirements.dev.txt + pip install -r requirements.txt + + - name: Run pytest + run: pytest --cov --splits 5 --group ${{ matrix.group }} tests/ + - name: Upload coverage + uses: actions/upload-artifact@v1 + with: + name: coverage${{ matrix.group }} + path: .coverage sonarcloud: name: SonarCloud runs-on: ubuntu-latest diff --git a/requirements.dev.txt b/requirements.dev.txt index ad075ea5..8bc9b794 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -4,6 +4,7 @@ flake8-builtins mypy pytest>=5.4.3 pytest-cov +pytest-split coveralls # documents From c820baa3ee1efdee9c4dee8ce69c73315aeadb57 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 13:27:51 +0800 Subject: [PATCH 09/20] =?UTF-8?q?=F0=9F=9A=A7=20Work=20in=20progress.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/{build.yml => test.yml} | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) rename .github/workflows/{build.yml => test.yml} (69%) diff --git a/.github/workflows/build.yml b/.github/workflows/test.yml similarity index 69% rename from .github/workflows/build.yml rename to .github/workflows/test.yml index 931d605c..9935f96a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: Build +name: Test on: push: branches: @@ -8,10 +8,12 @@ on: types: [opened, synchronize, reopened] jobs: test: + name: Pytest runs-on: ubuntu-latest strategy: matrix: group: [ 1, 2, 3, 4, 5 ] + tensorflow_version: [ 2.1.0, 2.2.0, 2.3.0 ] steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 @@ -21,23 +23,29 @@ jobs: - name: Install deps run: | python -m pip install --upgrade pip + pip install tensorflow==${{ matrix.tensorflow_version }} pip install -r requirements.dev.txt pip install -r requirements.txt - name: Run pytest - run: pytest --cov --splits 5 --group ${{ matrix.group }} tests/ + run: pytest --doctest-modules --junitxml=test-reports/junit.xml --cov=kashgari --cov-report=xml:coverage.xml --cov-report term --cov-report=html:htmlcov --cov-config .coveragerc --cov --splits 5 --group ${{ matrix.group }} tests/test_processor - name: Upload coverage uses: actions/upload-artifact@v1 with: name: coverage${{ matrix.group }} path: .coverage + sonarcloud: name: SonarCloud runs-on: ubuntu-latest + needs: test steps: - uses: actions/checkout@v2 with: fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + - uses: actions/download-artifact@v2 + - name: Display structure of downloaded files + run: ls -R - name: SonarCloud Scan uses: SonarSource/sonarcloud-github-action@master env: From 9bd51a1d70f9a36e11e67a7b24703515ccf52964 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 13:44:11 +0800 Subject: [PATCH 10/20] =?UTF-8?q?=F0=9F=9A=A7=20Work=20in=20progress.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 45 ++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9935f96a..cd7fcfe2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,13 +7,27 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: +# lint: +# name: Lint +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v2 +# - name: Set up Python 3.8 +# uses: actions/setup-python@v1 +# with: +# python-version: 3.8 +# - name: Install deps +# run: | +# pip install -r requirements.dev.txt +# - name: Run lint script +# run: sh ./scripts/lint.sh test: - name: Pytest + name: "Test with TF ${{ matrix.tensorflow_version }} - ${{ matrix.group }}" runs-on: ubuntu-latest strategy: matrix: - group: [ 1, 2, 3, 4, 5 ] - tensorflow_version: [ 2.1.0, 2.2.0, 2.3.0 ] + group: [ 1, 2 ] + tensorflow_version: [ 2.2.0, 2.3.0 ] steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 @@ -28,12 +42,29 @@ jobs: pip install -r requirements.txt - name: Run pytest - run: pytest --doctest-modules --junitxml=test-reports/junit.xml --cov=kashgari --cov-report=xml:coverage.xml --cov-report term --cov-report=html:htmlcov --cov-config .coveragerc --cov --splits 5 --group ${{ matrix.group }} tests/test_processor + run: 'pytest + --doctest-modules + --junitxml=test-reports/junit-${{ matrix.tensorflow_version }}-${{ matrix.group }}.xml + --cov=kashgari + --cov-report=xml:cov-reports/coverage-${{ matrix.tensorflow_version }}-${{ matrix.group }}.xml + --cov-report term + --cov-config .coveragerc + --cov + --splits 2 + --group ${{ matrix.group }} + tests/test_processor' + + - name: Upload unit test + uses: actions/upload-artifact@v2 + with: + name: junitxml-${{ matrix.group }} + path: test-reports + - name: Upload coverage - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v2 with: - name: coverage${{ matrix.group }} - path: .coverage + name: coverage-${{ matrix.group }} + path: cov-reports sonarcloud: name: SonarCloud From 67a3fb6dffc7931048336b55c9759b05c586ee22 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 15:02:55 +0800 Subject: [PATCH 11/20] =?UTF-8?q?=F0=9F=91=B7=20Add=20Github=20CI.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 53 ++++++++---- .test_durations | 162 +++++++++++++++++++++++++++++++++++++ sonar-project.properties | 3 + 3 files changed, 202 insertions(+), 16 deletions(-) create mode 100644 .test_durations diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cd7fcfe2..948a133b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,23 +4,24 @@ on: branches: - v2-main - v2-dev + - v2/github-actions pull_request: types: [opened, synchronize, reopened] jobs: -# lint: -# name: Lint -# runs-on: ubuntu-latest -# steps: -# - uses: actions/checkout@v2 -# - name: Set up Python 3.8 -# uses: actions/setup-python@v1 -# with: -# python-version: 3.8 -# - name: Install deps -# run: | -# pip install -r requirements.dev.txt -# - name: Run lint script -# run: sh ./scripts/lint.sh + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Install deps + run: | + pip install -r requirements.dev.txt + - name: Run lint script + run: sh ./scripts/lint.sh test: name: "Test with TF ${{ matrix.tensorflow_version }} - ${{ matrix.group }}" runs-on: ubuntu-latest @@ -54,16 +55,19 @@ jobs: --group ${{ matrix.group }} tests/test_processor' + - name: Display structure of all files + run: ls -R + - name: Upload unit test uses: actions/upload-artifact@v2 with: - name: junitxml-${{ matrix.group }} + name: junitxml-${{ matrix.tensorflow_version }}-${{ matrix.group }} path: test-reports - name: Upload coverage uses: actions/upload-artifact@v2 with: - name: coverage-${{ matrix.group }} + name: coverage-${{ matrix.tensorflow_version }}-${{ matrix.group }} path: cov-reports sonarcloud: @@ -75,6 +79,14 @@ jobs: with: fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis - uses: actions/download-artifact@v2 + with: + path: artifacts + - name: Display structure of downloaded files + run: ls -R + - name: Copy Artifacts to target file + run: | + mkdir -p test-reports && cp artifacts/junit*/* test-reports + mkdir -p cov-reports && cp artifacts/cov*/* cov-reports - name: Display structure of downloaded files run: ls -R - name: SonarCloud Scan @@ -82,3 +94,12 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} +# - name: Publish Unit Test Results +# uses: EnricoMi/publish-unit-test-result-action@v1.3 +# if: always() +# with: +# github_token: ${{ secrets.GITHUB_TOKEN }} +# check_name: Unit Test Results +# files: test-results/*.xml +# report_individual_runs: true +# deduplicate_classes_by_file_name: false diff --git a/.test_durations b/.test_durations new file mode 100644 index 00000000..e6eae91c --- /dev/null +++ b/.test_durations @@ -0,0 +1,162 @@ +[ + [ + "tests/test_corpus.py::TestChineseDailyNerCorpus::test_load_data", + 0.9164261159999998 + ], + [ + "tests/test_corpus.py::TestSMP2018ECDTCorpus::test_load_data", + 0.10876109099999942 + ], + [ + "tests/test_generator.py::TestGenerator::test_batch_generator", + 0.21724430800000016 + ], + [ + "tests/test_generator.py::TestGenerator::test_corpus_generator", + 0.0005134079999997709 + ], + [ + "tests/test_generator.py::TestGenerator::test_huge_batch_size", + 0.0018426819999994848 + ], + [ + "tests/test_tokenizers.py::TestUtils::test_base_tokenizer", + 0.000504422999999754 + ], + [ + "tests/test_tokenizers.py::TestUtils::test_bert_tokenizer", + 0.0019401849999995946 + ], + [ + "tests/test_tokenizers.py::TestUtils::test_jieba_tokenizer", + 6.517317019 + ], + [ + "tests/test_utils.py::TestUtils::test_get_list_subset", + 0.00043809899999835977 + ], + [ + "tests/test_utils.py::TestUtils::test_unison_shuffled_copies", + 0.0006712190000008889 + ], + [ + "tests/test_classification/test_bi_gru_model.py::TestBiGRU_Model::test_basic_use", + 28.251553606999998 + ], + [ + "tests/test_classification/test_bi_gru_model.py::TestBiGRU_Model::test_multi_label", + 18.903764830000007 + ], + [ + "tests/test_classification/test_bi_gru_model.py::TestBiGRU_Model::test_with_word_embedding", + 11.325183209999999 + ], + [ + "tests/test_classification/test_bi_lstm_model.py::TestBiLSTM_Model::test_basic_use", + 28.38882036000001 + ], + [ + "tests/test_classification/test_bi_lstm_model.py::TestBiLSTM_Model::test_multi_label", + 23.973969605999997 + ], + [ + "tests/test_classification/test_bi_lstm_model.py::TestBiLSTM_Model::test_with_word_embedding", + 12.634332431999994 + ], + [ + "tests/test_classification/test_cnn_attention_model.py::TestCnnAttention_Model::test_basic_use", + 3.0587875910000264 + ], + [ + "tests/test_classification/test_cnn_attention_model.py::TestCnnAttention_Model::test_multi_label", + 11.14959770699997 + ], + [ + "tests/test_classification/test_cnn_attention_model.py::TestCnnAttention_Model::test_with_word_embedding", + 1.5054829560000371 + ], + [ + "tests/test_classification/test_cnn_gru_model.py::TestCNN_GRU_Model::test_basic_use", + 5.774400861999993 + ], + [ + "tests/test_classification/test_cnn_gru_model.py::TestCNN_GRU_Model::test_multi_label", + 6.717924136999983 + ], + [ + "tests/test_classification/test_cnn_gru_model.py::TestCNN_GRU_Model::test_with_word_embedding", + 2.8432921600000327 + ], + [ + "tests/test_classification/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_basic_use", + 6.77620332699999 + ], + [ + "tests/test_classification/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_multi_label", + 6.855374507999954 + ], + [ + "tests/test_classification/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_with_word_embedding", + 2.7520542480000074 + ], + [ + "tests/test_classification/test_cnn_model.py::TestBiGRU_Model::test_basic_use", + 2.0675844740000002 + ], + [ + "tests/test_classification/test_cnn_model.py::TestBiGRU_Model::test_multi_label", + 2.6255372759999887 + ], + [ + "tests/test_classification/test_cnn_model.py::TestBiGRU_Model::test_with_word_embedding", + 0.7283437279999987 + ], + [ + "tests/test_embeddings/test_bare_embedding.py::TestBareEmbedding::test_base_cases", + 0.10849002699998778 + ], + [ + "tests/test_embeddings/test_bare_embedding.py::TestBareEmbedding::test_with_model", + 5.119307947999971 + ], + [ + "tests/test_embeddings/test_transformer_embedding.py::TestBareEmbedding::test_base_cases", + 0.580331665999978 + ], + [ + "tests/test_embeddings/test_transformer_embedding.py::TestBareEmbedding::test_with_model", + 5.116756056000014 + ], + [ + "tests/test_embeddings/test_transformer_embedding.py::TestTransferEmbedding::test_base_cases", + 1.304065329000025 + ], + [ + "tests/test_embeddings/test_transformer_embedding.py::TestTransferEmbedding::test_with_model", + 7.028098716000017 + ], + [ + "tests/test_embeddings/test_word_embedding.py::TestBareEmbedding::test_base_cases", + 0.11307986700001038 + ], + [ + "tests/test_embeddings/test_word_embedding.py::TestBareEmbedding::test_with_model", + 5.595726683000009 + ], + [ + "tests/test_embeddings/test_word_embedding.py::TestWordEmbedding::test_base_cases", + 0.25862976200002663 + ], + [ + "tests/test_embeddings/test_word_embedding.py::TestWordEmbedding::test_with_model", + 5.084454087000012 + ], + [ + "tests/test_labeling/test_bi_gru_crf_model.py::TestBiGRU_CRF_Model::test_basic_use", + 30.372882512000018 + ], + [ + "tests/test_labeling/test_bi_gru_crf_model.py::TestBiGRU_CRF_Model::test_with_bert", + 0.00018015500000956308 + ] +] \ No newline at end of file diff --git a/sonar-project.properties b/sonar-project.properties index ff531d67..7d27479f 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -10,3 +10,6 @@ sonar.organization=brikerman-github # Encoding of the source code. Default is default system encoding #sonar.sourceEncoding=UTF-8 + +sonar.python.coverage.reportPaths=artifacts/coverage*/coverage*.xml +sonar.python.xunit.reportPath=artifacts/junit*/junit-*.xml From 3f372ca591b487888f3f7c17b4ecc7fcbb759608 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 15:06:51 +0800 Subject: [PATCH 12/20] =?UTF-8?q?=F0=9F=91=B7=20Update=20Github=20CI=20Con?= =?UTF-8?q?fig.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 948a133b..7398f3ce 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,7 +53,7 @@ jobs: --cov --splits 2 --group ${{ matrix.group }} - tests/test_processor' + tests/' - name: Display structure of all files run: ls -R From 615adcff36bcf6dc40e8f7ab62b11dda44ff5544 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 15:09:49 +0800 Subject: [PATCH 13/20] =?UTF-8?q?=F0=9F=91=B7=20Update=20Github=20CI=20Con?= =?UTF-8?q?fig.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7398f3ce..ae4135a2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - group: [ 1, 2 ] + group: [ 1, 2, 3, 4, 5, 6 ] tensorflow_version: [ 2.2.0, 2.3.0 ] steps: - uses: actions/checkout@v2 @@ -51,7 +51,7 @@ jobs: --cov-report term --cov-config .coveragerc --cov - --splits 2 + --splits 6 --group ${{ matrix.group }} tests/' From fd8e74de012ab34aa43d10f1e3da6a238fa7b4b1 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 15:35:56 +0800 Subject: [PATCH 14/20] =?UTF-8?q?=F0=9F=94=A7=20Fix=20readthedocs,=20[skip?= =?UTF-8?q?=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 2 ++ .readthedocs.yml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ae4135a2..0ca92d40 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,6 +23,7 @@ jobs: - name: Run lint script run: sh ./scripts/lint.sh test: + if: "!contains(github.event.head_commit.message, 'skip ci')" name: "Test with TF ${{ matrix.tensorflow_version }} - ${{ matrix.group }}" runs-on: ubuntu-latest strategy: @@ -71,6 +72,7 @@ jobs: path: cov-reports sonarcloud: + if: "!contains(github.event.head_commit.message, 'skip ci')" name: SonarCloud runs-on: ubuntu-latest needs: test diff --git a/.readthedocs.yml b/.readthedocs.yml index e250fcd7..a6a7a1ab 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -21,7 +21,7 @@ formats: # Optionally set the version of Python and requirements required to build your docs python: - version: 3.7 + version: 3.8 install: - requirements: ./docs/requirements.txt - requirements: ./requirements.txt From 2a9b8d000b836e21ec692c2ca5a97be7d34bd223 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Mon, 26 Oct 2020 15:51:59 +0800 Subject: [PATCH 15/20] =?UTF-8?q?=F0=9F=94=A7=20Add=20configs.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .readthedocs.yml | 2 +- .test_durations | 156 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 117 insertions(+), 41 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index a6a7a1ab..6feebaa2 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -23,6 +23,6 @@ formats: python: version: 3.8 install: - - requirements: ./docs/requirements.txt + - requirements: ./requirements.dev.txt - requirements: ./requirements.txt diff --git a/.test_durations b/.test_durations index e6eae91c..5d7277c6 100644 --- a/.test_durations +++ b/.test_durations @@ -1,162 +1,238 @@ [ [ "tests/test_corpus.py::TestChineseDailyNerCorpus::test_load_data", - 0.9164261159999998 + 0.791890680999999 ], [ "tests/test_corpus.py::TestSMP2018ECDTCorpus::test_load_data", - 0.10876109099999942 + 0.07738860300000105 ], [ "tests/test_generator.py::TestGenerator::test_batch_generator", - 0.21724430800000016 + 0.1659608910000001 ], [ "tests/test_generator.py::TestGenerator::test_corpus_generator", - 0.0005134079999997709 + 0.0004785480000002451 ], [ "tests/test_generator.py::TestGenerator::test_huge_batch_size", - 0.0018426819999994848 + 0.0017857870000010934 ], [ "tests/test_tokenizers.py::TestUtils::test_base_tokenizer", - 0.000504422999999754 + 0.00045628799999875014 ], [ "tests/test_tokenizers.py::TestUtils::test_bert_tokenizer", - 0.0019401849999995946 + 0.00681187300000019 ], [ "tests/test_tokenizers.py::TestUtils::test_jieba_tokenizer", - 6.517317019 + 3.054017858000001 ], [ "tests/test_utils.py::TestUtils::test_get_list_subset", - 0.00043809899999835977 + 0.0004596760000001865 ], [ "tests/test_utils.py::TestUtils::test_unison_shuffled_copies", - 0.0006712190000008889 + 0.0008055950000001033 ], [ "tests/test_classification/test_bi_gru_model.py::TestBiGRU_Model::test_basic_use", - 28.251553606999998 + 26.957131174000004 ], [ "tests/test_classification/test_bi_gru_model.py::TestBiGRU_Model::test_multi_label", - 18.903764830000007 + 20.192117628000005 ], [ "tests/test_classification/test_bi_gru_model.py::TestBiGRU_Model::test_with_word_embedding", - 11.325183209999999 + 11.372548664 ], [ "tests/test_classification/test_bi_lstm_model.py::TestBiLSTM_Model::test_basic_use", - 28.38882036000001 + 28.27500425000001 ], [ "tests/test_classification/test_bi_lstm_model.py::TestBiLSTM_Model::test_multi_label", - 23.973969605999997 + 28.04191687800001 ], [ "tests/test_classification/test_bi_lstm_model.py::TestBiLSTM_Model::test_with_word_embedding", - 12.634332431999994 + 11.645751617000016 ], [ "tests/test_classification/test_cnn_attention_model.py::TestCnnAttention_Model::test_basic_use", - 3.0587875910000264 + 2.8881167019999907 ], [ "tests/test_classification/test_cnn_attention_model.py::TestCnnAttention_Model::test_multi_label", - 11.14959770699997 + 14.609918974999971 ], [ "tests/test_classification/test_cnn_attention_model.py::TestCnnAttention_Model::test_with_word_embedding", - 1.5054829560000371 + 1.442862555000005 ], [ "tests/test_classification/test_cnn_gru_model.py::TestCNN_GRU_Model::test_basic_use", - 5.774400861999993 + 5.812660918000006 ], [ "tests/test_classification/test_cnn_gru_model.py::TestCNN_GRU_Model::test_multi_label", - 6.717924136999983 + 6.379847061999982 ], [ "tests/test_classification/test_cnn_gru_model.py::TestCNN_GRU_Model::test_with_word_embedding", - 2.8432921600000327 + 3.058245263999993 ], [ "tests/test_classification/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_basic_use", - 6.77620332699999 + 6.5338332440000215 ], [ "tests/test_classification/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_multi_label", - 6.855374507999954 + 7.435480620999982 ], [ "tests/test_classification/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_with_word_embedding", - 2.7520542480000074 + 2.917810065999987 ], [ "tests/test_classification/test_cnn_model.py::TestBiGRU_Model::test_basic_use", - 2.0675844740000002 + 1.4649902729999837 ], [ "tests/test_classification/test_cnn_model.py::TestBiGRU_Model::test_multi_label", - 2.6255372759999887 + 2.6597315669999944 ], [ "tests/test_classification/test_cnn_model.py::TestBiGRU_Model::test_with_word_embedding", - 0.7283437279999987 + 1.2886368380000022 ], [ "tests/test_embeddings/test_bare_embedding.py::TestBareEmbedding::test_base_cases", - 0.10849002699998778 + 0.10868100899998012 ], [ "tests/test_embeddings/test_bare_embedding.py::TestBareEmbedding::test_with_model", - 5.119307947999971 + 4.935055361000025 ], [ "tests/test_embeddings/test_transformer_embedding.py::TestBareEmbedding::test_base_cases", - 0.580331665999978 + 0.10724691600000824 ], [ "tests/test_embeddings/test_transformer_embedding.py::TestBareEmbedding::test_with_model", - 5.116756056000014 + 5.357032331999989 ], [ "tests/test_embeddings/test_transformer_embedding.py::TestTransferEmbedding::test_base_cases", - 1.304065329000025 + 1.2804299539999988 ], [ "tests/test_embeddings/test_transformer_embedding.py::TestTransferEmbedding::test_with_model", - 7.028098716000017 + 6.972018837000036 ], [ "tests/test_embeddings/test_word_embedding.py::TestBareEmbedding::test_base_cases", - 0.11307986700001038 + 0.10442442199999391 ], [ "tests/test_embeddings/test_word_embedding.py::TestBareEmbedding::test_with_model", - 5.595726683000009 + 5.0369742190000295 ], [ "tests/test_embeddings/test_word_embedding.py::TestWordEmbedding::test_base_cases", - 0.25862976200002663 + 0.24649433999999815 ], [ "tests/test_embeddings/test_word_embedding.py::TestWordEmbedding::test_with_model", - 5.084454087000012 + 5.744442873999986 ], [ "tests/test_labeling/test_bi_gru_crf_model.py::TestBiGRU_CRF_Model::test_basic_use", - 30.372882512000018 + 27.22844565699998 ], [ "tests/test_labeling/test_bi_gru_crf_model.py::TestBiGRU_CRF_Model::test_with_bert", - 0.00018015500000956308 + 15.653805492000004 + ], + [ + "tests/test_labeling/test_bi_gru_crf_model.py::TestBiGRU_CRF_Model::test_with_word_embedding", + 15.798518177000062 + ], + [ + "tests/test_labeling/test_bi_gru_model.py::TestBiGRU_Model::test_basic_use", + 22.86519840400001 + ], + [ + "tests/test_labeling/test_bi_gru_model.py::TestBiGRU_Model::test_predict_and_callback", + 11.08044686200003 + ], + [ + "tests/test_labeling/test_bi_gru_model.py::TestBiGRU_Model::test_with_bert", + 13.311688684999979 + ], + [ + "tests/test_labeling/test_bi_gru_model.py::TestBiGRU_Model::test_with_word_embedding", + 13.12735856400002 + ], + [ + "tests/test_labeling/test_bi_lstm_crf_model.py::TestBiLSTM_CRF_Model::test_basic_use", + 30.706889874000012 + ], + [ + "tests/test_labeling/test_bi_lstm_crf_model.py::TestBiLSTM_CRF_Model::test_with_bert", + 17.221633065999924 + ], + [ + "tests/test_labeling/test_bi_lstm_crf_model.py::TestBiLSTM_CRF_Model::test_with_word_embedding", + 17.035713270999963 + ], + [ + "tests/test_labeling/test_bi_lstm_model.py::TestBiLSTM_Model::test_basic_use", + 27.88147675900001 + ], + [ + "tests/test_labeling/test_bi_lstm_model.py::TestBiLSTM_Model::test_with_bert", + 14.351038211999992 + ], + [ + "tests/test_labeling/test_bi_lstm_model.py::TestBiLSTM_Model::test_with_word_embedding", + 13.007894668000006 + ], + [ + "tests/test_labeling/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_basic_use", + 24.476096025000004 + ], + [ + "tests/test_labeling/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_with_bert", + 13.546466815999906 + ], + [ + "tests/test_labeling/test_cnn_lstm_model.py::TestCNN_LSTM_Model::test_with_word_embedding", + 12.68343920000001 + ], + [ + "tests/test_processor/test_class_processor.py::TestClassificationProcessor::test_multi_label_processor", + 1.1768544880000036 + ], + [ + "tests/test_processor/test_class_processor.py::TestClassificationProcessor::test_processor", + 0.001395669999965321 + ], + [ + "tests/test_processor/test_sequence_processor.py::TestSequenceProcessor::test_label_processor", + 0.01879332600003636 + ], + [ + "tests/test_processor/test_sequence_processor.py::TestSequenceProcessor::test_text_processor", + 0.021944461000089177 + ], + [ + "tests/test_seq2seq/test_seq2seq.py::TestSeq2Seq::test_base_use_case", + 130.10345570900006 ] ] \ No newline at end of file From 990be6201c4a5c327347b2f7eb113820c9bbcbeb Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Wed, 28 Oct 2020 21:51:11 +0800 Subject: [PATCH 16/20] =?UTF-8?q?=F0=9F=94=96=20Release=202.0.1.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .travis.yml | 69 ------------------------------------- docs/about/release-notes.md | 5 +++ kashgari/__version__.py | 2 +- 3 files changed, 6 insertions(+), 70 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4a3c97c9..00000000 --- a/.travis.yml +++ /dev/null @@ -1,69 +0,0 @@ -language: python -dist: xenial - -env: - global: - - COVERALLS_PARALLEL=true - matrix: - - TF_VERSION="2.1.0" TEST_FILE="tests/test_seq2seq/" - - TF_VERSION="2.1.0" TEST_FILE="tests/test_labeling/test_bi_gru_model.py tests/test_labeling/test_bi_lstm_model.py tests/test_labeling/test_cnn_lstm_model.py" - - TF_VERSION="2.1.0" TEST_FILE="tests/test_labeling/test_bi_gru_crf_model.py tests/test_labeling/test_bi_lstm_crf_model.py" - - TF_VERSION="2.1.0" TEST_FILE="tests/test_classification/" - - TF_VERSION="2.1.0" TEST_FILE="tests/test_embeddings/" - - TF_VERSION="2.1.0" TEST_FILE="tests/test_processor/" - - TF_VERSION="2.1.0" TEST_FILE="tests/test_corpus.py tests/test_utils.py tests/test_tokenizers.py" - - - TF_VERSION="2.2.0" TEST_FILE="tests/test_seq2seq/" - - TF_VERSION="2.2.0" TEST_FILE="tests/test_labeling/test_bi_gru_model.py tests/test_labeling/test_bi_lstm_model.py tests/test_labeling/test_cnn_lstm_model.py" - - TF_VERSION="2.2.0" TEST_FILE="tests/test_labeling/test_bi_gru_crf_model.py tests/test_labeling/test_bi_lstm_crf_model.py" - - TF_VERSION="2.2.0" TEST_FILE="tests/test_classification/" - - TF_VERSION="2.2.0" TEST_FILE="tests/test_embeddings/" - - TF_VERSION="2.2.0" TEST_FILE="tests/test_processor/" - - TF_VERSION="2.2.0" TEST_FILE="tests/test_corpus.py tests/test_utils.py tests/test_tokenizers.py" - - - TF_VERSION="2.3.0" TEST_FILE="tests/test_seq2seq/" - - TF_VERSION="2.3.0" TEST_FILE="tests/test_labeling/test_bi_gru_model.py tests/test_labeling/test_bi_lstm_model.py tests/test_labeling/test_cnn_lstm_model.py" - - TF_VERSION="2.3.0" TEST_FILE="tests/test_labeling/test_bi_gru_crf_model.py tests/test_labeling/test_bi_lstm_crf_model.py" - - TF_VERSION="2.3.0" TEST_FILE="tests/test_classification/" - - TF_VERSION="2.3.0" TEST_FILE="tests/test_embeddings/" - - TF_VERSION="2.3.0" TEST_FILE="tests/test_processor/" - - TF_VERSION="2.3.0" TEST_FILE="tests/test_corpus.py tests/test_utils.py tests/test_tokenizers.py" - -python: - - "3.6" - -cache: pip - -before_install: - - export BOTO_CONFIG=/dev/null - -stages: - - Lint - - Test - -install: - - pip install tensorflow==$TF_VERSION - - pip install -r requirements.dev.txt - - pip install -r requirements.txt - - git fetch --unshallow --quiet - - export PYTHONPATH=`pwd` - -script: - - TARGET="2.1.0"; if [ "$TF_VERSION" == "$TARGET" ]; then pip install "tensorflow_addons<0.10.0";else echo "No need to install."; fi - - python -c "import kashgari;print(f'kashgari version {kashgari.__version__}')" - - pytest --doctest-modules --junitxml=test-reports/junit.xml --cov=kashgari --cov-report=xml:coverage.xml --cov-report term --cov-report=html:htmlcov --cov-config .coveragerc $TEST_FILE - -after_script: - - coveralls - -notifications: - webhooks: https://coveralls.io/webhook - -jobs: - include: - - stage: Lint - python: "3.7" - install: - - pip install -r requirements.dev.txt - script: - - sh ./scripts/lint.sh diff --git a/docs/about/release-notes.md b/docs/about/release-notes.md index f6a8b5d5..546c24a3 100644 --- a/docs/about/release-notes.md +++ b/docs/about/release-notes.md @@ -17,6 +17,11 @@ pip show kashgari ## Current Release +### [2.0.1] - 2020.10.28 + +- ✨ Add `convert_to_saved_model` API for tf-serving use case. +- ✨ Add tf-serving documents. + ### [2.0.0] - 2020.09.10 This is a fully re-implemented version with TF2. diff --git a/kashgari/__version__.py b/kashgari/__version__.py index d4186858..c77c520a 100644 --- a/kashgari/__version__.py +++ b/kashgari/__version__.py @@ -7,4 +7,4 @@ # file: __version__.py.py # time: 2019-05-20 16:32 -__version__ = '2.0.0' +__version__ = '2.0.1' From 345cf808eeb3465af774413c021d9861f4d2ec58 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Wed, 28 Oct 2020 22:01:09 +0800 Subject: [PATCH 17/20] =?UTF-8?q?=F0=9F=97=91=20Cleaned=20up.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kashgari/tasks/abs_task_model.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kashgari/tasks/abs_task_model.py b/kashgari/tasks/abs_task_model.py index b40c9bc4..6da8ea99 100644 --- a/kashgari/tasks/abs_task_model.py +++ b/kashgari/tasks/abs_task_model.py @@ -76,13 +76,7 @@ def default_hyper_parameters(cls) -> Dict[str, Dict[str, Any]]: """ raise NotImplementedError - def save(self, model_path: str, h5_weight: bool = False) -> str: - """ - Save model - Args: - model_path: target model path - h5_weight: whether using original h5 format or new saved_model format - """ + def save(self, model_path: str) -> str: pathlib.Path(model_path).mkdir(exist_ok=True, parents=True) model_path = os.path.abspath(model_path) From f1595ef664f05ce90872948faba844bf83b32101 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Thu, 29 Oct 2020 12:33:35 +0800 Subject: [PATCH 18/20] =?UTF-8?q?=F0=9F=94=A7=20Add=20or=20update=20config?= =?UTF-8?q?uration=20files.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ca92d40..ca41d77a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,7 +23,7 @@ jobs: - name: Run lint script run: sh ./scripts/lint.sh test: - if: "!contains(github.event.head_commit.message, 'skip ci')" + if: always() name: "Test with TF ${{ matrix.tensorflow_version }} - ${{ matrix.group }}" runs-on: ubuntu-latest strategy: @@ -56,9 +56,6 @@ jobs: --group ${{ matrix.group }} tests/' - - name: Display structure of all files - run: ls -R - - name: Upload unit test uses: actions/upload-artifact@v2 with: From cc8bbe8fa47f8e7c509da5da06a847e984731099 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Thu, 29 Oct 2020 14:19:28 +0800 Subject: [PATCH 19/20] =?UTF-8?q?=F0=9F=93=8C=20Pin=20dependencies=20to=20?= =?UTF-8?q?specific=20versions.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4ddbdd54..f7ae01f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,9 @@ numpy>=1.18.1 gensim>=3.8.1 pandas>=1.0.1 tqdm -bert4keras>=0.7.9 +# Limit this version to avoid json serilization issue. +# See https://github.com/bojone/bert4keras/issues/241 +bert4keras==0.7.9 scikit-learn>=0.21.1 tensorflow>=2.1.0 tensorflow_addons From ec91627c4d930a54ad6ccb8f6d7e173dc897aec0 Mon Sep 17 00:00:00 2001 From: BrikerMan Date: Thu, 29 Oct 2020 15:14:43 +0800 Subject: [PATCH 20/20] =?UTF-8?q?=F0=9F=93=8C=20Pin=20dependencies=20to=20?= =?UTF-8?q?specific=20versions.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f7ae01f1..79a0c2d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ pandas>=1.0.1 tqdm # Limit this version to avoid json serilization issue. # See https://github.com/bojone/bert4keras/issues/241 -bert4keras==0.7.9 +bert4keras>=0.9.1 scikit-learn>=0.21.1 tensorflow>=2.1.0 tensorflow_addons