diff --git a/exp/__init__.py b/arelight/__init__.py
similarity index 100%
rename from exp/__init__.py
rename to arelight/__init__.py
diff --git a/network/__init__.py b/arelight/exp/__init__.py
similarity index 100%
rename from network/__init__.py
rename to arelight/exp/__init__.py
diff --git a/exp/doc_ops.py b/arelight/exp/doc_ops.py
similarity index 100%
rename from exp/doc_ops.py
rename to arelight/exp/doc_ops.py
diff --git a/exp/exp.py b/arelight/exp/exp.py
similarity index 90%
rename from exp/exp.py
rename to arelight/exp/exp.py
index 72b905d..a60db09 100644
--- a/exp/exp.py
+++ b/arelight/exp/exp.py
@@ -1,6 +1,6 @@
 from arekit.common.experiment.api.base import BaseExperiment
 
-from exp.opin_ops import CustomOpinionOperations
+from arelight.exp.opin_ops import CustomOpinionOperations
 
 
 class CustomExperiment(BaseExperiment):
diff --git a/exp/exp_io.py b/arelight/exp/exp_io.py
similarity index 94%
rename from exp/exp_io.py
rename to arelight/exp/exp_io.py
index 58f0afe..fbcf5b3 100644
--- a/exp/exp_io.py
+++ b/arelight/exp/exp_io.py
@@ -1,6 +1,7 @@
 import os
 
 from arekit.contrib.experiment_rusentrel.model_io.tf_networks import RuSentRelExperimentNetworkIOUtils
-from network.args.const import OUTPUT_DIR
+
+from examples.args.const import OUTPUT_DIR
 
 class InferIOUtils(RuSentRelExperimentNetworkIOUtils):
diff --git a/exp/opin_ops.py b/arelight/exp/opin_ops.py
similarity index 100%
rename from exp/opin_ops.py
rename to arelight/exp/opin_ops.py
diff --git a/network/args/__init__.py b/arelight/network/__init__.py
similarity index 100%
rename from network/args/__init__.py
rename to arelight/network/__init__.py
diff --git a/network/bert/__init__.py b/arelight/network/bert/__init__.py
similarity index 100%
rename from network/bert/__init__.py
rename to arelight/network/bert/__init__.py
diff --git a/network/bert/ctx.py b/arelight/network/bert/ctx.py
similarity index 100%
rename from network/bert/ctx.py
rename to arelight/network/bert/ctx.py
diff --git a/network/nn/__init__.py b/arelight/network/nn/__init__.py
similarity index 100%
rename from network/nn/__init__.py
rename to arelight/network/nn/__init__.py
diff --git a/network/nn/common.py b/arelight/network/nn/common.py
similarity index 100%
rename from network/nn/common.py
rename to arelight/network/nn/common.py
diff --git a/network/nn/ctx.py b/arelight/network/nn/ctx.py
similarity index 100%
rename from network/nn/ctx.py
rename to arelight/network/nn/ctx.py
diff --git a/network/nn/embedding.py b/arelight/network/nn/embedding.py
similarity index 100%
rename from network/nn/embedding.py
rename to arelight/network/nn/embedding.py
diff --git a/pipelines/__init__.py b/arelight/pipelines/__init__.py
similarity index 100%
rename from pipelines/__init__.py
rename to arelight/pipelines/__init__.py
diff --git a/pipelines/backend.py b/arelight/pipelines/backend.py
similarity index 97%
rename from pipelines/backend.py
rename to arelight/pipelines/backend.py
index 02ee70e..5d52c22 100644
--- a/pipelines/backend.py
+++ b/arelight/pipelines/backend.py
@@ -5,7 +5,7 @@
 from arekit.common.pipeline.items.base import BasePipelineItem
 
 from brat_backend import BratBackend
-from exp.exp_io import InferIOUtils
+from arelight.exp.exp_io import InferIOUtils
 
 
 class BratBackendPipelineItem(BasePipelineItem):
diff --git a/pipelines/inference_bert.py b/arelight/pipelines/inference_bert.py
similarity index 98%
rename from pipelines/inference_bert.py
rename to arelight/pipelines/inference_bert.py
index 605210b..b6a9d33 100644
--- a/pipelines/inference_bert.py
+++ b/arelight/pipelines/inference_bert.py
@@ -10,7 +10,7 @@
 from deeppavlov.models.bert import bert_classifier
 from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor
 
-from exp.exp_io import InferIOUtils
+from arelight.exp.exp_io import InferIOUtils
 
 
 class BertInferencePipelineItem(BasePipelineItem):
diff --git a/pipelines/inference_nn.py b/arelight/pipelines/inference_nn.py
similarity index 98%
rename from pipelines/inference_nn.py
rename to arelight/pipelines/inference_nn.py
index 341873c..dd39f63 100644
--- a/pipelines/inference_nn.py
+++ b/arelight/pipelines/inference_nn.py
@@ -20,8 +20,8 @@
 from arekit.contrib.networks.shapes import NetworkInputShapes
 from arekit.processing.languages.ru.pos_service import PartOfSpeechTypesService
 
-from exp.exp_io import InferIOUtils
-from network.args.const import BAG_SIZE
+from examples.args.const import BAG_SIZE
+from arelight.exp.exp_io import InferIOUtils
 
 
 class TensorflowNetworkInferencePipelineItem(BasePipelineItem):
diff --git a/pipelines/serialize_bert.py b/arelight/pipelines/serialize_bert.py
similarity index 93%
rename from pipelines/serialize_bert.py
rename to arelight/pipelines/serialize_bert.py
index 8593a13..9066d6d 100644
--- a/pipelines/serialize_bert.py
+++ b/arelight/pipelines/serialize_bert.py
@@ -13,11 +13,11 @@
 from arekit.contrib.bert.samplers.types import BertSampleProviderTypes
 from arekit.processing.text.pipeline_terms_splitter import TermsSplitterParser
 
-from exp.doc_ops import CustomDocOperations
-from exp.exp import CustomExperiment
-from exp.exp_io import InferIOUtils
-from network.bert.ctx import BertSerializationContext
-from pipelines.utils import input_to_docs
+from arelight.exp.doc_ops import CustomDocOperations
+from arelight.exp.exp import CustomExperiment
+from arelight.exp.exp_io import InferIOUtils
+from arelight.network.bert.ctx import BertSerializationContext
+from arelight.pipelines.utils import input_to_docs
 
 
 class BertTextsSerializationPipelineItem(BasePipelineItem):
diff --git a/pipelines/serialize_nn.py b/arelight/pipelines/serialize_nn.py
similarity index 93%
rename from pipelines/serialize_nn.py
rename to arelight/pipelines/serialize_nn.py
index 45a1371..30775c7 100644
--- a/pipelines/serialize_nn.py
+++ b/arelight/pipelines/serialize_nn.py
@@ -18,13 +18,14 @@
 from arekit.processing.text.pipeline_frames_negation import FrameVariantsSentimentNegation
 from arekit.processing.text.pipeline_terms_splitter import TermsSplitterParser
 from arekit.processing.text.pipeline_tokenizer import DefaultTextTokenizer
-from exp.doc_ops import CustomDocOperations
-from exp.exp import CustomExperiment
-from exp.exp_io import InferIOUtils
-from network.nn.common import create_and_fill_variant_collection
-from network.nn.ctx import NetworkSerializationContext
-from network.nn.embedding import RusvectoresEmbedding
-from pipelines.utils import input_to_docs
+
+from arelight.exp.doc_ops import CustomDocOperations
+from arelight.exp.exp import CustomExperiment
+from arelight.exp.exp_io import InferIOUtils
+from arelight.network.nn.common import create_and_fill_variant_collection
+from arelight.network.nn.ctx import NetworkSerializationContext
+from arelight.network.nn.embedding import RusvectoresEmbedding
+from arelight.pipelines.utils import input_to_docs
 
 
 class NetworkTextsSerializationPipelineItem(BasePipelineItem):
diff --git a/pipelines/train_bert.py b/arelight/pipelines/train_bert.py
similarity index 100%
rename from pipelines/train_bert.py
rename to arelight/pipelines/train_bert.py
diff --git a/pipelines/utils.py b/arelight/pipelines/utils.py
similarity index 100%
rename from pipelines/utils.py
rename to arelight/pipelines/utils.py
diff --git a/text/__init__.py b/arelight/text/__init__.py
similarity index 100%
rename from text/__init__.py
rename to arelight/text/__init__.py
diff --git a/text/ner_base.py b/arelight/text/ner_base.py
similarity index 100%
rename from text/ner_base.py
rename to arelight/text/ner_base.py
diff --git a/text/ner_ontonotes.py b/arelight/text/ner_ontonotes.py
similarity index 97%
rename from text/ner_ontonotes.py
rename to arelight/text/ner_ontonotes.py
index fe817ff..b98bf6c 100644
--- a/text/ner_ontonotes.py
+++ b/arelight/text/ner_ontonotes.py
@@ -1,6 +1,6 @@
 import importlib
 
-from text.ner_base import BaseNER
+from arelight.text.ner_base import BaseNER
 
 
 class BertOntonotesNER(BaseNER):
diff --git a/text/pipeline_entities_bert_ontonotes.py b/arelight/text/pipeline_entities_bert_ontonotes.py
similarity index 96%
rename from text/pipeline_entities_bert_ontonotes.py
rename to arelight/text/pipeline_entities_bert_ontonotes.py
index 22acbd5..f54dcef 100644
--- a/text/pipeline_entities_bert_ontonotes.py
+++ b/arelight/text/pipeline_entities_bert_ontonotes.py
@@ -4,7 +4,7 @@
 from arekit.common.text.partitioning.terms import TermsPartitioning
 from arekit.processing.entities.obj_desc import NerObjectDescriptor
 
-from text.ner_ontonotes import BertOntonotesNER
+from arelight.text.ner_ontonotes import BertOntonotesNER
 
 
 class BertOntonotesNERPipelineItem(SentenceObjectsParserPipelineItem):
diff --git a/text/pipeline_entities_default.py b/arelight/text/pipeline_entities_default.py
similarity index 100%
rename from text/pipeline_entities_default.py
rename to arelight/text/pipeline_entities_default.py
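The renames above collapse the former top-level `exp`, `network`, `pipelines`, and `text` packages into a single installable `arelight` package, so every internal import gains the `arelight.` prefix, while the CLI-argument helpers move under `examples/args` (shown further below). A minimal before/after illustration, using only module names that appear in the renames above:

```python
# Before the restructuring these modules were top-level packages, so scripts
# could only import them when run from the repository root:
#   from exp.exp_io import InferIOUtils
#   from pipelines.backend import BratBackendPipelineItem
# After it, the same classes resolve through the installable package:
from arelight.exp.exp_io import InferIOUtils
from arelight.pipelines.backend import BratBackendPipelineItem
```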
diff --git a/dependencies.txt b/dependencies.txt
index 91f2684..d69faac 100644
--- a/dependencies.txt
+++ b/dependencies.txt
@@ -2,6 +2,6 @@
 gensim==3.2.0
 deeppavlov==0.11.0
 rusenttokenize
 # DeepPavlov bert-dp dependencies:
-git+https://github.com/deepmipt/bert.git@feat/multi_gpu
+bert_dp @ git+https://github.com/deepmipt/bert.git@feat/multi_gpu
 # Install arekit
-git+https://github.com/nicolay-r/AREkit@0.22.0-rc
+arekit @ git+https://github.com/nicolay-r/AREkit@0.22.0-rc
\ No newline at end of file
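Note the switch to the PEP 508 direct-reference form (`name @ git+url`) for the two VCS dependencies. This is what makes the new `setup.py` below workable: `dependencies.txt` is fed straight into `install_requires`, where a bare `git+https://...` line is not a valid requirement string. A quick sanity check (the `packaging` library is assumed here only for illustration, not as a project dependency):

```python
from packaging.requirements import Requirement

# The named form parses into a requirement pip can resolve and deduplicate;
# a bare "git+https://..." line would raise InvalidRequirement instead,
# because it carries no distribution name.
req = Requirement("arekit @ git+https://github.com/nicolay-r/AREkit@0.22.0-rc")
print(req.name)  # arekit
print(req.url)   # git+https://github.com/nicolay-r/AREkit@0.22.0-rc
```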
diff --git a/examples/args/__init__.py b/examples/args/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/network/args/base.py b/examples/args/base.py
similarity index 100%
rename from network/args/base.py
rename to examples/args/base.py
diff --git a/network/args/common.py b/examples/args/common.py
similarity index 98%
rename from network/args/common.py
rename to examples/args/common.py
index 4eea7f9..8acc068 100644
--- a/network/args/common.py
+++ b/examples/args/common.py
@@ -10,9 +10,7 @@
 from arekit.contrib.source.rusentrel.utils import iter_synonym_groups
 from arekit.processing.lemmatization.mystem import MystemWrapper
 
-from network.args.base import BaseArg
-from text.pipeline_entities_bert_ontonotes import BertOntonotesNERPipelineItem
-from text.pipeline_entities_default import TextEntitiesParser
+from examples.args.base import BaseArg
 
 
 class InputTextArg(BaseArg):
diff --git a/network/args/const.py b/examples/args/const.py
similarity index 100%
rename from network/args/const.py
rename to examples/args/const.py
diff --git a/network/args/train.py b/examples/args/train.py
similarity index 99%
rename from network/args/train.py
rename to examples/args/train.py
index 05b8dd4..4fba474 100644
--- a/network/args/train.py
+++ b/examples/args/train.py
@@ -1,6 +1,6 @@
 from arekit.contrib.networks.enum_input_types import ModelInputType, ModelInputTypeService
 
-from network.args.base import BaseArg
+from examples.args.base import BaseArg
 
 
 class BagsPerMinibatchArg(BaseArg):
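The scripts below now reach these helpers through module prefixes (`common.`, `train.`, `const.`) instead of importing dozens of class names individually. `examples/args/base.py` itself is not shown in this diff; the following is a hypothetical sketch of the contract its call sites imply, with the flag and dest names chosen purely for illustration:

```python
import argparse


class BaseArg(object):
    """Each argument wrapper registers itself on a parser and reads its value back."""

    @staticmethod
    def add_argument(parser, default):
        raise NotImplementedError()

    @staticmethod
    def read_argument(args):
        raise NotImplementedError()


class TermsPerContextArg(BaseArg):

    @staticmethod
    def add_argument(parser, default):
        # The real flag/dest names live in examples/args/common.py; these are assumed.
        parser.add_argument('--terms-per-context', dest='terms_per_context',
                            type=int, default=default)

    @staticmethod
    def read_argument(args):
        return args.terms_per_context


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    TermsPerContextArg.add_argument(parser, default=50)
    print(TermsPerContextArg.read_argument(parser.parse_args([])))  # 50
```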
diff --git a/examples/rusentrel/common.py b/examples/rusentrel/common.py
index b64c6ec..334f00f 100644
--- a/examples/rusentrel/common.py
+++ b/examples/rusentrel/common.py
@@ -5,7 +5,7 @@
 from arekit.contrib.experiment_rusentrel.exp_ds.utils import read_ruattitudes_in_memory
 from arekit.contrib.source.rusentrel.io_utils import RuSentRelIOUtils
 
-from network.nn.embedding import RusvectoresEmbedding
+from arelight.network.nn.embedding import RusvectoresEmbedding
 
 
 class Common:
diff --git a/examples/rusentrel/configs/single.py b/examples/rusentrel/configs/single.py
index 3bfae1c..0b736d4 100644
--- a/examples/rusentrel/configs/single.py
+++ b/examples/rusentrel/configs/single.py
@@ -9,7 +9,7 @@
 from arekit.contrib.networks.context.configurations.self_att_bilstm import SelfAttentionBiLSTMConfig
 from arekit.contrib.networks.tf_helpers.cell_types import CellTypes
 
-from network.args.const import TERMS_PER_CONTEXT
+from examples.args.const import TERMS_PER_CONTEXT
 
 
 def ctx_self_att_bilstm_custom_config(config):
diff --git a/examples/rusentrel/exp_io.py b/examples/rusentrel/exp_io.py
index 3dc4e3a..f22780d 100644
--- a/examples/rusentrel/exp_io.py
+++ b/examples/rusentrel/exp_io.py
@@ -1,5 +1,5 @@
 from arekit.contrib.experiment_rusentrel.model_io.tf_networks import RuSentRelExperimentNetworkIOUtils
-from network.args.const import OUTPUT_DIR
+from examples.args.const import OUTPUT_DIR
 
 
 class CustomRuSentRelNetworkExperimentIO(RuSentRelExperimentNetworkIOUtils):
diff --git a/examples/serialize_rusentrel_for_bert.py b/examples/serialize_rusentrel_for_bert.py
index 830c33e..c0a435e 100644
--- a/examples/serialize_rusentrel_for_bert.py
+++ b/examples/serialize_rusentrel_for_bert.py
@@ -1,7 +1,4 @@
 import argparse
-import sys
-
-sys.path.append('../')
 
 from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
 from arekit.common.experiment.annot.default import DefaultAnnotator
@@ -11,7 +8,6 @@
 from arekit.common.labels.provider.constant import ConstantLabelProvider
 from arekit.common.labels.str_fmt import StringLabelsFormatter
 from arekit.contrib.bert.handlers.serializer import BertExperimentInputSerializerIterationHandler
-from arekit.contrib.bert.samplers.types import BertSampleProviderTypes
 from arekit.contrib.experiment_rusentrel.entities.factory import create_entity_formatter
 from arekit.contrib.experiment_rusentrel.factory import create_experiment
 from arekit.contrib.experiment_rusentrel.labels.types import ExperimentNeutralLabel, ExperimentPositiveLabel, \
@@ -20,16 +16,15 @@
 from arekit.contrib.experiment_rusentrel.types import ExperimentTypes
 from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions
 
+from examples.args import const, common
+from examples.args.const import DEFAULT_TEXT_FILEPATH
 from examples.rusentrel.common import Common
 from examples.rusentrel.exp_io import CustomRuSentRelNetworkExperimentIO
-from network.args import const
-from network.args.common import TermsPerContextArg, SynonymsCollectionArg, EntitiesParserArg, InputTextArg, \
-    FromFilesArg, RusVectoresEmbeddingFilepathArg, EntityFormatterTypesArg, UseBalancingArg, \
-    DistanceInTermsBetweenAttitudeEndsArg, StemmerArg, BertTextBFormatTypeArg
-from network.args.const import DEFAULT_TEXT_FILEPATH
-from network.bert.ctx import BertSerializationContext
 
 from utils import create_labels_scaler
+from arelight.network.bert.ctx import BertSerializationContext
+
 
 class ExperimentBERTTextBThreeScaleLabelsFormatter(StringLabelsFormatter):
@@ -46,29 +41,29 @@ def __init__(self):
                                                  "required for inference and training.")
 
     # Provide arguments.
-    InputTextArg.add_argument(parser, default=None)
-    FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
-    EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
-    RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    SynonymsCollectionArg.add_argument(parser, default=None)
-    UseBalancingArg.add_argument(parser, default=True)
-    DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
-    BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
-    StemmerArg.add_argument(parser, default="mystem")
+    common.InputTextArg.add_argument(parser, default=None)
+    common.FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
+    common.EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
+    common.RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.SynonymsCollectionArg.add_argument(parser, default=None)
+    common.UseBalancingArg.add_argument(parser, default=True)
+    common.DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
+    common.BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
+    common.StemmerArg.add_argument(parser, default="mystem")
 
     # Parsing arguments.
     args = parser.parse_args()
 
     # Reading arguments.
-    text_from_arg = InputTextArg.read_argument(args)
-    texts_from_files = FromFilesArg.read_argument(args)
-    terms_per_context = TermsPerContextArg.read_argument(args)
-    use_balancing = UseBalancingArg.read_argument(args)
-    stemmer = StemmerArg.read_argument(args)
-    entity_fmt = EntityFormatterTypesArg.read_argument(args)
-    dist_in_terms_between_attitude_ends = DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
+    text_from_arg = common.InputTextArg.read_argument(args)
+    texts_from_files = common.FromFilesArg.read_argument(args)
+    terms_per_context = common.TermsPerContextArg.read_argument(args)
+    use_balancing = common.UseBalancingArg.read_argument(args)
+    stemmer = common.StemmerArg.read_argument(args)
+    entity_fmt = common.EntityFormatterTypesArg.read_argument(args)
+    dist_in_terms_between_attitude_ends = common.DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
 
     # Predefined parameters.
     labels_count = 3
@@ -120,7 +115,7 @@ def __init__(self):
         opin_ops=experiment.OpinionOperations,
         sample_labels_fmt=ExperimentBERTTextBThreeScaleLabelsFormatter(),
         annot_labels_fmt=experiment.OpinionOperations.LabelsFormatter,
-        sample_provider_type=BertTextBFormatTypeArg.read_argument(args),
+        sample_provider_type=common.BertTextBFormatTypeArg.read_argument(args),
         entity_formatter=experiment.ExperimentContext.StringEntityFormatter,
         value_to_group_id_func=synonyms.get_synonym_group_index,
         balance_train_samples=use_balancing)
diff --git a/examples/serialize_rusentrel_for_nn.py b/examples/serialize_rusentrel_for_nn.py
index 7f783ef..e5932fd 100644
--- a/examples/serialize_rusentrel_for_nn.py
+++ b/examples/serialize_rusentrel_for_nn.py
@@ -1,8 +1,9 @@
-import sys
 import argparse
 
-sys.path.append('../')
+from arelight.network.nn.common import create_and_fill_variant_collection
+from arelight.network.nn.ctx import NetworkSerializationContext
+from examples.args import const, common
 
 from utils import create_labels_scaler
 
 from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
@@ -23,12 +24,6 @@
 from arekit.processing.text.pipeline_frames_lemmatized import LemmasBasedFrameVariantsParser
 from arekit.processing.text.pipeline_tokenizer import DefaultTextTokenizer
 
-from network.args import const
-from network.args.common import LabelsCountArg, RusVectoresEmbeddingFilepathArg, TermsPerContextArg, \
-    StemmerArg, UseBalancingArg, DistanceInTermsBetweenAttitudeEndsArg, FramesColectionArg, EntityFormatterTypesArg
-from network.nn.common import create_and_fill_variant_collection
-from network.nn.ctx import NetworkSerializationContext
-
 from examples.rusentrel.common import Common
 from examples.rusentrel.exp_io import CustomRuSentRelNetworkExperimentIO
@@ -37,27 +32,27 @@
     parser = argparse.ArgumentParser(description="RuSentRel dataset serialization script")
 
     # Provide arguments.
-    LabelsCountArg.add_argument(parser, default=3)
-    RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
-    StemmerArg.add_argument(parser, default="mystem")
-    UseBalancingArg.add_argument(parser, default=True)
-    DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
-    FramesColectionArg.add_argument(parser)
+    common.LabelsCountArg.add_argument(parser, default=3)
+    common.RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
+    common.StemmerArg.add_argument(parser, default="mystem")
+    common.UseBalancingArg.add_argument(parser, default=True)
+    common.DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
+    common.FramesColectionArg.add_argument(parser)
 
     # Parsing arguments.
     args = parser.parse_args()
 
     # Reading arguments.
-    embedding_filepath = RusVectoresEmbeddingFilepathArg.read_argument(args)
-    labels_count = LabelsCountArg.read_argument(args)
-    terms_per_context = TermsPerContextArg.read_argument(args)
-    entity_fmt = EntityFormatterTypesArg.read_argument(args)
-    stemmer = StemmerArg.read_argument(args)
-    use_balancing = UseBalancingArg.read_argument(args)
-    dist_in_terms_between_attitude_ends = DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
-    frames_collection = FramesColectionArg.read_argument(args)
+    embedding_filepath = common.RusVectoresEmbeddingFilepathArg.read_argument(args)
+    labels_count = common.LabelsCountArg.read_argument(args)
+    terms_per_context = common.TermsPerContextArg.read_argument(args)
+    entity_fmt = common.EntityFormatterTypesArg.read_argument(args)
+    stemmer = common.StemmerArg.read_argument(args)
+    use_balancing = common.UseBalancingArg.read_argument(args)
+    dist_in_terms_between_attitude_ends = common.DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
+    frames_collection = common.FramesColectionArg.read_argument(args)
 
     pos_tagger = POSMystemWrapper(MystemWrapper().MystemInstance)
 
     # Default parameters
diff --git a/examples/train_bert.py b/examples/train_bert.py
index 1d70ff9..916ee68 100644
--- a/examples/train_bert.py
+++ b/examples/train_bert.py
@@ -1,17 +1,10 @@
 import argparse
-import sys
 from os.path import join
 
-sys.path.append('../')
-
 from arekit.common.pipeline.base import BasePipeline
-from network.args import const
-from network.args.common import BertConfigFilepathArg, BertCheckpointFilepathArg, BertVocabFilepathArg, \
-    BertSaveFilepathArg, InputSamplesFilepath, TokensPerContextArg
-from network.args.const import BERT_CONFIG_PATH, BERT_CKPT_PATH, BERT_VOCAB_PATH, OUTPUT_DIR, \
-    BERT_DEFAULT_STATE_NAME, BERT_TARGET_DIR
-from network.args.train import EpochsCountArg, BatchSizeArg, LearningRateArg, DoLowercaseArg
-from pipelines.train_bert import BertFinetunePipelineItem
+from arelight.pipelines.train_bert import BertFinetunePipelineItem
+
+from examples.args import train, common, const
 
 
 if __name__ == '__main__':
@@ -20,31 +13,31 @@
                                                  "required for inference and training.")
 
     # Provide arguments.
-    TokensPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    BertConfigFilepathArg.add_argument(parser, default=BERT_CONFIG_PATH)
-    BertCheckpointFilepathArg.add_argument(parser, default=BERT_CKPT_PATH)
-    BertVocabFilepathArg.add_argument(parser, default=BERT_VOCAB_PATH)
-    BertSaveFilepathArg.add_argument(parser, default=join(BERT_TARGET_DIR, BERT_DEFAULT_STATE_NAME))
-    InputSamplesFilepath.add_argument(parser, default=join(OUTPUT_DIR, join("rsr-v1_1-fx-nobalance-tpc50-bert_3l", "sample-train-0.tsv.gz")))
-    LearningRateArg.add_argument(parser, default=2e-5)
-    EpochsCountArg.add_argument(parser, default=4)
-    BatchSizeArg.add_argument(parser, default=6)
-    DoLowercaseArg.add_argument(parser, default=False)
+    common.TokensPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.BertConfigFilepathArg.add_argument(parser, default=const.BERT_CONFIG_PATH)
+    common.BertCheckpointFilepathArg.add_argument(parser, default=const.BERT_CKPT_PATH)
+    common.BertVocabFilepathArg.add_argument(parser, default=const.BERT_VOCAB_PATH)
+    common.BertSaveFilepathArg.add_argument(parser, default=join(const.BERT_TARGET_DIR, const.BERT_DEFAULT_STATE_NAME))
+    common.InputSamplesFilepath.add_argument(parser, default=join(const.OUTPUT_DIR, join("rsr-v1_1-fx-nobalance-tpc50-bert_3l", "sample-train-0.tsv.gz")))
+    train.LearningRateArg.add_argument(parser, default=2e-5)
+    train.EpochsCountArg.add_argument(parser, default=4)
+    train.BatchSizeArg.add_argument(parser, default=6)
+    train.DoLowercaseArg.add_argument(parser, default=False)
 
     # Parsing arguments.
     args = parser.parse_args()
 
     # Compose pipeline item.
     ppl = BasePipeline([
-        BertFinetunePipelineItem(bert_config_file=BertConfigFilepathArg.read_argument(args),
-                                 model_checkpoint_path=BertCheckpointFilepathArg.read_argument(args),
-                                 vocab_filepath=BertVocabFilepathArg.read_argument(args),
-                                 do_lowercase=DoLowercaseArg.read_argument(args),
-                                 max_seq_length=TokensPerContextArg.read_argument(args),
-                                 learning_rate=LearningRateArg.read_argument(args),
-                                 save_path=BertSaveFilepathArg.read_argument(args))
+        BertFinetunePipelineItem(bert_config_file=common.BertConfigFilepathArg.read_argument(args),
+                                 model_checkpoint_path=common.BertCheckpointFilepathArg.read_argument(args),
+                                 vocab_filepath=common.BertVocabFilepathArg.read_argument(args),
+                                 do_lowercase=train.DoLowercaseArg.read_argument(args),
+                                 max_seq_length=common.TokensPerContextArg.read_argument(args),
+                                 learning_rate=train.LearningRateArg.read_argument(args),
+                                 save_path=common.BertSaveFilepathArg.read_argument(args))
     ])
 
-    ppl.run(InputSamplesFilepath.read_argument(args),
-            params_dict={"epochs_count": EpochsCountArg.read_argument(args),
-                         "batch_size": BatchSizeArg.read_argument(args)})
+    ppl.run(common.InputSamplesFilepath.read_argument(args),
+            params_dict={"epochs_count": train.EpochsCountArg.read_argument(args),
+                         "batch_size": train.BatchSizeArg.read_argument(args)})
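Worth spelling out why every `sys.path.append('../')` in these example scripts could be deleted: the hack was only needed while `exp`, `network`, and the rest lived at the repository root. Once the project is installed, for example with an editable install (`pip install -e .`, an assumed workflow), `arelight` and `examples.args` resolve through the normal import machinery. A small check under that assumption:

```python
import importlib.util

# Confirms the packaged modules resolve without sys.path patching. Note that
# `examples.args.const` also requires an examples/__init__.py, which this
# diff does not show; hence the hedge.
for name in ("arelight", "arelight.pipelines.train_bert", "examples.args.const"):
    spec = importlib.util.find_spec(name)
    print(name, "->", "ok" if spec is not None else "not importable")
```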
diff --git a/examples/train_nn_on_rusentrel.py b/examples/train_nn_on_rusentrel.py
index 6f19a5a..2d2d6bb 100644
--- a/examples/train_nn_on_rusentrel.py
+++ b/examples/train_nn_on_rusentrel.py
@@ -1,11 +1,11 @@
-import sys
 import argparse
 
-sys.path.append('../')
-
+from arelight.network.nn.common import create_bags_collection_type, create_full_model_name, create_network_model_io
 from utils import create_labels_scaler
 
+from examples.args import const, train
 from examples.rusentrel.common import Common
+from examples.args.const import NEURAL_NETWORKS_TARGET_DIR, BAG_SIZE
 from examples.rusentrel.config_setups import optionally_modify_config_for_experiment, modify_config_for_model
 from examples.rusentrel.exp_io import CustomRuSentRelNetworkExperimentIO
@@ -27,14 +27,7 @@
 from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions
 from arekit.processing.languages.ru.pos_service import PartOfSpeechTypesService
 
-from network.args import const
-from network.args.common import LabelsCountArg, TermsPerContextArg, \
-    DistanceInTermsBetweenAttitudeEndsArg, ModelNameArg, VocabFilepathArg, \
-    EmbeddingMatrixFilepathArg, ModelLoadDirArg
-from network.args.const import NEURAL_NETWORKS_TARGET_DIR, BAG_SIZE
-from network.args.train import BagsPerMinibatchArg, ModelInputTypeArg, DropoutKeepProbArg, \
-    LearningRateArg, EpochsCountArg
-from network.nn.common import create_bags_collection_type, create_network_model_io, create_full_model_name
+from examples.args import common
 
 
 if __name__ == '__main__':
@@ -42,35 +35,35 @@
                                                 "based on the RuSentRel and RuAttitudes datasets (optionally)")
 
     # Composing cmd arguments.
-    LabelsCountArg.add_argument(parser, default=3)
-    BagsPerMinibatchArg.add_argument(parser, default=const.BAGS_PER_MINIBATCH)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
-    ModelInputTypeArg.add_argument(parser, default=ModelInputType.SingleInstance)
-    ModelNameArg.add_argument(parser, default=ModelNames.PCNN.value)
-    DropoutKeepProbArg.add_argument(parser, default=0.5)
-    LearningRateArg.add_argument(parser, default=0.1)
-    EpochsCountArg.add_argument(parser, default=150)
-    VocabFilepathArg.add_argument(parser, default=None)
-    EmbeddingMatrixFilepathArg.add_argument(parser, default=None)
-    ModelLoadDirArg.add_argument(parser, default=None)
+    common.LabelsCountArg.add_argument(parser, default=3)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
+    common.ModelNameArg.add_argument(parser, default=ModelNames.PCNN.value)
+    common.VocabFilepathArg.add_argument(parser, default=None)
+    common.EmbeddingMatrixFilepathArg.add_argument(parser, default=None)
+    common.ModelLoadDirArg.add_argument(parser, default=None)
+    train.ModelInputTypeArg.add_argument(parser, default=ModelInputType.SingleInstance)
+    train.BagsPerMinibatchArg.add_argument(parser, default=const.BAGS_PER_MINIBATCH)
+    train.DropoutKeepProbArg.add_argument(parser, default=0.5)
+    train.LearningRateArg.add_argument(parser, default=0.1)
+    train.EpochsCountArg.add_argument(parser, default=150)
 
     # Parsing arguments.
     args = parser.parse_args()
 
     # Reading arguments.
-    labels_count = LabelsCountArg.read_argument(args)
-    model_input_type = ModelInputTypeArg.read_argument(args)
-    model_name = ModelNameArg.read_argument(args)
-    embedding_matrix_filepath = EmbeddingMatrixFilepathArg.read_argument(args)
-    vocab_filepath = VocabFilepathArg.read_argument(args)
-    dropout_keep_prob = DropoutKeepProbArg.read_argument(args)
-    bags_per_minibatch = BagsPerMinibatchArg.read_argument(args)
-    terms_per_context = TermsPerContextArg.read_argument(args)
-    learning_rate = LearningRateArg.read_argument(args)
-    dist_in_terms_between_attitude_ends = DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
-    epochs_count = EpochsCountArg.read_argument(args)
-    model_load_dir = ModelLoadDirArg.read_argument(args)
+    labels_count = common.LabelsCountArg.read_argument(args)
+    model_name = common.ModelNameArg.read_argument(args)
+    embedding_matrix_filepath = common.EmbeddingMatrixFilepathArg.read_argument(args)
+    vocab_filepath = common.VocabFilepathArg.read_argument(args)
+    terms_per_context = common.TermsPerContextArg.read_argument(args)
+    dist_in_terms_between_attitude_ends = common.DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
+    model_load_dir = common.ModelLoadDirArg.read_argument(args)
+    model_input_type = train.ModelInputTypeArg.read_argument(args)
+    bags_per_minibatch = train.BagsPerMinibatchArg.read_argument(args)
+    dropout_keep_prob = train.DropoutKeepProbArg.read_argument(args)
+    learning_rate = train.LearningRateArg.read_argument(args)
+    epochs_count = train.EpochsCountArg.read_argument(args)
 
     # Utilize predefined versions and folding format.
     exp_type = ExperimentTypes.RuSentRel
diff --git a/run_infer_bert.py b/run_infer_bert.py
index 75cb6f5..0a99593 100644
--- a/run_infer_bert.py
+++ b/run_infer_bert.py
@@ -12,15 +12,14 @@
 from arekit.contrib.experiment_rusentrel.labels.types import ExperimentNegativeLabel, ExperimentPositiveLabel
 from arekit.contrib.networks.core.predict.tsv_writer import TsvPredictWriter
 
-from network.args import const
-from network.args.common import LabelsCountArg, InputTextArg, FromFilesArg, SynonymsCollectionArg, \
-    EntityFormatterTypesArg, EntitiesParserArg, TermsPerContextArg, PredictOutputFilepathArg, BertConfigFilepathArg, \
-    BertCheckpointFilepathArg, BertVocabFilepathArg, BertTextBFormatTypeArg, TokensPerContextArg
-from network.args.const import BERT_FINETUNED_CKPT_PATH, BERT_VOCAB_PATH, BERT_CONFIG_PATH
-from network.args.train import DoLowercaseArg
-from pipelines.backend import BratBackendPipelineItem
-from pipelines.inference_bert import BertInferencePipelineItem
-from pipelines.serialize_bert import BertTextsSerializationPipelineItem
+from arelight.pipelines.backend import BratBackendPipelineItem
+from arelight.pipelines.inference_bert import BertInferencePipelineItem
+from arelight.pipelines.serialize_bert import BertTextsSerializationPipelineItem
+
+from examples.args import common
+from examples.args import train
+from examples.args import const
+from examples.args.train import DoLowercaseArg
 from utils import create_labels_scaler
 
 if __name__ == '__main__':
@@ -28,31 +27,31 @@
     parser = argparse.ArgumentParser(description="Text inference example")
 
     # Providing arguments.
-    InputTextArg.add_argument(parser, default=None)
-    FromFilesArg.add_argument(parser, default=[const.DEFAULT_TEXT_FILEPATH])
-    SynonymsCollectionArg.add_argument(parser, default=None)
-    LabelsCountArg.add_argument(parser, default=3)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    TokensPerContextArg.add_argument(parser, default=128)
-    EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
-    PredictOutputFilepathArg.add_argument(parser, default=None)
-    BertCheckpointFilepathArg.add_argument(parser, default=BERT_FINETUNED_CKPT_PATH)
-    BertConfigFilepathArg.add_argument(parser, default=BERT_CONFIG_PATH)
-    BertVocabFilepathArg.add_argument(parser, default=BERT_VOCAB_PATH)
-    BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
-    DoLowercaseArg.add_argument(parser, default=False)
+    common.InputTextArg.add_argument(parser, default=None)
+    common.FromFilesArg.add_argument(parser, default=[const.DEFAULT_TEXT_FILEPATH])
+    common.SynonymsCollectionArg.add_argument(parser, default=None)
+    common.LabelsCountArg.add_argument(parser, default=3)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.TokensPerContextArg.add_argument(parser, default=128)
+    common.EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
+    common.PredictOutputFilepathArg.add_argument(parser, default=None)
+    common.BertCheckpointFilepathArg.add_argument(parser, default=const.BERT_FINETUNED_CKPT_PATH)
+    common.BertConfigFilepathArg.add_argument(parser, default=const.BERT_CONFIG_PATH)
+    common.BertVocabFilepathArg.add_argument(parser, default=const.BERT_VOCAB_PATH)
+    common.BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
+    train.DoLowercaseArg.add_argument(parser, default=False)
 
     # Parsing arguments.
     args = parser.parse_args()
 
     # Reading text-related parameters.
-    texts_from_files = FromFilesArg.read_argument(args)
-    text_from_arg = InputTextArg.read_argument(args)
+    texts_from_files = common.FromFilesArg.read_argument(args)
+    text_from_arg = common.InputTextArg.read_argument(args)
     actual_content = text_from_arg if text_from_arg is not None else texts_from_files
 
     # Implement extra structures.
-    labels_scaler = create_labels_scaler(LabelsCountArg.read_argument(args))
+    labels_scaler = create_labels_scaler(common.LabelsCountArg.read_argument(args))
 
     # Parsing arguments.
     args = parser.parse_args()
@@ -61,12 +60,12 @@
     ppl = BasePipeline(pipeline=[
         BertTextsSerializationPipelineItem(
-            synonyms=SynonymsCollectionArg.read_argument(args),
-            terms_per_context=TermsPerContextArg.read_argument(args),
-            entities_parser=EntitiesParserArg.read_argument(args),
-            entity_fmt=create_entity_formatter(EntityFormatterTypesArg.read_argument(args)),
+            synonyms=common.SynonymsCollectionArg.read_argument(args),
+            terms_per_context=common.TermsPerContextArg.read_argument(args),
+            entities_parser=common.EntitiesParserArg.read_argument(args),
+            entity_fmt=create_entity_formatter(common.EntityFormatterTypesArg.read_argument(args)),
             name_provider=ExperimentNameProvider(name="example-bert", suffix="infer"),
-            text_b_type=BertTextBFormatTypeArg.read_argument(args),
+            text_b_type=common.BertTextBFormatTypeArg.read_argument(args),
             opin_annot=DefaultAnnotator(
                 PairBasedAnnotationAlgorithm(
                     dist_in_terms_bound=None,
@@ -77,10 +76,10 @@
         BertInferencePipelineItem(
             data_type=DataType.Test,
             predict_writer=TsvPredictWriter(),
-            bert_config_file=BertConfigFilepathArg.read_argument(args),
-            model_checkpoint_path=BertCheckpointFilepathArg.read_argument(args),
-            vocab_filepath=BertVocabFilepathArg.read_argument(args),
-            max_seq_length=TokensPerContextArg.read_argument(args),
+            bert_config_file=common.BertConfigFilepathArg.read_argument(args),
+            model_checkpoint_path=common.BertCheckpointFilepathArg.read_argument(args),
+            vocab_filepath=common.BertVocabFilepathArg.read_argument(args),
+            max_seq_length=common.TokensPerContextArg.read_argument(args),
             do_lowercase=DoLowercaseArg.read_argument(args),
             labels_scaler=labels_scaler),
@@ -93,7 +92,7 @@
             )
     ])
 
-    backend_template = PredictOutputFilepathArg.read_argument(args)
+    backend_template = common.PredictOutputFilepathArg.read_argument(args)
 
     ppl.run(actual_content, {
         "predict_fp": "{}.npz".format(backend_template) if backend_template is not None else None,
diff --git a/run_infer_nn.py b/run_infer_nn.py
index 4ebaa6a..3f02a7f 100644
--- a/run_infer_nn.py
+++ b/run_infer_nn.py
@@ -16,56 +16,54 @@
 from arekit.contrib.networks.enum_input_types import ModelInputType
 from arekit.contrib.networks.enum_name_types import ModelNames
 
-from network.args import const
-from network.args.common import ModelNameArg, LabelsCountArg, RusVectoresEmbeddingFilepathArg, SynonymsCollectionArg, \
-    InputTextArg, TermsPerContextArg, VocabFilepathArg, EmbeddingMatrixFilepathArg, ModelLoadDirArg, EntitiesParserArg, \
-    StemmerArg, PredictOutputFilepathArg, FramesColectionArg, FromFilesArg, EntityFormatterTypesArg
-from network.args.train import ModelInputTypeArg, BagsPerMinibatchArg
-from network.nn.common import create_network_model_io, create_bags_collection_type, create_full_model_name
-from pipelines.backend import BratBackendPipelineItem
-from pipelines.inference_nn import TensorflowNetworkInferencePipelineItem
-from pipelines.serialize_nn import NetworkTextsSerializationPipelineItem
+from examples.args import const, common
+from examples.args.train import BagsPerMinibatchArg, ModelInputTypeArg
 from utils import create_labels_scaler
 
+from arelight.pipelines.inference_nn import TensorflowNetworkInferencePipelineItem
+from arelight.pipelines.backend import BratBackendPipelineItem
+from arelight.pipelines.serialize_nn import NetworkTextsSerializationPipelineItem
+from arelight.network.nn.common import create_full_model_name, create_network_model_io, create_bags_collection_type
+
 
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser(description="Text inference example")
 
     # Providing arguments.
-    InputTextArg.add_argument(parser, default=None)
-    FromFilesArg.add_argument(parser, default=[const.DEFAULT_TEXT_FILEPATH])
-    SynonymsCollectionArg.add_argument(parser, default=None)
-    RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
+    common.InputTextArg.add_argument(parser, default=None)
+    common.FromFilesArg.add_argument(parser, default=[const.DEFAULT_TEXT_FILEPATH])
+    common.SynonymsCollectionArg.add_argument(parser, default=None)
+    common.RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
     BagsPerMinibatchArg.add_argument(parser, default=const.BAGS_PER_MINIBATCH)
-    LabelsCountArg.add_argument(parser, default=3)
-    ModelNameArg.add_argument(parser, default=ModelNames.PCNN.value)
+    common.LabelsCountArg.add_argument(parser, default=3)
+    common.ModelNameArg.add_argument(parser, default=ModelNames.PCNN.value)
     ModelInputTypeArg.add_argument(parser, default=ModelInputType.SingleInstance)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
-    VocabFilepathArg.add_argument(parser, default=None)
-    EmbeddingMatrixFilepathArg.add_argument(parser, default=None)
-    ModelLoadDirArg.add_argument(parser, default=const.NEURAL_NETWORKS_TARGET_DIR)
-    EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
-    StemmerArg.add_argument(parser, default="mystem")
-    PredictOutputFilepathArg.add_argument(parser, default=None)
-    FramesColectionArg.add_argument(parser)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
+    common.VocabFilepathArg.add_argument(parser, default=None)
+    common.EmbeddingMatrixFilepathArg.add_argument(parser, default=None)
+    common.ModelLoadDirArg.add_argument(parser, default=const.NEURAL_NETWORKS_TARGET_DIR)
+    common.EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
+    common.StemmerArg.add_argument(parser, default="mystem")
+    common.PredictOutputFilepathArg.add_argument(parser, default=None)
+    common.FramesColectionArg.add_argument(parser)
 
     # Parsing arguments.
     args = parser.parse_args()
 
     # Reading provided arguments.
-    model_name = ModelNameArg.read_argument(args)
+    model_name = common.ModelNameArg.read_argument(args)
     model_input_type = ModelInputTypeArg.read_argument(args)
-    model_load_dir = ModelLoadDirArg.read_argument(args)
-    frames_collection = FramesColectionArg.read_argument(args)
+    model_load_dir = common.ModelLoadDirArg.read_argument(args)
+    frames_collection = common.FramesColectionArg.read_argument(args)
 
     # Reading text-related parameters.
-    texts_from_files = FromFilesArg.read_argument(args)
-    text_from_arg = InputTextArg.read_argument(args)
+    texts_from_files = common.FromFilesArg.read_argument(args)
+    text_from_arg = common.InputTextArg.read_argument(args)
     actual_content = text_from_arg if text_from_arg is not None else texts_from_files
 
     # Implement extra structures.
-    labels_scaler = create_labels_scaler(LabelsCountArg.read_argument(args))
+    labels_scaler = create_labels_scaler(common.LabelsCountArg.read_argument(args))
 
     # Parsing arguments.
     args = parser.parse_args()
@@ -77,22 +75,22 @@
     nn_io = create_network_model_io(
         full_model_name=full_model_name,
-        embedding_filepath=EmbeddingMatrixFilepathArg.read_argument(args),
+        embedding_filepath=common.EmbeddingMatrixFilepathArg.read_argument(args),
         source_dir=model_load_dir,
         target_dir=model_load_dir,
-        vocab_filepath=VocabFilepathArg.read_argument(args),
+        vocab_filepath=common.VocabFilepathArg.read_argument(args),
         model_name_tag=u'')
 
     # Declaring pipeline.
     ppl = BasePipeline(pipeline=[
         NetworkTextsSerializationPipelineItem(
             frames_collection=frames_collection,
-            synonyms=SynonymsCollectionArg.read_argument(args),
-            terms_per_context=TermsPerContextArg.read_argument(args),
-            embedding_path=RusVectoresEmbeddingFilepathArg.read_argument(args),
-            entities_parser=EntitiesParserArg.read_argument(args),
-            entity_fmt=create_entity_formatter(EntityFormatterTypesArg.read_argument(args)),
-            stemmer=StemmerArg.read_argument(args),
+            synonyms=common.SynonymsCollectionArg.read_argument(args),
+            terms_per_context=common.TermsPerContextArg.read_argument(args),
+            embedding_path=common.RusVectoresEmbeddingFilepathArg.read_argument(args),
+            entities_parser=common.EntitiesParserArg.read_argument(args),
+            entity_fmt=create_entity_formatter(common.EntityFormatterTypesArg.read_argument(args)),
+            stemmer=common.StemmerArg.read_argument(args),
             name_provider=ExperimentNameProvider(name="example", suffix="infer"),
             opin_annot=DefaultAnnotator(
                 PairBasedAnnotationAlgorithm(
@@ -122,7 +120,7 @@
             )
     ])
 
-    backend_template = PredictOutputFilepathArg.read_argument(args)
+    backend_template = common.PredictOutputFilepathArg.read_argument(args)
 
     ppl.run(actual_content, {
         "predict_fp": "{}.npz".format(backend_template) if backend_template is not None else None,
diff --git a/run_serialize_bert.py b/run_serialize_bert.py
index dd8d1f6..a6976ed 100644
--- a/run_serialize_bert.py
+++ b/run_serialize_bert.py
@@ -9,12 +9,10 @@
 from arekit.common.labels.provider.constant import ConstantLabelProvider
 from arekit.common.pipeline.base import BasePipeline
 from arekit.contrib.experiment_rusentrel.entities.factory import create_entity_formatter
+from arelight.pipelines.serialize_bert import BertTextsSerializationPipelineItem
 
-from network.args import const
-from network.args.common import InputTextArg, EntitiesParserArg, TermsPerContextArg, \
-    SynonymsCollectionArg, FromFilesArg, EntityFormatterTypesArg, BertTextBFormatTypeArg
-from network.args.const import DEFAULT_TEXT_FILEPATH
-from pipelines.serialize_bert import BertTextsSerializationPipelineItem
+from examples.args import const, common
+from examples.args.const import DEFAULT_TEXT_FILEPATH
 
 
 if __name__ == '__main__':
@@ -22,28 +20,28 @@
                                                  "required for inference and training.")
 
     # Provide arguments.
-    InputTextArg.add_argument(parser, default=None)
-    FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
-    EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
-    SynonymsCollectionArg.add_argument(parser, default=None)
-    BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
+    common.InputTextArg.add_argument(parser, default=None)
+    common.FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
+    common.EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
+    common.SynonymsCollectionArg.add_argument(parser, default=None)
+    common.BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
 
     # Parsing arguments.
     args = parser.parse_args()
 
-    text_from_arg = InputTextArg.read_argument(args)
-    texts_from_files = FromFilesArg.read_argument(args)
+    text_from_arg = common.InputTextArg.read_argument(args)
+    texts_from_files = common.FromFilesArg.read_argument(args)
 
     ppl = BasePipeline([
         BertTextsSerializationPipelineItem(
-            terms_per_context=TermsPerContextArg.read_argument(args),
-            synonyms=SynonymsCollectionArg.read_argument(args),
-            entities_parser=EntitiesParserArg.read_argument(args),
+            terms_per_context=common.TermsPerContextArg.read_argument(args),
+            synonyms=common.SynonymsCollectionArg.read_argument(args),
+            entities_parser=common.EntitiesParserArg.read_argument(args),
             name_provider=ExperimentNameProvider(name="example-bert", suffix="serialize"),
-            entity_fmt=create_entity_formatter(EntityFormatterTypesArg.read_argument(args)),
-            text_b_type=BertTextBFormatTypeArg.read_argument(args),
+            entity_fmt=create_entity_formatter(common.EntityFormatterTypesArg.read_argument(args)),
+            text_b_type=common.BertTextBFormatTypeArg.read_argument(args),
             opin_annot=DefaultAnnotator(annot_algo=PairBasedAnnotationAlgorithm(
                 dist_in_terms_bound=None,
                 label_provider=ConstantLabelProvider(label_instance=NoLabel()))),
diff --git a/run_serialize_nn.py b/run_serialize_nn.py
index 155cf2c..7a060fd 100644
--- a/run_serialize_nn.py
+++ b/run_serialize_nn.py
@@ -10,12 +10,11 @@
 from arekit.common.pipeline.base import BasePipeline
 from arekit.contrib.experiment_rusentrel.entities.factory import create_entity_formatter
 
-from network.args import const
-from network.args.common import InputTextArg, EntitiesParserArg, RusVectoresEmbeddingFilepathArg, TermsPerContextArg, \
-    StemmerArg, SynonymsCollectionArg, FramesColectionArg, FromFilesArg, EntityFormatterTypesArg
-from network.args.const import DEFAULT_TEXT_FILEPATH
-from pipelines.serialize_nn import NetworkTextsSerializationPipelineItem
+from examples.args import const
+from examples.args import common
+from examples.args.const import DEFAULT_TEXT_FILEPATH
+from arelight.pipelines.serialize_nn import NetworkTextsSerializationPipelineItem
 
 
 if __name__ == '__main__':
@@ -23,36 +22,36 @@
                                                  "required for inference and training.")
 
     # Provide arguments.
-    InputTextArg.add_argument(parser, default=None)
-    FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
-    EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
-    RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
-    StemmerArg.add_argument(parser, default="mystem")
-    SynonymsCollectionArg.add_argument(parser, default=None)
-    FramesColectionArg.add_argument(parser)
+    common.InputTextArg.add_argument(parser, default=None)
+    common.FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
+    common.EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
+    common.RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
+    common.StemmerArg.add_argument(parser, default="mystem")
+    common.SynonymsCollectionArg.add_argument(parser, default=None)
+    common.FramesColectionArg.add_argument(parser)
 
     # Parsing arguments.
     args = parser.parse_args()
 
     ppl = BasePipeline([
         NetworkTextsSerializationPipelineItem(
-            terms_per_context=TermsPerContextArg.read_argument(args),
-            synonyms=SynonymsCollectionArg.read_argument(args),
-            entities_parser=EntitiesParserArg.read_argument(args),
-            embedding_path=RusVectoresEmbeddingFilepathArg.read_argument(args),
+            terms_per_context=common.TermsPerContextArg.read_argument(args),
+            synonyms=common.SynonymsCollectionArg.read_argument(args),
+            entities_parser=common.EntitiesParserArg.read_argument(args),
+            embedding_path=common.RusVectoresEmbeddingFilepathArg.read_argument(args),
             name_provider=ExperimentNameProvider(name="example", suffix="serialize"),
-            entity_fmt=create_entity_formatter(EntityFormatterTypesArg.read_argument(args)),
+            entity_fmt=create_entity_formatter(common.EntityFormatterTypesArg.read_argument(args)),
             opin_annot=DefaultAnnotator(annot_algo=PairBasedAnnotationAlgorithm(
                 dist_in_terms_bound=None,
                 label_provider=ConstantLabelProvider(label_instance=NoLabel()))),
-            stemmer=StemmerArg.read_argument(args),
-            frames_collection=FramesColectionArg.read_argument(args),
+            stemmer=common.StemmerArg.read_argument(args),
+            frames_collection=common.FramesColectionArg.read_argument(args),
             data_folding=NoFolding(doc_ids_to_fold=[0],
                                    supported_data_types=[DataType.Test]))
     ])
 
-    text_from_arg = InputTextArg.read_argument(args)
-    text_from_file = FromFilesArg.read_argument(args)
+    text_from_arg = common.InputTextArg.read_argument(args)
+    text_from_file = common.FromFilesArg.read_argument(args)
     ppl.run(text_from_arg if text_from_arg is not None else text_from_file)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..463f5d3
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,34 @@
+from setuptools import (
+    setup,
+    find_packages,
+)
+
+
+def get_requirements(filenames):
+    r_total = []
+    for filename in filenames:
+        with open(filename) as f:
+            r_local = f.read().splitlines()
+            r_total.extend(r_local)
+    return r_total
+
+
+setup(
+    name='arelight',
+    version='0.22.0',
+    description='Mass-media text processing application for your '
+                'Relation Extraction task, powered by AREkit.',
+    url='https://github.com/nicolay-r/ARElight',
+    author='Nicolay Rusnachenko',
+    author_email='???',
+    license='MIT License',
+    classifiers=[
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Scientific/Engineering :: Information Analysis',
+        'Topic :: Text Processing :: Linguistic',
+    ],
+    keywords='natural language processing, relation extraction, sentiment analysis',
+    packages=find_packages(),
+    install_requires=get_requirements(['dependencies.txt']),
+)
\ No newline at end of file
diff --git a/test/test_bert_ontonotes_ner.py b/test/test_bert_ontonotes_ner.py
index 14aadbd..8fde7fb 100644
--- a/test/test_bert_ontonotes_ner.py
+++ b/test/test_bert_ontonotes_ner.py
@@ -1,8 +1,7 @@
 import unittest
 
 from arekit.processing.entities.obj_desc import NerObjectDescriptor
-
-from text.ner_ontonotes import BertOntonotesNER
+from arelight.text.ner_ontonotes import BertOntonotesNER
 
 
 class BertOntonotesTest(unittest.TestCase):
diff --git a/test/test_bert_ontonotes_ner_pipeline_item.py b/test/test_bert_ontonotes_ner_pipeline_item.py
index 770c2ee..620c8bd 100644
--- a/test/test_bert_ontonotes_ner_pipeline_item.py
+++ b/test/test_bert_ontonotes_ner_pipeline_item.py
@@ -5,8 +5,7 @@
 from arekit.common.news.sentence import BaseNewsSentence
 from arekit.common.text.parser import BaseTextParser
 from arekit.processing.text.pipeline_terms_splitter import TermsSplitterParser
-
-from text.pipeline_entities_bert_ontonotes import BertOntonotesNERPipelineItem
+from arelight.text.pipeline_entities_bert_ontonotes import BertOntonotesNERPipelineItem
 
 
 class BertOntonotesPipelineItemTest(unittest.TestCase):
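With `setup.py` in place, the relocated tests above import `arelight.*` exactly as a downstream user would. A hypothetical smoke check of what `packages=find_packages()` should discover when run from the repository root (the exact list depends on which directories carry an `__init__.py`, which the renames only partially show):

```python
from setuptools import find_packages

# Expected to cover at least the tree created by the renames in this diff:
#   arelight, arelight.exp, arelight.network, arelight.network.bert,
#   arelight.network.nn, arelight.pipelines, arelight.text
print(sorted(find_packages()))
```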