diff --git a/README.md b/README.md index e47ef04..ecfbd07 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ Infer sentiment attitudes from a mass-media document(s). Using the `BERT` fine-tuned model version: ```bash -python3.6 infer_bert.py --from-files ../data/texts-inosmi-rus/e1.txt \ +python3.6 -m arelight.run.infer --from-files ../data/texts-inosmi-rus/e1.txt \ --labels-count 3 \ --terms-per-context 50 \ --tokens-per-context 128 \ @@ -49,7 +49,7 @@ python3.6 infer_bert.py --from-files ../data/texts-inosmi-rus/e1.txt \ ``` From `CSV` file (you need to have `text` column; sentence parser could be disabled): ``` -python3.6 infer_bert.py \ +python3.6 -m arelight.run.infer \ --from-dataframe ../data/examples.csv \ --entities-parser bert-ontonotes \ --terms-per-context 50 \ @@ -66,7 +66,7 @@ python3.6 infer_bert.py \ From list of files ```bash -python3.6 serialize_bert.py --from-files ../data/texts-inosmi-rus/e1.txt \ +python3.6 -m arelight.run.serialize --from-files ../data/texts-inosmi-rus/e1.txt \ --entities-parser bert-ontonotes \ --terms-per-context 50 \ --sentence-parser ru \ @@ -74,7 +74,7 @@ python3.6 serialize_bert.py --from-files ../data/texts-inosmi-rus/e1.txt \ ``` From `CSV` file (you need to have `text` column; sentence parser could be disabled): ``` -python3.6 serialize_bert.py \ +python3.6 -m arelight.run.serialize \ --from-dataframe ../data/examples.csv \ --entities-parser bert-ontonotes \ --terms-per-context 50 \ @@ -86,7 +86,7 @@ python3.6 serialize_bert.py \

-## Papers +## Reference * [Nicolay Rusnachenko: Language Models Application in Sentiment Attitude Extraction Task (2021) [RUS]](https://nicolay-r.github.io/website/data/rusnachenko2021language.pdf) diff --git a/examples/args/__init__.py b/arelight/run/__init__.py similarity index 100% rename from examples/args/__init__.py rename to arelight/run/__init__.py diff --git a/examples/entities/__init__.py b/arelight/run/args/__init__.py similarity index 100% rename from examples/entities/__init__.py rename to arelight/run/args/__init__.py diff --git a/examples/args/base.py b/arelight/run/args/base.py similarity index 100% rename from examples/args/base.py rename to arelight/run/args/base.py diff --git a/examples/args/common.py b/arelight/run/args/common.py similarity index 91% rename from examples/args/common.py rename to arelight/run/args/common.py index 2c379a3..0fd33e4 100644 --- a/examples/args/common.py +++ b/arelight/run/args/common.py @@ -1,19 +1,13 @@ import importlib -from arekit.contrib.source.rusentiframes.collection import RuSentiFramesCollection -from arekit.contrib.source.rusentiframes.labels_fmt import RuSentiFramesLabelsFormatter, \ - RuSentiFramesEffectLabelsFormatter from arekit.contrib.source.rusentiframes.types import RuSentiFramesVersionsService, RuSentiFramesVersions from arekit.contrib.utils.processing.lemmatization.mystem import MystemWrapper -from arelight.pipelines.demo.labels.base import NegativeLabel, PositiveLabel from arelight.pipelines.items.entities_default import TextEntitiesParser from arelight.pipelines.items.entities_ner_dp import DeepPavlovNERPipelineItem +from arelight.run.args.base import BaseArg from arelight.samplers.types import SampleFormattersService -from examples.args.base import BaseArg -from examples.entities.types import EntityFormattersService - class InputTextArg(BaseArg): @@ -31,28 +25,6 @@ def add_argument(parser, default): help='Input text for processing') -class FramesColectionArg(BaseArg): - - @staticmethod - def 
read_argument(args): - if args.frames == "ruattitudes-20": - return RuSentiFramesCollection.read_collection( - version=RuSentiFramesVersions.V20, - labels_fmt=RuSentiFramesLabelsFormatter( - pos_label_type=PositiveLabel, neg_label_type=NegativeLabel), - effect_labels_fmt=RuSentiFramesEffectLabelsFormatter( - pos_label_type=PositiveLabel, neg_label_type=NegativeLabel)) - - @staticmethod - def add_argument(parser, default="ruattitudes-20"): - parser.add_argument('--frames', - dest='frames', - type=str, - default=default, - nargs='?', - help='Collection for frames annotation in text (Default: {})'.format(default)) - - class PredictOutputFilepathArg(BaseArg): @staticmethod diff --git a/examples/args/const.py b/arelight/run/args/const.py similarity index 100% rename from examples/args/const.py rename to arelight/run/args/const.py diff --git a/examples/args/train.py b/arelight/run/args/train.py similarity index 97% rename from examples/args/train.py rename to arelight/run/args/train.py index b46865f..8d48539 100644 --- a/examples/args/train.py +++ b/arelight/run/args/train.py @@ -1,4 +1,4 @@ -from examples.args.base import BaseArg +from arelight.run.args.base import BaseArg class EpochsCountArg(BaseArg): diff --git a/arelight/run/entities/__init__.py b/arelight/run/entities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/entities/factory.py b/arelight/run/entities/factory.py similarity index 95% rename from examples/entities/factory.py rename to arelight/run/entities/factory.py index 394f0c5..d9d9614 100644 --- a/examples/entities/factory.py +++ b/arelight/run/entities/factory.py @@ -4,7 +4,7 @@ from arekit.contrib.utils.entities.formatters.str_simple_sharp_prefixed_fmt import SharpPrefixedEntitiesSimpleFormatter from arekit.contrib.utils.entities.formatters.str_simple_uppercase_fmt import SimpleUppercasedEntityFormatter -from examples.entities.types import EntityFormatterTypes +from arelight.run.entities.types import EntityFormatterTypes 
def create_entity_formatter(fmt_type, create_russian_pos_tagger_func=None): diff --git a/examples/entities/types.py b/arelight/run/entities/types.py similarity index 91% rename from examples/entities/types.py rename to arelight/run/entities/types.py index f6c87dc..3c3fe83 100644 --- a/examples/entities/types.py +++ b/arelight/run/entities/types.py @@ -1,6 +1,6 @@ from enum import Enum -from examples.utils import EnumConversionService +from arelight.run.utils import EnumConversionService class EntityFormatterTypes(Enum): diff --git a/examples/infer_bert.py b/arelight/run/infer.py similarity index 94% rename from examples/infer_bert.py rename to arelight/run/infer.py index abdf056..ab807fb 100644 --- a/examples/infer_bert.py +++ b/arelight/run/infer.py @@ -13,13 +13,10 @@ from arelight.pipelines.demo.infer_bert_rus import demo_infer_texts_bert_pipeline from arelight.pipelines.items.backend_brat_html import BratHtmlEmbeddingPipelineItem from arelight.pipelines.items.utils import input_to_docs - -from examples.args import common -from examples.args import train -from examples.args import const -from examples.entities.factory import create_entity_formatter -from examples.entities.types import EntityFormatterTypes -from examples.utils import create_labels_scaler, read_synonyms_collection +from arelight.run.args import common, const, train +from arelight.run.entities.factory import create_entity_formatter +from arelight.run.entities.types import EntityFormatterTypes +from arelight.run.utils import create_labels_scaler, read_synonyms_collection if __name__ == '__main__': diff --git a/examples/serialize_bert.py b/arelight/run/serialize.py similarity index 97% rename from examples/serialize_bert.py rename to arelight/run/serialize.py index c13c339..d600c26 100644 --- a/examples/serialize_bert.py +++ b/arelight/run/serialize.py @@ -22,13 +22,10 @@ from arelight.doc_ops import InMemoryDocOperations from arelight.pipelines.annot_nolabel import create_neutral_annotation_pipeline 
from arelight.pipelines.items.utils import input_to_docs +from arelight.run.args import common from arelight.samplers.bert import create_bert_sample_provider from arelight.samplers.types import BertSampleProviderTypes -from examples.args import const, common -from examples.entities.factory import create_entity_formatter -from examples.utils import read_synonyms_collection - if __name__ == '__main__': diff --git a/examples/train_bert.py b/arelight/run/train.py similarity index 97% rename from examples/train_bert.py rename to arelight/run/train.py index ba06c8b..73f9d9d 100644 --- a/examples/train_bert.py +++ b/arelight/run/train.py @@ -4,7 +4,7 @@ from arekit.common.pipeline.base import BasePipeline from arelight.pipelines.items.train_bert import BertFinetunePipelineItem -from examples.args import train, common, const +from arelight.run.args import common, train, const if __name__ == '__main__': diff --git a/examples/utils.py b/arelight/run/utils.py similarity index 100% rename from examples/utils.py rename to arelight/run/utils.py diff --git a/arelight/samplers/types.py b/arelight/samplers/types.py index c34c16f..9c94260 100644 --- a/arelight/samplers/types.py +++ b/arelight/samplers/types.py @@ -1,6 +1,6 @@ from enum import Enum -from examples.utils import EnumConversionService +from arelight.run.utils import EnumConversionService class BertSampleProviderTypes(Enum): diff --git a/download.py b/download.py index 310a2fb..4de5660 100644 --- a/download.py +++ b/download.py @@ -3,7 +3,7 @@ from arekit.common import utils -from examples.args import const +from arelight.run.args import const def download_examples_data(): diff --git a/setup.py b/setup.py index a80c8a2..1189807 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ def get_requirements(filenames): setup( name='arelight', - version='0.23.1', + version='0.23.2', description='About Mass-media text processing application for your ' 'Relation Extraction task, powered by AREkit.', 
url='https://github.com/nicolay-r/ARElight',