
Commit 6d2bba4
#31 done
nicolay-r committed Apr 13, 2022
1 parent fc51aed commit 6d2bba4
Showing 46 changed files with 270 additions and 268 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion exp/exp.py → arelight/exp/exp.py
@@ -1,6 +1,6 @@
 from arekit.common.experiment.api.base import BaseExperiment

-from exp.opin_ops import CustomOpinionOperations
+from arelight.exp.opin_ops import CustomOpinionOperations


 class CustomExperiment(BaseExperiment):
3 changes: 2 additions & 1 deletion exp/exp_io.py → arelight/exp/exp_io.py
@@ -1,6 +1,7 @@
 import os
 from arekit.contrib.experiment_rusentrel.model_io.tf_networks import RuSentRelExperimentNetworkIOUtils
-from network.args.const import OUTPUT_DIR
+
+from examples.args.const import OUTPUT_DIR


 class InferIOUtils(RuSentRelExperimentNetworkIOUtils):
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion pipelines/backend.py → arelight/pipelines/backend.py
@@ -5,7 +5,7 @@
 from arekit.common.pipeline.items.base import BasePipelineItem

 from brat_backend import BratBackend
-from exp.exp_io import InferIOUtils
+from arelight.exp.exp_io import InferIOUtils


 class BratBackendPipelineItem(BasePipelineItem):
@@ -10,7 +10,7 @@
 from deeppavlov.models.bert import bert_classifier
 from deeppavlov.models.preprocessors.bert_preprocessor import BertPreprocessor

-from exp.exp_io import InferIOUtils
+from arelight.exp.exp_io import InferIOUtils


 class BertInferencePipelineItem(BasePipelineItem):
@@ -20,8 +20,8 @@
 from arekit.contrib.networks.shapes import NetworkInputShapes
 from arekit.processing.languages.ru.pos_service import PartOfSpeechTypesService

-from exp.exp_io import InferIOUtils
-from network.args.const import BAG_SIZE
+from examples.args.const import BAG_SIZE
+from arelight.exp.exp_io import InferIOUtils


 class TensorflowNetworkInferencePipelineItem(BasePipelineItem):
@@ -13,11 +13,11 @@
 from arekit.contrib.bert.samplers.types import BertSampleProviderTypes
 from arekit.processing.text.pipeline_terms_splitter import TermsSplitterParser

-from exp.doc_ops import CustomDocOperations
-from exp.exp import CustomExperiment
-from exp.exp_io import InferIOUtils
-from network.bert.ctx import BertSerializationContext
-from pipelines.utils import input_to_docs
+from arelight.exp.doc_ops import CustomDocOperations
+from arelight.exp.exp import CustomExperiment
+from arelight.exp.exp_io import InferIOUtils
+from arelight.network.bert.ctx import BertSerializationContext
+from arelight.pipelines.utils import input_to_docs


 class BertTextsSerializationPipelineItem(BasePipelineItem):
15 changes: 8 additions & 7 deletions pipelines/serialize_nn.py → arelight/pipelines/serialize_nn.py
@@ -18,13 +18,14 @@
 from arekit.processing.text.pipeline_frames_negation import FrameVariantsSentimentNegation
 from arekit.processing.text.pipeline_terms_splitter import TermsSplitterParser
 from arekit.processing.text.pipeline_tokenizer import DefaultTextTokenizer
-from exp.doc_ops import CustomDocOperations
-from exp.exp import CustomExperiment
-from exp.exp_io import InferIOUtils
-from network.nn.common import create_and_fill_variant_collection
-from network.nn.ctx import NetworkSerializationContext
-from network.nn.embedding import RusvectoresEmbedding
-from pipelines.utils import input_to_docs
+
+from arelight.exp.doc_ops import CustomDocOperations
+from arelight.exp.exp import CustomExperiment
+from arelight.exp.exp_io import InferIOUtils
+from arelight.network.nn.common import create_and_fill_variant_collection
+from arelight.network.nn.ctx import NetworkSerializationContext
+from arelight.network.nn.embedding import RusvectoresEmbedding
+from arelight.pipelines.utils import input_to_docs


 class NetworkTextsSerializationPipelineItem(BasePipelineItem):
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion text/ner_ontonotes.py → arelight/text/ner_ontonotes.py
@@ -1,6 +1,6 @@
 import importlib

-from text.ner_base import BaseNER
+from src.text.ner_base import BaseNER


 class BertOntonotesNER(BaseNER):
@@ -4,7 +4,7 @@
 from arekit.common.text.partitioning.terms import TermsPartitioning
 from arekit.processing.entities.obj_desc import NerObjectDescriptor

-from text.ner_ontonotes import BertOntonotesNER
+from src.text.ner_ontonotes import BertOntonotesNER


 class BertOntonotesNERPipelineItem(SentenceObjectsParserPipelineItem):
File renamed without changes.
4 changes: 2 additions & 2 deletions dependencies.txt
@@ -2,6 +2,6 @@ gensim==3.2.0
 deeppavlov==0.11.0
 rusenttokenize
 # DeepPavlov bert-dp dependencies:
-git+https://github.com/deepmipt/bert.git@feat/multi_gpu
+bert_dp @ git+https://github.com/deepmipt/bert.git@feat/multi_gpu
 # Install arekit
-git+https://github.com/nicolay-r/[email protected]
+arekit @ git+https://github.com/nicolay-r/[email protected]
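
Note: the two rewritten requirement lines use the PEP 508 direct-reference form, "name @ URL". Naming the distribution lets pip match the requirement against an already-installed package instead of treating the bare Git URL as an anonymous dependency. A minimal sketch of the general form, using a placeholder package and URL that are not part of this repository:

    # <distribution-name> @ git+<repository-url>@<revision>
    somepackage @ git+https://github.com/someorg/somepackage.git@v1.0.0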
Empty file added examples/args/__init__.py
Empty file.
File renamed without changes.
4 changes: 1 addition & 3 deletions network/args/common.py → examples/args/common.py
@@ -10,9 +10,7 @@
 from arekit.contrib.source.rusentrel.utils import iter_synonym_groups
 from arekit.processing.lemmatization.mystem import MystemWrapper

-from network.args.base import BaseArg
-from text.pipeline_entities_bert_ontonotes import BertOntonotesNERPipelineItem
-from text.pipeline_entities_default import TextEntitiesParser
+from examples.args.base import BaseArg


 class InputTextArg(BaseArg):
File renamed without changes.
2 changes: 1 addition & 1 deletion network/args/train.py → examples/args/train.py
@@ -1,6 +1,6 @@
 from arekit.contrib.networks.enum_input_types import ModelInputType, ModelInputTypeService

-from network.args.base import BaseArg
+from examples.args.base import BaseArg


 class BagsPerMinibatchArg(BaseArg):
2 changes: 1 addition & 1 deletion examples/rusentrel/common.py
@@ -5,7 +5,7 @@
 from arekit.contrib.experiment_rusentrel.exp_ds.utils import read_ruattitudes_in_memory
 from arekit.contrib.source.rusentrel.io_utils import RuSentRelIOUtils

-from network.nn.embedding import RusvectoresEmbedding
+from arelight.network.nn.embedding import RusvectoresEmbedding


 class Common:
2 changes: 1 addition & 1 deletion examples/rusentrel/configs/single.py
@@ -9,7 +9,7 @@
 from arekit.contrib.networks.context.configurations.self_att_bilstm import SelfAttentionBiLSTMConfig
 from arekit.contrib.networks.tf_helpers.cell_types import CellTypes

-from network.args.const import TERMS_PER_CONTEXT
+from examples.args.const import TERMS_PER_CONTEXT


 def ctx_self_att_bilstm_custom_config(config):
2 changes: 1 addition & 1 deletion examples/rusentrel/exp_io.py
@@ -1,5 +1,5 @@
 from arekit.contrib.experiment_rusentrel.model_io.tf_networks import RuSentRelExperimentNetworkIOUtils
-from network.args.const import OUTPUT_DIR
+from examples.args.const import OUTPUT_DIR


 class CustomRuSentRelNetworkExperimentIO(RuSentRelExperimentNetworkIOUtils):
53 changes: 24 additions & 29 deletions examples/serialize_rusentrel_for_bert.py
@@ -1,7 +1,4 @@
 import argparse
-import sys
-
-sys.path.append('../')

 from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
 from arekit.common.experiment.annot.default import DefaultAnnotator
@@ -11,7 +8,6 @@
 from arekit.common.labels.provider.constant import ConstantLabelProvider
 from arekit.common.labels.str_fmt import StringLabelsFormatter
 from arekit.contrib.bert.handlers.serializer import BertExperimentInputSerializerIterationHandler
-from arekit.contrib.bert.samplers.types import BertSampleProviderTypes
 from arekit.contrib.experiment_rusentrel.entities.factory import create_entity_formatter
 from arekit.contrib.experiment_rusentrel.factory import create_experiment
 from arekit.contrib.experiment_rusentrel.labels.types import ExperimentNeutralLabel, ExperimentPositiveLabel, \
@@ -20,16 +16,15 @@
 from arekit.contrib.experiment_rusentrel.types import ExperimentTypes
 from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions

+from examples.args import const, common
+from examples.args.const import DEFAULT_TEXT_FILEPATH
 from examples.rusentrel.common import Common
 from examples.rusentrel.exp_io import CustomRuSentRelNetworkExperimentIO
-from network.args import const
-from network.args.common import TermsPerContextArg, SynonymsCollectionArg, EntitiesParserArg, InputTextArg, \
-    FromFilesArg, RusVectoresEmbeddingFilepathArg, EntityFormatterTypesArg, UseBalancingArg, \
-    DistanceInTermsBetweenAttitudeEndsArg, StemmerArg, BertTextBFormatTypeArg
-from network.args.const import DEFAULT_TEXT_FILEPATH
-from network.bert.ctx import BertSerializationContext

 from utils import create_labels_scaler

+from arelight.network.bert.ctx import BertSerializationContext
+

 class ExperimentBERTTextBThreeScaleLabelsFormatter(StringLabelsFormatter):
@@ -46,29 +41,29 @@ def __init__(self):
                                      "required for inference and training.")

     # Provide arguments.
-    InputTextArg.add_argument(parser, default=None)
-    FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
-    EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
-    RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    SynonymsCollectionArg.add_argument(parser, default=None)
-    UseBalancingArg.add_argument(parser, default=True)
-    DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
-    BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
-    StemmerArg.add_argument(parser, default="mystem")
+    common.InputTextArg.add_argument(parser, default=None)
+    common.FromFilesArg.add_argument(parser, default=[DEFAULT_TEXT_FILEPATH])
+    common.EntitiesParserArg.add_argument(parser, default="bert-ontonotes")
+    common.RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.SynonymsCollectionArg.add_argument(parser, default=None)
+    common.UseBalancingArg.add_argument(parser, default=True)
+    common.DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-bert-styled")
+    common.BertTextBFormatTypeArg.add_argument(parser, default='nli_m')
+    common.StemmerArg.add_argument(parser, default="mystem")

     # Parsing arguments.
     args = parser.parse_args()

     # Reading arguments.
-    text_from_arg = InputTextArg.read_argument(args)
-    texts_from_files = FromFilesArg.read_argument(args)
-    terms_per_context = TermsPerContextArg.read_argument(args)
-    use_balancing = UseBalancingArg.read_argument(args)
-    stemmer = StemmerArg.read_argument(args)
-    entity_fmt = EntityFormatterTypesArg.read_argument(args)
-    dist_in_terms_between_attitude_ends = DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
+    text_from_arg = common.InputTextArg.read_argument(args)
+    texts_from_files = common.FromFilesArg.read_argument(args)
+    terms_per_context = common.TermsPerContextArg.read_argument(args)
+    use_balancing = common.UseBalancingArg.read_argument(args)
+    stemmer = common.StemmerArg.read_argument(args)
+    entity_fmt = common.EntityFormatterTypesArg.read_argument(args)
+    dist_in_terms_between_attitude_ends = common.DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)

     # Predefined parameters.
     labels_count = 3
@@ -120,7 +115,7 @@ def __init__(self):
         opin_ops=experiment.OpinionOperations,
         sample_labels_fmt=ExperimentBERTTextBThreeScaleLabelsFormatter(),
         annot_labels_fmt=experiment.OpinionOperations.LabelsFormatter,
-        sample_provider_type=BertTextBFormatTypeArg.read_argument(args),
+        sample_provider_type=common.BertTextBFormatTypeArg.read_argument(args),
         entity_formatter=experiment.ExperimentContext.StringEntityFormatter,
         value_to_group_id_func=synonyms.get_synonym_group_index,
         balance_train_samples=use_balancing)
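
Note: both serialization examples drop the sys.path.append('../') hack, which suggests the scripts now expect arelight and examples to be importable as regular packages, e.g. by running them as modules from the repository root (python -m examples.serialize_rusentrel_for_bert). The exact invocation is an assumption; it is not shown in this commit.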
43 changes: 19 additions & 24 deletions examples/serialize_rusentrel_for_nn.py
@@ -1,8 +1,9 @@
-import sys
 import argparse

-sys.path.append('../')
+from arelight.network.nn.common import create_and_fill_variant_collection
+from arelight.network.nn.ctx import NetworkSerializationContext

+from examples.args import const, common
 from utils import create_labels_scaler

 from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
@@ -23,12 +24,6 @@
 from arekit.processing.text.pipeline_frames_lemmatized import LemmasBasedFrameVariantsParser
 from arekit.processing.text.pipeline_tokenizer import DefaultTextTokenizer

-from network.args import const
-from network.args.common import LabelsCountArg, RusVectoresEmbeddingFilepathArg, TermsPerContextArg, \
-    StemmerArg, UseBalancingArg, DistanceInTermsBetweenAttitudeEndsArg, FramesColectionArg, EntityFormatterTypesArg
-from network.nn.common import create_and_fill_variant_collection
-from network.nn.ctx import NetworkSerializationContext
-
 from examples.rusentrel.common import Common
 from examples.rusentrel.exp_io import CustomRuSentRelNetworkExperimentIO

@@ -37,27 +32,27 @@
     parser = argparse.ArgumentParser(description="RuSentRel dataset serialization script")

     # Provide arguments.
-    LabelsCountArg.add_argument(parser, default=3)
-    RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
-    TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
-    StemmerArg.add_argument(parser, default="mystem")
-    UseBalancingArg.add_argument(parser, default=True)
-    DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
-    FramesColectionArg.add_argument(parser)
+    common.LabelsCountArg.add_argument(parser, default=3)
+    common.RusVectoresEmbeddingFilepathArg.add_argument(parser, default=const.EMBEDDING_FILEPATH)
+    common.TermsPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.EntityFormatterTypesArg.add_argument(parser, default="hidden-simple-eng")
+    common.StemmerArg.add_argument(parser, default="mystem")
+    common.UseBalancingArg.add_argument(parser, default=True)
+    common.DistanceInTermsBetweenAttitudeEndsArg.add_argument(parser, default=None)
+    common.FramesColectionArg.add_argument(parser)

     # Parsing arguments.
     args = parser.parse_args()

     # Reading arguments.
-    embedding_filepath = RusVectoresEmbeddingFilepathArg.read_argument(args)
-    labels_count = LabelsCountArg.read_argument(args)
-    terms_per_context = TermsPerContextArg.read_argument(args)
-    entity_fmt = EntityFormatterTypesArg.read_argument(args)
-    stemmer = StemmerArg.read_argument(args)
-    use_balancing = UseBalancingArg.read_argument(args)
-    dist_in_terms_between_attitude_ends = DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
-    frames_collection = FramesColectionArg.read_argument(args)
+    embedding_filepath = common.RusVectoresEmbeddingFilepathArg.read_argument(args)
+    labels_count = common.LabelsCountArg.read_argument(args)
+    terms_per_context = common.TermsPerContextArg.read_argument(args)
+    entity_fmt = common.EntityFormatterTypesArg.read_argument(args)
+    stemmer = common.StemmerArg.read_argument(args)
+    use_balancing = common.UseBalancingArg.read_argument(args)
+    dist_in_terms_between_attitude_ends = common.DistanceInTermsBetweenAttitudeEndsArg.read_argument(args)
+    frames_collection = common.FramesColectionArg.read_argument(args)
     pos_tagger = POSMystemWrapper(MystemWrapper().MystemInstance)

     # Default parameters
53 changes: 23 additions & 30 deletions examples/train_bert.py
@@ -1,17 +1,10 @@
 import argparse
-import sys
 from os.path import join

-sys.path.append('../')
-
 from arekit.common.pipeline.base import BasePipeline
-from network.args import const
-from network.args.common import BertConfigFilepathArg, BertCheckpointFilepathArg, BertVocabFilepathArg, \
-    BertSaveFilepathArg, InputSamplesFilepath, TokensPerContextArg
-from network.args.const import BERT_CONFIG_PATH, BERT_CKPT_PATH, BERT_VOCAB_PATH, OUTPUT_DIR, \
-    BERT_DEFAULT_STATE_NAME, BERT_TARGET_DIR
-from network.args.train import EpochsCountArg, BatchSizeArg, LearningRateArg, DoLowercaseArg
-from pipelines.train_bert import BertFinetunePipelineItem
+from arelight.pipelines.train_bert import BertFinetunePipelineItem
+
+from examples.args import train, common, const


 if __name__ == '__main__':
@@ -20,31 +13,31 @@
                                      "required for inference and training.")

     # Provide arguments.
-    TokensPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
-    BertConfigFilepathArg.add_argument(parser, default=BERT_CONFIG_PATH)
-    BertCheckpointFilepathArg.add_argument(parser, default=BERT_CKPT_PATH)
-    BertVocabFilepathArg.add_argument(parser, default=BERT_VOCAB_PATH)
-    BertSaveFilepathArg.add_argument(parser, default=join(BERT_TARGET_DIR, BERT_DEFAULT_STATE_NAME))
-    InputSamplesFilepath.add_argument(parser, default=join(OUTPUT_DIR, join("rsr-v1_1-fx-nobalance-tpc50-bert_3l", "sample-train-0.tsv.gz")))
-    LearningRateArg.add_argument(parser, default=2e-5)
-    EpochsCountArg.add_argument(parser, default=4)
-    BatchSizeArg.add_argument(parser, default=6)
-    DoLowercaseArg.add_argument(parser, default=False)
+    common.TokensPerContextArg.add_argument(parser, default=const.TERMS_PER_CONTEXT)
+    common.BertConfigFilepathArg.add_argument(parser, default=const.BERT_CONFIG_PATH)
+    common.BertCheckpointFilepathArg.add_argument(parser, default=const.BERT_CKPT_PATH)
+    common.BertVocabFilepathArg.add_argument(parser, default=const.BERT_VOCAB_PATH)
+    common.BertSaveFilepathArg.add_argument(parser, default=join(const.BERT_TARGET_DIR, const.BERT_DEFAULT_STATE_NAME))
+    common.InputSamplesFilepath.add_argument(parser, default=join(const.OUTPUT_DIR, join("rsr-v1_1-fx-nobalance-tpc50-bert_3l", "sample-train-0.tsv.gz")))
+    train.LearningRateArg.add_argument(parser, default=2e-5)
+    train.EpochsCountArg.add_argument(parser, default=4)
+    train.BatchSizeArg.add_argument(parser, default=6)
+    train.DoLowercaseArg.add_argument(parser, default=False)

     # Parsing arguments.
     args = parser.parse_args()

     # Compose pipeline item.
     ppl = BasePipeline([
-        BertFinetunePipelineItem(bert_config_file=BertConfigFilepathArg.read_argument(args),
-                                 model_checkpoint_path=BertCheckpointFilepathArg.read_argument(args),
-                                 vocab_filepath=BertVocabFilepathArg.read_argument(args),
-                                 do_lowercase=DoLowercaseArg.read_argument(args),
-                                 max_seq_length=TokensPerContextArg.read_argument(args),
-                                 learning_rate=LearningRateArg.read_argument(args),
-                                 save_path=BertSaveFilepathArg.read_argument(args))
+        BertFinetunePipelineItem(bert_config_file=common.BertConfigFilepathArg.read_argument(args),
+                                 model_checkpoint_path=common.BertCheckpointFilepathArg.read_argument(args),
+                                 vocab_filepath=common.BertVocabFilepathArg.read_argument(args),
+                                 do_lowercase=train.DoLowercaseArg.read_argument(args),
+                                 max_seq_length=common.TokensPerContextArg.read_argument(args),
+                                 learning_rate=train.LearningRateArg.read_argument(args),
+                                 save_path=common.BertSaveFilepathArg.read_argument(args))
     ])

-    ppl.run(InputSamplesFilepath.read_argument(args),
-            params_dict={"epochs_count": EpochsCountArg.read_argument(args),
-                         "batch_size": BatchSizeArg.read_argument(args)})
+    ppl.run(common.InputSamplesFilepath.read_argument(args),
+            params_dict={"epochs_count": train.EpochsCountArg.read_argument(args),
+                         "batch_size": train.BatchSizeArg.read_argument(args)})