Skip to content

Commit

Permalink
Refactoring. brat backend has been moved into lib. Removed models dir.
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Apr 14, 2022
1 parent 1b7cedb commit 7dd7fc4
Show file tree
Hide file tree
Showing 10 changed files with 20 additions and 17 deletions.
6 changes: 3 additions & 3 deletions brat_backend.py → arelight/brat_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,10 @@ def __to_data(self, samples, result, obj_color_types, rel_color_types, label_to_

return text, coll_data, doc_data

def to_html(self, obj_color_types, rel_color_types,
def to_html(self, obj_color_types, rel_color_types, template_filepath,
samples_data_filepath, result_data_filepath,
label_to_rel, docs_range=None, brat_url="http://localhost:8001/"):
assert(isinstance(template_filepath, str))
assert(isinstance(docs_range, tuple) or docs_range is None)
assert(isinstance(label_to_rel, dict))

Expand All @@ -357,8 +358,7 @@ def to_html(self, obj_color_types, rel_color_types,
docs_range=docs_range)

# Loading template file.
template_source = join(self.current_dir, "brat_template.html")
with open(template_source, "r") as templateFile:
with open(template_filepath, "r") as templateFile:
template = templateFile.read()

# Replace template placeholders.
Expand Down
3 changes: 2 additions & 1 deletion arelight/pipelines/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from arekit.common.experiment.data_type import DataType
from arekit.common.pipeline.items.base import BasePipelineItem

from brat_backend import BratBackend
from arelight.brat_backend import BratBackend
from arelight.exp.exp_io import InferIOUtils


Expand All @@ -26,6 +26,7 @@ def apply_core(self, input_data, pipeline_ctx):
samples_data_filepath=input_data.create_samples_writer_target(DataType.Test),
obj_color_types=self.__obj_color_types,
rel_color_types=self.__rel_color_types,
template_filepath=pipeline_ctx.provide_or_none("template_filepath"),
label_to_rel=self.__label_to_rel)

# Setup predicted result writer.
Expand Down
File renamed without changes.
14 changes: 5 additions & 9 deletions download.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,22 @@ def download_examples_data():
data = {
const.EMBEDDING_FILEPATH: "http://rusvectores.org/static/models/rusvectores2/news_mystem_skipgram_1000_20_2015.bin.gz",
const.SYNONYMS_FILEPATH: "https://raw.githubusercontent.com/nicolay-r/RuSentRel/v1.1/synonyms.txt",
# PCNN: pretrained model dir.
const.PCNN_DEFAULT_MODEL_TAR: "https://www.dropbox.com/s/ceqy69vj59te534/fx_ctx_pcnn.tar.gz?dl=1",
# NOTE: this is a pre-trained model and it is expected to be fine-tuned.
const.BERT_PRETRAINED_MODEL_PATHDIR: "https://www.dropbox.com/s/cr6nejxjiqbyd5o/ra-20-srubert-large-neut-nli-pretrained-3l.tar.gz?dl=1",
const.BERT_PRETRAINED_MODEL_TAR: "https://www.dropbox.com/s/cr6nejxjiqbyd5o/ra-20-srubert-large-neut-nli-pretrained-3l.tar.gz?dl=1",
# Fine-tuned on RuSentRel collection.
const.BERT_FINETUNED_MODEL_PATHDIR: "https://www.dropbox.com/s/g73osmwyrqtr2at/ra-20-srubert-large-neut-nli-pretrained-3l-finetuned.tar.gz?dl=1"
const.BERT_FINETUNED_MODEL_TAR: "https://www.dropbox.com/s/g73osmwyrqtr2at/ra-20-srubert-large-neut-nli-pretrained-3l-finetuned.tar.gz?dl=1"
}

untar = [
const.BERT_PRETRAINED_MODEL_PATHDIR,
const.BERT_FINETUNED_MODEL_PATHDIR
]

# Perform downloading ...
for local_name, url_link in data.items():
print("Downloading: {}".format(local_name))
utils.download(dest_file_path=os.path.join(root_dir, local_name),
source_url=url_link)

# Extracting tar files ...
for local_name in untar:
local_name += '.tar.gz'
for local_name in data.keys():
print(local_name)
if not os.path.exists(local_name):
continue
Expand Down
6 changes: 5 additions & 1 deletion examples/args/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
SYNONYMS_FILEPATH = join(DATA_DIR, "synonyms.txt")

# Common model dir.
DEFAULT_MODEL_DIR = join(current_dir, "../../models/")
DEFAULT_MODEL_DIR = join(DATA_DIR, "models")

PCNN_DEFAULT_MODEL_TAR = join(DEFAULT_MODEL_DIR, "fx_ctx_pcnn.tar.gz")

# Default pretrained BERT.
NEURAL_NETWORKS_TARGET_DIR = DEFAULT_MODEL_DIR
BERT_DEFAULT_STATE_NAME = "ra-20-srubert-large-neut-nli-pretrained-3l"
BERT_PRETRAINED_MODEL_PATHDIR = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_STATE_NAME)
BERT_PRETRAINED_MODEL_TAR = BERT_PRETRAINED_MODEL_PATHDIR + '.tar.gz'
BERT_CONFIG_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "bert_config.json")
BERT_CKPT_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "model.ckpt-30238")
BERT_VOCAB_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "vocab.txt")
Expand All @@ -27,6 +30,7 @@
BERT_TARGET_DIR = DEFAULT_MODEL_DIR
BERT_DEFAULT_FINETUNED = BERT_DEFAULT_STATE_NAME + '-finetuned'
BERT_FINETUNED_MODEL_PATHDIR = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_FINETUNED)
BERT_FINETUNED_MODEL_TAR = BERT_FINETUNED_MODEL_PATHDIR + '.tar.gz'
BERT_FINETUNED_CKPT_PATH = join(BERT_FINETUNED_MODEL_PATHDIR, BERT_DEFAULT_STATE_NAME)

# The common output directory.
Expand Down
4 changes: 3 additions & 1 deletion examples/infer_texts_bert.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
from os.path import join

from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
from arekit.common.experiment.annot.default import DefaultAnnotator
Expand All @@ -20,7 +21,7 @@
from examples.args import train
from examples.args import const
from examples.args.train import DoLowercaseArg
from utils import create_labels_scaler
from examples.utils import create_labels_scaler

if __name__ == '__main__':

Expand Down Expand Up @@ -95,6 +96,7 @@
backend_template = common.PredictOutputFilepathArg.read_argument(args)

ppl.run(actual_content, {
"template_filepath": join(const.DATA_DIR, "brat_template.html"),
"predict_fp": "{}.npz".format(backend_template) if backend_template is not None else None,
"brat_vis_fp": "{}.html".format(backend_template) if backend_template is not None else None
})
2 changes: 2 additions & 0 deletions examples/infer_texts_nn.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
from os.path import join

from arekit.common.experiment.annot.algo.pair_based import PairBasedAnnotationAlgorithm
from arekit.common.experiment.annot.default import DefaultAnnotator
Expand Down Expand Up @@ -123,6 +124,7 @@
backend_template = common.PredictOutputFilepathArg.read_argument(args)

ppl.run(actual_content, {
"template_filepath": join(const.DATA_DIR, "brat_template.html"),
"predict_fp": "{}.npz".format(backend_template) if backend_template is not None else None,
"brat_vis_fp": "{}.html".format(backend_template) if backend_template is not None else None
})
2 changes: 0 additions & 2 deletions models/fx_ctx_pcnn/checkpoint

This file was deleted.

Binary file removed models/fx_ctx_pcnn/fx_ctx_pcnn.data-00000-of-00001
Binary file not shown.
Binary file removed models/fx_ctx_pcnn/fx_ctx_pcnn.index
Binary file not shown.

0 comments on commit 7dd7fc4

Please sign in to comment.