diff --git a/download.py b/download.py
index 287ea94..dcb1618 100644
--- a/download.py
+++ b/download.py
@@ -1,28 +1,45 @@
-from os.path import join
+import os
+import tarfile
 
 from arekit.contrib.source import utils
-from network.args.const import EMBEDDING_FILEPATH, SYNONYMS_FILEPATH, BERT_PRETRAINED_MODEL_PATH, \
-    BERT_FINETUNED_MODEL_PATH
+
+from examples.args import const
 
 
 def download_examples_data():
     root_dir = utils.get_default_download_dir()
 
     data = {
-        EMBEDDING_FILEPATH: "http://rusvectores.org/static/models/rusvectores2/news_mystem_skipgram_1000_20_2015.bin.gz",
-        SYNONYMS_FILEPATH: "https://raw.githubusercontent.com/nicolay-r/RuSentRel/v1.1/synonyms.txt",
+        const.EMBEDDING_FILEPATH: "http://rusvectores.org/static/models/rusvectores2/news_mystem_skipgram_1000_20_2015.bin.gz",
+        const.SYNONYMS_FILEPATH: "https://raw.githubusercontent.com/nicolay-r/RuSentRel/v1.1/synonyms.txt",
 
         # NOTE: this is a pre-trained model and it is expected to be fine-tunned.
-        BERT_PRETRAINED_MODEL_PATH: "https://www.dropbox.com/s/cr6nejxjiqbyd5o/ra-20-srubert-large-neut-nli-pretrained-3l.tar.gz?dl=1",
+        const.BERT_PRETRAINED_MODEL_PATHDIR: "https://www.dropbox.com/s/cr6nejxjiqbyd5o/ra-20-srubert-large-neut-nli-pretrained-3l.tar.gz?dl=1",
 
         # Fine-tuned on RuSentRel collection.
-        BERT_FINETUNED_MODEL_PATH: "https://www.dropbox.com/s/g73osmwyrqtr2at/ra-20-srubert-large-neut-nli-pretrained-3l-finetuned.tar.gz?dl=1"
-
+        const.BERT_FINETUNED_MODEL_PATHDIR: "https://www.dropbox.com/s/g73osmwyrqtr2at/ra-20-srubert-large-neut-nli-pretrained-3l-finetuned.tar.gz?dl=1"
     }
 
+    untar = [
+        const.BERT_PRETRAINED_MODEL_PATHDIR,
+        const.BERT_FINETUNED_MODEL_PATHDIR
+    ]
+
     # Perform downloading ...
     for local_name, url_link in data.items():
         print("Downloading: {}".format(local_name))
-        utils.download(dest_file_path=join(root_dir, local_name),
+        utils.download(dest_file_path=os.path.join(root_dir, local_name),
                        source_url=url_link)
 
+    # Extracting tar files ...
+    for local_name in untar:
+        local_name += '.tar.gz'
+        print(local_name)
+        if not os.path.exists(local_name):
+            continue
+        if not tarfile.is_tarfile(local_name):
+            continue
+        with tarfile.open(local_name) as f:
+            target = os.path.dirname(local_name)
+            f.extractall(path=target)
+
 
 if __name__ == '__main__':
     download_examples_data()
diff --git a/examples/args/const.py b/examples/args/const.py
index 702fa1f..aee792d 100644
--- a/examples/args/const.py
+++ b/examples/args/const.py
@@ -18,16 +18,16 @@
 # Default pretrained BERT.
 NEURAL_NETWORKS_TARGET_DIR = DEFAULT_MODEL_DIR
 BERT_DEFAULT_STATE_NAME = "ra-20-srubert-large-neut-nli-pretrained-3l"
-BERT_PRETRAINED_MODEL_PATH = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_STATE_NAME)
-BERT_CONFIG_PATH = join(BERT_PRETRAINED_MODEL_PATH, "bert_config.json")
-BERT_CKPT_PATH = join(BERT_PRETRAINED_MODEL_PATH, "model.ckpt-30238")
-BERT_VOCAB_PATH = join(BERT_PRETRAINED_MODEL_PATH, "vocab.txt")
+BERT_PRETRAINED_MODEL_PATHDIR = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_STATE_NAME)
+BERT_CONFIG_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "bert_config.json")
+BERT_CKPT_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "model.ckpt-30238")
+BERT_VOCAB_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "vocab.txt")
 
 # Default Fine-tuned BERT.
 BERT_TARGET_DIR = DEFAULT_MODEL_DIR
 BERT_DEFAULT_FINETUNED = BERT_DEFAULT_STATE_NAME + '-finetuned'
-BERT_FINETUNED_MODEL_PATH = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_FINETUNED)
-BERT_FINETUNED_CKPT_PATH = join(BERT_FINETUNED_MODEL_PATH, BERT_DEFAULT_STATE_NAME)
+BERT_FINETUNED_MODEL_PATHDIR = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_FINETUNED)
+BERT_FINETUNED_CKPT_PATH = join(BERT_FINETUNED_MODEL_PATHDIR, BERT_DEFAULT_STATE_NAME)
 
 # The common output directory.
 OUTPUT_DIR = join(current_dir, "../../_output")
diff --git a/utils.py b/examples/utils.py
similarity index 100%
rename from utils.py
rename to examples/utils.py