Skip to content

Commit

Permalink
Refactoring file location. Added tar files extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Apr 13, 2022
1 parent 737e8b3 commit 1b7cedb
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 15 deletions.
35 changes: 26 additions & 9 deletions download.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,45 @@
from os.path import join
import os
import tarfile
from arekit.contrib.source import utils
from network.args.const import EMBEDDING_FILEPATH, SYNONYMS_FILEPATH, BERT_PRETRAINED_MODEL_PATH, \
BERT_FINETUNED_MODEL_PATH

from examples.args import const


def download_examples_data():
root_dir = utils.get_default_download_dir()

data = {
EMBEDDING_FILEPATH: "http://rusvectores.org/static/models/rusvectores2/news_mystem_skipgram_1000_20_2015.bin.gz",
SYNONYMS_FILEPATH: "https://raw.githubusercontent.com/nicolay-r/RuSentRel/v1.1/synonyms.txt",
const.EMBEDDING_FILEPATH: "http://rusvectores.org/static/models/rusvectores2/news_mystem_skipgram_1000_20_2015.bin.gz",
const.SYNONYMS_FILEPATH: "https://raw.githubusercontent.com/nicolay-r/RuSentRel/v1.1/synonyms.txt",
# NOTE: this is a pre-trained model and it is expected to be fine-tuned.
BERT_PRETRAINED_MODEL_PATH: "https://www.dropbox.com/s/cr6nejxjiqbyd5o/ra-20-srubert-large-neut-nli-pretrained-3l.tar.gz?dl=1",
const.BERT_PRETRAINED_MODEL_PATHDIR: "https://www.dropbox.com/s/cr6nejxjiqbyd5o/ra-20-srubert-large-neut-nli-pretrained-3l.tar.gz?dl=1",
# Fine-tuned on RuSentRel collection.
BERT_FINETUNED_MODEL_PATH: "https://www.dropbox.com/s/g73osmwyrqtr2at/ra-20-srubert-large-neut-nli-pretrained-3l-finetuned.tar.gz?dl=1"

const.BERT_FINETUNED_MODEL_PATHDIR: "https://www.dropbox.com/s/g73osmwyrqtr2at/ra-20-srubert-large-neut-nli-pretrained-3l-finetuned.tar.gz?dl=1"
}

untar = [
const.BERT_PRETRAINED_MODEL_PATHDIR,
const.BERT_FINETUNED_MODEL_PATHDIR
]

# Perform downloading ...
for local_name, url_link in data.items():
print("Downloading: {}".format(local_name))
utils.download(dest_file_path=join(root_dir, local_name),
utils.download(dest_file_path=os.path.join(root_dir, local_name),
source_url=url_link)

# Extracting tar files ...
for local_name in untar:
local_name += '.tar.gz'
print(local_name)
if not os.path.exists(local_name):
continue
if not tarfile.is_tarfile(local_name):
continue
with tarfile.open(local_name) as f:
target = os.path.dirname(local_name)
f.extractall(path=target)


if __name__ == '__main__':
download_examples_data()
12 changes: 6 additions & 6 deletions examples/args/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
# Default pretrained BERT.
NEURAL_NETWORKS_TARGET_DIR = DEFAULT_MODEL_DIR
BERT_DEFAULT_STATE_NAME = "ra-20-srubert-large-neut-nli-pretrained-3l"
BERT_PRETRAINED_MODEL_PATH = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_STATE_NAME)
BERT_CONFIG_PATH = join(BERT_PRETRAINED_MODEL_PATH, "bert_config.json")
BERT_CKPT_PATH = join(BERT_PRETRAINED_MODEL_PATH, "model.ckpt-30238")
BERT_VOCAB_PATH = join(BERT_PRETRAINED_MODEL_PATH, "vocab.txt")
BERT_PRETRAINED_MODEL_PATHDIR = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_STATE_NAME)
BERT_CONFIG_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "bert_config.json")
BERT_CKPT_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "model.ckpt-30238")
BERT_VOCAB_PATH = join(BERT_PRETRAINED_MODEL_PATHDIR, "vocab.txt")

# Default Fine-tuned BERT.
BERT_TARGET_DIR = DEFAULT_MODEL_DIR
BERT_DEFAULT_FINETUNED = BERT_DEFAULT_STATE_NAME + '-finetuned'
BERT_FINETUNED_MODEL_PATH = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_FINETUNED)
BERT_FINETUNED_CKPT_PATH = join(BERT_FINETUNED_MODEL_PATH, BERT_DEFAULT_STATE_NAME)
BERT_FINETUNED_MODEL_PATHDIR = join(NEURAL_NETWORKS_TARGET_DIR, BERT_DEFAULT_FINETUNED)
BERT_FINETUNED_CKPT_PATH = join(BERT_FINETUNED_MODEL_PATHDIR, BERT_DEFAULT_STATE_NAME)

# The common output directory.
OUTPUT_DIR = join(current_dir, "../../_output")
File renamed without changes.

0 comments on commit 1b7cedb

Please sign in to comment.