Skip to content

Commit

Permalink
refactor: continue
Browse files: browse the repository at this point in the history
  • Loading branch information
lukasellinger committed Oct 16, 2024
1 parent 3b73590 commit b144bd8
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 17 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,6 @@ fabric.properties
*.safetensors

dataset/jan
dataset/SHROOM_test-labeled
dataset/SHROOM_test-labeled

data/
12 changes: 6 additions & 6 deletions scripts/dataset_creation/create_filtered_fever.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,12 @@ def create_def_dataset(file_in: str, file_out: str, person_prop=0.1, long=True)
return len(def_dataset), len(dataset_raw)


print(create_def_dataset(file_in='../dataset/fever/train.jsonl',
file_out='../dataset/def_train.jsonl',
print(create_def_dataset(file_in='../data/raw/fever/train.jsonl',
file_out='../data/processed/fever/def_train.jsonl',
person_prop=0.1))
print(create_def_dataset(file_in='../dataset/fever/dev.jsonl',
file_out='../dataset/def_dev.jsonl',
print(create_def_dataset(file_in='../data/raw/fever/dev.jsonl',
file_out='../data/processed/fever/def_dev.jsonl',
person_prop=0.1))
print(create_def_dataset(file_in='../dataset/fever/test.jsonl',
file_out='../dataset/def_test.jsonl',
print(create_def_dataset(file_in='../data/raw/fever/test.jsonl',
file_out='../data/processed/fever/def_test.jsonl',
person_prop=0.1))
4 changes: 2 additions & 2 deletions scripts/dataset_creation/create_shroom_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from config import HF_WRITE_TOKEN, PROJECT_DIR

data_files = {
'modelagnostic': str(PROJECT_DIR / "dataset/SHROOM_test-labeled/test.model-agnostic.json"),
'modelaware': str(PROJECT_DIR / "dataset/SHROOM_test-labeled/test.model-aware.json")
'modelagnostic': str(PROJECT_DIR / "data/raw/SHROOM_test-labeled/test.model-agnostic.json"),
'modelaware': str(PROJECT_DIR / "data/raw/SHROOM_test-labeled/test.model-aware.json")
}

dataset = load_dataset('json', data_files=data_files)
Expand Down
6 changes: 3 additions & 3 deletions scripts/dataset_creation/create_wiktionary_mini.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from general_utils.spacy_utils import is_single_word

data_files = {
#'train': str(PROJECT_DIR / 'dataset/jan/0_10_words/train.parquet'),
#'val': str(PROJECT_DIR / 'dataset/jan/0_10_words/val.parquet'),
'test': str(PROJECT_DIR / 'dataset/jan/0_10_words/test.parquet')
#'train': str(PROJECT_DIR / 'data/raw/jan/0_10_words/train.parquet'),
#'val': str(PROJECT_DIR / 'data/raw/jan/0_10_words/val.parquet'),
'test': str(PROJECT_DIR / 'data/raw/jan/0_10_words/test.parquet')
}

# Load the dataset
Expand Down
6 changes: 3 additions & 3 deletions scripts/sqlite_related/create_fever_dataset_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@
with FeverDocDB() as db:
db.write(CREATE_DEF_DATASET)

train_file_path = PROJECT_DIR.joinpath("dataset/def_train.jsonl")
dev_file_path = PROJECT_DIR.joinpath("dataset/def_dev.jsonl")
test_file_path = PROJECT_DIR.joinpath("dataset/def_test.jsonl")
train_file_path = PROJECT_DIR.joinpath("data/processed/fever/def_train.jsonl")
dev_file_path = PROJECT_DIR.joinpath("data/processed/fever/def_dev.jsonl")
test_file_path = PROJECT_DIR.joinpath("data/processed/fever/def_test.jsonl")
reader = JSONLineReader()

for set_type, path in zip(['train', 'dev', 'test'],
Expand Down
2 changes: 1 addition & 1 deletion scripts/sqlite_related/create_jan_set_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,5 @@ def main(table, json_table):

if __name__ == "__main__":
TABLE = 'german_dataset'
dataset_json_table = PROJECT_DIR.joinpath('dataset/jan_eval_results_table.json')
dataset_json_table = PROJECT_DIR.joinpath('data/raw/jan/jan_raw_german-claim_verification.json')
main(TABLE, dataset_json_table)
2 changes: 1 addition & 1 deletion scripts/sqlite_related/create_wikipages_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
with FeverDocDB() as db:
db.write(CREATE_DOCUMENTS)

wiki_pages_dir = PROJECT_DIR.joinpath('wiki-pages')
wiki_pages_dir = PROJECT_DIR.joinpath('data/wiki-pages')
wiki_pages = os.listdir(wiki_pages_dir)
reader = JSONLineReader()

Expand Down

0 comments on commit b144bd8

Please sign in to comment.