-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsync_past_data.py
43 lines (39 loc) · 1.55 KB
/
sync_past_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from huggingface_hub import snapshot_download
from pathlib import Path
import os
from datetime import datetime, timedelta
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into the process
# environment so that the os.getenv("HF_REPO") lookups below can see them.
load_dotenv()
# Helpers that download dataset files from the Hugging Face Hub into the local "past_data" folder.
def sync_data_recent(lookback_days=7):
    """Download the daily metric files for the most recent days.

    Builds one exact file name per day, ``Metric_TSP_V2_YYYY_MM_DD.tsv``,
    for today and the ``lookback_days - 1`` days before it (dates taken from
    the local clock), and passes them as ``allow_patterns`` to
    ``snapshot_download`` so only those files are fetched from the dataset
    repo named by the ``HF_REPO`` environment variable. Files are stored in
    the ``past_data`` folder next to this script.

    Args:
        lookback_days: Number of days to cover, counting today. Zero or a
            negative value produces an empty list of file names.

    Raises:
        ValueError: If ``HF_REPO`` is unset or empty.
    """
    repo_id = os.getenv("HF_REPO")
    # Fail fast, before touching the filesystem, instead of handing None to
    # snapshot_download and surfacing an error that never mentions HF_REPO.
    if not repo_id:
        raise ValueError(
            "HF_REPO environment variable is not set; "
            "expected a Hugging Face dataset repo id"
        )
    print(repo_id)

    # One exact file name per day in the lookback window, newest first.
    today = datetime.now()
    recent_files = [
        f"Metric_TSP_V2_{(today - timedelta(days=offset)).strftime('%Y_%m_%d')}.tsv"
        for offset in range(lookback_days)
    ]

    # Make sure the download target exists next to this script.
    target_dir = Path(__file__).resolve().parent / 'past_data'
    if target_dir.exists():
        print(f"Folder already exists: {target_dir}")
    else:
        target_dir.mkdir(parents=True, exist_ok=True)
        print(f"Created folder: {target_dir}")

    snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=target_dir,
        allow_patterns=recent_files,
    )
def sync_all_data():
    """Download every file of the dataset repo into the local folder.

    Reads the dataset repo id from the ``HF_REPO`` environment variable and
    calls ``snapshot_download`` without any file filter, storing the result
    in the ``past_data`` folder next to this script.

    Raises:
        ValueError: If ``HF_REPO`` is unset or empty.
    """
    repo_id = os.getenv("HF_REPO")
    # Fail fast, before touching the filesystem, instead of handing None to
    # snapshot_download and surfacing an error that never mentions HF_REPO.
    if not repo_id:
        raise ValueError(
            "HF_REPO environment variable is not set; "
            "expected a Hugging Face dataset repo id"
        )
    print(repo_id)

    # Make sure the download target exists next to this script.
    target_dir = Path(__file__).resolve().parent / 'past_data'
    if target_dir.exists():
        print(f"Folder already exists: {target_dir}")
    else:
        target_dir.mkdir(parents=True, exist_ok=True)
        print(f"Created folder: {target_dir}")

    snapshot_download(repo_id=repo_id, repo_type="dataset", local_dir=target_dir)
if __name__ == "__main__":
    # Running the script refreshes only the most recent files (default
    # lookback window). Call sync_all_data() here instead to fetch everything.
    sync_data_recent()