# 2_prep.yml
# Target built when this file is run without naming a specific target.
target_default: 2_prep

# Upstream pipeline file(s) pulled in alongside this one.
include:
  - 1_fetch.yml

# R packages listed for the target commands in this file.
packages:
  - scipiper
  - dplyr
  - glmtools
  - feather
  - stringr
  - readr
  - yaml
  - tidyr

# R scripts listed as sources for this phase
# (presumably where the functions called by the commands are defined; verify).
sources:
  - 2_prep/src/nml_write.R
  - 2_prep/src/driver_utils.R
  - 2_prep/src/yeti_job_utils.R
  - 2_prep/src/file_utils.R

# Targets for the 2_prep phase. Each key is a target name (an R object or a
# file path); `command` is the R call that builds it and `depends` lists
# prerequisite targets. Names such as `nml_list` and `toha_model_ids` are not
# defined in this file (presumably they come from the included 1_fetch.yml;
# verify).
targets:
  # Phase-level target named by `target_default`; it only groups the files below.
  2_prep:
    depends:
      - 2_prep/out/nml_file_group.yml
      - 2_prep/out/toha_glm3_nml_group.yml
      - 2_prep/out/glm_pball_array.rds
      - 2_prep/out/temperature_obs.feather

  # GLM3 nml files for the `toha_model_ids` sites, based on the glm3 template.
  2_prep/out/toha_glm3_nml_group.yml:
    command: write_glm3_nml_files(target_name, nml_list, site_ids = toha_model_ids,
      base_nml = '2_prep/in/glm3_template.nml', nml_dir = I('2_prep/tmp'))

  # these files should have been prefixed with pb0 to designate that they are
  # uncalibrated defaults...
  2_prep/out/nml_file_group.yml:
    command: write_nml_files(target_name, nml_list, base_nml = '2_prep/in/nhdhr_105231881.nml')

  # Single lake id passed to filter_unrun() below
  # (presumably a lake whose run segfaults, going by the name; verify).
  bad_segfault_lake:
    command: c(I('nhdhr_59746189'))

  hyperscales_unrun_ids:
    command: filter_unrun(nml_list = nml_list, ran_source_ids, jared_test_ids, target_expansion_ids, bad_segfault_lake)

  # naming here should be pball instead of cal...geez a bunch of my decisions
  # were shortsighted.
  2_prep/out/nml_cal_file_group.yml:
    command: write_result_nml_files(target_name, nml_objs = cal_result_nmls,
      pb0_nml_list = nml_list,
      sync_dir = I('2_prep/sync'), prefix = I('pball'))

  2_prep/out/driver_file_group.yml:
    command: sync_drivers(target_name,
      nml_list)

  # Job list for the pb0 runs, built from nml_list and the fetched temperature
  # observations, with job_chunk = 20.
  2_prep/out/glm_pb0_array.rds:
    command: build_pb0_job_list(target_name, nml_list, job_chunk = I(20),
      temperature_file = '1_fetch/out/03_temperature_observations.csv')

  # Job list for the pball export, driven by the pball nml file group above.
  2_prep/out/glm_pball_export.rds:
    command: build_pball_job_list(target_name,
      cal_nml_ind = '2_prep/out/nml_cal_file_group.yml', job_chunk = I(20))

  # this stage is run after the pb0 lakes are run and pushed up to SB. Perhaps
  # this needs a different remake_file to designate the break
  #
  # NOTE(review): an earlier comment here said cal_lakes was not used because
  # Jared's list was used instead. In this file cal_lakes IS currently passed
  # to `2_prep/out/glm_pball_array.rds` (as sim_ids) and to `cal_result_nmls`,
  # while `jared_source_ids` is the commented-out alternative.
  cal_lakes:
    command: filter_cal_lakes('2_prep/in/pb0_config.json',
      min_dates = I(50), min_depths = I(5), min_depth_density = I(0.5),
      temperature_file = '1_fetch/out/03_temperature_observations.csv')

  jared_source_ids:
    command: read_feather_ids('2_prep/in/Jared_TrainLakes.feather')

  jared_test_ids:
    command: read_jared_result_ids('2_prep/in/GLM_metamodel_predicted_sources_glm_transfer_pball_test_lakes.csv')

  2_prep/out/glm_pball_array.rds:
    command: build_pball_job_df(target_name, nml_list, sim_ids = cal_lakes)  # alternative: jared_source_ids

  # NEED TO REMOVE rmse = 99 results, since they never had successful runs...
  # am seeing Error in surface_height[j] <- elev[NS[j], j] :
  #   replacement has length zero
  # Calls: tryCatch ... export_temp -> <Anonymous> -> get_var -> get_surface_height
  #
  # NOTE(review): the `.` before `nml` in the pattern is unescaped and so
  # matches any character; confirm whether a literal dot was intended.
  # NOTE(review): `dummy` looks like a date stamp that is bumped to force a
  # rebuild - confirm.
  cal_result_nmls:
    command: filter_finished_cals(result_dir = I('3_run/sync'), pattern = I('^pball_nhdhr_.*.nml$'),
      cal_lakes = cal_lakes,
      dummy = I('2020-02-08'))

  2_prep/out/glm_transfer_array.rds:
    command: build_transfer_job_list(target_name, cal_nml_obj = cal_result_nmls,
      base_nml_list = nml_list, sim_ids = jared_source_ids)

  # Source ids read back from the transfer job array file built above.
  ran_source_ids:
    command: extract_source_ids('2_prep/out/glm_transfer_array.rds')

  # Reads from a path outside this repository (../pgmtl-data-release).
  target_expansion_ids:
    command: extract_expansion_ids('../pgmtl-data-release/tmp/filtered_observations.rds',
      target_ids = jared_test_ids,
      source_ids = ran_source_ids)

  2_prep/out/glm_transfer_test_array.rds:
    command: build_transfer_test_job_list(target_name, cal_nml_obj = cal_result_nmls,
      base_nml_list = nml_list, source_ids = ran_source_ids, target_ids = jared_test_ids)

  # Re-run list built from the pball export job file, with job_chunk = 1.
  2_prep/out/glm_pball_export_fails_array.rds:
    command: failed_array_jobs(target_name,
      '2_prep/out/glm_pball_export.rds', job_chunk = I(1), nml_list, dummy = I('2020-01-28'))

  # for some reason we need to re-run a few stragglers:
  2_prep/out/glm_pb0_fails_array.rds:
    command: failed_array_jobs(target_name,
      '2_prep/out/glm_pb0_array.rds', job_chunk = I(1), nml_list, dummy = I('2020-01-12'))