-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcdf_prep.R
227 lines (183 loc) · 5.61 KB
/
cdf_prep.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# Declare names that are used without a visible binding in this file so that
# R CMD check does not flag them as undefined global variables.
utils::globalVariables(c("%>%", "mutate"))
#' @title prep_cdf_long
#'
#' @description
#' \code{prep_cdf_long} is a convenience wrapper that runs the individual cdf
#' prep steps in order and then validates the result.
#'
#' @param cdf_long a MAP assessmentresults.csv file, read in as a data frame.
#' Can hold a single term, or many terms stacked together in one file.
#'
#' @return the prepped cdf. An error is raised if the result does not pass
#' \code{check_cdf_long}.
#'
#' @export
prep_cdf_long <- function(cdf_long) {
  # Clean up the column names first, then add the season / academic year
  # fields (both helpers are defined elsewhere in the package).
  cdf_long <- extract_academic_year(lower_df_names(cdf_long))

  # Column-by-column cleanup.
  raw_scale <- cdf_long$measurementscale
  cdf_long$measurementscale <- clean_measurementscale(raw_scale)

  raw_start <- cdf_long$teststartdate
  cdf_long$teststartdate <- munge_startdate(raw_start)

  raw_growth_flag <- cdf_long$growthmeasureyn
  cdf_long$growthmeasureyn <- as.logical(raw_growth_flag)

  # Refuse to hand back a cdf that does not conform.
  assert_that(check_cdf_long(cdf_long)$boolean)

  cdf_long
}
#' @title process_cdf_long
#'
#' @description
#' \code{process_cdf_long} is the second step in cdf processing. It dedupes
#' the prepped cdf (NWEA method), adds the grade/season columns and labels,
#' attaches consistent percentiles and derives \code{testquartile} from them.
#'
#' @param prepped_cdf output of prep_cdf_long
#'
#' @return a processed cdf file
#'
#' @export
process_cdf_long <- function(prepped_cdf) {
  # one row per student / subject / term
  deduped <- dedupe_cdf(prepped_cdf, method = "NWEA")

  # grade + season scale, chart label, ordered label factor
  with_season <- grade_level_seasonify(deduped)
  with_label <- grade_season_labelify(with_season)
  with_factor <- grade_season_factors(with_label)

  # percentiles from a single norm study
  with_npr <- make_npr_consistent(with_factor)

  mutate(
    with_npr,
    testquartile = kipp_quartile(consistent_percentile)
  )
}
#' @title dedupe_cdf
#'
#' @description
#' \code{dedupe_cdf} makes sure that the cdf only contains one row per
#' student / subject / academic year / term.
#'
#' @details
#' Rows are grouped by \code{studentid}, \code{measurementscale},
#' \code{map_year_academic} and \code{fallwinterspring}. Within each group
#' exactly one row is kept, chosen by \code{method}:
#' \itemize{
#'   \item \code{"NWEA"}: rows flagged \code{growthmeasureyn} first, then the
#'     smallest \code{teststandarderror}.
#'   \item \code{"high RIT"}: the largest \code{testritscore}.
#'   \item \code{"most recent"}: the latest \code{teststartdate}.
#' }
#' If several rows are still tied, the one that appears first in
#' \code{prepped_cdf} is kept, so a tie never removes a student's result.
#'
#' @param prepped_cdf conforming prepped cdf file.
#' @param method can choose between c('NWEA', 'high RIT', 'most recent').
#' Default is NWEA method.
#'
#' @return the rows of \code{prepped_cdf} that were kept (original row order),
#' with an added integer column \code{rn} (always 1 in the output). No
#' \code{ungroup()} is applied to the result.
#'
#' @export
dedupe_cdf <- function(prepped_cdf, method = "NWEA") {
  # verify inputs
  assert_that(
    is.data.frame(prepped_cdf),
    method %in% c("NWEA", "high RIT", "most recent"),
    check_cdf_long(prepped_cdf)$boolean
  )

  by_test_event <- prepped_cdf %>%
    group_by(studentid, measurementscale, map_year_academic, fallwinterspring)

  # order(order(...)) gives each row its 1-based position in the sorted group.
  # It is used instead of rank() because:
  #  * rank() takes a single vector, so it cannot use a second sort key;
  #  * rank() averages ties by default, so tied rows would never equal 1;
  #  * order() leaves unresolved ties in their original order, so exactly one
  #    row per group gets rn == 1.
  # A leading minus makes the highest value sort first. xtfrm() turns the
  # test date into a plain number first, because unary minus is not defined
  # for Date / POSIXct vectors.
  dupe_tagged <- switch(
    method,
    "NWEA" = mutate(
      by_test_event,
      rn = order(order(-growthmeasureyn, teststandarderror))
    ),
    "high RIT" = mutate(
      by_test_event,
      rn = order(order(-testritscore))
    ),
    "most recent" = mutate(
      by_test_event,
      rn = order(order(-xtfrm(teststartdate)))
    )
  )

  dupe_tagged[dupe_tagged$rn == 1L, ]
}
#' @title grade_level_seasonify
#'
#' @description
#' \code{grade_level_seasonify} turns grade level into a simplified continuous
#' scale, using consistent offsets for MAP 'seasons'
#' (Fall = -0.8, Winter = -0.5, Spring = 0, Summer = 0.1).
#'
#' @param cdf a cdf that has 'grade' and 'fallwinterspring' columns.
#' 'fallwinterspring' may be character or factor; values other than
#' 'Fall', 'Winter', 'Spring' and 'Summer' (including NA) produce NA.
#'
#' @return a plain data frame with every column of \code{cdf} plus a
#' 'grade_level_season' column (\code{grade} + season offset). Stops with an
#' error if a required column is missing.
grade_level_seasonify <- function(cdf) {
  # inputs consistency check
  required_cols <- c("grade", "fallwinterspring")
  if (!all(required_cols %in% names(cdf))) {
    stop(
      paste0(
        "'grade' and 'fallwinterspring' must be in your cdf to ",
        "grade_level_seasonify"
      ),
      call. = FALSE
    )
  }

  season_offsets <- c(Fall = -0.8, Winter = -0.5, Spring = 0, Summer = 0.1)

  # Drop any tibble / grouping classes up front so the column assignment
  # below behaves like a plain data frame assignment.
  out <- as.data.frame(cdf)

  # Look the offset up by season name. as.character() makes factor seasons
  # match by label; unknown or missing seasons index to NA. Doing the lookup
  # on a vector (rather than joining a helper table) leaves any existing
  # column of the cdf, whatever its name, untouched.
  season_offset <- unname(season_offsets[as.character(out$fallwinterspring)])

  out$grade_level_season <- out$grade + season_offset
  out
}
#' @title grade_season_labelify
#'
#' @description
#' \code{grade_season_labelify} adds an abbreviated grade/season label
#' (e.g. '5S') that is useful when labelling charts. The labels are produced
#' by \code{fall_spring_me} from the 'grade_level_season' column.
#'
#' @param x a cdf that has 'grade_level_season' (eg product of
#' \code{grade_level_seasonify()})
#'
#' @return a data frame with a 'grade_season_label' column
grade_season_labelify <- function(x) {
  # the label is derived from grade_level_season, so it has to be there
  assert_that('grade_level_season' %in% names(x))

  season_labels <- fall_spring_me(x$grade_level_season)
  x$grade_season_label <- season_labels

  as.data.frame(x)
}
#' @title grade_season_factors
#'
#' @description helper function that turns grade_season_label into an ordered
#' factor whose levels follow the order of grade_level_season.
#'
#' @param x a cdf that has grade_level_season and grade_season_label
#'
#' @return \code{x}, with grade_season_label converted to an ordered factor
grade_season_factors <- function(x) {
  # Row positions sorted by the numeric grade/season scale.
  by_grade_season <- order(x$grade_level_season)

  # Distinct labels, in the order they are met when walking the sorted rows.
  label_levels <- unique(x$grade_season_label[by_grade_season])

  x$grade_season_label <- factor(
    x$grade_season_label,
    levels = label_levels,
    ordered = TRUE
  )

  x
}
#' @title make_npr_consistent
#'
#' @description join a cdf to a norms study and get the empirical
#' percentiles. protects against longitudinal findings being
#' clouded by changes in the norms.
#'
#' @param cdf a mostly-processed cdf object (this is the last step)
#' in process_cdf
#' @param norm_study name of a norm study. default is 2011. look in norm_data.R
#' for documentation of available norm studies.
#'
#' @return \code{cdf} left-joined to the norm study on measurementscale,
#' fallwinterspring, grade and RIT score. The norm study's 'percentile'
#' column arrives as 'consistent_percentile', and 'percentile_source' records
#' which study was used.
make_npr_consistent <- function(
  cdf,
  norm_study = 'student_status_norms_2011'
) {
  # the norm study is passed by name; look the object up from that name
  norm_df <- eval(as.symbol(norm_study))

  # make sure that cdf has required fields
  required_fields <- c(
    'measurementscale', 'fallwinterspring', 'grade', 'testritscore'
  )
  ensure_fields(required_fields, cdf)

  # rename so the norm percentile is distinguishable in the joined result
  is_percentile <- names(norm_df) == 'percentile'
  names(norm_df)[is_percentile] <- 'consistent_percentile'
  norm_df$percentile_source <- norm_study

  # names are the cdf-side columns, values are the norm-side columns
  join_keys <- c("measurementscale", "fallwinterspring", "grade", "RIT")
  names(join_keys) <- c(
    "measurementscale", "fallwinterspring", "grade", "testritscore"
  )

  dplyr::left_join(x = cdf, y = norm_df, by = join_keys)
}