-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathCurtisKephartAssignment3_Sol.R
126 lines (98 loc) · 2.91 KB
/
CurtisKephartAssignment3_Sol.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#' Curtis Kephart
#' Winter 2016
#' CurtisKephartAssignment3Creator.R
#'
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 0
print("Curtis Kephart")
print(0142214)
print("[email protected]")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 1
library(foreign)
df.ex <- read.dta(
"https://github.com/EconomiCurtis/econ294_2015/raw/master/data/org_example.dta"
) %>% tbl_df()
#loaded this way, it's a data frae
class(df.ex)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 2
require(dplyr)
df.ex.2 <- dplyr::filter(
df.ex,
year == 2013 & month == 12
)
print(nrow(df.ex.2))
df.ex.2 <- dplyr::filter(
df.ex, year == 2013 & (month == 7 | month == 8 | month == 9) #many other ways to do this
)
print(nrow(df.ex.2))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 3
df.ex.3a <- df.ex %>%
dplyr::arrange(
year, month
)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 4
df.ex.4a <- df.ex %>%
select(year:age)
df.ex.4b <- df.ex %>%
select(year, month, starts_with("i"))
print(distinct(select(df.ex, state)) %>% as.data.frame())
# since I loaded df.ex as a tbl_df, print only printed the top 10 rows,
# convert to regular df with "%>% as.data.frame()"
unique(df.ex$state) #an alternative with fewer keystrokes
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 5
stndz <- function(x){
(x - mean(x, na.rm = T)) / sd(x, na.rm = T)
}
nrmlz <- function(x){
(x - min(x, na.rm = T)) / (max(x, na.rm = T) - min(x, na.rm = T))
}
df.ex.5a <- df.ex %>%
dplyr::mutate(
rw.stndz = stndz(rw),
rw.nrmlz = nrmlz(rw)
)
df.ex.5b <- df.ex %>%
dplyr::group_by(year, month) %>%
dplyr::mutate(
rw.stndz = stndz(rw),
rw.nrmlz = nrmlz(rw),
count = n()
)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 6
df.ex.6 <- df.ex %>%
dplyr::group_by(year, month, state) %>%
dplyr::summarise(
rw.min = min(rw, na.rm = T),
rw.1stq = quantile(rw, 0.25, na.rm = T),
rw.mean = mean(rw, na.rm = T),
rw.median = median(rw, na.rm = T),
rw.3rdq = quantile(rw, 0.75, na.rm = T),
rw.max = max(rw, na.rm = T),
count = n()
)
print(nrow(df.ex.6))
df.ex.6 %>%
arrange(desc(rw.mean)) %>%
head()
# is that right? -nope
# why not? -annoyingly, since the summarize is grouped by year/month/state,
# arrange only sorts by that group
# Basically, congrats if you caught that,
# and double congrats if you figured out a solution.
df.ex.6 %>%
ungroup() %>% #remove any grouping
arrange(desc(rw.mean)) %>%
head()
print("2013, 12 DC has highest mean rw")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 7 bonus
str(df.ex$state)
df.ex.7a <- df.ex %>%
arrange(year, month, desc(state))
distinct(select(df.ex.7a))