-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexerciseG2.R
48 lines (32 loc) · 1.74 KB
/
exerciseG2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Exercise G2
# install a new package with babynames data
# Note: normally, do not include install.packages commands in your scripts
# Install babynames only if it is not already available, so re-running this
# script does not reinstall the package every time.
if (!requireNamespace("babynames", quietly = TRUE)) {
  install.packages("babynames")
}
library(babynames)
library(tidyr) # installed as part of tidyverse (you can install it separately if needed)
# there's a dataframe called babynames in the library; look at the first few observations.
# note that a tibble is a dataframe with a few special behaviors -- you can treat it as a dataframe
# Load the babynames data frame, then show its first rows
# (head() prints 6 rows by default).
data("babynames")
head(babynames, n = 6L)
# using tapply, get the total (sum) number of babies by year*
# * it's not quite the total because names with < 5 babies aren't included
# Get the total count of girls with each name: fill in the code below
# (replace ___ with the correct values)
# Template for tapply(X, INDEX, FUN):
#   X     - the values to aggregate,
#   INDEX - the grouping vector (must be the same length as X),
#   FUN   - the summary function applied within each group.
# X and INDEX are both subset by a condition on babynames$sex; they should use
# the same condition so the two vectors stay aligned.
# NOTE: this statement is not valid R until every ___ has been replaced.
girl_counts <- tapply(babynames$___[babynames$sex == ___],
babynames$___[babynames$sex == ___],
___)
# get just the top 10 most frequent girl names (with their counts); fill in the code
# sort() orders ascending by default, and [1:10] keeps the first ten elements
# of the sorted result, so the sort direction decides which ten are kept.
# The result needs to keep its names: the subset() call further down matches
# on names(top_girls).
# NOTE: this statement is not valid R until every ___ has been replaced.
top_girls <- sort(___, ___)[1:10]
# the following code will then make a dataset with one row for each top girls' name
# and one column for each year from 2000 onwards
# Run this code
# Keep only the girls' rows for the top-10 names from 2000 onwards, with just
# the columns needed for reshaping (one row per name/year at this point).
top_girls_long <- subset(
  babynames,
  subset = (sex == "F" & name %in% names(top_girls) & year >= 2000),
  select = c(year, name, n)
)
# Reshape to one row per name and one column per year. pivot_wider() is the
# current tidyr replacement for the superseded spread().
# NOTE(review): row order may differ from what spread() produced; the per-year
# column totals this exercise asks for are unaffected.
top_girls_wide <- pivot_wider(
  top_girls_long,
  names_from = year,
  values_from = n
)
top_girls_wide
# use apply on top_girls_wide to get, for each year from 2000 onwards, the total
# number of girls given one of the top 10 girls' names (top 10 over all years since 1880)
# Hint: if you get the error: Error in FUN(newX[, i], ...) : invalid 'type' (character) of argument
# take a look at the first column of top_girls_wide. How can you fix your code?