forked from rdpeng/ExData_Plotting1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload.data.R
39 lines (27 loc) · 1.26 KB
/
load.data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
library(dplyr)
# Download the data set (if it is not already present) and load the two days of interest
#' Load the household power consumption data for 1 and 2 February 2007
#'
#' Downloads and unzips the data set into `./data` when the extracted text
#' file is not already there, then reads it, keeps only the rows dated
#' 1/2/2007 and 2/2/2007 (source format dd/mm/yyyy), and converts the columns
#' to usable types.
#'
#' @return A data frame with the nine columns of the source file (`Date` as
#'   `Date`, columns 3 to 9 as numeric with `?` read as `NA`) plus a
#'   `DateTime` column of class `POSIXct` built from `Date` and `Time` in the
#'   session's time zone.
load.data <- function() {
  data.dir <- "./data"
  data.file <- file.path(data.dir, "household_power_consumption.txt")

  # Key the download on the extracted file rather than on the directory: a
  # directory left behind by a failed or interrupted run would otherwise
  # block every later attempt to fetch the data.
  if (!file.exists(data.file)) {
    file.url <- "https://d396qusza40orc.cloudfront.net/exdata%2Fdata%2Fhousehold_power_consumption.zip"
    zip.file <- file.path(data.dir, "data.zip")
    if (!dir.exists(data.dir)) {
      dir.create(data.dir)
    }
    # Remove the archive on exit, whether or not the download/unzip succeeded.
    on.exit(unlink(zip.file), add = TRUE)
    # No explicit method: "curl" requires an external curl binary, while the
    # default picks a method available on the platform. mode = "wb" keeps the
    # zip archive intact on Windows.
    status <- download.file(file.url, destfile = zip.file, mode = "wb")
    if (status != 0) {
      stop("Download of ", file.url, " failed (status ", status, ")",
           call. = FALSE)
    }
    unzip(zipfile = zip.file, exdir = data.dir)
  }

  # The whole file is read (roughly 80 MB of RAM) before filtering.
  # "?" is declared as the missing-value marker so the measurement columns
  # are parsed as numbers instead of character strings.
  metering.data <- read.csv(data.file,
                            sep = ";",
                            na.strings = "?",
                            stringsAsFactors = FALSE)

  # Keep only the two days of interest; %in% never yields NA, so rows with a
  # missing date are dropped rather than turned into all-NA rows.
  wanted <- metering.data$Date %in% c("1/2/2007", "2/2/2007")
  metering.data <- metering.data[wanted, , drop = FALSE]
  rownames(metering.data) <- NULL

  # Convert fields to their proper types:
  # - date field to Date
  # - measurement fields (columns 3 to 9) to numeric, column by column so the
  #   data frame is never coerced through a character matrix
  metering.data$Date <- as.Date(metering.data$Date, format = "%d/%m/%Y")
  metering.data[3:9] <- lapply(metering.data[3:9], as.numeric)

  # Combined date-time field, stored as POSIXct (POSIXlt is a list
  # underneath and is awkward as a data frame column).
  metering.data$DateTime <- as.POSIXct(
    paste(metering.data$Date, metering.data$Time),
    format = "%Y-%m-%d %H:%M:%S"
  )

  # Return our data frame
  metering.data
}