-
Notifications
You must be signed in to change notification settings - Fork 0
/
Employment_Allocation.R
157 lines (112 loc) · 5.14 KB
/
Employment_Allocation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
library(sp)
library(rgdal)
library(rgeos)
library(dplyr)
library(maptools)
library(spatialEco)
library(progress)
#' Set working directory
wd <- setwd("c:/personal/r")
###############################################################################
gghm.pol <- "GGH_TAZ" # GGHM TAZ polygon
emp <- "EmpFile1"
qt <- "QuadTree"
#' read shapefiles
emp_shp <- readOGR(wd, emp)
emp_shp <- emp_shp[, "ID"]
#equiv <- read.csv("equiv.csv")
gghm_poly <- readOGR(wd, gghm.pol)
qt_shp <- readOGR(wd, qt)
#' Get employment values by SyntheticEmp corressponding to either a GGH TAZ
#' or DA by 2-digit NAICS and sort by DAUID
#' Also batch in the total employment at the geography
on_emp_naics <- read.csv("SyntheticEmp_DAGGH.csv", stringsAsFactors = FALSE,
col.names = c("DAUID", "naics2", "seqID")) %>% .[order(.$DAUID),]
# total employment
on_emp_tot <- read.csv("SyntheticTotEmp_DAGGH.csv", stringsAsFactors = FALSE) %>%
subset(., emp != 0)
###############################################################################
#' Join the total employment to the employment shapefile
###############################################################################
emp_shp@data = data.frame(emp_shp@data,
on_emp_tot[match(emp_shp@data$ID, on_emp_tot$taz),])
# get rid of NA's and zeros in the TAZ. These are basically geographies that do not have
# any employment in them
emp_shp <- emp_shp[!is.na(emp_shp@data$taz),]
###############################################################################
#' Select the first polygon and add randomly create the number of points that
#' correspond to the employment
###############################################################################
#' Order the shapefile and subset the first polygon
emp_shp1 <- emp_shp[order(emp_shp@data$ID),]
# select the first polygon
emp_poly_1 <- emp_shp1[emp_shp1@data$ID == emp_shp1@data$ID[1], ]
first <- emp_poly_1@data
#' create random points based on the number of jobs
#' in the first DA
# get the number of jobs in first DA
jobs_n <- emp_shp1@data$emp[1]
sp <- spsample(emp_poly_1, jobs_n, type = "random")
#' subset the records from the ON_Emp data frame
#' that corressponds to the first DA
emp_1 <- subset(on_emp_naics, DAUID == first$ID)
#' convert to spatial points data frame
SPDF = SpatialPointsDataFrame(sp, emp_1)
###############################################################################
#' Now do this for the reamining polygons
###############################################################################
# get unique DA ids
id <- emp_shp1@data %>% .[1]
M3.lst <- as.list(id$ID) # use list to enumerate over TAZs
#' create progress bar
pb <- winProgressBar(title="Example progress bar", label="0% done",
min=0, max=100, initial=0)
for (i in 2:length(M3.lst)){
# for loop that gets every polygon and creates random points in it based on
# the number of jobs
#' select a DA
emp_poly_i <- emp_shp1[emp_shp1@data$ID == M3.lst[i], ]
# get the number of jobs in each polygon
n_jobs_i <- emp_shp1@data$emp[i]
#' create random points
sp_i <- spsample(emp_poly_i, n_jobs_i, type = "random", iter = 6)
#' subset the records to dataframe
jobs_sub <- subset(on_emp_naics, DAUID == M3.lst[i])
#' convert to spatial points data frame
SPDF.i = SpatialPointsDataFrame(sp_i, jobs_sub)
#+ save results
SPDF <- spRbind(SPDF, SPDF.i)
#' update progress bar
Sys.sleep(0.1) # slow down the code for illustration purposes
info <- sprintf("%d%% done", round((i/length(M3.lst))*100))
setWinProgressBar(pb, i/length(M3.lst)*100, label=info)
}
#' make copy
SPDF_on <- SPDF
#' write shapefile
writeOGR(SPDF, layer = paste0("EmploymentOntario_Firm"), wd,
driver="ESRI Shapefile", overwrite_layer=T )
###############################################################################
#' Do a point in polygon and transfer the QuadID to the points
###############################################################################
SPDF_qt <- point.in.poly(SPDF, qt_shp)
ss <- SPDF_qt@data %>% .[-c(1,5)]
colnames(ss) <- c("Naics2", "EmpID", "ProvID")
###############################################################################
#' Join the Firm ID to the table above
###############################################################################
#' Firms are joined to the employee table
#' batch in Rick's Firm Synthesis file
firm <- read.csv("c:/personal/r/firms.csv", stringsAsFactors = FALSE) %>%
subset(., taz > 0 & n_employees > 0, select = c("firm_id", "n_employees", "taz")) %>%
.[order(.$taz),]
# create an empty dataframe
firm_expanded <- as.data.frame(matrix(NA_integer_,
nrow = sum(firm$n_employees), ncol = 3))
# expand the firms to number of employees
firm_expanded <- firm[rep(seq(nrow(firm)), firm$n_employees), 1:3]
# Now cbind the expanded firms to the employees table assigned to the QT
ss <- cbind(ss, firm_expanded) %>% .[-c(6,7)]
###############################################################################
#' write out the final file
write.csv(ss, "Jobs_QT.csv", row.names = FALSE)