forked from mgwalsh/TRM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMW_LREP_ensemble.R
195 lines (163 loc) · 7.4 KB
/
MW_LREP_ensemble.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Ensemble predictions of site-level control yields (Yc) and
# treatment response ratio indices (SRI) in Malawi
# Malawi LREP response trial data (courtesy of LREP & Todd Benson)
# LREP data documentation at: https://www.dropbox.com/s/4qbxnz4mdl92pdv/Malawi%20area-specific%20fertilizer%20recs%20report.pdf?dl=0
# Data pre-processing with: https://github.com/mgwalsh/TRM/blob/master/MW_LREP_SI.R
# M. Walsh & J. Chen, December 2014
# Required packages
# install.packages(c("downloader","raster","rgdal","caret","MASS","randomForest","gbm","nnet","elasticnet")), dependencies=TRUE)
suppressPackageStartupMessages({
require(downloader)
require(raster)
require(rgdal)
require(caret)
require(MASS)
require(randomForest)
require(gbm)
require(nnet)
require(elasticnet)
})
# Data downloads ----------------------------------------------------------
# Create a "Data" folder in your current working directory
dir.create("LREP_data", showWarnings=F)
setwd("./LREP_data")
# Site index data download to "./LREP_Data"
download("https://www.dropbox.com/s/o9588q2wci8mtiv/MW_Site_Indices.csv?raw=1", "MW_Site_Indices.csv", mode="wb")
mwsite <- read.table(paste(dat_dir, "/MW_Site_Indices.csv", sep=""), header=T, sep=",")
# Malawi grids download to "./LREP_Data" (~7.6 Mb)
download("https://www.dropbox.com/s/54di5f37yp30bz4/MW_grids.zip?dl=0", "./LREP_Data/MW_grids.zip", mode="wb")
unzip("./LREP_Data/MW_grids.zip", exdir="./LREP_Data", overwrite=T)
glist <- list.files(path="./LREP_Data", pattern="tif", full.names=T)
mwgrid <- stack(glist)
# Data setup --------------------------------------------------------------
# Extract gridded variables at LREP locations
coordinates(mwsite) <- ~Easting+Northing
projection(mwsite) <- projection(mwgrid)
sitegrid <- extract(mwgrid, mwsite)
# Assemble dataframes
# Average control yield estimates (Yc, kg/ha)
Yc <- mwsite$Yc
ycdat <- cbind.data.frame(Yc, sitegrid)
ycdat <- na.omit(ycdat)
# Average site response index estimates (SRI, dimensionless)
SRI <- mwsite$SRI
sidat <- cbind.data.frame(SRI, sitegrid)
sidat <- na.omit(sidat)
# set train/test set randomization seed
seed <- 138
set.seed(seed)
# Split data into train and test sets ------------------------------------
# Control yield train/test split
ycIndex <- createDataPartition(ycdat$Yc, p = 0.7, list = FALSE, times = 1)
ycTrain <- ycdat[ ycIndex,]
ycTest <- ycdat[-ycIndex,]
# Site response index train/test split
siIndex <- createDataPartition(sidat$SRI, p = 0.7, list = FALSE, times = 1)
siTrain <- sidat[ siIndex,]
siTest <- sidat[-siIndex,]
# Stepwise main effects GLM's <MASS> --------------------------------------
# 5-fold CV
step <- trainControl(method = "cv", number = 5)
# Average control yields (Yc, kg/ha)
Yc.glm <- train(log(Yc) ~ ., data = ycTrain,
method = "glmStepAIC",
trControl = step)
ycglm.pred <- predict(mwgrid, Yc.glm) ## spatial predictions
# Site response indices (SRI, dimensionless)
SRI.glm <- train(SRI ~ ., data = siTrain,
method = "glmStepAIC",
trControl = step)
siglm.pred <- predict(mwgrid, SRI.glm) ## spatial predictions
# Random forests <randomForest> -------------------------------------------
# out-of-bag predictions
oob <- trainControl(method = "oob")
# Average control yields (Yc, kg/ha)
Yc.rf <- train(log(Yc) ~ ., data = ycTrain,
method = "rf",
trControl = oob)
ycrf.pred <- predict(mwgrid, Yc.rf) ## spatial predictions
# Site response indices (SRI, dimensionless)
SRI.rf <- train(SRI ~ ., data = siTrain,
method = "rf",
trControl = oob)
sirf.pred <- predict(mwgrid, SRI.rf) ## spatial predictions
# Gradient boosting <gbm> -------------------------------------------------
# CV for training gbm's
gbm <- trainControl(method = "repeatedcv", number = 5, repeats = 5)
# Average control yields (Yc, kg/ha)
Yc.gbm <- train(log(Yc) ~ ., data = ycTrain,
method = "gbm",
trControl = gbm)
ycgbm.pred <- predict(mwgrid, Yc.gbm) ## spatial predictions
# Site response indices (SRI, dimensionless)
SRI.gbm <- train(SRI ~ ., data = siTrain,
method = "gbm",
trControl = gbm)
sigbm.pred <- predict(mwgrid, SRI.gbm) ## spatial predictions
# Neural nets <nnet> ------------------------------------------------------
# CV for training nnet's
nn <- trainControl(method = "cv", number = 10)
# Average control yields (Yc, kg/ha)
Yc.nn <- train(log(Yc) ~ ., data = ycTrain,
method = "nnet",
linout = T,
trControl = nn)
ycnn.pred <- predict(mwgrid, Yc.nn) ## spatial predictions
# Site response indices (SRI, dimensionless)
SRI.nn <- train(SRI ~ ., data = siTrain,
method = "nnet",
linout = T,
trControl = nn)
sinn.pred <- predict(mwgrid, SRI.nn) ## spatial predictions
# Plot predictions --------------------------------------------------------
# Control yield (Yc) prediction plots
yc.preds <- stack(ycglm.pred, ycrf.pred, ycgbm.pred, ycnn.pred)
names(yc.preds) <- c("glmStepAIC","randomForest","gbm","nnet")
plot(yc.preds, axes = F)
# Site response index plots (SRI, dimensionless)
si.preds <- stack(siglm.pred, sirf.pred, sigbm.pred, sinn.pred)
names(si.preds) <- c("glmStepAIC","randomForest","gbm","nnet")
plot(si.preds, axes = F)
# Ensemble predictions <glm>, <rf>, <gbm>, <nnet> --------------------------
# Ensemble set-up
pred <- stack(ycglm.pred, ycrf.pred, ycgbm.pred, ycnn.pred,
siglm.pred, sirf.pred, sigbm.pred, sinn.pred)
names(pred) <- c("YCglm","YCrf","YCgbm","YCnn",
"SIglm","SIrf","SIgbm","SInn")
mwpred <- extract(pred, mwsite)
# Average control yields (Yc, kg/ha)
ycens <- cbind.data.frame(Yc, mwpred)
ycens <- na.omit(ycens)
ycensTest <- ycens[-ycIndex,] ## replicate previous test set
# Site response indices (SRI, dimensionless)
siens <- cbind.data.frame(SRI, mwpred)
siens <- na.omit(siens)
siensTest <- siens[-siIndex,] ## replicate previous test set
# Ridge regression ensemble weighting on the test set
# 10-fold CV
ens <- trainControl(method = "cv", number = 10)
# Average control yields (Yc, kg/ha)
YC.ens <- train(log(Yc) ~ YCglm + YCrf + YCgbm + YCnn, data = ycensTest,
method = "ridge",
trControl = ens)
yc.pred <- predict(YC.ens, ycensTest, type="raw")
yc.test <- cbind(ycensTest, yc.pred)
ycens.pred <- predict(pred, YC.ens, type="raw") ## spatial predictions
plot(exp(ycens.pred), axes = F)
# Site response indices (SRI, dimensionless)
SRI.ens <- train(SRI ~ SIglm + SIrf + SIgbm + SInn, data = siensTest,
method = "ridge",
trControl = ens)
si.pred <- predict(SRI.ens, siensTest, type="raw")
si.test <- cbind(siensTest, si.pred)
siens.pred <- predict(pred, SRI.ens, type="raw") ## spatial predictions
plot(siens.pred, axes = F)
# Write spatial predictions -----------------------------------------------
# Create a "LREP_Results" folder in current working directory
dir.create("LREP_Results", showWarnings=F)
# Export Gtif's to "./LREP_Results"
writeRaster(yc.preds, filename="./LREP_Results/LREP_ycpreds.tif", datatype="FLT4S", options="INTERLEAVE=BAND", overwrite=T)
writeRaster(si.preds, filename="./LREP_Results/LREP_sipreds.tif", datatype="FLT4S", options="INTERLEAVE=BAND", overwrite=T)
# Ensemble predictions
enspred <- stack(exp(ycens.pred), siens.pred)
writeRaster(enspred, filename="./LREP_Results/LREP_enspred.tif", datatype="FLT4S", options="INTERLEAVE=BAND", overwrite=T)