Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: reduce object sizes up to 88% #687

Merged
merged 3 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: Robyn
Type: Package
Title: Semi-Automated Marketing Mix Modeling (MMM) from Meta Marketing Science
Version: 3.10.2.9000
Version: 3.10.3.9000
Authors@R: c(
person("Gufeng", "Zhou", , "[email protected]", c("aut")),
person("Leonel", "Sentana", , "[email protected]", c("aut")),
Expand Down
133 changes: 21 additions & 112 deletions R/R/model.R
Original file line number Diff line number Diff line change
Expand Up @@ -569,74 +569,12 @@ robyn_mmm <- function(InputCollect,
#### Get hyperparameter sample
hypParamSam <- hypParamSamNG[i, ]
adstock <- check_adstock(adstock)
#### Transform media for model fitting
dt_modAdstocked <- select(dt_mod, -.data$ds)
mediaAdstocked <- list()
mediaImmediate <- list()
mediaCarryover <- list()
mediaVecCum <- list()
mediaSaturated <- list()
mediaSaturatedImmediate <- list()
mediaSaturatedCarryover <- list()

for (v in seq_along(all_media)) {
################################################
## 1. Adstocking (whole data)
# Decayed/adstocked response = Immediate response + Carryover response
m <- dt_modAdstocked[, all_media[v]][[1]]
if (adstock == "geometric") {
theta <- hypParamSam[paste0(all_media[v], "_thetas")][[1]][[1]]
}
if (grepl("weibull", adstock)) {
shape <- hypParamSam[paste0(all_media[v], "_shapes")][[1]][[1]]
scale <- hypParamSam[paste0(all_media[v], "_scales")][[1]][[1]]
}
x_list <- transform_adstock(m, adstock, theta = theta, shape = shape, scale = scale)
m_adstocked <- x_list$x_decayed
mediaAdstocked[[v]] <- m_adstocked
m_carryover <- m_adstocked - m
m[m_carryover < 0] <- m_adstocked[m_carryover < 0] # adapt for weibull_pdf with lags
m_carryover[m_carryover < 0] <- 0 # adapt for weibull_pdf with lags
mediaImmediate[[v]] <- m
mediaCarryover[[v]] <- m_carryover
mediaVecCum[[v]] <- x_list$thetaVecCum

################################################
## 2. Saturation (only window data)
# Saturated response = Immediate response + carryover response
m_adstockedRollWind <- m_adstocked[rollingWindowStartWhich:rollingWindowEndWhich]
m_carryoverRollWind <- m_carryover[rollingWindowStartWhich:rollingWindowEndWhich]

alpha <- hypParamSam[paste0(all_media[v], "_alphas")][[1]][[1]]
gamma <- hypParamSam[paste0(all_media[v], "_gammas")][[1]][[1]]
mediaSaturated[[v]] <- saturation_hill(
m_adstockedRollWind,
alpha = alpha, gamma = gamma
)
mediaSaturatedCarryover[[v]] <- saturation_hill(
m_adstockedRollWind,
alpha = alpha, gamma = gamma, x_marginal = m_carryoverRollWind
)
mediaSaturatedImmediate[[v]] <- mediaSaturated[[v]] - mediaSaturatedCarryover[[v]]
# plot(m_adstockedRollWind, mediaSaturated[[1]])
}

names(mediaAdstocked) <- names(mediaImmediate) <- names(mediaCarryover) <- names(mediaVecCum) <-
names(mediaSaturated) <- names(mediaSaturatedImmediate) <- names(mediaSaturatedCarryover) <-
all_media
dt_modAdstocked <- dt_modAdstocked %>%
select(-all_of(all_media)) %>%
bind_cols(mediaAdstocked)
dt_mediaImmediate <- bind_cols(mediaImmediate)
dt_mediaCarryover <- bind_cols(mediaCarryover)
mediaVecCum <- bind_cols(mediaVecCum)
dt_modSaturated <- dt_modAdstocked[rollingWindowStartWhich:rollingWindowEndWhich, ] %>%
select(-all_of(all_media)) %>%
bind_cols(mediaSaturated)
dt_saturatedImmediate <- bind_cols(mediaSaturatedImmediate)
dt_saturatedImmediate[is.na(dt_saturatedImmediate)] <- 0
dt_saturatedCarryover <- bind_cols(mediaSaturatedCarryover)
dt_saturatedCarryover[is.na(dt_saturatedCarryover)] <- 0
#### Transform media for model fitting
temp <- run_transformations(InputCollect, hypParamSam, adstock)
dt_modSaturated <- temp$dt_modSaturated
dt_saturatedImmediate <- temp$dt_saturatedImmediate
dt_saturatedCarryover <- temp$dt_saturatedCarryover

#####################################
#### Split train & test and prepare data for modelling
Expand Down Expand Up @@ -729,11 +667,10 @@ robyn_mmm <- function(InputCollect,
)
decompCollect <- model_decomp(
coefs = mod_out$coefs,
dt_modSaturated = dt_modSaturated,
y_pred = mod_out$y_pred,
dt_modSaturated = dt_modSaturated,
dt_saturatedImmediate = dt_saturatedImmediate,
dt_saturatedCarryover = dt_saturatedCarryover,
i = i,
dt_modRollWind = dt_modRollWind,
refreshAddedStart = refreshAddedStart
)
Expand Down Expand Up @@ -847,16 +784,6 @@ robyn_mmm <- function(InputCollect,
bind_cols(common[, (split_common + 1):total_common]) %>%
dplyr::mutate_all(unlist)

mediaDecompImmediate <- select(decompCollect$mediaDecompImmediate, -.data$ds, -.data$y)
colnames(mediaDecompImmediate) <- paste0(colnames(mediaDecompImmediate), "_MDI")
mediaDecompCarryover <- select(decompCollect$mediaDecompCarryover, -.data$ds, -.data$y)
colnames(mediaDecompCarryover) <- paste0(colnames(mediaDecompCarryover), "_MDC")
resultCollect[["xDecompVec"]] <- bind_cols(
decompCollect$xDecompVec,
mediaDecompImmediate,
mediaDecompCarryover
) %>% mutate(trial = trial, iterNG = lng, iterPar = i)

resultCollect[["xDecompAgg"]] <- decompCollect$xDecompAgg %>%
mutate(train_size = train_size) %>%
bind_cols(common)
Expand Down Expand Up @@ -952,11 +879,11 @@ robyn_mmm <- function(InputCollect,
})
))

resultCollect[["xDecompVec"]] <- as_tibble(bind_rows(
lapply(resultCollectNG, function(x) {
bind_rows(lapply(x, function(y) y$xDecompVec))
})
))
# resultCollect[["xDecompVec"]] <- as_tibble(bind_rows(
# lapply(resultCollectNG, function(x) {
# bind_rows(lapply(x, function(y) y$xDecompVec))
# })
# ))

resultCollect[["xDecompAgg"]] <- as_tibble(bind_rows(
lapply(resultCollectNG, function(x) {
Expand Down Expand Up @@ -994,8 +921,10 @@ robyn_mmm <- function(InputCollect,
))
}

model_decomp <- function(coefs, dt_modSaturated, y_pred, dt_saturatedImmediate,
dt_saturatedCarryover, i, dt_modRollWind, refreshAddedStart) {
model_decomp <- function(coefs, y_pred,
dt_modSaturated, dt_saturatedImmediate,
dt_saturatedCarryover, dt_modRollWind,
refreshAddedStart) {
## Input for decomp
y <- dt_modSaturated$dep_var
# x <- data.frame(x)
Expand All @@ -1013,8 +942,8 @@ model_decomp <- function(coefs, dt_modSaturated, y_pred, dt_saturatedImmediate,
xDecompOut <- cbind(data.frame(ds = dt_modRollWind$ds, y = y, y_pred = y_pred), xDecomp)

## Decomp immediate & carryover response
sel_coef <- rownames(coefs) %in% names(dt_saturatedImmediate)
coefs_media <- coefs[sel_coef, ]
sel_coef <- c(rownames(coefs), names(coefs)) %in% names(dt_saturatedImmediate)
coefs_media <- coefs[sel_coef]
names(coefs_media) <- rownames(coefs)[sel_coef]
mediaDecompImmediate <- data.frame(mapply(function(regressor, coeff) {
regressor * coeff
Expand All @@ -1023,26 +952,8 @@ model_decomp <- function(coefs, dt_modSaturated, y_pred, dt_saturatedImmediate,
regressor * coeff
}, regressor = dt_saturatedCarryover, coeff = coefs_media))

## QA decomp
check_split <- all(round(xDecomp[, names(coefs_media)], 2) ==
round(mediaDecompImmediate + mediaDecompCarryover, 2))
if (!check_split) {
message(paste0(
"Attention for loop ", i,
": immediate & carryover decomp don't sum up to total"
))
}
y_hat <- rowSums(xDecomp, na.rm = TRUE)
errorTerm <- y_hat - y_pred
# if (prod(round(y_pred) == round(y_hat)) == 0) {
# message(paste0(
# "Attention for loop ", i,
# ": manual decomp is not matching linear model prediction. ",
# "Deviation is ", round(mean(errorTerm / y) * 100, 2), "%"
# ))
# }

## Output decomp
y_hat <- rowSums(xDecomp, na.rm = TRUE)
y_hat.scaled <- rowSums(abs(xDecomp), na.rm = TRUE)
xDecompOutPerc.scaled <- abs(xDecomp) / y_hat.scaled
xDecompOut.scaled <- y_hat * xDecompOutPerc.scaled
Expand All @@ -1067,15 +978,14 @@ model_decomp <- function(coefs, dt_modSaturated, y_pred, dt_saturatedImmediate,
xDecompOutAggMeanNon0RF[is.nan(xDecompOutAggMeanNon0RF)] <- 0
xDecompOutAggMeanNon0PercRF <- xDecompOutAggMeanNon0RF / sum(xDecompOutAggMeanNon0RF)

coefsOutCat <- coefsOut <- data.frame(rn = rownames(coefs), coefs)
coefsOutCat <- coefsOut <- data.frame(rn = c(rownames(coefs), names(coefs)), coefs)
if (length(x_factor) > 0) {
coefsOut$rn <- sapply(x_factor, function(x) str_replace(coefsOut$rn, paste0(x, ".*"), x))
}
coefsOut <- coefsOut %>%
group_by(.data$rn) %>%
summarise(s0 = mean(.data$s0)) %>%
rename("coef" = "s0") %>%
.[match(rownames(coefsOut), .$rn), ]
rename("coef" = 2) %>%
summarise(coef = mean(.data$coef))

decompOutAgg <- as_tibble(cbind(coefsOut, data.frame(
xDecompAgg = xDecompOutAgg,
Expand Down Expand Up @@ -1158,7 +1068,6 @@ model_refit <- function(x_train, y_train, x_val, y_val, x_test, y_test,
nrmse_val <- nrmse_test <- y_val_pred <- y_test_pred <- NA
}


mod_out <- list(
rsq_train = rsq_train,
rsq_val = rsq_val,
Expand Down
9 changes: 0 additions & 9 deletions R/R/outputs.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,6 @@ robyn_outputs <- function(InputCollect, OutputModels,
pareto_fronts <- pareto_results$pareto_fronts
allSolutions <- pareto_results$pareto_solutions

# Reduce the size of xDecompVec with only pareto-front models and create solID
for (i in seq(OutputModels$trials)) {
OutputModels[names(OutputModels) %in% paste0("trial", i)][[1]]$resultCollect$xDecompVec <-
OutputModels[names(OutputModels) %in% paste0("trial", i)][[1]]$resultCollect$xDecompVec %>%
mutate(solID = paste(.data$trial, .data$iterNG, .data$iterPar, sep = "_")) %>%
filter(.data$solID %in% pareto_results$pareto_solutions) %>%
select(-c("iterNG", "iterPar"))
}

#####################################
#### Gather the results into output object

Expand Down
40 changes: 31 additions & 9 deletions R/R/pareto.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,14 @@ robyn_pareto <- function(InputCollect, OutputModels,
message(sprintf(">> Pareto-Front: %s [%s models]", pf, length(uniqueSol)))
}

# To recreate "xDecompVec", "xDecompVecImmediate", "xDecompVecCarryover" for each model
temp <- OutputModels[names(OutputModels) %in% paste0("trial", 1:OutputModels$trials)]
xDecompVecImmCarr <- bind_rows(lapply(temp, function(x) x$resultCollect$xDecompVec))
if (!"solID" %in% colnames(xDecompVecImmCarr)) {
xDecompVecImmCarr <- xDecompVecImmCarr %>%
mutate(solID = paste(.data$trial, .data$iterNG, .data$iterPar, sep = "_")) %>%
filter(.data$solID %in% uniqueSol)
}
# # To recreate "xDecompVec", "xDecompVecImmediate", "xDecompVecCarryover" for each model
# temp <- OutputModels[names(OutputModels) %in% paste0("trial", 1:OutputModels$trials)]
# xDecompVecImmCarr <- bind_rows(lapply(temp, function(x) x$resultCollect$xDecompVec))
# if (!"solID" %in% colnames(xDecompVecImmCarr)) {
# xDecompVecImmCarr <- xDecompVecImmCarr %>%
# mutate(solID = paste(.data$trial, .data$iterNG, .data$iterPar, sep = "_")) %>%
# filter(.data$solID %in% uniqueSol)
# }

# Calculations for pareto AND pareto plots
for (sid in uniqueSol) {
Expand Down Expand Up @@ -473,7 +473,29 @@ robyn_pareto <- function(InputCollect, OutputModels,
plot6data <- list(xDecompVecPlot = xDecompVecPlot)

## 7. Immediate vs carryover response
temp <- filter(xDecompVecImmCarr, .data$solID == sid)
# temp <- filter(xDecompVecImmCarr, .data$solID == sid)
hypParamSam <- resultHypParam[resultHypParam$solID == sid, ]
dt_saturated_dfs <- run_transformations(InputCollect, hypParamSam, adstock)
coefs <- xDecompAgg$coef[xDecompAgg$solID == sid]
names(coefs) <- xDecompAgg$rn[xDecompAgg$solID == sid]
decompCollect <- model_decomp(
coefs = coefs,
y_pred = dt_saturated_dfs$dt_modSaturated$dep_var, # IS THIS RIGHT?
gufengzhou marked this conversation as resolved.
Show resolved Hide resolved
dt_modSaturated = dt_saturated_dfs$dt_modSaturated,
dt_saturatedImmediate = dt_saturated_dfs$dt_saturatedImmediate,
dt_saturatedCarryover = dt_saturated_dfs$dt_saturatedCarryover,
dt_modRollWind = dt_modRollWind,
refreshAddedStart = InputCollect$refreshAddedStart
)
mediaDecompImmediate <- select(decompCollect$mediaDecompImmediate, -.data$ds, -.data$y)
colnames(mediaDecompImmediate) <- paste0(colnames(mediaDecompImmediate), "_MDI")
mediaDecompCarryover <- select(decompCollect$mediaDecompCarryover, -.data$ds, -.data$y)
colnames(mediaDecompCarryover) <- paste0(colnames(mediaDecompCarryover), "_MDC")
temp <- bind_cols(
decompCollect$xDecompVec,
mediaDecompImmediate,
mediaDecompCarryover
) %>% mutate(solID = sid)
vec_collect <- list(
xDecompVec = select(temp, -dplyr::ends_with("_MDI"), -dplyr::ends_with("_MDC")),
xDecompVecImmediate = select(temp, -dplyr::ends_with("_MDC"), -all_of(InputCollect$all_media)),
Expand Down
81 changes: 81 additions & 0 deletions R/R/transformation.R
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,84 @@ plot_saturation <- function(plot = TRUE) {
return(p1 + p2)
}
}

#### Transform media for model fitting
run_transformations <- function(InputCollect, hypParamSam, adstock) {
all_media <- InputCollect$all_media
rollingWindowStartWhich <- InputCollect$rollingWindowStartWhich
rollingWindowEndWhich <- InputCollect$rollingWindowEndWhich
dt_modAdstocked <- select(InputCollect$dt_mod, -.data$ds)

mediaAdstocked <- list()
mediaImmediate <- list()
mediaCarryover <- list()
mediaVecCum <- list()
mediaSaturated <- list()
mediaSaturatedImmediate <- list()
mediaSaturatedCarryover <- list()

for (v in seq_along(all_media)) {
################################################
## 1. Adstocking (whole data)
# Decayed/adstocked response = Immediate response + Carryover response
m <- dt_modAdstocked[, all_media[v]][[1]]
if (adstock == "geometric") {
theta <- hypParamSam[paste0(all_media[v], "_thetas")][[1]][[1]]
}
if (grepl("weibull", adstock)) {
shape <- hypParamSam[paste0(all_media[v], "_shapes")][[1]][[1]]
scale <- hypParamSam[paste0(all_media[v], "_scales")][[1]][[1]]
}
x_list <- transform_adstock(m, adstock, theta = theta, shape = shape, scale = scale)
m_adstocked <- x_list$x_decayed
mediaAdstocked[[v]] <- m_adstocked
m_carryover <- m_adstocked - m
m[m_carryover < 0] <- m_adstocked[m_carryover < 0] # adapt for weibull_pdf with lags
m_carryover[m_carryover < 0] <- 0 # adapt for weibull_pdf with lags
mediaImmediate[[v]] <- m
mediaCarryover[[v]] <- m_carryover
mediaVecCum[[v]] <- x_list$thetaVecCum

################################################
## 2. Saturation (only window data)
# Saturated response = Immediate response + carryover response
m_adstockedRollWind <- m_adstocked[rollingWindowStartWhich:rollingWindowEndWhich]
m_carryoverRollWind <- m_carryover[rollingWindowStartWhich:rollingWindowEndWhich]


alpha <- hypParamSam[paste0(all_media[v], "_alphas")][[1]][[1]]
gamma <- hypParamSam[paste0(all_media[v], "_gammas")][[1]][[1]]
mediaSaturated[[v]] <- saturation_hill(
m_adstockedRollWind,
alpha = alpha, gamma = gamma
)
mediaSaturatedCarryover[[v]] <- saturation_hill(
m_adstockedRollWind,
alpha = alpha, gamma = gamma, x_marginal = m_carryoverRollWind
)
mediaSaturatedImmediate[[v]] <- mediaSaturated[[v]] - mediaSaturatedCarryover[[v]]
# plot(m_adstockedRollWind, mediaSaturated[[1]])
}

names(mediaAdstocked) <- names(mediaImmediate) <- names(mediaCarryover) <- names(mediaVecCum) <-
names(mediaSaturated) <- names(mediaSaturatedImmediate) <- names(mediaSaturatedCarryover) <-
all_media
dt_modAdstocked <- dt_modAdstocked %>%
select(-all_of(all_media)) %>%
bind_cols(mediaAdstocked)
dt_mediaImmediate <- bind_cols(mediaImmediate)
dt_mediaCarryover <- bind_cols(mediaCarryover)
mediaVecCum <- bind_cols(mediaVecCum)
dt_modSaturated <- dt_modAdstocked[rollingWindowStartWhich:rollingWindowEndWhich, ] %>%
select(-all_of(all_media)) %>%
bind_cols(mediaSaturated)
dt_saturatedImmediate <- bind_cols(mediaSaturatedImmediate)
dt_saturatedImmediate[is.na(dt_saturatedImmediate)] <- 0
dt_saturatedCarryover <- bind_cols(mediaSaturatedCarryover)
dt_saturatedCarryover[is.na(dt_saturatedCarryover)] <- 0
return(list(
dt_modSaturated = dt_modSaturated,
dt_saturatedImmediate = dt_saturatedImmediate,
dt_saturatedCarryover = dt_saturatedCarryover
))
}
2 changes: 1 addition & 1 deletion demo/demo.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# LICENSE file in the root directory of this source tree.

#############################################################################################
#################### Meta MMM Open Source: Robyn 3.10.1 #######################
#################### Meta MMM Open Source: Robyn 3.10.3 #######################
#################### Quick demo guide #######################
#############################################################################################

Expand Down