-
Notifications
You must be signed in to change notification settings - Fork 0
/
SankeyMarine.R
93 lines (70 loc) · 3.45 KB
/
SankeyMarine.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
library(dplyr)
library(networkD3)
library(rjson)
library(data.table)
library(jsonlite)
library(knitr)
knitr::opts_chunk$set(
  fig.width = 15,
  fig.height = 15,
  fig.show = "asis"
)
##########################################################################################
#' Title: "Sankey Diagram Marine Commodities"
#' Author: "Mausam Duggal"
#' Date: "Nov 7th, 2015"
#########################################################################################
#' switch to the project directory. setwd() returns the directory that
#' was current before the call and that previous path is stored in wd
wd <- setwd("c:/personal/R")
#+ reduce margins for plotting
par(mar = rep(1, 4), lwd = 0.1, cex = 0.2)
#' tonnage threshold the user can change to make the diagram vary.
#' only flows whose summed tonnes are strictly greater than this value
#' are kept further down. set to zero if all points to be included
user <- 1e5
#' read marine summaries
#' then create pivot table with tonnes summed up for every
#' Handling_Port / OD_Port_Country pair
#' then keep only the flows whose tonnage exceeds the user threshold
#' finally rename columns to make compatible with Sankey Diagram
#' NOTE(review): earlier notes described replacing empty values with
#' Unknown and then removing them. No such step is performed in this
#' script - presumably the CSV is already cleaned. Confirm.
marine <- read.csv("C:/Personal/R/MarineDataSummary.csv")
#+ Pivot table
# ungroup() removes the grouping that summarise() leaves behind after a
# two-column group_by(), so the rename below cannot leave stale grouping
# metadata that still refers to Handling_Port
marine.sum <- marine %>%
  group_by(Handling_Port, OD_Port_Country) %>%
  summarise(tons = sum(tonnes)) %>%
  ungroup()
#+ user input that varies visualization results
marine.sum <- subset(marine.sum, tons > user)
#+ rename columns
# Handling_Port becomes the link target and OD_Port_Country the link source.
# The result is converted to a plain data frame so later base R column
# replacement behaves the same regardless of the tibble version installed
marine.sum <- marine.sum %>%
  dplyr::rename(
    target = Handling_Port,
    source = OD_Port_Country,
    value = tons
  ) %>%
  as.data.frame()
#' get unique names from source and target columns to make vlookup
#' this vlookup will be used to replace the text values in marine.sum with
#' integers created from the Vlookup
#+ unique names from source and target column
# the column is named at construction instead of renaming the
# auto-generated data.frame() column name afterwards
u.source <- data.frame(unique = unique(marine.sum$source))
u.target <- data.frame(unique = unique(marine.sum$target))
#+ append unique values together
#+ and then get final set of unique values
#+ then populate a corresponding ID field starting from 1
#+ this is required by Sankey and also serves as a lookup
u.all <- rbind(u.source, u.target)
u.all <- data.frame(Areas = unique(u.all$unique))
# seq_len() gives an empty id column when no flows are left, whereas
# seq(1:nrow(u.all)) produced two ids for zero rows and raised an error
u.all$id <- seq_len(nrow(u.all))
#' vlookup and transfer unique Ids to marine.sum
#' transfer ID values from u.all to marine.sum using
#' match function
#' then deduct one from each value in the source and
#' target column because the Sankey widget is drawn with JavaScript
#' where indexing starts from 0
#' finally need to make a dataframe and create a list
#' of two dataframes. First, marine.sum that shows flows
#' and second of the Area names
# work on a plain data frame so the column replacement below does not
# depend on tibble or grouped-table assignment rules
marine.sum <- as.data.frame(marine.sum)
# each column is looked up by name rather than by position, so the result
# does not depend on target and source being the first two columns
marine.sum$source <- u.all$id[match(as.character(marine.sum$source), # vlookup
                                    as.character(u.all$Areas))] - 1  # subtract 1
marine.sum$target <- u.all$id[match(as.character(marine.sum$target), # vlookup
                                    as.character(u.all$Areas))] - 1  # subtract 1
marine.sum1 <- data.frame(marine.sum) # make dataframe
marine.list <- list(links = marine.sum1, nodes = u.all) # create named list
#' Write and Read JSON file for plotting
#' the list of links and nodes is written to the file jsmarine in the
#' working directory and read straight back
# toJSON is namespace-qualified because rjson and jsonlite both export a
# function of that name, so the unqualified call depends on library load
# order. It now matches the jsonlite::fromJSON call that reads the file back
write(jsonlite::toJSON(marine.list), "jsmarine")
res <- jsonlite::fromJSON("jsmarine")
#+ Plot
# Source and Target name the zero-based id columns of the links table and
# NodeID names the label column of the nodes table
sankeyNetwork(
  Links = res$links, Nodes = res$nodes,
  Source = "source", Target = "target", Value = "value",
  NodeID = "Areas", units = "Tonnes",
  fontSize = 12, nodeWidth = 50, nodePadding = 5
)