-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdata_analysis_rq2.Rmd
105 lines (85 loc) · 2.82 KB
/
data_analysis_rq2.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
---
title: "RQ2: How are the active versions distributed among the different releases of a library?"
output: html_notebook
---
```{r}
library(tidyverse)
library(latex2exp)
library(scales)
theme_set(theme_bw())
```
### Distribution of active, passive and dormant versions
```{r}
# Reference only (kept commented out): how `df_patterns` was derived from
# `metrics`. Per this pipeline it holds one row per library, with `PAList`
# being the list of "P" (passive) / "A" (active) flags of its versions.
# df_patterns <- metrics %>%
# filter(is.na(isPassive) == F) %>%
# mutate(Library = paste(groupId, artifactId, sep = ":")) %>%
# mutate(PA = ifelse(isPassive == "true", "P", "A")) %>%
# group_by(Library) %>%
# arrange(releaseDate) %>%
# summarise(PAList = list(PA), dates = list(releaseDate))

# Position of the active versions.
# For every library, an "A" at index j (1-based) of its flag vector
# contributes the relative position j / n, where n is the number of flags of
# that library. The computation is vectorised per library, so an empty
# `PAList` or an empty flag vector simply contributes nothing instead of
# failing the way `1:length(x)` does on zero-length input.
PAList <- df_patterns$PAList
PosA <- unlist(
  lapply(PAList, function(PALib) which(PALib == "A") / length(PALib)),
  use.names = FALSE
)
# Positions of passive versions are not collected; the variable is only reset.
# The analogous computation would be which(PALib != "A") / length(PALib).
PosP <- c()
# NOTE(review): `a` is not read anywhere in the visible code; it is kept so
# that the global environment stays the same for any chunk that may use it.
a <- character(0)

# `patterns_file` (including its `newlistPass` and `Patterns` columns) is
# built in the "Patterns search" chunk further down, so that chunk has to be
# run before this code.
# Libraries whose run-length pattern is the empty string (no flags at all)
# are dropped before positions are computed.
patterns_file <- patterns_file %>% filter(Patterns != "")

# Relative position of the active versions: an "A" at index j (1-based) of a
# library's flag vector contributes j / n, with n the number of flags of that
# library. Vectorised per library; this replaces the nested index loops, which
# grew `PosA` element by element and printed every loop index.
PAList <- patterns_file$newlistPass
PosA <- unlist(
  lapply(PAList, function(PALib) which(PALib == "A") / length(PALib)),
  use.names = FALSE
)
# Positions of passive versions are not collected; the variable is only reset.
PosP <- c()
# Histogram of the relative positions of the active versions.
# A single styled layer is drawn: the extra default `geom_histogram()` layer
# was completely covered by this one (same 30 bins) and only triggered the
# "`stat_bin()` using `bins = 30`" message.
hist_pos_active <- ggplot(data.frame(PosA = PosA), aes(PosA)) +
  geom_histogram(bins = 30, color = "black", fill = "#CCCCCC") +
  ylab("count") +
  xlab("relative position of active versions")

# Show the figure in the notebook output.
hist_pos_active

# Write the figure to disk. The plot is passed explicitly so the saved file
# does not depend on whichever plot happened to be displayed last.
ggsave(
  filename = "Figures/hist_pos_active_versions.pdf",
  plot = hist_pos_active,
  height = 2.5, width = 4, units = "in", device = "pdf"
)
# Show the active versions whose relative position is exactly 1, i.e. the
# ones sitting at the last index of their library's flag vector.
filter(data.frame(PosA = PosA), PosA == 1)

# Export the first 387 rows of `passive` as CSV.
# NOTE(review): `passive` and the 387 cut-off are defined outside the visible
# code -- presumably the non-dormant passive libraries; confirm before reuse.
write_csv(passive[seq_len(387), ], "passive_non_dormant_libs.csv")
```
### Patterns search
```{r}
# Load the per-library flag lists, keep only the `library` and `listPass`
# columns (renaming `library` to `Library`) and restrict the table to the
# libraries present in `libraries_betw5and200_versions`, which is defined
# outside this chunk.
#
# `newlistPass` is then derived from `listPass` in four steps:
#   1. drop the first and last character (presumably enclosing brackets --
#      TODO confirm against Data/patterns.csv),
#   2. recode "true" as "P",
#   3. recode "false" as "A",
#   4. split on "," so each row holds a character vector of flags.
patterns_file <- read_csv("Data/patterns.csv") %>%
  select(Library = library, listPass) %>%
  inner_join(libraries_betw5and200_versions, by = "Library") %>%
  mutate(
    newlistPass = substring(listPass, 2, nchar(listPass) - 1),
    newlistPass = gsub("true", "P", newlistPass),
    newlistPass = gsub("false", "A", newlistPass),
    newlistPass = strsplit(newlistPass, ",")
  )
# Get the patterns.
# Each library's flag vector is collapsed to its run-length pattern:
# consecutive repeats are merged by `rle()` and the remaining values are
# pasted together, e.g. c("A", "A", "P", "A") becomes "APA". An empty flag
# vector yields "".
# `vapply` returns exactly one string per row and also works on a table with
# zero rows, where the former `1:length(x)` loop failed.
patterns <- vapply(
  patterns_file$newlistPass,
  function(flags) paste(rle(flags)$values, collapse = ""),
  character(1),
  USE.NAMES = FALSE
)
patterns_file$Patterns <- as.factor(patterns)

# Frequency of every pattern: first as a plain table, then as a tibble sorted
# from most to least frequent.
table(patterns_file$Patterns)
count(patterns_file, Patterns) %>% arrange(desc(n))
# Interactive inspection only: `View()` opens a data viewer, so these lines
# are meant to be run by hand in an interactive session.
# Libraries whose pattern is exactly "APA", shown with their `MeanPageranks`
# (NOTE(review): that column is not created in this chunk -- presumably it
# comes from `libraries_betw5and200_versions` via the join; confirm).
View(patterns_file %>% filter(Patterns == "APA") %>% select(Library, MeanPageranks, Patterns))
# Re-read the raw CSV (unfiltered, original column names) to look up the raw
# row of a single library.
xxx <- read_csv("Data/patterns.csv")
View(xxx %>% filter(library == "com.google.guava:guava-jdk5"))
```