## check_allen.R
## Splits HBA_ISH_GeneList.txt into one table file per table under tables/
## and zips the result.
library(jaffelab)
## PDF --> Excel using Adobe Acrobat
## Excel --> TSV using Excel
## read the exported text file, one element per line
x = scan("HBA_ISH_GeneList.txt", what = "character", sep = "\n")

## candidate table boundaries: the first line, every line containing "Table",
## and the last line.
## length(), not nrow(): x is a character vector, so nrow(x) is NULL and the
## end-of-file sentinel would be silently dropped from ind.
ind = c(1, grep("Table", x), length(x))
## label each boundary with the text before its first ".", with spaces and
## double quotes removed
names(ind) = gsub("\"", "", gsub(" ", "", ss(x[ind], "\\.", 1)))
## the two sentinel positions are assigned to the first and last table
names(ind)[c(1, length(ind))] = c("Table1", "Table5")

## get row indices: group the boundaries by label and keep the first one of
## each group as that table's start; a table ends on the line before the next
## table starts, and the last table ends at the end of the file
indList = split(ind, names(ind))
first = vapply(indList, "[[", numeric(1), 1)
second = c(first[-1] - 1, length(x))
## Map() always returns a list; mapply() would simplify equal-length ranges
## into a matrix and break the per-table subsetting below
indList2 = Map(function(from, to) from:to, first, second)

## subset: the lines belonging to each table
tableList = lapply(indList2, function(ii) x[ii])
## manually clean up: drop the first three lines of Table1, then the first
## two lines of every table (Table1 included)
tableList$Table1 = tableList$Table1[-(1:3)]
tableList = lapply(tableList, function(lines) lines[-(1:2)])

## read in every table except Table3; it is excluded by name rather than by
## position so the selection does not depend on the order of tableList
isTable3 = names(tableList) == "Table3"
datList = lapply(tableList[!isTable3], function(lines) {
  read.delim(text = lines,
             as.is = TRUE,
             header = TRUE)
})

## remove columns that are entirely NA; drop = FALSE keeps the result a
## data.frame even when only one column survives
datList = lapply(datList, function(tab) {
  tab[, colMeans(is.na(tab)) < 1, drop = FALSE]
})
## fix table 3
tab3 = tableList$Table3

## drop every line containing any of these words (presumably the title and
## header rows repeated through the table -- TODO confirm).
## NOTE(review): the match is anywhere in the line, so a data row whose text
## contains one of these words is dropped too.
tab3 = tab3[!grepl("Gene|Character|Table 3|System|Symbol", tab3)]

## the remaining lines have no header row
dat3 = read.delim(text = tab3,
                  as.is = TRUE,
                  header = FALSE)
## remove columns that are entirely NA, keeping a data.frame in all cases
dat3 = dat3[, colMeans(is.na(dat3)) < 1, drop = FALSE]

tab3Cols = c(
  "Gene.Symbol",
  "EntrezID",
  "Gene.Description",
  "Characterized",
  "System",
  "Family",
  "Marker.Type"
)
## fail loudly on a column-count mismatch: assigning fewer names than columns
## would otherwise pad the remaining names with NA without any error
stopifnot(ncol(dat3) == length(tab3Cols))
colnames(dat3) = tab3Cols

## add Table3 back and put the tables in numeric order
datList$Table3 = dat3
datList = datList[paste0("Table", 1:5)]
## create the output directory only when missing, so a rerun does not warn
if (!dir.exists("tables")) {
  dir.create("tables")
}

## write one text file per table, numbered by position in datList
for (i in seq_along(datList)) {
  outFile = file.path("tables",
                      paste0("allen_HBA_ISH_GeneList_Table", i, ".txt"))
  write.table(datList[[i]],
              outFile,
              row.names = FALSE)
}

## bundle every file currently in tables/ into a single archive
zip("allen_HBA_ISH_GeneList_Tables.zip",
    files = list.files("tables", full.names = TRUE))