forked from broadinstitute/imaging_metric_comparison
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdistance_from_origin.Rmd
96 lines (77 loc) · 3.38 KB
/
distance_from_origin.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
---
title: "test"
output: html_document
---
```{r setup, include=FALSE}
# all usefull libraries
library(magrittr)
library(dplyr)
library(ggplot2)
library(foreach)
library(doMC)
library(stringr)
library(tidyverse)
```
```{r import data with feature selection, message=FALSE, eval=FALSE}
filename <- "Pf_Gustafsdottir.rds"
profiles <- readRDS(file.path("..", "..", "input", "BBBC022_2013", "old", filename))
profiles <- profiles$data
profiles %<>% rename(Metadata_plate = Plate, Metadata_well = Well, Metadata_BROAD_sample = Image_Metadata_BROAD_ID, Metadata_SOURCE_COMPOUND_NAME = Image_Metadata_SOURCE_COMPOUND_NAME)
variables <-
names(profiles) %>% str_subset("^Cells_|^Cytoplasm_|^Nuclei_") # nfeat
metadata <-
names(profiles) %>% str_subset("^Metadata_") # metadata
# add column wheather DMSO or treatment
profiles %<>% mutate(Metadata_broad_sample_type = ifelse(Metadata_BROAD_sample == "", "control", "trt"))
metadata <-
names(profiles) %>% str_subset("^Metadata_") # metadata
```
```{r compound check}
zero_vector <- rep(0, length(variables))
zero_vector.2 <- profiles %>% select(one_of(variables)) %>% summarise_each(funs(mean)) %>% unlist
zero_vector.3 <- profiles %>% select(one_of(variables)) %>% summarise_each(funs(median)) %>% unlist
dist <- apply(profiles %>% select(one_of(variables)), 1, function(x) sqrt(sum((x-zero_vector)^2)))
dist.2 <- apply(profiles %>% select(one_of(variables)), 1, function(x) sqrt(sum((x-zero_vector.2)^2)))
dist.3 <- apply(profiles %>% select(one_of(variables)), 1, function(x) sqrt(sum((x-zero_vector.3)^2)))
dat <- data.frame(distance = dist, type = profiles$Metadata_broad_sample_type)
dat2 <- data.frame(distance = dist.2, type = profiles$Metadata_broad_sample_type)
dat3 <- data.frame(distance = dist.3, type = profiles$Metadata_broad_sample_type)
library(plyr)
cdat <- ddply(dat, "type", summarise, rating.mean=mean(distance))
cdat
# Overlaid histograms with means
ggplot(dat, aes(x=distance, fill=type, y=..density../sum(..density..))) +
geom_histogram(binwidth=5, alpha=.4, position="identity") +
xlim(0, 500) +
geom_vline(data=cdat, aes(xintercept=rating.mean, colour=type),
linetype="dashed", size=0.5) +
ggtitle("Euclidean distance to center") +
ylab("density")
library(plyr)
cdat2 <- ddply(dat2, "type", summarise, rating.mean=mean(distance))
cdat2
# Overlaid histograms with means
ggplot(dat2, aes(x=distance, fill=type, y=..density../sum(..density..))) +
geom_histogram(binwidth=5, alpha=.4, position="identity") +
xlim(0, 500) +
geom_vline(data=cdat2, aes(xintercept=rating.mean, colour=type),
linetype="dashed", size=0.5) +
ggtitle("Euclidean distance to center") +
ylab("density")
library(plyr)
cdat3 <- ddply(dat3, "type", summarise, rating.mean=mean(distance))
cdat3
# Overlaid histograms with means
ggplot(dat3, aes(x=distance, fill=type, y=..density../sum(..density..))) +
geom_histogram(binwidth=5, alpha=.4, position="identity") +
xlim(0, 500) +
geom_vline(data=cdat, aes(xintercept=rating.mean, colour=type),
linetype="dashed", size=0.5) +
ggtitle("Euclidean distance to center") +
ylab("density")
thres <- quantile(dat %>% filter(type == "control") %>% select(one_of(c("distance"))) %>% unlist, .95)
print(thres)
tmp <- dat %>% filter(type == "trt") %>% select(one_of(c("distance"))) %>% unlist
inds <- which(tmp > thres)
percentage_selected <- length(inds)/length(tmp) # arount 22%
```