Skip to content

Commit

Permalink
fix two issues in new RdTest (#167)
Browse files Browse the repository at this point in the history
  • Loading branch information
cwhelan authored May 13, 2021
1 parent 599fa86 commit 2166cd2
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 20 deletions.
2 changes: 1 addition & 1 deletion input_values/dockers.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"sv_pipeline_base_docker" : "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline-base:rlc_posthoc_filtering_cnv_mcnv_compatability_9a8561",
"sv_pipeline_docker" : "us.gcr.io/broad-dsde-methods/cwhelan/sv-pipeline:cw_clean_vcf_part1_rm_allosome_greps_8348649",
"sv_pipeline_qc_docker" : "us.gcr.io/broad-dsde-methods/markw/sv-pipeline-qc:mw-xz-fixes-7cbffee",
"sv_pipeline_rdtest_docker" : "us.gcr.io/broad-dsde-methods/cwhelan/sv-pipeline-rdtest:cw_bins_not_multiples_of_binsize_ebd0719",
"sv_pipeline_rdtest_docker" : "us.gcr.io/broad-dsde-methods/cwhelan/sv-pipeline-rdtest:cw_rdtest_bin_fix_update_5f2c8f",
"wham_docker" : "us.gcr.io/broad-dsde-methods/wham:8645aa",
"igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
"duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
Expand Down
44 changes: 25 additions & 19 deletions src/RdTest/RdTest.R
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,11 @@ loadData <- function(chr, start, end, cnvID, sampleIDs,coveragefile,medianfile,b
if (any(gapLengths) > 0) {
gapStarts <- cov1$end[which(gapLengths > 0)]
gapEnds <- cov1$start[which(gapLengths > 0)+1]

zeroBinStarts <- unlist(lapply(1:length(gapStarts), function(i) { seq(gapStarts[i], gapEnds[i], by=BinSize) }))
zeroBinStarts <- unlist(lapply(1:length(gapStarts), function(i) { seq(gapStarts[i], gapEnds[i]-1, by=BinSize) }))
zeroBinEnds <- unlist(lapply(1:length(gapEnds),
function(i) {
c( if (gapStarts[i] + BinSize < gapEnds[i]) { seq(gapStarts[i] + BinSize, gapEnds[i], by=BinSize) }, gapEnds[i])
c( if (gapStarts[i] + BinSize < gapEnds[i]) { seq(gapStarts[i] + BinSize, gapEnds[i]-1, by=BinSize) }, gapEnds[i])
}))

column_start = matrix(zeroBinStarts, ncol = 1)
column_end = matrix(zeroBinEnds, ncol = 1)
ncov_col = ncol(cov1)
Expand All @@ -244,12 +242,13 @@ loadData <- function(chr, start, end, cnvID, sampleIDs,coveragefile,medianfile,b
}
#Round down the number of used bins events for smaller events (e.g at 100 bp bins can't have 10 bins if event is less than 1kb)
startAdjToInnerBinStart <- if (any(cov1$start >= start)) { cov1$start[which(cov1$start >= start)[1]] } else { min(cov1$start) }
endAjdToInnerBinEnd <- if (any(cov1$end <= end)) { cov1$end[max(which(cov1$end <= end))] } else { max(cov1$end) }
endAdjToInnerBinEnd <- if (any(cov1$end <= end)) { cov1$end[max(which(cov1$end <= end))] } else { max(cov1$end) }

if ((endAjdToInnerBinEnd - startAdjToInnerBinStart) < bins * BinSize)
numInternalBins <- sum(cov1$start >= startAdjToInnerBinStart & cov1$end <= endAdjToInnerBinEnd)
if (numInternalBins < bins)
{
bins = (endAjdToInnerBinEnd - startAdjToInnerBinStart) /
BinSize
bins = numInternalBins

if (bins <= 1)
{
# include all bins that overlap the start and end of the interval in Rstart and Rend
Expand All @@ -263,27 +262,34 @@ loadData <- function(chr, start, end, cnvID, sampleIDs,coveragefile,medianfile,b
if (!exists("compression"))
{
UnadjustedBins <-
(endAjdToInnerBinEnd - startAdjToInnerBinStart) /
(bins * BinSize)
numInternalBins / bins

# Number of bins that need to be removed
RemainderForRemoval <-
##Need to account for round error by trunc so add the decimal####
trunc(((
UnadjustedBins - trunc(UnadjustedBins)
) * BinSize * bins / 2) + 0.000000001)
RemainderFront <-
round_any(RemainderForRemoval, BinSize, floor)
RemainderBack <-
round_any(RemainderForRemoval, BinSize, ceiling)
) * bins / 2) + 0.000000001)
BinsToRemoveFromFront <-
round_any(RemainderForRemoval, 1, floor)
BinsToRemoveFromBack <-
round_any(RemainderForRemoval, 1, ceiling)

startBinIdx <- which(cov1$start == startAdjToInnerBinStart)
endBinIdx <- which(cov1$end == endAdjToInnerBinEnd)

newStartBinIdx <- startBinIdx + BinsToRemoveFromFront
newEndBinIdx <- endBinIdx - BinsToRemoveFromBack

Rstart <-
startAdjToInnerBinStart + RemainderFront
cov1$start[newStartBinIdx]
Rend <-
endAjdToInnerBinEnd - RemainderBack
compression <- (Rend - Rstart) / (BinSize * bins)
cov1$end[newEndBinIdx]
compression <- (newEndBinIdx - newStartBinIdx + 1) / (bins)
}
#Cut bins down to those required for compressed clean size based on Rstart and Rend##
cov1<-cov1[which(cov1[,3]>Rstart & cov1[,2]<Rend ),]


#Samples Filter
if ( !is.null(opt$Blacklist) ) {
samplesBlacklist <- readLines(opt$Blacklist)
Expand Down

0 comments on commit 2166cd2

Please sign in to comment.