Skip to content

Commit

Permalink
Kvg multiple windows (#33)
Browse files Browse the repository at this point in the history
* Find superbubbles in graph
* When a superbubble has more than two paths, trim off the low-quality ones
  • Loading branch information
kvg authored Oct 15, 2024
1 parent 43fa901 commit 9fd2116
Show file tree
Hide file tree
Showing 5 changed files with 342 additions and 168 deletions.
24 changes: 10 additions & 14 deletions src/hidive/src/correct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,29 @@ pub fn start(
gfa_output: Option<PathBuf>,
kmer_size: usize,
model_path: &PathBuf,
long_read_fasta_paths: &Vec<PathBuf>,
short_read_fasta_paths: &Vec<PathBuf>,
long_read_fasta_path: &PathBuf,
short_read_fasta_path: &PathBuf,
) {
let long_read_seq_urls = skydive::parse::parse_file_names(long_read_fasta_paths);
let short_read_seq_urls = skydive::parse::parse_file_names(short_read_fasta_paths);
let long_read_seq_urls = skydive::parse::parse_file_names(&[long_read_fasta_path.clone()]);
let short_read_seq_urls = skydive::parse::parse_file_names(&[short_read_fasta_path.clone()]);

// Read all long reads.
skydive::elog!("Processing long-read samples {:?}...", long_read_seq_urls.iter().map(|url| url.as_str()).collect::<Vec<&str>>());
let all_lr_seqs = skydive::utils::read_fasta(long_read_fasta_paths);
let all_lr_seqs = skydive::utils::read_fasta(&vec![long_read_fasta_path.clone()]);

// Read all short reads.
skydive::elog!("Processing short-read samples {:?}...", short_read_seq_urls.iter().map(|url| url.as_str()).collect::<Vec<&str>>());
let all_sr_seqs = skydive::utils::read_fasta(short_read_fasta_paths);
let all_sr_seqs = skydive::utils::read_fasta(&vec![short_read_fasta_path.clone()]);

let l1 = LdBG::from_sequences("lr".to_string(), kmer_size, &all_lr_seqs);
let s1 = LdBG::from_sequences("sr".to_string(), kmer_size, &all_sr_seqs);

let m = MLdBG::from_ldbgs(vec![l1, s1])
.score_kmers(model_path)
.collapse()
.clean_color_specific_paths(1, 0.2)
.clean_tangles(1, 100, 0.2)
.clean_branches(0.01)
.clean_tips(3*kmer_size, 0.01)
.clean_contigs(100)
.build_links(&all_lr_seqs, false);
.clean(0.2, 0.01)
.build_links(&all_lr_seqs, true)
;

skydive::elog!("Built MLdBG with {} k-mers.", m.kmers.len());

Expand All @@ -52,8 +49,7 @@ pub fn start(

let progress_bar = skydive::utils::default_bounded_progress_bar("Correcting reads", all_lr_seqs.len() as u64);

let corrected_seqs =
all_lr_seqs
let corrected_seqs = all_lr_seqs
.par_iter()
.progress_with(progress_bar)
.map(|seq| m.correct_seq(seq))
Expand Down
16 changes: 8 additions & 8 deletions src/hidive/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,13 @@ enum Commands {
#[clap(short, long, required = true, value_parser)]
model_path: PathBuf,

/// FASTA files with short-read sequences (may contain one or more samples).
#[clap(short, long, required = false, value_parser)]
short_read_fasta_paths: Vec<PathBuf>,

/// FASTA file with long-read sequences (may contain one or more samples).
#[clap(required = true, value_parser)]
long_read_fasta_paths: Vec<PathBuf>,
long_read_fasta_path: PathBuf,

/// FASTA file with short-read sequences (may contain one or more samples).
#[clap(required = true, value_parser)]
short_read_fasta_path: PathBuf,
},

/// Co-assemble target locus from long- and short-read data using a linked de Bruijn graph.
Expand Down Expand Up @@ -421,10 +421,10 @@ fn main() {
gfa_output,
kmer_size,
model_path,
long_read_fasta_paths,
short_read_fasta_paths,
long_read_fasta_path,
short_read_fasta_path,
} => {
correct::start(&output, gfa_output, kmer_size, &model_path, &long_read_fasta_paths, &short_read_fasta_paths);
correct::start(&output, gfa_output, kmer_size, &model_path, &long_read_fasta_path, &short_read_fasta_path);
}
Commands::Coassemble {
output,
Expand Down
1 change: 1 addition & 0 deletions src/skydive/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ openssl = { version = "0.10", features = ["vendored"] }
path-absolutize = "3.1.1"
parquet = "52.1.0"
petgraph = "0.6.5"
poasta = { git = "https://github.com/broadinstitute/poasta.git" }
#polars = { version = "*", features = ["parquet", "lazy", "csv-file", "strings", "temporal", "dtype-duration", "dtype-categorical", "concat_str", "list", "list_eval", "rank", "lazy_regex"]}
pyo3 = { version = "0.20.0", features = ["abi3-py37", "extension-module"] }
pyo3-asyncio = { version = "0.20.0", features = ["attributes", "async-std-runtime", "tokio-runtime"] }
Expand Down
Loading

0 comments on commit 9fd2116

Please sign in to comment.