From 57e0c9f55e2f2cae530810adc8c5c52235c429f0 Mon Sep 17 00:00:00 2001 From: Kiran V Garimella Date: Tue, 23 Jan 2024 00:06:55 -0500 Subject: [PATCH] If necessary, set the CURL_CA_BUNDLE path to enable access to files in GCS --- Cargo.lock | 2 +- src/alignment.rs | 20 ++++++++++++++------ src/storage.rs | 13 +++++++++++-- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 60b838e..aeb123b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1122,7 +1122,7 @@ dependencies = [ [[package]] name = "genomeshader" -version = "0.1.22" +version = "0.1.23" dependencies = [ "anyhow", "chrono", diff --git a/src/alignment.rs b/src/alignment.rs index 4c02066..aed6829 100644 --- a/src/alignment.rs +++ b/src/alignment.rs @@ -12,7 +12,10 @@ use polars::prelude::*; use rust_htslib::bam::record::{Aux, Cigar}; use rust_htslib::bam::{Read, IndexedReader, self, ext::BamRecordExtensions}; -use crate::storage::{local_get_file_update_time, gcs_get_file_update_time}; +use crate::storage::{ + local_get_file_update_time, gcs_get_file_update_time, + local_guess_curl_ca_bundle, gcs_authorize_data_access +}; #[derive(Debug, PartialEq)] pub enum ElementType { @@ -42,7 +45,14 @@ fn extract_reads(cohort: &String, reads_path: &String, chr: String, start: u64, url::Url::from_file_path(reads_path).unwrap() }; - let mut bam = IndexedReader::from_url(&url).unwrap(); + let mut bam = match IndexedReader::from_url(&url) { + Ok(bam) => bam, + Err(_) => { + local_guess_curl_ca_bundle(); + IndexedReader::from_url(&url).unwrap() + } + }; + let header = bam::Header::from_template(bam.header()); let mut rg_sm_map = HashMap::new(); @@ -323,6 +333,8 @@ pub fn stage_data(cache_path: PathBuf, reads_paths: &HashSet<(String, String)>, let temp_dir = env::temp_dir(); env::set_current_dir(&temp_dir).unwrap(); + gcs_authorize_data_access(); + let loci_list: Vec<(String, u64, u64)> = loci.iter().cloned().collect(); (0..loci_list.len()) .into_par_iter() @@ -336,8 +348,6 @@ pub fn stage_data(cache_path: PathBuf, reads_paths: &HashSet<(String, String)>, (0..reads_paths_list.len()) .into_par_iter() // iterate over BAMs .for_each(|j| { //|(reads, cohort)| { - println!("{}", env::var("GCS_OAUTH_TOKEN").unwrap_or("GCS_OAUTH_TOKEN not set".to_string())); - let (reads, cohort) = &reads_paths_list[j]; let df = extract_reads(&cohort, reads, chr.to_string(), *start, *stop); dfs.lock().unwrap().push(df); @@ -448,8 +458,6 @@ mod tests { let mut loci = HashSet::new(); loci.insert((chr, start, stop)); - // gcs_authorize_data_access(); - let r = stage_data(cache_path, &bam_paths, &loci, false); } diff --git a/src/storage.rs b/src/storage.rs index 452aa0a..e150f6e 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -6,7 +6,7 @@ use chrono::{DateTime, Utc}; use std::path::PathBuf; use std::fs::metadata; -fn gcs_split_path(path: &String) -> (String, String) { +pub fn gcs_split_path(path: &String) -> (String, String) { let re = regex::Regex::new(r"^gs://").unwrap(); let path = re.replace(&path, ""); let split: Vec<&str> = path.split('/').collect(); @@ -17,7 +17,7 @@ fn gcs_split_path(path: &String) -> (String, String) { (bucket_name, prefix) } -fn gcs_list_files(path: &String) -> Result, cloud_storage::Error> { +pub fn gcs_list_files(path: &String) -> Result, cloud_storage::Error> { let (bucket_name, prefix) = gcs_split_path(path); let client = Client::new()?; @@ -42,6 +42,15 @@ pub fn local_get_file_update_time(path: &PathBuf) -> std::io::Result::from(modified_time)) } +pub fn local_guess_curl_ca_bundle() { + // See https://github.com/rust-bio/rust-htslib/issues/404 + + // Set if CURL_CA_BUNDLE is unset or empty + if std::env::var("CURL_CA_BUNDLE").map_or(true, |v| v.is_empty()) { + std::env::set_var("CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt"); + } +} + pub fn gcs_authorize_data_access() { // Execute the command and capture the output let output = std::process::Command::new("gcloud")