Skip to content

Commit

Permalink
If necessary, set the CURL_CA_BUNDLE path to enable access to files i…
Browse files Browse the repository at this point in the history
…n GCS
  • Loading branch information
kvg committed Jan 23, 2024
1 parent d9d1abf commit 57e0c9f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 14 additions & 6 deletions src/alignment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ use polars::prelude::*;
use rust_htslib::bam::record::{Aux, Cigar};
use rust_htslib::bam::{Read, IndexedReader, self, ext::BamRecordExtensions};

use crate::storage::{local_get_file_update_time, gcs_get_file_update_time};
use crate::storage::{
local_get_file_update_time, gcs_get_file_update_time,
local_guess_curl_ca_bundle, gcs_authorize_data_access
};

#[derive(Debug, PartialEq)]
pub enum ElementType {
Expand Down Expand Up @@ -42,7 +45,14 @@ fn extract_reads(cohort: &String, reads_path: &String, chr: String, start: u64,
url::Url::from_file_path(reads_path).unwrap()
};

let mut bam = IndexedReader::from_url(&url).unwrap();
let mut bam = match IndexedReader::from_url(&url) {
Ok(bam) => bam,
Err(_) => {
local_guess_curl_ca_bundle();
IndexedReader::from_url(&url).unwrap()
}
};

let header = bam::Header::from_template(bam.header());

let mut rg_sm_map = HashMap::new();
Expand Down Expand Up @@ -323,6 +333,8 @@ pub fn stage_data(cache_path: PathBuf, reads_paths: &HashSet<(String, String)>,
let temp_dir = env::temp_dir();
env::set_current_dir(&temp_dir).unwrap();

gcs_authorize_data_access();

let loci_list: Vec<(String, u64, u64)> = loci.iter().cloned().collect();
(0..loci_list.len())
.into_par_iter()
Expand All @@ -336,8 +348,6 @@ pub fn stage_data(cache_path: PathBuf, reads_paths: &HashSet<(String, String)>,
(0..reads_paths_list.len())
.into_par_iter() // iterate over BAMs
.for_each(|j| { //|(reads, cohort)| {
println!("{}", env::var("GCS_OAUTH_TOKEN").unwrap_or("GCS_OAUTH_TOKEN not set".to_string()));

let (reads, cohort) = &reads_paths_list[j];
let df = extract_reads(&cohort, reads, chr.to_string(), *start, *stop);
dfs.lock().unwrap().push(df);
Expand Down Expand Up @@ -448,8 +458,6 @@ mod tests {
let mut loci = HashSet::new();
loci.insert((chr, start, stop));

// gcs_authorize_data_access();

let r = stage_data(cache_path, &bam_paths, &loci, false);
}

Expand Down
13 changes: 11 additions & 2 deletions src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use chrono::{DateTime, Utc};
use std::path::PathBuf;
use std::fs::metadata;

fn gcs_split_path(path: &String) -> (String, String) {
pub fn gcs_split_path(path: &String) -> (String, String) {
let re = regex::Regex::new(r"^gs://").unwrap();
let path = re.replace(&path, "");
let split: Vec<&str> = path.split('/').collect();
Expand All @@ -17,7 +17,7 @@ fn gcs_split_path(path: &String) -> (String, String) {
(bucket_name, prefix)
}

fn gcs_list_files(path: &String) -> Result<Vec<ObjectList>, cloud_storage::Error> {
pub fn gcs_list_files(path: &String) -> Result<Vec<ObjectList>, cloud_storage::Error> {
let (bucket_name, prefix) = gcs_split_path(path);

let client = Client::new()?;
Expand All @@ -42,6 +42,15 @@ pub fn local_get_file_update_time(path: &PathBuf) -> std::io::Result<DateTime<Ut
Ok(DateTime::<Utc>::from(modified_time))
}

pub fn local_guess_curl_ca_bundle() {
// See https://github.com/rust-bio/rust-htslib/issues/404

// Set if CURL_CA_BUNDLE is unset or empty
if std::env::var("CURL_CA_BUNDLE").map_or(true, |v| v.is_empty()) {
std::env::set_var("CURL_CA_BUNDLE", "/etc/ssl/certs/ca-certificates.crt");
}
}

pub fn gcs_authorize_data_access() {
// Execute the command and capture the output
let output = std::process::Command::new("gcloud")
Expand Down

0 comments on commit 57e0c9f

Please sign in to comment.