From b4af646e7ab141d934fc3a5bfa831fbf55721d09 Mon Sep 17 00:00:00 2001 From: alumi Date: Wed, 11 Dec 2019 12:25:43 +0900 Subject: [PATCH 1/2] Fix a bug in reading bgzipped VCF files --- src/cljam/io/vcf.clj | 1 - src/cljam/io/vcf/reader.clj | 12 +++++++----- test/cljam/io/vcf_test.clj | 11 +++++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/cljam/io/vcf.clj b/src/cljam/io/vcf.clj index cb36f3a2..4ea6a325 100644 --- a/src/cljam/io/vcf.clj +++ b/src/cljam/io/vcf.clj @@ -31,7 +31,6 @@ header (with-open [r (cio/reader (util/compressor-input-stream f))] (vcf-reader/load-header r))] (VCFReader. (util/as-url f) meta-info header - (if (bgzf/bgzip? f) (bgzf/bgzf-input-stream f) (cio/reader (util/compressor-input-stream f))) diff --git a/src/cljam/io/vcf/reader.clj b/src/cljam/io/vcf/reader.clj index c8d32a7e..16c88323 100644 --- a/src/cljam/io/vcf/reader.clj +++ b/src/cljam/io/vcf/reader.clj @@ -8,7 +8,7 @@ [proton.core :refer [as-long]] [cljam.io.util.bin :as util-bin] [cljam.io.vcf.util :as vcf-util]) - (:import [java.io Closeable] + (:import [java.io Closeable BufferedReader] [clojure.lang LazilyPersistentVector] bgzf4j.BGZFInputStream)) @@ -109,7 +109,7 @@ v)])) (defn load-meta-info - [^java.io.BufferedReader rdr] + [^BufferedReader rdr] (loop [line (.readLine rdr), meta-info {}] (if (meta-line? line) (let [[k v] (parse-meta-info-line line)] @@ -133,7 +133,7 @@ (cstr/split (subs line 1) #"\t")) (defn load-header - [^java.io.BufferedReader rdr] + [^BufferedReader rdr] (loop [line (.readLine rdr)] (if (header-line? line) (parse-header-line line) @@ -164,8 +164,10 @@ (apply hash-map)))) (defn- read-data-lines - [^java.io.BufferedReader rdr header kws] - (when-let [line (.readLine rdr)] + [rdr header kws] + (when-let [line (if (instance? BufferedReader rdr) + (.readLine ^BufferedReader rdr) + (.readLine ^BGZFInputStream rdr))] (if-not (or (meta-line? line) (header-line? line)) (cons (parse-data-line line kws) (lazy-seq (read-data-lines rdr header kws))) diff --git a/test/cljam/io/vcf_test.clj b/test/cljam/io/vcf_test.clj index 3670556e..c73a05c8 100644 --- a/test/cljam/io/vcf_test.clj +++ b/test/cljam/io/vcf_test.clj @@ -286,6 +286,17 @@ (testing "v4.3 complex" (let [temp-file (.getAbsolutePath (cio/file temp-dir "test_v4_3_complex.bcf"))] (with-open [v (vcf/reader test-vcf-complex-file)] + (let [xs (vcf/read-variants v) + m (vcf/meta-info v) + h (vcf/header v)] + (with-open [b (vcf/writer temp-file m h)] + (vcf/write-variants b xs)) + (with-open [b (vcf/reader temp-file)] + (is (= xs (vcf/read-variants b)))))))) + (testing "v4.3 complex bgzip" + (let [temp-file (.getAbsolutePath + (cio/file temp-dir "test_v4_3_complex_bgzip.bcf"))] + (with-open [v (vcf/reader test-vcf-complex-gz-file)] (let [xs (vcf/read-variants v) m (vcf/meta-info v) h (vcf/header v)] From f4779bb55aed04400f1e13aeee6b85c930e136ef Mon Sep 17 00:00:00 2001 From: alumi Date: Fri, 13 Dec 2019 10:00:11 +0900 Subject: [PATCH 2/2] Add an explicit test case for reading bgzipped VCF sequentially --- test/cljam/io/vcf_test.clj | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/cljam/io/vcf_test.clj b/test/cljam/io/vcf_test.clj index c73a05c8..2cd89189 100644 --- a/test/cljam/io/vcf_test.clj +++ b/test/cljam/io/vcf_test.clj @@ -116,9 +116,11 @@ (is (= (vcf/read-variants rdr) test-vcf-no-samples-variants-deep))))) (deftest read-variants-complex-test - (with-open [v (vcf/reader test-vcf-complex-file) - b (vcf/reader test-bcf-complex-file)] + (with-open [v (vcf/reader test-vcf-complex-file) ;; uncompressed VCF + z (vcf/reader test-vcf-complex-gz-file) ;; bgzipped VCF + b (vcf/reader test-bcf-complex-file)] ;; bgzipped BCF (is (= (vcf/read-variants v) + (vcf/read-variants z) (vcf/read-variants b))))) (deftest-remote bin-index-is-done-without-errors-with-a-large-file