From cdb191f405b0697f07d921741878cd96ce6f0a98 Mon Sep 17 00:00:00 2001 From: Jonn Smith Date: Mon, 27 Jun 2022 13:57:02 -0400 Subject: [PATCH] Added VIM to docker image (#134) * Added vim to docker container * Updated Dockerfile to allow for new versions of packages. * Added in samtools library dependencies for new pysam. * Pinned pysam version. * Fixed the `longbow correct` tests to work with padded data. --- docker/Dockerfile | 10 +++++++--- setup.py | 2 +- src/longbow/correct/command.py | 3 +++ .../correct/correct_expected_corrected_data.sam | 9 ++++++--- tests/test_data/correct/correct_test_data.sam | 9 ++++++--- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d415e793..1259301e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,8 +5,9 @@ MAINTAINER Kiran V Garimella ARG branch # install gsutil -RUN apt-get update -RUN apt-get install -y curl git-lfs time datamash wget zlib1g-dev libssl-dev lbzip2 make gcc +RUN apt-get update --allow-releaseinfo-change +RUN apt-get update -m +RUN apt-get install -y curl git-lfs time datamash wget zlib1g-dev libssl-dev lbzip2 make gcc libbz2-dev libncurses5-dev libncursesw5-dev liblzma-dev RUN curl https://sdk.cloud.google.com | bash # Setup crcmodc for gsutil: @@ -37,7 +38,7 @@ RUN wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1. && tar xjf samtools-1.14.tar.bz2 \ && rm samtools-1.14.tar.bz2 \ && cd samtools-1.14 \ - && ./configure --without-curses --disable-lzma --disable-bz2 \ + && ./configure \ && make \ && make install @@ -45,6 +46,9 @@ RUN wget https://github.com/samtools/samtools/releases/download/1.14/samtools-1. RUN wget -O /usr/local/bin/starcode https://github.com/gui11aume/starcode/releases/download/1.4/starcode-1.4 \ && chmod 755 /usr/local/bin/starcode +# Install vim so we can edit files later: +RUN apt-get install -y vim + # activate conda environment ENV PATH="/longbow/venv/bin:/root/google-cloud-sdk/bin/:$PATH" RUN echo "source /longbow/venv/bin/activate" >> ~/.bashrc diff --git a/setup.py b/setup.py index 686dbe87..ced4fdaa 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ 'pandas', 'numpy', 'matplotlib', - 'pysam>=0.16.0.1', + 'pysam==0.19.1', 'progress', 'construct', 'pathos', diff --git a/src/longbow/correct/command.py b/src/longbow/correct/command.py index 6c74a790..82e71fda 100644 --- a/src/longbow/correct/command.py +++ b/src/longbow/correct/command.py @@ -487,6 +487,9 @@ def _correct_barcode_fn(in_queue, out_queue, bam_header_dict, barcode_tag, corre read.set_tag(corrected_tag, new_bc) read.set_tag(longbow.utils.constants.COULD_CORRECT_BARCODE_TAG, True) + print(f"Old BC: {old_bc}") + print(f"New BC: {new_bc}") + # Need to do some basic math for padded barcode strings: if len(old_bc) > barcode_length: end_pad_bases = int((len(old_bc) - barcode_length)/2) diff --git a/tests/test_data/correct/correct_expected_corrected_data.sam b/tests/test_data/correct/correct_expected_corrected_data.sam index df2863d9..58ba17b6 100644 --- a/tests/test_data/correct/correct_expected_corrected_data.sam +++ b/tests/test_data/correct/correct_expected_corrected_data.sam @@ -15,6 +15,9 @@ clr_barcode_in_whitelist 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d C clr_barcode_lev_1_corrected 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCCCT YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:-1 pz:i:0 clr_barcode_lev_2_corrected 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCCTT YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:-1 pz:i:0 clr_barcode_lev_3_corrected 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCTTT YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:-1 pz:i:0 -ccs_multi_correction_success 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:1 pz:i:4 -clr_multi_correction_success 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:-1 pz:i:4 -ccs_multi_correction_same_barcode_success 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCCCCCCCCAAAA YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:1 pz:i:0 \ No newline at end of file +ccs_multi_correction_success_no_change_1 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA YC:i:1 YP:i:0 CB:Z:CCCCCCCCCCCCCCCC rq:f:1 pz:i:4 +clr_multi_correction_success_no_change_2 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA YC:i:1 YP:i:0 CB:Z:CCCCCCCCCCCCCCCC rq:f:-1 pz:i:4 +ccs_multi_correction_success_change_1 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCACCCCCCCCAAAA YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:1 pz:i:4 +clr_multi_correction_success_change_2 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCACCCCCCCAAAA YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:-1 pz:i:4 +ccs_multi_correction_same_barcode_success_1 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCCCCCCCCAAAA YC:i:1 YP:i:0 CB:Z:CCCCCCCCCCCCCCCC rq:f:1 pz:i:0 +ccs_multi_correction_same_barcode_success_2 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCACCCCCCCCAAAA YC:i:1 YP:i:1 CB:Z:CCCCCCCCCCCCCCCC rq:f:1 pz:i:0 \ No newline at end of file diff --git a/tests/test_data/correct/correct_test_data.sam b/tests/test_data/correct/correct_test_data.sam index a39d3282..75e1691b 100644 --- a/tests/test_data/correct/correct_test_data.sam +++ b/tests/test_data/correct/correct_test_data.sam @@ -22,8 +22,11 @@ clr_barcode_lev_3_corrected 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433 clr_barcode_lev_4_not_corrected 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCTTTT rq:f:-1 ccs_correction_equidistant 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AGGGGGGGGTTTTTTC rq:f:1 clr_correction_equidistant 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AGGGGGGGGTTTTTTC rq:f:-1 -ccs_multi_correction_success 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA rq:f:1 -clr_multi_correction_success 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA rq:f:-1 +ccs_multi_correction_success_no_change_1 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA rq:f:1 +clr_multi_correction_success_no_change_2 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCCCCCCCCCAAAA rq:f:-1 +ccs_multi_correction_success_change_1 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCACCCCCCCCAAAA rq:f:1 +clr_multi_correction_success_change_2 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:AAAACCCCCCCCACCCCCCCAAAA rq:f:-1 ccs_multi_correction_failure 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:GGGGGGGGGGGGGGGGGGGGGGGG rq:f:1 clr_multi_correction_failure 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:GGGGGGGGGGGGGGGGGGGGGGGG rq:f:-1 -ccs_multi_correction_same_barcode_success 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCCCCCCCCAAAA rq:f:1 \ No newline at end of file +ccs_multi_correction_same_barcode_success_1 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCCCCCCCCCCAAAA rq:f:1 +ccs_multi_correction_same_barcode_success_2 4 * 0 255 * * 0 0 GGGGGGGGGG ++++++++++ RG:Z:cd9b433d CR:Z:CCCCCCCCCCCACCCCCCCCAAAA rq:f:1 \ No newline at end of file