From c7f939c4e9f557e3f99dd24a7f161d65c0a94065 Mon Sep 17 00:00:00 2001 From: Mikkel Schubert Date: Tue, 22 Jan 2019 22:29:40 +0100 Subject: [PATCH] Fix bug in --adapter-list. The orientation of mate 2 adapters would not be reversed, resulting in behavior matching --pcr2 rather than --adapter2. --- src/adapterset.cpp | 1 + .../regression/misc/adapter_list/adapters.txt | 1 + tests/regression/misc/adapter_list/info.json | 6 ++ .../misc/adapter_list/input_1.fastq | 4 + .../misc/adapter_list/input_2.fastq | 4 + .../misc/adapter_list/your_output.discarded | 0 .../adapter_list/your_output.pair1.truncated | 4 + .../adapter_list/your_output.pair2.truncated | 4 + .../misc/adapter_list/your_output.settings | 96 +++++++++++++++++++ .../your_output.singleton.truncated | 0 tests/regression/run | 17 +++- 11 files changed, 132 insertions(+), 5 deletions(-) create mode 100644 tests/regression/misc/adapter_list/adapters.txt create mode 100644 tests/regression/misc/adapter_list/info.json create mode 100644 tests/regression/misc/adapter_list/input_1.fastq create mode 100644 tests/regression/misc/adapter_list/input_2.fastq create mode 100644 tests/regression/misc/adapter_list/your_output.discarded create mode 100644 tests/regression/misc/adapter_list/your_output.pair1.truncated create mode 100644 tests/regression/misc/adapter_list/your_output.pair2.truncated create mode 100644 tests/regression/misc/adapter_list/your_output.settings create mode 100644 tests/regression/misc/adapter_list/your_output.singleton.truncated diff --git a/src/adapterset.cpp b/src/adapterset.cpp index 9d900b3d..d5c8d23b 100644 --- a/src/adapterset.cpp +++ b/src/adapterset.cpp @@ -327,6 +327,7 @@ bool adapter_set::load_adapters(const std::string& filename, bool paired_end) if (row.second.size() > 1) { adapter_3p = row.second.at(1); + adapter_3p.reverse_complement(); } m_adapters.push_back(fastq_pair(adapter_5p, adapter_3p)); diff --git a/tests/regression/misc/adapter_list/adapters.txt b/tests/regression/misc/adapter_list/adapters.txt new file mode 100644 index 00000000..cb976cf1 --- /dev/null +++ b/tests/regression/misc/adapter_list/adapters.txt @@ -0,0 +1 @@ +AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT diff --git a/tests/regression/misc/adapter_list/info.json b/tests/regression/misc/adapter_list/info.json new file mode 100644 index 00000000..1584a76e --- /dev/null +++ b/tests/regression/misc/adapter_list/info.json @@ -0,0 +1,6 @@ +{ + "arguments": ["--adapter-list", "adapters.txt"], + "return_code": 0, + "stderr": [ + ] +} diff --git a/tests/regression/misc/adapter_list/input_1.fastq b/tests/regression/misc/adapter_list/input_1.fastq new file mode 100644 index 00000000..3aa27aad --- /dev/null +++ b/tests/regression/misc/adapter_list/input_1.fastq @@ -0,0 +1,4 @@ +@AAGGGCSeq_1_5180_50/1 meta data +ACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGCAGGCCTAGATCGGAAGAGCACACGTCTGAACTCCAGTCACAAGGGCATCTCGTATG ++ +IJJHJJIJIIHJHHIGIHIGGGIGFGEFGGFGGEHGFHGFEDFFFEDECCBCCBCBEBCDBABABA?A@?@?>==>==<><<:<996978544100-,)! diff --git a/tests/regression/misc/adapter_list/input_2.fastq b/tests/regression/misc/adapter_list/input_2.fastq new file mode 100644 index 00000000..73cb65eb --- /dev/null +++ b/tests/regression/misc/adapter_list/input_2.fastq @@ -0,0 +1,4 @@ +@AAGGGCSeq_1_5180_50/2 data meta +AGGCCTCCTAGGGAGAGGAGGGTGGATGGAATTAAGGGTGTTAGTCATGTAGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCC ++ +JIHJJIJJJJJIHIHJHJHHJFGIHHHGHGGEGFIHEEDEEFBEDFEDEDBDBCBCCBBAA?ADAAA@@@>>>><=><<;<:<;87:78753420/,+)! diff --git a/tests/regression/misc/adapter_list/your_output.discarded b/tests/regression/misc/adapter_list/your_output.discarded new file mode 100644 index 00000000..e69de29b diff --git a/tests/regression/misc/adapter_list/your_output.pair1.truncated b/tests/regression/misc/adapter_list/your_output.pair1.truncated new file mode 100644 index 00000000..80b925dd --- /dev/null +++ b/tests/regression/misc/adapter_list/your_output.pair1.truncated @@ -0,0 +1,4 @@ +@AAGGGCSeq_1_5180_50/1 meta data +ACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGCAGGCCT ++ +IJJHJJIJIIHJHHIGIHIGGGIGFGEFGGFGGEHGFHGFEDFFFEDECC diff --git a/tests/regression/misc/adapter_list/your_output.pair2.truncated b/tests/regression/misc/adapter_list/your_output.pair2.truncated new file mode 100644 index 00000000..288131d1 --- /dev/null +++ b/tests/regression/misc/adapter_list/your_output.pair2.truncated @@ -0,0 +1,4 @@ +@AAGGGCSeq_1_5180_50/2 data meta +AGGCCTCCTAGGGAGAGGAGGGTGGATGGAATTAAGGGTGTTAGTCATGT ++ +JIHJJIJJJJJIHIHJHJHHJFGIHHHGHGGEGFIHEEDEEFBEDFEDED diff --git a/tests/regression/misc/adapter_list/your_output.settings b/tests/regression/misc/adapter_list/your_output.settings new file mode 100644 index 00000000..846e1039 --- /dev/null +++ b/tests/regression/misc/adapter_list/your_output.settings @@ -0,0 +1,96 @@ +AdapterRemoval ver. 2.1.7 +Trimming of paired-end reads + + +[Adapter sequences] +Adapter1[1]: AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG +Adapter2[1]: AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT + + +[Adapter trimming] +RNG seed: 372265332 +Alignment shift value: 2 +Global mismatch threshold: 0.333333 +Quality format (input): Phred+33 +Quality score max (input): 41 +Quality format (output): Phred+33 +Quality score max (output): 41 +Mate-number separator (input): '/' +Trimming 5p: 0 +Trimming 3p: 0 +Trimming Ns: No +Trimming Phred scores <= 2: No +Trimming using sliding windows: No +Minimum genomic length: 15 +Maximum genomic length: 4294967295 +Collapse overlapping reads: No +Minimum overlap (in case of collapse): 11 + + +[Trimming statistics] +Total number of read pairs: 1 +Number of unaligned read pairs: 0 +Number of well aligned read pairs: 1 +Number of discarded mate 1 reads: 0 +Number of singleton mate 1 reads: 0 +Number of discarded mate 2 reads: 0 +Number of singleton mate 2 reads: 0 +Number of reads with adapters[1]: 2 +Number of retained reads: 2 +Number of retained nucleotides: 100 +Average length of retained reads: 50 + + +[Length distribution] +Length Mate1 Mate2 Singleton Discarded All +0 0 0 0 0 0 +1 0 0 0 0 0 +2 0 0 0 0 0 +3 0 0 0 0 0 +4 0 0 0 0 0 +5 0 0 0 0 0 +6 0 0 0 0 0 +7 0 0 0 0 0 +8 0 0 0 0 0 +9 0 0 0 0 0 +10 0 0 0 0 0 +11 0 0 0 0 0 +12 0 0 0 0 0 +13 0 0 0 0 0 +14 0 0 0 0 0 +15 0 0 0 0 0 +16 0 0 0 0 0 +17 0 0 0 0 0 +18 0 0 0 0 0 +19 0 0 0 0 0 +20 0 0 0 0 0 +21 0 0 0 0 0 +22 0 0 0 0 0 +23 0 0 0 0 0 +24 0 0 0 0 0 +25 0 0 0 0 0 +26 0 0 0 0 0 +27 0 0 0 0 0 +28 0 0 0 0 0 +29 0 0 0 0 0 +30 0 0 0 0 0 +31 0 0 0 0 0 +32 0 0 0 0 0 +33 0 0 0 0 0 +34 0 0 0 0 0 +35 0 0 0 0 0 +36 0 0 0 0 0 +37 0 0 0 0 0 +38 0 0 0 0 0 +39 0 0 0 0 0 +40 0 0 0 0 0 +41 0 0 0 0 0 +42 0 0 0 0 0 +43 0 0 0 0 0 +44 0 0 0 0 0 +45 0 0 0 0 0 +46 0 0 0 0 0 +47 0 0 0 0 0 +48 0 0 0 0 0 +49 0 0 0 0 0 +50 1 1 0 0 2 diff --git a/tests/regression/misc/adapter_list/your_output.singleton.truncated b/tests/regression/misc/adapter_list/your_output.singleton.truncated new file mode 100644 index 00000000..e69de29b diff --git a/tests/regression/run b/tests/regression/run index 25ab5732..0bd58a36 100755 --- a/tests/regression/run +++ b/tests/regression/run @@ -243,6 +243,10 @@ class TestCase(object): with open(os.path.join(root, 'barcodes.txt'), 'w') as handle: handle.writelines(self._files['barcodes']) + if 'adapters' in self._files: + with open(os.path.join(root, 'adapters.txt'), 'w') as handle: + handle.writelines(self._files['adapters']) + return final_files["input_1"], final_files["input_2"] def _do_call(self, root, input_1, input_2, compression, interleaved): @@ -278,13 +282,14 @@ class TestCase(object): def _check_file_creation(self, root, input_1, input_2, compression): expected_files = set(self._files["output"]) - if 'barcodes' in self._files: - expected_files.add('barcodes.txt') + for key in ('barcodes', 'adapters'): + if key in self._files: + expected_files.add(key + '.txt') if compression != UNCOMPRESSED: expected_files_ = set() for value in expected_files: - if not value.endswith(".settings") and value != 'barcodes.txt': + if not (value.endswith(".settings") or value.endswith(".txt")): expected_files_.add(value + "." + compression) else: expected_files_.add(value) @@ -391,10 +396,12 @@ class TestCase(object): result["input_1"].append(os.path.join(abs_root, filename)) elif filename.startswith("input_2"): result["input_2"].append(os.path.join(abs_root, filename)) - elif filename not in ('info.json', 'README', 'barcodes.txt'): - result["output"][filename] = read_lines(root, filename) elif filename == 'barcodes.txt': result["barcodes"] = read_lines(root, filename) + elif filename == 'adapters.txt': + result["adapters"] = read_lines(root, filename) + elif filename not in ('info.json', 'README'): + result["output"][filename] = read_lines(root, filename) return result