-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paththesis.bib
507 lines (488 loc) · 25.2 KB
/
thesis.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
@article{rlpbwt,
  author       = {Bonizzoni, Paola and Boucher, Christina and Cozzi, Davide and Gagie, Travis and Kashgouli, Sana and K{\"o}ppl, Dominik and Rossi, Massimiliano},
  title        = {Compressed Data Structures for Population-Scale Positional {Burrows--Wheeler} Transforms},
  journal      = {bioRxiv},
  publisher    = {Cold Spring Harbor Laboratory},
  year         = {2022},
  month        = sep,
  elocation-id = {2022.09.16.508250},
  doi          = {10.1101/2022.09.16.508250},
  url          = {https://www.biorxiv.org/content/early/2022/09/19/2022.09.16.508250},
  eprint       = {https://www.biorxiv.org/content/early/2022/09/19/2022.09.16.508250.full.pdf},
  abstract     = {The positional Burrows{\textendash}Wheeler Transform (PBWT) was presented in 2014 by Durbin as a means to find all maximal haplotype matches in h sequences containing w variation sites in O(hw)-time. This time complexity of finding maximal haplotype matches using the PBWT is a significant improvement over the na{\"\i}ve pattern-matching algorithm that requires $O(h^2 w)$-time. Compared to the more famous Burrows-Wheeler Transform (BWT), however, a relatively little amount of attention has been paid to the PBWT. This has resulted in less space-efficient data structures for building and storing the PBWT. Given the increasing size of available haplotype datasets, and the applicability of the PBWT to pangenomics, the time is ripe for identifying efficient data structures that can be constructed for large datasets. Here, we present a comprehensive study of the memory footprint of data structures supporting maximal haplotype matching in conjunction with the PBWT. In particular, we present several data structure components that act as building blocks for constructing six different data structures that store the PBWT in a manner that supports efficiently finding the maximal haplotype matches. We estimate the memory usage of the data structures by bounding the space usage with respect to the input size. In light of this experimental analysis, we implement the solutions that are deemed to be superior with respect to the memory usage and show the performance on haplotype datasets taken from the 1000 Genomes Project data. Competing Interest Statement: The authors have declared no competing interest.}
}
@inproceedings{sdsl,
  author    = {Gog, Simon and Beller, Timo and Moffat, Alistair and Petri, Matthias},
  title     = {From Theory to Practice: Plug and Play with Succinct Data Structures},
  booktitle = {13th International Symposium on Experimental Algorithms ({SEA} 2014)},
  pages     = {326--337},
  year      = {2014},
  doi       = {10.1007/978-3-319-07959-2_28}
}
@article{slpsurvey,
  author    = {Lohrey, Markus},
  title     = {Algorithmics on {SLP}-compressed strings: A survey},
  journal   = {Groups-Complexity-Cryptology},
  volume    = {4},
  number    = {2},
  pages     = {241--299},
  year      = {2012},
  publisher = {Walter de Gruyter GmbH}
}
@inproceedings{slpgagie,
  author    = {Gagie, Travis and Manzini, Giovanni and Navarro, Gonzalo and Sakamoto, Hiroshi and Seelbach Benkner, Louisa and Takabatake, Yoshimasa and others},
  title     = {Practical random access to {SLP}-compressed texts},
  booktitle = {International Symposium on String Processing and Information Retrieval},
  pages     = {221--231},
  year      = {2020},
  publisher = {Springer}
}
@article{sa,
  author    = {Manber, Udi and Myers, Gene},
  title     = {Suffix arrays: a new method for on-line string searches},
  journal   = {{SIAM} Journal on Computing},
  volume    = {22},
  number    = {5},
  pages     = {935--948},
  year      = {1993},
  publisher = {SIAM}
}
% Technical report, not a journal article: the original entry had no journal,
% and Citeseer is only an index, not the publisher.
@techreport{bwt,
  author      = {Burrows, Michael and Wheeler, David},
  title       = {A block-sorting lossless data compression algorithm},
  institution = {Digital Equipment Corporation, Systems Research Center},
  type        = {Digital {SRC} Research Report},
  number      = {124},
  address     = {Palo Alto, CA},
  year        = {1994}
}
@book{gusfield1997,
  author    = {Gusfield, Dan},
  title     = {Algorithms on Strings, Trees, and Sequences: Computer Science and Computational Biology},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {1997}
}
@misc{durbin_gh,
  author       = {Durbin, Richard},
  title        = {{PBWT}},
  howpublished = {\url{https://github.com/richarddurbin/pbwt}},
  year         = {2014},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  commit       = {0de8d02df1b77146ded81e9e196991fdab520767}
}
@misc{macs,
  author       = {Chen, Gary K.},
  title        = {{MaCS}},
  howpublished = {\url{https://github.com/gchen98/macs}},
  year         = {2019},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  commit       = {85b0475231fb32dab70e7ae0c81b0603151b4da7}
}
@article{wiuf,
  author    = {Wiuf, Carsten and Hein, Jotun},
  title     = {Recombination as a point process along sequences},
  journal   = {Theoretical Population Biology},
  volume    = {55},
  number    = {3},
  pages     = {248--259},
  year      = {1999},
  publisher = {Elsevier}
}
% The author's family name is "I" and the given name is "Tomohiro".
@misc{shapedslp,
  author       = {I, Tomohiro},
  title        = {{ShapedSlp}},
  howpublished = {\url{https://github.com/itomomoti/ShapedSlp}},
  year         = {2020},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  commit       = {2a2fcaf0ea4fcd4f88a8e7c61d4c63c697b12508}
}
% Same paper as the entry keyed rpair; both keys are kept so that existing
% citations of either one keep resolving.
@inproceedings{bigrepair,
  author    = {Gagie, Travis and Manzini, Giovanni and Navarro, Gonzalo and Sakamoto, Hiroshi and Takabatake, Yoshimasa and others},
  title     = {{Rpair}: rescaling {RePair} with {Rsync}},
  booktitle = {International Symposium on String Processing and Information Retrieval},
  pages     = {35--44},
  year      = {2019},
  publisher = {Springer}
}
% Same paper as the entry keyed bigrepair; both keys are kept so that existing
% citations of either one keep resolving.
@inproceedings{rpair,
  author    = {Gagie, Travis and Manzini, Giovanni and Navarro, Gonzalo and Sakamoto, Hiroshi and Takabatake, Yoshimasa and others},
  title     = {{Rpair}: rescaling {RePair} with {Rsync}},
  booktitle = {International Symposium on String Processing and Information Retrieval},
  pages     = {35--44},
  year      = {2019},
  publisher = {Springer}
}
@article{pbwt,
  author   = {Durbin, Richard},
  title    = {Efficient haplotype matching and storage using the positional {Burrows--Wheeler} transform ({PBWT})},
  journal  = {Bioinformatics},
  volume   = {30},
  number   = {9},
  pages    = {1266--1272},
  year     = {2014},
  month    = jan,
  doi      = {10.1093/bioinformatics/btu014},
  issn     = {1367-4803},
  url      = {https://doi.org/10.1093/bioinformatics/btu014},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/30/9/1266/647197/btu014.pdf},
  abstract = {Motivation: Over the last few years, methods based on suffix arrays using the Burrows--Wheeler Transform have been widely used for DNA sequence read matching and assembly. These provide very fast search algorithms, linear in the search pattern size, on a highly compressible representation of the dataset being searched. Meanwhile, algorithmic development for genotype data has concentrated on statistical methods for phasing and imputation, based on probabilistic matching to hidden Markov model representations of the reference data, which while powerful are much less computationally efficient. Here a theory of haplotype matching using suffix array ideas is developed, which should scale to much larger datasets than those currently handled by genotype algorithms. Results: Given M sequences with N bi-allelic variable sites, an O(NM) algorithm to derive a representation of the data based on positional prefix arrays is given, which is termed the positional Burrows--Wheeler transform (PBWT). On large datasets this compresses with run-length encoding by more than a factor of a hundred smaller than using gzip on the raw data. Using this representation a method is given to find all maximal haplotype matches within the set in O(NM) time rather than $O(NM^2)$ as expected from naive pairwise comparison, and also a fast algorithm, empirically independent of M given sufficient memory for indexes, to find maximal matches between a new sequence and the set. The discussion includes some proposals about how these approaches could be used for imputation and phasing. Availability: http://github.com/richarddurbin/pbwt Contact: [email protected]}
}
@article{dpbwt,
  author   = {Sanaullah, Ahsan and Zhi, Degui and Zhang, Shaojie},
  title    = {{d-PBWT}: dynamic positional {Burrows--Wheeler} transform},
  journal  = {Bioinformatics},
  volume   = {37},
  number   = {16},
  pages    = {2390--2397},
  year     = {2021},
  month    = feb,
  doi      = {10.1093/bioinformatics/btab117},
  issn     = {1367-4803},
  url      = {https://doi.org/10.1093/bioinformatics/btab117},
  eprint   = {https://academic.oup.com/bioinformatics/article-pdf/37/16/2390/39947158/btab117.pdf},
  abstract = {{Durbin's positional Burrows--Wheeler transform (PBWT) is a scalable data structure for haplotype matching. It has been successfully applied to identical by descent (IBD) segment identification and genotype imputation. Once the PBWT of a haplotype panel is constructed, it supports efficient retrieval of all shared long segments among all individuals (long matches) and efficient query between an external haplotype and the panel. However, the standard PBWT is an array-based static data structure and does not support dynamic updates of the panel. Here, we generalize the static PBWT to a dynamic data structure, d-PBWT, where the reverse prefix sorting at each position is stored with linked lists. We also developed efficient algorithms for insertion and deletion of individual haplotypes. In addition, we verified that d-PBWT can support all algorithms of PBWT. In doing so, we systematically investigated variations of set maximal match and long match query algorithms: while they all have average case time complexity independent of database size, they have different worst case complexities and dependencies on additional data structures. The benchmarking code is available at genome.ucf.edu/d-PBWT. Supplementary data are available at Bioinformatics online.}}
}
@inproceedings{rlbwt,
  author    = {M{\"a}kinen, Veli and Navarro, Gonzalo},
  title     = {Succinct suffix arrays based on run-length encoding},
  booktitle = {Annual Symposium on Combinatorial Pattern Matching},
  pages     = {45--56},
  year      = {2005},
  publisher = {Springer}
}
@article{gagie2020,
  author    = {Gagie, Travis and Navarro, Gonzalo and Prezza, Nicola},
  title     = {Fully functional suffix trees and optimal text searching in {BWT}-runs bounded space},
  journal   = {Journal of the {ACM}},
  volume    = {67},
  number    = {1},
  pages     = {1--54},
  year      = {2020},
  publisher = {ACM},
  address   = {New York, NY, USA}
}
@article{policriti,
  author    = {Policriti, Alberto and Prezza, Nicola},
  title     = {{LZ77} computation based on the run-length encoded {BWT}},
  journal   = {Algorithmica},
  volume    = {80},
  number    = {7},
  pages     = {1986--2011},
  year      = {2018},
  publisher = {Springer}
}
@article{kuhnle,
  author    = {Kuhnle, Alan and Mun, Taher and Boucher, Christina and Gagie, Travis and Langmead, Ben and Manzini, Giovanni},
  title     = {Efficient construction of a complete index for pan-genomics read alignment},
  journal   = {Journal of Computational Biology},
  volume    = {27},
  number    = {4},
  pages     = {500--513},
  year      = {2020},
  publisher = {Mary Ann Liebert, Inc.},
  address   = {New Rochelle, NY}
}
@article{mun,
  author    = {Mun, Taher and Kuhnle, Alan and Boucher, Christina and Gagie, Travis and Langmead, Ben and Manzini, Giovanni},
  title     = {Matching reads to many genomes with the r-index},
  journal   = {Journal of Computational Biology},
  volume    = {27},
  number    = {4},
  pages     = {514--518},
  year      = {2020},
  publisher = {Mary Ann Liebert, Inc.},
  address   = {New Rochelle, NY}
}
@article{boucher,
  author    = {Boucher, Christina and Gagie, Travis and Kuhnle, Alan and Langmead, Ben and Manzini, Giovanni and Mun, Taher},
  title     = {Prefix-free parsing for building big {BWTs}},
  journal   = {Algorithms for Molecular Biology},
  volume    = {14},
  number    = {1},
  pages     = {1--15},
  year      = {2019},
  publisher = {BioMed Central}
}
@article{blast,
  author    = {Altschul, Stephen F and Gish, Warren and Miller, Webb and Myers, Eugene W and Lipman, David J},
  title     = {Basic local alignment search tool},
  journal   = {Journal of Molecular Biology},
  volume    = {215},
  number    = {3},
  pages     = {403--410},
  year      = {1990},
  publisher = {Elsevier}
}
% The third author's family name is "I" and the given name is "Tomohiro".
@article{bannai,
  author    = {Bannai, Hideo and Gagie, Travis and I, Tomohiro},
  title     = {Refining the r-index},
  journal   = {Theoretical Computer Science},
  volume    = {812},
  pages     = {96--108},
  year      = {2020},
  publisher = {Elsevier}
}
@article{moni,
  author    = {Rossi, Massimiliano and Oliva, Marco and Langmead, Ben and Gagie, Travis and Boucher, Christina},
  title     = {{MONI}: A Pangenomic Index for Finding Maximal Exact Matches},
  journal   = {Journal of Computational Biology},
  volume    = {29},
  number    = {2},
  pages     = {169--187},
  year      = {2022},
  month     = feb,
  publisher = {Mary Ann Liebert, Inc.},
  address   = {New Rochelle, NY}
}
% The third author's family name is "I" and the given name is "Tomohiro".
@inproceedings{phoni,
  author       = {Boucher, Christina and Gagie, Travis and I, Tomohiro and K{\"o}ppl, Dominik and Langmead, Ben and Manzini, Giovanni and Navarro, Gonzalo and Pacheco, Alejandro and Rossi, Massimiliano},
  title        = {{PHONI}: Streamed matching statistics with multi-genome references},
  booktitle    = {2021 Data Compression Conference (DCC)},
  pages        = {193--202},
  year         = {2021},
  organization = {IEEE}
}
@inproceedings{plcp,
  author    = {K{\"a}rkk{\"a}inen, Juha and Manzini, Giovanni and Puglisi, Simon J},
  title     = {Permuted longest-common-prefix array},
  booktitle = {Annual Symposium on Combinatorial Pattern Matching},
  pages     = {181--192},
  year      = {2009},
  publisher = {Springer}
}
% The original "volume = 2" looked like an export artifact of "SODA 2002";
% conference papers of this kind carry no volume number.
@inproceedings{plcp2,
  author    = {Sadakane, Kunihiko},
  title     = {Succinct representations of lcp information and improvements in the compressed suffix arrays},
  booktitle = {Proceedings of the Thirteenth Annual {ACM-SIAM} Symposium on Discrete Algorithms ({SODA})},
  pages     = {225--232},
  year      = {2002}
}
@inproceedings{plcp3,
  author    = {Kasai, Toru and Lee, Gunho and Arimura, Hiroki and Arikawa, Setsuo and Park, Kunsoo},
  title     = {Linear-time longest-common-prefix computation in suffix arrays and its applications},
  booktitle = {Annual Symposium on Combinatorial Pattern Matching},
  pages     = {181--192},
  year      = {2001},
  publisher = {Springer}
}
@article{mpbwt,
  author    = {Naseri, Ardalan and Zhi, Degui and Zhang, Shaojie},
  title     = {Multi-allelic positional {Burrows-Wheeler} transform},
  journal   = {{BMC} Bioinformatics},
  volume    = {20},
  number    = {11},
  pages     = {1--8},
  year      = {2019},
  publisher = {BioMed Central}
}
@article{leap,
  author    = {Naseri, Ardalan and Holzhauser, Erwin and Zhi, Degui and Zhang, Shaojie},
  title     = {Efficient haplotype matching between a query and a panel for genealogical search},
  journal   = {Bioinformatics},
  volume    = {35},
  number    = {14},
  pages     = {i233--i241},
  year      = {2019},
  publisher = {Oxford University Press}
}
@article{williams,
  author    = {Williams, Lucia and Mumey, Brendan},
  title     = {Maximal perfect haplotype blocks with wildcards},
  journal   = {{iScience}},
  volume    = {23},
  number    = {6},
  pages     = {101149},
  year      = {2020},
  publisher = {Elsevier}
}
@article{impute5,
  author    = {Rubinacci, Simone and Delaneau, Olivier and Marchini, Jonathan},
  title     = {Genotype imputation using the positional {Burrows Wheeler} transform},
  journal   = {{PLoS} Genetics},
  volume    = {16},
  number    = {11},
  pages     = {e1009049},
  year      = {2020},
  publisher = {Public Library of Science},
  address   = {San Francisco, CA, USA}
}
@inproceedings{tricks,
  author    = {Brown, Nathaniel K. and Gagie, Travis and Rossi, Massimiliano},
  title     = {{RLBWT} Tricks},
  booktitle = {2022 Data Compression Conference (DCC)},
  pages     = {444},
  year      = {2022},
  doi       = {10.1109/DCC52660.2022.00055}
}
@article{snakemake,
  author    = {M{\"o}lder, Felix and Jablonski, Kim Philipp and Letcher, Brice and Hall, Michael B and Tomkins-Tinch, Christopher H and Sochat, Vanessa and Forster, Jan and Lee, Soohyun and Twardziok, Sven O and Kanitz, Alexander and others},
  title     = {Sustainable data analysis with {Snakemake}},
  journal   = {F1000Research},
  volume    = {10},
  year      = {2021},
  publisher = {Faculty of 1000 Ltd}
}
@article{tri,
  author    = {Hodgkinson, Alan and Eyre-Walker, Adam},
  title     = {Human triallelic sites: evidence for a new mutational mechanism?},
  journal   = {Genetics},
  volume    = {184},
  number    = {1},
  pages     = {233--241},
  year      = {2010},
  publisher = {Oxford University Press}
}
@article{tri2,
  author    = {Campbell, Ian M and Gambin, Tomasz and Jhangiani, Shalini N and Grove, Megan L and Veeraraghavan, Narayanan and Muzny, Donna M and Shaw, Chad A and Gibbs, Richard A and Boerwinkle, Eric and Yu, Fuli and others},
  title     = {Multiallelic positions in the human genome: challenges for genetic analyses},
  journal   = {Human Mutation},
  volume    = {37},
  number    = {3},
  pages     = {231--234},
  year      = {2016},
  publisher = {Wiley Online Library}
}
% arXiv preprint: the identifier lives in eprint/archiveprefix rather than in a
% journal field. howpublished keeps the link visible in styles that ignore
% eprint, following the convention of the other misc entries in this file.
@misc{kmems,
  author        = {Gagie, Travis},
  title         = {{MONI} can find {k-MEMs}},
  howpublished  = {\url{https://arxiv.org/abs/2202.05085}},
  year          = {2022},
  eprint        = {2202.05085},
  archiveprefix = {arXiv}
}
@article{tettelin,
  author    = {Tettelin, Herv{\'e} and Masignani, Vega and Cieslewicz, Michael J and Donati, Claudio and Medini, Duccio and Ward, Naomi L and Angiuoli, Samuel V and Crabtree, Jonathan and Jones, Amanda L and Durkin, A Scott and others},
  title     = {Genome analysis of multiple pathogenic isolates of {Streptococcus agalactiae}: implications for the microbial ``pan-genome''},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {102},
  number    = {39},
  pages     = {13950--13955},
  year      = {2005},
  publisher = {National Acad Sciences}
}
@article{tutorial,
  author    = {Baaijens, Jasmijn A and Bonizzoni, Paola and Boucher, Christina and Della Vedova, Gianluca and Pirola, Yuri and Rizzi, Raffaella and Sir{\'e}n, Jouni},
  title     = {Computational graph pangenomics: a tutorial on data structures and their applications},
  journal   = {Natural Computing},
  pages     = {1--28},
  year      = {2022},
  publisher = {Springer}
}
@article{pancon,
  author   = {{The Computational Pan-Genomics Consortium}},
  title    = {Computational pan-genomics: status, promises and challenges},
  journal  = {Briefings in Bioinformatics},
  volume   = {19},
  number   = {1},
  pages    = {118--135},
  year     = {2016},
  month    = oct,
  doi      = {10.1093/bib/bbw089},
  issn     = {1477-4054},
  url      = {https://doi.org/10.1093/bib/bbw089},
  eprint   = {https://academic.oup.com/bib/article-pdf/19/1/118/25406834/bbw089.pdf},
  abstract = {{Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel, qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of future technologies and methodologies and review open challenges from the vantage point of the above-mentioned biological disciplines. As a prominent example for a computational paradigm shift, we particularly highlight the transition from the representation of reference genomes as strings to representations as graphs. We outline how this and other challenges from different application domains translate into common computational problems, point out relevant bioinformatics techniques and identify open problems in computer science. With this review, we aim to increase awareness that a joint approach to computational pan-genomics can help address many of the problems currently faced in various domains.}}
}
% Corporate author added so the entry can be sorted and labelled; the original
% had neither author nor key. NOTE(review): confirm the attribution.
@misc{gwas,
  author       = {{National Human Genome Research Institute}},
  title        = {Genome-wide association studies ({GWAS})},
  howpublished = {\url{https://www.genome.gov/genetics-glossary/Genome-Wide-Association-Studies}},
  journal      = {Genome.gov},
  publisher    = {Genome.gov}
}
% Doctoral dissertation: the university is the degree-granting school, not a
% book publisher.
@phdthesis{succinct,
  author = {Jacobson, Guy Joseph},
  title  = {Succinct static data structures},
  school = {Carnegie Mellon University},
  year   = {1988}
}
@inproceedings{jacobson,
  author       = {Jacobson, Guy},
  title        = {Space-efficient static trees and graphs},
  booktitle    = {30th Annual Symposium on Foundations of Computer Science},
  pages        = {549--554},
  year         = {1989},
  organization = {IEEE Computer Society}
}
@book{navarro,
  author    = {Navarro, Gonzalo},
  title     = {Compact data structures: A practical approach},
  publisher = {Cambridge University Press},
  year      = {2016}
}
@incollection{raslp,
  author    = {Belazzougui, Djamal and Cording, Patrick Hagge and Puglisi, Simon J and Tabei, Yasuo},
  title     = {Access, rank, and select in grammar-compressed strings},
  booktitle = {Algorithms -- {ESA} 2015},
  pages     = {142--154},
  year      = {2015},
  publisher = {Springer}
}
@inproceedings{fm,
  author       = {Ferragina, Paolo and Manzini, Giovanni},
  title        = {Opportunistic data structures with applications},
  booktitle    = {Proceedings 41st Annual Symposium on Foundations of Computer Science},
  pages        = {390--398},
  year         = {2000},
  organization = {IEEE}
}
@article{haplo,
  author    = {Gibbs, Richard A and Belmont, John W and Hardenbol, Paul and Willis, Thomas D and Yu, FL and Yang, HM and Ch'ang, Lan-Yang and Huang, Wei and Liu, Bin and Shen, Yan and others},
  title     = {The international {HapMap} project},
  journal   = {Nature},
  volume    = {426},
  number    = {6968},
  pages     = {789--796},
  year      = {2003},
  publisher = {Nature Publishing Group},
  doi       = {10.1038/nature02168},
  url       = {https://doi.org/10.1038/nature02168}
}
@article{bcftools,
  author   = {Danecek, Petr and Bonfield, James K and Liddle, Jennifer and Marshall, John and Ohan, Valeriu and Pollard, Martin O and Whitwham, Andrew and Keane, Thomas and McCarthy, Shane A and Davies, Robert M and Li, Heng},
  title    = {Twelve years of {SAMtools} and {BCFtools}},
  journal  = {GigaScience},
  volume   = {10},
  number   = {2},
  year     = {2021},
  month    = feb,
  issn     = {2047-217X},
  doi      = {10.1093/gigascience/giab008},
  url      = {https://doi.org/10.1093/gigascience/giab008},
  note     = {giab008},
  eprint   = {https://academic.oup.com/gigascience/article-pdf/10/2/giab008/36332246/giab008.pdf},
  abstract = {SAMtools and BCFtools are widely used programs for processing and analysing high-throughput sequencing data. They include tools for file format conversion and manipulation, sorting, querying, statistics, variant calling, and effect analysis amongst other methods. The first version appeared online 12 years ago and has been maintained and further developed ever since, with many new features and improvements added over the years. The SAMtools and BCFtools packages represent a unique collection of tools that have been used in numerous other software projects and countless genomic pipelines. Both SAMtools and BCFtools are freely available on GitHub under the permissive MIT licence, free for both non-commercial and commercial use. Both packages have been installed $>$1 million times via Bioconda. The source code and documentation are available from https://www.htslib.org.}
}
% The consortium name is wrapped in its own braces so BibTeX treats it as one
% indivisible name instead of splitting it into given and family parts.
@article{1kgp,
  author    = {{1000 Genomes Project Consortium} and others},
  title     = {A global reference for human genetic variation},
  journal   = {Nature},
  volume    = {526},
  number    = {7571},
  pages     = {68--74},
  year      = {2015},
  publisher = {Nature Publishing Group}
}
@article{vcf,
  author    = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A and Banks, Eric and DePristo, Mark A and Handsaker, Robert E and Lunter, Gerton and Marth, Gabor T and Sherry, Stephen T and others},
  title     = {The variant call format and {VCFtools}},
  journal   = {Bioinformatics},
  volume    = {27},
  number    = {15},
  pages     = {2156--2158},
  year      = {2011},
  publisher = {Oxford University Press}
}
@article{openmp,
  author        = {Dagum, Leonardo and Menon, Ramesh},
  title         = {{OpenMP}: an industry standard {API} for shared-memory programming},
  journal       = {Computational Science \& Engineering, IEEE},
  volume        = {5},
  number        = {1},
  pages         = {46--55},
  year          = {1998},
  publisher     = {IEEE},
  date-added    = {2014-07-24 11:13:01 +0000},
  date-modified = {2014-07-24 11:13:01 +0000}
}
@article{tabix,
  author    = {Li, Heng},
  title     = {Tabix: fast retrieval of sequence features from generic {TAB}-delimited files},
  journal   = {Bioinformatics},
  volume    = {27},
  number    = {5},
  pages     = {718--719},
  year      = {2011},
  publisher = {Oxford University Press}
}