-
Notifications
You must be signed in to change notification settings - Fork 73
/
Copy pathploidy_table_from_ped.py
112 lines (95 loc) · 3.06 KB
/
ploidy_table_from_ped.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/env python
import argparse
import sys
from collections import defaultdict
from typing import Optional, List, Text
def create_header(contigs: List[Text]) -> Text:
    """
    Builds the tab-delimited header line of the ploidy table.

    Parameters
    ----------
    contigs: List[Text]
        ordered list of contig names; each becomes one column

    Returns
    -------
    Text
        'SAMPLE' followed by the contig names, joined by tabs
        (no trailing newline)
    """
    columns = ['SAMPLE'] + contigs
    return '\t'.join(columns)
def convert_ped_record(ped_record: Text,
                       contigs: List[Text],
                       chr_x: Text = 'chrX',
                       chr_y: Text = 'chrY') -> Text:
    """
    Translates one tab-delimited PED line into one ploidy table row.

    The sample ID is taken from the second column and the sex code from the
    fifth column. Every contig other than chr_x / chr_y gets ploidy 2.

    Parameters
    ----------
    ped_record: Text
        single PED line (surrounding whitespace is stripped)
    contigs: List[Text]
        ordered list of contigs; determines the output column order
    chr_x: Text = 'chrX'
        chromosome X name
    chr_y: Text = 'chrY'
        chromosome Y name

    Returns
    -------
    Text
        sample ID followed by one ploidy value per contig, tab-delimited
        (no trailing newline)
    """
    fields = ped_record.strip().split('\t')
    sample_id = fields[1]
    sex_code = fields[4]
    # (chrX ploidy, chrY ploidy) keyed by sex code; any code other than
    # "1" or "2" yields ploidy 0 on both allosomes.
    x_ploidy, y_ploidy = {"1": (1, 1), "2": (2, 0)}.get(sex_code, (0, 0))
    allosome_ploidy = {chr_x: x_ploidy, chr_y: y_ploidy}
    row = [sample_id]
    for contig in contigs:
        row.append(str(allosome_ploidy.get(contig, 2)))
    return "\t".join(row)
def __read_contigs(path: Text) -> List[Text]:
    """
    Reads the ordered contig names from a tab-delimited file.

    Only the first tab-delimited field of each line is used, so any further
    columns are ignored. Blank or whitespace-only lines are skipped so that
    e.g. a trailing empty line does not produce an empty contig name.

    Parameters
    ----------
    path: Text
        path to the contigs file

    Returns
    -------
    List[Text]
        contig names in file order
    """
    with open(path, 'r') as f:
        return [record.split('\t')[0] for record in map(str.strip, f) if record]
def __parse_arguments(argv: List[Text]) -> argparse.Namespace:
    """
    Parses the command-line arguments.

    When nothing follows the program name, the help message is printed and
    the process exits with status 0.

    Parameters
    ----------
    argv: List[Text]
        full argument vector, including the program name at index 0

    Returns
    -------
    argparse.Namespace
        parsed arguments with attributes ped, contigs, out, chr_x and chr_y
    """
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description="Create a ploidy table from a PED file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--ped", type=str, required=True,
                        help="PED file")
    parser.add_argument("--contigs", type=str, required=True,
                        help="Ordered list of contigs")
    # The output is the tab-delimited ploidy table, not a VCF.
    parser.add_argument("--out", type=str, required=True,
                        help="Output ploidy table")
    parser.add_argument("--chr-x", type=str, default="chrX",
                        help="Chromosome X name")
    parser.add_argument("--chr-y", type=str, default="chrY",
                        help="Chromosome Y name")
    if len(argv) <= 1:
        # No arguments given: show usage instead of a "required argument" error.
        parser.print_help()
        sys.exit(0)
    return parser.parse_args(argv[1:])
def main(argv: Optional[List[Text]] = None):
    """
    Command-line entry point: writes a ploidy table built from a PED file.

    The output starts with a header line, followed by one row per PED record.
    PED lines starting with '#' and blank lines are skipped.

    Parameters
    ----------
    argv: Optional[List[Text]] = None
        full argument vector including the program name; defaults to sys.argv
    """
    if argv is None:
        argv = sys.argv
    arguments = __parse_arguments(argv)
    contigs = __read_contigs(arguments.contigs)
    with open(arguments.ped, 'r') as ped, open(arguments.out, 'w') as out:
        out.write(create_header(contigs=contigs) + "\n")
        for line in ped:
            # Skip comments / headers, and blank lines (e.g. a trailing empty
            # line), which would otherwise fail with an IndexError when the
            # record is split into columns.
            if line.startswith('#') or not line.strip():
                continue
            out.write(convert_ped_record(
                ped_record=line,
                contigs=contigs,
                chr_x=arguments.chr_x,
                chr_y=arguments.chr_y
            ) + "\n")


if __name__ == "__main__":
    main()