Skip to content

Commit

Permalink
Grouping annotations by entity GPI parent ID; issue #83
Browse files Browse the repository at this point in the history
  • Loading branch information
dustine32 committed Jul 21, 2020
1 parent 02ef984 commit bd9c7a7
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
2 changes: 1 addition & 1 deletion bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology):
unzip(gpad_path, unzipped)
gpad_path = unzipped
# NOTE: Validation on GPAD not included here since it's currently baked into produce() above.
extractor = AssocExtractor(gpad_path)
extractor = AssocExtractor(gpad_path, gpi_path)
assocs_by_gene = extractor.group_assocs()

absolute_target = os.path.abspath(target)
Expand Down
23 changes: 19 additions & 4 deletions ontobio/rdfgen/gocamgen/gocam_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def parse_gpi(gpi_file):


class AssocExtractor:
def __init__(self, gpad_file, parser_config: AssocParserConfig = None):
def __init__(self, gpad_file, gpi_file, parser_config: AssocParserConfig = None):
if parser_config:
gpad_parser = GpadParser(config=parser_config)
else:
Expand All @@ -173,13 +173,15 @@ def __init__(self, gpad_file, parser_config: AssocParserConfig = None):
length=lines) as associations:
self.assocs = self.extract_properties_from_assocs(associations)

self.entity_parents = self.parse_gpi_parents(gpi_file)

def group_assocs(self):
assocs_by_gene = {}
for a in self.assocs:
# validation function
# if not self.assoc_filter.validate_line(a):
# continue
subject_id = a["subject"]["id"]
# If entity has parent, assign to parent entity model
if subject_id in self.entity_parents:
subject_id = self.entity_parents[subject_id]
if subject_id in assocs_by_gene:
assocs_by_gene[subject_id].append(a)
else:
Expand All @@ -193,6 +195,19 @@ def extract_properties_from_assocs(assocs):
new_assoc_list.append(extract_properties(a))
return new_assoc_list

@staticmethod
def parse_gpi_parents(gpi_file):
    """Build a mapping from GPI entity ID to that entity's parent ID.

    Each entity yielded by ``GpiParser().parse(gpi_file)`` is read through
    its ``'id'`` and ``'parents'`` keys. Entities with no parents are left
    out of the mapping; for the rest, only the first listed parent is kept.

    :param gpi_file: GPI input handed straight to ``GpiParser.parse``, or
        ``None`` when no GPI file is available.
    :return: dict of ``{entity_id: parent_id}``. Always a dict: when
        ``gpi_file`` is ``None`` the mapping is empty, never ``None``.
    """
    entity_parents = {}
    if gpi_file is None:
        # Return an empty mapping instead of None: group_assocs() runs
        # `subject_id in self.entity_parents`, and a membership test on
        # None raises TypeError. An empty dict is still falsy, so any
        # truthiness check on the result behaves as before.
        return entity_parents
    parser = GpiParser()
    for entity in parser.parse(gpi_file):
        parents = entity['parents']
        if parents:
            # Only the first parent is used. NOTE(review): original author
            # states there may only be one -- confirm against the GPI spec.
            entity_parents[entity['id']] = parents[0]
    return entity_parents


def unzip(filepath):
input_file = gzip.GzipFile(filepath, "rb")
Expand Down

1 comment on commit bd9c7a7

@dustine32
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.