Skip to content

Commit

Permalink
review changes
Browse files Browse the repository at this point in the history
  • Loading branch information
adraismawur committed Oct 1, 2024
1 parent 68ef6ce commit 8b2f21e
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 13 deletions.
22 changes: 11 additions & 11 deletions big_scape/comparison/extend.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ def extend_greedy(pair: RecordPair) -> None:

def extend_simple_match(pair: RecordPair, match, gap):
"""Performs extension by first creating a simple match matrix, then
performing a match/gap extentsion similar to legaxy expansion
performing a match/gap extension similar to legacy expansion.
This method expects LCS to have been performed on the pair, and will
do all four directions at once
Expand All @@ -612,11 +612,11 @@ def extend_simple_match(pair: RecordPair, match, gap):
b_domains = []

# so we'll do a loop through cds and through domains to keep track of everything
for cds_idx, cds in enumerate(pair.record_a.get_cds()):
for cds_idx, cds in enumerate(pair.record_a.get_cds_with_domains()):
for domain in cds.hsps:
a_domains.append((domain, cds_idx))

for cds_idx, cds in enumerate(pair.record_b.get_cds()):
for cds_idx, cds in enumerate(pair.record_b.get_cds_with_domains()):
for domain in cds.hsps:
b_domains.append((domain, cds_idx))

Expand All @@ -632,7 +632,7 @@ def extend_simple_match(pair: RecordPair, match, gap):
# a forward
score = 0
max_score = 0
for i in range(pair.comparable_region.lcs_a_stop + 1, len(a_domains)):
for i in range(pair.comparable_region.lcs_domain_a_stop, len(a_domains)):
domain = a_domains[i][0]

if domain not in common_domains:
Expand All @@ -643,13 +643,13 @@ def extend_simple_match(pair: RecordPair, match, gap):
if score > max_score:
cds_idx = a_domains[i][1]
max_score = score
pair.comparable_region.a_stop = cds_idx
pair.comparable_region.domain_a_stop = i
pair.comparable_region.a_stop = cds_idx + 1
pair.comparable_region.domain_a_stop = i + 1

# a reverse
score = 0
max_score = 0
for i in range(pair.comparable_region.lcs_a_start - 1, -1, -1):
for i in range(pair.comparable_region.lcs_domain_a_start - 1, -1, -1):
domain = a_domains[i][0]

if domain not in common_domains:
Expand All @@ -666,7 +666,7 @@ def extend_simple_match(pair: RecordPair, match, gap):
# b forward
score = 0
max_score = 0
for i in range(pair.comparable_region.lcs_b_stop + 1, len(b_domains)):
for i in range(pair.comparable_region.lcs_domain_b_stop, len(b_domains)):
domain = b_domains[i][0]

if domain not in common_domains:
Expand All @@ -677,13 +677,13 @@ def extend_simple_match(pair: RecordPair, match, gap):
if score > max_score:
cds_idx = b_domains[i][1]
max_score = score
pair.comparable_region.b_stop = cds_idx
pair.comparable_region.domain_b_stop = i
pair.comparable_region.b_stop = cds_idx + 1
pair.comparable_region.domain_b_stop = i + 1

# b reverse
score = 0
max_score = 0
for i in range(pair.comparable_region.lcs_b_start - 1, -1, -1):
for i in range(pair.comparable_region.lcs_domain_b_start - 1, -1, -1):
domain = b_domains[i][0]

if domain not in common_domains:
Expand Down
1 change: 1 addition & 0 deletions big_scape/enums/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class ALIGNMENT_MODE(Enum):
class EXTEND_STRATEGY(Enum):
LEGACY = "legacy"
GREEDY = "greedy"
SIMPLE_MATCH = "simple_match"


class COMPARISON_MODE(Enum):
Expand Down
45 changes: 43 additions & 2 deletions test/comparison/test_extend.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,13 +907,13 @@ def test_match_extend(self):

# emulate LCS
pair.comparable_region = bs_comp.ComparableRegion(
9, 10, 6, 7, 9, 10, 6, 7, False
9, 11, 6, 8, 9, 11, 6, 8, False
)

bs_comp.extend.extend_simple_match(pair, 5, -2)

expected_comparable_region = bs_comp.ComparableRegion(
6, 10, 5, 7, 6, 10, 5, 7, False
6, 11, 5, 8, 6, 11, 5, 8, False
)

self.assertEqual(pair.comparable_region, expected_comparable_region)
Expand All @@ -933,3 +933,44 @@ def test_match_extend(self):
pair.comparable_region.domain_b_stop,
expected_comparable_region.domain_b_stop,
)

def test_expand_simple_match_multi_domain(self):
"""Tests glocal expand with multi-domain CDSs"""
# brackets indicate a cds with multiple domains
#
# A: [XX]BX[A BC]DEX XXXX
# B: XXXXXXXXXX XX A[BC]EDX[XXXX]
#
a_cds, b_cds = generate_mock_cds_lists(
10, 17, [3, 3, 3, 4, 5], [12, 13, 13, 15, 14], False
)
a_cds[0].hsps.append(a_cds[0].hsps[0])
a_cds[1].hsps = [a_cds[3].hsps[1]]
b_cds[-1].hsps.extend([b_cds[-1].hsps[0]] * 3)
record_a = generate_mock_region(a_cds)
record_b = generate_mock_region(b_cds)
pair = big_scape.comparison.record_pair.RecordPair(record_a, record_b)
pair.comparable_region = bs_comp.ComparableRegion(
3, 4, 12, 14, 4, 7, 12, 15, False
)
bs_comp.extend.extend_simple_match(pair, 5, -2)
expected_comparable_region = bs_comp.ComparableRegion(
1, 6, 12, 16, 2, 9, 12, 17, False
)
self.assertEqual(pair.comparable_region, expected_comparable_region)
self.assertEqual(
pair.comparable_region.domain_a_start,
expected_comparable_region.domain_a_start,
)
self.assertEqual(
pair.comparable_region.domain_b_start,
expected_comparable_region.domain_b_start,
)
self.assertEqual(
pair.comparable_region.domain_a_stop,
expected_comparable_region.domain_a_stop,
)
self.assertEqual(
pair.comparable_region.domain_b_stop,
expected_comparable_region.domain_b_stop,
)

0 comments on commit 8b2f21e

Please sign in to comment.