Skip to content

Commit

Permalink
Deduce ksize from SBT for sourmash watch
Browse files Browse the repository at this point in the history
  • Loading branch information
betatim committed Jan 24, 2017
1 parent b4eec82 commit 7fbc2f5
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 6 deletions.
24 changes: 18 additions & 6 deletions sourmash_lib/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,14 +860,15 @@ def watch(args):
parser.add_argument('sbt_name', help='name of SBT to search')
parser.add_argument('inp_file', nargs='?', default='/dev/stdin')
parser.add_argument('-o', '--output', type=argparse.FileType('wt'))
parser.add_argument('-k', '--ksize', type=int, default=DEFAULT_K)
parser.add_argument('--threshold', default=0.05, type=float)
parser.add_argument('--input-is-protein', action='store_true')
sourmash_args.add_moltype_args(parser, default_dna=True)
parser.add_argument('-n', '--num-hashes', type=int,
default=DEFAULT_N,
help='number of hashes to use in each sketch (default: %(default)i)')
parser.add_argument('--name', type=str, default='stdin')
sourmash_args.add_ksize_arg(parser, DEFAULT_K)

args = parser.parse_args(args)

if args.input_is_protein and args.dna:
Expand All @@ -885,16 +886,27 @@ def watch(args):
moltype = 'protein'
is_protein = True

E = sourmash_lib.Estimators(ksize=args.ksize, n=args.num_hashes,
tree = SBT.load(args.sbt_name, leaf_loader=SigLeaf.load)

def get_ksize(tree):
    """Return the ksize of the first signature leaf found in `tree`.

    Iterates over `tree.nodes`, calls `do_load()` on each node, and returns
    `data.estimator.ksize` of the first loaded node that is a `SigLeaf`.

    Raises:
        ValueError: if no node in `tree` loads as a `SigLeaf`, so there is
            nothing to deduce the ksize from.
    """
    for node in tree.nodes:
        loaded = node.do_load()
        if isinstance(loaded, sourmash_lib.sbtmh.SigLeaf):
            # NOTE(review): assumes all leaves in one SBT share the same
            # ksize, so the first leaf is representative -- confirm.
            return loaded.data.estimator.ksize

    # Falling off the end would hand back None, which the caller would then
    # pass on as a ksize; fail here with a clear message instead.
    raise ValueError('cannot deduce ksize: no signature leaves found in SBT')

# use the ksize given on the command line; if none was given, deduce it from the SBT we loaded
ksize = args.ksize
if ksize is None:
ksize = get_ksize(tree)

E = sourmash_lib.Estimators(ksize=ksize, n=args.num_hashes,
is_protein=is_protein)
streamsig = sig.SourmashSignature('', E, filename='stdin',
name=args.name)

notify('Computing signature for k={}, {} from stdin',
args.ksize, moltype)


tree = SBT.load(args.sbt_name, leaf_loader=SigLeaf.load)
ksize, moltype)

def do_search():
search_fn = SearchMinHashesFindBest().search
Expand Down
24 changes: 24 additions & 0 deletions sourmash_lib/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,30 @@ def test_watch():
assert 'FOUND: genome-s10.fa.gz, at 1.000' in err


def test_watch_deduce_ksize():
    """Check that `sourmash watch` runs without an explicit `-k`.

    Computes a k=31 signature for the test genome, indexes it into an SBT
    named `zzz`, then pipes the same genome into `sourmash watch --dna zzz`
    and checks stderr for the k=31 notice and the match line.
    """
    with utils.TempDirectory() as workdir:
        genome_path = utils.get_test_data('genome-s10.fa.gz')

        # Sketch the genome at k=31 and build the SBT from that signature.
        compute_args = ['compute', genome_path, '-k', '31', '-o', '1.sig']
        utils.runscript('sourmash', compute_args, in_directory=workdir)

        index_args = ['sbt_index', '--dna', '-k', '31', 'zzz', '1.sig']
        _status, _out, _err = utils.runscript('sourmash', index_args,
                                              in_directory=workdir)

        # Note that no -k option is passed to `watch` here.
        template = "\ngunzip -c {} | {}/sourmash watch --dna zzz\n"
        shell_cmd = template.format(genome_path, utils.scriptpath())
        _status, stdout, stderr = utils.run_shell_cmd(shell_cmd,
                                                      in_directory=workdir)

        print(stdout)
        print(stderr)
        assert 'Computing signature for k=31' in stderr
        assert 'genome-s10.fa.gz, at 1.000' in stderr


def test_watch_coverage():
with utils.TempDirectory() as location:
testdata0 = utils.get_test_data('genome-s10.fa.gz')
Expand Down

0 comments on commit 7fbc2f5

Please sign in to comment.