Skip to content

Commit

Permalink
fix: add downloading nltk punkt for tokenization (#140)
Browse files (browse the repository at this point in the history)
  • Loading branch information
yurakuratov authored and seliverstov committed Mar 23, 2018
1 parent ff8468f commit 3618ae6
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions deeppavlov/models/preprocessors/squad_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def __init__(self, context_limit, question_limit, char_limit, *args, **kwargs):
self.context_limit = context_limit
self.question_limit = question_limit
self.char_limit = char_limit
try:
import nltk
nltk.data.find('tokenizers/punkt')
except LookupError:
nltk.download('punkt')

def __call__(self, contexts_raw, questions_raw, **kwargs):
contexts = []
Expand Down

0 comments on commit 3618ae6

Please sign in to comment.