From 3618ae60e3eee664a70f469da8ba1accfe529148 Mon Sep 17 00:00:00 2001
From: yurakuratov
Date: Fri, 23 Mar 2018 18:05:56 +0300
Subject: [PATCH] fix: add downloading nltk punkt for tokenization (#140)

---
 deeppavlov/models/preprocessors/squad_preprocessor.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/deeppavlov/models/preprocessors/squad_preprocessor.py b/deeppavlov/models/preprocessors/squad_preprocessor.py
index 4fdb0631d6..888a6070cf 100644
--- a/deeppavlov/models/preprocessors/squad_preprocessor.py
+++ b/deeppavlov/models/preprocessors/squad_preprocessor.py
@@ -39,6 +39,11 @@ def __init__(self, context_limit, question_limit, char_limit, *args, **kwargs):
         self.context_limit = context_limit
         self.question_limit = question_limit
         self.char_limit = char_limit
+        try:
+            import nltk
+            nltk.data.find('tokenizers/punkt')
+        except LookupError:
+            nltk.download('punkt')
 
     def __call__(self, contexts_raw, questions_raw, **kwargs):
         contexts = []