From 3618ae60e3eee664a70f469da8ba1accfe529148 Mon Sep 17 00:00:00 2001
From: yurakuratov <yurakuratov@gmail.com>
Date: Fri, 23 Mar 2018 18:05:56 +0300
Subject: [PATCH] fix: download NLTK punkt data for tokenization (#140)

---
 deeppavlov/models/preprocessors/squad_preprocessor.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/deeppavlov/models/preprocessors/squad_preprocessor.py b/deeppavlov/models/preprocessors/squad_preprocessor.py
index 4fdb0631d6..888a6070cf 100644
--- a/deeppavlov/models/preprocessors/squad_preprocessor.py
+++ b/deeppavlov/models/preprocessors/squad_preprocessor.py
@@ -39,6 +39,11 @@ def __init__(self, context_limit, question_limit, char_limit, *args, **kwargs):
         self.context_limit = context_limit
         self.question_limit = question_limit
         self.char_limit = char_limit
+        try:
+            import nltk
+            nltk.data.find('tokenizers/punkt')
+        except LookupError:
+            nltk.download('punkt')
 
     def __call__(self, contexts_raw, questions_raw, **kwargs):
         contexts = []