#318: modify the type "TestCase"

online-judge-tools · Mar 1, 2019 · 684cb14 · 684cb14
1 parent 689208b
commit 684cb14
Show file tree

Hide file tree

Showing 24 changed files with 384 additions and 309 deletions.
diff --git a/onlinejudge/_implementation/command/download.py b/onlinejudge/_implementation/command/download.py
@@ -11,6 +11,7 @@
 
 import onlinejudge
 import onlinejudge._implementation.download_history
+import onlinejudge._implementation.format_utils as format_utils
 import onlinejudge._implementation.logging as log
 import onlinejudge._implementation.utils as utils
 import onlinejudge.type
@@ -19,12 +20,12 @@
     import argparse
 
 
-def convert_sample_to_dict(sample: onlinejudge.type.TestCase) -> dict:
-    data = {}
-    data["input"] = sample.input.data
-    data["output"] = sample.output.data
-    if sample.input.name == sample.output.name:
-        data["name"] = sample.input.name
+def convert_sample_to_dict(sample: onlinejudge.type.TestCase) -> Dict[str, str]:
+    data = {}  # type: Dict[str, str]
+    data["name"] = sample.name
+    data["input"] = sample.input_data.decode()
+    if sample.output_data is not None:
+        data["output"] = sample.output_data.decode()
     return data
 
 
@@ -37,13 +38,7 @@ def download(args: 'argparse.Namespace') -> None:
     if args.directory is None:
         args.directory = pathlib.Path('test')
     if args.format is None:
-        if args.system:
-            if problem.get_service().get_name() == 'yukicoder':
-                args.format = '%b.%e'
-            else:
-                args.format = '%i.%e'
-        else:
-            args.format = 'sample-%i.%e'
+        args.format = '%b.%e'
 
     # get samples from the server
     with utils.with_cookiejar(utils.new_default_session(), path=args.cookie) as sess:
@@ -65,17 +60,18 @@ def download(args: 'argparse.Namespace') -> None:
     for i, sample in enumerate(samples):
         log.emit('')
         log.info('sample %d', i)
-        for kind in ['input', 'output']:
-            ext = kind[:-3]
-            data = getattr(sample, kind).data
-            name = getattr(sample, kind).name
+        for ext in ['in', 'out']:
+            data = getattr(sample, ext + 'put_data')
+            if data is None:
+                continue
+            name = sample.name
             table = {}
             table['i'] = str(i + 1)
             table['e'] = ext
             table['n'] = name
             table['b'] = os.path.basename(name)
             table['d'] = os.path.dirname(name)
-            path = args.directory / utils.percentformat(args.format, table)  # type: pathlib.Path
+            path = args.directory / format_utils.percentformat(args.format, table)  # type: pathlib.Path
             log.status('%sput: %s', ext, name)
             if not args.silent:
                 log.emit(utils.snip_large_file_content(data.rstrip(), limit=40, head=20, tail=10, bold=True))
@@ -87,7 +83,7 @@ def download(args: 'argparse.Namespace') -> None:
                     log.warning('skipped')
                     continue
             path.parent.mkdir(parents=True, exist_ok=True)
-            with path.open('w') as fh:
+            with path.open('wb') as fh:
                 fh.write(data)
             log.success('saved to: %s', path)
 

diff --git a/onlinejudge/_implementation/command/split_input.py b/onlinejudge/_implementation/command/split_input.py
@@ -6,8 +6,8 @@
 from typing.io import *
 
 import onlinejudge
+import onlinejudge._implementation.format_utils as format_utils
 import onlinejudge._implementation.logging as log
-import onlinejudge._implementation.utils as utils
 
 if TYPE_CHECKING:
     import argparse
@@ -47,7 +47,7 @@ def split_input(args: 'argparse.Namespace') -> None:
             time.sleep(args.time)
             if non_block_read(proc.stdout):  # if output exists
                 index += 1
-                path = utils.percentformat(args.output, {'i': str(index)})
+                path = format_utils.percentformat(args.output, {'i': str(index)})
                 log.info('case found: %d', index)
                 if args.header:
                     if args.header == args.header.strip():

diff --git a/onlinejudge/_implementation/format_utils.py b/onlinejudge/_implementation/format_utils.py
@@ -4,18 +4,55 @@
 import pathlib
 import re
 import sys
-from typing import Dict, List, Match, Optional
+from typing import Dict, Generator, List, Match, Optional, Set
 
 import onlinejudge
 import onlinejudge._implementation.logging as log
 import onlinejudge._implementation.utils as utils
 
 
+def percentsplit(s: str) -> Generator[str, None, None]:
+    """Split a percent-format string into tokens.
+
+    Each yielded token is either a single character other than ``%`` or a
+    two-character directive made of ``%`` and the character that follows it.
+    A ``%`` that is not followed by such a character yields no token.
+    """
+    tokenizer = re.compile('[^%]|%(.)')
+    yield from (found.group(0) for found in tokenizer.finditer(s))
+
+
+def percentformat(s: str, table: Dict[str, str]) -> str:
+    """Expand the ``%x`` directives of ``s`` using ``table``.
+
+    :param s: format string; each ``%x`` is replaced with ``table['x']`` and ``%%`` with a literal ``%``
+    :param table: maps a directive character to its replacement text; it is left unmodified
+    :return: the expanded string
+    :raises AssertionError: if ``table`` maps ``'%'`` to something other than ``'%'``
+    :raises KeyError: if ``s`` uses a directive that is not a key of ``table``
+    """
+    assert '%' not in table or table['%'] == '%'
+    # work on a copy so that the caller's dict does not silently gain a '%' entry
+    directives = dict(table)
+    directives['%'] = '%'
+    return ''.join(directives[c[1]] if c.startswith('%') else c for c in percentsplit(s))
+
+
+def percentparse(s: str, format: str, table: Dict[str, str]) -> Optional[Dict[str, str]]:
+    """Match ``s`` against a percent-format string and extract the directive values.
+
+    :param s: the string to parse
+    :param format: format string with ``%x`` directives; ``%%`` stands for a literal ``%``
+    :param table: maps a directive character to the regex fragment its value must match
+    :return: the named groups of the match (one per directive used in ``format``), or ``None`` if ``s`` does not match
+    :raises KeyError: if ``format`` uses a directive that is not a key of ``table``
+    """
+    used = set()  # type: Set[str]
+    fragments = []  # type: List[str]
+    # tokenize the raw format first and escape only the literal characters,
+    # so that escaping can never split or alter a directive
+    for token in percentsplit(format):
+        if not token.startswith('%'):
+            fragments.append(re.escape(token))
+            continue
+        c = token[1]
+        if c == '%':
+            # `%%` is a literal percent sign, as it is in percentformat
+            fragments.append(re.escape('%'))
+        elif c in used:
+            # a repeated directive must match the same text as its first occurrence
+            fragments.append(r'(?P={})'.format(c))
+        else:
+            fragments.append('(?P<{}>{})'.format(c, table[c]))
+            used.add(c)
+    # NOTE: re.match anchors only the beginning, so text after the part matched by `format` is accepted
+    m = re.match(''.join(fragments), s)
+    if not m:
+        return None
+    return m.groupdict()
+
+
 def glob_with_format(directory: pathlib.Path, format: str) -> List[pathlib.Path]:
     table = {}
     table['s'] = '*'
     table['e'] = '*'
-    pattern = (glob.escape(str(directory)) + '/' + utils.percentformat(glob.escape(format).replace('\\%', '%'), table))
+    pattern = (glob.escape(str(directory)) + '/' + percentformat(glob.escape(format).replace('\\%', '%'), table))
     paths = list(map(pathlib.Path, glob.glob(pattern)))
     for path in paths:
         log.debug('testcase globbed: %s', path)
@@ -26,15 +63,15 @@ def match_with_format(directory: pathlib.Path, format: str, path: pathlib.Path)
     table = {}
     table['s'] = '(?P<name>.+)'
     table['e'] = '(?P<ext>in|out)'
-    pattern = re.compile('^' + re.escape(str(directory.resolve())) + '/' + utils.percentformat(re.escape(format).replace('\\%', '%'), table) + '$')
+    pattern = re.compile('^' + re.escape(str(directory.resolve())) + '/' + percentformat(re.escape(format).replace('\\%', '%'), table) + '$')
     return pattern.match(str(path.resolve()))
 
 
 def path_from_format(directory: pathlib.Path, format: str, name: str, ext: str) -> pathlib.Path:
     table = {}
     table['s'] = name
     table['e'] = ext
-    return directory / utils.percentformat(format, table)
+    return directory / percentformat(format, table)
 
 
 def is_backup_or_hidden_file(path: pathlib.Path) -> bool:

diff --git a/onlinejudge/_implementation/testcase_zipper.py b/onlinejudge/_implementation/testcase_zipper.py
@@ -0,0 +1,62 @@
+# Python Version: 3.x
+# -*- coding: utf-8 -*-
+import collections
+import io
+import re
+import zipfile
+from typing import *
+
+import onlinejudge._implementation.format_utils
+import onlinejudge._implementation.logging as log
+from onlinejudge.type import *
+
+
+class SampleZipper(object):
+    """Pair up sample strings, added alternately as input and output, into TestCase objects."""
+
+    def __init__(self):
+        self.data = []  # the test cases completed so far, in the order they were paired
+        self.dangling = None  # (name, s) of an input still waiting for its output, or None
+
+    def add(self, s: str, name: str = '') -> None:
+        """Register the next sample string.
+
+        A string added while nothing is pending is kept as an input. The string
+        added after it is used as the output, and the pair is stored as a
+        TestCase named ``sample-N``. A warning is logged when ``name`` looks like
+        it belongs to the other kind of string.
+        """
+        mentions_output = bool(re.search('output', name, re.IGNORECASE) or re.search('出力', name))
+        if self.dangling is None:
+            if mentions_output:
+                log.warning('strange name for input string: %s', name)
+            self.dangling = (name, s)
+            return
+
+        mentions_input = bool(re.search('input', name, re.IGNORECASE) or re.search('入力', name))
+        # titles like "Output for Sample Input 1" mention both words and are accepted silently
+        if mentions_input and not mentions_output:
+            log.warning('strange name for output string: %s', name)
+        pending_name, pending_s = self.dangling
+        number = len(self.data) + 1
+        self.data.append(TestCase('sample-{}'.format(number), pending_name, pending_s.encode(), name, s.encode()))
+        self.dangling = None
+
+    def get(self) -> List[TestCase]:
+        """Return the collected test cases, logging an error if an input was left without an output."""
+        if self.dangling is not None:
+            log.error('dangling sample string: %s', self.dangling[1])
+        return self.data
+
+
+def extract_from_zip(zip_data: bytes, format: str, out: str = 'out') -> List[TestCase]:
+    """Build test cases from the files stored in a zip archive.
+
+    :param zip_data: the raw bytes of the zip archive
+    :param format: percent-format of the member paths; ``%s`` is the case name and ``%e`` the extension
+    :param out: the extension of output files (input files always use ``in``); it is inserted into a regex as is
+    :return: one TestCase per case name, ordered by name
+    :raises AssertionError: if a member does not match ``format``, if the same name and extension occur twice,
+        or if a case lacks its input or its output file
+    """
+    table = {
+        's': r'[^/]+',
+        'e': r'(in|{})'.format(out),
+    }
+    names = collections.defaultdict(dict)  # type: Dict[str, Dict[str, Tuple[str, bytes]]]
+    with zipfile.ZipFile(io.BytesIO(zip_data)) as fh:
+        for filename in fh.namelist():
+            # names ending with a slash are directory entries and carry no data
+            if filename.endswith('/'):
+                continue
+            m = onlinejudge._implementation.format_utils.percentparse(filename, format, table)
+            assert m
+            assert m['e'] not in names[m['s']]
+            names[m['s']][m['e']] = (filename, fh.read(filename))
+    samples = []  # type: List[TestCase]
+    for name in sorted(names.keys()):
+        data = names[name]
+        if 'in' not in data or out not in data:
+            log.error('dangling sample found: %s', str(data))
+            assert False
+        else:
+            # each entry is (filename, content), which fills a name/data pair of TestCase
+            samples += [TestCase(name, *data['in'], *data[out])]
+    return samples
diff --git a/onlinejudge/_implementation/utils.py b/onlinejudge/_implementation/utils.py
@@ -1,5 +1,4 @@
 # Python Version: 3.x
-# -*- coding: utf-8 -*-
 import contextlib
 import datetime
 import distutils.version
@@ -23,27 +22,14 @@
 
 import onlinejudge.__about__ as version
 import onlinejudge._implementation.logging as log
-from onlinejudge.type import LabeledString, Problem, Submission, TestCase
+from onlinejudge.type import *
 
 config_dir = pathlib.Path(appdirs.user_config_dir(version.__package_name__))
 data_dir = pathlib.Path(appdirs.user_data_dir(version.__package_name__))
 cache_dir = pathlib.Path(appdirs.user_cache_dir(version.__package_name__))
 html_parser = 'lxml'
 
 
-def percentformat(s: str, table: Dict[str, str]) -> str:
-    assert '%' not in table or table['%'] == '%'
-    table['%'] = '%'
-    result = ''
-    for m in re.finditer('[^%]|%(.)', s):
-        if m.group(1):
-            if m.group(1) in table:
-                result += table[m.group(1)]
-        else:
-            result += m.group(0)
-    return result
-
-
 def describe_status_code(status_code: int) -> str:
     return '{} {}'.format(status_code, http.client.responses[status_code])
 
@@ -84,29 +70,6 @@ def with_cookiejar(session: requests.Session, path: pathlib.Path = default_cooki
     path.chmod(0o600)  # NOTE: to make secure a little bit
 
 
-class SampleZipper(object):
-    def __init__(self):
-        self.data = []
-        self.dangling = None
-
-    def add(self, s: str, name: str = '') -> None:
-        if self.dangling is None:
-            if re.search('output', name, re.IGNORECASE) or re.search('出力', name):
-                log.warning('strange name for input string: %s', name)
-            self.dangling = LabeledString(name, s)
-        else:
-            if re.search('input', name, re.IGNORECASE) or re.search('入力', name):
-                if not (re.search('output', name, re.IGNORECASE) or re.search('出力', name)):  # to ignore titles like "Output for Sample Input 1"
-                    log.warning('strange name for output string: %s', name)
-            self.data += [TestCase(self.dangling, LabeledString(name, s))]
-            self.dangling = None
-
-    def get(self) -> List[TestCase]:
-        if self.dangling is not None:
-            log.error('dangling sample string: %s', self.dangling[1])
-        return self.data
-
-
 class FormSender(object):
     def __init__(self, form: bs4.Tag, url: str):
         assert isinstance(form, bs4.Tag)

diff --git a/onlinejudge/service/anarchygolf.py b/onlinejudge/service/anarchygolf.py
@@ -11,6 +11,7 @@
 import requests
 
 import onlinejudge._implementation.logging as log
+import onlinejudge._implementation.testcase_zipper
 import onlinejudge._implementation.utils as utils
 import onlinejudge.dispatch
 import onlinejudge.type
@@ -44,7 +45,7 @@ def download_sample_cases(self, session: Optional[requests.Session] = None) -> L
         resp = utils.request('GET', self.get_url(), session=session)
         # parse
         soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)
-        samples = utils.SampleZipper()
+        samples = onlinejudge._implementation.testcase_zipper.SampleZipper()
         for h2 in soup.find_all('h2'):
             it = self._parse_sample_tag(h2)
             if it is not None:

diff --git a/onlinejudge/service/aoj.py b/onlinejudge/service/aoj.py
@@ -22,7 +22,7 @@
 import onlinejudge._implementation.utils as utils
 import onlinejudge.dispatch
 import onlinejudge.type
-from onlinejudge.type import LabeledString, TestCase
+from onlinejudge.type import TestCase
 
 
 @utils.singleton
@@ -55,10 +55,13 @@ def download_sample_cases(self, session: Optional[requests.Session] = None) -> L
         url = 'https://judgedat.u-aizu.ac.jp/testcases/samples/{}'.format(self.problem_id)
         resp = utils.request('GET', url, session=session)
         samples = []  # type: List[TestCase]
-        for sample in json.loads(resp.content.decode(resp.encoding)):
+        for i, sample in enumerate(json.loads(resp.content.decode(resp.encoding))):
             samples += [TestCase(
-                LabeledString(str(sample['serial']), sample['in']),
-                LabeledString(str(sample['serial']), sample['out']),
+                'sample-{}'.format(i + 1),
+                str(sample['serial']),
+                sample['in'].encode(),
+                str(sample['serial']),
+                sample['out'].encode(),
             )]
         return samples
 
@@ -87,8 +90,11 @@ def download_system_cases(self, session: Optional[requests.Session] = None) -> L
                 log.warning("skipped due to the limitation of AOJ API")
                 continue
             testcases += [TestCase(
-                LabeledString(header['name'], testcase['in']),
-                LabeledString(header['name'], testcase['out']),
+                header['name'],
+                header['name'],
+                testcase['in'].encode(),
+                header['name'],
+                testcase['out'].encode(),
             )]
         return testcases
 

diff --git a/onlinejudge/service/atcoder.py b/onlinejudge/service/atcoder.py
@@ -23,6 +23,7 @@
 import requests
 
 import onlinejudge._implementation.logging as log
+import onlinejudge._implementation.testcase_zipper
 import onlinejudge._implementation.utils as utils
 import onlinejudge.dispatch
 import onlinejudge.type
@@ -326,7 +327,7 @@ def download_sample_cases(self, session: Optional[requests.Session] = None) -> L
             return []
         # parse
         soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)
-        samples = utils.SampleZipper()
+        samples = onlinejudge._implementation.testcase_zipper.SampleZipper()
         lang = None
         for pre, h3 in self._find_sample_tags(soup):
             s = utils.textfile(utils.dos2unix(pre.string.lstrip()))

diff --git a/onlinejudge/service/codeforces.py b/onlinejudge/service/codeforces.py
@@ -15,6 +15,7 @@
 import requests
 
 import onlinejudge._implementation.logging as log
+import onlinejudge._implementation.testcase_zipper
 import onlinejudge._implementation.utils as utils
 import onlinejudge.dispatch
 import onlinejudge.type
@@ -102,7 +103,7 @@ def download_sample_cases(self, session: Optional[requests.Session] = None) -> L
         resp = utils.request('GET', self.get_url(), session=session)
         # parse
         soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)
-        samples = utils.SampleZipper()
+        samples = onlinejudge._implementation.testcase_zipper.SampleZipper()
         for tag in soup.find_all('div', class_=re.compile('^(in|out)put$')):  # Codeforces writes very nice HTML :)
             log.debug('tag: %s', str(tag))
             assert len(list(tag.children))