Merge pull request #833 from online-judge-tools/update-hack-feature

Make --hack option of generate-input subcommand more user-friendly
online-judge-tools · Oct 22, 2020 · 1dad109 · 1dad109
2 parents 35e8751 + 1450f98
commit 1dad109
Show file tree

Hide file tree

Showing 3 changed files with 71 additions and 10 deletions.
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
@@ -11,6 +11,8 @@ jobs:
 
     - name: Set up Python
       uses: actions/setup-python@v1
+      with:
+        python-version: 3.8
 
     - name: Install dependencies
       run: pip3 install .[dev]

diff --git a/onlinejudge_command/main.py b/onlinejudge_command/main.py
@@ -63,6 +63,8 @@ def get_parser() -> argparse.ArgumentParser:
   %%                    '%' itself
 
 tips:
+  This subcommand doesn't have the feature to download all test cases for all problems in a contest at once. If you want to do this, please use `oj-prepare` command at https://github.com/online-judge-tools/template-generator instead.
+
   You can do similar things with shell and oj-api command. see https://github.com/online-judge-tools/api-client
     e.g. $ oj-api get-problem https://atcoder.jp/contests/agc001/tasks/agc001_a | jq -cr '.result.tests | to_entries[] | [{path: "test/sample-\\(.key).in", data: .value.input}, {path: "test/sample-\\(.key).out", data: .value.output}][] | {path, data: @sh "\\(.data)"} | "mkdir -p test; echo -n \\(.data) > \\(.path)"' | sh
 ''')
@@ -83,6 +85,10 @@ def get_parser() -> argparse.ArgumentParser:
   yukicoder
   HackerRank
   Toph
+
+tips:
+  You can do similar things with shell and oj-api command. see https://github.com/online-judge-tools/api-client
+    e.g. $ USERNAME=foo PASSWORD=bar oj-api login-service https://atcoder.jp/
 ''')
     subparser.add_argument('url')
     subparser.add_argument('-u', '--username')
@@ -98,6 +104,12 @@ def get_parser() -> argparse.ArgumentParser:
   yukicoder
   HackerRank
   Toph (Problem Archive)
+
+tips:
+  This subcommand has the feature to guess the problem to submit to. To guess the problem, run `oj download https://...` in the same directory without `--directory` option before using `oj submit ...`.
+
+  you can do similar things with shell and oj-api command. see https://github.com/online-judge-tools/api-client
+    e.g. $ oj-api submit-code --file main.cpp --language $(oj-api guess-language-id --file main.cpp https://atcoder.jp/contests/agc001/tasks/agc001_a | jq -r .result.id) https://atcoder.jp/contests/agc001/tasks/agc001_a
 ''')
     subparser.add_argument('url', nargs='?', help='the URL of the problem to submit. if not given, guessed from history of download command.')
     subparser.add_argument('file', type=pathlib.Path)
@@ -122,6 +134,8 @@ def get_parser() -> argparse.ArgumentParser:
   (both %s and %e are required.)
 
 tips:
+  There is a feature to use special judges. See https://online-judge-tools.readthedocs.io/en/master/introduction.en.html#test-for-problems-with-special-judge for details.
+
   You can do similar things with shell
     e.g. $ for f in test/*.in ; do echo $f ; ./a.out < $f | diff - ${f%.in}.out ; done
 ''')
@@ -145,7 +159,7 @@ def get_parser() -> argparse.ArgumentParser:
     subparser.add_argument('--no-ignore-backup', action='store_false', dest='ignore_backup')
     subparser.add_argument('--ignore-backup', action='store_true', help='ignore backup files and hidden files (i.e. files like "*~", "\\#*\\#" and ".*") (default)')
     subparser.add_argument('--log-file', type=pathlib.Path, help=argparse.SUPPRESS)
-    subparser.add_argument('--judge-command', dest='judge', default=None, help='specify judge command instead of default diff judge. See https://online-judge-tools.readthedocs.io/en/master/introduction.en.html#test-for-special-forms-of-problem for details')
+    subparser.add_argument('--judge-command', dest='judge', default=None, help='specify judge command instead of default diff judge. The given command (e.g. `./judge`) will be called as `$ ./judge input.txt actual-output.txt expected-output.txt` and should return the result with the exit code of its `main` function.')
     subparser.add_argument('test', nargs='*', type=pathlib.Path, help='paths of test cases. (if empty: globbed from --format)')
 
     # generate output
@@ -176,6 +190,11 @@ def get_parser() -> argparse.ArgumentParser:
   (both %d and %e are required.)
 
 tips:
+  There is a command to automatically generate a input generator, `oj-template` command. See https://github.com/online-judge-tools/template-generator .
+
+  This subcommand has also the feature to find a hack case.
+    e.g. for a target program `a.out`, a correct (but possibly slow) program `naive`, and a random input-case generator `generate.py`, run $ oj g/i --hack-actual ./a.out --hack-expected ./naive 'python3 generate.py'
+
   You can do similar things with shell
     e.g. $ for i in `seq 100` ; do python3 generate.py > test/random-$i.in ; done
 ''')
@@ -186,7 +205,8 @@ def get_parser() -> argparse.ArgumentParser:
     subparser.add_argument('--width', type=int, default=3, help='specify the width of indices of cases. (default: 3)')
     subparser.add_argument('--name', help='specify the base name of cases. (default: "random")')
     subparser.add_argument('-c', '--command', help='specify your solution to generate output')
-    subparser.add_argument('--hack', help='specify your solution to be compared the reference solution given by --command')
+    subparser.add_argument('--hack-expected', dest='command', help='alias of --command')
+    subparser.add_argument('--hack', '--hack-actual', dest='hack', help='specify your wrong solution to be compared with the reference solution given by --hack-expected')
     subparser.add_argument('generator', type=str, help='your program to generate test cases')
     subparser.add_argument('count', nargs='?', type=int, help='the number of cases to generate (default: 100)')
 

diff --git a/onlinejudge_command/subcommand/generate_input.py b/onlinejudge_command/subcommand/generate_input.py
@@ -1,6 +1,7 @@
 import argparse
 import concurrent.futures
 import contextlib
+import hashlib
 import itertools
 import os
 import pathlib
@@ -38,7 +39,7 @@ def submit(f, *args, **kwargs):
 
 def write_result(input_data: bytes, output_data: Optional[bytes], *, input_path: pathlib.Path, output_path: pathlib.Path, print_data: bool, lock: Optional[threading.Lock] = None) -> None:
     # acquire lock to print logs properly, if in parallel
-    nullcontext = contextlib.ExitStack()
+    nullcontext = contextlib.ExitStack()  # TODO: use contextlib.nullcontext after Python 3.7
     with lock or nullcontext:
 
         if not input_path.parent.is_dir():
@@ -74,7 +75,31 @@ def check_status(info: Dict[str, Any], proc: subprocess.Popen, *, submit: Callab
     return True
 
 
-def generate_input_single_case(generator: str, *, input_path: pathlib.Path, output_path: pathlib.Path, command: Optional[str], tle: Optional[float], name: str, lock: Optional[threading.Lock] = None) -> None:
+def check_randomness_of_generator(input_data: bytes, *, name: str, lock: Optional[threading.Lock], generated_input_hashes: Dict[bytes, str]) -> Optional[str]:
+    """Detect an input generator that emits the same input more than once.
+
+    The SHA-1 digest of every input is recorded in ``generated_input_hashes`` together with the name of the case that produced it. Recording stops once the table holds 1000 entries; from then on this function does nothing.
+
+    :param input_data: the bytes of the freshly generated input.
+    :param name: the label to store for this input when it has not been seen before.
+    :param lock: when given, held while the table is inspected and updated.
+    :param generated_input_hashes: the digest-to-name table shared between calls. It is updated in place.
+    :returns: the name stored for an identical earlier input. None if the input is new or the check is already disabled.
+    """
+
+    max_entries = 1000
+
+    # Fast path taken without the lock: a full table means the check is switched off, which also bounds the memory used.
+    if len(generated_input_hashes) >= max_entries:
+        return None
+
+    digest = hashlib.sha1(input_data).digest()
+    guard = lock or contextlib.ExitStack()  # TODO: use contextlib.nullcontext after Python 3.7
+    with guard:
+        # Look at the size again now that the guard is held; the table may have been filled since the first check.
+        if len(generated_input_hashes) >= max_entries:
+            return None
+        if digest in generated_input_hashes:
+            return generated_input_hashes[digest]
+        generated_input_hashes[digest] = name
+        if len(generated_input_hashes) == max_entries:
+            # Logging directly is fine here because the guard is still held.
+            logger.info('Conflict checking of generated inputs is disabled now because it seems the given input generator has enough randomness.')
+    return None
+
+
+def generate_input_single_case(generator: str, *, input_path: pathlib.Path, output_path: pathlib.Path, command: Optional[str], tle: Optional[float], name: str, lock: Optional[threading.Lock] = None, generated_input_hashes: Dict[bytes, str]) -> None:
     with BufferedExecutor(lock) as submit:
 
         # print the header
@@ -88,6 +113,11 @@ def generate_input_single_case(generator: str, *, input_path: pathlib.Path, outp
         if not check_status(info, proc, submit=submit):
             return
 
+        # check the randomness of generator
+        conflicted_name = check_randomness_of_generator(input_data, name=name, lock=lock, generated_input_hashes=generated_input_hashes)
+        if conflicted_name is not None:
+            submit(logger.warning, 'The same input is already generated at %s. Please use a random input generator.', conflicted_name)
+
         # generate output
         if command is None:
             output_data: Optional[bytes] = None
@@ -111,7 +141,7 @@ def simple_match(a: str, b: str) -> bool:
     return False
 
 
-def try_hack_once(generator: str, command: str, hack: str, *, tle: Optional[float], attempt: int, lock: Optional[threading.Lock] = None) -> Optional[Tuple[bytes, bytes]]:
+def try_hack_once(generator: str, command: str, hack: str, *, tle: Optional[float], attempt: int, lock: Optional[threading.Lock] = None, generated_input_hashes: Dict[bytes, str]) -> Optional[Tuple[bytes, bytes]]:
     with BufferedExecutor(lock) as submit:
 
         # print the header
@@ -126,6 +156,14 @@ def try_hack_once(generator: str, command: str, hack: str, *, tle: Optional[floa
             return None
         assert input_data is not None
 
+        # check the randomness of generator
+        # Label this input with its attempt number, so that a later duplicate warning names the attempt that produced the same input first.
+        name = '{}-th attempt'.format(attempt)
+        conflicted_name = check_randomness_of_generator(input_data, name=name, lock=lock, generated_input_hashes=generated_input_hashes)
+        if conflicted_name is not None:
+            submit(logger.warning, 'The same input is already generated at %s. Please use a random input generator.', conflicted_name)
+            # Also show the duplicated input itself.
+            submit(logger.info, utils.NO_HEADER + 'input:')
+            submit(logger.info, utils.NO_HEADER + '%s', pretty_printers.make_pretty_large_file_content(input_data, limit=40, head=20, tail=10, bold=True))
+
         # generate output
         submit(logger.info, 'generate output...')
         info, proc = utils.exec_command(command, input=input_data, timeout=tle)
@@ -186,16 +224,17 @@ def iterate_path():
                 yield (name, input_path, output_path)
 
     # generate cases
+    generated_input_hashes: Dict[bytes, str] = {}
     if args.jobs is None:
         for name, input_path, output_path in itertools.islice(iterate_path(), args.count):
             if not args.hack:
                 # generate serially
-                generate_input_single_case(args.generator, input_path=input_path, output_path=output_path, command=args.command, tle=args.tle, name=name)
+                generate_input_single_case(args.generator, input_path=input_path, output_path=output_path, command=args.command, tle=args.tle, name=name, generated_input_hashes=generated_input_hashes)
 
             else:
                 # hack serially
                 for attempt in itertools.count(1):
-                    data = try_hack_once(args.generator, command=args.command, hack=args.hack, tle=args.tle, attempt=attempt)
+                    data = try_hack_once(args.generator, command=args.command, hack=args.hack, tle=args.tle, attempt=attempt, generated_input_hashes=generated_input_hashes)
                     if data is not None:
                         write_result(*data, input_path=input_path, output_path=output_path, print_data=False)
                         break
@@ -207,7 +246,7 @@ def iterate_path():
             if not args.hack:
                 # generate concurrently
                 for name, input_path, output_path in itertools.islice(iterate_path(), args.count):
-                    futures += [executor.submit(generate_input_single_case, args.generator, input_path=input_path, output_path=output_path, command=args.command, tle=args.tle, name=name, lock=lock)]
+                    futures += [executor.submit(generate_input_single_case, args.generator, input_path=input_path, output_path=output_path, command=args.command, tle=args.tle, name=name, lock=lock, generated_input_hashes=generated_input_hashes)]
                 for future in futures:
                     future.result()
 
@@ -216,7 +255,7 @@ def iterate_path():
                 attempt = 0
                 for _ in range(args.jobs):
                     attempt += 1
-                    futures += [executor.submit(try_hack_once, args.generator, command=args.command, hack=args.hack, tle=args.tle, attempt=attempt, lock=lock)]
+                    futures += [executor.submit(try_hack_once, args.generator, command=args.command, hack=args.hack, tle=args.tle, attempt=attempt, lock=lock, generated_input_hashes=generated_input_hashes)]
                 for _, input_path, output_path in itertools.islice(iterate_path(), args.count):
                     data = None
                     while data is None:
@@ -226,7 +265,7 @@ def iterate_path():
                                 continue
                             data = futures[i].result()
                             attempt += 1
-                            futures[i] = executor.submit(try_hack_once, args.generator, command=args.command, hack=args.hack, tle=args.tle, attempt=attempt, lock=lock)
+                            futures[i] = executor.submit(try_hack_once, args.generator, command=args.command, hack=args.hack, tle=args.tle, attempt=attempt, lock=lock, generated_input_hashes=generated_input_hashes)
                             if data is not None:
                                 break
                     write_result(*data, input_path=input_path, output_path=output_path, print_data=False, lock=lock)