diff --git a/.circleci/config.yml b/.circleci/config.yml
index 922c53db..be4da9f3 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -25,6 +25,9 @@ jobs:
           name: Run Pytorch scripts
           command: ./scripts/run_pytorch.sh
           no_output_timeout: 1h
+      - store_test_results:
+          path: test-results
+
 
 workflows:
   version: 2
diff --git a/scripts/install.sh b/scripts/install.sh
index 896f66bf..1e05d87f 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -6,6 +6,8 @@ conda activate base
 
 conda install -y pytorch torchvision torchaudio -c pytorch-nightly
 
+conda install -y pytest
+
 # Dependencies required to load models
 conda install -y regex pillow tqdm boto3 requests numpy\
     h5py scipy matplotlib unidecode ipython pyyaml
diff --git a/scripts/run_pytorch.sh b/scripts/run_pytorch.sh
index 541df731..a4b9132a 100755
--- a/scripts/run_pytorch.sh
+++ b/scripts/run_pytorch.sh
@@ -2,38 +2,20 @@
 . ~/miniconda3/etc/profile.d/conda.sh
 conda activate base
 
-ALL_FILE=$(find *.md ! -name README.md)
-TEMP_PY="temp.py"
-CUDAS="nvidia"
+ALL_FILES=$(find *.md ! -name README.md)
+PYTHON_CODE_DIR="python_code"
 
-declare -i error_code=0
+mkdir -p "$PYTHON_CODE_DIR"  # -p: do not fail when the directory already exists
 
-for f in $ALL_FILE
+# Quick rundown: for each file we extract the python code that's within
+# the ``` markers and we put that code in a corresponding .py file in $PYTHON_CODE_DIR
+# Then we execute each of these python files with pytest in test_run_python_code.py
+for f in $ALL_FILES
 do
-  echo "Running pytorch example in $f"
-  # FIXME: NVIDIA models checkoints are on cuda
-  if [[ $f = $CUDAS* ]]; then
-    echo "...skipped due to cuda checkpoints."
-  elif [[ $f = "pytorch_fairseq_translation"* ]]; then
-    echo "...temporarily disabled"
-  # FIXME: torch.nn.modules.module.ModuleAttributeError: 'autoShape' object has no attribute 'fuse'
-  elif [[ $f = "ultralytics_yolov5"* ]]; then
-    echo "...temporarily disabled"
-  elif [[ $f = "huggingface_pytorch-transformers"* ]]; then
-    echo "...temporarily disabled"
-  # FIXME: TypeError: compose() got an unexpected keyword argument 'strict'
-  elif [[ $f = "pytorch_fairseq_roberta"* ]]; then
-    echo "...temporarily disabled"
-  # FIXME: rate limiting
-  else
-    sed -n '/^```python/,/^```/ p' < $f | sed '/^```/ d' > $TEMP_PY
-    python $TEMP_PY
-    error_code+=$?
-
-    if [ -f "$TEMP_PY" ]; then
-      rm $TEMP_PY
-    fi
-  fi
+  f_no_ext=${f%.md}  # remove .md extension
+  out_py=$PYTHON_CODE_DIR/$f_no_ext.py
+  echo "Extracting Python code from $f into $out_py"
+  sed -n '/^```python/,/^```/ p' < "$f" | sed '/^```/ d' > "$out_py"
 done
 
-exit $error_code
+pytest --junitxml=test-results/junit.xml test_run_python_code.py -vv
diff --git a/test_run_python_code.py b/test_run_python_code.py
new file mode 100644
index 00000000..f44a2856
--- /dev/null
+++ b/test_run_python_code.py
@@ -0,0 +1,54 @@
+from subprocess import check_output, STDOUT, CalledProcessError
+import sys
+import pytest
+import glob
+
+
+PYTHON_CODE_DIR = "python_code"
+# glob.glob returns matches in arbitrary order: sort them so that the
+# parametrized test IDs and execution order are the same on every run.
+ALL_FILES = sorted(glob.glob(PYTHON_CODE_DIR + "/*.py"))
+
+
+@pytest.mark.parametrize('file_path', ALL_FILES)
+def test_run_file(file_path):
+    """Run one extracted example script and fail if it exits with an error.
+
+    file_path is one of the .py files found in PYTHON_CODE_DIR. Examples that
+    are known to be broken are skipped, see the FIXMEs below.
+    """
+    if 'nvidia' in file_path:
+        # FIXME: NVIDIA models checkpoints are on cuda
+        pytest.skip("temporarily disabled")
+    if 'pytorch_fairseq_translation' in file_path:
+        pytest.skip("temporarily disabled")
+    if 'ultralytics_yolov5' in file_path:
+        # FIXME torch.nn.modules.module.ModuleAttributeError: 'autoShape' object has no attribute 'fuse'
+        pytest.skip("temporarily disabled")
+    if 'huggingface_pytorch-transformers' in file_path:
+        # FIXME: reason not recorded. The old run_pytorch.sh had no comment on
+        # this skip; its dangling "FIXME: rate limiting" may apply (to confirm).
+        pytest.skip("temporarily disabled")
+    if 'pytorch_fairseq_roberta' in file_path:
+        # FIXME: TypeError: compose() got an unexpected keyword argument 'strict'
+        pytest.skip("temporarily disabled")
+
+    # We just run the python files in a separate sub-process. We really want a
+    # subprocess here because otherwise we might run into package versions
+    # issues: imagine script A that needs torchvision 0.9 and script B that
+    # needs torchvision 0.10. If script A is run prior to script B in the same
+    # process, script B will still be run with torchvision 0.9 because the only
+    # "import torchvision" statement that counts is the first one, and even
+    # torchhub sys.path shenanigans can do nothing about this. By creating
+    # subprocesses we're sure that all file executions are fully independent.
+    try:
+        # This is inspired (and heavily simplified) from
+        # https://github.com/cloudpipe/cloudpickle/blob/343da119685f622da2d1658ef7b3e2516a01817f/tests/testutils.py#L177
+        out = check_output([sys.executable, file_path], stderr=STDOUT)
+        # errors="replace": undecodable bytes must not hide the script's outcome.
+        print(out.decode(errors="replace"))
+    except CalledProcessError as e:
+        # "from e" keeps the original error (exit status, command) in the traceback.
+        raise RuntimeError(
+            f"Script {file_path} errored with output:\n{e.output.decode(errors='replace')}"
+        ) from e