From 46127c1ff3b05dd7095c563ccc245a231c744b99 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:09:46 +0000 Subject: [PATCH 01/33] Add lintrunner config --- .flake8 | 1 - .github/workflows/lint.yml | 56 ++++++----- .lintrunner.toml | 118 +++++++++++++++++++++++ docs/Coding_Conventions_and_Standards.md | 47 +++++++-- requirements-dev.txt | 2 - 5 files changed, 190 insertions(+), 34 deletions(-) create mode 100644 .lintrunner.toml diff --git a/.flake8 b/.flake8 index 299ca9aa354c2..4e8996fbf01e1 100644 --- a/.flake8 +++ b/.flake8 @@ -2,7 +2,6 @@ max-line-length = 120 per-file-ignores = __init__.py:F401 -format = [flake8 PEP8 ERROR] %(path)s:%(row)d:%(col)d: %(code)s %(text)s exclude = # ignore the .git directory ./.git, diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 4500beba13e56..de733e2637296 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,23 +12,7 @@ jobs: name: Lint Python runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: flake8 - uses: reviewdog/action-flake8@v3 - with: - github_token: ${{ secrets.github_token }} - # Change reviewdog reporter if you need [github-pr-check, github-check, github-pr-review]. - reporter: github-pr-check - # Change reporter level if you need. - # GitHub Status Check won't become failure with a warning. - level: error - filter_mode: file - - name: pyflakes - uses: reviewdog/action-pyflakes@v1 - with: - github_token: ${{ secrets.github_token }} - reporter: github-pr-check - level: warning + - uses: actions/checkout@v3 - name: misspell # Check spellings as well uses: reviewdog/action-misspell@v1 with: @@ -62,19 +46,41 @@ jobs: glob_pattern: "**/*.py" lint-python-format: - # Separated black/isort from other Python linters because we want this job to - # fail and not affect other linters + # Required workflow name: Python format runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 with: + # Version range or exact version of Python to use, using SemVer's version range syntax. Reads from .python-version if unset. python-version: "3.10" - - uses: psf/black@stable + - name: Install dependencies + run: | + python -m pip install -r requirements-dev.txt + lintrunner init + - name: Run lintrunner on all files + run: | + set +e + if ! 
lintrunner --force-color --all-files --tee-json=lint.json; then + echo "" + echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner\`.\e[0m" + exit 1 + fi + - name: Produce SARIF + if: always() + run: | + python -m lintrunner_adapters to-sarif lint.json lintrunner.sarif + - name: Upload SARIF file + if: always() + continue-on-error: true + uses: github/codeql-action/upload-sarif@v2 with: - options: "--check --diff --color" - - uses: isort/isort-action@master + # Path to SARIF file relative to the root of the repository + sarif_file: lintrunner.sarif + category: lintrunner + checkout_path: ${{ github.workspace }} lint-cpp: name: Lint C++ @@ -111,7 +117,7 @@ jobs: name: Lint JavaScript runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: reviewdog/action-eslint@v1 with: reporter: github-pr-check diff --git a/.lintrunner.toml b/.lintrunner.toml new file mode 100644 index 0000000000000..5ade706ef732c --- /dev/null +++ b/.lintrunner.toml @@ -0,0 +1,118 @@ +# Configuration for lintrunner https://github.com/suo/lintrunner +# You can install the dependencies and initialize with +# +# ```sh +# pip install lintrunner lintrunner-adapters +# lintrunner init +# ``` +# +# This will install lintrunner on your system and download all the necessary +# dependencies to run linters locally. +# If you want to see what lintrunner init will install, run +# `lintrunner init --dry-run`. +# +# To lint local changes: +# +# ```bash +# lintrunner +# ``` +# +# To lint all files: +# +# ```bash +# lintrunner --all-files +# ``` +# +# To format files: +# +# ```bash +# lintrunner -a --all-files +# ``` +# +# To read more about lintrunner, see [wiki](https://github.com/pytorch/pytorch/wiki/lintrunner). +# To update an existing linting rule or create a new one, modify this file or create a +# new adapter following examples in https://github.com/justinchuby/lintrunner-adapters. 
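+#
+# To run a single linter from this file, select it by its `code` value
+# (a sketch, assuming the `--take` flag available in recent lintrunner
+# versions; check `lintrunner --help` for your install):
+#
+# ```bash
+# lintrunner --take FLAKE8 --all-files
+# ```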
+
+[[linter]]
+code = 'FLAKE8'
+include_patterns = [
+    '**/*.py',
+]
+exclude_patterns = [
+]
+command = [
+    'python3',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'flake8_linter',
+    '--',
+    '@{{PATHSFILE}}'
+]
+init_command = [
+    'python3',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'pip_init',
+    '--dry-run={{DRYRUN}}',
+    'flake8==5.0.4',
+    'flake8-bugbear==22.10.27',
+    'flake8-pyi==22.10.0',
+    'dlint==0.13.0',
+]
+
+[[linter]]
+code = 'BLACK-ISORT'
+include_patterns = [
+    '**/*.py',
+]
+exclude_patterns = [
+]
+command = [
+    'python3',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'black_isort_linter',
+    '--',
+    '@{{PATHSFILE}}'
+]
+init_command = [
+    'python3',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'pip_init',
+    '--dry-run={{DRYRUN}}',
+    'black==22.10.0',
+    'isort==5.10.1',
+]
+is_formatter = true
+
+[[linter]]
+code = 'PYLINT'
+include_patterns = [
+    # TODO: Opt in to pylint by adding paths here
+]
+exclude_patterns = [
+]
+command = [
+    'python3',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'pylint_linter',
+    '--rcfile=pyproject.toml',
+    '--',
+    '@{{PATHSFILE}}'
+]
+init_command = [
+    'python3',
+    '-m',
+    'lintrunner_adapters',
+    'run',
+    'pip_init',
+    '--dry-run={{DRYRUN}}',
+    'pylint==2.15.5',
+]
diff --git a/docs/Coding_Conventions_and_Standards.md b/docs/Coding_Conventions_and_Standards.md
index f8bc60ba152a2..1051e8a3e3ac6 100644
--- a/docs/Coding_Conventions_and_Standards.md
+++ b/docs/Coding_Conventions_and_Standards.md
@@ -112,15 +112,15 @@ void foo(gsl::span names) {
 * The following C++ warnings should never be disabled in onnxruntime VC++ projects(Required by [Binskim](https://github.com/microsoft/binskim/blob/d9afb65c89a621411efded74c27999281d87867e/src/BinSkim.Rules/PERules/BA2007.EnableCriticalCompilerWarnings.cs)).
   1. [4018](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4018) 'token' : signed/unsigned mismatch
   2. [4146](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4146?view=msvc-160) unary minus operator applied to unsigned type, result still unsigned
-  3. [4244](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4244?view=msvc-160) 'argument' : conversion from 'type1' to 'type2', possible loss of data. For example, casting a int64_t to size_t.
+  3. [4244](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4244?view=msvc-160) 'argument' : conversion from 'type1' to 'type2', possible loss of data. For example, casting an int64_t to size_t.
   4. [4267](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4267?view=msvc-160) 'var' : conversion from 'size_t' to 'type', possible loss of data.
   5. [4302](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4302?view=msvc-160) 'conversion' : truncation from 'type 1' to 'type 2'
-  6. [4308](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4308?view=msvc-160) negative integral constant converted to unsigned type
+  6. [4308](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4308?view=msvc-160) negative integral constant converted to unsigned type
   7. 
[4532](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-c4532?view=msvc-160) 'continue' : jump out of \_\_finally/finally block has undefined behavior during termination handling
   8. [4533](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-c4533?view=msvc-160) initialization of 'variable' is skipped by 'instruction'
   9. [4700](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-and-level-4-c4700?view=msvc-160) uninitialized local variable 'name' used
   10. [4789](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-c4789?view=msvc-160) buffer 'identifier' of size N bytes will be overrun; M bytes will be written starting at offset L
-  11. [4995](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4995?view=msvc-160) 'function': name was marked as #pragma deprecated
+  11. [4995](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4995?view=msvc-160) 'function': name was marked as #pragma deprecated
   12. [4996](https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4996?view=msvc-160) Your code uses a function, class member, variable, or typedef that's marked deprecated

 #### Clang-format
@@ -150,6 +150,42 @@ There is a configuration file in `onnxruntime/VSCodeCoverage.runsettings` that c
 Using `Show Code Coverage Coloring` will allow you to visually inspect which lines were hit by the tests. See .

+## Linting
+
+This project uses [lintrunner](https://github.com/suo/lintrunner) for linting. It provides a consistent linting experience locally and in CI. You can install the dependencies and initialize with
+
+```sh
+pip install lintrunner lintrunner-adapters
+lintrunner init
+```
+
+This will install lintrunner on your system and download all the necessary
+dependencies to run linters locally.
+If you want to see what `lintrunner init` will install, run
+`lintrunner init --dry-run`.
+
+To lint local changes:
+
+```bash
+lintrunner -m main
+```
+
+To lint all files:
+
+```bash
+lintrunner --all-files
+```
+
+To format files:
+
+```bash
+lintrunner -a --all-files
+```
+
+To read more about lintrunner, see the [lintrunner wiki](https://github.com/pytorch/pytorch/wiki/lintrunner).
+To update an existing linting rule or create a new one, modify `.lintrunner.toml` or create a
+new adapter, following the examples in https://github.com/justinchuby/lintrunner-adapters.
+
 ## Python Code Style

 Follow the [Black formatter](https://black.readthedocs.io)'s coding style when possible. A maximum line length of 120 characters is allowed for consistency with the C++ code.
@@ -160,11 +196,10 @@ Code can be validated with [flake8](https://pypi.org/project/flake8/) using the

 Use `pyright`, which is provided as a component of the `pylance` extension in VS Code for static type checking.

-Auto-formatting is done with `black` and `isort`. The tools are configured in `pyproject.toml`. From anywhere in the repository, you can run
+Auto-formatting is done with `black` and `isort`. The tools are configured in `pyproject.toml`. From the root of the repository, you can run

 ```sh
-black .
-isort .
+lintrunner f --all-files
 ```

 to format Python files. 
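+
+For reference, a minimal `pyproject.toml` configuration for the two formatters
+could look like the following (an illustrative sketch only; the authoritative
+settings are the ones in the `pyproject.toml` at the repository root):
+
+```toml
+[tool.black]
+line-length = 120
+
+[tool.isort]
+profile = "black"
+line_length = 120
+```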
diff --git a/requirements-dev.txt b/requirements-dev.txt index 4a4e718d9fa60..42f49c335e100 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,8 +1,6 @@ -black>=22.3 cerberus codecov flatbuffers -isort jinja2 numpy onnx From a01d696f3bafe3d25b1697c93a618b616a13581d Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:09:56 +0000 Subject: [PATCH 02/33] Remove obsolete python checks --- .../python-checks-ci-pipeline.yml | 19 ------------------- tools/ci_build/github/python_checks/readme.md | 18 ------------------ .../github/python_checks/requirements.txt | 1 - 3 files changed, 38 deletions(-) delete mode 100644 tools/ci_build/github/azure-pipelines/python-checks-ci-pipeline.yml delete mode 100644 tools/ci_build/github/python_checks/readme.md delete mode 100644 tools/ci_build/github/python_checks/requirements.txt diff --git a/tools/ci_build/github/azure-pipelines/python-checks-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/python-checks-ci-pipeline.yml deleted file mode 100644 index 17c2b8766d891..0000000000000 --- a/tools/ci_build/github/azure-pipelines/python-checks-ci-pipeline.yml +++ /dev/null @@ -1,19 +0,0 @@ -jobs: -- job: 'PythonCodeChecks' - pool: - vmImage: 'ubuntu-20.04' - - timeoutInMinutes: 10 - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.8' - addToPath: true - architecture: 'x64' - - - script: python -m pip install -r tools/ci_build/github/python_checks/requirements.txt - displayName: "Install requirements" - - - script: python -m flake8 --config .flake8 - displayName: "Run Flake8" diff --git a/tools/ci_build/github/python_checks/readme.md b/tools/ci_build/github/python_checks/readme.md deleted file mode 100644 index b31300d6cf07b..0000000000000 --- a/tools/ci_build/github/python_checks/readme.md +++ /dev/null @@ -1,18 +0,0 @@ -# Python Code Checks - -Python code checks are run by this [CI build](../azure-pipelines/python-checks-ci-pipeline.yml). -Here are instructions on how to run them manually. - -## Prerequisites - -Install requirements. 
- -From the repo root, run: - -`$ python -m pip install -r tools/ci_build/github/python_checks/requirements.txt` - -## Flake8 - -From the repo root, run: - -`$ python -m flake8 --config .flake8` diff --git a/tools/ci_build/github/python_checks/requirements.txt b/tools/ci_build/github/python_checks/requirements.txt deleted file mode 100644 index b5446261e8e51..0000000000000 --- a/tools/ci_build/github/python_checks/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -flake8==3.9 From 621e9865e2dbb5c1b1c9181ff4ac6e37fb59ce8b Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:10:41 +0000 Subject: [PATCH 03/33] requirements --- requirements-dev.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 42f49c335e100..1e4917c1c031c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,3 +17,5 @@ scipy sympy wheel setuptools>=41.4.0 +lintrunner +lintrunner-adapters From c228bb8aea6e30e9d114e6464451eb2cffc4484e Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:18:37 +0000 Subject: [PATCH 04/33] Exclude --- .flake8 | 22 +--------------------- .lintrunner.toml | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/.flake8 b/.flake8 index 4e8996fbf01e1..b40efab4e07bf 100644 --- a/.flake8 +++ b/.flake8 @@ -2,25 +2,5 @@ max-line-length = 120 per-file-ignores = __init__.py:F401 -exclude = - # ignore the .git directory - ./.git, - # ignore default build directory - ./build, - # ignore external dependency files - ./cmake/external, - # TODO enable - ./docs/python, - # ignore generated flatbuffers code - ./onnxruntime/core/flatbuffers/ort_flatbuffers_py, - # TODO enable - ./onnxruntime/python/tools, - # ignore test code for now - ./onnxruntime/test, - # TODO enable - ./orttraining, - # ignore server code for now - ./server, - # ignore issues from different git branches - ./.git, +# NOTE: Edit exclude list in .lintrunner.toml ignore = W503, E203 diff --git a/.lintrunner.toml b/.lintrunner.toml index 5ade706ef732c..df8bd0ebba152 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -39,6 +39,20 @@ include_patterns = [ '**/*.py', ] exclude_patterns = [ + 'build/**', + 'cmake/external/**', + # TODO enable + 'docs/python/**', + # ignore generated flatbuffers code + 'onnxruntime/core/flatbuffers/ort_flatbuffers_py/**', + # TODO enable + './onnxruntime/python/tools/**', + # FIXME(#7032): ignore test code for now + 'onnxruntime/test/**', + # TODO enable + 'orttraining/**', + # FIXME(#7032): ignore server code for now + 'server/**', ] command = [ 'python3', From 343875a5e00da1ac712fffee32b0f7a2cd1e937e Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:32:12 +0000 Subject: [PATCH 05/33] exclude --- .lintrunner.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.lintrunner.toml b/.lintrunner.toml index df8bd0ebba152..ad3a43230291f 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -82,6 +82,9 @@ include_patterns = [ '**/*.py', ] exclude_patterns = [ + 'cmake/*', + 'orttraining/*', + 'onnxruntime/core/flatbuffers/*', ] command = [ 'python3', From e25fbb47ad85748127af2072918c533bc33bea1e Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:34:18 +0000 Subject: [PATCH 06/33] Ruff autofix --- onnxruntime/python/backend/backend.py | 4 ++-- onnxruntime/python/backend/backend_rep.py | 1 - .../python/tools/microbench/benchmark.py | 2 +- .../tools/quantization/onnx_quantizer.py | 2 +- .../quantization/operators/activation.py | 1 - 
.../tools/quantization/operators/attention.py | 1 - .../tools/quantization/operators/binary_op.py | 1 - .../tools/quantization/operators/concat.py | 3 +-- .../tools/quantization/operators/conv.py | 1 - .../quantization/operators/embed_layernorm.py | 1 - .../tools/quantization/operators/gemm.py | 4 ---- .../tools/quantization/operators/lstm.py | 2 +- .../tools/quantization/operators/pad.py | 1 - .../operators/qdq_base_operator.py | 2 -- .../tools/quantization/shape_inference.py | 2 +- .../python/tools/symbolic_shape_infer.py | 19 +++++++++---------- .../tools/tensorrt/perf/benchmark_wrapper.py | 5 ----- .../comparison_scripts/compare_latency.py | 2 +- .../python/tools/tensorrt/perf/perf_utils.py | 10 ++++------ .../python/tools/transformers/benchmark.py | 7 ++----- .../tools/transformers/bert_test_data.py | 2 +- .../tools/transformers/convert_generation.py | 2 +- .../convert_tf_models_to_pytorch.py | 4 ++-- .../python/tools/transformers/float16.py | 2 +- .../tools/transformers/fusion_attention.py | 4 ---- .../transformers/fusion_gelu_approximation.py | 1 - .../transformers/fusion_gpt_attention.py | 4 ++-- .../fusion_gpt_attention_megatron.py | 4 +--- .../fusion_gpt_attention_no_past.py | 8 +++----- .../tools/transformers/fusion_layernorm.py | 2 +- .../transformers/fusion_qordered_gelu.py | 2 +- .../transformers/fusion_qordered_layernorm.py | 2 +- .../transformers/fusion_qordered_matmul.py | 2 +- .../transformers/fusion_skiplayernorm.py | 8 ++++---- .../python/tools/transformers/fusion_utils.py | 2 +- .../tools/transformers/io_binding_helper.py | 2 +- .../python/tools/transformers/machine_info.py | 3 +-- .../tools/transformers/models/bart/export.py | 8 ++++---- .../utils/chain_enc_dec_with_beamsearch.py | 2 +- .../models/gpt2/benchmark_gpt2.py | 2 +- .../transformers/models/gpt2/gpt2_helper.py | 4 ++-- .../transformers/models/gpt2/gpt2_parity.py | 3 ++- .../models/gpt2/parity_check_helper.py | 2 +- .../models/longformer/convert_to_onnx.py | 4 ++-- .../models/longformer/generate_test_data.py | 2 +- .../tools/transformers/onnx_exporter.py | 10 +++++----- .../python/tools/transformers/onnx_model.py | 6 +++--- .../tools/transformers/onnx_model_bert.py | 4 ++-- .../transformers/onnx_model_bert_keras.py | 14 ++++---------- .../tools/transformers/onnx_model_bert_tf.py | 5 +---- .../tools/transformers/onnx_model_gpt2.py | 3 +-- .../python/tools/transformers/optimizer.py | 6 +++--- .../python/tools/transformers/profiler.py | 2 +- .../tools/transformers/quantize_helper.py | 1 - .../tools/transformers/shape_optimizer.py | 5 +---- onnxruntime/test/onnx/gen_test_models.py | 2 +- .../reduction_test_cases_generator.py | 1 - onnxruntime/test/providers/cpu/rnn/GRU.py | 6 ------ onnxruntime/test/providers/cpu/rnn/LSTM.py | 7 ++----- .../python/onnxruntime_test_ort_trainer.py | 13 +------------ .../test/python/onnxruntime_test_python.py | 13 ++++++------- .../onnxruntime_test_python_cudagraph.py | 6 ------ .../onnxruntime_test_python_iobinding.py | 2 +- .../python/onnxruntime_test_python_mlops.py | 1 - ...untime_test_python_symbolic_shape_infer.py | 2 +- .../python/quantization/test_conv_dynamic.py | 2 -- .../python/quantization/test_onnx_model.py | 3 +-- .../python/quantization/test_op_gavgpool.py | 2 +- .../python/quantization/test_op_pooling.py | 1 - .../quantization/test_symmetric_flag.py | 2 +- .../transformers/gpt2_model_generator.py | 4 ---- .../test/python/transformers/model_loader.py | 1 - .../python/transformers/parity_utilities.py | 3 +-- .../generate_tiny_keras2onnx_bert_models.py | 5 ++--- 
.../generate_tiny_gpt2_model.py | 3 --- .../python/transformers/test_optimizer.py | 2 +- .../test_parity_decoder_attention.py | 4 +--- .../test_parity_huggingface_gpt_attention.py | 4 ++-- .../testdata/dynamic_quantize_matmul_test.py | 1 - .../test/testdata/ep_partitioning_tests.py | 1 - .../test/testdata/matmul_integer_to_float.py | 1 - .../testdata/sparse_initializer_as_output.py | 8 -------- .../test/testdata/sparse_to_dense_matmul.py | 9 --------- .../lr_scheduler_test_data_generator.py | 1 - .../sgd_test/sgd_test_data_generator.py | 4 ++-- .../testdata/transform/cast_elimination.py | 3 +-- .../transform/computation_reduction.py | 2 +- .../gathernd/gathernd_add.py | 2 +- .../gathernd/gathernd_div.py | 2 +- .../gathernd/gathernd_gelu.py | 3 +-- .../gathernd/gathernd_layernormalization.py | 2 +- .../gathernd/gathernd_matmul.py | 2 +- .../transform/concat_slice_elimination.py | 3 +-- .../test/testdata/transform/cse/generate.py | 2 +- .../testdata/transform/expand_elimination.py | 2 +- .../transform/fusion/attention_gen.py | 1 - ...stant_folding_with_shape_to_initializer.py | 2 +- .../test/testdata/transform/fusion/div_mul.py | 1 - .../fusion/dynamic_quantize_matmul.py | 1 - .../transform/fusion/embed_layer_norm_gen.py | 1 - .../testdata/transform/fusion/fast_gelu.py | 2 +- .../testdata/transform/fusion/fast_gelu2.py | 2 +- .../transform/fusion/fast_gelu3_with_casts.py | 2 +- .../testdata/transform/fusion/gelu_gen.py | 2 +- .../transform/fusion/isinf_reducesum.py | 1 - .../transform/fusion/layer_norm_t5_gen.py | 1 - .../fusion/layer_norm_with_cast_2.py | 2 -- .../fusion/matmul_integer_to_float.py | 1 - .../testdata/transform/fusion/not_where.py | 1 - .../test/testdata/transform/id-elim.py | 3 +-- .../test/testdata/transform/id-scan9_sum.py | 3 +-- .../bart_mlp_megatron_basic_test.py | 2 +- ...bart_self_attention_megatron_basic_test.py | 3 +-- .../model_parallel/mlp_megatron_basic_test.py | 2 +- .../self_attention_megatron_basic_test.py | 2 +- 115 files changed, 125 insertions(+), 255 deletions(-) diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py index 99592e3d5cf3e..b2dcd00a5a5fa 100644 --- a/onnxruntime/python/backend/backend.py +++ b/onnxruntime/python/backend/backend.py @@ -9,7 +9,7 @@ import unittest import packaging.version -from onnx import ModelProto, helper, version +from onnx import helper, version from onnx.backend.base import Backend from onnx.checker import check_model @@ -59,7 +59,7 @@ def is_opset_supported(cls, model): domain = opset.domain if opset.domain else "ai.onnx" try: key = (domain, opset.version) - if not (key in helper.OP_SET_ID_VERSION_MAP): + if key not in helper.OP_SET_ID_VERSION_MAP: error_message = ( "Skipping this test as only released onnx opsets are supported." "To run this test set env variable ALLOW_RELEASED_ONNX_OPSET_ONLY to 0." diff --git a/onnxruntime/python/backend/backend_rep.py b/onnxruntime/python/backend/backend_rep.py index 6dced3aba7f80..d7b18373f0e78 100644 --- a/onnxruntime/python/backend/backend_rep.py +++ b/onnxruntime/python/backend/backend_rep.py @@ -5,7 +5,6 @@ """ Implements ONNX's backend API. 
""" -from typing import Any, Tuple from onnx.backend.base import BackendRep diff --git a/onnxruntime/python/tools/microbench/benchmark.py b/onnxruntime/python/tools/microbench/benchmark.py index fcf8c6f23f362..da6849059536e 100644 --- a/onnxruntime/python/tools/microbench/benchmark.py +++ b/onnxruntime/python/tools/microbench/benchmark.py @@ -76,7 +76,7 @@ def get_default_provider(): class Benchmark: def __init__(self, model, inputs, outputs, args): - self.provider = get_default_provider() if args.provider == None else provider_name(args.provider) + self.provider = get_default_provider() if args.provider is None else provider_name(args.provider) logger.info(f"Execution provider: {self.provider}") self.profiling = args.profiling self.model = model diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py index 3c54748ea9df0..30c5cf8cd7287 100644 --- a/onnxruntime/python/tools/quantization/onnx_quantizer.py +++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py @@ -114,7 +114,7 @@ def __init__( self.opset_version = self.check_opset_version() - if not self.mode in QuantizationMode: + if self.mode not in QuantizationMode: raise ValueError("unsupported quantization mode {}".format(self.mode)) self.quantization_params = self.calculate_quantization_params() diff --git a/onnxruntime/python/tools/quantization/operators/activation.py b/onnxruntime/python/tools/quantization/operators/activation.py index 1029e7b679b60..1655ac416606c 100644 --- a/onnxruntime/python/tools/quantization/operators/activation.py +++ b/onnxruntime/python/tools/quantization/operators/activation.py @@ -1,5 +1,4 @@ import onnx -from onnx import onnx_pb as onnx_proto from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain from .base_operator import QuantOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/attention.py b/onnxruntime/python/tools/quantization/operators/attention.py index 36428b860d060..495cd97d81ee6 100644 --- a/onnxruntime/python/tools/quantization/operators/attention.py +++ b/onnxruntime/python/tools/quantization/operators/attention.py @@ -1,5 +1,4 @@ import onnx -from onnx import onnx_pb as onnx_proto from ..quant_utils import attribute_to_kwarg, ms_domain from .base_operator import QuantOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/binary_op.py b/onnxruntime/python/tools/quantization/operators/binary_op.py index 3beb96aabe575..c7d529c83553b 100644 --- a/onnxruntime/python/tools/quantization/operators/binary_op.py +++ b/onnxruntime/python/tools/quantization/operators/binary_op.py @@ -1,5 +1,4 @@ import onnx -from onnx import onnx_pb as onnx_proto from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain from .base_operator import QuantOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/concat.py b/onnxruntime/python/tools/quantization/operators/concat.py index 998ca5c558743..47329c178620a 100644 --- a/onnxruntime/python/tools/quantization/operators/concat.py +++ b/onnxruntime/python/tools/quantization/operators/concat.py @@ -1,8 +1,7 @@ import onnx -from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain +from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, attribute_to_kwarg, ms_domain from .base_operator import QuantOperatorBase -from .qdq_base_operator import 
QDQOperatorBase class QLinearConcat(QuantOperatorBase): diff --git a/onnxruntime/python/tools/quantization/operators/conv.py b/onnxruntime/python/tools/quantization/operators/conv.py index 0d137ab2eff14..62b7ce2afdf9d 100644 --- a/onnxruntime/python/tools/quantization/operators/conv.py +++ b/onnxruntime/python/tools/quantization/operators/conv.py @@ -4,7 +4,6 @@ from ..quant_utils import ( TENSOR_NAME_QUANT_SUFFIX, - BiasToQuantize, QuantizedValue, QuantizedValueType, attribute_to_kwarg, diff --git a/onnxruntime/python/tools/quantization/operators/embed_layernorm.py b/onnxruntime/python/tools/quantization/operators/embed_layernorm.py index 01b5fad1c3c75..2aacd306ecaea 100644 --- a/onnxruntime/python/tools/quantization/operators/embed_layernorm.py +++ b/onnxruntime/python/tools/quantization/operators/embed_layernorm.py @@ -1,7 +1,6 @@ import logging import onnx -from onnx import onnx_pb as onnx_proto from ..quant_utils import attribute_to_kwarg, ms_domain from .base_operator import QuantOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/gemm.py b/onnxruntime/python/tools/quantization/operators/gemm.py index 07e7678a34957..969e7074f4025 100644 --- a/onnxruntime/python/tools/quantization/operators/gemm.py +++ b/onnxruntime/python/tools/quantization/operators/gemm.py @@ -1,6 +1,5 @@ import logging -import numpy as np import onnx from onnx import onnx_pb as onnx_proto @@ -9,11 +8,8 @@ QuantizedValue, QuantizedValueType, attribute_to_kwarg, - find_by_name, - get_mul_node, ms_domain, ) -from .base_operator import QuantOperatorBase from .matmul import QOpMatMul from .qdq_base_operator import QDQOperatorBase diff --git a/onnxruntime/python/tools/quantization/operators/lstm.py b/onnxruntime/python/tools/quantization/operators/lstm.py index 87552a18a037e..263b69a5fa371 100644 --- a/onnxruntime/python/tools/quantization/operators/lstm.py +++ b/onnxruntime/python/tools/quantization/operators/lstm.py @@ -2,7 +2,7 @@ import onnx from onnx import onnx_pb as onnx_proto -from ..quant_utils import QuantType, attribute_to_kwarg, ms_domain +from ..quant_utils import attribute_to_kwarg, ms_domain from .base_operator import QuantOperatorBase """ diff --git a/onnxruntime/python/tools/quantization/operators/pad.py b/onnxruntime/python/tools/quantization/operators/pad.py index e7eeac2cec3ef..2d1690e545263 100644 --- a/onnxruntime/python/tools/quantization/operators/pad.py +++ b/onnxruntime/python/tools/quantization/operators/pad.py @@ -1,4 +1,3 @@ -import numpy as np import onnx from ..quant_utils import ( diff --git a/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py b/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py index 0fe05df5191fa..a84f490e8ac6e 100644 --- a/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py +++ b/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py @@ -1,7 +1,5 @@ import itertools -from ..quant_utils import QuantizedValue, QuantizedValueType, attribute_to_kwarg, quantize_nparray -from .base_operator import QuantOperatorBase class QDQOperatorBase: diff --git a/onnxruntime/python/tools/quantization/shape_inference.py b/onnxruntime/python/tools/quantization/shape_inference.py index 7df2dec59bf42..9aaac95a8dc5b 100644 --- a/onnxruntime/python/tools/quantization/shape_inference.py +++ b/onnxruntime/python/tools/quantization/shape_inference.py @@ -89,7 +89,7 @@ def quant_pre_process( sess_option.optimized_model_filepath = opt_model_path sess_option.graph_optimization_level = 
onnxruntime.GraphOptimizationLevel.ORT_ENABLE_BASIC _ = onnxruntime.InferenceSession(input_model_path, sess_option, providers=["CPUExecutionProvider"]) - except Exception as e: + except Exception: logger.error( "ONNX Runtime Model Optimization Failed! Consider rerun with option `--skip_optimization'." ) diff --git a/onnxruntime/python/tools/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py index 53937bc7f6a9d..3b4ce53fcc752 100755 --- a/onnxruntime/python/tools/symbolic_shape_infer.py +++ b/onnxruntime/python/tools/symbolic_shape_infer.py @@ -385,7 +385,7 @@ def _get_sympy_shape(self, node, idx): else sympy.Symbol(d, integer=True, nonnegative=True) ) else: - assert None != d + assert None is not d sympy_shape.append(d) return sympy_shape @@ -412,7 +412,7 @@ def _update_computed_dims(self, new_sympy_shape): new_sympy_shape[i] = self.symbolic_dims_[self.suggested_merge_[str_dim]] else: # add new_dim if it's a computational expression - if not str(new_dim) in self.symbolic_dims_: + if str(new_dim) not in self.symbolic_dims_: self.symbolic_dims_[str(new_dim)] = new_dim def _onnx_infer_single_node(self, node): @@ -481,7 +481,7 @@ def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph # for example, with Scan/Loop, subgraph input shape would be trimmed from node input shape # besides, inputs in subgraph could shadow implicit inputs subgraph_inputs = set([i.name for i in list(subgraph.initializer) + list(subgraph.input)]) - subgraph_implicit_input = set([name for name in self.known_vi_.keys() if not name in subgraph_inputs]) + subgraph_implicit_input = set([name for name in self.known_vi_.keys() if name not in subgraph_inputs]) tmp_graph = helper.make_graph( list(subgraph.node), "tmp", @@ -502,11 +502,10 @@ def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph if inc_subgraph_id: self.subgraph_id_ += 1 - all_shapes_inferred = False symbolic_shape_inference._preprocess(self.tmp_mp_) symbolic_shape_inference.suggested_merge_ = self.suggested_merge_.copy() while symbolic_shape_inference.run_: - all_shapes_inferred = symbolic_shape_inference._infer_impl(self.sympy_data_.copy()) + symbolic_shape_inference._infer_impl(self.sympy_data_.copy()) symbolic_shape_inference._update_output_from_vi() if use_node_input: # if subgraph uses node input, it needs to update to merged dims @@ -521,7 +520,7 @@ def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True, inc_subgraph # for new symbolic dims from subgraph output, add to main graph symbolic dims subgraph_shapes = [get_shape_from_value_info(o) for o in symbolic_shape_inference.out_mp_.graph.output] subgraph_new_symbolic_dims = set( - [d for s in subgraph_shapes if s for d in s if type(d) == str and not d in self.symbolic_dims_] + [d for s in subgraph_shapes if s for d in s if type(d) == str and d not in self.symbolic_dims_] ) new_dims = {} for d in subgraph_new_symbolic_dims: @@ -793,7 +792,7 @@ def _infer_Compress(self, node): # create a new symbolic dimension for Compress output compress_len = str(self._new_symbolic_dim_from_output(node)) axis = get_attribute(node, "axis") - if axis == None: + if axis is None: # when axis is not specified, input is flattened before compress so output is 1D output_shape = [compress_len] else: @@ -1028,7 +1027,7 @@ def _infer_GatherND(self, node): data_shape = self._get_shape(node, 0) data_rank = len(data_shape) indices_shape = self._get_shape(node, 1) - indices_rank = len(indices_shape) + len(indices_shape) last_index_dimension = 
indices_shape[-1] assert is_literal(last_index_dimension) and last_index_dimension <= data_rank new_shape = indices_shape[:-1] + data_shape[last_index_dimension:] @@ -1881,7 +1880,7 @@ def _infer_TopK(self, node): else: k = self._get_int_values(node)[1] - if k == None: + if k is None: k = self._new_symbolic_dim_from_output(node) else: k = as_scalar(k) @@ -2350,7 +2349,7 @@ def get_prereq(node): self.run_ = False # create new dynamic dims for ops not handled by symbolic shape inference - if self.run_ == False and not node.op_type in self.dispatcher_ and not known_aten_op: + if self.run_ is False and node.op_type not in self.dispatcher_ and not known_aten_op: is_unknown_op = out_type_undefined and (out_shape is None or len(out_shape) == 0) if is_unknown_op: # unknown op to ONNX, maybe from higher opset or other domain diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py b/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py index 918add64ce5f3..031cb293d0d78 100644 --- a/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py +++ b/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py @@ -1,13 +1,8 @@ -import argparse -import copy -import csv import json -import logging import os import pprint import re -import coloredlogs from benchmark import * from perf_utils import * diff --git a/onnxruntime/python/tools/tensorrt/perf/comparison_scripts/compare_latency.py b/onnxruntime/python/tools/tensorrt/perf/comparison_scripts/compare_latency.py index 93df53c9825db..b44a672e7723b 100644 --- a/onnxruntime/python/tools/tensorrt/perf/comparison_scripts/compare_latency.py +++ b/onnxruntime/python/tools/tensorrt/perf/comparison_scripts/compare_latency.py @@ -48,7 +48,7 @@ def main(): condition_fp16 = get_table_condition(common, "fp16", args.ep, args.tolerance) common["greater"] = np.where((condition_fp32 | condition_fp16), True, False) - greater = common[common["greater"] == True].drop(["greater"], axis=1) + greater = common[common["greater"] is True].drop(["greater"], axis=1) # arrange columns keys = list(greater.keys().sort_values()) diff --git a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py index 61cac72b271c1..65d5d5517a982 100644 --- a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py +++ b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py @@ -1,11 +1,9 @@ import json -import logging import pprint import re import subprocess import sys -import coloredlogs debug = False debug_verbose = False @@ -122,7 +120,7 @@ def parse_single_file(f): try: data = json.load(f) - except Exception as e: + except Exception: return None model_run_flag = False @@ -131,7 +129,7 @@ def parse_single_file(f): provider_op_map_first_run = {} # ep -> map of operator to duration for row in data: - if not "cat" in row: + if "cat" not in row: continue if row["cat"] == "Session": @@ -146,7 +144,7 @@ def parse_single_file(f): if "name" in row and "args" in row and re.search(".*kernel_time", row["name"]): args = row["args"] - if not "op_name" in args or not "provider" in args: + if "op_name" not in args or "provider" not in args: continue provider = args["provider"] @@ -172,7 +170,7 @@ def parse_single_file(f): op_map = provider_op_map[provider] # avoid duplicated metrics - if not row["name"] in op_map: + if row["name"] not in op_map: op_map[row["name"]] = row["dur"] provider_op_map[provider] = op_map diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 
e3062d6ff7e26..dec7bc7beadfc 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -45,16 +45,13 @@ import os import timeit from datetime import datetime -from enum import Enum import numpy -import onnx import psutil from benchmark_helper import ( ConfigModifier, OptimizerInfo, Precision, - allocateOutputBuffers, create_onnxruntime_session, get_latency_result, inference_ort, @@ -84,7 +81,7 @@ os.environ["OMP_NUM_THREADS"] = str(cpu_count) import torch -from transformers import AutoConfig, AutoModel, AutoTokenizer, GPT2Model, LxmertConfig +from transformers import AutoConfig, AutoTokenizer, LxmertConfig def run_onnxruntime( @@ -864,7 +861,7 @@ def main(): args, ) except: - logger.error(f"Exception", exc_info=True) + logger.error("Exception", exc_info=True) time_stamp = datetime.now().strftime("%Y%m%d-%H%M%S") if model_fusion_statistics: diff --git a/onnxruntime/python/tools/transformers/bert_test_data.py b/onnxruntime/python/tools/transformers/bert_test_data.py index 12c2145fe3eb0..38c14cdb0dfda 100644 --- a/onnxruntime/python/tools/transformers/bert_test_data.py +++ b/onnxruntime/python/tools/transformers/bert_test_data.py @@ -317,7 +317,7 @@ def find_bert_inputs( if "mask" in input_name_lower: input_mask = input if input_mask is None: - raise ValueError(f"Failed to find attention mask input") + raise ValueError("Failed to find attention mask input") return input_ids, segment_ids, input_mask diff --git a/onnxruntime/python/tools/transformers/convert_generation.py b/onnxruntime/python/tools/transformers/convert_generation.py index ed9f84cf0be40..316cca24c7da8 100644 --- a/onnxruntime/python/tools/transformers/convert_generation.py +++ b/onnxruntime/python/tools/transformers/convert_generation.py @@ -1453,7 +1453,7 @@ def convert_generation_model(args: argparse.Namespace, generation_type: Generati args.decoder_onnx, args.use_external_data_format ): # Can't proceed further - better to raise an exception - raise ValueError(f"Could not update the input shapes for the non-initial decoder subgraph.") + raise ValueError("Could not update the input shapes for the non-initial decoder subgraph.") # If the user explicitly requests running shape inference or if we padded/mutated # weight(s)/input shape(s) in the decoder, we want to run shape inference to capture the new diff --git a/onnxruntime/python/tools/transformers/convert_tf_models_to_pytorch.py b/onnxruntime/python/tools/transformers/convert_tf_models_to_pytorch.py index a035790b50954..fb2acad9e5096 100644 --- a/onnxruntime/python/tools/transformers/convert_tf_models_to_pytorch.py +++ b/onnxruntime/python/tools/transformers/convert_tf_models_to_pytorch.py @@ -90,7 +90,7 @@ def download_tf_checkpoint(model_name, tf_models_dir="tf_models"): import re - if re.search(".zip$", tf_ckpt_url) != None: + if re.search(".zip$", tf_ckpt_url) is not None: zip_dir = download_compressed_file(tf_ckpt_url, ckpt_dir) # unzip file @@ -102,7 +102,7 @@ def download_tf_checkpoint(model_name, tf_models_dir="tf_models"): return get_ckpt_prefix_path(ckpt_dir) - elif re.search(".tar.gz$", tf_ckpt_url) != None: + elif re.search(".tar.gz$", tf_ckpt_url) is not None: tar_dir = download_compressed_file(tf_ckpt_url, ckpt_dir) # untar file diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py index 437e72fce0a31..2768e340002ac 100644 --- a/onnxruntime/python/tools/transformers/float16.py +++ b/onnxruntime/python/tools/transformers/float16.py @@ -8,7 
+8,7 @@ import itertools import logging -from typing import Dict, List +from typing import Dict import numpy as np import onnx diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 8a403e3318f04..46e9886138e68 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -2,10 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -from enum import Enum from logging import getLogger -from os import name -from sys import path from typing import Tuple, Union import numpy as np @@ -14,7 +11,6 @@ from fusion_utils import FusionUtils, NumpyHelper from onnx import NodeProto, TensorProto, helper, numpy_helper from onnx_model import OnnxModel -from shape_infer_helper import SymbolicShapeInferenceHelper, get_shape_from_type_proto logger = getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py b/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py index ba231e9e05ea4..fc9013fff0a80 100644 --- a/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py +++ b/onnxruntime/python/tools/transformers/fusion_gelu_approximation.py @@ -3,7 +3,6 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from logging import getLogger from fusion_base import Fusion from onnx import helper diff --git a/onnxruntime/python/tools/transformers/fusion_gpt_attention.py b/onnxruntime/python/tools/transformers/fusion_gpt_attention.py index 6c1d665d90e37..15f6c7f38123f 100644 --- a/onnxruntime/python/tools/transformers/fusion_gpt_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_gpt_attention.py @@ -7,7 +7,7 @@ import numpy as np from fusion_base import Fusion from fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper +from onnx import helper, numpy_helper from onnx_model import OnnxModel logger = getLogger(__name__) @@ -331,7 +331,7 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): # (2) SkipLayerNorm fusion was turned ON but upstream layer's LayerNorm + Add was not # fused into a SkipLayerNorm. This can happen if the shapes to the Add node are different. # So, keep the following check if SkipLayerNorm fusion is turned ON or OFF. 
- if another_input is not None and not another_input in layernorm_before_attention.input: + if another_input is not None and another_input not in layernorm_before_attention.input: logger.debug("Upstream Add and (Skip)LayerNormalization shall have one same input") return diff --git a/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py b/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py index a8939a5789d5f..1268c35cc75e1 100644 --- a/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py +++ b/onnxruntime/python/tools/transformers/fusion_gpt_attention_megatron.py @@ -5,10 +5,8 @@ from logging import getLogger import numpy as np -from fusion_base import Fusion from fusion_gpt_attention import FusionGptAttentionPastBase -from fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper +from onnx import helper from onnx_model import OnnxModel logger = getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py b/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py index 183c44c07aa98..8379ed7d02e81 100644 --- a/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py +++ b/onnxruntime/python/tools/transformers/fusion_gpt_attention_no_past.py @@ -4,10 +4,8 @@ # -------------------------------------------------------------------------- from logging import getLogger -import numpy as np from fusion_base import Fusion -from fusion_utils import FusionUtils -from onnx import TensorProto, helper, numpy_helper +from onnx import helper from onnx_model import OnnxModel logger = getLogger(__name__) @@ -143,9 +141,9 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): # fused into a SkipLayerNorm. This can happen if the shapes to the Add node are different. # So, keep the following check if SkipLayerNorm fusion is turned ON or OFF. if another_input is not None: - if not another_input in layernorm_before_attention.input: + if another_input not in layernorm_before_attention.input: # match openai-gpt - if not another_input in layernorm_before_attention.output: + if another_input not in layernorm_before_attention.output: logger.debug("Add and (Skip)LayerNormalization shall have one same input") return diff --git a/onnxruntime/python/tools/transformers/fusion_layernorm.py b/onnxruntime/python/tools/transformers/fusion_layernorm.py index 893d3283691be..3a1629197f6de 100644 --- a/onnxruntime/python/tools/transformers/fusion_layernorm.py +++ b/onnxruntime/python/tools/transformers/fusion_layernorm.py @@ -106,7 +106,7 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): input_name_to_nodes, output_name_to_node, ): - logger.debug(f"It is not safe to fuse LayerNormalization node. Skip") + logger.debug("It is not safe to fuse LayerNormalization node. Skip") return weight_input = mul_node.input[1 - self.model.input_index(div_node.output[0], mul_node)] diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py b/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py index a92c8f94d49af..6c44bb11e24dc 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_gelu.py @@ -81,7 +81,7 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): input_name_to_nodes, output_name_to_node, ): - logger.debug(f"It is not safe to fuse QOrderedGelu node. Skip") + logger.debug("It is not safe to fuse QOrderedGelu node. 
Skip") return self.nodes_to_remove.extend(subgraph_nodes) diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py b/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py index f8198bcaa1419..cf2b357721757 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_layernorm.py @@ -83,7 +83,7 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): input_name_to_nodes, output_name_to_node, ): - logger.debug(f"It is not safe to fuse QOrderedLayerNormalization node. Skip") + logger.debug("It is not safe to fuse QOrderedLayerNormalization node. Skip") return self.nodes_to_remove.extend(subgraph_nodes) diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py b/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py index 2fbd3262684ce..681160479faef 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_matmul.py @@ -170,7 +170,7 @@ def fuse(self, node, input_name_to_nodes: Dict, output_name_to_node: Dict): if not self.model.is_safe_to_fuse_nodes( subgraph_nodes, downstream_quantize_node.output, input_name_to_nodes, output_name_to_node ): - logger.debug(f"It is not safe to fuse QOrderedMatMul node. Skip") + logger.debug("It is not safe to fuse QOrderedMatMul node. Skip") return # Deal with the case where-in the Attention subgraph is not fused diff --git a/onnxruntime/python/tools/transformers/fusion_skiplayernorm.py b/onnxruntime/python/tools/transformers/fusion_skiplayernorm.py index 5a32415aba3e3..11bac52df125d 100644 --- a/onnxruntime/python/tools/transformers/fusion_skiplayernorm.py +++ b/onnxruntime/python/tools/transformers/fusion_skiplayernorm.py @@ -37,7 +37,7 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): return for add_input in add.input: - if self.model.get_initializer(add_input) != None: + if self.model.get_initializer(add_input) is not None: return # The number of input node of add should be 2 @@ -146,15 +146,15 @@ def fuse(self, node, input_name_to_nodes, output_name_to_node): bias_weight = NumpyHelper.to_array(initializer) break if bias_weight is None: - logger.debug(f"Bias weight not found") + logger.debug("Bias weight not found") return if len(bias_weight.shape) != 1: - logger.debug(f"Bias weight is not 1D") + logger.debug("Bias weight is not 1D") return subgraph_nodes = [node, add] if not self.model.is_safe_to_fuse_nodes(subgraph_nodes, node.output, input_name_to_nodes, output_name_to_node): - logger.debug(f"Skip fusing SkipLayerNormalization with Bias since it is not safe") + logger.debug("Skip fusing SkipLayerNormalization with Bias since it is not safe") return self.nodes_to_remove.extend(subgraph_nodes) diff --git a/onnxruntime/python/tools/transformers/fusion_utils.py b/onnxruntime/python/tools/transformers/fusion_utils.py index 865c1542c1cc9..52bad9aac294c 100644 --- a/onnxruntime/python/tools/transformers/fusion_utils.py +++ b/onnxruntime/python/tools/transformers/fusion_utils.py @@ -121,7 +121,7 @@ def check_qdq_node_for_fusion(node: NodeProto, model: OnnxModel, allow_per_tenso Returns: bool: whether the check is passed or not """ - if not node.op_type in {"QuantizeLinear", "DequantizeLinear"}: + if node.op_type not in {"QuantizeLinear", "DequantizeLinear"}: logger.debug(f"Provided node is not a Q/DQ node. 
Op Type: {node.op_type}") scale = model.get_constant_value(node.input[1]) diff --git a/onnxruntime/python/tools/transformers/io_binding_helper.py b/onnxruntime/python/tools/transformers/io_binding_helper.py index 3182107cd8050..43515a6187aaf 100644 --- a/onnxruntime/python/tools/transformers/io_binding_helper.py +++ b/onnxruntime/python/tools/transformers/io_binding_helper.py @@ -1,5 +1,5 @@ import logging -from typing import Dict, List, Union +from typing import Dict, List import numpy import torch diff --git a/onnxruntime/python/tools/transformers/machine_info.py b/onnxruntime/python/tools/transformers/machine_info.py index e872e2a6c00c6..72f6a26153564 100644 --- a/onnxruntime/python/tools/transformers/machine_info.py +++ b/onnxruntime/python/tools/transformers/machine_info.py @@ -9,9 +9,8 @@ import json import logging import platform -import sys from os import environ -from typing import Dict, List, Tuple, Union +from typing import Dict, List import cpuinfo import psutil diff --git a/onnxruntime/python/tools/transformers/models/bart/export.py b/onnxruntime/python/tools/transformers/models/bart/export.py index c1e0f3224a445..8cddaea822139 100644 --- a/onnxruntime/python/tools/transformers/models/bart/export.py +++ b/onnxruntime/python/tools/transformers/models/bart/export.py @@ -87,14 +87,14 @@ def user_command(): ) if not args.no_encoder: - logger.info(f"========== EXPORTING ENCODER ==========") + logger.info("========== EXPORTING ENCODER ==========") export_summarization_edinit.export_encoder(args) if not args.no_decoder: - logger.info(f"========== EXPORTING DECODER ==========") + logger.info("========== EXPORTING DECODER ==========") export_summarization_enc_dec_past.export_decoder(args) if not args.no_chain: - logger.info(f"========== CONVERTING MODELS ==========") + logger.info("========== CONVERTING MODELS ==========") chain_enc_dec_with_beamsearch.convert_model(args) if not args.no_inference: - logger.info(f"========== INFERENCING WITH ONNX MODEL ==========") + logger.info("========== INFERENCING WITH ONNX MODEL ==========") onnx_inference.run_inference(args) diff --git a/onnxruntime/python/tools/transformers/models/bart/utils/chain_enc_dec_with_beamsearch.py b/onnxruntime/python/tools/transformers/models/bart/utils/chain_enc_dec_with_beamsearch.py index 4230684e5e7ee..e729b07013774 100644 --- a/onnxruntime/python/tools/transformers/models/bart/utils/chain_enc_dec_with_beamsearch.py +++ b/onnxruntime/python/tools/transformers/models/bart/utils/chain_enc_dec_with_beamsearch.py @@ -88,7 +88,7 @@ def convert_model(args): ] outputs = ["sequences"] - node = helper.make_node("BeamSearch", inputs=inputs, outputs=outputs, name=f"BeamSearch_zcode") + node = helper.make_node("BeamSearch", inputs=inputs, outputs=outputs, name="BeamSearch_zcode") node.domain = "com.microsoft" # NOTE: take value from args or config node.attribute.extend( diff --git a/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py b/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py index 01a5e5d8883d7..b928232ab62a4 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py @@ -405,7 +405,7 @@ def main(args): } csv_writer.writerow(row) except: - logger.error(f"Exception", exc_info=True) + logger.error("Exception", exc_info=True) return None logger.info(f"Results are saved to file {csv_filename}") diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py 
b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index f5de69e8f0524..570e0b89329a3 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -647,7 +647,7 @@ def pytorch_inference(model, inputs: Gpt2Inputs, total_runs: int = 0): @staticmethod def onnxruntime_inference(ort_session, inputs: Gpt2Inputs, total_runs: int = 0): """Run inference of ONNX model, and returns average latency in ms when total_runs > 0 besides outputs.""" - logger.debug(f"start onnxruntime_inference") + logger.debug("start onnxruntime_inference") ort_inputs = {"input_ids": numpy.ascontiguousarray(inputs.input_ids.cpu().numpy())} @@ -715,7 +715,7 @@ def onnxruntime_inference_with_binded_io( include_copy_output_latency: bool = False, ): """Inference with IO binding. Returns outputs, and optional latency when total_runs > 0.""" - logger.debug(f"start onnxruntime_inference_with_binded_io") + logger.debug("start onnxruntime_inference_with_binded_io") # Bind inputs and outputs to onnxruntime session io_binding = Gpt2Helper.prepare_io_binding( diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py index e48fcc1cfc119..2af159b370f76 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py @@ -450,7 +450,8 @@ def run_parity(task: ParityTask, args): # Mixed precision baseline run_candidate(task, args, last_matmul_node_name, op_block_list=[]) - get_fp32_ops = lambda x: [op for op in x if op in all_ops] + def get_fp32_ops(x): + return [op for op in x if op in all_ops] if args.all: run_tuning_step0(task, fp16_baseline, all_ops, optimized_ops) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py index c122e243293aa..d57ca9bca9a5b 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py @@ -107,7 +107,7 @@ def post_processing(outputs_path, outputs_path_other): record[Path(filename).name.split(".")[0]] = diff if_close[Path(filename).name.split(".")[0]] = numpy.allclose(array, array_other, rtol=1e-04, atol=1e-04) - results = [f"Node\tDiff\tClose"] + results = ["Node\tDiff\tClose"] for k, v in sorted(record.items(), key=lambda x: x[1], reverse=True): results.append(f"{k}\t{v}\t{if_close[k]}") for line in results: diff --git a/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py index 8e6196d47026b..8e1f55d1384f2 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py @@ -265,7 +265,7 @@ def my_longformer_self_attention_forward_4_3( is_global_attn=None, output_attentions=False, ): - assert output_attentions == False + assert output_attentions is False return my_longformer_self_attention_forward_4( self, hidden_states, @@ -287,7 +287,7 @@ def my_longformer_self_attention_forward_4_3_2( is_global_attn=None, output_attentions=False, ): - assert output_attentions == False + assert output_attentions is False assert layer_head_mask is None return my_longformer_self_attention_forward_4( self, diff --git 
a/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py index 379efce27b27a..2c886c28e707a 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py +++ b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py @@ -12,7 +12,7 @@ from pathlib import Path import numpy as np -from onnx import ModelProto, TensorProto, numpy_helper +from onnx import ModelProto, TensorProto sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index c4dda99496ebe..b60c455ad11b5 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -172,7 +172,7 @@ def get_onnx_file_path( filename = f"{normalized_model_name}_{input_count}_{precision}_{device}" if optimized_by_onnxruntime: - filename += f"_ort" + filename += "_ort" directory = onnx_dir # ONNXRuntime will not write external data so the raw and optimized models shall be in same directory. @@ -268,7 +268,7 @@ def optimize_onnx_model( def modelclass_dispatcher(model_name, custom_model_class): - if custom_model_class != None: + if custom_model_class is not None: if custom_model_class in MODEL_CLASSES: return custom_model_class else: @@ -279,11 +279,11 @@ def modelclass_dispatcher(model_name, custom_model_class): import re - if re.search("-squad$", model_name) != None: + if re.search("-squad$", model_name) is not None: return "AutoModelForQuestionAnswering" - elif re.search("-mprc$", model_name) != None: + elif re.search("-mprc$", model_name) is not None: return "AutoModelForSequenceClassification" - elif re.search("gpt2", model_name) != None: + elif re.search("gpt2", model_name) is not None: return "AutoModelWithLMHead" return "AutoModel" diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index 2dcd2db9019f2..1a84ba4e5cc50 100644 --- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -526,7 +526,7 @@ def remove_useless_cast_nodes(self): """Remove cast nodes that are not needed: input and output has same data type.""" shape_infer = self.infer_runtime_shape(update=True) if shape_infer is None: - logger.info(f"Skip removing useless cast nodes since shape inference failed.") + logger.info("Skip removing useless cast nodes since shape inference failed.") return def get_data_type(input_or_output_name): @@ -734,7 +734,7 @@ def prune_graph(self, outputs=None): outputs (list): a list of graph outputs to retain. If it is None, all graph outputs will be kept. """ if len(self.graphs()) > 1: - logger.debug(f"Skip prune_graph since graph has subgraph") + logger.debug("Skip prune_graph since graph has subgraph") return if outputs is None: @@ -960,7 +960,7 @@ def save( save_model(model, output_path) def save_model_to_file(self, output_path, use_external_data_format=False, all_tensors_to_one_file=True): - logger.info(f"Sort graphs in topological order") + logger.info("Sort graphs in topological order") self.topological_sort() if output_path.endswith(".json"): # Output text for testing small model. 
diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert.py b/onnxruntime/python/tools/transformers/onnx_model_bert.py
index 81c83d222529f..590f00045b2d0 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_bert.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py
@@ -33,7 +33,7 @@ class BertOptimizationOptions(FusionOptions):
     """This class is deprecated"""
 
     def __init__(self, model_type):
-        logger.warning(f"BertOptimizationOptions is depreciated. Please use FusionOptions instead.")
+        logger.warning("BertOptimizationOptions is deprecated. Please use FusionOptions instead.")
         super().__init__(model_type)
 
 
@@ -235,7 +235,7 @@ def use_dynamic_axes(self, dynamic_batch_dim="batch_size", dynamic_seq_len="max_
             casted=True
         ) + self.get_graph_inputs_from_fused_nodes(casted=False)
 
-        dynamic_batch_inputs = {}
+        {}
         for input in self.model.graph.input:
             if input.name in bert_graph_inputs:
                 dim_proto = input.type.tensor_type.shape.dim[0]
diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py b/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py
index 33bb1d66a7528..f26f8da1baa2f 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py
@@ -3,14 +3,10 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
-import argparse
 import logging
-import sys
-from collections import deque
 
-import numpy as np
 import onnx
-from onnx import ModelProto, TensorProto, numpy_helper
+from onnx import numpy_helper
 from onnx_model_bert_tf import BertOnnxModelTF
 
 logger = logging.getLogger(__name__)
@@ -61,7 +57,7 @@ def check_attention_input(self, matmul_q, matmul_k, matmul_v, parent, output_nam
         return True, reshape_nodes
 
     def fuse_attention(self):
-        input_name_to_nodes = self.input_name_to_nodes()
+        self.input_name_to_nodes()
         output_name_to_node = self.output_name_to_node()
 
         nodes_to_remove = []
@@ -227,11 +223,9 @@ def preprocess(self):
         self.skip_reshape()
 
     def skip_reshape(self):
-        input_name_to_nodes = self.input_name_to_nodes()
-        output_name_to_node = self.output_name_to_node()
+        self.input_name_to_nodes()
+        self.output_name_to_node()
 
-        nodes_to_remove = []
-        attention_count = 0
 
         count = 0
         reshape_nodes = self.get_nodes_by_op_type("Reshape")
diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py b/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py
index 7455777273846..6d0c7cf055d27 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_bert_tf.py
@@ -3,14 +3,11 @@
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
 
-import argparse
 import logging
-import sys
-from collections import deque
 
 import numpy as np
 import onnx
-from onnx import ModelProto, TensorProto, helper, numpy_helper
+from onnx import TensorProto, helper, numpy_helper
 from onnx_model_bert import BertOnnxModel
 
 logger = logging.getLogger(__name__)
diff --git a/onnxruntime/python/tools/transformers/onnx_model_gpt2.py b/onnxruntime/python/tools/transformers/onnx_model_gpt2.py
index 92197e7e4f09f..263857ffbc130 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_gpt2.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_gpt2.py
@@ -31,7 +31,7 @@ def postprocess(self):
         """
         Remove extra reshape nodes.
         """
-        logger.debug(f"start postprocessing...")
+        logger.debug("start postprocessing...")
 
         input_name_to_nodes = self.input_name_to_nodes()
         output_name_to_node = self.output_name_to_node()
@@ -42,7 +42,6 @@ def postprocess(self):
                 gemm_node, "Reshape", input_name_to_nodes, recursive=False
             )
 
-            return_indice = []
             nodes = self.match_parent_path(gemm_node, ["Reshape", "FastGelu"], [0, 0], output_name_to_node)
             if nodes is None:
                 nodes = self.match_parent_path(
diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py
index 57c2fc380adec..773dfdf4144b5 100644
--- a/onnxruntime/python/tools/transformers/optimizer.py
+++ b/onnxruntime/python/tools/transformers/optimizer.py
@@ -99,7 +99,7 @@ def optimize_by_onnxruntime(
         kwargs["disabled_optimizers"] = disabled_optimizers
 
     if not use_gpu:
-        session = onnxruntime.InferenceSession(
+        onnxruntime.InferenceSession(
             onnx_model_path, sess_options, providers=["CPUExecutionProvider"], **kwargs
         )
     else:
@@ -111,7 +111,7 @@ def optimize_by_onnxruntime(
             gpu_ep.append("MIGraphXExecutionProvider")
             gpu_ep.append("ROCMExecutionProvider")
 
-        session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=gpu_ep, **kwargs)
+        onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=gpu_ep, **kwargs)
         assert not set(onnxruntime.get_available_providers()).isdisjoint(
             ["CUDAExecutionProvider", "ROCMExecutionProvider", "MIGraphXExecutionProvider"]
         )
@@ -408,7 +408,7 @@ def main():
     logger.debug(f"arguments:{args}")
 
     if os.path.realpath(args.input) == os.path.realpath(args.output):
-        logger.warning(f"Specified the same input and output path. Note that this may overwrite the original model")
+        logger.warning("Specified the same input and output path. Note that this may overwrite the original model")
 
     optimization_options = FusionOptions.parse(args)
 
diff --git a/onnxruntime/python/tools/transformers/profiler.py b/onnxruntime/python/tools/transformers/profiler.py
index 9f41654af3533..047c1e007d2b1 100644
--- a/onnxruntime/python/tools/transformers/profiler.py
+++ b/onnxruntime/python/tools/transformers/profiler.py
@@ -256,7 +256,7 @@ def parse_kernel_results(sess_time, threshold=0):
         else:
             op_time[op_name] = duration
 
-    lines.append(f"\nGroup kernel time by operator:")
+    lines.append("\nGroup kernel time by operator:")
     lines.append("-" * 64)
     lines.append("Total(μs)\tTime%\tOperator")
     for op_name, duration in sorted(op_time.items(), key=lambda x: x[1], reverse=True):
diff --git a/onnxruntime/python/tools/transformers/quantize_helper.py b/onnxruntime/python/tools/transformers/quantize_helper.py
index d7e9eb9718a9e..eb30ba7dc3b31 100644
--- a/onnxruntime/python/tools/transformers/quantize_helper.py
+++ b/onnxruntime/python/tools/transformers/quantize_helper.py
@@ -7,7 +7,6 @@
 import logging
 import os
 
-import onnx
 import torch
 from transformers.modeling_utils import Conv1D
 
diff --git a/onnxruntime/python/tools/transformers/shape_optimizer.py b/onnxruntime/python/tools/transformers/shape_optimizer.py
index 7174af0ac9ba0..0fc4f448a0b28 100644
--- a/onnxruntime/python/tools/transformers/shape_optimizer.py
+++ b/onnxruntime/python/tools/transformers/shape_optimizer.py
@@ -10,12 +10,9 @@
 import argparse
 import logging
 import os
-import re
 import sys
 import tempfile
-from collections import deque
 from datetime import datetime
-from pathlib import Path
 from typing import List
 
 import numpy as np
@@ -73,7 +70,7 @@ def get_reshape_shape_inputs(self):
         """
         Returns a list of shape input names of Reshape nodes.
""" - output_name_to_node = self.output_name_to_node() + self.output_name_to_node() shape_inputs = [] for node in self.model.graph.node: diff --git a/onnxruntime/test/onnx/gen_test_models.py b/onnxruntime/test/onnx/gen_test_models.py index 509c27ec4efea..e83da7bcb7b44 100644 --- a/onnxruntime/test/onnx/gen_test_models.py +++ b/onnxruntime/test/onnx/gen_test_models.py @@ -7,7 +7,7 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, TensorProto, helper, numpy_helper, utils +from onnx import TensorProto, helper, numpy_helper def parse_arguments(): diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_test_cases_generator.py b/onnxruntime/test/providers/cpu/reduction/reduction_test_cases_generator.py index 235b4111bbcb0..4cefe1439fcbc 100644 --- a/onnxruntime/test/providers/cpu/reduction/reduction_test_cases_generator.py +++ b/onnxruntime/test/providers/cpu/reduction/reduction_test_cases_generator.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import os import numpy as np diff --git a/onnxruntime/test/providers/cpu/rnn/GRU.py b/onnxruntime/test/providers/cpu/rnn/GRU.py index 3fee29e9928f0..bb016ee540a65 100644 --- a/onnxruntime/test/providers/cpu/rnn/GRU.py +++ b/onnxruntime/test/providers/cpu/rnn/GRU.py @@ -29,7 +29,6 @@ def __init__(self, **params): assert i in params, "Missing Required Input: {0}".format(i) num_directions = params["W"].shape[0] - sequence_length = params["X"].shape[0] hidden_size = params["R"].shape[-1] batch_size = params["X"].shape[1] @@ -138,7 +137,6 @@ def execute(self): # print_with_shape("r_br", r_br) # print_with_shape("r_bh", r_bh) - seq_len = self.X.shape[0] num_directions = 1 hidden_size = self.R.shape[-1] batch_size = self.X.shape[1] @@ -249,8 +247,6 @@ def ReverseDefaultActivationsSimpleWeightsNoBiasTwoRows(): print(GRU_ONNXRuntimeUnitTests.ReverseDefaultActivationsSimpleWeightsNoBiasTwoRows.__name__) - seq_length = 2 - batch_size = 2 input_size = 1 hidden_size = 3 input = np.array([[[1.0], [2.0]], [[10.0], [11.0]]]).astype(np.float32) @@ -273,8 +269,6 @@ def BidirectionalDefaultActivationsSimpleWeightsNoBias(linear_before_reset=0): + str(linear_before_reset) ) - seq_length = 2 - batch_size = 3 if linear_before_reset else 2 input_size = 1 hidden_size = 3 diff --git a/onnxruntime/test/providers/cpu/rnn/LSTM.py b/onnxruntime/test/providers/cpu/rnn/LSTM.py index 039a419552586..cae8c27513aff 100644 --- a/onnxruntime/test/providers/cpu/rnn/LSTM.py +++ b/onnxruntime/test/providers/cpu/rnn/LSTM.py @@ -3,7 +3,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals -from typing import Any, Tuple import numpy as np # type: ignore @@ -42,7 +41,7 @@ def __init__(self, **params): # type: (*Any) -> None R = params["R"] num_directions = W.shape[0] - sequence_length = X.shape[0] + X.shape[0] batch_size = X.shape[1] hidden_size = R.shape[-1] @@ -256,8 +255,6 @@ def SimpleWeightsNoBiasTwoRows(direction): # type: () -> None print(LSTM.SimpleWeightsNoBiasTwoRows.__name__ + " direction=" + direction) - seq_length = 2 - batch_size = 2 input_size = 1 hidden_size = 3 number_of_gates = 4 @@ -395,7 +392,7 @@ def export_peepholes(): # type: () -> None W = weight_scale * np.ones((1, number_of_gates * hidden_size, input_size)).astype(np.float32) R = weight_scale * np.ones((1, number_of_gates * hidden_size, hidden_size)).astype(np.float32) B = np.zeros((1, 2 * number_of_gates * hidden_size)).astype(np.float32) - seq_lens = np.repeat(input.shape[0], 
input.shape[1]).astype(np.int32) + np.repeat(input.shape[0], input.shape[1]).astype(np.int32) init_h = np.zeros((1, input.shape[1], hidden_size)).astype(np.float32) init_c = np.zeros((1, input.shape[1], hidden_size)).astype(np.float32) P = weight_scale * np.ones((1, number_of_peepholes * hidden_size)).astype(np.float32) diff --git a/onnxruntime/test/python/onnxruntime_test_ort_trainer.py b/onnxruntime/test/python/onnxruntime_test_ort_trainer.py index 30e2dc62e16da..f60e80d60d47c 100644 --- a/onnxruntime/test/python/onnxruntime_test_ort_trainer.py +++ b/onnxruntime/test/python/onnxruntime_test_ort_trainer.py @@ -3,17 +3,15 @@ import copy import os -import sys import unittest import numpy as np import onnx -import pytest import torch import torch.nn as nn import torch.nn.functional as F from helper import get_name -from numpy.testing import assert_allclose, assert_array_equal +from numpy.testing import assert_allclose from torchvision import datasets, transforms import onnxruntime @@ -603,7 +601,6 @@ def testMNISTStateDict(self): assert state_dict == {} learningRate = 0.02 - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -636,7 +633,6 @@ def testMNISTSaveAsONNX(self): assert not os.path.exists(onnx_file_name) learningRate = 0.02 - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -659,7 +655,6 @@ def testMNISTDevice(self): model.to(model_device) trainer = mnist.get_trainer(model, model_desc, device) learningRate = 0.02 - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -677,7 +672,6 @@ def testMNISTInitializerNames(self): trainer = mnist.get_trainer(model, model_desc, device) learningRate = 0.02 - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -708,7 +702,6 @@ def get_lr_this_step(global_step): internal_loss_fn=True, get_lr_this_step=get_lr_this_step, ) - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -731,7 +724,6 @@ def testMNISTFrozenWeight(self): trainer = mnist.get_trainer(model, model_desc, device, frozen_weights=["fc1.weight"]) learningRate = 0.02 - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -759,7 +751,6 @@ def testMNISTTorchBuffer(self): trainer = mnist.get_trainer(model, model_desc, device) learningRate = 0.02 - epoch = 0 data, target = next(iter(train_loader)) data, target = data.to(device), target.to(device) @@ -789,7 +780,6 @@ def testMNISTFrozenWeightCheckpoint(self): trainer = mnist.get_trainer(model, model_desc, device, frozen_weights=["fc1.weight"]) learningRate = 0.02 - epoch = 0 # do one train step data, target = next(iter(train_loader)) @@ -835,7 +825,6 @@ def testMNISTTrainingCheckpoint(self): ) learningRate = 0.02 - epoch = 0 # do 5 train step for i in range(5): diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 89fd90ad3a19e..e8b0b029bea5f 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -170,7 +170,6 @@ def testSetProvidersWithOptions(self): if "CUDAExecutionProvider" in onnxrt.get_available_providers(): import ctypes - import sys CUDA_SUCCESS = 0 @@ -663,7 +662,7 @@ def testRaiseWrongNumInputs(self): with self.assertRaises(ValueError) as context: sess = 
onnxrt.InferenceSession(get_name("logicaland.onnx"), providers=onnxrt.get_available_providers()) a = np.array([[True, True], [False, False]], dtype=bool) - res = sess.run([], {"input:0": a}) + sess.run([], {"input:0": a}) self.assertTrue("Model requires 2 inputs" in str(context.exception)) @@ -734,7 +733,7 @@ def testGraphOptimizationLevel(self): a = np.array([[True, True], [False, False]], dtype=bool) b = np.array([[True, False], [True, False]], dtype=bool) - res = sess.run([], {"input1:0": a, "input:0": b}) + sess.run([], {"input1:0": a, "input:0": b}) def testSequenceLength(self): sess = onnxrt.InferenceSession(get_name("sequence_length.onnx"), providers=available_providers_without_tvm) @@ -984,14 +983,14 @@ def testRegisterCustomOpsLibrary(self): so2 = so1 # Model loading successfully indicates that the custom op node could be resolved successfully - sess2 = onnxrt.InferenceSession( + onnxrt.InferenceSession( custom_op_model, sess_options=so2, providers=available_providers_without_tvm_and_tensorrt ) # Create another SessionOptions instance with the same shared library referenced so3 = onnxrt.SessionOptions() so3.register_custom_ops_library(shared_library) - sess3 = onnxrt.InferenceSession( + onnxrt.InferenceSession( custom_op_model, sess_options=so3, providers=available_providers_without_tvm_and_tensorrt ) @@ -1159,7 +1158,7 @@ def testSparseTensorCsrFormat(self): def testRunModelWithCudaCopyStream(self): available_providers = onnxrt.get_available_providers() - if not "CUDAExecutionProvider" in available_providers: + if "CUDAExecutionProvider" not in available_providers: print("Skipping testRunModelWithCudaCopyStream when CUDA is not available") else: # adapted from issue #4829 for a race condition when copy is not on default stream @@ -1176,7 +1175,7 @@ def testRunModelWithCudaCopyStream(self): session = onnxrt.InferenceSession(get_name("issue4829.onnx"), providers=providers) shape = np.array([2, 2], dtype=np.int64) for iteration in range(100000): - result = session.run(output_names=["output"], input_feed={"shape": shape}) + session.run(output_names=["output"], input_feed={"shape": shape}) def testSharedAllocatorUsingCreateAndRegisterAllocator(self): # Create and register an arena based allocator diff --git a/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py b/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py index b71b3a07cd41f..eecc31edb78d1 100644 --- a/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py +++ b/onnxruntime/test/python/onnxruntime_test_python_cudagraph.py @@ -1,11 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
 
-import gc
-import os
-import sys
-import threading
-import time
 
 # -*- coding: UTF-8 -*-
 import unittest
@@ -14,7 +9,6 @@
 from helper import get_name
 
 import onnxruntime as onnxrt
-from onnxruntime.capi.onnxruntime_pybind11_state import Fail
 
 
 class TestInferenceSessionWithCudaGraph(unittest.TestCase):
diff --git a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py
index ff1c0d17fd3ec..03eb64e88af90 100644
--- a/onnxruntime/test/python/onnxruntime_test_python_iobinding.py
+++ b/onnxruntime/test/python/onnxruntime_test_python_iobinding.py
@@ -43,7 +43,7 @@ def create_expected_output_alternate(self):
         return np.array([[2.0, 8.0], [18.0, 32.0], [50.0, 72.0]], dtype=np.float32)
 
     def test_bind_input_to_cpu_arr(self):
-        input = self.create_numpy_input()
+        self.create_numpy_input()
 
         session = onnxrt.InferenceSession(get_name("mul_1.onnx"), providers=onnxrt.get_available_providers())
         io_binding = session.io_binding()
diff --git a/onnxruntime/test/python/onnxruntime_test_python_mlops.py b/onnxruntime/test/python/onnxruntime_test_python_mlops.py
index b6604a6d51e8a..217361f7a880f 100644
--- a/onnxruntime/test/python/onnxruntime_test_python_mlops.py
+++ b/onnxruntime/test/python/onnxruntime_test_python_mlops.py
@@ -1,7 +1,6 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-import os
 
 # -*- coding: UTF-8 -*-
 import unittest
diff --git a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py
index fed6892f13f4e..8c75ce331a4de 100644
--- a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py
+++ b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py
@@ -5,7 +5,7 @@
 
 # -*- coding: UTF-8 -*-
 import onnx
-from onnx import AttributeProto, GraphProto, TensorProto, helper
+from onnx import TensorProto, helper
 
 if os.path.exists(
     os.path.join(
diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py
index 045bddccbfbb2..e11ef1187d922 100644
--- a/onnxruntime/test/python/quantization/test_conv_dynamic.py
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@@ -12,10 +12,8 @@
 import onnx
 from onnx import TensorProto, helper, numpy_helper
 from op_test_utils import (
-    TestDataFeeds,
     check_model_correctness,
     check_op_type_count,
-    check_op_type_order,
     check_qtype_by_node_type,
 )
 
diff --git a/onnxruntime/test/python/quantization/test_onnx_model.py b/onnxruntime/test/python/quantization/test_onnx_model.py
index fc29810e9b97d..d2c34f159a97c 100644
--- a/onnxruntime/test/python/quantization/test_onnx_model.py
+++ b/onnxruntime/test/python/quantization/test_onnx_model.py
@@ -11,9 +11,8 @@
 import numpy as np
 import onnx
 from onnx import TensorProto, helper, numpy_helper
-from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_op_type_order
+from op_test_utils import check_op_type_order
 
-import onnxruntime
 from onnxruntime.quantization.onnx_model import ONNXModel
 
 
diff --git a/onnxruntime/test/python/quantization/test_op_gavgpool.py b/onnxruntime/test/python/quantization/test_op_gavgpool.py
index a34c52f912ced..eea115b6f6847 100644
--- a/onnxruntime/test/python/quantization/test_op_gavgpool.py
+++ b/onnxruntime/test/python/quantization/test_op_gavgpool.py
@@ -13,7 +13,7 @@
 from onnx import TensorProto, helper
 from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
-from onnxruntime.quantization import QuantFormat, QuantType, quantize_dynamic, quantize_static
+from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
 
 
 class TestOpGlobalAveragePool(unittest.TestCase):
diff --git a/onnxruntime/test/python/quantization/test_op_pooling.py b/onnxruntime/test/python/quantization/test_op_pooling.py
index b0561bd79f8e1..f47b87f64c403 100644
--- a/onnxruntime/test/python/quantization/test_op_pooling.py
+++ b/onnxruntime/test/python/quantization/test_op_pooling.py
@@ -14,7 +14,6 @@
 from op_test_utils import (
     TestDataFeeds,
     check_model_correctness,
-    check_op_nodes,
     check_op_type_count,
     check_qtype_by_node_type,
 )
diff --git a/onnxruntime/test/python/quantization/test_symmetric_flag.py b/onnxruntime/test/python/quantization/test_symmetric_flag.py
index 26f7ba6ce59b3..c3aa94db99685 100644
--- a/onnxruntime/test/python/quantization/test_symmetric_flag.py
+++ b/onnxruntime/test/python/quantization/test_symmetric_flag.py
@@ -48,7 +48,7 @@ def perform_quantization(self, activations, weight, act_sym, wgt_sym):
 
         # One-layer convolution model
         act = helper.make_tensor_value_info("ACT", TensorProto.FLOAT, activations[0].shape)
-        wgt = helper.make_tensor_value_info("WGT", TensorProto.FLOAT, weight.shape)
+        helper.make_tensor_value_info("WGT", TensorProto.FLOAT, weight.shape)
         res = helper.make_tensor_value_info("RES", TensorProto.FLOAT, [None, None, None, None])
         wgt_init = numpy_helper.from_array(weight, "WGT")
         conv_node = onnx.helper.make_node("Conv", ["ACT", "WGT"], ["RES"])
diff --git a/onnxruntime/test/python/transformers/gpt2_model_generator.py b/onnxruntime/test/python/transformers/gpt2_model_generator.py
index 2fe24739fcffb..aabe4ae391bb6 100644
--- a/onnxruntime/test/python/transformers/gpt2_model_generator.py
+++ b/onnxruntime/test/python/transformers/gpt2_model_generator.py
@@ -5,7 +5,6 @@
 # --------------------------------------------------------------------------
 
 import math
-from typing import List
 
 import numpy
 import onnx
@@ -494,9 +493,6 @@ def create_gpt2_attention(hidden_size=64, num_heads=4, max_seq_len=32, switch_ad
     )
     initializers.append(helper.make_tensor("axes_1", TensorProto.INT64, [1], [1]))
 
-    batch_size = 1
-    sequence_length = 3
-    past_sequence_length = 2
     graph = helper.make_graph(
         [node for node in nodes if node],
         "GPT2",  # name
diff --git a/onnxruntime/test/python/transformers/model_loader.py b/onnxruntime/test/python/transformers/model_loader.py
index 126df89240c70..2d871123ec8bb 100644
--- a/onnxruntime/test/python/transformers/model_loader.py
+++ b/onnxruntime/test/python/transformers/model_loader.py
@@ -5,7 +5,6 @@
 # --------------------------------------------------------------------------
 
 import os
-import unittest
 
 from onnx import ModelProto, TensorProto, external_data_helper, load_model, numpy_helper
 from parity_utilities import find_transformers_source
diff --git a/onnxruntime/test/python/transformers/parity_utilities.py b/onnxruntime/test/python/transformers/parity_utilities.py
index b61f9fbcf2b61..2322c0a0996dd 100644
--- a/onnxruntime/test/python/transformers/parity_utilities.py
+++ b/onnxruntime/test/python/transformers/parity_utilities.py
@@ -48,7 +48,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, device):
     input_hidden_states = create_inputs(hidden_size=hidden_size, float16=float16, device=device)
 
     with torch.no_grad():
-        outputs = model(input_hidden_states)
+        model(input_hidden_states)
 
     dynamic_axes = {
         "input": {0: "batch_size", 1: "seq_len"},
@@ -132,7 +132,6 @@ def compare_outputs(torch_outputs, ort_outputs, atol=1e-06, verbose=True):
 
 def create_ort_session(onnx_model_path, use_gpu=True):
     from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions
-    from onnxruntime import __version__ as onnxruntime_version
 
     sess_options = SessionOptions()
     sess_options.graph_optimization_level = GraphOptimizationLevel.ORT_DISABLE_ALL
diff --git a/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py b/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py
index 47145fc213a0d..2a2f17852652d 100644
--- a/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py
+++ b/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py
@@ -26,7 +26,6 @@
 import argparse
 import os
 import random
-import sys
 import timeit
 from pathlib import Path
 
@@ -302,7 +301,7 @@ def use_dynamic_axes(self, dynamic_batch_dim="batch_size", seq_len=7):
         """
         Update input and output shape to use dynamic axes.
         """
-        dynamic_batch_inputs = {}
+        {}
         for input in self.model.graph.input:
             dim_proto = input.type.tensor_type.shape.dim[0]
             dim_proto.dim_param = dynamic_batch_dim
@@ -355,7 +354,7 @@ def generate_test_data(
     sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
     sess = onnxruntime.InferenceSession(onnx_file, sess_options, providers=["CPUExecutionProvider"])
 
-    input1_name = sess.get_inputs()[0].name
+    sess.get_inputs()[0].name
     output_names = [output.name for output in sess.get_outputs()]
     inputs = {
         "input_ids": input_1,
diff --git a/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py b/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py
index 7f613a8674989..afa683751cf96 100644
--- a/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py
+++ b/onnxruntime/test/python/transformers/test_data/gpt2_pytorch1.5_opset11/generate_tiny_gpt2_model.py
@@ -7,9 +7,6 @@
 
 import argparse
 import os
-import random
-import sys
-import timeit
 from pathlib import Path
 
 import numpy as np
diff --git a/onnxruntime/test/python/transformers/test_optimizer.py b/onnxruntime/test/python/transformers/test_optimizer.py
index dcc83a51f70d9..2c7f1dc4f6bea 100644
--- a/onnxruntime/test/python/transformers/test_optimizer.py
+++ b/onnxruntime/test/python/transformers/test_optimizer.py
@@ -310,7 +310,7 @@ def test_huggingface_bart_fusion(self):
 
 class TestTensorflowModelOptimization(unittest.TestCase):
     def Setup(self):
         try:
-            import tf2onnx
+            import tf2onnx  # noqa: F401
         except ImportError:
             self.skipTest("skip TestBertOptimizationTF since tf2onnx not installed")
diff --git a/onnxruntime/test/python/transformers/test_parity_decoder_attention.py b/onnxruntime/test/python/transformers/test_parity_decoder_attention.py
index 6c05e321f7618..710013e4e8bab 100644
--- a/onnxruntime/test/python/transformers/test_parity_decoder_attention.py
+++ b/onnxruntime/test/python/transformers/test_parity_decoder_attention.py
@@ -10,9 +10,7 @@
 # license information.
# ------------------------------------------------------------------------- -import math -import os -from typing import Dict, List, Optional, Tuple +from typing import List, Optional, Tuple import numpy import torch diff --git a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py index c29cf969734c4..a3b7efc08fd82 100644 --- a/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py +++ b/onnxruntime/test/python/transformers/test_parity_huggingface_gpt_attention.py @@ -19,7 +19,7 @@ import pytest import torch from onnx import helper -from parity_utilities import compare_outputs, create_ort_session, diff_outputs +from parity_utilities import compare_outputs, create_ort_session from torch import nn from transformers.modeling_utils import Conv1D @@ -219,7 +219,7 @@ def export_onnx(model, onnx_model_path, float16, hidden_size, num_attention_head ) with torch.no_grad(): - outputs = model(input_hidden_states, attention_mask=attention_mask, layer_past=layer_past) + model(input_hidden_states, attention_mask=attention_mask, layer_past=layer_past) dynamic_axes = { "input_hidden_states": {0: "batch_size", 1: "seq_len"}, diff --git a/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py b/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py index d681723810e65..df6f526e89999 100644 --- a/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py +++ b/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/ep_partitioning_tests.py b/onnxruntime/test/testdata/ep_partitioning_tests.py index a85b9bda6c187..367cafb795bad 100644 --- a/onnxruntime/test/testdata/ep_partitioning_tests.py +++ b/onnxruntime/test/testdata/ep_partitioning_tests.py @@ -1,4 +1,3 @@ -import numpy as np import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py index 6b126fb3a2a1f..8f4d5f4fc2f39 100644 --- a/onnxruntime/test/testdata/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/matmul_integer_to_float.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/sparse_initializer_as_output.py b/onnxruntime/test/testdata/sparse_initializer_as_output.py index 741ed6439e815..1c598416093ad 100644 --- a/onnxruntime/test/testdata/sparse_initializer_as_output.py +++ b/onnxruntime/test/testdata/sparse_initializer_as_output.py @@ -1,21 +1,13 @@ import argparse -import os import sys import traceback -from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, TypeVar, Union, cast import numpy as np import onnx from onnx import ( - AttributeProto, - GraphProto, - SparseTensorProto, TensorProto, ValueInfoProto, helper, - mapping, - numpy_helper, - utils, ) from onnx.helper import make_opsetid diff --git a/onnxruntime/test/testdata/sparse_to_dense_matmul.py b/onnxruntime/test/testdata/sparse_to_dense_matmul.py index 26fb426968c39..c65d06808f038 100644 --- a/onnxruntime/test/testdata/sparse_to_dense_matmul.py +++ b/onnxruntime/test/testdata/sparse_to_dense_matmul.py @@ -1,21 +1,12 @@ import argparse -import os import sys import traceback -from typing import Any, Callable, Dict, List, Optional, Sequence, Text, Tuple, TypeVar, Union, cast -import numpy as np import onnx from onnx 
import ( - AttributeProto, - GraphProto, - SparseTensorProto, TensorProto, ValueInfoProto, helper, - mapping, - numpy_helper, - utils, ) from onnx.helper import make_opsetid diff --git a/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py index d54b15c276e28..7df02979f2bd9 100644 --- a/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/lr_scheduler/lr_scheduler_test_data_generator.py @@ -42,7 +42,6 @@ def lr_lambda(self, step): def main(): """Main entry.""" num_training_steps = 100 - seed = 8888 device = "cuda" batch_size, dimension_in, dimension_hidden = 2, 2, 3 diff --git a/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py b/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py index ac064963b5e43..a3d7946d63214 100644 --- a/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py +++ b/onnxruntime/test/testdata/test_data_generation/sgd_test/sgd_test_data_generator.py @@ -136,7 +136,7 @@ def _data_func(): target = torch.randn(batch_size, dimension_hidden, device=device, dtype=torch.float32) return input, target - json_file_name = f"sgd_test_single_weight.json" + json_file_name = "sgd_test_single_weight.json" generate_sgd_test_data(seed, _model_setup_func, _data_func, run_step_count, json_file_name, device) @@ -154,7 +154,7 @@ def data_func(): target = torch.randn(batch_size, dim_out, device=device, dtype=torch.float32) return input, target - json_file_name = f"sgd_test_multiple_weights.json" + json_file_name = "sgd_test_multiple_weights.json" generate_sgd_test_data(seed, _model_setup_func, data_func, run_step_count, json_file_name, device) diff --git a/onnxruntime/test/testdata/transform/cast_elimination.py b/onnxruntime/test/testdata/transform/cast_elimination.py index fbf0932dcaa0d..466221bcf7aac 100644 --- a/onnxruntime/test/testdata/transform/cast_elimination.py +++ b/onnxruntime/test/testdata/transform/cast_elimination.py @@ -1,6 +1,5 @@ -import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper X1 = helper.make_tensor_value_info("x1", TensorProto.INT64, [4, 4]) X2 = helper.make_tensor_value_info("x2", TensorProto.INT64, [4, 1]) diff --git a/onnxruntime/test/testdata/transform/computation_reduction.py b/onnxruntime/test/testdata/transform/computation_reduction.py index 7d33c9cc66c89..9f2e6ac7c07f0 100644 --- a/onnxruntime/test/testdata/transform/computation_reduction.py +++ b/onnxruntime/test/testdata/transform/computation_reduction.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper vocab_size = 256 # 30258 diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py index ec0fdc888bed8..216e0d2a05d5f 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_add.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, 
GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py index d14f8a71adfc5..29abfc18e5d0e 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_div.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_gelu.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_gelu.py index eade1b868ba84..b8b81a747b118 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_gelu.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_gelu.py @@ -1,6 +1,5 @@ -import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py index 9473d05010129..f403b093b16b6 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_layernormalization.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py index 50167bbd0a3a3..65767a8986746 100755 --- a/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py +++ b/onnxruntime/test/testdata/transform/computation_reduction/gathernd/gathernd_matmul.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, ["batch", "seqlen", 128]) unsqueezed_masked_lm_positions = helper.make_tensor_value_info( diff --git a/onnxruntime/test/testdata/transform/concat_slice_elimination.py 
b/onnxruntime/test/testdata/transform/concat_slice_elimination.py index 88a1236922a19..1134409c630a3 100644 --- a/onnxruntime/test/testdata/transform/concat_slice_elimination.py +++ b/onnxruntime/test/testdata/transform/concat_slice_elimination.py @@ -1,8 +1,7 @@ -import random import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper batch = 3 hidden_size = 4 diff --git a/onnxruntime/test/testdata/transform/cse/generate.py b/onnxruntime/test/testdata/transform/cse/generate.py index 1cd1b54b09a53..01d62422983b5 100644 --- a/onnxruntime/test/testdata/transform/cse/generate.py +++ b/onnxruntime/test/testdata/transform/cse/generate.py @@ -1,7 +1,7 @@ import os import onnx -from onnx import AttributeProto, GraphProto, TensorProto, helper, shape_inference +from onnx import TensorProto, helper, shape_inference _this_dir = os.path.abspath(os.path.dirname(__file__)) diff --git a/onnxruntime/test/testdata/transform/expand_elimination.py b/onnxruntime/test/testdata/transform/expand_elimination.py index da1530876348e..226c23fa66389 100644 --- a/onnxruntime/test/testdata/transform/expand_elimination.py +++ b/onnxruntime/test/testdata/transform/expand_elimination.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X1 = helper.make_tensor_value_info("input1", TensorProto.FLOAT, [2, 1]) X2 = helper.make_tensor_value_info("input2", TensorProto.FLOAT, ["dynamic", 4]) diff --git a/onnxruntime/test/testdata/transform/fusion/attention_gen.py b/onnxruntime/test/testdata/transform/fusion/attention_gen.py index cd1569ae5cd2a..888242a1ba9ba 100644 --- a/onnxruntime/test/testdata/transform/fusion/attention_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/attention_gen.py @@ -1,5 +1,4 @@ import sys -from enum import Enum import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py b/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py index 6cc5cdeb79f4a..65b37a8ed9dab 100644 --- a/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py +++ b/onnxruntime/test/testdata/transform/fusion/constant_folding_with_shape_to_initializer.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper X = helper.make_tensor_value_info("input", TensorProto.FLOAT, [2, 4, 8]) Y = helper.make_tensor_value_info("output", TensorProto.FLOAT, [2, 4, 16]) diff --git a/onnxruntime/test/testdata/transform/fusion/div_mul.py b/onnxruntime/test/testdata/transform/fusion/div_mul.py index 7263a986d40ca..a229f7f441f0e 100644 --- a/onnxruntime/test/testdata/transform/fusion/div_mul.py +++ b/onnxruntime/test/testdata/transform/fusion/div_mul.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py b/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py index 6eff2e01ec8bf..11b7dd899c452 100644 --- a/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py +++ 
b/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py b/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py index cc1058c37e31f..2a55e5d52e6b1 100644 --- a/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu.py index aaaffa4ab398a..90d1231093f1a 100644 --- a/onnxruntime/test/testdata/transform/fusion/fast_gelu.py +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper # Gelu formula: x * 0.5 * (1.0 + tanh(0.7978845608028654 * x * (1.0 + 0.044715 * x * x))) diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py index 5ff752afa7e6a..48483ba50fe9e 100644 --- a/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu2.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper # Gelu formula: x * 0.5 * (1.0 + tanh((sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))) has_bias = False # change it to True to generate fast_gelu_openai_with_bias.onnx diff --git a/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py b/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py index 5220751a3e364..d91e186296137 100644 --- a/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py +++ b/onnxruntime/test/testdata/transform/fusion/fast_gelu3_with_casts.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper # Gelu formula: x * 0.5 * (1.0 + tanh((sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))) diff --git a/onnxruntime/test/testdata/transform/fusion/gelu_gen.py b/onnxruntime/test/testdata/transform/fusion/gelu_gen.py index 45f546a04635e..22c214032b6a0 100644 --- a/onnxruntime/test/testdata/transform/fusion/gelu_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/gelu_gen.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper """ Generate test model for Gelu subgraph pattern 2: diff --git a/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py b/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py index 447b873f01c6e..18ac2adc247ae 100644 --- a/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py +++ b/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git 
a/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py b/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py index eb184fef5e59d..37ece6e927b0c 100644 --- a/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py +++ b/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py b/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py index 091d38d9e6797..b289db74cd267 100644 --- a/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py +++ b/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py @@ -1,6 +1,4 @@ -from enum import Enum -import numpy as np import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py index 7bba71723b2c8..233c692b68e8a 100644 --- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py +++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/fusion/not_where.py b/onnxruntime/test/testdata/transform/fusion/not_where.py index 7e48164d5161a..36523d8fa3f5a 100644 --- a/onnxruntime/test/testdata/transform/fusion/not_where.py +++ b/onnxruntime/test/testdata/transform/fusion/not_where.py @@ -1,4 +1,3 @@ -from enum import Enum import onnx from onnx import OperatorSetIdProto, TensorProto, helper diff --git a/onnxruntime/test/testdata/transform/id-elim.py b/onnxruntime/test/testdata/transform/id-elim.py index 838fbb1f4a798..eef8011e7fe23 100644 --- a/onnxruntime/test/testdata/transform/id-elim.py +++ b/onnxruntime/test/testdata/transform/id-elim.py @@ -1,6 +1,5 @@ -import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper X1 = helper.make_tensor_value_info("x1", TensorProto.INT64, [4, 4]) X2 = helper.make_tensor_value_info("x2", TensorProto.INT64, [4, 4]) diff --git a/onnxruntime/test/testdata/transform/id-scan9_sum.py b/onnxruntime/test/testdata/transform/id-scan9_sum.py index f2a7de656c8ee..c813bbfc18d8e 100644 --- a/onnxruntime/test/testdata/transform/id-scan9_sum.py +++ b/onnxruntime/test/testdata/transform/id-scan9_sum.py @@ -1,6 +1,5 @@ -import numpy as np import onnx -from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper initial = helper.make_tensor_value_info("initial", TensorProto.FLOAT, [2]) x = helper.make_tensor_value_info("x", TensorProto.FLOAT, [3, 2]) diff --git a/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py index 323ebf08e4acd..7879bb4d4e0ff 100644 --- a/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py +++ b/onnxruntime/test/testdata/transform/model_parallel/bart_mlp_megatron_basic_test.py @@ -1,6 +1,6 @@ import numpy as np import onnx -from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper +from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper 
 hidden_size = 4
 weight_dim_to_split = 16
 
diff --git a/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py
index 596b294ca27ae..77cc146f4f8ce 100644
--- a/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py
+++ b/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py
@@ -1,8 +1,7 @@
-import random
 
 import numpy as np
 import onnx
-from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper
+from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper
 
 batch = 6
 hidden_size = 4
diff --git a/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py
index b26d384cbb4c9..5dec4899d59af 100644
--- a/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py
+++ b/onnxruntime/test/testdata/transform/model_parallel/mlp_megatron_basic_test.py
@@ -1,6 +1,6 @@
 import numpy as np
 import onnx
-from onnx import AttributeProto, GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper
+from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper
 
 hidden_size = 4
 weight_dim_to_split = 16
diff --git a/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py
index 5083ceeb434db..30e0a58a53d2d 100644
--- a/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py
+++ b/onnxruntime/test/testdata/transform/model_parallel/self_attention_megatron_basic_test.py
@@ -1,6 +1,6 @@
 import numpy as np
 import onnx
-from onnx import GraphProto, OperatorSetIdProto, TensorProto, helper, numpy_helper
+from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper
 
 hidden_size = 4
 attention_head = 2

From a3df22cffec3632f5681331adfe8a5802619c249 Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 16 Jan 2023 04:35:26 +0000
Subject: [PATCH 07/33] Ignore E501

---
 .flake8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.flake8 b/.flake8
index b40efab4e07bf..7831fd148230a 100644
--- a/.flake8
+++ b/.flake8
@@ -3,4 +3,4 @@ max-line-length = 120
 per-file-ignores =
     __init__.py:F401
 # NOTE: Edit exclude list in .lintrunner.toml
-ignore = W503, E203
+ignore = W503, E203, E501

From 5605cba39c539484cf4acbb92dbf03336d96dd0d Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 16 Jan 2023 04:36:44 +0000
Subject: [PATCH 08/33] Update config

---
 .lintrunner.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.lintrunner.toml b/.lintrunner.toml
index ad3a43230291f..d06f6e2bf0a8b 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -82,9 +82,9 @@ include_patterns = [
     '**/*.py',
 ]
 exclude_patterns = [
-    'cmake/*',
+    'cmake/**',
     'orttraining/*',
-    'onnxruntime/core/flatbuffers/*',
+    'onnxruntime/core/flatbuffers/**',
 ]
 command = [
     'python3',

From da3ebcfc246b2f291b74518d1e0624db543dbd0a Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 16 Jan 2023 04:37:49 +0000
Subject: [PATCH 09/33] Remove future

---
 .../orttraining/test/python/orttraining_run_bert_pretrain.py | 4 ----
 .../orttraining/test/python/orttraining_test_transformers.py | 4 ----
 orttraining/tools/scripts/nv_run_pretraining.py              | 3 ---
 3 files changed, 11 deletions(-)

diff --git a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py
index a087a97da5a54..3ed121543d24a 100644
--- a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py
+++ b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py
@@ -1,7 +1,3 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 # ==================
 import os
 import shutil
diff --git a/orttraining/orttraining/test/python/orttraining_test_transformers.py b/orttraining/orttraining/test/python/orttraining_test_transformers.py
index 1e73da0f65b3f..611e7f0bc0c6b 100644
--- a/orttraining/orttraining/test/python/orttraining_test_transformers.py
+++ b/orttraining/orttraining/test/python/orttraining_test_transformers.py
@@ -1,7 +1,3 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
 import unittest
 import shutil
 import pytest
diff --git a/orttraining/tools/scripts/nv_run_pretraining.py b/orttraining/tools/scripts/nv_run_pretraining.py
index 3e51a8886ecb6..e7977595ac813 100644
--- a/orttraining/tools/scripts/nv_run_pretraining.py
+++ b/orttraining/tools/scripts/nv_run_pretraining.py
@@ -15,9 +15,6 @@
 # limitations under the License.
 """BERT finetuning runner."""
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 
 # ==================
 import csv

From 028558d62c1289127aa41d0dfcc5b0e8a3d7cd47 Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 16 Jan 2023 04:40:03 +0000
Subject: [PATCH 10/33] Black format

---
 .../tools/quantization/operators/gemm.py | 8 +---
 .../operators/qdq_base_operator.py | 1 -
 .../tensorrt/perf/build/ort_build_latest.py | 40 ++++++++++++++----
 .../python/tools/tensorrt/perf/perf_utils.py | 1 -
 .../transformers/onnx_model_bert_keras.py | 1 -
 .../python/tools/transformers/optimizer.py | 4 +-
 onnxruntime/test/providers/cpu/rnn/LSTM.py | 1 -
 .../python/quantization/test_conv_dynamic.py | 6 +--
 .../python/quantization/test_op_pooling.py | 7 +---
 .../testdata/dynamic_quantize_matmul_test.py | 1 -
 .../test/testdata/matmul_integer_to_float.py | 1 -
 .../testdata/sparse_initializer_as_output.py | 6 +--
 .../test/testdata/sparse_to_dense_matmul.py | 6 +--
 .../transform/concat_slice_elimination.py | 1 -
 .../test/testdata/transform/fusion/div_mul.py | 1 -
 .../fusion/dynamic_quantize_matmul.py | 1 -
 .../transform/fusion/embed_layer_norm_gen.py | 1 -
 .../transform/fusion/isinf_reducesum.py | 1 -
 .../transform/fusion/layer_norm_t5_gen.py | 1 -
 .../fusion/layer_norm_with_cast_2.py | 1 -
 .../fusion/matmul_integer_to_float.py | 1 -
 .../testdata/transform/fusion/not_where.py | 1 -
 ...bart_self_attention_megatron_basic_test.py | 1 -
 .../orttraining/eager/opgen/onnxgen.py | 1 +
 .../orttraining/eager/opgen/opgen/ast.py | 3 +-
 .../eager/opgen/opgen/custom_ops.py | 2 +-
 .../orttraining/eager/opgen/opgen/lexer.py | 4 +-
 .../orttraining/eager/opgen/opgen/onnxops.py | 2 +-
 .../orttraining/eager/opgen/opgen/parser.py | 5 ++-
 .../orttraining/eager/opgen/opgen/writer.py | 2 +-
 .../eager/opgen/opgen_test/lexer_test.py | 2 +-
 .../orttraining/eager/test/__main__.py | 2 +-
 .../orttraining/eager/test/ort_eps_test.py | 9 ++--
 .../orttraining/eager/test/ort_init.py | 1 +
 orttraining/orttraining/eager/test/ort_ops.py | 2 +-
 .../orttraining/eager/test/ort_tensor.py | 3 +-
 .../test_model_OrtModule/mnist_fc_training.py | 12 +++---
.../orttraining/eager/test_models/mnist_fc.py | 8 ++-- .../eager/test_models/mnist_fc_training.py | 1 - .../eager/test_models/scratchpad.py | 2 +- .../orttraining/python/checkpointing_utils.py | 1 + .../python/deprecated/training_session.py | 4 +- orttraining/orttraining/python/ort_trainer.py | 2 +- orttraining/orttraining/python/pt_patch.py | 6 +-- .../orttraining/python/training/__init__.py | 6 +-- .../python/training/_checkpoint_storage.py | 5 ++- .../python/training/amp/__init__.py | 2 +- .../orttraining/python/training/checkpoint.py | 11 ++--- .../gradient_graph/_gradient_graph_tools.py | 3 +- .../python/training/model_desc_validation.py | 5 ++- .../training/onnxblock/loss/__init__.py | 2 +- .../python/training/optim/__init__.py | 9 ++-- .../training/optim/_apex_amp_modifier.py | 2 + .../training/optim/_modifier_registry.py | 2 +- .../python/training/optim/fused_adam.py | 4 +- .../_custom_autograd_function_runner.py | 6 +-- .../python/training/ortmodule/_fallback.py | 13 +++--- .../_gradient_accumulation_manager.py | 3 +- .../_graph_execution_manager_factory.py | 4 +- .../python/training/ortmodule/_logger.py | 7 ++-- .../ortmodule/_torch_module_factory.py | 2 +- .../ortmodule/_torch_module_interface.py | 4 +- .../training/ortmodule/_torch_module_ort.py | 13 +++--- .../ortmodule/_torch_module_pytorch.py | 6 +-- .../python/training/ortmodule/_utils.py | 2 +- .../json_config/_load_config_from_json.py | 9 ++-- .../python/training/ortmodule/ortmodule.py | 24 +++++------ .../python/training/postprocess.py | 11 +++-- .../training/torchdynamo/register_backend.py | 2 +- .../python/training/utils/data/__init__.py | 2 +- .../python/training/utils/data/sampler.py | 9 ++-- .../test/external_custom_ops/setup.py | 10 +++-- .../test/external_custom_ops/test.py | 7 ++-- .../test/external_transformers_test.py | 10 +++-- .../orttraining/test/python/launch_test.py | 5 +-- .../python/onnxruntime_test_register_ep.py | 3 +- ...orttraining_ortmodule_distributed_tests.py | 5 +-- .../python/orttraining_run_bert_pretrain.py | 37 ++++++++-------- ...rttraining_run_frontend_batch_size_test.py | 2 +- .../test/python/orttraining_run_glue.py | 9 ++-- .../python/orttraining_run_multiple_choice.py | 17 ++++---- .../orttraining_test_bert_postprocess.py | 2 +- .../orttraining_test_checkpoint_storage.py | 9 ++-- .../python/orttraining_test_data_loader.py | 6 ++- .../python/orttraining_test_debuggability.py | 34 ++++++--------- .../test/python/orttraining_test_dort.py | 2 +- ...aining_test_experimental_gradient_graph.py | 3 +- ...rttraining_test_ortmodule_autograd_dist.py | 9 ++-- ...training_test_ortmodule_bert_classifier.py | 31 ++++++++------ ...test_ortmodule_bert_classifier_autocast.py | 31 ++++++++------ ...t_ortmodule_deepspeed_pipeline_parallel.py | 10 ++--- ...g_test_ortmodule_deepspeed_zero_stage_1.py | 10 ++--- ...test_ortmodule_experimental_json_config.py | 4 +- ...t_ortmodule_fairscale_sharded_optimizer.py | 16 +++---- .../python/orttraining_test_ortmodule_poc.py | 5 ++- .../orttraining_test_ortmodule_pytorch_ddp.py | 7 ++-- ...ng_test_ortmodule_torch_lightning_basic.py | 6 +-- ...ttraining_test_orttrainer_bert_toy_onnx.py | 35 +++++++--------- ...ng_test_orttrainer_checkpoint_functions.py | 10 +++-- .../orttraining_test_orttrainer_frontend.py | 3 +- .../test/python/orttraining_test_ortvalue.py | 16 +++---- .../test/python/orttraining_test_sampler.py | 4 +- .../python/orttraining_test_transformers.py | 17 ++++---- .../python/orttraining_transformer_trainer.py | 20 +++------ 
 .../perf_log/ort_module_perf_test_tools.py | 10 ++---
 .../test/python/utils_multiple_choice.py | 6 +--
 .../mnist_training.py | 7 ++--
 orttraining/tools/amdgpu/script/rocprof.py | 5 ++-
 .../tools/ci_test/compare_huggingface.py | 4 +-
 .../tools/ci_test/run_batch_size_test.py | 2 +-
 .../tools/ci_test/run_bert_perf_test.py | 4 +-
 .../tools/ci_test/run_convergence_test.py | 4 +-
 .../tools/ci_test/run_gpt2_perf_test.py | 2 +-
 orttraining/tools/scripts/experiment.py | 11 ++---
 .../tools/scripts/gpt2_model_transform.py | 7 ++--
 .../tools/scripts/layer_norm_transform.py | 7 ++--
 orttraining/tools/scripts/model_transform.py | 7 ++--
 .../tools/scripts/nv_run_pretraining.py | 42 +++++++++----------
 .../tools/scripts/opset12_model_transform.py | 7 ++--
 .../scripts/performance_investigation.py | 1 +
 .../tools/scripts/pipeline_model_split.py | 7 ++--
 orttraining/tools/scripts/sqldb_to_tensors.py | 1 +
 orttraining/tools/scripts/watch_experiment.py | 7 ++--
 123 files changed, 428 insertions(+), 443 deletions(-)

diff --git a/onnxruntime/python/tools/quantization/operators/gemm.py b/onnxruntime/python/tools/quantization/operators/gemm.py
index 969e7074f4025..98ba0f7493429 100644
--- a/onnxruntime/python/tools/quantization/operators/gemm.py
+++ b/onnxruntime/python/tools/quantization/operators/gemm.py
@@ -3,13 +3,7 @@
 import onnx
 from onnx import onnx_pb as onnx_proto
 
-from ..quant_utils import (
-    TENSOR_NAME_QUANT_SUFFIX,
-    QuantizedValue,
-    QuantizedValueType,
-    attribute_to_kwarg,
-    ms_domain,
-)
+from ..quant_utils import TENSOR_NAME_QUANT_SUFFIX, QuantizedValue, QuantizedValueType, attribute_to_kwarg, ms_domain
 from .matmul import QOpMatMul
 from .qdq_base_operator import QDQOperatorBase
diff --git a/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py b/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py
index a84f490e8ac6e..73256e9115769 100644
--- a/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py
+++ b/onnxruntime/python/tools/quantization/operators/qdq_base_operator.py
@@ -1,7 +1,6 @@
 import itertools
 
-
 class QDQOperatorBase:
     def __init__(self, onnx_quantizer, onnx_node):
         self.quantizer = onnx_quantizer
diff --git a/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py b/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py
index 2efacd1965f40..a1d33a7425b28 100755
--- a/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py
+++ b/onnxruntime/python/tools/tensorrt/perf/build/ort_build_latest.py
@@ -1,8 +1,9 @@
+import argparse
 import os
 import subprocess
-import argparse
 import tarfile
+
 
 def parse_arguments():
     parser = argparse.ArgumentParser()
@@ -15,52 +16,73 @@ def parse_arguments():
     args = parser.parse_args()
     return args
 
+
 def archive_wheel_file(save_path, ort_wheel_file):
     if not os.path.exists(save_path):
         os.mkdir(save_path)
     subprocess.run(["cp", ort_wheel_file, save_path], check=True)
 
+
 def install_new_ort_wheel(ort_master_path):
-
     ort_wheel_path = os.path.join(ort_master_path, "build", "Linux", "Release", "dist")
     p1 = subprocess.run(["find", ort_wheel_path, "-name", "*.whl"], stdout=subprocess.PIPE, check=True)
     stdout = p1.stdout.decode("utf-8").strip()
     ort_wheel = stdout.split("\n")[0]
     subprocess.run(["python3", "-m", "pip", "install", "--force-reinstall", ort_wheel], check=True)
     return ort_wheel
 
+
 def main():
     args = parse_arguments()
-
     cmake_tar = "cmake-3.18.4-Linux-x86_64.tar.gz"
     if not os.path.exists(cmake_tar):
         p = subprocess.run(["wget", "-c", "https://cmake.org/files/v3.18/" + cmake_tar], check=True)
     tar = tarfile.open(cmake_tar)
     tar.extractall()
     tar.close()
-
+
     os.environ["PATH"] = os.path.join(os.path.abspath("cmake-3.18.4-Linux-x86_64"), "bin") + ":" + os.environ["PATH"]
-    os.environ["CUDACXX"] = os.path.join(args.cuda_home, "bin", "nvcc") 
+    os.environ["CUDACXX"] = os.path.join(args.cuda_home, "bin", "nvcc")
-
     ort_master_path = args.ort_master_path
     pwd = os.getcwd()
     os.chdir(ort_master_path)
     if args.use_archived:
         ort_wheel_file = args.use_archived
         subprocess.run(["python3", "-m", "pip", "install", "--force-reinstall", ort_wheel_file], check=True)
-
+
     else:
         subprocess.run(["git", "fetch"], check=True)
         subprocess.run(["git", "checkout", args.branch], check=True)
         subprocess.run(["git", "pull", "origin", args.branch], check=True)
-        subprocess.run(["./build.sh", "--config", "Release", "--use_tensorrt", "--tensorrt_home", args.tensorrt_home, "--cuda_home", args.cuda_home, "--cudnn", "/usr/lib/x86_64-linux-gnu", "--build_wheel", "--skip_tests", "--parallel"], check=True)
+        subprocess.run(
+            [
+                "./build.sh",
+                "--config",
+                "Release",
+                "--use_tensorrt",
+                "--tensorrt_home",
+                args.tensorrt_home,
+                "--cuda_home",
+                args.cuda_home,
+                "--cudnn",
+                "/usr/lib/x86_64-linux-gnu",
+                "--build_wheel",
+                "--skip_tests",
+                "--parallel",
+            ],
+            check=True,
+        )
     ort_wheel_file = install_new_ort_wheel(ort_master_path)
-
+
     if args.save:
         archive_wheel_file(args.save, ort_wheel_file)
     os.chdir(pwd)
 
+
 if __name__ == "__main__":
     main()
diff --git a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py
index 65d5d5517a982..755d3f00a40cf 100644
--- a/onnxruntime/python/tools/tensorrt/perf/perf_utils.py
+++ b/onnxruntime/python/tools/tensorrt/perf/perf_utils.py
@@ -4,7 +4,6 @@
 import subprocess
 import sys
 
-
 debug = False
 debug_verbose = False
diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py b/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py
index f26f8da1baa2f..3d88b5a775bc3 100644
--- a/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py
+++ b/onnxruntime/python/tools/transformers/onnx_model_bert_keras.py
@@ -226,7 +226,6 @@ def skip_reshape(self):
         self.input_name_to_nodes()
         self.output_name_to_node()
 
-
         count = 0
         reshape_nodes = self.get_nodes_by_op_type("Reshape")
         for reshape_node in reshape_nodes:
diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py
index 773dfdf4144b5..74136bba276f8 100644
--- a/onnxruntime/python/tools/transformers/optimizer.py
+++ b/onnxruntime/python/tools/transformers/optimizer.py
@@ -99,9 +99,7 @@ def optimize_by_onnxruntime(
         kwargs["disabled_optimizers"] = disabled_optimizers
 
     if not use_gpu:
-        onnxruntime.InferenceSession(
-            onnx_model_path, sess_options, providers=["CPUExecutionProvider"], **kwargs
-        )
+        onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=["CPUExecutionProvider"], **kwargs)
     else:
         gpu_ep = []
diff --git a/onnxruntime/test/providers/cpu/rnn/LSTM.py b/onnxruntime/test/providers/cpu/rnn/LSTM.py
index cae8c27513aff..11402936752f2 100644
--- a/onnxruntime/test/providers/cpu/rnn/LSTM.py
+++ b/onnxruntime/test/providers/cpu/rnn/LSTM.py
@@ -3,7 +3,6 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 
-
 import numpy as np  # type: ignore
 
 # import onnx
diff --git a/onnxruntime/test/python/quantization/test_conv_dynamic.py b/onnxruntime/test/python/quantization/test_conv_dynamic.py
index e11ef1187d922..08f329cdf3735 100644
--- a/onnxruntime/test/python/quantization/test_conv_dynamic.py
+++ b/onnxruntime/test/python/quantization/test_conv_dynamic.py
@@ -11,11 +11,7 @@
 import numpy as np
 import onnx
 from onnx import TensorProto, helper, numpy_helper
-from op_test_utils import (
-    check_model_correctness,
-    check_op_type_count,
-    check_qtype_by_node_type,
-)
+from op_test_utils import check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 from onnxruntime.quantization import DynamicQuantConfig, QuantType, quantize, quantize_dynamic
diff --git a/onnxruntime/test/python/quantization/test_op_pooling.py b/onnxruntime/test/python/quantization/test_op_pooling.py
index f47b87f64c403..f18ae564c4730 100644
--- a/onnxruntime/test/python/quantization/test_op_pooling.py
+++ b/onnxruntime/test/python/quantization/test_op_pooling.py
@@ -11,12 +11,7 @@
 import numpy as np
 import onnx
 from onnx import TensorProto, helper
-from op_test_utils import (
-    TestDataFeeds,
-    check_model_correctness,
-    check_op_type_count,
-    check_qtype_by_node_type,
-)
+from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type
 
 from onnxruntime.quantization import QuantFormat, QuantType, quantize_static
diff --git a/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py b/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py
index df6f526e89999..136db2949b101 100644
--- a/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py
+++ b/onnxruntime/test/testdata/dynamic_quantize_matmul_test.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import TensorProto, helper
diff --git a/onnxruntime/test/testdata/matmul_integer_to_float.py b/onnxruntime/test/testdata/matmul_integer_to_float.py
index 8f4d5f4fc2f39..4b1dc90f6f468 100644
--- a/onnxruntime/test/testdata/matmul_integer_to_float.py
+++ b/onnxruntime/test/testdata/matmul_integer_to_float.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import TensorProto, helper
diff --git a/onnxruntime/test/testdata/sparse_initializer_as_output.py b/onnxruntime/test/testdata/sparse_initializer_as_output.py
index 1c598416093ad..1c5cc8f783340 100644
--- a/onnxruntime/test/testdata/sparse_initializer_as_output.py
+++ b/onnxruntime/test/testdata/sparse_initializer_as_output.py
@@ -4,11 +4,7 @@
 import numpy as np
 import onnx
-from onnx import (
-    TensorProto,
-    ValueInfoProto,
-    helper,
-)
+from onnx import TensorProto, ValueInfoProto, helper
 from onnx.helper import make_opsetid
diff --git a/onnxruntime/test/testdata/sparse_to_dense_matmul.py b/onnxruntime/test/testdata/sparse_to_dense_matmul.py
index c65d06808f038..f5c9fb347baef 100644
--- a/onnxruntime/test/testdata/sparse_to_dense_matmul.py
+++ b/onnxruntime/test/testdata/sparse_to_dense_matmul.py
@@ -3,11 +3,7 @@
 import traceback
 
 import onnx
-from onnx import (
-    TensorProto,
-    ValueInfoProto,
-    helper,
-)
+from onnx import TensorProto, ValueInfoProto, helper
 from onnx.helper import make_opsetid
diff --git a/onnxruntime/test/testdata/transform/concat_slice_elimination.py b/onnxruntime/test/testdata/transform/concat_slice_elimination.py
index 1134409c630a3..97f0c6f243f60 100644
--- a/onnxruntime/test/testdata/transform/concat_slice_elimination.py
+++ b/onnxruntime/test/testdata/transform/concat_slice_elimination.py
@@ -1,4 +1,3 @@
-
 import numpy as np
 import onnx
 from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper
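The hunks on either side of this point repeat the commit's two mechanical patterns: a stray blank first line deleted from the top of a module (the `@@ -1,4 +1,3 @@` hunks with a lone `-`), and a parenthesized import list collapsed onto one line now that it fits within the repository's 120-column limit. A stdlib-only illustration of the collapse; note that black itself leaves a list with a trailing comma expanded, so in practice it is typically the accompanying import sorter that performs this rewrite, depending on the pinned formatter configuration:

```python
# Exploded form, as the test files looked before this commit:
from typing import (
    Any,
    Callable,
    Optional,
)

# Collapsed form, preferred once the whole list fits in 120 columns.
# (Re-importing the same names is harmless here; flake8 would flag it F811.)
from typing import Any, Callable, Optional
```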
diff --git a/onnxruntime/test/testdata/transform/fusion/div_mul.py b/onnxruntime/test/testdata/transform/fusion/div_mul.py
index a229f7f441f0e..480db8967ba57 100644
--- a/onnxruntime/test/testdata/transform/fusion/div_mul.py
+++ b/onnxruntime/test/testdata/transform/fusion/div_mul.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import OperatorSetIdProto, TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py b/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py
index 11b7dd899c452..2957f06b5b4eb 100644
--- a/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py
+++ b/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py b/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py
index 2a55e5d52e6b1..90f436d4e67b2 100644
--- a/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py
+++ b/onnxruntime/test/testdata/transform/fusion/embed_layer_norm_gen.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import TensorProto, helper
 from packaging import version
diff --git a/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py b/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py
index 18ac2adc247ae..ba3c9fadbc9c6 100644
--- a/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py
+++ b/onnxruntime/test/testdata/transform/fusion/isinf_reducesum.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import OperatorSetIdProto, TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py b/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py
index 37ece6e927b0c..4580ec68aecd1 100644
--- a/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py
+++ b/onnxruntime/test/testdata/transform/fusion/layer_norm_t5_gen.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import OperatorSetIdProto, TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py b/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py
index b289db74cd267..7068fb02f2821 100644
--- a/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py
+++ b/onnxruntime/test/testdata/transform/fusion/layer_norm_with_cast_2.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import OperatorSetIdProto, TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
index 233c692b68e8a..56556fe327a63 100644
--- a/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
+++ b/onnxruntime/test/testdata/transform/fusion/matmul_integer_to_float.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/fusion/not_where.py b/onnxruntime/test/testdata/transform/fusion/not_where.py
index 36523d8fa3f5a..28a4eb914f0a3 100644
--- a/onnxruntime/test/testdata/transform/fusion/not_where.py
+++ b/onnxruntime/test/testdata/transform/fusion/not_where.py
@@ -1,4 +1,3 @@
-
 import onnx
 from onnx import OperatorSetIdProto, TensorProto, helper
diff --git a/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py b/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py
index 77cc146f4f8ce..886cd5c25fb08 100644
---
a/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py +++ b/onnxruntime/test/testdata/transform/model_parallel/bart_self_attention_megatron_basic_test.py @@ -1,4 +1,3 @@ - import numpy as np import onnx from onnx import OperatorSetIdProto, TensorProto, helper, numpy_helper diff --git a/orttraining/orttraining/eager/opgen/onnxgen.py b/orttraining/orttraining/eager/opgen/onnxgen.py index 87c4036f48b0a..e46e8eb73fac2 100755 --- a/orttraining/orttraining/eager/opgen/onnxgen.py +++ b/orttraining/orttraining/eager/opgen/onnxgen.py @@ -5,6 +5,7 @@ import os.path as path from sys import argv + from onnx import defs out_file = path.join(path.dirname(path.realpath(__file__)), "opgen", "onnxops.py") diff --git a/orttraining/orttraining/eager/opgen/opgen/ast.py b/orttraining/orttraining/eager/opgen/opgen/ast.py index f41a93712aa51..9c73322c361fe 100644 --- a/orttraining/orttraining/eager/opgen/opgen/ast.py +++ b/orttraining/orttraining/eager/opgen/opgen/ast.py @@ -2,7 +2,8 @@ # Licensed under the MIT License. import io -from typing import TextIO, List, Union +from typing import List, TextIO, Union + from opgen.lexer import Token diff --git a/orttraining/orttraining/eager/opgen/opgen/custom_ops.py b/orttraining/orttraining/eager/opgen/opgen/custom_ops.py index a8031fe7d8635..0c303780a1e7f 100644 --- a/orttraining/orttraining/eager/opgen/opgen/custom_ops.py +++ b/orttraining/orttraining/eager/opgen/opgen/custom_ops.py @@ -1,4 +1,4 @@ -from opgen.onnxops import BatchNormalization, Gemm, Concat +from opgen.onnxops import BatchNormalization, Concat, Gemm ops = { "gemm": Gemm("A", "B", "C", "alpha", "beta", "transA", "transB"), diff --git a/orttraining/orttraining/eager/opgen/opgen/lexer.py b/orttraining/orttraining/eager/opgen/opgen/lexer.py index 661d646350f53..5d2737574aa92 100644 --- a/orttraining/orttraining/eager/opgen/opgen/lexer.py +++ b/orttraining/orttraining/eager/opgen/opgen/lexer.py @@ -1,9 +1,9 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from enum import Enum from abc import ABC -from typing import List, Optional, Union, Tuple +from enum import Enum +from typing import List, Optional, Tuple, Union class SourceLocation(object): diff --git a/orttraining/orttraining/eager/opgen/opgen/onnxops.py b/orttraining/orttraining/eager/opgen/opgen/onnxops.py index 98a2dd4d5997e..ee91dbc1c5748 100644 --- a/orttraining/orttraining/eager/opgen/opgen/onnxops.py +++ b/orttraining/orttraining/eager/opgen/opgen/onnxops.py @@ -1,7 +1,7 @@ # AUTO-GENERATED CODE! - DO NOT EDIT! # $ python onnxgen.py -from opgen.generator import ONNXAttr, ONNXOp, AttrType +from opgen.generator import AttrType, ONNXAttr, ONNXOp class Abs(ONNXOp): diff --git a/orttraining/orttraining/eager/opgen/opgen/parser.py b/orttraining/orttraining/eager/opgen/opgen/parser.py index c1ba7e8378c5b..6fd27655104b6 100644 --- a/orttraining/orttraining/eager/opgen/opgen/parser.py +++ b/orttraining/orttraining/eager/opgen/opgen/parser.py @@ -1,9 +1,10 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
-from opgen.lexer import * +from typing import List, Optional, Tuple, Union + from opgen.ast import * -from typing import List, Tuple, Union, Optional +from opgen.lexer import * class UnexpectedTokenError(RuntimeError): diff --git a/orttraining/orttraining/eager/opgen/opgen/writer.py b/orttraining/orttraining/eager/opgen/opgen/writer.py index 460a29a879dfc..b5281e1843ed8 100644 --- a/orttraining/orttraining/eager/opgen/opgen/writer.py +++ b/orttraining/orttraining/eager/opgen/opgen/writer.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from typing import TextIO, List +from typing import List, TextIO class SourceWriter: diff --git a/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py b/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py index 30e78377b2445..cdbe6bf68c5c0 100644 --- a/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py +++ b/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py @@ -3,7 +3,7 @@ import unittest -from opgen.lexer import StringReader, Lexer, Token, TokenKind, SourceLocation +from opgen.lexer import Lexer, SourceLocation, StringReader, Token, TokenKind class LexerTestCase(unittest.TestCase): diff --git a/orttraining/orttraining/eager/test/__main__.py b/orttraining/orttraining/eager/test/__main__.py index f188f3c1fc3c3..cd381c050ec00 100644 --- a/orttraining/orttraining/eager/test/__main__.py +++ b/orttraining/orttraining/eager/test/__main__.py @@ -3,8 +3,8 @@ import glob import os -import sys import subprocess +import sys selfdir = os.path.dirname(os.path.realpath(__file__)) diff --git a/orttraining/orttraining/eager/test/ort_eps_test.py b/orttraining/orttraining/eager/test/ort_eps_test.py index 7a4c8de5c5d25..a0f10eb115da9 100644 --- a/orttraining/orttraining/eager/test/ort_eps_test.py +++ b/orttraining/orttraining/eager/test/ort_eps_test.py @@ -1,21 +1,22 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import unittest -import torch -import onnxruntime_pybind11_state as torch_ort import os import sys +import unittest + +import onnxruntime_pybind11_state as torch_ort +import torch def is_windows(): return sys.platform.startswith("win") -from io import StringIO import sys import threading import time +from io import StringIO class OutputGrabber(object): diff --git a/orttraining/orttraining/eager/test/ort_init.py b/orttraining/orttraining/eager/test/ort_init.py index 43602cc6a5fdb..9876fd506ede8 100644 --- a/orttraining/orttraining/eager/test/ort_init.py +++ b/orttraining/orttraining/eager/test/ort_init.py @@ -8,6 +8,7 @@ # after the import, hence this test is isolated from the others. import unittest + import torch diff --git a/orttraining/orttraining/eager/test/ort_ops.py b/orttraining/orttraining/eager/test/ort_ops.py index 9f5fdfdf2413c..2473a85c1902a 100644 --- a/orttraining/orttraining/eager/test/ort_ops.py +++ b/orttraining/orttraining/eager/test/ort_ops.py @@ -8,7 +8,7 @@ import numpy as np import onnxruntime_pybind11_state as torch_ort import torch -from parameterized import parameterized, param +from parameterized import param, parameterized class OrtOpTests(unittest.TestCase): diff --git a/orttraining/orttraining/eager/test/ort_tensor.py b/orttraining/orttraining/eager/test/ort_tensor.py index a0cfdaa2cd0d6..5f399f9e8a2e5 100644 --- a/orttraining/orttraining/eager/test/ort_tensor.py +++ b/orttraining/orttraining/eager/test/ort_tensor.py @@ -2,8 +2,9 @@ # Licensed under the MIT License. 
import unittest -import torch + import onnxruntime_pybind11_state as torch_ort +import torch class OrtTensorTests(unittest.TestCase): diff --git a/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py b/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py index 505fdf24933de..a5977ed2b9e21 100644 --- a/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py +++ b/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py @@ -4,17 +4,19 @@ ## Model testing is not complete. -from __future__ import print_function + import argparse +import os + +import numpy as np import torch -from onnxruntime.training import ORTModule -from onnxruntime.capi import _pybind_state as torch_ort_eager import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torchvision import datasets, transforms -import numpy as np -import os + +from onnxruntime.capi import _pybind_state as torch_ort_eager +from onnxruntime.training import ORTModule class NeuralNet(nn.Module): diff --git a/orttraining/orttraining/eager/test_models/mnist_fc.py b/orttraining/orttraining/eager/test_models/mnist_fc.py index 0f0b3bb604149..26a91f957b5f8 100644 --- a/orttraining/orttraining/eager/test_models/mnist_fc.py +++ b/orttraining/orttraining/eager/test_models/mnist_fc.py @@ -1,11 +1,11 @@ -from __future__ import print_function import argparse +import os + +import numpy as np +import onnxruntime_pybind11_state as torch_ort import torch import torch.nn as nn import torch.nn.functional as F -import numpy as np -import os -import onnxruntime_pybind11_state as torch_ort class NeuralNet(nn.Module): diff --git a/orttraining/orttraining/eager/test_models/mnist_fc_training.py b/orttraining/orttraining/eager/test_models/mnist_fc_training.py index 95ba3bf060332..a7bb6e57fcd9f 100644 --- a/orttraining/orttraining/eager/test_models/mnist_fc_training.py +++ b/orttraining/orttraining/eager/test_models/mnist_fc_training.py @@ -4,7 +4,6 @@ # pylint: disable=missing-docstring # pylint: disable=C0103 -from __future__ import print_function import argparse import os diff --git a/orttraining/orttraining/eager/test_models/scratchpad.py b/orttraining/orttraining/eager/test_models/scratchpad.py index 049aa859c842c..01237d0cd029d 100644 --- a/orttraining/orttraining/eager/test_models/scratchpad.py +++ b/orttraining/orttraining/eager/test_models/scratchpad.py @@ -1,8 +1,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import torch import onnxruntime_pybind11_state as torch_ort +import torch device = torch_ort.device() diff --git a/orttraining/orttraining/python/checkpointing_utils.py b/orttraining/orttraining/python/checkpointing_utils.py index 359f6a8c53552..b7c055eaba51b 100644 --- a/orttraining/orttraining/python/checkpointing_utils.py +++ b/orttraining/orttraining/python/checkpointing_utils.py @@ -1,4 +1,5 @@ import os + import torch diff --git a/orttraining/orttraining/python/deprecated/training_session.py b/orttraining/orttraining/python/deprecated/training_session.py index b6a63dbee35d2..61408485a0f86 100644 --- a/orttraining/orttraining/python/deprecated/training_session.py +++ b/orttraining/orttraining/python/deprecated/training_session.py @@ -3,14 +3,14 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -import sys import os +import sys from onnxruntime.capi import _pybind_state as C from onnxruntime.capi.onnxruntime_inference_collection import ( - Session, InferenceSession, IOBinding, + Session, check_and_normalize_provider_args, ) diff --git a/orttraining/orttraining/python/ort_trainer.py b/orttraining/orttraining/python/ort_trainer.py index 5434edd7d4439..b1901380f9313 100644 --- a/orttraining/orttraining/python/ort_trainer.py +++ b/orttraining/orttraining/python/ort_trainer.py @@ -1,7 +1,6 @@ import io import os import warnings -from packaging.version import Version as LooseVersion import numpy as np import onnx @@ -9,6 +8,7 @@ import torch.nn import torch.onnx from onnx import helper, numpy_helper +from packaging.version import Version as LooseVersion import onnxruntime as ort import onnxruntime.capi.pt_patch diff --git a/orttraining/orttraining/python/pt_patch.py b/orttraining/orttraining/python/pt_patch.py index b524a286c9de7..5c5d205b21318 100644 --- a/orttraining/orttraining/python/pt_patch.py +++ b/orttraining/orttraining/python/pt_patch.py @@ -1,9 +1,7 @@ import torch - -from torch.onnx import symbolic_opset10 -from torch.onnx import symbolic_opset12 -from torch.onnx.symbolic_helper import parse_args import torch.onnx.symbolic_helper as sym_help +from torch.onnx import symbolic_opset10, symbolic_opset12 +from torch.onnx.symbolic_helper import parse_args @parse_args("v", "v", "v", "v", "i", "none") diff --git a/orttraining/orttraining/python/training/__init__.py b/orttraining/orttraining/python/training/__init__.py index 4a69f1439c656..ef785166a1a6a 100644 --- a/orttraining/orttraining/python/training/__init__.py +++ b/orttraining/orttraining/python/training/__init__.py @@ -6,11 +6,11 @@ from onnxruntime.capi._pybind_state import PropagateCastOpsStrategy, TrainingParameters from onnxruntime.capi.training.training_session import TrainingSession -# Options need to be imported before `ORTTrainer`. -from .orttrainer_options import ORTTrainerOptions -from .orttrainer import ORTTrainer, TrainStepInfo from . import amp, checkpoint, model_desc_validation, optim +from .orttrainer import ORTTrainer, TrainStepInfo +# Options need to be imported before `ORTTrainer`. +from .orttrainer_options import ORTTrainerOptions try: from .ortmodule import ORTModule diff --git a/orttraining/orttraining/python/training/_checkpoint_storage.py b/orttraining/orttraining/python/training/_checkpoint_storage.py index 461daa57134c0..b5c03ee3c1102 100644 --- a/orttraining/orttraining/python/training/_checkpoint_storage.py +++ b/orttraining/orttraining/python/training/_checkpoint_storage.py @@ -3,9 +3,10 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -import h5py -from collections.abc import Mapping import pickle +from collections.abc import Mapping + +import h5py def _dfs_save(group, save_obj): diff --git a/orttraining/orttraining/python/training/amp/__init__.py b/orttraining/orttraining/python/training/amp/__init__.py index 33274a8d5e10d..dec2c8e3b868a 100644 --- a/orttraining/orttraining/python/training/amp/__init__.py +++ b/orttraining/orttraining/python/training/amp/__init__.py @@ -1 +1 @@ -from .loss_scaler import LossScaler, DynamicLossScaler +from .loss_scaler import DynamicLossScaler, LossScaler diff --git a/orttraining/orttraining/python/training/checkpoint.py b/orttraining/orttraining/python/training/checkpoint.py index e4a2f1230b7a4..d9f8a0904b877 100644 --- a/orttraining/orttraining/python/training/checkpoint.py +++ b/orttraining/orttraining/python/training/checkpoint.py @@ -1,12 +1,13 @@ -import numpy as np -import onnx import os -import torch -import warnings import tempfile +import warnings from enum import Enum -from . import _checkpoint_storage, _utils +import numpy as np +import onnx +import torch + +from . import _checkpoint_storage, _utils ################################################################################ # Experimental Checkpoint APIs diff --git a/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py b/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py index 91c656619f621..b1b5257e28c28 100644 --- a/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py +++ b/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py @@ -3,9 +3,10 @@ from typing import Any, Callable, Optional, Union import torch -from onnxruntime.capi._pybind_state import GradientGraphBuilder from torch.onnx import TrainingMode +from onnxruntime.capi._pybind_state import GradientGraphBuilder + from ...ortmodule._custom_op_symbolic_registry import CustomOpSymbolicRegistry diff --git a/orttraining/orttraining/python/training/model_desc_validation.py b/orttraining/orttraining/python/training/model_desc_validation.py index e9181f732cb32..4cfc46c4ae8b5 100644 --- a/orttraining/orttraining/python/training/model_desc_validation.py +++ b/orttraining/orttraining/python/training/model_desc_validation.py @@ -1,8 +1,9 @@ -import cerberus from collections import namedtuple + +import cerberus import torch -from ._utils import static_vars +from ._utils import static_vars LEARNING_RATE_IO_DESCRIPTION_NAME = "__learning_rate" ALL_FINITE_IO_DESCRIPTION_NAME = "__all_finite" diff --git a/orttraining/orttraining/python/training/onnxblock/loss/__init__.py b/orttraining/orttraining/python/training/onnxblock/loss/__init__.py index ac21bb0f42438..5cb9ead2d2019 100644 --- a/orttraining/orttraining/python/training/onnxblock/loss/__init__.py +++ b/orttraining/orttraining/python/training/onnxblock/loss/__init__.py @@ -2,4 +2,4 @@ # Licensed under the MIT License. 
# __init__.py -from .loss import MSELoss, CrossEntropyLoss, BCEWithLogitsLoss +from .loss import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss diff --git a/orttraining/orttraining/python/training/optim/__init__.py b/orttraining/orttraining/python/training/optim/__init__.py index f74fe08202397..5e57d7df3a864 100644 --- a/orttraining/orttraining/python/training/optim/__init__.py +++ b/orttraining/orttraining/python/training/optim/__init__.py @@ -1,11 +1,10 @@ -from .config import _OptimizerConfig, AdamConfig, LambConfig, SGDConfig +from .config import AdamConfig, LambConfig, SGDConfig, _OptimizerConfig +from .fp16_optimizer import FP16_Optimizer +from .fused_adam import AdamWMode, FusedAdam from .lr_scheduler import ( - _LRScheduler, ConstantWarmupLRScheduler, CosineWarmupLRScheduler, LinearWarmupLRScheduler, PolyWarmupLRScheduler, + _LRScheduler, ) - -from .fused_adam import FusedAdam, AdamWMode -from .fp16_optimizer import FP16_Optimizer diff --git a/orttraining/orttraining/python/training/optim/_apex_amp_modifier.py b/orttraining/orttraining/python/training/optim/_apex_amp_modifier.py index 1b91ec2bf3594..64c30ab6618a1 100644 --- a/orttraining/orttraining/python/training/optim/_apex_amp_modifier.py +++ b/orttraining/orttraining/python/training/optim/_apex_amp_modifier.py @@ -8,6 +8,7 @@ import types import warnings + from ._modifier import FP16OptimizerModifier @@ -23,6 +24,7 @@ def can_be_modified(self): def override_function(m_self): from apex import amp as apex_amp + from onnxruntime.training.ortmodule.torch_cpp_extensions import fused_ops warnings.warn("Apex AMP fp16_optimizer functions are overrided with faster implementation.", UserWarning) diff --git a/orttraining/orttraining/python/training/optim/_modifier_registry.py b/orttraining/orttraining/python/training/optim/_modifier_registry.py index 4291b792a4607..4a3a33ecc0513 100644 --- a/orttraining/orttraining/python/training/optim/_modifier_registry.py +++ b/orttraining/orttraining/python/training/optim/_modifier_registry.py @@ -3,9 +3,9 @@ # Licensed under the MIT License. 
# -------------------------------------------------------------------------- +from ._apex_amp_modifier import ApexAMPModifier from ._ds_modifier import DeepSpeedZeROModifier from ._megatron_modifier import LegacyMegatronLMModifier -from ._apex_amp_modifier import ApexAMPModifier OptimizerModifierTypeRegistry = { "megatron.fp16.fp16.FP16_Optimizer": LegacyMegatronLMModifier, diff --git a/orttraining/orttraining/python/training/optim/fused_adam.py b/orttraining/orttraining/python/training/optim/fused_adam.py index 30ebcf30e4844..4de467f9d16eb 100644 --- a/orttraining/orttraining/python/training/optim/fused_adam.py +++ b/orttraining/orttraining/python/training/optim/fused_adam.py @@ -10,9 +10,11 @@ This file is adapted from fused adam in NVIDIA/apex, commit a109f85 """ +from enum import IntEnum + import torch + from ._multi_tensor_apply import MultiTensorApply -from enum import IntEnum class AdamWMode(IntEnum): diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py index 63daf53266291..5ff0d217dd33d 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py @@ -4,14 +4,14 @@ # -------------------------------------------------------------------------- import sys -import torch +import torch from torch.utils.dlpack import from_dlpack, to_dlpack -from ._fallback import _FallbackManager, ORTModuleFallbackException, ORTModuleIOError, wrap_exception - from onnxruntime.training.ortmodule.torch_cpp_extensions import torch_interop_utils +from ._fallback import ORTModuleFallbackException, ORTModuleIOError, _FallbackManager, wrap_exception + def wrap_as_dlpack_or_not(grad_flag, tensor_flag, inplace_flag, training_mode_flag, arg): """ diff --git a/orttraining/orttraining/python/training/ortmodule/_fallback.py b/orttraining/orttraining/python/training/ortmodule/_fallback.py index 7129e522b8c49..59de6114ed8c7 100644 --- a/orttraining/orttraining/python/training/ortmodule/_fallback.py +++ b/orttraining/orttraining/python/training/ortmodule/_fallback.py @@ -3,24 +3,23 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from . import _logger - import os -import torch import warnings - from enum import IntFlag from typing import Optional + +import torch + +from . import _logger, _utils from ._fallback_exceptions import ( + ORTModuleDeviceException, ORTModuleFallbackException, ORTModuleInitException, - ORTModuleDeviceException, ORTModuleIOError, - ORTModuleTorchModelException, ORTModuleONNXModelException, + ORTModuleTorchModelException, wrap_exception, ) -from . import _utils class _FallbackPolicy(IntFlag): diff --git a/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py b/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py index 501a559a6ee0c..dbb3507856b2e 100644 --- a/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_gradient_accumulation_manager.py @@ -2,9 +2,10 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -from . import _utils from onnxruntime.capi import _pybind_state as C +from . 
import _utils + class GradientAccumulationManager(object): """Handles Gradient accumulation optimization during training diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py index 216417249bd20..a902f511713ad 100644 --- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager_factory.py @@ -3,10 +3,10 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from ._training_manager import TrainingManager +from ._fallback import _FallbackManager from ._inference_manager import InferenceManager +from ._training_manager import TrainingManager from .debug_options import DebugOptions -from ._fallback import _FallbackManager class GraphExecutionManagerFactory(object): diff --git a/orttraining/orttraining/python/training/ortmodule/_logger.py b/orttraining/orttraining/python/training/ortmodule/_logger.py index 66e1cb556538f..f3d4d930746b6 100644 --- a/orttraining/orttraining/python/training/ortmodule/_logger.py +++ b/orttraining/orttraining/python/training/ortmodule/_logger.py @@ -3,12 +3,13 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from onnxruntime.capi._pybind_state import Severity -from contextlib import contextmanager -from enum import IntEnum import io import sys import warnings +from contextlib import contextmanager +from enum import IntEnum + +from onnxruntime.capi._pybind_state import Severity class LogLevel(IntEnum): diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_factory.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_factory.py index 41d82eded40c1..d2954a287e804 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_factory.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_factory.py @@ -2,9 +2,9 @@ # Licensed under the MIT License. # _torch_module_factory.py +from ._fallback import _FallbackManager from ._torch_module_ort import TorchModuleORT from .debug_options import DebugOptions -from ._fallback import _FallbackManager class TorchModuleFactory: diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py index 6d7a9db2433a0..d7e369613e6bc 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_interface.py @@ -3,9 +3,9 @@ # _torch_module_interface.py from collections import OrderedDict -import torch -from typing import Iterator, Optional, Tuple, TypeVar, Callable +from typing import Callable, Iterator, Optional, Tuple, TypeVar +import torch T = TypeVar("T", bound="torch.nn.Module") diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py index ea0676b12587c..dfca706f76aa8 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py @@ -2,15 +2,16 @@ # Licensed under the MIT License. # _torch_module_ort.py -from . 
import _io, _utils -from .debug_options import DebugOptions -from ._graph_execution_manager_factory import GraphExecutionManagerFactory -from ._torch_module_interface import TorchModuleInterface -from ._fallback import _FallbackManager, ORTModuleTorchModelException, wrap_exception from collections import OrderedDict +from typing import Callable, Iterator, Optional, Tuple, TypeVar + import torch -from typing import Iterator, Optional, Tuple, TypeVar, Callable +from . import _io, _utils +from ._fallback import ORTModuleTorchModelException, _FallbackManager, wrap_exception +from ._graph_execution_manager_factory import GraphExecutionManagerFactory +from ._torch_module_interface import TorchModuleInterface +from .debug_options import DebugOptions T = TypeVar("T", bound="torch.nn.Module") diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py index 44a43b2429e1c..5335f34172436 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_pytorch.py @@ -2,12 +2,12 @@ # Licensed under the MIT License. # _torch_module_pytorch.py -from ._torch_module_interface import TorchModuleInterface - from collections import OrderedDict +from typing import Callable, Iterator, Optional, Tuple, TypeVar + import torch -from typing import Iterator, Optional, Tuple, TypeVar, Callable +from ._torch_module_interface import TorchModuleInterface T = TypeVar("T", bound="torch.nn.Module") diff --git a/orttraining/orttraining/python/training/ortmodule/_utils.py b/orttraining/orttraining/python/training/ortmodule/_utils.py index a43f0c3e66c7d..51c54e1dd5187 100644 --- a/orttraining/orttraining/python/training/ortmodule/_utils.py +++ b/orttraining/orttraining/python/training/ortmodule/_utils.py @@ -13,10 +13,10 @@ import types import warnings from typing import List -from packaging.version import Version as LooseVersion import numpy as np import torch +from packaging.version import Version as LooseVersion from torch._C import _from_dlpack from torch.utils.dlpack import to_dlpack diff --git a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py index f251df27360ee..2f1451497ffcd 100644 --- a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py +++ b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py @@ -3,17 +3,18 @@ # _load_config_from_json.py import json -import os import logging +import os +from functools import reduce from types import SimpleNamespace from onnxruntime.capi import _pybind_state as C -from functools import reduce -from . import JSON_PATH_ENVIRONMENT_KEY +from onnxruntime.training import ortmodule + from ..._fallback import _FallbackPolicy from ..._graph_execution_manager import _SkipCheck from ...debug_options import DebugOptions, LogLevel, _SaveOnnxOptions -from onnxruntime.training import ortmodule +from . 
import JSON_PATH_ENVIRONMENT_KEY log = logging.getLogger(__name__) diff --git a/orttraining/orttraining/python/training/ortmodule/ortmodule.py b/orttraining/orttraining/python/training/ortmodule/ortmodule.py index 18000e0462d00..8f2f3eb6e4eef 100644 --- a/orttraining/orttraining/python/training/ortmodule/ortmodule.py +++ b/orttraining/orttraining/python/training/ortmodule/ortmodule.py @@ -3,21 +3,21 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from ._torch_module_factory import TorchModuleFactory -from ._torch_module_pytorch import TorchModulePytorch -from ._torch_module_ort import TorchModuleORT -from ._custom_op_symbolic_registry import CustomOpSymbolicRegistry -from ._custom_gradient_registry import CustomGradientRegistry -from . import _utils -from .debug_options import DebugOptions -from ._fallback import _FallbackManager, _FallbackPolicy, ORTModuleFallbackException -from onnxruntime.training import ortmodule - -from onnxruntime.tools import pytorch_export_contrib_ops +from typing import Callable, Iterator, Optional, Tuple, TypeVar import torch -from typing import Iterator, Optional, Tuple, TypeVar, Callable +from onnxruntime.tools import pytorch_export_contrib_ops +from onnxruntime.training import ortmodule + +from . import _utils +from ._custom_gradient_registry import CustomGradientRegistry +from ._custom_op_symbolic_registry import CustomOpSymbolicRegistry +from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy +from ._torch_module_factory import TorchModuleFactory +from ._torch_module_ort import TorchModuleORT +from ._torch_module_pytorch import TorchModulePytorch +from .debug_options import DebugOptions # Needed to override PyTorch methods T = TypeVar("T", bound="Module") diff --git a/orttraining/orttraining/python/training/postprocess.py b/orttraining/orttraining/python/training/postprocess.py index ff77a05e41e31..6edc6db44ab47 100644 --- a/orttraining/orttraining/python/training/postprocess.py +++ b/orttraining/orttraining/python/training/postprocess.py @@ -1,12 +1,11 @@ -import sys import os.path -from onnx import * -import onnx -import numpy as np import struct +import sys -from onnx import helper -from onnx import numpy_helper +import numpy as np +import onnx +from onnx import * +from onnx import helper, numpy_helper def run_postprocess(model): diff --git a/orttraining/orttraining/python/training/torchdynamo/register_backend.py b/orttraining/orttraining/python/training/torchdynamo/register_backend.py index a9450e119c7a5..d8aeb17c96216 100644 --- a/orttraining/orttraining/python/training/torchdynamo/register_backend.py +++ b/orttraining/orttraining/python/training/torchdynamo/register_backend.py @@ -5,8 +5,8 @@ from functorch.compile import min_cut_rematerialization_partition from torch._dynamo.optimizations.training import aot_autograd -from .ort_backend import OrtBackend +from .ort_backend import OrtBackend # This should be the underlying compiler for ALL graphs if # the user uses ORT to accelerate PyTorch via Dynamo. diff --git a/orttraining/orttraining/python/training/utils/data/__init__.py b/orttraining/orttraining/python/training/utils/data/__init__.py index 91207012216d3..ea1195f247d90 100644 --- a/orttraining/orttraining/python/training/utils/data/__init__.py +++ b/orttraining/orttraining/python/training/utils/data/__init__.py @@ -2,4 +2,4 @@ # Licensed under the MIT License. 
# __init__.py -from .sampler import LoadBalancingDistributedSampler, LoadBalancingDistributedBatchSampler +from .sampler import LoadBalancingDistributedBatchSampler, LoadBalancingDistributedSampler diff --git a/orttraining/orttraining/python/training/utils/data/sampler.py b/orttraining/orttraining/python/training/utils/data/sampler.py index 932f9e76dc13c..2fab9a11d95e9 100644 --- a/orttraining/orttraining/python/training/utils/data/sampler.py +++ b/orttraining/orttraining/python/training/utils/data/sampler.py @@ -2,13 +2,14 @@ # Licensed under the MIT License. # sampler.py -import torch import math +from typing import Callable, Iterator, Optional + +import numpy as np +import torch import torch.distributed as dist -from torch.utils.data.sampler import Sampler from torch.utils.data.dataset import Dataset -from typing import Optional, Iterator, Callable -import numpy as np +from torch.utils.data.sampler import Sampler def _shard_wrapped_indices_across_workers(dataset_index_list, num_shards, num_samples_per_shard): diff --git a/orttraining/orttraining/test/external_custom_ops/setup.py b/orttraining/orttraining/test/external_custom_ops/setup.py index 57ba10b91ad2d..2fb2e2243056d 100644 --- a/orttraining/orttraining/test/external_custom_ops/setup.py +++ b/orttraining/orttraining/test/external_custom_ops/setup.py @@ -1,13 +1,15 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import sys import os import subprocess -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext +import sys from subprocess import CalledProcessError -import pybind11 + import onnx +import pybind11 +from setuptools import Extension, setup +from setuptools.command.build_ext import build_ext + import onnxruntime diff --git a/orttraining/orttraining/test/external_custom_ops/test.py b/orttraining/orttraining/test/external_custom_ops/test.py index 7d3e4edf48bd8..f3101434d5186 100644 --- a/orttraining/orttraining/test/external_custom_ops/test.py +++ b/orttraining/orttraining/test/external_custom_ops/test.py @@ -3,16 +3,17 @@ import os import sys + import numpy as np +# Restore dlopen flags. +import orttraining_external_custom_ops + # Expose available (onnx::* and protobuf::*) symbols from onnxruntime to resolve references in # the custom ops shared library. Deepbind flag is required to avoid conflicts with other # instances of onnx/protobuf libraries. import onnxruntime -# Restore dlopen flags. 
-import orttraining_external_custom_ops - so = onnxruntime.SessionOptions() sess = onnxruntime.InferenceSession("testdata/model.onnx", so) input = np.random.rand(2, 2).astype(np.float32) diff --git a/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py b/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py index a1377d2448bfd..f435bdd8f7d07 100644 --- a/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py +++ b/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py @@ -78,14 +78,16 @@ def readOutput(self): self.capturedtext += char +import os +import unittest + +import numpy as np import torch -from onnxruntime.capi import _pybind_state as torch_ort_eager import torch.nn as nn import torch.nn.functional as F -import numpy as np -import os + +from onnxruntime.capi import _pybind_state as torch_ort_eager from onnxruntime.training import optim, orttrainer, orttrainer_options -import unittest def my_loss(x, target): diff --git a/orttraining/orttraining/test/python/launch_test.py b/orttraining/orttraining/test/python/launch_test.py index d183f3189511c..3743e31229ce2 100755 --- a/orttraining/orttraining/test/python/launch_test.py +++ b/orttraining/orttraining/test/python/launch_test.py @@ -2,14 +2,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import argparse +import logging import os import sys -import argparse from _test_commons import run_subprocess -import logging - logging.basicConfig(format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.DEBUG) log = logging.getLogger("Build") diff --git a/orttraining/orttraining/test/python/onnxruntime_test_register_ep.py b/orttraining/orttraining/test/python/onnxruntime_test_register_ep.py index 5f71125cff413..e3030bced8439 100644 --- a/orttraining/orttraining/test/python/onnxruntime_test_register_ep.py +++ b/orttraining/orttraining/test/python/onnxruntime_test_register_ep.py @@ -1,6 +1,7 @@ +import os import unittest + import onnxruntime_pybind11_state as C -import os class EPRegistrationTests(unittest.TestCase): diff --git a/orttraining/orttraining/test/python/orttraining_ortmodule_distributed_tests.py b/orttraining/orttraining/test/python/orttraining_ortmodule_distributed_tests.py index 4f778444b88f0..08b304cb0e3b2 100644 --- a/orttraining/orttraining/test/python/orttraining_ortmodule_distributed_tests.py +++ b/orttraining/orttraining/test/python/orttraining_ortmodule_distributed_tests.py @@ -2,13 +2,12 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
-import sys import argparse +import logging +import sys from _test_commons import run_subprocess -import logging - logging.basicConfig(format="%(asctime)s %(name)s [%(levelname)s] - %(message)s", level=logging.DEBUG) log = logging.getLogger("ORTModuleDistributedTests") diff --git a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py index 3ed121543d24a..2dba2cd5c86b4 100644 --- a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py +++ b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py @@ -1,33 +1,27 @@ # ================== -import os -import shutil -import logging -import random -import h5py -from tqdm import tqdm -import datetime -import numpy as np import dataclasses -from dataclasses import dataclass, field -from typing import Optional, Any, Dict -import json +import datetime import glob - +import json +import logging +import os +import random +import shutil import unittest +from concurrent.futures import ProcessPoolExecutor +from dataclasses import dataclass, field +from typing import Any, Dict, Optional +import h5py +import numpy as np import torch -from torch.utils.data import DataLoader, RandomSampler, Dataset import torch.distributed as dist +from torch.utils.data import DataLoader, Dataset, RandomSampler from torch.utils.tensorboard import SummaryWriter - -from transformers import BertForPreTraining, BertConfig, HfArgumentParser - -from concurrent.futures import ProcessPoolExecutor +from tqdm import tqdm +from transformers import BertConfig, BertForPreTraining, HfArgumentParser import onnxruntime as ort -from onnxruntime.training import amp, optim, orttrainer -from onnxruntime.training.optim import PolyWarmupLRScheduler, LinearWarmupLRScheduler -from onnxruntime.training.checkpoint import aggregate_checkpoints # need to override torch.onnx.symbolic_opset12.nll_loss to handle ignore_index == -100 cases. # the fix for ignore_index == -100 cases is already in pytorch master. @@ -35,6 +29,9 @@ # eventually we will use pytorch with fixed nll_loss once computation # issues are understood and solved. import onnxruntime.capi.pt_patch +from onnxruntime.training import amp, optim, orttrainer +from onnxruntime.training.checkpoint import aggregate_checkpoints +from onnxruntime.training.optim import LinearWarmupLRScheduler, PolyWarmupLRScheduler # we cannot make full convergence run in nightly pipeling because of its timeout limit, # max_steps is still needed to calculate learning rate. 
force_to_stop_max_steps is used to diff --git a/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py b/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py index db03a636d046e..e96b90138c3d5 100644 --- a/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py +++ b/orttraining/orttraining/test/python/orttraining_run_frontend_batch_size_test.py @@ -1,6 +1,6 @@ -import sys import collections import subprocess +import sys Config = collections.namedtuple( "Config", diff --git a/orttraining/orttraining/test/python/orttraining_run_glue.py b/orttraining/orttraining/test/python/orttraining_run_glue.py index a9b514599fb78..bd7b7f993ec06 100644 --- a/orttraining/orttraining/test/python/orttraining_run_glue.py +++ b/orttraining/orttraining/test/python/orttraining_run_glue.py @@ -3,12 +3,12 @@ import dataclasses import logging import os +import unittest from dataclasses import dataclass, field from typing import Dict, Optional -import unittest + import numpy as np from numpy.testing import assert_allclose - from transformers import ( AutoConfig, AutoModelForSequenceClassification, @@ -24,7 +24,7 @@ ) import onnxruntime -from onnxruntime.capi.ort_trainer import ORTTrainer, LossScaler, ModelDescription, IODescription +from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer try: from onnxruntime.capi._pybind_state import ( @@ -40,9 +40,8 @@ pass -from orttraining_transformer_trainer import ORTTransformerTrainer - import torch +from orttraining_transformer_trainer import ORTTransformerTrainer logger = logging.getLogger(__name__) diff --git a/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py b/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py index a4c069c683e1c..5d0143867deb7 100644 --- a/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py +++ b/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py @@ -4,12 +4,15 @@ import dataclasses import logging import os +import unittest from dataclasses import dataclass, field from typing import Dict, Optional -import unittest + import numpy as np +import torch from numpy.testing import assert_allclose - +from orttraining_run_glue import verify_old_and_new_api_are_equal +from orttraining_transformer_trainer import ORTTransformerTrainer from transformers import ( AutoConfig, AutoModelForMultipleChoice, @@ -20,16 +23,10 @@ TrainingArguments, set_seed, ) +from utils_multiple_choice import MultipleChoiceDataset, Split, SwagProcessor import onnxruntime -from onnxruntime.capi.ort_trainer import ORTTrainer, LossScaler, ModelDescription, IODescription - -from orttraining_transformer_trainer import ORTTransformerTrainer - -import torch - -from utils_multiple_choice import MultipleChoiceDataset, Split, SwagProcessor -from orttraining_run_glue import verify_old_and_new_api_are_equal +from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer logger = logging.getLogger(__name__) diff --git a/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py b/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py index 66de14dce6852..8894ea9835848 100644 --- a/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py +++ b/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py @@ -1,5 +1,5 @@ -from orttraining_test_model_transform import add_name, fix_transpose, add_expand_shape from 
orttraining_test_layer_norm_transform import layer_norm_transform +from orttraining_test_model_transform import add_expand_shape, add_name, fix_transpose def postprocess_model(model): diff --git a/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py b/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py index 2ef8322bd9cfd..87eef2c8c40fe 100644 --- a/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py +++ b/orttraining/orttraining/test/python/orttraining_test_checkpoint_storage.py @@ -2,12 +2,13 @@ # Licensed under the MIT License. # orttraining_test_checkpoint_storage.py -import pytest -import torch -import numpy as np import os -import shutil import pickle +import shutil + +import numpy as np +import pytest +import torch from onnxruntime.training import _checkpoint_storage diff --git a/orttraining/orttraining/test/python/orttraining_test_data_loader.py b/orttraining/orttraining/test/python/orttraining_test_data_loader.py index 2df5a3964bc94..cfe44b83883b6 100644 --- a/orttraining/orttraining/test/python/orttraining_test_data_loader.py +++ b/orttraining/orttraining/test/python/orttraining_test_data_loader.py @@ -1,7 +1,9 @@ -from enum import Enum import random +from enum import Enum + import torch -from torch.utils.data import Dataset, DataLoader +from torch.utils.data import DataLoader, Dataset + from onnxruntime.capi.ort_trainer import generate_sample global_rng = random.Random() diff --git a/orttraining/orttraining/test/python/orttraining_test_debuggability.py b/orttraining/orttraining/test/python/orttraining_test_debuggability.py index d3d6987f47c2a..99c95ff09cecf 100644 --- a/orttraining/orttraining/test/python/orttraining_test_debuggability.py +++ b/orttraining/orttraining/test/python/orttraining_test_debuggability.py @@ -1,33 +1,23 @@ import inspect -import onnx import os + +import _test_helpers +import onnx import pytest import torch import torchvision - +from _test_commons import _load_pytorch_transformer_model from numpy.testing import assert_allclose from onnxruntime import set_seed -from onnxruntime.capi.ort_trainer import ( - IODescription as Legacy_IODescription, - ModelDescription as Legacy_ModelDescription, - LossScaler as Legacy_LossScaler, - ORTTrainer as Legacy_ORTTrainer, -) -from onnxruntime.training import ( - _utils, - amp, - optim, - orttrainer, - TrainStepInfo, - model_desc_validation as md_val, - orttrainer_options as orttrainer_options, -) - -from _test_commons import _load_pytorch_transformer_model - -import _test_helpers - +from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription +from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler +from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription +from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer +from onnxruntime.training import TrainStepInfo, _utils, amp +from onnxruntime.training import model_desc_validation as md_val +from onnxruntime.training import optim, orttrainer +from onnxruntime.training import orttrainer_options as orttrainer_options ############################################################################### # Testing starts here ######################################################### diff --git a/orttraining/orttraining/test/python/orttraining_test_dort.py b/orttraining/orttraining/test/python/orttraining_test_dort.py index 3c2166a735e8f..ae6d1ac3c46f4 100644 --- a/orttraining/orttraining/test/python/orttraining_test_dort.py +++ 
b/orttraining/orttraining/test/python/orttraining_test_dort.py @@ -7,7 +7,7 @@ from torch import nn from torch.nn import functional as F -from onnxruntime.training.torchdynamo.register_backend import ort, aot_ort +from onnxruntime.training.torchdynamo.register_backend import aot_ort, ort class TestTorchDynamoOrt(unittest.TestCase): diff --git a/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py b/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py index c67de052753ad..8f81d03dbae55 100644 --- a/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py +++ b/orttraining/orttraining/test/python/orttraining_test_experimental_gradient_graph.py @@ -4,8 +4,9 @@ import numpy as np import onnx -import onnxruntime import torch + +import onnxruntime from onnxruntime.training.experimental import export_gradient_graph diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py index 188e053e4711e..9b499963506ec 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py @@ -3,18 +3,19 @@ import copy -import onnxruntime import os import sys + +import _test_helpers import torch import torch.distributed as dist import torch.multiprocessing as mp -from onnxruntime.training.ortmodule import ORTModule -from onnxruntime.training.ortmodule._graph_execution_manager_factory import GraphExecutionManagerFactory from torch.nn.parallel import DistributedDataParallel as DDP from torch.nn.parameter import Parameter -import _test_helpers +import onnxruntime +from onnxruntime.training.ortmodule import ORTModule +from onnxruntime.training.ortmodule._graph_execution_manager_factory import GraphExecutionManagerFactory torch.manual_seed(1) onnxruntime.set_seed(1) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py index 8f1d57ff138a8..fcbb26e707f4e 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py @@ -1,23 +1,28 @@ -import logging import argparse -import torch -import wget +import datetime +import logging import os -import pandas as pd -import zipfile -from transformers import BertTokenizer, AutoConfig -from sklearn.model_selection import train_test_split -from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler -from transformers import BertForSequenceClassification, AdamW, BertConfig -from transformers import get_linear_schedule_with_warmup -import numpy as np import random import time -import datetime +import zipfile +import numpy as np +import pandas as pd +import torch +import wget +from sklearn.model_selection import train_test_split +from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset +from transformers import ( + AdamW, + AutoConfig, + BertConfig, + BertForSequenceClassification, + BertTokenizer, + get_linear_schedule_with_warmup, +) import onnxruntime -from onnxruntime.training.ortmodule import ORTModule, DebugOptions +from onnxruntime.training.ortmodule import DebugOptions, ORTModule def train(model, optimizer, scheduler, train_dataloader, epoch, device, args): diff --git 
a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py index 42697766c9815..4061b24a276f8 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py @@ -1,23 +1,28 @@ -import logging import argparse -import torch -import wget +import datetime +import logging import os -import pandas as pd -import zipfile -from transformers import BertTokenizer, AutoConfig -from sklearn.model_selection import train_test_split -from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler -from transformers import BertForSequenceClassification, AdamW, BertConfig -from transformers import get_linear_schedule_with_warmup -import numpy as np import random import time -import datetime +import zipfile +import numpy as np +import pandas as pd +import torch +import wget +from sklearn.model_selection import train_test_split +from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset +from transformers import ( + AdamW, + AutoConfig, + BertConfig, + BertForSequenceClassification, + BertTokenizer, + get_linear_schedule_with_warmup, +) import onnxruntime -from onnxruntime.training.ortmodule import ORTModule, DebugOptions +from onnxruntime.training.ortmodule import DebugOptions, ORTModule def train(model, optimizer, scaler, scheduler, train_dataloader, epoch, device, args): diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py index 21ebdb52037d4..5d4c36b31c526 100755 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py @@ -1,14 +1,14 @@ +import argparse + +import deepspeed import torch -from torch import nn, optim import torch.distributed as dist -import deepspeed -from deepspeed.pipe import PipelineModule, LayerSpec +from deepspeed.pipe import LayerSpec, PipelineModule from deepspeed.utils import RepeatingLoader +from torch import nn, optim from onnxruntime.training.ortmodule import ORTModule, _utils -import argparse - # USAGE: # pip install deepspeed # deepspeed orttraining_test_ortmodule_deepspeed_pipeline_parallel.py --deepspeed_config=orttraining_test_ortmodule_deepspeed_pipeline_parallel_config.json --pipeline-parallel-size 2 --steps=100 diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_zero_stage_1.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_zero_stage_1.py index 037558663b428..91c48f4b0edd3 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_zero_stage_1.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_zero_stage_1.py @@ -9,15 +9,15 @@ ``` """ import argparse -import torch import time -from torchvision import datasets, transforms + +import deepspeed +import torch import torch.distributed as dist +from torchvision import datasets, transforms import onnxruntime -from onnxruntime.training.ortmodule import ORTModule, DebugOptions, LogLevel - -import deepspeed +from onnxruntime.training.ortmodule import DebugOptions, LogLevel, ORTModule class 
NeuralNet(torch.nn.Module): diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py index 7b2e08dc9ed6c..b8f824db28d8c 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py @@ -1,7 +1,9 @@ import os + import torch -from onnxruntime.training import ortmodule + from onnxruntime.capi import _pybind_state as C +from onnxruntime.training import ortmodule from onnxruntime.training.ortmodule.experimental.json_config import load_from_json diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py index e1a7dd591ec36..59ae550999601 100755 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py @@ -1,16 +1,18 @@ import argparse +import os +import time + +import numpy as np import torch import torch.distributed as dist import torch.multiprocessing as mp -from fairscale.optim.oss import OSS -from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP import torchvision -from torchvision import datasets, transforms -import time +from fairscale.nn.data_parallel import ShardedDataParallel as ShardedDDP +from fairscale.optim.oss import OSS from torch.nn.parallel import DistributedDataParallel as DDP -import os -from onnxruntime.training.ortmodule import ORTModule, DebugOptions -import numpy as np +from torchvision import datasets, transforms + +from onnxruntime.training.ortmodule import DebugOptions, ORTModule # Usage : # pip install fairscale diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_poc.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_poc.py index bb94a6c514977..6cc060e10665c 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_poc.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_poc.py @@ -1,11 +1,12 @@ import argparse import logging -import torch import time + +import torch from torchvision import datasets, transforms import onnxruntime -from onnxruntime.training.ortmodule import ORTModule, DebugOptions +from onnxruntime.training.ortmodule import DebugOptions, ORTModule class NeuralNet(torch.nn.Module): diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py index 93426659991fe..ee90c614af069 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py @@ -1,16 +1,15 @@ # This test script is a modified version of Pytorch's tutorial. # For details, see https://pytorch.org/tutorials/intermediate/ddp_tutorial.html. 
+import argparse import os import sys import tempfile -import torch -import argparse +import torch import torch.distributed as dist +import torch.multiprocessing as mp import torch.nn as nn import torch.optim as optim -import torch.multiprocessing as mp - from torch.nn.parallel import DistributedDataParallel as DDP import onnxruntime diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_torch_lightning_basic.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_torch_lightning_basic.py index 9f8f273837d85..626bc4c946ed1 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_torch_lightning_basic.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_torch_lightning_basic.py @@ -1,13 +1,13 @@ import argparse from multiprocessing import cpu_count +import pytorch_lightning as pl import torch -from torch import nn import torch.nn.functional as F +from torch import nn +from torch.utils.data import DataLoader from torchvision import transforms from torchvision.datasets import MNIST -from torch.utils.data import DataLoader -import pytorch_lightning as pl import onnxruntime from onnxruntime.training.ortmodule import ORTModule diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py index 531085f21ce61..f2aac9f17f2aa 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py @@ -1,33 +1,26 @@ import copy -from functools import partial import inspect import math +import os +from functools import partial + +import _test_commons +import _test_helpers import numpy as np -from numpy.testing import assert_allclose import onnx -import os import pytest import torch +from numpy.testing import assert_allclose import onnxruntime -from onnxruntime.capi.ort_trainer import ( - IODescription as Legacy_IODescription, - ModelDescription as Legacy_ModelDescription, - LossScaler as Legacy_LossScaler, - ORTTrainer as Legacy_ORTTrainer, -) -from onnxruntime.training import ( - _utils, - amp, - checkpoint, - optim, - orttrainer, - TrainStepInfo, - model_desc_validation as md_val, - orttrainer_options as orttrainer_options, -) - -import _test_commons, _test_helpers +from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription +from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler +from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription +from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer +from onnxruntime.training import TrainStepInfo, _utils, amp, checkpoint +from onnxruntime.training import model_desc_validation as md_val +from onnxruntime.training import optim, orttrainer +from onnxruntime.training import orttrainer_options as orttrainer_options ############################################################################### # Helper functions ############################################################ diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py index 99606d923e1d2..97688db9262cb 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py @@ 
-1,10 +1,12 @@ -import pytest -from unittest.mock import patch, Mock -from _test_commons import _load_pytorch_transformer_model -from onnxruntime.training import amp, checkpoint, optim, orttrainer, _checkpoint_storage +from unittest.mock import Mock, patch + import numpy as np import onnx +import pytest import torch +from _test_commons import _load_pytorch_transformer_model + +from onnxruntime.training import _checkpoint_storage, amp, checkpoint, optim, orttrainer # Helper functions diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py index 57b5af656eb66..ce82fcd1f1045 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py @@ -2,7 +2,7 @@ import os import tempfile from functools import partial -from packaging.version import Version as StrictVersion + import _test_commons import _test_helpers import onnx @@ -10,6 +10,7 @@ import torch import torch.nn.functional as F from numpy.testing import assert_allclose +from packaging.version import Version as StrictVersion from onnxruntime import SessionOptions, set_seed from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription diff --git a/orttraining/orttraining/test/python/orttraining_test_ortvalue.py b/orttraining/orttraining/test/python/orttraining_test_ortvalue.py index cfdc52a9f0848..26792f7491384 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortvalue.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortvalue.py @@ -3,20 +3,22 @@ # Licensed under the MIT License. # pylint: disable=W0212,C0114,C0116 -import unittest import copy import sys +import unittest + +import _test_helpers import numpy as np -from numpy.testing import assert_almost_equal -import onnxruntime as onnxrt -from onnxruntime.capi.onnxruntime_pybind11_state import OrtValue as C_OrtValue, OrtValueVector -from onnxruntime.training.ortmodule import ORTModule, _utils -from onnxruntime.capi import _pybind_state as C import torch +from numpy.testing import assert_almost_equal from torch._C import _from_dlpack from torch.utils.dlpack import from_dlpack -import _test_helpers +import onnxruntime as onnxrt +from onnxruntime.capi import _pybind_state as C +from onnxruntime.capi.onnxruntime_pybind11_state import OrtValue as C_OrtValue +from onnxruntime.capi.onnxruntime_pybind11_state import OrtValueVector +from onnxruntime.training.ortmodule import ORTModule, _utils has_cuda = torch.cuda.is_available() diff --git a/orttraining/orttraining/test/python/orttraining_test_sampler.py b/orttraining/orttraining/test/python/orttraining_test_sampler.py index c47b721b7d100..a1281fbf286f2 100644 --- a/orttraining/orttraining/test/python/orttraining_test_sampler.py +++ b/orttraining/orttraining/test/python/orttraining_test_sampler.py @@ -2,9 +2,11 @@ # Licensed under the MIT License. 
# orttraining_test_sampler.py +import random + import torch + from onnxruntime.training.utils.data import sampler -import random class MyDataset(torch.utils.data.Dataset): diff --git a/orttraining/orttraining/test/python/orttraining_test_transformers.py b/orttraining/orttraining/test/python/orttraining_test_transformers.py index 611e7f0bc0c6b..64e356aaded7d 100644 --- a/orttraining/orttraining/test/python/orttraining_test_transformers.py +++ b/orttraining/orttraining/test/python/orttraining_test_transformers.py @@ -1,19 +1,18 @@ -import unittest -import shutil -import pytest import os import random +import shutil +import unittest + import numpy as np +import pytest +import torch from numpy.testing import assert_allclose +from orttraining_test_data_loader import BatchArgsOption, ids_tensor +from orttraining_test_utils import get_lr, run_test from transformers import BertConfig, BertForPreTraining, BertModel -from orttraining_test_data_loader import ids_tensor, BatchArgsOption -from orttraining_test_utils import run_test, get_lr - import onnxruntime -from onnxruntime.capi.ort_trainer import ORTTrainer, IODescription, ModelDescription, LossScaler - -import torch +from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer class BertModelTest(unittest.TestCase): diff --git a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py index 0185670dac79f..ccbd93fdcd99d 100644 --- a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py +++ b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py @@ -4,35 +4,27 @@ import logging import os import random - from typing import Callable, Dict, List, NamedTuple, Optional, Tuple import numpy as np import torch +from orttraining_test_bert_postprocess import postprocess_model from torch import nn from torch.utils.data.dataloader import DataLoader from torch.utils.data.dataset import Dataset from torch.utils.data.distributed import DistributedSampler from torch.utils.data.sampler import RandomSampler, SequentialSampler from tqdm import tqdm, trange - from transformers.data.data_collator import DataCollator, DefaultDataCollator from transformers.modeling_utils import PreTrainedModel from transformers.training_args import TrainingArguments import onnxruntime -from orttraining_test_bert_postprocess import postprocess_model -from onnxruntime.capi.ort_trainer import ORTTrainer, LossScaler, ModelDescription, IODescription - -from onnxruntime.training import ( - _utils, - amp, - optim, - orttrainer, - TrainStepInfo, - model_desc_validation as md_val, - orttrainer_options as orttrainer_options, -) +from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer +from onnxruntime.training import TrainStepInfo, _utils, amp +from onnxruntime.training import model_desc_validation as md_val +from onnxruntime.training import optim, orttrainer +from onnxruntime.training import orttrainer_options as orttrainer_options from onnxruntime.training.optim import LinearWarmupLRScheduler, _LRScheduler try: diff --git a/orttraining/orttraining/test/python/perf_log/ort_module_perf_test_tools.py b/orttraining/orttraining/test/python/perf_log/ort_module_perf_test_tools.py index b7b619a92e53b..a71ed93001230 100644 --- a/orttraining/orttraining/test/python/perf_log/ort_module_perf_test_tools.py +++ b/orttraining/orttraining/test/python/perf_log/ort_module_perf_test_tools.py @@ -1,13 +1,13 @@ # 
https://docs.microsoft.com/en-us/azure/mysql/connect-python -import mysql.connector -from mysql.connector import errorcode -import git -import os - import argparse +import os from datetime import datetime +import git +import mysql.connector +from mysql.connector import errorcode + def get_repo_commit(repo_path): repo = git.Repo(repo_path, search_parent_directories=True) diff --git a/orttraining/orttraining/test/python/utils_multiple_choice.py b/orttraining/orttraining/test/python/utils_multiple_choice.py index 562ecbf8c496d..9d859060b42ad 100644 --- a/orttraining/orttraining/test/python/utils_multiple_choice.py +++ b/orttraining/orttraining/test/python/utils_multiple_choice.py @@ -10,13 +10,11 @@ from enum import Enum from typing import List, Optional +import torch import tqdm from filelock import FileLock - -from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available - -import torch from torch.utils.data.dataset import Dataset +from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available logger = logging.getLogger(__name__) diff --git a/orttraining/pytorch_frontend_examples/mnist_training.py b/orttraining/pytorch_frontend_examples/mnist_training.py index afab8a3bf7ec2..59598c8d2ed1f 100644 --- a/orttraining/pytorch_frontend_examples/mnist_training.py +++ b/orttraining/pytorch_frontend_examples/mnist_training.py @@ -4,18 +4,17 @@ ## Model testing is not complete. -from __future__ import print_function import argparse +import os + import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim +from mpi4py import MPI from torchvision import datasets, transforms -import numpy as np -import os from onnxruntime.capi.ort_trainer import IODescription, ModelDescription, ORTTrainer -from mpi4py import MPI try: from onnxruntime.capi._pybind_state import set_cuda_device_id diff --git a/orttraining/tools/amdgpu/script/rocprof.py b/orttraining/tools/amdgpu/script/rocprof.py index dc91d13606fb0..4653f427014da 100644 --- a/orttraining/tools/amdgpu/script/rocprof.py +++ b/orttraining/tools/amdgpu/script/rocprof.py @@ -1,7 +1,8 @@ import argparse -import numpy as np -import os import csv +import os + +import numpy as np parser = argparse.ArgumentParser() parser.add_argument("--input", type=str) diff --git a/orttraining/tools/ci_test/compare_huggingface.py b/orttraining/tools/ci_test/compare_huggingface.py index c484cfb56adcb..fd7244a0cf0b7 100755 --- a/orttraining/tools/ci_test/compare_huggingface.py +++ b/orttraining/tools/ci_test/compare_huggingface.py @@ -1,6 +1,6 @@ -import sys -import json import collections +import json +import sys actual = sys.argv[1] expect = sys.argv[2] diff --git a/orttraining/tools/ci_test/run_batch_size_test.py b/orttraining/tools/ci_test/run_batch_size_test.py index 4a7ec51062914..cd93c44cf73b6 100755 --- a/orttraining/tools/ci_test/run_batch_size_test.py +++ b/orttraining/tools/ci_test/run_batch_size_test.py @@ -4,9 +4,9 @@ import argparse import collections +import os import subprocess import sys -import os def parse_args(): diff --git a/orttraining/tools/ci_test/run_bert_perf_test.py b/orttraining/tools/ci_test/run_bert_perf_test.py index 8f6a59c1fd883..27cd9fb01c99b 100644 --- a/orttraining/tools/ci_test/run_bert_perf_test.py +++ b/orttraining/tools/ci_test/run_bert_perf_test.py @@ -3,10 +3,10 @@ # Licensed under the MIT License. 
import argparse +import json +import os import subprocess import sys -import os -import json from collections import namedtuple SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) diff --git a/orttraining/tools/ci_test/run_convergence_test.py b/orttraining/tools/ci_test/run_convergence_test.py index 568e3c4cd9c4c..bdea6ad95c944 100755 --- a/orttraining/tools/ci_test/run_convergence_test.py +++ b/orttraining/tools/ci_test/run_convergence_test.py @@ -3,12 +3,12 @@ # Licensed under the MIT License. import argparse +import os import subprocess import sys import tempfile -import os -from compare_results import compare_results_files, Comparisons +from compare_results import Comparisons, compare_results_files SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) diff --git a/orttraining/tools/ci_test/run_gpt2_perf_test.py b/orttraining/tools/ci_test/run_gpt2_perf_test.py index 8c0ac1953feed..e64fc3c7812e3 100644 --- a/orttraining/tools/ci_test/run_gpt2_perf_test.py +++ b/orttraining/tools/ci_test/run_gpt2_perf_test.py @@ -3,9 +3,9 @@ # Licensed under the MIT License. import argparse +import os import subprocess import sys -import os from collections import namedtuple SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__)) diff --git a/orttraining/tools/scripts/experiment.py b/orttraining/tools/scripts/experiment.py index 0e3e2ceead465..a3ef82577208f 100644 --- a/orttraining/tools/scripts/experiment.py +++ b/orttraining/tools/scripts/experiment.py @@ -5,15 +5,12 @@ from azure.common.client_factory import get_client_from_cli_profile from azure.mgmt.containerregistry import ContainerRegistryManagementClient - -from azureml.core import Workspace, Experiment, Run, Datastore -from azureml.core.compute import ComputeTarget, AmlCompute - +from azureml.core import Datastore, Experiment, Run, Workspace +from azureml.core.compute import AmlCompute, ComputeTarget from azureml.core.container_registry import ContainerRegistry -from azureml.train.estimator import Estimator - -from azureml.data.azure_storage_datastore import AzureFileDatastore, AzureBlobDatastore from azureml.core.runconfig import MpiConfiguration, RunConfiguration +from azureml.data.azure_storage_datastore import AzureBlobDatastore, AzureFileDatastore +from azureml.train.estimator import Estimator parser = argparse.ArgumentParser() parser.add_argument( diff --git a/orttraining/tools/scripts/gpt2_model_transform.py b/orttraining/tools/scripts/gpt2_model_transform.py index 9e018a34069e5..ca3680987cb02 100644 --- a/orttraining/tools/scripts/gpt2_model_transform.py +++ b/orttraining/tools/scripts/gpt2_model_transform.py @@ -1,11 +1,10 @@ ### Be noted: this script is developed against the model exported from Megatron GPT2 Pretraining script. 
import sys -import onnx -from onnx import helper, shape_inference -from onnx import TensorProto + import numpy as np -from onnx import numpy_helper +import onnx +from onnx import TensorProto, helper, numpy_helper, shape_inference if len(sys.argv) < 2: print("Please give model path...") diff --git a/orttraining/tools/scripts/layer_norm_transform.py b/orttraining/tools/scripts/layer_norm_transform.py index 0ad4ea2559207..21b53ddfe44e7 100644 --- a/orttraining/tools/scripts/layer_norm_transform.py +++ b/orttraining/tools/scripts/layer_norm_transform.py @@ -1,8 +1,9 @@ -import sys import os.path -from onnx import * -import onnx +import sys + import numpy as np +import onnx +from onnx import * def find_node(graph_proto, op_type): diff --git a/orttraining/tools/scripts/model_transform.py b/orttraining/tools/scripts/model_transform.py index 8c0be5b08c04a..4de49278622b0 100644 --- a/orttraining/tools/scripts/model_transform.py +++ b/orttraining/tools/scripts/model_transform.py @@ -1,9 +1,8 @@ import sys -import onnx -from onnx import helper, shape_inference -from onnx import TensorProto + import numpy as np -from onnx import numpy_helper +import onnx +from onnx import TensorProto, helper, numpy_helper, shape_inference if len(sys.argv) < 2: print("Please give model path...") diff --git a/orttraining/tools/scripts/nv_run_pretraining.py b/orttraining/tools/scripts/nv_run_pretraining.py index e7977595ac813..0ed4fe2b9b6f5 100644 --- a/orttraining/tools/scripts/nv_run_pretraining.py +++ b/orttraining/tools/scripts/nv_run_pretraining.py @@ -16,38 +16,36 @@ """BERT finetuning runner.""" +import argparse + # ================== import csv -import os -import time import logging -import argparse +import math +import multiprocessing +import os import random +import time +from concurrent.futures import ProcessPoolExecutor + +import amp_C +import apex_C import h5py -from tqdm import tqdm, trange -import os import numpy as np import torch -from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, Dataset -from torch.utils.data.distributed import DistributedSampler -import math from apex import amp -import multiprocessing - -from tokenization import BertTokenizer -from modeling import BertForPreTraining, BertConfig -from optimization import BertLAMB - -from file_utils import PYTORCH_PRETRAINED_BERT_CACHE -from utils import is_main_process +from apex.amp import _amp_state from apex.parallel import DistributedDataParallel as DDP -from schedulers import LinearWarmUpScheduler from apex.parallel.distributed import flat_dist_call -import amp_C -import apex_C -from apex.amp import _amp_state - -from concurrent.futures import ProcessPoolExecutor +from file_utils import PYTORCH_PRETRAINED_BERT_CACHE +from modeling import BertConfig, BertForPreTraining +from optimization import BertLAMB +from schedulers import LinearWarmUpScheduler +from tokenization import BertTokenizer +from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler +from torch.utils.data.distributed import DistributedSampler +from tqdm import tqdm, trange +from utils import is_main_process logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO diff --git a/orttraining/tools/scripts/opset12_model_transform.py b/orttraining/tools/scripts/opset12_model_transform.py index c19aceb6216d8..73111c0e40a4a 100644 --- a/orttraining/tools/scripts/opset12_model_transform.py +++ b/orttraining/tools/scripts/opset12_model_transform.py @@ -13,11 +13,10 @@ # 
bert-base-uncased_L_12_H_768_A_12_V_30528_S_512_Dp_0.1_optimized_layer_norm_opset12.onnx import sys -import onnx -from onnx import helper, shape_inference -from onnx import TensorProto + import numpy as np -from onnx import numpy_helper +import onnx +from onnx import TensorProto, helper, numpy_helper, shape_inference if len(sys.argv) < 2: print("Please give model path...") diff --git a/orttraining/tools/scripts/performance_investigation.py b/orttraining/tools/scripts/performance_investigation.py index b064b13fa6d34..c8550a4d73c49 100644 --- a/orttraining/tools/scripts/performance_investigation.py +++ b/orttraining/tools/scripts/performance_investigation.py @@ -1,4 +1,5 @@ import argparse + import onnx parser = argparse.ArgumentParser(description="ONNX file analyzer for performance investigation.") diff --git a/orttraining/tools/scripts/pipeline_model_split.py b/orttraining/tools/scripts/pipeline_model_split.py index b95bbe49003ec..6bdaac31f435b 100644 --- a/orttraining/tools/scripts/pipeline_model_split.py +++ b/orttraining/tools/scripts/pipeline_model_split.py @@ -1,9 +1,8 @@ -import sys import os +import sys + import onnx -from onnx import helper -from onnx import TensorProto -from onnx import OperatorSetIdProto +from onnx import OperatorSetIdProto, TensorProto, helper # Edge that needs to be cut for the split. # If the edge is feeding into more than one nodes, and not all the nodes belong to the same cut, diff --git a/orttraining/tools/scripts/sqldb_to_tensors.py b/orttraining/tools/scripts/sqldb_to_tensors.py index cf24e0c294450..7476d2cbabc32 100644 --- a/orttraining/tools/scripts/sqldb_to_tensors.py +++ b/orttraining/tools/scripts/sqldb_to_tensors.py @@ -2,6 +2,7 @@ # Licensed under the MIT License. import sqlite3 + import onnx from onnx import numpy_helper diff --git a/orttraining/tools/scripts/watch_experiment.py b/orttraining/tools/scripts/watch_experiment.py index 33bb73f8dc9b9..310dc408431e3 100644 --- a/orttraining/tools/scripts/watch_experiment.py +++ b/orttraining/tools/scripts/watch_experiment.py @@ -1,13 +1,12 @@ import argparse -import sys import os - +import sys from concurrent.futures import ThreadPoolExecutor -from requests import Session from threading import Event, Thread -from azureml.core import Workspace, Experiment, Run from azureml._run_impl.run_watcher import RunWatcher +from azureml.core import Experiment, Run, Workspace +from requests import Session parser = argparse.ArgumentParser() parser.add_argument( From 22038abda0d556640bac43ebc6c8a232ac81c925 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 04:56:10 +0000 Subject: [PATCH 11/33] Fix some --- .../tools/quantization/qdq_loss_debug.py | 2 +- .../tools/transformers/benchmark_helper.py | 3 +- .../operator_type_usage_processors.py | 38 +++++++++---------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/onnxruntime/python/tools/quantization/qdq_loss_debug.py b/onnxruntime/python/tools/quantization/qdq_loss_debug.py index a3adf675d890c..5eed354a05f89 100644 --- a/onnxruntime/python/tools/quantization/qdq_loss_debug.py +++ b/onnxruntime/python/tools/quantization/qdq_loss_debug.py @@ -10,7 +10,7 @@ A use case is to debug quantization induced accuracy drop. An AI engineer can run the original float32 model and the quantized model with the same inputs, then compare the corresponding activations between the two models to find -where the divergence is. +where the divergence is. 
Example Usage: diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index 64efeb22f4ad4..90161a61adf84 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -81,8 +81,9 @@ def create_onnxruntime_session( num_threads=-1, enable_profiling=False, verbose=False, - provider_options={}, # map execution provider name to its option + provider_options: Optional[dict] = None, # map execution provider name to its option ): + provider_options = provider_options or {} session = None try: sess_options = onnxruntime.SessionOptions() diff --git a/tools/python/util/ort_format_model/operator_type_usage_processors.py b/tools/python/util/ort_format_model/operator_type_usage_processors.py index 8f21298518f87..0d0ca38a4e2a3 100644 --- a/tools/python/util/ort_format_model/operator_type_usage_processors.py +++ b/tools/python/util/ort_format_model/operator_type_usage_processors.py @@ -1,10 +1,12 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import json -import typing from abc import ABC, abstractmethod +from typing import Dict, Optional, Set + import ort_flatbuffers_py.fbs as fbs from .types import FbsTypeInfo, value_name_to_typestr @@ -65,9 +67,7 @@ def __init__(self, domain: str, optype: str): def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict): pass - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: Optional[Set[str]]): """ Given the string from a kernel registration, determine if the registration is required or not. :param type_in_registration: Type string from kernel registration @@ -113,10 +113,10 @@ def __init__( self, domain: str, optype: str, - inputs: [int] = [0], - outputs: [int] = [], - required_input_types: typing.Dict[int, typing.Set[str]] = {}, - required_output_types: typing.Dict[int, typing.Set[str]] = {}, + inputs: Optional[list[int]] = None, + outputs: Optional[list[int]] = None, + required_input_types: Optional[Dict[int, Set[str]]] = None, + required_output_types: Optional[Dict[int, Set[str]]] = None, ): """ Create DefaultTypeUsageProcessor. Types for one or more inputs and/or outputs can be tracked by the processor. @@ -134,6 +134,10 @@ def __init__( :param required_output_types: Required output types. May be empty. """ super().__init__(domain, optype) + inputs = inputs or [0] + outputs = outputs or [] + required_input_types = required_input_types or {} + required_output_types = required_output_types or {} self._input_types = {} self._output_types = {} @@ -190,9 +194,7 @@ def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict): type_str = value_name_to_typestr(node.Outputs(o), value_name_to_typeinfo) self._output_types[o].add(type_str) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: Optional[Set[str]]): if 0 not in self._input_types.keys(): # currently all standard typed registrations are for input 0. # custom registrations can be handled by operator specific processors (e.g. OneHotProcessor below). 
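A note on the typing changes in this commit: `from __future__ import annotations` (PEP 563) makes every annotation a lazily stored string instead of an evaluated object, which is what lets spellings such as `Optional[list[int]]` parse even on interpreters where the built-in `list` is not yet subscriptable. A minimal sketch of that behavior, using invented names and only the standard library:

```python
from __future__ import annotations  # PEP 563: annotations are stored as strings

from typing import Optional


def head(xs: Optional[list[int]] = None) -> Optional[int]:
    # `list[int]` is never evaluated at runtime under the future import,
    # so this definition works even on Python 3.7/3.8
    return xs[0] if xs else None


print(head.__annotations__["xs"])  # Optional[list[int]]
print(head([3, 2, 1]))  # 3
```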
@@ -266,9 +268,7 @@ def __init__(self, domain: str, optype: str): # init with tracking of input 1 only. super().__init__(domain, optype, inputs=[1], outputs=[]) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: Optional[Set[str]]): return self.is_input_type_enabled(type_in_registration, 1, globally_allowed_types) @@ -281,9 +281,7 @@ def __init__(self, domain: str, optype: str): # init with tracking of output 0 only. super().__init__(domain, optype, inputs=[], outputs=[0]) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: Optional[Set[str]]): return self.is_output_type_enabled(type_in_registration, 0, globally_allowed_types) @@ -305,9 +303,7 @@ def process_node(self, node: fbs.Node, value_name_to_typeinfo: dict): key = (type0, type2, type1) self._triples.add(key) - def is_typed_registration_needed( - self, type_in_registration: str, globally_allowed_types: typing.Optional[typing.Set[str]] - ): + def is_typed_registration_needed(self, type_in_registration: str, globally_allowed_types: Optional[Set[str]]): # the OneHot registration involves a concatenation of the 3 types involved reg_types = tuple([_reg_type_to_cpp_type(reg_type) for reg_type in _split_reg_types(type_in_registration)]) if globally_allowed_types is not None: @@ -640,7 +636,7 @@ class GloballyAllowedTypesOpTypeImplFilter(OpTypeImplFilterInterface): _valid_allowed_types = set(FbsTypeInfo.tensordatatype_to_string.values()) - def __init__(self, globally_allowed_types: typing.Set[str]): + def __init__(self, globally_allowed_types: Set[str]): self._operator_processors = _create_operator_type_usage_processors() if not globally_allowed_types.issubset(self._valid_allowed_types): From e576bd269db98c0089eed16a608abcd403b8bf54 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 05:04:28 +0000 Subject: [PATCH 12/33] More mutable default args --- onnxruntime/python/backend/backend_rep.py | 2 +- onnxruntime/python/tools/onnxruntime_test.py | 6 ++++-- onnxruntime/python/tools/quantization/calibrate.py | 6 +++--- onnxruntime/python/tools/quantization/quant_utils.py | 8 ++++---- .../python/tools/transformers/models/gpt2/gpt2_helper.py | 2 +- .../python/tools/transformers/models/gpt2/gpt2_parity.py | 3 ++- onnxruntime/python/tools/transformers/onnx_model.py | 8 +++++--- onnxruntime/python/tools/transformers/optimizer.py | 2 +- tools/ci_build/build.py | 4 ++-- tools/python/util/ort_format_model/ort_model_processor.py | 3 ++- 10 files changed, 25 insertions(+), 19 deletions(-) diff --git a/onnxruntime/python/backend/backend_rep.py b/onnxruntime/python/backend/backend_rep.py index d7b18373f0e78..177af5aa6d8e3 100644 --- a/onnxruntime/python/backend/backend_rep.py +++ b/onnxruntime/python/backend/backend_rep.py @@ -23,7 +23,7 @@ def __init__(self, session): """ self._session = session - def run(self, inputs, **kwargs): # type: (Any, **Any) -> Tuple[Any, ...] + def run(self, inputs, **kwargs): """ Computes the prediction. See :meth:`onnxruntime.InferenceSession.run`. 
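The `More mutable default args` commit above, and the hunks that follow, all target the same Python pitfall: a default such as `symbolic_dims={}` or `data=[]` is evaluated once, when the `def` statement runs, and that single object is then shared by every call that omits the argument. A minimal sketch of the failure mode and of the `None`-sentinel rewrite being applied (illustrative code with invented names, not taken from the repository):

```python
def append_bad(item, bucket=[]):
    # `bucket` defaults to one list created at definition time,
    # so state silently leaks between unrelated calls
    bucket.append(item)
    return bucket


def append_good(item, bucket=None):
    # `None` sentinel: a fresh list is created on every call
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket


print(append_bad(1), append_bad(2))  # [1, 2] [1, 2] -- shared state
print(append_good(1), append_good(2))  # [1] [2] -- independent lists
```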
diff --git a/onnxruntime/python/tools/onnxruntime_test.py b/onnxruntime/python/tools/onnxruntime_test.py index 11759f3ad17d5..e2a3dc514e066 100644 --- a/onnxruntime/python/tools/onnxruntime_test.py +++ b/onnxruntime/python/tools/onnxruntime_test.py @@ -29,7 +29,8 @@ } -def generate_feeds(sess, symbolic_dims={}): +def generate_feeds(sess, symbolic_dims=None): + symbolic_dims = symbolic_dims or {} feeds = {} for input_meta in sess.get_inputs(): # replace any symbolic dimensions @@ -67,10 +68,11 @@ def run_model( num_iters=1, debug=None, profile=None, - symbolic_dims={}, + symbolic_dims=None, feeds=None, override_initializers=True, ): + symbolic_dims = symbolic_dims or {} if debug: print("Pausing execution ready for debugger to attach to pid: {}".format(os.getpid())) print("Press key to continue.") diff --git a/onnxruntime/python/tools/quantization/calibrate.py b/onnxruntime/python/tools/quantization/calibrate.py index b431647313ad4..a7feba05cef43 100644 --- a/onnxruntime/python/tools/quantization/calibrate.py +++ b/onnxruntime/python/tools/quantization/calibrate.py @@ -81,7 +81,7 @@ def __init__( self.infer_session = None self.execution_providers = ["CPUExecutionProvider"] - def set_execution_providers(self, execution_providers=["CPUExecutionProvider"]): + def set_execution_providers(self, execution_providers=("CPUExecutionProvider",)): """ reset the execution providers to execute the collect_data. It triggers to re-creating inference session. """ @@ -847,9 +847,9 @@ def create_calibrator( augmented_model_path="augmented_model.onnx", calibrate_method=CalibrationMethod.MinMax, use_external_data_format=False, - extra_options={}, + extra_options=None, ): - + extra_options = extra_options or {} calibrator = None if calibrate_method == CalibrationMethod.MinMax: # default settings for min-max algorithm diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index 2ceefeadcd1e5..e49e9fdb53a8e 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -245,8 +245,8 @@ def __init__( rmaxs, zero_points, scales, - data=[], - quantized_data=[], + data=None, + quantized_data=None, axis=None, ): self.name = name @@ -256,8 +256,8 @@ def __init__( # 1D tensor of zero points computed for each axis. scalar if axis is empty self.zero_points = zero_points self.scales = scales # 1D tensor of scales computed for each axis. scalar if axis is empty - self.data = data # original data from initializer TensorProto - self.quantized_data = quantized_data # weight-packed data from data + self.data = data or [] # original data from initializer TensorProto + self.quantized_data = quantized_data or [] # weight-packed data from data # Scalar to specify which dimension in the initializer to weight pack. 
self.axis = axis # If empty, single zero point and scales computed from a single rmin and rmax diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index 570e0b89329a3..250aa52a479c6 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -986,7 +986,7 @@ def get_onnx_paths( model_class: str = "GPT2LMHeadModel", has_past=True, new_folder=False, - remove_existing=["raw", "fp32", "fp16", "int8"], + remove_existing=frozenset(["raw", "fp32", "fp16", "int8"]), ): """Build a path name for given model based on given attributes.""" model_name = model_name_or_path diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py index 2af159b370f76..0943c8d059b7b 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py @@ -330,8 +330,9 @@ def run_candidate( task: ParityTask, args, last_matmul_node_name, - op_block_list=["FastGelu", "LayerNormalization"], + op_block_list=("FastGelu", "LayerNormalization"), ): + op_block_list = list(op_block_list) parameters = get_mixed_precision_parameters(args, last_matmul_node_name, op_block_list) op_block_list_str = ",".join(sorted(op_block_list)) diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index 1a84ba4e5cc50..4e8d63b7106fa 100644 --- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -31,7 +31,8 @@ def initialize(self, model): def disable_shape_inference(self): self.enable_shape_infer = False - def infer_runtime_shape(self, dynamic_axis_mapping={}, update=False): + def infer_runtime_shape(self, dynamic_axis_mapping=None, update=False): + dynamic_axis_mapping = dynamic_axis_mapping or {} if self.enable_shape_infer: if self.shape_infer_helper is None or update: self.shape_infer_helper = SymbolicShapeInferenceHelper(self.model) @@ -233,7 +234,7 @@ def get_parent(self, node, i, output_name_to_node=None): return output_name_to_node[input] - def match_first_parent(self, node, parent_op_type, output_name_to_node, exclude=[]): + def match_first_parent(self, node, parent_op_type, output_name_to_node, exclude=()): """ Find parent node based on constraints on op_type. @@ -262,7 +263,7 @@ def match_parent( parent_op_type, input_index=None, output_name_to_node=None, - exclude=[], + exclude=None, return_indice=None, ): """ @@ -280,6 +281,7 @@ def match_parent( Returns: parent: The matched parent node. """ + exclude = exclude or [] assert node is not None assert input_index is None or input_index >= 0 diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py index 74136bba276f8..0dac598ed6bf7 100644 --- a/onnxruntime/python/tools/transformers/optimizer.py +++ b/onnxruntime/python/tools/transformers/optimizer.py @@ -55,7 +55,7 @@ def optimize_by_onnxruntime( use_gpu: bool = False, optimized_model_path: Optional[str] = None, opt_level: Optional[int] = 99, - disabled_optimizers=[], + disabled_optimizers=None, ) -> str: """ Use onnxruntime to optimize model. 
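Two sentinel-restoring styles appear in these hunks: the explicit `if x is None: x = ...` test and the shorter `x = x or {}` (used for `symbolic_dims` and `extra_options` above and for `env` in `tools/ci_build/build.py` below). They are not interchangeable in general, because `or` tests truthiness rather than identity with `None`. A small sketch of the distinction, with invented names and a hypothetical default:

```python
def with_or(env=None):
    env = env or {"PATH": "/usr/bin"}  # replaces ANY falsy value, including {}
    return env


def with_sentinel(env=None):
    if env is None:  # replaces only the missing-argument sentinel
        env = {"PATH": "/usr/bin"}
    return env


print(with_or({}))  # {'PATH': '/usr/bin'} -- caller's empty dict is discarded
print(with_sentinel({}))  # {} -- caller's empty dict is respected
```

For these call sites an omitted argument and an empty dict are meant to behave identically, so the shorter form is safe; the distinction only matters when falsy inputs carry meaning.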
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 21485083f962e..05dc7ee7cbd88 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -737,11 +737,11 @@ def get_config_build_dir(build_dir, config): def run_subprocess( - args, cwd=None, capture_stdout=False, dll_path=None, shell=False, env={}, python_path=None, cuda_home=None + args, cwd=None, capture_stdout=False, dll_path=None, shell=False, env=None, python_path=None, cuda_home=None ): if isinstance(args, str): raise ValueError("args should be a sequence of strings, not a string") - + env = env or {} my_env = os.environ.copy() if dll_path: if is_windows(): diff --git a/tools/python/util/ort_format_model/ort_model_processor.py b/tools/python/util/ort_format_model/ort_model_processor.py index 7c65930e4cd0e..8fd228e1f4627 100644 --- a/tools/python/util/ort_format_model/ort_model_processor.py +++ b/tools/python/util/ort_format_model/ort_model_processor.py @@ -25,7 +25,7 @@ def __init__(self, model_path: str, required_ops: dict, processors: OperatorType self._op_type_processors = processors @staticmethod - def _setup_type_info(graph: fbs.Graph, outer_scope_value_typeinfo={}): + def _setup_type_info(graph: fbs.Graph, outer_scope_value_typeinfo=None): """ Setup the node args for this level of Graph. We copy the current list which represents the outer scope values, and add the local node args to that @@ -34,6 +34,7 @@ def _setup_type_info(graph: fbs.Graph, outer_scope_value_typeinfo={}): :param outer_scope_value_typeinfo: TypeInfo for outer scope values. Empty for the top-level graph in a model. :return: Dictionary of NodeArg name to TypeInfo """ + outer_scope_value_typeinfo = outer_scope_value_typeinfo or {} value_name_to_typeinfo = outer_scope_value_typeinfo.copy() for j in range(0, graph.NodeArgsLength()): n = graph.NodeArgs(j) From 718b123eb1bd1de9c0d9984314482a5780fb1957 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 05:10:05 +0000 Subject: [PATCH 13/33] Bare except --- .lintrunner.toml | 5 +++++ .../python/onnxruntime_collect_build_info.py | 4 ++-- onnxruntime/python/onnxruntime_validation.py | 4 ++-- .../tools/profile_explorer/profile_explorer.py | 2 +- .../python/tools/transformers/benchmark.py | 2 +- .../tools/transformers/benchmark_helper.py | 2 +- .../tools/transformers/bert_test_data.py | 6 +++--- .../transformers/fusion_qordered_attention.py | 2 +- .../transformers/models/gpt2/benchmark_gpt2.py | 6 +++--- .../transformers/models/gpt2/gpt2_parity.py | 4 ++-- .../models/longformer/benchmark_longformer.py | 2 +- .../python/tools/transformers/onnx_exporter.py | 2 +- .../python/tools/transformers/onnx_model.py | 2 +- .../tools/transformers/shape_infer_helper.py | 2 +- .../orttraining/eager/opgen/opgen/generator.py | 2 +- .../ortmodule/_custom_autograd_function.py | 4 ++-- .../python/training/ortmodule/_utils.py | 2 +- .../orttraining/python/training/orttrainer.py | 2 +- .../python/training/orttrainer_options.py | 2 +- ...orttraining_test_ortmodule_autograd_dist.py | 2 +- .../tools/scripts/pipeline_model_split.py | 8 ++++---- setup.py | 4 ++-- tools/python/PythonTools.md | 18 +++++++++--------- .../operator_type_usage_processors.py | 1 - 24 files changed, 47 insertions(+), 43 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index d06f6e2bf0a8b..92a94b03007ca 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -53,6 +53,11 @@ exclude_patterns = [ 'orttraining/**', # FIXME(#7032): ignore server code for now 'server/**', + # FIXME: DUO106 + 
'tools/nuget/generate_nuspec_for_native_nuget.py', + # FIXME: DUO116 + 'js/scripts/build_web.py', + ] command = [ 'python3', diff --git a/onnxruntime/python/onnxruntime_collect_build_info.py b/onnxruntime/python/onnxruntime_collect_build_info.py index 6cd67938dd0ba..07ac21a11eb04 100644 --- a/onnxruntime/python/onnxruntime_collect_build_info.py +++ b/onnxruntime/python/onnxruntime_collect_build_info.py @@ -35,7 +35,7 @@ def get_cudart_version(find_cudart_version=None): status = cudart.cudaRuntimeGetVersion(ctypes.byref(version)) if status != 0: return None - except: # noqa + except Exception: return None return version.value @@ -93,7 +93,7 @@ def get_cudnn_supported_cuda_version(find_cudnn_version=None): # cudnn_ver = cudnn.cudnnGetVersion() cuda_ver = cudnn.cudnnGetCudartVersion() return cuda_ver - except: # noqa + except Exception: return None # use set to avoid duplications diff --git a/onnxruntime/python/onnxruntime_validation.py b/onnxruntime/python/onnxruntime_validation.py index 8b313635527ac..5b0dd3198aa62 100644 --- a/onnxruntime/python/onnxruntime_validation.py +++ b/onnxruntime/python/onnxruntime_validation.py @@ -102,7 +102,7 @@ def validate_build_package_info(): try: from .build_and_package_info import cuda_version - except: # noqa + except Exception: pass if cuda_version: @@ -110,7 +110,7 @@ def validate_build_package_info(): # when the build environment has none or multiple libraries installed try: from .build_and_package_info import cudart_version - except: # noqa + except Exception: warnings.warn("WARNING: failed to get cudart_version from onnxruntime build info.") cudart_version = None diff --git a/onnxruntime/python/tools/profile_explorer/profile_explorer.py b/onnxruntime/python/tools/profile_explorer/profile_explorer.py index f3430a89e7a34..7012d6163dc66 100644 --- a/onnxruntime/python/tools/profile_explorer/profile_explorer.py +++ b/onnxruntime/python/tools/profile_explorer/profile_explorer.py @@ -13,7 +13,7 @@ def _demangle(name, demangler="c++filt"): with sp.Popen([demangler, name], stdin=sp.PIPE, stdout=sp.PIPE) as proc: out, _ = proc.communicate() return out.decode("utf-8").strip() - except: + except Exception: return name diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index dec7bc7beadfc..5f25debd3a866 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -860,7 +860,7 @@ def main(): args.model_source, args, ) - except: + except Exception: logger.error("Exception", exc_info=True) time_stamp = datetime.now().strftime("%Y%m%d-%H%M%S") diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index 90161a61adf84..e04f3256cd92d 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -134,7 +134,7 @@ def create_onnxruntime_session( providers = [(name, provider_options[name]) if name in provider_options else name for name in providers] session = onnxruntime.InferenceSession(onnx_model_path, sess_options, providers=providers) - except: + except Exception: logger.error("Exception", exc_info=True) return session diff --git a/onnxruntime/python/tools/transformers/bert_test_data.py b/onnxruntime/python/tools/transformers/bert_test_data.py index 38c14cdb0dfda..c89c99c593587 100644 --- a/onnxruntime/python/tools/transformers/bert_test_data.py +++ 
b/onnxruntime/python/tools/transformers/bert_test_data.py @@ -99,7 +99,7 @@ def fake_input_mask_data( ] if random_mask_length: - actual_seq_len = random.randint(int(sequence_length * 2 / 3), sequence_length) + actual_seq_len = random.randint(int(sequence_length * 2 / 3), sequence_length) # noqa: DUO102 data = np.zeros((batch_size, sequence_length), dtype=np.int32) temp = np.ones((batch_size, actual_seq_len), dtype=np.int32) data[: temp.shape[0], : temp.shape[1]] = temp @@ -172,10 +172,10 @@ def fake_test_data( assert input_ids is not None np.random.seed(random_seed) - random.seed(random_seed) + random.seed(random_seed) # noqa: DUO102 all_inputs = [] - for test_case in range(test_cases): + for _ in range(test_cases): input_1 = fake_input_ids_data(input_ids, batch_size, sequence_length, dictionary_size) inputs = {input_ids.name: input_1} diff --git a/onnxruntime/python/tools/transformers/fusion_qordered_attention.py b/onnxruntime/python/tools/transformers/fusion_qordered_attention.py index b3d8743414b91..cac79ebd3e327 100644 --- a/onnxruntime/python/tools/transformers/fusion_qordered_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_qordered_attention.py @@ -128,7 +128,7 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): # Identify the root input to the Attention node other_inputs = [] - for i, input in enumerate(start_node.input): + for input in start_node.input: if input not in output_name_to_node: continue diff --git a/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py b/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py index b928232ab62a4..e8553e2cae0f7 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/benchmark_gpt2.py @@ -21,14 +21,14 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import ( +from benchmark_helper import ( # noqa: E402 Precision, create_onnxruntime_session, get_ort_environment_variables, prepare_environment, setup_logger, ) -from quantize_helper import QuantizeHelper +from quantize_helper import QuantizeHelper # noqa: E402 logger = logging.getLogger("") @@ -404,7 +404,7 @@ def main(args): "onnxruntime_latency": f"{ort_latency:.2f}", } csv_writer.writerow(row) - except: + except Exception: logger.error("Exception", exc_info=True) return None diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py index 0943c8d059b7b..9fdadbba623a0 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py @@ -120,7 +120,7 @@ def run(self, argv, experiment_name): ) if result: self.results.append(result) - except: + except Exception: logger.exception(f"Failed to run experiment {experiment_name}") result = None @@ -511,7 +511,7 @@ def get_fp32_ops(x): try: rows = load_results_from_csv(task.csv_path) - except: + except Exception: logger.exception(f"Failed to load csv {task.csv_path}") rows = task.results diff --git a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py index 679004c6ea89c..e41834a323a3d 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py +++ b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py @@ -647,7 +647,7 @@ def 
run_tests( latency_results = launch_test(args) except KeyboardInterrupt as exc: raise RuntimeError("Keyboard Interrupted") from exc - except: + except Exception: traceback.print_exc() continue diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index b60c455ad11b5..c42e286922bbb 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -600,7 +600,7 @@ def export_onnx_model_from_tf( # Use no past state for these models if config.use_cache: config.use_cache = False - except: + except Exception: pass example_outputs = model(example_inputs, training=False) diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index 4e8d63b7106fa..7aaf15152977e 100644 --- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -40,7 +40,7 @@ def infer_runtime_shape(self, dynamic_axis_mapping=None, update=False): try: if self.shape_infer_helper.infer(dynamic_axis_mapping): return self.shape_infer_helper - except: + except Exception: self.enable_shape_infer = False # disable shape inference to suppress same error message. print("failed in shape inference", sys.exc_info()[0]) diff --git a/onnxruntime/python/tools/transformers/shape_infer_helper.py b/onnxruntime/python/tools/transformers/shape_infer_helper.py index e877497ffb1cb..f8a5464d8af78 100644 --- a/onnxruntime/python/tools/transformers/shape_infer_helper.py +++ b/onnxruntime/python/tools/transformers/shape_infer_helper.py @@ -15,7 +15,7 @@ else: sys.path.append(os.path.join(file_path, "..")) -from symbolic_shape_infer import SymbolicShapeInference, get_shape_from_type_proto, sympy +from symbolic_shape_infer import SymbolicShapeInference, get_shape_from_type_proto, sympy # noqa: E402 logger = logging.getLogger(__name__) diff --git a/orttraining/orttraining/eager/opgen/opgen/generator.py b/orttraining/orttraining/eager/opgen/opgen/generator.py index 8813ad15ba483..5497d01d3fcdd 100644 --- a/orttraining/orttraining/eager/opgen/opgen/generator.py +++ b/orttraining/orttraining/eager/opgen/opgen/generator.py @@ -776,7 +776,7 @@ def _parse_mapped_function_decls(self, cpp_parser: parser.CPPParser): try: op_namespace = op_name[0 : op_name.index("::")] op_namewithoutnamespace = op_name[len(op_namespace) + 2 :] - except: + except Exception: op_namespace = None op_namewithoutnamespace = op_name diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function.py index 1c2fce2b1a80e..e18a46ec16fe4 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function.py @@ -66,7 +66,7 @@ def enable_custom_autograd_support(to_enable=True): # This is for the latest Pytorch nightly after this commit: # https://github.com/pytorch/pytorch/commit/11bc435622e6b7207bbf37ed1aafe999e1f296ec register_custom_op_symbolic("prim::PythonOp", _export, 1) - except: + except Exception: # This applies to Pytorch 1.9 and 1.9.1. 
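All of the `except:` → `except Exception:` conversions in this patch address the same hazard: a bare `except` is equivalent to `except BaseException:`, so it also swallows `KeyboardInterrupt` and `SystemExit`. That is also why `benchmark_longformer.py` above re-raises `KeyboardInterrupt` explicitly with `raise ... from exc`. A standalone sketch of the difference:

```python
import time
import traceback


def run_batches(num_batches=3):
    """Toy driver loop; sleep stands in for real per-batch work."""
    for _ in range(num_batches):
        try:
            time.sleep(0.1)
        # A bare `except:` here would also catch KeyboardInterrupt and
        # SystemExit (which derive from BaseException, not Exception),
        # making the loop impossible to stop with Ctrl-C.
        # `except Exception:` logs the failure and continues, but still
        # lets Ctrl-C terminate the run.
        except Exception:
            traceback.print_exc()


run_batches()
```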
register_custom_op_symbolic("::prim_PythonOp", _export, 1) @@ -78,7 +78,7 @@ def enable_custom_autograd_support(to_enable=True): # This is for the latest Pytorch nightly after this commit: # https://github.com/pytorch/pytorch/commit/11bc435622e6b7207bbf37ed1aafe999e1f296ec unregister_custom_op_symbolic("prim::PythonOp", 1) - except: + except Exception: # This applies to Pytorch 1.9 and 1.9.1. unregister_custom_op_symbolic("::prim_PythonOp", 1) diff --git a/orttraining/orttraining/python/training/ortmodule/_utils.py b/orttraining/orttraining/python/training/ortmodule/_utils.py index 51c54e1dd5187..5423b653921b3 100644 --- a/orttraining/orttraining/python/training/ortmodule/_utils.py +++ b/orttraining/orttraining/python/training/ortmodule/_utils.py @@ -316,7 +316,7 @@ def get_exception_as_string(exception): try: raise exception - except: + except Exception: return traceback.format_exc() diff --git a/orttraining/orttraining/python/training/orttrainer.py b/orttraining/orttraining/python/training/orttrainer.py index bdf6a1e9e1ea1..a19bfa2844aec 100644 --- a/orttraining/orttraining/python/training/orttrainer.py +++ b/orttraining/orttraining/python/training/orttrainer.py @@ -931,7 +931,7 @@ def _training_session_run_helper(self, is_train, inputs, inputs_desc, outputs_de # so output will be on the same device as input. try: test_pt_device = torch.device(target_device) - except: + except Exception: # in this case, input/output must on CPU assert input.device.type == "cpu" target_device = "cpu" diff --git a/orttraining/orttraining/python/training/orttrainer_options.py b/orttraining/orttraining/python/training/orttrainer_options.py index 9e7a2bde4dfa0..6bc382c1afdc3 100644 --- a/orttraining/orttraining/python/training/orttrainer_options.py +++ b/orttraining/orttraining/python/training/orttrainer_options.py @@ -536,7 +536,7 @@ def _check_is_callable(field, value, error): try: # Python 3 result = value is None or callable(value) - except: + except Exception: # Python 3 but < 3.2 if hasattr(value, "__call__"): result = True diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py index 9b499963506ec..231c8f45c93b2 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py @@ -124,7 +124,7 @@ def run_with_ort_on_gpu(model, args, rank, device): size = 2 try: mp.spawn(test_Distributed_ReduceWithMarkDirtyModel, nprocs=size, args=(size,)) - except: + except Exception: import sys sys.stdout.flush() diff --git a/orttraining/tools/scripts/pipeline_model_split.py b/orttraining/tools/scripts/pipeline_model_split.py index 6bdaac31f435b..e9b441454ebde 100644 --- a/orttraining/tools/scripts/pipeline_model_split.py +++ b/orttraining/tools/scripts/pipeline_model_split.py @@ -286,7 +286,7 @@ def generate_subgraph(model, start_nodes, identity_node_list): try: if i in identity_node_index: del main_graph.graph.node[i] - except: + except Exception: print("error deleting identity node", i) all_visited_nodes = [] @@ -337,7 +337,7 @@ def generate_subgraph(model, start_nodes, identity_node_list): del subgraph.graph.node[i] else: del main_graph.graph.node[i] - except: + except Exception: print("error deleting node", i) for i in reversed(range(len(main_graph.graph.input))): @@ -346,7 +346,7 @@ def generate_subgraph(model, start_nodes, identity_node_list): del subgraph.graph.input[i] else: del 
main_graph.graph.input[i] - except: + except Exception: print("error deleting inputs", i) for i in reversed(range(len(main_graph.graph.output))): @@ -355,7 +355,7 @@ def generate_subgraph(model, start_nodes, identity_node_list): del subgraph.graph.output[i] else: del main_graph.graph.output[i] - except: + except Exception: print("error deleting outputs ", i) print("model", str(model_count), " length ", len(subgraph.graph.node)) diff --git a/setup.py b/setup.py index b7cd7c753a2e9..537befa4769aa 100644 --- a/setup.py +++ b/setup.py @@ -621,7 +621,7 @@ def check_date_format(date_str): try: datetime.datetime.strptime(date_str, "%Y%m%d") return True - except: # noqa + except Exception: return False def reformat_run_count(count_str): @@ -632,7 +632,7 @@ def reformat_run_count(count_str): elif count >= 1000: raise RuntimeError(f"Too many builds for the same day: {count}") return "" - except: # noqa + except Exception: return "" build_suffix_is_date_format = check_date_format(build_suffix[:8]) diff --git a/tools/python/PythonTools.md b/tools/python/PythonTools.md index 49a4ac6a337d1..969ab3f461180 100644 --- a/tools/python/PythonTools.md +++ b/tools/python/PythonTools.md @@ -4,13 +4,13 @@ Provides helpers for creating ONNX test directories that can be run using onnx_test_runner and onnxruntime_perf_test. -In order to import ort_test_dir_utils you need to either +In order to import ort_test_dir_utils you need to either - run python from the `/tools/python directory - - add the directory to your PYTHONPATH + - add the directory to your PYTHONPATH - add the directory to sys.path prior to importing e.g. to add to sys.path -```python +```python import sys sys.path.append('/tools/python') @@ -19,7 +19,7 @@ import ort_test_dir_utils ### Creating a test directory for a model. -The create_test_dir helper can create the input and output pb files in various ways. +The create_test_dir helper can create the input and output pb files in various ways. Often a support request will only provide a problematic model and no input data. create_test_dir can be used to create input to allow the model to be debugged more easily. Random input can be generated if not provided. If expected output is not provided, the model will be run with the input, and the output from that will be saved as the expected output. @@ -57,7 +57,7 @@ model_path = '/onnxruntime/test/testdata/transform/expand_ # when using the default data generation any symbolic dimension values must be provided symbolic_vals = {'dynamic':2} # provide value for symbolic dim named 'dynamic' in 'input2' -# let create_test_dir create random input in the (arbitrary) default range of -10 to 10. +# let create_test_dir create random input in the (arbitrary) default range of -10 to 10. # it will create data of the correct type based on the model. ort_test_dir_utils.create_test_dir(model_path, 'temp/examples', 'test1', symbolic_dim_values_map=symbolic_vals) @@ -75,7 +75,7 @@ onnx_test_data_utils.dump_pb('temp/examples/test2/test_data_set_0') ### Running the test using python -To execute the test once the directory is created you can use the onnx_test_runner or onnxruntime_perf_test executables if you have built onnxruntime from source, or the run_test_dir helper. Input can be either the test directory, or the model in case there are multiple in the test directory. +To execute the test once the directory is created you can use the onnx_test_runner or onnxruntime_perf_test executables if you have built onnxruntime from source, or the run_test_dir helper. 
Input can be either the test directory, or the model in case there are multiple in the test directory.
 
 ```python
 def run_test_dir(model_or_dir):
@@ -98,7 +98,7 @@ import ort_test_dir_utils
 try:
     ort_test_dir_utils.run_test_dir('temp/examples/test1')
     ort_test_dir_utils.run_test_dir('temp/examples/test2/expand_elimination.onnx')
-except:
+except Exception:
     print("Exception:", sys.exc_info()[1])
 ```
 
@@ -107,7 +107,7 @@ except:
 Provides helpers for generating/reading protobuf files containing ONNX TensorProto data.
 
 ```
-usage: onnx_test_data_utils.py [-h] --action {dump_pb,numpy_to_pb,image_to_pb,random_to_pb,update_name_in_pb}
+usage: onnx_test_data_utils.py [-h] --action {dump_pb,numpy_to_pb,image_to_pb,random_to_pb,update_name_in_pb}
                                [--input INPUT] [--name NAME] [--output OUTPUT] [--resize RESIZE]
                                [--channels_last] [--add_batch_dim] [--shape SHAPE] [--datatype DATATYPE]
                                [--min_value MIN_VALUE] [--max_value MAX_VALUE] [--seed SEED]
@@ -162,4 +162,4 @@ optional arguments:
   -h, --help            show this help message and exit
   -m MODEL, --model MODEL
                         model file
   -o OUT, --out OUT     output directory (default: […]

From […] Mon Sep 17 00:00:00 2001
From: Justin Chu
Date: Mon, 16 Jan 2023 05:16:38 +0000
Subject: [PATCH 14/33] Fix more

---
 onnxruntime/python/backend/backend.py                      | 2 +-
 onnxruntime/python/tools/microbench/benchmark.py           | 4 ++--
 .../python/tools/transformers/convert_generation.py        | 4 ++--
 onnxruntime/python/tools/transformers/fusion_attention.py  | 2 +-
 onnxruntime/python/tools/transformers/fusion_shape.py      | 2 +-
 .../python/tools/transformers/io_binding_helper.py         | 4 ++--
 onnxruntime/python/tools/transformers/onnx_model.py        | 2 +-
 setup.py                                                   | 8 +++++---
 tools/ci_build/github/android/build_aar_package.py         | 8 +++++---
 9 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/onnxruntime/python/backend/backend.py b/onnxruntime/python/backend/backend.py
index b2dcd00a5a5fa..a473011c64ffc 100644
--- a/onnxruntime/python/backend/backend.py
+++ b/onnxruntime/python/backend/backend.py
@@ -124,7 +124,7 @@ def prepare(cls, model, device=None, **kwargs):
                 raise RuntimeError("Incompatible device expected '{0}', got '{1}'".format(device, get_device()))
             return cls.prepare(inf, device, **kwargs)
         else:
-            # type: ModelProto
+            # ModelProto
             # check_model serializes the model anyways, so serialize the model once here
             # and reuse it below in the cls.prepare call to avoid an additional serialization
             # only works with onnx >= 1.10.0 hence the version check
diff --git a/onnxruntime/python/tools/microbench/benchmark.py b/onnxruntime/python/tools/microbench/benchmark.py
index da6849059536e..323717441674f 100644
--- a/onnxruntime/python/tools/microbench/benchmark.py
+++ b/onnxruntime/python/tools/microbench/benchmark.py
@@ -126,13 +126,13 @@ def benchmark(self):
         io_binding = self.create_io_binding(sess, input_tensors, output_tensors)
 
         # warm up
-        for iter in range(10):
+        for _ in range(10):
             sess.run_with_iobinding(io_binding)
 
         # measure
         max_iters = 100
         start_time = time.time()
-        for iter in range(max_iters):
+        for _ in range(max_iters):
             sess.run_with_iobinding(io_binding)
 
         # time is in milliseconds
diff --git a/onnxruntime/python/tools/transformers/convert_generation.py b/onnxruntime/python/tools/transformers/convert_generation.py
index 316cca24c7da8..e71450108620f 100644
--- a/onnxruntime/python/tools/transformers/convert_generation.py
+++ b/onnxruntime/python/tools/transformers/convert_generation.py
@@ -64,10 +64,10 @@
 from models.gpt2.convert_to_onnx import main as convert_gpt2_to_onnx  # noqa: E402
 
 sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5"))
-from
benchmark_helper import setup_logger +from benchmark_helper import setup_logger # noqa: E402 from models.t5.convert_to_onnx import export_onnx_models as export_t5_onnx_models # noqa: E402 from models.t5.t5_helper import PRETRAINED_MT5_MODELS, PRETRAINED_T5_MODELS # noqa: E402 -from onnx_model import OnnxModel +from onnx_model import OnnxModel # noqa: E402 logger = logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 46e9886138e68..3d47131f88258 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -412,7 +412,7 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): return other_inputs = [] - for i, input in enumerate(start_node.input): + for input in start_node.input: if input not in output_name_to_node: continue diff --git a/onnxruntime/python/tools/transformers/fusion_shape.py b/onnxruntime/python/tools/transformers/fusion_shape.py index a6a74719b9c42..f7f6d0ac4ab4b 100644 --- a/onnxruntime/python/tools/transformers/fusion_shape.py +++ b/onnxruntime/python/tools/transformers/fusion_shape.py @@ -48,7 +48,7 @@ def fuse( input_name_to_nodes: Dict[str, List[NodeProto]], output_name_to_node: Dict[str, NodeProto], ): - """ + r""" Smplify subgraph like (2d_input) diff --git a/onnxruntime/python/tools/transformers/io_binding_helper.py b/onnxruntime/python/tools/transformers/io_binding_helper.py index 43515a6187aaf..84295bb205321 100644 --- a/onnxruntime/python/tools/transformers/io_binding_helper.py +++ b/onnxruntime/python/tools/transformers/io_binding_helper.py @@ -12,14 +12,14 @@ class TypeHelper: @staticmethod def get_input_type(ort_session: InferenceSession, name: str) -> str: - for i, input in enumerate(ort_session.get_inputs()): + for input in ort_session.get_inputs(): if input.name == name: return input.type raise ValueError(f"input name {name} not found") @staticmethod def get_output_type(ort_session, name: str) -> str: - for i, output in enumerate(ort_session.get_outputs()): + for output in ort_session.get_outputs(): if output.name == name: return output.type diff --git a/onnxruntime/python/tools/transformers/onnx_model.py b/onnxruntime/python/tools/transformers/onnx_model.py index 7aaf15152977e..d84cfad46c4cb 100644 --- a/onnxruntime/python/tools/transformers/onnx_model.py +++ b/onnxruntime/python/tools/transformers/onnx_model.py @@ -967,7 +967,7 @@ def save_model_to_file(self, output_path, use_external_data_format=False, all_te if output_path.endswith(".json"): # Output text for testing small model. 
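Nearly all the `# noqa: E402` suppressions in this series trace back to one layout: the transformers tools append sibling directories to `sys.path` at module scope and only then import from them, so those imports cannot sit at the top of the file. A minimal sketch of the pattern (directory layout taken from the hunks above; otherwise illustrative):

```python
import os
import sys

# The sibling package is not installed as a distribution; it only becomes
# importable once its directory is appended to sys.path at import time.
sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2"))

# E402 ("module level import not at top of file") fires because this import
# follows executable code. The suppression records that the ordering is
# deliberate rather than an oversight.
import gpt2_helper  # noqa: E402
```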
with open(output_path, "w") as out: - out.write(str(model)) + out.write(str(self.model)) else: OnnxModel.save(self.model, output_path, use_external_data_format, all_tensors_to_one_file) logger.info(f"Model saved to {output_path}") diff --git a/setup.py b/setup.py index 537befa4769aa..267b31a56fe31 100644 --- a/setup.py +++ b/setup.py @@ -130,9 +130,11 @@ def get_tag(self): _, _, plat = _bdist_wheel.get_tag(self) if platform.system() == "Linux": # Get the right platform tag by querying the linker version - glibc_major, glibc_minor = popen("ldd --version | head -1").read().split()[-1].split(".") - """# See https://github.com/mayeut/pep600_compliance/blob/master/ - pep600_compliance/tools/manylinux-policy.json""" + glibc_major, glibc_minor = ( + popen("ldd --version | head -1").read().split()[-1].split(".") # noqa: DUO106 + ) + # See https://github.com/mayeut/pep600_compliance/blob/master/ + # pep600_compliance/tools/manylinux-policy.json if glibc_major == "2" and glibc_minor == "17": plat = "manylinux_2_17_x86_64.manylinux2014_x86_64" else: # For manylinux2014 and above, no alias is required diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index c0cacb4231665..fac04797cfff1 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -153,9 +153,11 @@ def _build_aar(args): use_shell = True if is_windows() else False # clean, build, and publish to a local directory - subprocess.run(gradle_command + ["clean"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) - subprocess.run(gradle_command + ["build"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) - subprocess.run(gradle_command + ["publish"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) + subprocess.run(gradle_command + ["clean"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) # noqa: DUO116 + subprocess.run(gradle_command + ["build"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) # noqa: DUO116 + subprocess.run( + gradle_command + ["publish"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT + ) # noqa: DUO116 def parse_args(): From a3ca255f15daa274629aae157d0ff58e0d980f56 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 05:32:12 +0000 Subject: [PATCH 15/33] More fixes --- onnxruntime/python/tools/onnxruntime_test.py | 2 +- .../python/tools/quantization/quant_utils.py | 2 +- .../python/tools/tensorrt/perf/benchmark.py | 2 +- .../python/tools/transformers/__init__.py | 4 +-- .../python/tools/transformers/benchmark.py | 27 +++++++++---------- .../tools/transformers/fusion_embedlayer.py | 4 +-- .../transformers/models/gpt2/gpt2_parity.py | 2 +- .../models/longformer/benchmark_longformer.py | 4 +-- .../models/longformer/convert_to_onnx.py | 4 +-- .../models/longformer/generate_test_data.py | 4 +-- tools/nuget/validate_package.py | 6 ++--- 11 files changed, 30 insertions(+), 31 deletions(-) diff --git a/onnxruntime/python/tools/onnxruntime_test.py b/onnxruntime/python/tools/onnxruntime_test.py index e2a3dc514e066..a437a2e9aca7b 100644 --- a/onnxruntime/python/tools/onnxruntime_test.py +++ b/onnxruntime/python/tools/onnxruntime_test.py @@ -113,7 +113,7 @@ def run_model( sys.exit(-1) start = timer() - for i in range(num_iters): + for _ in range(num_iters): outputs = sess.run([], feeds) # fetch all outputs end = timer() diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py 
index e49e9fdb53a8e..662739c4ed832 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -265,7 +265,7 @@ def __init__( class QuantizedValue: """ - Represents a linearly quantized value (input\output\intializer) + Represents a linearly quantized value (input/output/intializer) """ def __init__( diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark.py b/onnxruntime/python/tools/tensorrt/perf/benchmark.py index 7bb23084e1ca9..6441278ea249f 100644 --- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py +++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py @@ -205,7 +205,7 @@ def run_trt_standalone(trtexec, model_name, model_path, test_data_dir, all_input avg_latency_match = re.search("mean = (.*?) ms", target) if avg_latency_match: result["average_latency_ms"] = avg_latency_match.group(1) # extract number - percentile_match = re.search("percentile\(90%\) = (.*?) ms", target) + percentile_match = re.search(r"percentile\(90%\) = (.*?) ms", target) if percentile_match: result["latency_90_percentile"] = percentile_match.group(1) # extract number if mem_usage: diff --git a/onnxruntime/python/tools/transformers/__init__.py b/onnxruntime/python/tools/transformers/__init__.py index 4200447eefee5..9bf76fc38153c 100644 --- a/onnxruntime/python/tools/transformers/__init__.py +++ b/onnxruntime/python/tools/transformers/__init__.py @@ -8,9 +8,9 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) -import convert_to_onnx +import convert_to_onnx # noqa: E402 # added for backward compatible -import gpt2_helper +import gpt2_helper # noqa: E402 sys.path.append(os.path.join(os.path.dirname(__file__), "models", "t5")) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 5f25debd3a866..deac7dba72544 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -43,6 +43,7 @@ import argparse import logging import os +import random import timeit from datetime import datetime @@ -72,7 +73,7 @@ logger = logging.getLogger("") -from huggingface_models import MODEL_CLASSES, MODELS +from huggingface_models import MODEL_CLASSES, MODELS # noqa: E402 cpu_count = psutil.cpu_count(logical=False) @@ -80,8 +81,8 @@ if "OMP_NUM_THREADS" not in os.environ: os.environ["OMP_NUM_THREADS"] = str(cpu_count) -import torch -from transformers import AutoConfig, AutoTokenizer, LxmertConfig +import torch # noqa: E402 +from transformers import AutoConfig, AutoTokenizer, LxmertConfig # noqa: E402 def run_onnxruntime( @@ -366,7 +367,7 @@ def run_pytorch( inference = torch.jit.trace(model, input_ids) if torchscript else model inference(input_ids) - runtimes = timeit.repeat(lambda: inference(input_ids), repeat=repeat_times, number=1) + runtimes = timeit.repeat(lambda: inference(input_ids), repeat=repeat_times, number=1) # noqa: B023 result = { "engine": "torchscript" if torchscript else "torch", @@ -488,9 +489,7 @@ def run_tensorflow( "Run Tensorflow on {} with input shape {}".format(model_name, [batch_size, sequence_length]) ) - import random - - rng = random.Random() + rng = random.Random() # noqa: D102 values = [rng.randint(0, config.vocab_size - 1) for i in range(batch_size * sequence_length)] input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32) @@ -498,18 +497,18 @@ def run_tensorflow( # Disable both for better inference perf 
@run_with_tf_optimizations(do_eager_mode=False, use_xla=False) def encoder_forward(): - return model(input_ids, training=False) + return model(input_ids, training=False) # noqa: B023 @run_with_tf_optimizations(do_eager_mode=False, use_xla=False) def encoder_decoder_forward(): - return model(input_ids, decoder_input_ids=input_ids, training=False) + return model(input_ids, decoder_input_ids=input_ids, training=False) # noqa: B023 @run_with_tf_optimizations(do_eager_mode=False, use_xla=False) def lxmert_forward(): - feats = tf.random.normal([1, 1, config.visual_feat_dim]) - pos = tf.random.normal([1, 1, config.visual_pos_dim]) - return model( - input_ids, + feats = tf.random.normal([1, 1, config.visual_feat_dim]) # noqa: B023 + pos = tf.random.normal([1, 1, config.visual_pos_dim]) # noqa: B023 + return model( # noqa: B023 + input_ids, # noqa: B023 visual_feats=feats, visual_pos=pos, training=False, @@ -523,7 +522,7 @@ def lxmert_forward(): inference() - runtimes = timeit.repeat(lambda: inference(), repeat=repeat_times, number=1) + runtimes = timeit.repeat(lambda: inference(), repeat=repeat_times, number=1) # noqa: B023 result = { "engine": "tensorflow", diff --git a/onnxruntime/python/tools/transformers/fusion_embedlayer.py b/onnxruntime/python/tools/transformers/fusion_embedlayer.py index f4ae184bdf825..42c7faa37d649 100644 --- a/onnxruntime/python/tools/transformers/fusion_embedlayer.py +++ b/onnxruntime/python/tools/transformers/fusion_embedlayer.py @@ -112,7 +112,7 @@ def check_attention_subgraph( logger.debug("No Attention like subgraph in children of LayerNormalization") return False else: - if children_types != ["Add", "MatMul", "MatMul", "MatMul",] and children_types != [ + if children_types != ["Add", "MatMul", "MatMul", "MatMul"] and children_types != [ "MatMul", "MatMul", "MatMul", @@ -233,7 +233,7 @@ def match_position_embedding_roberta(self, position_embedding_gather, input_ids, return False def match_position_embedding_bert(self, position_embedding_gather, input_ids, output_name_to_node): - """ Match position embedding path from input_ids to Gather for BERT. + r"""Match position embedding path from input_ids to Gather for BERT. 
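The `# noqa: B023` markers above silence flake8-bugbear's "function definition does not bind loop variable" warning. In `run_tensorflow` the closures are invoked before the captured variables change, so the late binding is benign there, but the underlying hazard is real and easy to reproduce (standalone sketch):

```python
# Closures capture variables by reference, so every lambda built in the
# comprehension observes the final value of `i` once the loop has finished.
late_bound = [lambda: i for i in range(3)]
print([f() for f in late_bound])  # [2, 2, 2]

# Binding the current value as a default argument freezes it per iteration.
early_bound = [lambda i=i: i for i in range(3)]
print([f() for f in early_bound])  # [0, 1, 2]
```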
BERT Embedding Layer Pattern: (input_ids) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py index 9fdadbba623a0..265e76cf6e4d0 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_parity.py @@ -26,7 +26,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import get_ort_environment_variables, setup_logger +from benchmark_helper import get_ort_environment_variables, setup_logger # noqa: E402 logger = logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py index e41834a323a3d..fe62d0889a25f 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py +++ b/onnxruntime/python/tools/transformers/models/longformer/benchmark_longformer.py @@ -51,7 +51,7 @@ import onnxruntime sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -import benchmark_helper +import benchmark_helper # noqa: E402 logger = logging.getLogger("") @@ -80,7 +80,7 @@ def test_torch_latency( input_list = inputs.to_list() _ = model(*input_list) - runtimes = timeit.repeat(lambda: model(*input_list), repeat=test_times, number=1) + runtimes = timeit.repeat(lambda: model(*input_list), repeat=test_times, number=1) # noqa: B023 result = { "engine": "torch", # TODO: test torchscript "version": torch.__version__, diff --git a/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py index 8e1f55d1384f2..d7ae07967274f 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/longformer/convert_to_onnx.py @@ -47,8 +47,8 @@ from transformers import LongformerModel, LongformerSelfAttention sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from onnx_model_bert import BertOnnxModel -from torch_onnx_export_helper import torch_onnx_export +from onnx_model_bert import BertOnnxModel # noqa: E402 +from torch_onnx_export_helper import torch_onnx_export # noqa: E402 # Supports format 0 or 1 weight_bias_format = 0 diff --git a/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py index 2c886c28e707a..109f739667f6d 100644 --- a/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py +++ b/onnxruntime/python/tools/transformers/models/longformer/generate_test_data.py @@ -15,8 +15,8 @@ from onnx import ModelProto, TensorProto sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data -from onnx_model import OnnxModel +from bert_test_data import fake_input_ids_data, fake_input_mask_data, output_test_data # noqa: E402 +from onnx_model import OnnxModel # noqa: E402 def parse_arguments(): diff --git a/tools/nuget/validate_package.py b/tools/nuget/validate_package.py index 5baa2f603c5d7..a8dbe1a955a8e 100644 --- a/tools/nuget/validate_package.py +++ b/tools/nuget/validate_package.py @@ -89,7 +89,7 @@ def check_if_dlls_are_present( platforms = platforms_supported.strip().split(",") if package_type == "tarball": file_list_in_package = list() - 
for (dirpath, dirnames, filenames) in os.walk(package_path): + for (dirpath, _, filenames) in os.walk(package_path): file_list_in_package += [os.path.join(dirpath, file) for file in filenames] else: file_list_in_package = zip_file.namelist() @@ -194,7 +194,7 @@ def validate_tarball(args): package_folder = re.search("(.*)[.].*", package_name).group(1) print("tar zxvf " + package_name) - os.system("tar zxvf " + package_name) + os.system("tar zxvf " + package_name) # noqa: DUO106 is_windows_ai_package = False zip_file = None @@ -276,7 +276,7 @@ def validate_nuget(args): # Make a copy of the Nuget package print("Copying [" + full_nuget_path + "] -> [" + nupkg_copy_name + "], and extracting its contents") - os.system("copy " + full_nuget_path + " " + nupkg_copy_name) + os.system("copy " + full_nuget_path + " " + nupkg_copy_name) # noqa: DUO106 # Convert nupkg to zip os.rename(nupkg_copy_name, zip_copy_name) From e55b6ff5e0001b561d35ccb96ae8c059a888ed1a Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 05:34:36 +0000 Subject: [PATCH 16/33] Fix --- .lintrunner.toml | 3 ++- onnxruntime/python/tools/transformers/float16.py | 2 +- onnxruntime/python/tools/transformers/onnx_model_tnlr.py | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 92a94b03007ca..41a65bfebfaf2 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -57,7 +57,8 @@ exclude_patterns = [ 'tools/nuget/generate_nuspec_for_native_nuget.py', # FIXME: DUO116 'js/scripts/build_web.py', - + # FIXME: Too many errors + 'onnxruntime/python/tools/tensorrt/perf/**', ] command = [ 'python3', diff --git a/onnxruntime/python/tools/transformers/float16.py b/onnxruntime/python/tools/transformers/float16.py index 2768e340002ac..f6f550d833eb6 100644 --- a/onnxruntime/python/tools/transformers/float16.py +++ b/onnxruntime/python/tools/transformers/float16.py @@ -317,7 +317,7 @@ def convert_float_to_float16( queue = next_level - for key, value in fp32_initializers.items(): + for value in fp32_initializers.values(): # By default, to avoid precision loss, do not convert an initializer to fp16 when it is used only by fp32 nodes. 
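The `# noqa: DUO106` and `# noqa: DUO116` suppressions a few hunks back cover dlint's shell-injection checks on `os.system(...)` and `subprocess.run(..., shell=...)`. They are tolerated in these scripts because the interpolated values come from the build itself, but the pattern the rules steer toward is an argument vector, which never passes through a shell. A sketch (the file name is illustrative):

```python
import subprocess

package_name = "example-package.tar.gz"  # illustrative value

# What DUO106 flags: the command is a single shell string, so a crafted
# package_name (e.g. "x.tar.gz; rm -rf ~") would be parsed and run by the
# shell rather than handed to tar as a file name.
# os.system("tar zxvf " + package_name)

# Argument-vector form: no shell is involved, so package_name reaches tar
# verbatim, whatever characters it contains.
subprocess.run(["tar", "zxvf", package_name], check=True)
```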
if force_fp16_initializers or value.fp16_nodes: value.initializer = convert_tensor_float_to_float16(value.initializer, min_positive_val, max_finite_val) diff --git a/onnxruntime/python/tools/transformers/onnx_model_tnlr.py b/onnxruntime/python/tools/transformers/onnx_model_tnlr.py index dc8f6810914a7..a464c4af5d49c 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_tnlr.py +++ b/onnxruntime/python/tools/transformers/onnx_model_tnlr.py @@ -11,6 +11,8 @@ from onnx_model import OnnxModel from onnx_model_bert import BertOnnxModel +import numpy as np + logger = logging.getLogger(__name__) @@ -118,12 +120,12 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): [1, 1, 1, 0, 0, 0], ) if qkv_nodes is not None: - (_, _, matmul_below, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes + (_, _, _, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes else: return other_inputs = [] - for i, input in enumerate(start_node.input): + for input in start_node.input: if input not in output_name_to_node: continue From 635669ec96db8462cd5ed3f0390f9d91489d745d Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 05:38:51 +0000 Subject: [PATCH 17/33] typo --- onnxruntime/python/tools/transformers/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index deac7dba72544..45fba6d3e0a93 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -489,7 +489,7 @@ def run_tensorflow( "Run Tensorflow on {} with input shape {}".format(model_name, [batch_size, sequence_length]) ) - rng = random.Random() # noqa: D102 + rng = random.Random() # noqa: DUO102 values = [rng.randint(0, config.vocab_size - 1) for i in range(batch_size * sequence_length)] input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32) From d1e55a480c0dddc4d0e7023d12d6872f6d142478 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 05:45:04 +0000 Subject: [PATCH 18/33] DUO102 --- .flake8 | 5 ++++- onnxruntime/python/tools/transformers/benchmark.py | 2 +- onnxruntime/python/tools/transformers/bert_test_data.py | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.flake8 b/.flake8 index 7831fd148230a..2e601fbe11088 100644 --- a/.flake8 +++ b/.flake8 @@ -3,4 +3,7 @@ max-line-length = 120 per-file-ignores = __init__.py:F401 # NOTE: Edit exclude list in .lintrunner.toml -ignore = W503, E203, E501 + +# Ignored codes: +# DUO102: We use random only for math +ignore = W503, E203, E501, DUO102 diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 45fba6d3e0a93..f5b3b69e00abc 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -489,7 +489,7 @@ def run_tensorflow( "Run Tensorflow on {} with input shape {}".format(model_name, [batch_size, sequence_length]) ) - rng = random.Random() # noqa: DUO102 + rng = random.Random() values = [rng.randint(0, config.vocab_size - 1) for i in range(batch_size * sequence_length)] input_ids = tf.constant(values, shape=(batch_size, sequence_length), dtype=tf.int32) diff --git a/onnxruntime/python/tools/transformers/bert_test_data.py b/onnxruntime/python/tools/transformers/bert_test_data.py index c89c99c593587..88b0f2f9bafdd 100644 --- a/onnxruntime/python/tools/transformers/bert_test_data.py +++ 
b/onnxruntime/python/tools/transformers/bert_test_data.py @@ -99,7 +99,7 @@ def fake_input_mask_data( ] if random_mask_length: - actual_seq_len = random.randint(int(sequence_length * 2 / 3), sequence_length) # noqa: DUO102 + actual_seq_len = random.randint(int(sequence_length * 2 / 3), sequence_length) data = np.zeros((batch_size, sequence_length), dtype=np.int32) temp = np.ones((batch_size, actual_seq_len), dtype=np.int32) data[: temp.shape[0], : temp.shape[1]] = temp @@ -172,7 +172,7 @@ def fake_test_data( assert input_ids is not None np.random.seed(random_seed) - random.seed(random_seed) # noqa: DUO102 + random.seed(random_seed) all_inputs = [] for _ in range(test_cases): From 213cfb626bd12de2df143a8c0196028e21ee1a87 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 17:34:08 +0000 Subject: [PATCH 19/33] Fix init --- orttraining/orttraining/python/training/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/orttraining/orttraining/python/training/__init__.py b/orttraining/orttraining/python/training/__init__.py index ef785166a1a6a..3b28514819b2e 100644 --- a/orttraining/orttraining/python/training/__init__.py +++ b/orttraining/orttraining/python/training/__init__.py @@ -2,15 +2,16 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- - +# isort:skip_file from onnxruntime.capi._pybind_state import PropagateCastOpsStrategy, TrainingParameters from onnxruntime.capi.training.training_session import TrainingSession -from . import amp, checkpoint, model_desc_validation, optim -from .orttrainer import ORTTrainer, TrainStepInfo - # Options need to be imported before `ORTTrainer`. from .orttrainer_options import ORTTrainerOptions +from .orttrainer import ORTTrainer, TrainStepInfo + +from . 
import amp, checkpoint, model_desc_validation, optim + try: from .ortmodule import ORTModule From 4b84d822a63620d50f60c2975dbce3df08dbf6e6 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 17:46:30 +0000 Subject: [PATCH 20/33] Fix the rest; fix training import order --- .../tools/transformers/bert_perf_test.py | 14 +++--- .../models/gpt2/convert_to_onnx.py | 4 +- .../transformers/models/gpt2/gpt2_helper.py | 40 +++++++++-------- .../transformers/models/gpt2/gpt2_tester.py | 6 +-- .../models/gpt2/parity_check_helper.py | 6 +-- .../transformers/models/t5/past_helper.py | 2 +- .../tools/transformers/models/t5/t5_helper.py | 43 +++++++++++-------- .../tools/transformers/onnx_exporter.py | 2 +- .../tools/transformers/onnx_model_bart.py | 2 +- .../tools/transformers/onnx_model_bert.py | 1 - .../tools/transformers/onnx_model_tnlr.py | 3 +- .../transformers/torch_onnx_export_helper.py | 2 +- .../github/android/build_aar_package.py | 4 +- tools/doc/rename_folders.py | 4 +- tools/python/example_operator_perf_test.py | 2 +- tools/python/gen_contrib_doc.py | 2 +- tools/python/util/run.py | 2 +- 17 files changed, 71 insertions(+), 68 deletions(-) diff --git a/onnxruntime/python/tools/transformers/bert_perf_test.py b/onnxruntime/python/tools/transformers/bert_perf_test.py index 022ee076770be..16f376bafeeae 100644 --- a/onnxruntime/python/tools/transformers/bert_perf_test.py +++ b/onnxruntime/python/tools/transformers/bert_perf_test.py @@ -173,7 +173,7 @@ def onnxruntime_inference_with_io_binding(session, all_inputs, output_names, tes results = [] latency_list = [] device = "cuda" if test_setting.use_gpu else "cpu" - for test_case_id, inputs in enumerate(all_inputs): + for inputs in all_inputs: result = session.run(output_names, inputs) results.append(result) outputs = {} @@ -201,7 +201,7 @@ def onnxruntime_inference(session, all_inputs, output_names): results = [] latency_list = [] - for test_case_id, inputs in enumerate(all_inputs): + for inputs in all_inputs: start_time = timeit.default_timer() result = session.run(output_names, inputs) latency = timeit.default_timer() - start_time @@ -240,14 +240,12 @@ def run_one_test(model_setting, test_setting, perf_results, all_inputs, intra_op all_latency_list = [] if test_setting.use_io_binding: - for i in range(test_setting.test_times): - results, latency_list = onnxruntime_inference_with_io_binding( - session, all_inputs, output_names, test_setting - ) + for _ in range(test_setting.test_times): + _, latency_list = onnxruntime_inference_with_io_binding(session, all_inputs, output_names, test_setting) all_latency_list.extend(latency_list) else: - for i in range(test_setting.test_times): - results, latency_list = onnxruntime_inference(session, all_inputs, output_names) + for _ in range(test_setting.test_times): + _, latency_list = onnxruntime_inference(session, all_inputs, output_names) all_latency_list.extend(latency_list) # latency in miliseconds diff --git a/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py b/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py index 78e718e6e80c4..cb8e1a337de68 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/convert_to_onnx.py @@ -30,14 +30,14 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import ( +from benchmark_helper import ( # noqa: E402 Precision, create_onnxruntime_session, get_ort_environment_variables, prepare_environment, 
setup_logger, ) -from quantize_helper import QuantizeHelper +from quantize_helper import QuantizeHelper # noqa: E402 logger = logging.getLogger("") diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py index 250aa52a479c6..95f6a7dcaa000 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_helper.py @@ -13,7 +13,7 @@ import tempfile import time from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import Collection, Dict, List, Tuple, Union import numpy import onnx @@ -22,12 +22,12 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import Precision -from float16 import float_to_float16_max_diff -from fusion_options import AttentionMaskFormat -from io_binding_helper import IOBindingHelper -from onnx_model import OnnxModel -from torch_onnx_export_helper import torch_onnx_export +from benchmark_helper import Precision # noqa: E402 +from float16 import float_to_float16_max_diff # noqa: E402 +from fusion_options import AttentionMaskFormat # noqa: E402 +from io_binding_helper import IOBindingHelper # noqa: E402 +from onnx_model import OnnxModel # noqa: E402 +from torch_onnx_export_helper import torch_onnx_export # noqa: E402 logger = logging.getLogger(__name__) @@ -551,24 +551,26 @@ def optimize_onnx( @staticmethod def auto_mixed_precision( onnx_model: OnnxModel, - op_block_list: List[str] = [ - "Add", - "LayerNormalization", - "SkipLayerNormalization", - "FastGelu", - "EmbedLayerNormalization", - ], + op_block_list: Collection[str] = frozenset( + ( + "Add", + "LayerNormalization", + "SkipLayerNormalization", + "FastGelu", + "EmbedLayerNormalization", + ) + ), ): """Convert GPT-2 model to mixed precision. It detects whether original model has fp16 weights, and set parameters for float16 conversion automatically. Args: - onnx_model (OnnxModel): optimized ONNX model - op_block_list (List[str], optional): operators to compute in fp32. Defaults to ["Add", "LayerNormalization", - "SkipLayerNormalization", "FastGelu", "EmbedLayerNormalization"] + onnx_model: Optimized ONNX model + op_block_list: Operators to compute in fp32. Defaults to {"Add", "LayerNormalization", + "SkipLayerNormalization", "FastGelu", "EmbedLayerNormalization"}. Returns: - parameters(dict): a dictionary of parameters used in float16 conversion + A dictionary of parameters used in float16 conversion. 
""" - op_full_set = set([node.op_type for node in onnx_model.nodes()]) + op_full_set = set(node.op_type for node in onnx_model.nodes()) fp32_op_set = set(op_block_list) fp16_op_set = op_full_set.difference(fp32_op_set) logger.info(f"fp32 op: {fp32_op_set} fp16 op: {fp16_op_set}") diff --git a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py index be303b4e188bf..b6e9a1406fb5c 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/gpt2_tester.py @@ -17,7 +17,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import Precision +from benchmark_helper import Precision # noqa: E402 logger = logging.getLogger(__name__) @@ -151,7 +151,7 @@ def __init__( 0, hidden_size // num_attention_heads, ] - for i in range(num_layer): + for _ in range(num_layer): empty_past = torch.empty(past_shape).type(torch.float16 if is_fp16 else torch.float32) self.past.append(empty_past.to(device)) @@ -194,7 +194,7 @@ def add_tensor(input_tensors, torch_tensor, name): f.write(tensor.SerializeToString()) output_names = [output.name for output in session.get_outputs()] - for i, name in enumerate(output_names): + for i, _ in enumerate(output_names): tensor = numpy_helper.from_array( output[i] if isinstance(output[i], numpy.ndarray) else output[i].clone().cpu().numpy() ) diff --git a/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py b/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py index d57ca9bca9a5b..87dc766628aab 100644 --- a/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py +++ b/onnxruntime/python/tools/transformers/models/gpt2/parity_check_helper.py @@ -19,7 +19,7 @@ sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from benchmark_helper import create_onnxruntime_session +from benchmark_helper import create_onnxruntime_session # noqa: E402 NON_ZERO_VALUE = str(1) ZERO_VALUE = str(0) @@ -137,8 +137,8 @@ def post_processing(outputs_path, outputs_path_other): dummy_inputs_fp32 = dummy_inputs_fp16.to_fp32() # Get GPT-2 model from huggingface using convert_to_onnx.py - os.system("python convert_to_onnx.py -m gpt2 --output gpt2_fp32.onnx -o -p fp32 --use_gpu") - os.system("python convert_to_onnx.py -m gpt2 --output gpt2_fp16.onnx -o -p fp16 --use_gpu") + os.system("python convert_to_onnx.py -m gpt2 --output gpt2_fp32.onnx -o -p fp32 --use_gpu") # noqa: DUO106 + os.system("python convert_to_onnx.py -m gpt2 --output gpt2_fp16.onnx -o -p fp16 --use_gpu") # noqa: DUO106 # Specify the directory to dump the node's I/O outputs_path_fp32_gpu = "./fp32_gpu" diff --git a/onnxruntime/python/tools/transformers/models/t5/past_helper.py b/onnxruntime/python/tools/transformers/models/t5/past_helper.py index fe113491067fd..d6b1d50f5a47f 100644 --- a/onnxruntime/python/tools/transformers/models/t5/past_helper.py +++ b/onnxruntime/python/tools/transformers/models/t5/past_helper.py @@ -38,7 +38,7 @@ def group_by_self_or_cross(present_key_values): """ present_self = [] present_cross = [] - for i, present_layer_i in enumerate(present_key_values): + for present_layer_i in present_key_values: assert len(present_layer_i) == 4, f"Expected to have four items. 
Got {len(present_layer_i)}" ( present_key_self, diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py index 4d853a6544ef0..56f19c7407931 100644 --- a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py +++ b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py @@ -8,7 +8,7 @@ import os import sys from pathlib import Path -from typing import Dict, List, Union +from typing import Collection, Dict, Union import torch from t5_decoder import T5Decoder, T5DecoderHelper, T5DecoderInit @@ -19,9 +19,9 @@ from onnxruntime import InferenceSession sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) -from float16 import float_to_float16_max_diff -from onnx_model import OnnxModel -from optimizer import optimize_model +from float16 import float_to_float16_max_diff # noqa: E402 +from onnx_model import OnnxModel # noqa: E402 +from optimizer import optimize_model # noqa: E402 logger = logging.getLogger(__name__) @@ -150,26 +150,31 @@ def export_onnx( @staticmethod def auto_mixed_precision( onnx_model: OnnxModel, - op_block_list: List[str] = [ - "Pow", - "ReduceMean", - "Add", - "Sqrt", - "Div", - "Mul", - "Softmax", - "Relu", - ], + op_block_list: Collection[str] = frozenset( + ( + "Pow", + "ReduceMean", + "Add", + "Sqrt", + "Div", + "Mul", + "Softmax", + "Relu", + ) + ), ): """Convert model to mixed precision. - It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically. + + It detects whether original model has fp16 precision weights, + and set parameters for float16 conversion automatically. + Args: - onnx_model (OnnxModel): optimized ONNX model - op_block_list (List[str], optional): . Defaults to ["Pow", "ReduceMean", "Add", "Sqrt", "Div", "Mul", "Softmax", "Relu"] + onnx_model: Optimized ONNX model + op_block_list: Defaults to {"Pow", "ReduceMean", "Add", "Sqrt", "Div", "Mul", "Softmax", "Relu"} Returns: - parameters(dict): a dictionary of parameters used in float16 conversion + parameters: A dictionary of parameters used in float16 conversion """ - op_full_set = set([node.op_type for node in onnx_model.nodes()]) + op_full_set = set(node.op_type for node in onnx_model.nodes()) fp32_op_set = set(op_block_list) fp16_op_set = op_full_set.difference(fp32_op_set) logger.info(f"fp32 op: {fp32_op_set} fp16 op: {fp16_op_set}") diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index c42e286922bbb..edb10a17832ba 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -19,7 +19,7 @@ from transformers import AutoConfig, AutoTokenizer, LxmertConfig, TransfoXLConfig sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) -from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState +from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState # noqa: E402 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" diff --git a/onnxruntime/python/tools/transformers/onnx_model_bart.py b/onnxruntime/python/tools/transformers/onnx_model_bart.py index 33db231c52332..df5c841938b90 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bart.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bart.py @@ -97,7 +97,7 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): return other_inputs = [] - for i, 
input in enumerate(normalize_node.input): + for input in normalize_node.input: if input not in output_name_to_node: continue if input == qkv_nodes[0].output[0]: diff --git a/onnxruntime/python/tools/transformers/onnx_model_bert.py b/onnxruntime/python/tools/transformers/onnx_model_bert.py index 590f00045b2d0..bb3218a5c0af0 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_bert.py +++ b/onnxruntime/python/tools/transformers/onnx_model_bert.py @@ -235,7 +235,6 @@ def use_dynamic_axes(self, dynamic_batch_dim="batch_size", dynamic_seq_len="max_ casted=True ) + self.get_graph_inputs_from_fused_nodes(casted=False) - {} for input in self.model.graph.input: if input.name in bert_graph_inputs: dim_proto = input.type.tensor_type.shape.dim[0] diff --git a/onnxruntime/python/tools/transformers/onnx_model_tnlr.py b/onnxruntime/python/tools/transformers/onnx_model_tnlr.py index a464c4af5d49c..5947672a43845 100644 --- a/onnxruntime/python/tools/transformers/onnx_model_tnlr.py +++ b/onnxruntime/python/tools/transformers/onnx_model_tnlr.py @@ -5,14 +5,13 @@ import logging from typing import Union +import numpy as np from fusion_attention import AttentionMask, FusionAttention from fusion_utils import NumpyHelper from onnx import NodeProto, TensorProto, helper, numpy_helper from onnx_model import OnnxModel from onnx_model_bert import BertOnnxModel -import numpy as np - logger = logging.getLogger(__name__) diff --git a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py index 119455684cea1..997461befd198 100644 --- a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py +++ b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py @@ -4,9 +4,9 @@ # -------------------------------------------------------------------------- import torch +from packaging.version import Version TrainingMode = torch.onnx.TrainingMode -from packaging.version import Version def torch_onnx_export( diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index fac04797cfff1..5f6f753d1b33c 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -155,9 +155,9 @@ def _build_aar(args): # clean, build, and publish to a local directory subprocess.run(gradle_command + ["clean"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) # noqa: DUO116 subprocess.run(gradle_command + ["build"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT) # noqa: DUO116 - subprocess.run( + subprocess.run( # noqa: DUO116 gradle_command + ["publish"], env=temp_env, shell=use_shell, check=True, cwd=JAVA_ROOT - ) # noqa: DUO116 + ) def parse_args(): diff --git a/tools/doc/rename_folders.py b/tools/doc/rename_folders.py index d65b8a350eed1..56b2a39357411 100644 --- a/tools/doc/rename_folders.py +++ b/tools/doc/rename_folders.py @@ -13,7 +13,7 @@ def rename_folder(root): Returns the list of renamed folders. 
""" found = [] - for r, dirs, files in os.walk(root): + for r, dirs, _ in os.walk(root): for name in dirs: if name.startswith("_"): found.append((r, name)) @@ -35,7 +35,7 @@ def replace_files(root, renamed): subs = {r[1]: r[2] for r in renamed} reg = re.compile('(\\"[a-zA-Z0-9\\.\\/\\?\\:@\\-_=#]+\\.([a-zA-Z]){2,6}' '([a-zA-Z0-9\\.\\&\\/\\?\\:@\\-_=#])*\\")') - for r, dirs, files in os.walk(root): + for r, _, files in os.walk(root): for name in files: if os.path.splitext(name)[-1] != ".html": continue diff --git a/tools/python/example_operator_perf_test.py b/tools/python/example_operator_perf_test.py index 50a3edd5c9b27..41c0d605bc636 100644 --- a/tools/python/example_operator_perf_test.py +++ b/tools/python/example_operator_perf_test.py @@ -99,7 +99,7 @@ def run_test(): # run the model and measure time after 'iters' calls while total < num_seconds: start = time.time_ns() - for i in range(iters): + for _ in range(iters): # ignore the outputs as we're not validating them in a performance test sess.run(None, inputs) end = time.time_ns() diff --git a/tools/python/gen_contrib_doc.py b/tools/python/gen_contrib_doc.py index 15e7f65d093d9..6604a433ac528 100644 --- a/tools/python/gen_contrib_doc.py +++ b/tools/python/gen_contrib_doc.py @@ -361,7 +361,7 @@ def main(output_path: str, domain_filter: [str]): fout.write(s) for _, namemap in supportmap: - for n, schema, versions in namemap: + for n, schema, _ in namemap: s = ' * {}{}\n'.format( support_level_str(schema.support_level), format_name_with_domain(domain, n), diff --git a/tools/python/util/run.py b/tools/python/util/run.py index c3a389233ff72..98724ae956402 100644 --- a/tools/python/util/run.py +++ b/tools/python/util/run.py @@ -46,7 +46,7 @@ def run( def output(is_stream_captured): return subprocess.PIPE if is_stream_captured else (subprocess.DEVNULL if quiet else None) - completed_process = subprocess.run( + completed_process = subprocess.run( # noqa: DUO116 cmd, cwd=cwd, check=check, From a35bfa2e53b5f6f735d6a56f1245c436acfa4d98 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 18:04:18 +0000 Subject: [PATCH 21/33] Remove unused imports --- .../orttraining/eager/opgen/onnxgen.py | 6 +- .../orttraining/eager/opgen/opgen/atenops.py | 2 - .../eager/opgen/opgen/generator.py | 6 +- .../eager/opgen/opgen_test/lexer_test.py | 2 +- .../test/linux_only_ortmodule_eager_test.py | 4 +- .../orttraining/eager/test/ort_eps_test.py | 10 ++-- .../orttraining/eager/test/ort_init.py | 2 - orttraining/orttraining/eager/test/ort_ops.py | 25 ++++---- .../orttraining/eager/test/ort_tensor.py | 5 +- .../test_model_OrtModule/mnist_fc_training.py | 6 +- .../orttraining/eager/test_models/mnist_fc.py | 5 -- .../eager/test_models/mnist_fc_training.py | 2 +- .../python/deprecated/training_session.py | 3 - orttraining/orttraining/python/ort_trainer.py | 3 +- .../orttraining/python/training/_utils.py | 2 - .../python/training/experimental/exporter.py | 2 +- .../gradient_graph/_gradient_graph_tools.py | 2 +- .../python/training/optim/_modifier.py | 2 +- .../_custom_autograd_function_runner.py | 2 +- .../training/ortmodule/_execution_agent.py | 2 +- .../python/training/ortmodule/_fallback.py | 1 - .../ortmodule/_graph_execution_manager.py | 4 +- .../python/training/ortmodule/_io.py | 4 +- .../python/training/ortmodule/_utils.py | 1 - .../python/training/ortmodule/ortmodule.py | 1 - .../cpu/torch_interop_utils/setup.py | 2 +- .../cuda/fused_ops/setup.py | 2 - .../orttraining/python/training/orttrainer.py | 4 +- .../python/training/orttrainer_options.py | 1 - 
.../python/training/postprocess.py | 7 +-- .../test/external_custom_ops/setup.py | 1 - .../test/external_custom_ops/test.py | 8 +-- .../test/external_transformers_test.py | 6 +- .../orttraining/test/python/_test_helpers.py | 3 +- .../orttraining/test/python/launch_test.py | 1 - .../python/onnxruntime_test_postprocess.py | 10 +--- .../python/orttraining_run_bert_pretrain.py | 12 +--- .../test/python/orttraining_run_glue.py | 10 +--- .../python/orttraining_run_multiple_choice.py | 7 --- .../orttraining_test_bert_postprocess.py | 3 +- .../python/orttraining_test_debuggability.py | 13 ----- ...orttraining_test_hierarchical_ortmodule.py | 1 - .../python/orttraining_test_ortmodule_api.py | 58 +++++++++---------- .../orttraining_test_ortmodule_autograd.py | 4 +- ...rttraining_test_ortmodule_autograd_dist.py | 2 - ...training_test_ortmodule_bert_classifier.py | 1 - ...test_ortmodule_bert_classifier_autocast.py | 9 +-- ...t_ortmodule_deepspeed_pipeline_parallel.py | 7 +-- ...test_ortmodule_experimental_json_config.py | 28 ++++----- ...t_ortmodule_fairscale_sharded_optimizer.py | 4 +- .../orttraining_test_ortmodule_fallback.py | 4 +- .../orttraining_test_ortmodule_pytorch_ddp.py | 2 - ...ttraining_test_orttrainer_bert_toy_onnx.py | 12 +--- ...ng_test_orttrainer_checkpoint_functions.py | 6 +- .../orttraining_test_orttrainer_frontend.py | 24 ++++---- .../python/orttraining_test_transformers.py | 7 +-- .../test/python/orttraining_test_utils.py | 5 +- .../python/orttraining_transformer_trainer.py | 16 ++--- .../test/python/utils_multiple_choice.py | 4 +- .../mnist_training.py | 2 - orttraining/tools/amdgpu/script/rocprof.py | 3 - orttraining/tools/ci_test/compare_results.py | 2 - .../ci_test/download_azure_blob_archive.py | 2 - orttraining/tools/scripts/experiment.py | 9 +-- .../tools/scripts/gpt2_model_transform.py | 4 +- orttraining/tools/scripts/model_transform.py | 2 +- .../tools/scripts/nv_run_pretraining.py | 12 +--- .../tools/scripts/opset12_model_transform.py | 2 +- .../tools/scripts/pipeline_model_split.py | 5 +- orttraining/tools/scripts/watch_experiment.py | 4 +- tools/python/gen_contrib_doc.py | 48 +++++++-------- tools/python/sparsify_initializers.py | 8 +-- 72 files changed, 166 insertions(+), 325 deletions(-) diff --git a/orttraining/orttraining/eager/opgen/onnxgen.py b/orttraining/orttraining/eager/opgen/onnxgen.py index e46e8eb73fac2..e42956551ca79 100755 --- a/orttraining/orttraining/eager/opgen/onnxgen.py +++ b/orttraining/orttraining/eager/opgen/onnxgen.py @@ -47,7 +47,7 @@ def write(s): def writeline(s=""): fp.write(s + "\n") - writeline(f"# AUTO-GENERATED CODE! - DO NOT EDIT!") + writeline("# AUTO-GENERATED CODE! 
- DO NOT EDIT!") writeline(f'# $ python {" ".join(argv)}') writeline() @@ -56,11 +56,11 @@ def writeline(s=""): for op_name, schema in sorted(onnx_ops.items()): writeline(f"class {schema.name}(ONNXOp):") - writeline(f' """') + writeline(' """') doc_str = schema.doc.strip("\r\n") for doc_line in str.splitlines(doc_str, keepends=False): writeline(f" {doc_line}") - writeline(f' """') + writeline(' """') writeline() write(" def __init__(self") diff --git a/orttraining/orttraining/eager/opgen/opgen/atenops.py b/orttraining/orttraining/eager/opgen/opgen/atenops.py index b8b54b5d6ce84..10fc971894302 100644 --- a/orttraining/orttraining/eager/opgen/opgen/atenops.py +++ b/orttraining/orttraining/eager/opgen/opgen/atenops.py @@ -1,5 +1,3 @@ -from copy import deepcopy - import torch from opgen.generator import MakeTorchFallback, ONNXOp, SignatureOnly from opgen.onnxops import * diff --git a/orttraining/orttraining/eager/opgen/opgen/generator.py b/orttraining/orttraining/eager/opgen/opgen/generator.py index 5497d01d3fcdd..9926891f395ed 100644 --- a/orttraining/orttraining/eager/opgen/opgen/generator.py +++ b/orttraining/orttraining/eager/opgen/opgen/generator.py @@ -17,7 +17,7 @@ def __init__(self, count: int): self.name = None def __str__(self): - return self.name if self.name else f"" + return self.name if self.name else "" class AttrType: @@ -282,7 +282,7 @@ def _write_function_body_onnx_op_node_attributes(self, writer, onnx_op, attrs, a if attr.type.startswith("at::ScalarType::"): writer.write(f", {attr.type}") elif attr.type == AttrType.TENSOR: - writer.write(f", true") + writer.write(", true") elif attr.type != AttrType.STRING: raise FunctionGenerationError( cpp_func, @@ -432,7 +432,7 @@ def _write_function_body_return_multiple(self, writer, cpp_func, in_place_params isinstance(cpp_func.return_type, ast.TemplateType) and cpp_func.return_type.identifier_tokens[-1].value == "std::tuple" ): - raise Exception(f"") + raise Exception("") tensorRef = "Tensor&," * len(in_place_params) tensorRef = tensorRef[: len(tensorRef) - 1] writer.write(f"return std::tuple<{tensorRef}>(") diff --git a/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py b/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py index cdbe6bf68c5c0..dbe9289fdb21e 100644 --- a/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py +++ b/orttraining/orttraining/eager/opgen/opgen_test/lexer_test.py @@ -3,7 +3,7 @@ import unittest -from opgen.lexer import Lexer, SourceLocation, StringReader, Token, TokenKind +from opgen.lexer import Lexer, StringReader, Token, TokenKind class LexerTestCase(unittest.TestCase): diff --git a/orttraining/orttraining/eager/test/linux_only_ortmodule_eager_test.py b/orttraining/orttraining/eager/test/linux_only_ortmodule_eager_test.py index 17318710850ed..fa9cd109e8eac 100644 --- a/orttraining/orttraining/eager/test/linux_only_ortmodule_eager_test.py +++ b/orttraining/orttraining/eager/test/linux_only_ortmodule_eager_test.py @@ -1,7 +1,5 @@ -import os import unittest -import numpy as np import torch import torch.nn as nn import torch.nn.functional as F @@ -62,7 +60,7 @@ def test_ortmodule_inference(self): with torch.no_grad(): data = torch.rand(batch_size, input_size) - y = model(data.to(device)) + model(data.to(device)) print("Done") @unittest.skip("Test fails with newest pytorch version.") diff --git a/orttraining/orttraining/eager/test/ort_eps_test.py b/orttraining/orttraining/eager/test/ort_eps_test.py index a0f10eb115da9..bdfcb68d01efa 100644 --- 
a/orttraining/orttraining/eager/test/ort_eps_test.py +++ b/orttraining/orttraining/eager/test/ort_eps_test.py @@ -2,7 +2,6 @@ # Licensed under the MIT License. import os -import sys import unittest import onnxruntime_pybind11_state as torch_ort @@ -16,7 +15,6 @@ def is_windows(): import sys import threading import time -from io import StringIO class OutputGrabber(object): @@ -108,22 +106,22 @@ def test_import_custom_eps(self): # capture std out with OutputGrabber() as out: torch_ort.set_device(1, "TestExecutionProvider", {"device_id": "0", "some_config": "val"}) - ort_device = torch_ort.device(1) + torch_ort.device(1) assert "My EP provider created, with device id: 0, some_option: val" in out.capturedtext with OutputGrabber() as out: torch_ort.set_device(2, "TestExecutionProvider", {"device_id": "1", "some_config": "val"}) - ort_device = torch_ort.device(1) + torch_ort.device(1) assert "My EP provider created, with device id: 1, some_option: val" in out.capturedtext # test the reusing EP instance with OutputGrabber() as out: torch_ort.set_device(3, "TestExecutionProvider", {"device_id": "0", "some_config": "val"}) - ort_device = torch_ort.device(1) + torch_ort.device(1) assert "My EP provider created, with device id: 0, some_option: val" not in out.capturedtext # test clear training ep instance pool torch_ort.clear_training_ep_instances() with OutputGrabber() as out: torch_ort.set_device(3, "TestExecutionProvider", {"device_id": "0", "some_config": "val"}) - ort_device = torch_ort.device(1) + torch_ort.device(1) assert "My EP provider created, with device id: 0, some_option: val" in out.capturedtext @unittest.skip("Test fails with newest pytorch version.") diff --git a/orttraining/orttraining/eager/test/ort_init.py b/orttraining/orttraining/eager/test/ort_init.py index 9876fd506ede8..e8f1e7f2bf88a 100644 --- a/orttraining/orttraining/eager/test/ort_init.py +++ b/orttraining/orttraining/eager/test/ort_init.py @@ -23,8 +23,6 @@ def ort_alloc(): with self.assertRaises(BaseException): ort_alloc() - import onnxruntime_pybind11_state as torch_ort - ort_alloc() self.assertIn(config_match, torch._C._show_config()) diff --git a/orttraining/orttraining/eager/test/ort_ops.py b/orttraining/orttraining/eager/test/ort_ops.py index 2473a85c1902a..e9ea3822c3fee 100644 --- a/orttraining/orttraining/eager/test/ort_ops.py +++ b/orttraining/orttraining/eager/test/ort_ops.py @@ -5,7 +5,6 @@ import unittest -import numpy as np import onnxruntime_pybind11_state as torch_ort import torch from parameterized import param, parameterized @@ -643,7 +642,7 @@ def test_op_out(self, test_name, tensor_test=torch.rand(6)): self.skipTest(f" {test_name}_output Fails - skipping for now") device = self.get_device() cpu_tensor = tensor_test - ort_tensor = cpu_tensor.to(device) + cpu_tensor.to(device) cpu_out_tensor = torch.tensor([], dtype=tensor_test.dtype) ort_out_tensor = cpu_out_tensor.to(device) @@ -663,9 +662,9 @@ def test_op_out(self, test_name, tensor_test=torch.rand(6)): def test_op_tensor(self, math_sign_ops): device = self.get_device() cpu_a = torch.Tensor([1.0, 1.5, 2.0, 3.5]) - ort_a = cpu_a.to(device) + cpu_a.to(device) cpu_b = torch.Tensor([1.0, 1.4, 2.1, 2.4]) - ort_b = cpu_b.to(device) + cpu_b.to(device) for tensor_type in {torch.float, torch.bool}: cpu_out_tensor = torch.tensor([], dtype=tensor_type) @@ -687,13 +686,11 @@ def test_op_scalar(self, math_sign_ops): cpu_scalar_int_lt = torch.scalar_tensor(2, dtype=torch.int) cpu_scalar_int_gt = torch.scalar_tensor(0, dtype=torch.int) cpu_tensor_float = 
torch.tensor([1.1, 1.1], dtype=torch.float32) - float_lt = 1.0 - float_gt = 1.2 - ort_tensor_int = cpu_tensor_int.to(device) - ort_scalar_int_lt = cpu_scalar_int_lt.to(device) - ort_scalar_int_gt = cpu_scalar_int_gt.to(device) - ort_tensor_float = cpu_tensor_float.to(device) + cpu_tensor_int.to(device) + cpu_scalar_int_lt.to(device) + cpu_scalar_int_gt.to(device) + cpu_tensor_float.to(device) # compare int to int, float to float - ort only supports same type at the moment cpu_out_tensor = torch.tensor([], dtype=torch.bool) @@ -746,9 +743,9 @@ def test_op_scalar(self, math_sign_ops): def test_op_binary_tensor(self, binary_op, op_sign, alpha_supported): device = self.get_device() cpu_input = torch.rand(3, 1) # use broadcasting in the second dim. - ort_input = cpu_input.to(device) + cpu_input.to(device) cpu_other = torch.rand(3, 3) - ort_other = cpu_other.to(device) + cpu_other.to(device) # verify op_sign works cpu_result = eval(compile("cpu_input " + op_sign + " cpu_other", "", "eval")) @@ -785,9 +782,7 @@ def test_op_binary_tensor(self, binary_op, op_sign, alpha_supported): def test_op_binary_scalar(self, binary_op, op_sign, alpha_supported): device = self.get_device() cpu_input = torch.ones(3, 3) - ort_input = cpu_input.to(device) - cpu_other = 3.1 - ort_other = 3.1 + cpu_input.to(device) # verify op_sign works cpu_result = eval(compile("cpu_input " + op_sign + " cpu_other", "", "eval")) diff --git a/orttraining/orttraining/eager/test/ort_tensor.py b/orttraining/orttraining/eager/test/ort_tensor.py index 5f399f9e8a2e5..e4d94f137da77 100644 --- a/orttraining/orttraining/eager/test/ort_tensor.py +++ b/orttraining/orttraining/eager/test/ort_tensor.py @@ -3,7 +3,6 @@ import unittest -import onnxruntime_pybind11_state as torch_ort import torch @@ -50,7 +49,7 @@ def test_stride(self): ort_ones = cpu_ones.to("ort") y = torch.as_strided(ort_ones, (2, 2), (1, 2)) assert y.size() == (2, 2) - assert y.is_contiguous() == False + assert y.is_contiguous() is False contiguous_y = y.contiguous() w = torch.ones((2, 3)) ort_w = w.to("ort") @@ -66,7 +65,7 @@ def test_slice(self): ort_ones = cpu_ones.to("ort") y_cpu = cpu_ones[0:128, :128] y = ort_ones[0:128, :128] - assert y.is_contiguous() == False + assert y.is_contiguous() is False assert y.size() == (128, 128) assert torch.allclose(y.cpu(), y_cpu) diff --git a/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py b/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py index a5977ed2b9e21..b40cc228a4e5f 100644 --- a/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py +++ b/orttraining/orttraining/eager/test_model_OrtModule/mnist_fc_training.py @@ -6,9 +6,7 @@ import argparse -import os -import numpy as np import torch import torch.nn as nn import torch.nn.functional as F @@ -84,7 +82,7 @@ def main(): ) args = parser.parse_args() - use_cuda = not args.no_cuda and torch.cuda.is_available() + not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) @@ -100,7 +98,7 @@ def main(): shuffle=True, **kwargs, ) - test_loader = torch.utils.data.DataLoader( + torch.utils.data.DataLoader( datasets.MNIST( "./data", train=False, diff --git a/orttraining/orttraining/eager/test_models/mnist_fc.py b/orttraining/orttraining/eager/test_models/mnist_fc.py index 26a91f957b5f8..6a2c03785b4cb 100644 --- a/orttraining/orttraining/eager/test_models/mnist_fc.py +++ b/orttraining/orttraining/eager/test_models/mnist_fc.py @@ -1,11 +1,6 @@ -import argparse -import os - -import numpy as np import 
onnxruntime_pybind11_state as torch_ort import torch import torch.nn as nn -import torch.nn.functional as F class NeuralNet(nn.Module): diff --git a/orttraining/orttraining/eager/test_models/mnist_fc_training.py b/orttraining/orttraining/eager/test_models/mnist_fc_training.py index a7bb6e57fcd9f..744a264e87cfb 100644 --- a/orttraining/orttraining/eager/test_models/mnist_fc_training.py +++ b/orttraining/orttraining/eager/test_models/mnist_fc_training.py @@ -98,7 +98,7 @@ def main(): shuffle=True, **kwargs, ) - test_loader = torch.utils.data.DataLoader( + torch.utils.data.DataLoader( datasets.MNIST( dataset_root_dir, train=False, diff --git a/orttraining/orttraining/python/deprecated/training_session.py b/orttraining/orttraining/python/deprecated/training_session.py index 61408485a0f86..37ec8552a2acf 100644 --- a/orttraining/orttraining/python/deprecated/training_session.py +++ b/orttraining/orttraining/python/deprecated/training_session.py @@ -3,13 +3,10 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -import os -import sys from onnxruntime.capi import _pybind_state as C from onnxruntime.capi.onnxruntime_inference_collection import ( InferenceSession, - IOBinding, Session, check_and_normalize_provider_args, ) diff --git a/orttraining/orttraining/python/ort_trainer.py b/orttraining/orttraining/python/ort_trainer.py index b1901380f9313..8c4fe6d8004ac 100644 --- a/orttraining/orttraining/python/ort_trainer.py +++ b/orttraining/orttraining/python/ort_trainer.py @@ -264,7 +264,6 @@ def forward(self, *inputs): # *inputs is given by torch trace. It is in the order of input_names. # model_ takes input in a order (which can be obtained via inspect.signature(model.forward)) different than input_names. 
sig = inspect.signature(self.model_.forward) - ordered_list_keys = list(sig.parameters.keys()) input_dict = {} for key in sig.parameters.keys(): @@ -556,7 +555,7 @@ def create_ort_training_session_with_optimizer( def save_checkpoint( model, checkpoint_dir, checkpoint_prefix="ORT_checkpoint", checkpoint_state_dict=None, include_optimizer_state=True ): - if checkpoint_state_dict == None: + if checkpoint_state_dict is None: checkpoint_state_dict = {"model": model.state_dict(include_optimizer_state)} else: checkpoint_state_dict.update({"model": model.state_dict(include_optimizer_state)}) diff --git a/orttraining/orttraining/python/training/_utils.py b/orttraining/orttraining/python/training/_utils.py index 099a29764839f..657cdd8e9937e 100644 --- a/orttraining/orttraining/python/training/_utils.py +++ b/orttraining/orttraining/python/training/_utils.py @@ -6,11 +6,9 @@ import importlib.util import os import sys -from functools import wraps import numpy as np import torch -from onnx import TensorProto from packaging.version import Version diff --git a/orttraining/orttraining/python/training/experimental/exporter.py b/orttraining/orttraining/python/training/experimental/exporter.py index 8c5ccd1119576..8a779cafabec1 100644 --- a/orttraining/orttraining/python/training/experimental/exporter.py +++ b/orttraining/orttraining/python/training/experimental/exporter.py @@ -4,7 +4,7 @@ def _export_jit_graph_to_onnx_model_proto(graph: torch._C.Graph, operator_export_type: int): - from torch.onnx.symbolic_helper import _set_onnx_shape_inference, _set_operator_export_type, _set_opset_version + from torch.onnx.symbolic_helper import _set_onnx_shape_inference, _set_operator_export_type _set_onnx_shape_inference(True) _set_operator_export_type(operator_export_type) diff --git a/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py b/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py index b1b5257e28c28..3f163e2417a29 100644 --- a/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py +++ b/orttraining/orttraining/python/training/experimental/gradient_graph/_gradient_graph_tools.py @@ -1,6 +1,6 @@ import io from pathlib import Path -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Union import torch from torch.onnx import TrainingMode diff --git a/orttraining/orttraining/python/training/optim/_modifier.py b/orttraining/orttraining/python/training/optim/_modifier.py index 952e90ee431ee..b2ca6c9ec8c8b 100644 --- a/orttraining/orttraining/python/training/optim/_modifier.py +++ b/orttraining/orttraining/python/training/optim/_modifier.py @@ -34,7 +34,7 @@ def check_requirements(self, required_funcs, require_apex=False, require_torch_n from apex import amp if require_torch_non_finite_check is True: _ = torch._amp_foreach_non_finite_check_and_unscale_ - except Exception as _: + except Exception: warnings.warn("Skip modifying optimizer because of Apex or torch_non_finite_check not found.", UserWarning) return False diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py index 5ff0d217dd33d..8c678181c5c7b 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py @@ -10,7 +10,7 @@ from 
onnxruntime.training.ortmodule.torch_cpp_extensions import torch_interop_utils -from ._fallback import ORTModuleFallbackException, ORTModuleIOError, _FallbackManager, wrap_exception +from ._fallback import ORTModuleFallbackException, ORTModuleIOError, wrap_exception def wrap_as_dlpack_or_not(grad_flag, tensor_flag, inplace_flag, training_mode_flag, arg): diff --git a/orttraining/orttraining/python/training/ortmodule/_execution_agent.py b/orttraining/orttraining/python/training/ortmodule/_execution_agent.py index be2cf01b1f33b..dcdc854e8d59e 100644 --- a/orttraining/orttraining/python/training/ortmodule/_execution_agent.py +++ b/orttraining/orttraining/python/training/ortmodule/_execution_agent.py @@ -6,7 +6,7 @@ import onnxruntime from onnxruntime.capi import _pybind_state as C from onnxruntime.capi._pybind_state import TrainingAgent as C_TrainingAgent -from onnxruntime.capi.onnxruntime_inference_collection import IOBinding, OrtValue +from onnxruntime.capi.onnxruntime_inference_collection import IOBinding class ExecutionAgentOutput: # pylint: disable=R0903 diff --git a/orttraining/orttraining/python/training/ortmodule/_fallback.py b/orttraining/orttraining/python/training/ortmodule/_fallback.py index 59de6114ed8c7..a31a14dfe2e73 100644 --- a/orttraining/orttraining/python/training/ortmodule/_fallback.py +++ b/orttraining/orttraining/python/training/ortmodule/_fallback.py @@ -18,7 +18,6 @@ ORTModuleIOError, ORTModuleONNXModelException, ORTModuleTorchModelException, - wrap_exception, ) diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py index c48d16636d43b..4a61b2d9dfd0a 100644 --- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py @@ -8,7 +8,7 @@ import io import os import warnings -from abc import ABC, abstractmethod +from abc import abstractmethod from enum import IntFlag from functools import reduce @@ -236,7 +236,7 @@ def execution_session_run_forward(execution_session, onnx_model, device, *inputs run_info: A _RunStateInfo which contains extra information about the execution of the graph """ - raise NotImplemented + raise NotImplementedError @abstractmethod def forward(self): diff --git a/orttraining/orttraining/python/training/ortmodule/_io.py b/orttraining/orttraining/python/training/ortmodule/_io.py index dd9220e5c082a..cf24a4241df6c 100644 --- a/orttraining/orttraining/python/training/ortmodule/_io.py +++ b/orttraining/orttraining/python/training/ortmodule/_io.py @@ -11,7 +11,7 @@ import torch -from ._fallback import ORTModuleIOError, ORTModuleONNXModelException, _FallbackManager, wrap_exception +from ._fallback import ORTModuleIOError, ORTModuleONNXModelException, wrap_exception from ._utils import warn_of_constant_inputs @@ -298,7 +298,7 @@ def __eq__(self, other): if not other: return False elif not isinstance(other, _TensorStub): - raise NotImplemented("_TensorStub must only be compared to another _TensorStub instance!") + raise NotImplementedError("_TensorStub must only be compared to another _TensorStub instance!") elif self.name != other.name: return False elif self.dtype != other.dtype: diff --git a/orttraining/orttraining/python/training/ortmodule/_utils.py b/orttraining/orttraining/python/training/ortmodule/_utils.py index 5423b653921b3..fcdfb83b89b2b 100644 --- a/orttraining/orttraining/python/training/ortmodule/_utils.py +++ 
b/orttraining/orttraining/python/training/ortmodule/_utils.py @@ -12,7 +12,6 @@ import traceback import types import warnings -from typing import List import numpy as np import torch diff --git a/orttraining/orttraining/python/training/ortmodule/ortmodule.py b/orttraining/orttraining/python/training/ortmodule/ortmodule.py index 8f2f3eb6e4eef..4540406ddfb66 100644 --- a/orttraining/orttraining/python/training/ortmodule/ortmodule.py +++ b/orttraining/orttraining/python/training/ortmodule/ortmodule.py @@ -16,7 +16,6 @@ from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy from ._torch_module_factory import TorchModuleFactory from ._torch_module_ort import TorchModuleORT -from ._torch_module_pytorch import TorchModulePytorch from .debug_options import DebugOptions # Needed to override PyTorch methods diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/setup.py b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/setup.py index 0ab8a0c1899e2..9b705f514e1a1 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/setup.py +++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cpu/torch_interop_utils/setup.py @@ -5,7 +5,7 @@ import os -from setuptools import Extension, setup +from setuptools import setup from torch.utils import cpp_extension filename = os.path.join(os.path.dirname(__file__), "torch_interop_utils.cc") diff --git a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py index b73623c430525..7fd5a236b18d2 100644 --- a/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py +++ b/orttraining/orttraining/python/training/ortmodule/torch_cpp_extensions/cuda/fused_ops/setup.py @@ -3,9 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -import fileinput import os -import sys from setuptools import setup from torch.utils import cpp_extension diff --git a/orttraining/orttraining/python/training/orttrainer.py b/orttraining/orttraining/python/training/orttrainer.py index a19bfa2844aec..ee9c59472bedd 100644 --- a/orttraining/orttraining/python/training/orttrainer.py +++ b/orttraining/orttraining/python/training/orttrainer.py @@ -298,7 +298,7 @@ def save_as_onnx(self, path): def _check_model_export(self, input): from numpy.testing import assert_allclose - from onnx import TensorProto, helper, numpy_helper + from onnx import numpy_helper onnx_model_copy = copy.deepcopy(self._onnx_model) @@ -930,7 +930,7 @@ def _training_session_run_helper(self, is_train, inputs, inputs_desc, outputs_de # to move the data between device and host. # so output will be on the same device as input. 
try: - test_pt_device = torch.device(target_device) + torch.device(target_device) except Exception: # in this case, input/output must on CPU assert input.device.type == "cpu" diff --git a/orttraining/orttraining/python/training/orttrainer_options.py b/orttraining/orttraining/python/training/orttrainer_options.py index 6bc382c1afdc3..c4c54a57a4565 100644 --- a/orttraining/orttraining/python/training/orttrainer_options.py +++ b/orttraining/orttraining/python/training/orttrainer_options.py @@ -1,5 +1,4 @@ import cerberus -import torch import onnxruntime as ort diff --git a/orttraining/orttraining/python/training/postprocess.py b/orttraining/orttraining/python/training/postprocess.py index 6edc6db44ab47..3108880c06f46 100644 --- a/orttraining/orttraining/python/training/postprocess.py +++ b/orttraining/orttraining/python/training/postprocess.py @@ -1,11 +1,8 @@ -import os.path import struct -import sys -import numpy as np import onnx from onnx import * -from onnx import helper, numpy_helper +from onnx import helper def run_postprocess(model): @@ -168,7 +165,7 @@ def fix_expand_shape_pt_1_5(model): if n_shape.op_type != "Shape" or n_constant_g.op_type != "Constant": break n_input = n_shape.input[0] - if not n_input in model_inputs_names: + if n_input not in model_inputs_names: break n_input_candidates.append(n_input) diff --git a/orttraining/orttraining/test/external_custom_ops/setup.py b/orttraining/orttraining/test/external_custom_ops/setup.py index 2fb2e2243056d..6179a746493e4 100644 --- a/orttraining/orttraining/test/external_custom_ops/setup.py +++ b/orttraining/orttraining/test/external_custom_ops/setup.py @@ -3,7 +3,6 @@ import os import subprocess import sys -from subprocess import CalledProcessError import onnx import pybind11 diff --git a/orttraining/orttraining/test/external_custom_ops/test.py b/orttraining/orttraining/test/external_custom_ops/test.py index f3101434d5186..f7a2d38c4f185 100644 --- a/orttraining/orttraining/test/external_custom_ops/test.py +++ b/orttraining/orttraining/test/external_custom_ops/test.py @@ -1,19 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -import os -import sys import numpy as np -# Restore dlopen flags. -import orttraining_external_custom_ops - # Expose available (onnx::* and protobuf::*) symbols from onnxruntime to resolve references in # the custom ops shared library. Deepbind flag is required to avoid conflicts with other # instances of onnx/protobuf libraries. import onnxruntime +# Restore dlopen flags. 
+import orttraining_external_custom_ops
+ + so = onnxruntime.SessionOptions() sess = onnxruntime.InferenceSession("testdata/model.onnx", so) input = np.random.rand(2, 2).astype(np.float32) diff --git a/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py b/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py index f435bdd8f7d07..7c749135decb3 100644 --- a/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py +++ b/orttraining/orttraining/test/external_transformer/test/external_transformers_test.py @@ -81,13 +81,11 @@ def readOutput(self): import os import unittest -import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -from onnxruntime.capi import _pybind_state as torch_ort_eager -from onnxruntime.training import optim, orttrainer, orttrainer_options +from onnxruntime.training import optim, orttrainer def my_loss(x, target): @@ -136,7 +134,7 @@ def test_external_graph_transformer_triggering(self): target = torch.randint(0, 10, (batch_size,)) with OutputGrabber() as out: - loss = model.train_step(data, target) + model.train_step(data, target) assert "******************Trigger Customized Graph Transformer: MyGraphTransformer!" in out.capturedtext diff --git a/orttraining/orttraining/test/python/_test_helpers.py b/orttraining/orttraining/test/python/_test_helpers.py index 95c3b58521a56..8b13bf14afd55 100644 --- a/orttraining/orttraining/test/python/_test_helpers.py +++ b/orttraining/orttraining/test/python/_test_helpers.py @@ -11,7 +11,6 @@ try: from onnxruntime.training.ortmodule import ORTModule from onnxruntime.training.ortmodule._fallback import ORTModuleInitException - from onnxruntime.training.ortmodule._graph_execution_manager_factory import GraphExecutionManagerFactory except ImportError: # Some pipelines do not contain ORTModule pass @@ -61,7 +60,7 @@ def assert_model_outputs(output_a, output_b, verbose=False, rtol=1e-7, atol=0): ) # for idx in range(len(output_a)): - assert_allclose(output_a, output_b, rtol=rtol, atol=atol, err_msg=f"Model output value mismatch") + assert_allclose(output_a, output_b, rtol=rtol, atol=atol, err_msg="Model output value mismatch") def assert_onnx_weights(model_a, model_b, verbose=False, rtol=1e-7, atol=0): diff --git a/orttraining/orttraining/test/python/launch_test.py b/orttraining/orttraining/test/python/launch_test.py index 3743e31229ce2..d3427b00a3061 100755 --- a/orttraining/orttraining/test/python/launch_test.py +++ b/orttraining/orttraining/test/python/launch_test.py @@ -4,7 +4,6 @@ import argparse import logging -import os import sys from _test_commons import run_subprocess diff --git a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py b/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py index 83bd524e7d6f3..99e7698f07cb0 100644 --- a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py +++ b/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py @@ -1,21 +1,15 @@ -import copy import os -import sys import unittest -import onnx -import pytest import torch import torch.nn as nn -import torch.nn.functional as F -from numpy.testing import assert_allclose, assert_array_equal from orttraining_test_bert_postprocess import postprocess_model from orttraining_test_data_loader import create_ort_test_dataloader from orttraining_test_transformers import BertForPreTraining, BertModelTest from orttraining_test_utils import map_optimizer_attributes import onnxruntime -from onnxruntime.capi.ort_trainer import 
IODescription, LossScaler, ModelDescription, ORTTrainer, generate_sample +from onnxruntime.capi.ort_trainer import IODescription, ModelDescription, ORTTrainer torch.manual_seed(1) onnxruntime.set_seed(1) @@ -47,7 +41,7 @@ def get_onnx_model( _extra_postprocess=_extra_postprocess, ) - train_output = model.train_step(*inputs) + model.train_step(*inputs) return model.onnx_model_ def count_all_nodes(self, model): diff --git a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py index 2dba2cd5c86b4..17b25290c9b7f 100644 --- a/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py +++ b/orttraining/orttraining/test/python/orttraining_run_bert_pretrain.py @@ -31,7 +31,7 @@ import onnxruntime.capi.pt_patch from onnxruntime.training import amp, optim, orttrainer from onnxruntime.training.checkpoint import aggregate_checkpoints -from onnxruntime.training.optim import LinearWarmupLRScheduler, PolyWarmupLRScheduler +from onnxruntime.training.optim import LinearWarmupLRScheduler # we cannot make full convergence run in nightly pipeling because of its timeout limit, # max_steps is still needed to calculate learning rate. force_to_stop_max_steps is used to @@ -486,7 +486,6 @@ def do_pretrain(args): logger.info("Running training: Batch size = %d, initial LR = %f", args.train_batch_size, args.learning_rate) - most_recent_ckpts_paths = [] average_loss = 0.0 epoch = 0 training_steps = 0 @@ -689,7 +688,7 @@ def test_pretrain_zero(self): deepspeed_zero_stage=self.deepspeed_zero_stage, save_checkpoint=True, ) - train_loss = do_pretrain(args) + do_pretrain(args) # ensure all workers reach this point before loading the checkpointed state torch.distributed.barrier() @@ -726,12 +725,7 @@ def test_pretrain_zero(self): # calling unpublished get_mpi_context_xxx to get rank/size numbers. try: # In case ORT is not built with MPI/NCCL, there are no get_mpi_context_xxx internal apis. 
- from onnxruntime.capi._pybind_state import ( - get_mpi_context_local_rank, - get_mpi_context_local_size, - get_mpi_context_world_rank, - get_mpi_context_world_size, - ) + from onnxruntime.capi._pybind_state import get_mpi_context_local_rank, get_mpi_context_world_size has_get_mpi_context_internal_api = True except ImportError: diff --git a/orttraining/orttraining/test/python/orttraining_run_glue.py b/orttraining/orttraining/test/python/orttraining_run_glue.py index bd7b7f993ec06..a40d97e00be50 100644 --- a/orttraining/orttraining/test/python/orttraining_run_glue.py +++ b/orttraining/orttraining/test/python/orttraining_run_glue.py @@ -1,6 +1,5 @@ # adapted from run_glue.py of huggingface transformers -import dataclasses import logging import os import unittest @@ -24,15 +23,9 @@ ) import onnxruntime -from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer try: - from onnxruntime.capi._pybind_state import ( - get_mpi_context_local_rank, - get_mpi_context_local_size, - get_mpi_context_world_rank, - get_mpi_context_world_size, - ) + from onnxruntime.capi._pybind_state import get_mpi_context_local_rank, get_mpi_context_world_size has_get_mpi_context_internal_api = True except ImportError: @@ -40,7 +33,6 @@ pass -import torch from orttraining_transformer_trainer import ORTTransformerTrainer logger = logging.getLogger(__name__) diff --git a/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py b/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py index 5d0143867deb7..d7154de44a7c3 100644 --- a/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py +++ b/orttraining/orttraining/test/python/orttraining_run_multiple_choice.py @@ -1,7 +1,6 @@ # adapted from run_multiple_choice.py of huggingface transformers # https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/run_multiple_choice.py -import dataclasses import logging import os import unittest @@ -9,24 +8,18 @@ from typing import Dict, Optional import numpy as np -import torch -from numpy.testing import assert_allclose -from orttraining_run_glue import verify_old_and_new_api_are_equal from orttraining_transformer_trainer import ORTTransformerTrainer from transformers import ( AutoConfig, AutoModelForMultipleChoice, AutoTokenizer, EvalPrediction, - HfArgumentParser, - Trainer, TrainingArguments, set_seed, ) from utils_multiple_choice import MultipleChoiceDataset, Split, SwagProcessor import onnxruntime -from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer logger = logging.getLogger(__name__) diff --git a/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py b/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py index 8894ea9835848..57238d35d51f6 100644 --- a/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py +++ b/orttraining/orttraining/test/python/orttraining_test_bert_postprocess.py @@ -1,5 +1,4 @@ -from orttraining_test_layer_norm_transform import layer_norm_transform -from orttraining_test_model_transform import add_expand_shape, add_name, fix_transpose +from orttraining_test_model_transform import add_name def postprocess_model(model): diff --git a/orttraining/orttraining/test/python/orttraining_test_debuggability.py b/orttraining/orttraining/test/python/orttraining_test_debuggability.py index 99c95ff09cecf..44c06287ffa88 100644 --- a/orttraining/orttraining/test/python/orttraining_test_debuggability.py +++ 
b/orttraining/orttraining/test/python/orttraining_test_debuggability.py @@ -1,21 +1,8 @@ -import inspect -import os - -import _test_helpers -import onnx import pytest import torch -import torchvision from _test_commons import _load_pytorch_transformer_model -from numpy.testing import assert_allclose from onnxruntime import set_seed -from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription -from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler -from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription -from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer -from onnxruntime.training import TrainStepInfo, _utils, amp -from onnxruntime.training import model_desc_validation as md_val from onnxruntime.training import optim, orttrainer from onnxruntime.training import orttrainer_options as orttrainer_options diff --git a/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py b/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py index 42daff79bd7d1..1e7fc2df57895 100644 --- a/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py +++ b/orttraining/orttraining/test/python/orttraining_test_hierarchical_ortmodule.py @@ -8,7 +8,6 @@ import torch.nn.functional as F from torch.utils.checkpoint import checkpoint -from onnxruntime.training.ortmodule import ORTModule from onnxruntime.training.ortmodule.experimental.hierarchical_ortmodule import HierarchicalORTModule diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 7758603c484fc..753d28345315e 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -557,7 +557,7 @@ def test_forward_call_positional_and_keyword_arguments(): ], ) def test_compare_pytorch_forward_call_positional_and_keyword_arguments(forward_statement): - one = torch.FloatTensor([1]) + torch.FloatTensor([1]) model = NeuralNetSimplePositionalAndKeywordArguments() pytorch_result = eval(forward_statement + ".item()") @@ -679,7 +679,7 @@ def test_model_with_different_devices_same_session(): model.to(device) x = torch.randn(N, D_in, device=device) - y = model(x) + model(x) del os.environ["ORTMODULE_SKIPCHECK_POLICY"] @@ -1661,8 +1661,6 @@ def run_step(model, input): @pytest.mark.parametrize("input_shape", ([4, 2],)) def test_aten_argmax(input_shape): - import torch.nn.functional as F - class TopKGate(torch.nn.Module): def forward(self, input: torch.Tensor): indices = torch.argmax(input, dim=1) @@ -2046,7 +2044,7 @@ def run_step(backbone_layers, task_layers, x): _test_helpers.assert_gradients_match_and_reset_gradient(ort_model1, pt_model1) # Run task 2 - x2 = torch.randn(N, D_in, device=device) + torch.randn(N, D_in, device=device) pt_prediction = run_step(pt_model0, pt_model2, x1) ort_prediction = run_step(ort_model0, ort_model2, x1) @@ -2286,11 +2284,11 @@ def run_step(model, x1): ort_x1 = pt_x1.clone() with pytest.raises(Exception) as ex_info: - pt_y1 = run_step(pt_model, pt_x1) + run_step(pt_model, pt_x1) assert "modified by an inplace operation" in str(ex_info.value) with pytest.raises(Exception) as ex_info: - ort_y1 = run_step(ort_model, ort_x1) + run_step(ort_model, ort_x1) assert "modified by an inplace operation" in str(ex_info.value) @@ -2643,7 +2641,7 @@ def forward(self, x): ort_model = 
ORTModule(copy.deepcopy(pt_model)) with pytest.raises(RuntimeError) as runtime_error: ort_model(x) - assert f"Expected all tensors to be on the same device, but found at least two devices" in str( + assert "Expected all tensors to be on the same device, but found at least two devices" in str( runtime_error.value ) else: @@ -2674,7 +2672,7 @@ def forward(self, x): with pytest.raises(RuntimeError) as runtime_error: ort_model = ORTModule(copy.deepcopy(pt_model)) ort_model(x) - assert f"Expected all tensors to be on the same device, but found at least two devices" in str( + assert "Expected all tensors to be on the same device, but found at least two devices" in str( runtime_error.value ) else: @@ -2705,7 +2703,7 @@ def forward(self, x): ort_model = ORTModule(copy.deepcopy(pt_model)) with pytest.raises(RuntimeError) as runtime_error: ort_model(x) - assert f"Expected all tensors to be on the same device, but found at least two devices" in str( + assert "Expected all tensors to be on the same device, but found at least two devices" in str( runtime_error.value ) else: @@ -2736,7 +2734,7 @@ def forward(self, x): ort_model = ORTModule(copy.deepcopy(pt_model)) with pytest.raises(RuntimeError) as runtime_error: ort_model(x) - assert f"Expected all tensors to be on the same device, but found at least two devices" in str( + assert "Expected all tensors to be on the same device, but found at least two devices" in str( runtime_error.value ) else: @@ -2913,7 +2911,7 @@ def test_forward_data_and_model_on_different_devices(data_device, model_device): ort_model = ORTModule(model) # When exporting the model, ensure device is same between input data and model (else pytorch will raise while exporting) x = torch.randn(N, D_in, device=model_device) - output = ort_model(x) + ort_model(x) # Now that the model has been exported, feed in data from device other than the model device x = torch.randn(N, D_in, device=data_device) @@ -2923,7 +2921,7 @@ def test_forward_data_and_model_on_different_devices(data_device, model_device): # Fallback with pytest.raises(RuntimeError) as runtime_error: ort_model(x) - assert f"Expected all tensors to be on the same device, but found at least two devices" in str( + assert "Expected all tensors to be on the same device, but found at least two devices" in str( runtime_error.value ) else: @@ -3035,7 +3033,7 @@ def test_model_wrapped_inside_torch_no_grad(): # Make sure no exception is raised with torch.no_grad(): - output = model(x) + model(x) def test_model_initializer_requires_grad_changes_from_one_forward_to_next(): @@ -3179,7 +3177,7 @@ def test_state_dict(): pt_model = NeuralNetSinglePositionalArgument(D_in, H, D_out).to(device) ort_model = ORTModule(copy.deepcopy(pt_model)) x = torch.randn(N, D_in, device=device) - y = x.clone() + x.clone() state_dict_ort = ort_model.state_dict() state_dict_pt = pt_model.state_dict() @@ -3208,7 +3206,7 @@ def test_load_state_dict(): pt_model = NeuralNetSinglePositionalArgument(D_in, H, D_out).to(device) ort_model = ORTModule(copy.deepcopy(pt_model)) x = torch.randn(N, D_in, device=device) - y = x.clone() + x.clone() state_dict_pt = pt_model.state_dict() list(next(iter(state_dict_pt.items())))[1] += 10 @@ -3608,12 +3606,12 @@ def forward(self, pos_0, pos_1, *args, kw_0=None, kw_1=None, **kwargs): model = ORTModule(model) # Dummy inputs used - pos_0 = torch.randn(N, D_in, device=device) - pos_1 = torch.randn(N, D_in, device=device) - kw_0 = torch.randn(N, D_in, device=device) - kw_1 = torch.randn(N, D_in, device=device) - args = [torch.randn(N, D_in, 
device=device)] * 2 - kwargs = {"kwargs_0": torch.randn(N, D_in, device=device), "kwargs_1": torch.randn(D_in, D_in, device=device)} + torch.randn(N, D_in, device=device) + torch.randn(N, D_in, device=device) + torch.randn(N, D_in, device=device) + torch.randn(N, D_in, device=device) + [torch.randn(N, D_in, device=device)] * 2 + {"kwargs_0": torch.randn(N, D_in, device=device), "kwargs_1": torch.randn(D_in, D_in, device=device)} # Training step prediction = eval(forward_statement) @@ -3989,7 +3987,7 @@ def forward(self, bool_argument, input1): x = torch.randn(N, D_in, device=device) # Ensure that no exceptions are raised - out = model(bool_argument, x) + model(bool_argument, x) @pytest.mark.parametrize( @@ -4252,7 +4250,7 @@ def __init__(self): self.dummy = torch.nn.Parameter(torch.FloatTensor([0])) def forward(self, batch): - a = batch[0] + batch[0] b = batch[1] return self.dummy + b @@ -4384,7 +4382,7 @@ def test_debug_options_save_onnx_models_validate_fail_on_non_str_prefix(): def test_debug_options_save_onnx_models_validate_fail_on_no_prefix(): with pytest.raises(Exception) as ex_info: _ = DebugOptions(save_onnx=True) - assert f"onnx_prefix must be provided when save_onnx is set." in str(ex_info.value) + assert "onnx_prefix must be provided when save_onnx is set." in str(ex_info.value) def test_debug_options_log_level(): @@ -4507,7 +4505,7 @@ def __init__(self): self.dummy = torch.nn.Parameter(torch.FloatTensor([0])) def forward(self, batch): - b = batch["b"] + batch["b"] a = batch["a"] return self.dummy + a @@ -4760,7 +4758,7 @@ def forward(self, a): assert not hasattr(pt_model, "_torch_module") assert "_torch_module" in ort_model.__dict__ - assert ort_model._torch_module == True + assert ort_model._torch_module is True def test_ortmodule_setattr_signals_model_changed(): @@ -4788,11 +4786,11 @@ def forward(self, a): exported_model1 = ort_model._torch_module._execution_manager(True)._onnx_models.exported_model for training_mode in [False, True]: - assert ort_model._torch_module._execution_manager(training_mode)._original_model_has_changed == False + assert ort_model._torch_module._execution_manager(training_mode)._original_model_has_changed is False ort_model.input_flag = False for training_mode in [False, True]: - assert ort_model._torch_module._execution_manager(training_mode)._original_model_has_changed == True + assert ort_model._torch_module._execution_manager(training_mode)._original_model_has_changed is True _ = ort_model(torch.randn(N, D_in, device=device)) exported_model2 = ort_model._torch_module._execution_manager(True)._onnx_models.exported_model @@ -4818,7 +4816,7 @@ def load_state_dict(self): device = "cuda" pt_model = UserNet().to(device) with pytest.warns(UserWarning) as warning_record: - ort_model = ORTModule(pt_model) + ORTModule(pt_model) # FutureWarning('The first argument to symbolic functions is deprecated in 1.13 and will be removed in the future. 
# Please annotate treat the first argument (g) as GraphContext and use context information from the object diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py index a625735c8c039..bf7c1b266f986 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py @@ -643,7 +643,7 @@ def forward(ctx, x): @staticmethod def backward(ctx, grad_output): - x = ctx.saved_tensors + ctx.saved_tensors return None class EvalTestModel(torch.nn.Module): @@ -1176,7 +1176,7 @@ def forward(ctx, x): @staticmethod def backward(ctx, grad_output): - x = ctx.saved_tensors + ctx.saved_tensors return None class TestSkippedModel(torch.nn.Module): diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py index 231c8f45c93b2..1f9e3d4584483 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd_dist.py @@ -4,7 +4,6 @@ import copy import os -import sys import _test_helpers import torch @@ -15,7 +14,6 @@ import onnxruntime from onnxruntime.training.ortmodule import ORTModule -from onnxruntime.training.ortmodule._graph_execution_manager_factory import GraphExecutionManagerFactory torch.manual_seed(1) onnxruntime.set_seed(1) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py index fcbb26e707f4e..f186214bebb51 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier.py @@ -15,7 +15,6 @@ from transformers import ( AdamW, AutoConfig, - BertConfig, BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup, diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py index 4061b24a276f8..ab1be7c90f869 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_bert_classifier_autocast.py @@ -12,14 +12,7 @@ import wget from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset -from transformers import ( - AdamW, - AutoConfig, - BertConfig, - BertForSequenceClassification, - BertTokenizer, - get_linear_schedule_with_warmup, -) +from transformers import AutoConfig, BertForSequenceClassification, BertTokenizer, get_linear_schedule_with_warmup import onnxruntime from onnxruntime.training.ortmodule import DebugOptions, ORTModule diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py index 5d4c36b31c526..ee31ac302640f 100755 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_deepspeed_pipeline_parallel.py @@ -3,11 +3,10 @@ import deepspeed 
import torch import torch.distributed as dist -from deepspeed.pipe import LayerSpec, PipelineModule -from deepspeed.utils import RepeatingLoader -from torch import nn, optim +from deepspeed.pipe import PipelineModule +from torch import nn -from onnxruntime.training.ortmodule import ORTModule, _utils +from onnxruntime.training.ortmodule import ORTModule # USAGE: # pip install deepspeed diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py index b8f824db28d8c..2998d0afb8336 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_experimental_json_config.py @@ -43,30 +43,30 @@ def test_load_config_from_json_1(): assert ort_model_attributes._propagate_cast_ops_allow == ["ABC", "DEF"] # test use external gpu allocator - assert ort_model_attributes._use_external_gpu_allocator == False + assert ort_model_attributes._use_external_gpu_allocator is False # test enable custom autograd function - assert ort_model_attributes._enable_custom_autograd_function == True + assert ort_model_attributes._enable_custom_autograd_function is True # test use static shape - assert ort_model_attributes._use_static_shape == True + assert ort_model_attributes._use_static_shape is True # test run symbolic shape inference - assert ort_model_attributes._run_symbolic_shape_infer == False + assert ort_model_attributes._run_symbolic_shape_infer is False # test enable grad acc optimization - assert ort_model_attributes._enable_grad_acc_optimization == True + assert ort_model_attributes._enable_grad_acc_optimization is True # test skip check assert ort_model_attributes._skip_check.value == 14 # test debug options - assert ort_model_attributes._debug_options.save_onnx_models.save == True + assert ort_model_attributes._debug_options.save_onnx_models.save is True assert ort_model_attributes._debug_options.save_onnx_models.name_prefix == "my_model" assert ort_model_attributes._debug_options.logging.log_level.name == "VERBOSE" # test use memory aware gradient builder. 
- assert ort_model_attributes._use_memory_efficient_gradient == False + assert ort_model_attributes._use_memory_efficient_gradient is False # test fallback policy assert ort_model_attributes._fallback_manager.policy.value == 1 @@ -96,30 +96,30 @@ def test_load_config_from_json_2(): assert ort_model_attributes._propagate_cast_ops_allow == ["XYZ", "PQR"] # test use external gpu allocator - assert ort_model_attributes._use_external_gpu_allocator == True + assert ort_model_attributes._use_external_gpu_allocator is True # test enable custom autograd function - assert ort_model_attributes._enable_custom_autograd_function == False + assert ort_model_attributes._enable_custom_autograd_function is False # test use static shape - assert ort_model_attributes._use_static_shape == False + assert ort_model_attributes._use_static_shape is False # test run symbolic shape inference - assert ort_model_attributes._run_symbolic_shape_infer == True + assert ort_model_attributes._run_symbolic_shape_infer is True # test enable grad acc optimization - assert ort_model_attributes._enable_grad_acc_optimization == False + assert ort_model_attributes._enable_grad_acc_optimization is False # test skip check assert ort_model_attributes._skip_check.value == 10 # test debug options - assert ort_model_attributes._debug_options.save_onnx_models.save == True + assert ort_model_attributes._debug_options.save_onnx_models.save is True assert ort_model_attributes._debug_options.save_onnx_models.name_prefix == "my_other_model" assert ort_model_attributes._debug_options.logging.log_level.name == "INFO" # test use memory aware gradient builder. - assert ort_model_attributes._use_memory_efficient_gradient == True + assert ort_model_attributes._use_memory_efficient_gradient is True # test fallback policy assert ort_model_attributes._fallback_manager.policy.value == 250 diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py index 59ae550999601..672abce394a9b 100755 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_fairscale_sharded_optimizer.py @@ -203,9 +203,7 @@ def train(rank: int, args, world_size: int, epochs: int): train_dataloader, test_dataloader = get_dataloader(args, rank, args.batch_size) loss_fn = my_loss base_optimizer = torch.optim.SGD # pick any pytorch compliant optimizer here - base_optimizer_arguments = ( - {} - ) # pass any optimizer specific arguments here, or directly below when instantiating OSS + # pass any optimizer specific arguments here, or directly below when instantiating OSS if args.use_sharded_optimizer: # Wrap the optimizer in its state sharding brethren optimizer = OSS(params=model.parameters(), optim=base_optimizer, lr=args.lr) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_fallback.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_fallback.py index 6cde304a6570b..e58b903013ecd 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_fallback.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_fallback.py @@ -440,7 +440,7 @@ def test_ortmodule_fallback_init__missing_cpp_extensions( if is_torch_cpp_extensions_installed(ORTMODULE_TORCH_CPP_DIR): warnings.warn( "Skipping test_ortmodule_fallback_init__missing_cpp_extensions." 
- f" It requires PyTorch CPP extensions to be missing" + " It requires PyTorch CPP extensions to be missing" ) else: @@ -647,7 +647,7 @@ def get_batch(source, i): for epoch in range(1, 2): model.train() # turn on train mode - num_batches = len(train_data) // bptt + len(train_data) // bptt for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)): data, targets = get_batch(train_data, i) batch_size = data.size(0) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py index ee90c614af069..fd9384f41652c 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_pytorch_ddp.py @@ -2,7 +2,6 @@ # For details, see https://pytorch.org/tutorials/intermediate/ddp_tutorial.html. import argparse import os -import sys import tempfile import torch @@ -12,7 +11,6 @@ import torch.optim as optim from torch.nn.parallel import DistributedDataParallel as DDP -import onnxruntime from onnxruntime.training.ortmodule import ORTModule diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py index f2aac9f17f2aa..5795594312011 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py @@ -1,12 +1,8 @@ -import copy -import inspect -import math import os from functools import partial import _test_commons import _test_helpers -import numpy as np import onnx import pytest import torch @@ -17,9 +13,7 @@ from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer -from onnxruntime.training import TrainStepInfo, _utils, amp, checkpoint -from onnxruntime.training import model_desc_validation as md_val -from onnxruntime.training import optim, orttrainer +from onnxruntime.training import amp, optim, orttrainer from onnxruntime.training import orttrainer_options as orttrainer_options ############################################################################### @@ -184,7 +178,6 @@ def legacy_ort_trainer_learning_rate_description(): def legacy_bert_model_description(): - vocab_size = 30528 input_ids_desc = Legacy_IODescription("input_ids", ["batch", "max_seq_len_in_batch"]) segment_ids_desc = Legacy_IODescription("segment_ids", ["batch", "max_seq_len_in_batch"]) input_mask_desc = Legacy_IODescription("input_mask", ["batch", "max_seq_len_in_batch"]) @@ -257,7 +250,7 @@ def testToyBERTDeterministicCheck(expected_losses): # Modeling model_desc = bert_model_description() model = load_bert_onnx_model() - params = optimizer_parameters(model) + optimizer_parameters(model) optim_config = optim.LambConfig() opts = orttrainer.ORTTrainerOptions( { @@ -720,7 +713,6 @@ def testToyBertCheckpointFrozenWeights(): ) def testToyBertLoadOptimState(optimizer, mixedprecision_enabled): # Common setup - rtol = 1e-03 device = "cuda" seed = 1 torch.manual_seed(seed) diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py index 97688db9262cb..5848682a108c8 100644 --- 
a/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_checkpoint_functions.py @@ -6,7 +6,7 @@ import torch from _test_commons import _load_pytorch_transformer_model -from onnxruntime.training import _checkpoint_storage, amp, checkpoint, optim, orttrainer +from onnxruntime.training import _checkpoint_storage, checkpoint, optim, orttrainer # Helper functions @@ -627,7 +627,7 @@ def test_checkpoint_aggregation(load_mock): assert (state_dict["optimizer"]["non_sharded"]["Moment_2"] == np.array([6666, 5555, 4444])).all() assert (state_dict["optimizer"]["non_sharded"]["Step"] == np.array([55])).all() - assert state_dict["trainer_options"]["mixed_precision"] == False + assert state_dict["trainer_options"]["mixed_precision"] is False assert state_dict["trainer_options"]["world_rank"] == 0 assert state_dict["trainer_options"]["world_size"] == 1 assert state_dict["trainer_options"]["horizontal_parallel_size"] == 1 @@ -713,7 +713,7 @@ def test_checkpoint_aggregation_mixed_precision(load_mock): assert (state_dict["optimizer"]["non_sharded"]["Moment_2"] == np.array([6666, 5555, 4444])).all() assert (state_dict["optimizer"]["non_sharded"]["Step"] == np.array([55])).all() - assert state_dict["trainer_options"]["mixed_precision"] == True + assert state_dict["trainer_options"]["mixed_precision"] is True assert state_dict["trainer_options"]["world_rank"] == 0 assert state_dict["trainer_options"]["world_size"] == 1 assert state_dict["trainer_options"]["horizontal_parallel_size"] == 1 diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py index ce82fcd1f1045..34feb83eaad41 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py @@ -13,11 +13,9 @@ from packaging.version import Version as StrictVersion from onnxruntime import SessionOptions, set_seed -from onnxruntime.capi.ort_trainer import IODescription as Legacy_IODescription from onnxruntime.capi.ort_trainer import LossScaler as Legacy_LossScaler -from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer -from onnxruntime.training import PropagateCastOpsStrategy, TrainStepInfo, _utils, amp, checkpoint +from onnxruntime.training import PropagateCastOpsStrategy, TrainStepInfo, _utils, amp from onnxruntime.training import model_desc_validation as md_val from onnxruntime.training import optim, orttrainer from onnxruntime.training import orttrainer_options as orttrainer_options @@ -61,8 +59,6 @@ def testORTTrainerOptionsDefaultValues(test_input): "sliced_tensor_names": [], }, "allreduce_post_accumulation": False, - "data_parallel_size": 1, - "horizontal_parallel_size": 1, "deepspeed_zero_optimization": { "stage": 0, }, @@ -316,7 +312,7 @@ def testDynamicLossScalerCustomValues(): scaler = amp.loss_scaler.DynamicLossScaler( automatic_update=False, loss_scale=3, up_scale_window=7, min_loss_scale=5, max_loss_scale=10 ) - assert scaler.automatic_update == False + assert scaler.automatic_update is False assert_allclose(scaler.loss_scale, 3, rtol=rtol, err_msg="loss scale mismatch") assert_allclose(scaler.min_loss_scale, 5, rtol=rtol, err_msg="min loss scale mismatch") assert_allclose(scaler.max_loss_scale, 10, rtol=rtol, err_msg="max loss scale 
mismatch") @@ -332,14 +328,14 @@ def testTrainStepInfo(): optimizer_config=optimizer_config, all_finite=False, fetches=fetches, optimization_step=123, step=456 ) assert step_info.optimizer_config == optimizer_config - assert step_info.all_finite == False + assert step_info.all_finite is False assert step_info.fetches == fetches assert step_info.optimization_step == 123 assert step_info.step == 456 step_info = orttrainer.TrainStepInfo(optimizer_config) assert step_info.optimizer_config == optimizer_config - assert step_info.all_finite == True + assert step_info.all_finite is True assert step_info.fetches == [] assert step_info.optimization_step == 0 assert step_info.step == 0 @@ -459,7 +455,7 @@ def testOptimizerConfigAdam(): assert_allclose(0.0, cfg.lambda_coef, rtol=rtol, err_msg="lambda_coef mismatch") assert_allclose(1e-8, cfg.epsilon, rtol=rtol, err_msg="epsilon mismatch") assert_allclose(1.0, cfg.max_norm_clip, rtol=rtol, err_msg="max_norm_clip mismatch") - assert cfg.do_bias_correction == True, "lambda_coef mismatch" + assert cfg.do_bias_correction is True, "lambda_coef mismatch" assert cfg.weight_decay_mode == optim.AdamConfig.DecayMode.BEFORE_WEIGHT_UPDATE, "weight_decay_mode mismatch" @@ -476,7 +472,7 @@ def testOptimizerConfigLamb(): assert cfg.ratio_max == float("inf"), "ratio_max mismatch" assert_allclose(1e-6, cfg.epsilon, rtol=rtol, err_msg="epsilon mismatch") assert_allclose(1.0, cfg.max_norm_clip, rtol=rtol, err_msg="max_norm_clip mismatch") - assert cfg.do_bias_correction == False, "do_bias_correction mismatch" + assert cfg.do_bias_correction is False, "do_bias_correction mismatch" @pytest.mark.parametrize("optim_name", [("Adam"), ("Lamb")]) @@ -1045,7 +1041,7 @@ def testORTTrainerInternalUseContribOps(enable_onnx_contrib_ops): # Training loop data, targets = batcher_fn(train_data, 0) if not enable_onnx_contrib_ops and not pytorch_110: - with pytest.raises(Exception) as e_info: + with pytest.raises(Exception): _, _ = trainer.train_step(data, targets) else: _, _ = trainer.train_step(data, targets) @@ -1592,7 +1588,7 @@ def testORTTrainerLegacyAndExperimentalLRScheduler(seed, device, optimizer_confi def testLossScalerLegacyAndExperimentalFullCycle(): - info = orttrainer.TrainStepInfo( + orttrainer.TrainStepInfo( optimizer_config=optim.LambConfig(lr=0.001), all_finite=True, fetches=[], optimization_step=0, step=0 ) new_ls = amp.DynamicLossScaler() @@ -1758,7 +1754,7 @@ def testORTTrainerOptionsEnabledAdasumFlag(test_input): """Test the enabled_adasum flag values when set enabled""" actual_values = orttrainer_options.ORTTrainerOptions(test_input) - assert actual_values.distributed.enable_adasum == True + assert actual_values.distributed.enable_adasum is True @pytest.mark.parametrize( @@ -1775,7 +1771,7 @@ def testORTTrainerOptionsDisabledAdasumFlag(test_input): """Test the enabled_adasum flag values when set disabled""" actual_values = orttrainer_options.ORTTrainerOptions(test_input) - assert actual_values.distributed.enable_adasum == False + assert actual_values.distributed.enable_adasum is False def testORTTrainerUnusedInput(): diff --git a/orttraining/orttraining/test/python/orttraining_test_transformers.py b/orttraining/orttraining/test/python/orttraining_test_transformers.py index 64e356aaded7d..0d41105cc34be 100644 --- a/orttraining/orttraining/test/python/orttraining_test_transformers.py +++ b/orttraining/orttraining/test/python/orttraining_test_transformers.py @@ -1,18 +1,15 @@ -import os import random -import shutil import unittest import numpy as np -import pytest 
import torch from numpy.testing import assert_allclose from orttraining_test_data_loader import BatchArgsOption, ids_tensor from orttraining_test_utils import get_lr, run_test -from transformers import BertConfig, BertForPreTraining, BertModel +from transformers import BertConfig, BertForPreTraining import onnxruntime -from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer +from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription class BertModelTest(unittest.TestCase): diff --git a/orttraining/orttraining/test/python/orttraining_test_utils.py b/orttraining/orttraining/test/python/orttraining_test_utils.py index 7397cc9d517b9..db38139ea53b2 100644 --- a/orttraining/orttraining/test/python/orttraining_test_utils.py +++ b/orttraining/orttraining/test/python/orttraining_test_utils.py @@ -1,11 +1,8 @@ import torch -from orttraining_test_bert_postprocess import postprocess_model from orttraining_test_data_loader import BatchArgsOption, create_ort_test_dataloader, split_batch from onnxruntime.capi.ort_trainer import IODescription, ORTTrainer -from onnxruntime.training import TrainStepInfo, _utils, amp -from onnxruntime.training import model_desc_validation as md_val -from onnxruntime.training import optim, orttrainer +from onnxruntime.training import amp, optim, orttrainer from onnxruntime.training import orttrainer_options as orttrainer_options from onnxruntime.training.optim import _LRScheduler diff --git a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py index ccbd93fdcd99d..486fb543d9aa5 100644 --- a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py +++ b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py @@ -4,36 +4,28 @@ import logging import os import random -from typing import Callable, Dict, List, NamedTuple, Optional, Tuple +from typing import Callable, Dict, List, NamedTuple, Optional import numpy as np import torch -from orttraining_test_bert_postprocess import postprocess_model -from torch import nn from torch.utils.data.dataloader import DataLoader from torch.utils.data.dataset import Dataset from torch.utils.data.distributed import DistributedSampler -from torch.utils.data.sampler import RandomSampler, SequentialSampler +from torch.utils.data.sampler import SequentialSampler from tqdm import tqdm, trange -from transformers.data.data_collator import DataCollator, DefaultDataCollator +from transformers.data.data_collator import DefaultDataCollator from transformers.modeling_utils import PreTrainedModel from transformers.training_args import TrainingArguments import onnxruntime -from onnxruntime.capi.ort_trainer import IODescription, LossScaler, ModelDescription, ORTTrainer -from onnxruntime.training import TrainStepInfo, _utils, amp -from onnxruntime.training import model_desc_validation as md_val -from onnxruntime.training import optim, orttrainer +from onnxruntime.training import amp, optim, orttrainer from onnxruntime.training import orttrainer_options as orttrainer_options -from onnxruntime.training.optim import LinearWarmupLRScheduler, _LRScheduler try: - from torch.utils.tensorboard import SummaryWriter _has_tensorboard = True except ImportError: try: - from tensorboardX import SummaryWriter _has_tensorboard = True except ImportError: diff --git a/orttraining/orttraining/test/python/utils_multiple_choice.py b/orttraining/orttraining/test/python/utils_multiple_choice.py index 
9d859060b42ad..04aee10c45303 100644 --- a/orttraining/orttraining/test/python/utils_multiple_choice.py +++ b/orttraining/orttraining/test/python/utils_multiple_choice.py @@ -2,8 +2,6 @@ # https://github.com/huggingface/transformers/blob/master/examples/multiple-choice/utils_multiple_choice.py import csv -import glob -import json import logging import os from dataclasses import dataclass @@ -14,7 +12,7 @@ import tqdm from filelock import FileLock from torch.utils.data.dataset import Dataset -from transformers import PreTrainedTokenizer, is_tf_available, is_torch_available +from transformers import PreTrainedTokenizer logger = logging.getLogger(__name__) diff --git a/orttraining/pytorch_frontend_examples/mnist_training.py b/orttraining/pytorch_frontend_examples/mnist_training.py index 59598c8d2ed1f..b71a9d75921e8 100644 --- a/orttraining/pytorch_frontend_examples/mnist_training.py +++ b/orttraining/pytorch_frontend_examples/mnist_training.py @@ -10,7 +10,6 @@ import torch import torch.nn as nn import torch.nn.functional as F -import torch.optim as optim from mpi4py import MPI from torchvision import datasets, transforms @@ -192,7 +191,6 @@ def main(): for epoch in range(1, args.epochs + 1): train_with_trainer(args, trainer, device, train_loader, epoch) - import pdb test_with_trainer(args, trainer, device, test_loader) diff --git a/orttraining/tools/amdgpu/script/rocprof.py b/orttraining/tools/amdgpu/script/rocprof.py index 4653f427014da..baafdafc98578 100644 --- a/orttraining/tools/amdgpu/script/rocprof.py +++ b/orttraining/tools/amdgpu/script/rocprof.py @@ -1,8 +1,5 @@ import argparse import csv -import os - -import numpy as np parser = argparse.ArgumentParser() parser.add_argument("--input", type=str) diff --git a/orttraining/tools/ci_test/compare_results.py b/orttraining/tools/ci_test/compare_results.py index ba76b9eaf414c..1c302f7dcd07b 100644 --- a/orttraining/tools/ci_test/compare_results.py +++ b/orttraining/tools/ci_test/compare_results.py @@ -1,10 +1,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
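The import removals in these hunks all target pyflakes F401 (imported but unused) or stray debugging imports such as `pdb`. A before/after sketch with stdlib modules only:

```python
# before: pyflakes F401 -- `glob` and `json` are imported but never used
import csv
import glob  # F401 (left in only to illustrate the warning)
import json  # F401

# after: keep just the import that the code exercises
rows = list(csv.reader(["a,b", "1,2"]))
print(rows)  # [['a', 'b'], ['1', '2']]
```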
-import argparse import collections import csv -import re import sys Comparison = collections.namedtuple("Comparison", ["name", "fn"]) diff --git a/orttraining/tools/ci_test/download_azure_blob_archive.py b/orttraining/tools/ci_test/download_azure_blob_archive.py index 6fa875a1d2373..dea1964cc0f66 100755 --- a/orttraining/tools/ci_test/download_azure_blob_archive.py +++ b/orttraining/tools/ci_test/download_azure_blob_archive.py @@ -9,8 +9,6 @@ import subprocess import sys import tempfile -import urllib.request -import zipfile SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) REPO_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "..", "..")) diff --git a/orttraining/tools/scripts/experiment.py b/orttraining/tools/scripts/experiment.py index a3ef82577208f..1841a89d91849 100644 --- a/orttraining/tools/scripts/experiment.py +++ b/orttraining/tools/scripts/experiment.py @@ -1,15 +1,12 @@ import argparse -import os import re -import sys from azure.common.client_factory import get_client_from_cli_profile from azure.mgmt.containerregistry import ContainerRegistryManagementClient -from azureml.core import Datastore, Experiment, Run, Workspace -from azureml.core.compute import AmlCompute, ComputeTarget +from azureml.core import Datastore, Experiment, Workspace +from azureml.core.compute import ComputeTarget from azureml.core.container_registry import ContainerRegistry -from azureml.core.runconfig import MpiConfiguration, RunConfiguration -from azureml.data.azure_storage_datastore import AzureBlobDatastore, AzureFileDatastore +from azureml.core.runconfig import MpiConfiguration from azureml.train.estimator import Estimator parser = argparse.ArgumentParser() diff --git a/orttraining/tools/scripts/gpt2_model_transform.py b/orttraining/tools/scripts/gpt2_model_transform.py index ca3680987cb02..a5b43d3a39969 100644 --- a/orttraining/tools/scripts/gpt2_model_transform.py +++ b/orttraining/tools/scripts/gpt2_model_transform.py @@ -4,7 +4,7 @@ import numpy as np import onnx -from onnx import TensorProto, helper, numpy_helper, shape_inference +from onnx import numpy_helper if len(sys.argv) < 2: print("Please give model path...") @@ -111,7 +111,7 @@ def process_concat(model): skip = True input_nodes.append(concat_input_node) - if skip == True: + if skip is True: continue # figure out target shape diff --git a/orttraining/tools/scripts/model_transform.py b/orttraining/tools/scripts/model_transform.py index 4de49278622b0..c4de2dfe00235 100644 --- a/orttraining/tools/scripts/model_transform.py +++ b/orttraining/tools/scripts/model_transform.py @@ -2,7 +2,7 @@ import numpy as np import onnx -from onnx import TensorProto, helper, numpy_helper, shape_inference +from onnx import numpy_helper if len(sys.argv) < 2: print("Please give model path...") diff --git a/orttraining/tools/scripts/nv_run_pretraining.py b/orttraining/tools/scripts/nv_run_pretraining.py index 0ed4fe2b9b6f5..db0c51e8d2373 100644 --- a/orttraining/tools/scripts/nv_run_pretraining.py +++ b/orttraining/tools/scripts/nv_run_pretraining.py @@ -19,10 +19,7 @@ import argparse # ================== -import csv import logging -import math -import multiprocessing import os import random import time @@ -37,14 +34,10 @@ from apex.amp import _amp_state from apex.parallel import DistributedDataParallel as DDP from apex.parallel.distributed import flat_dist_call -from file_utils import PYTORCH_PRETRAINED_BERT_CACHE from modeling import BertConfig, BertForPreTraining from optimization import BertLAMB -from schedulers import LinearWarmUpScheduler -from 
tokenization import BertTokenizer -from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler -from torch.utils.data.distributed import DistributedSampler -from tqdm import tqdm, trange +from torch.utils.data import DataLoader, Dataset, RandomSampler +from tqdm import tqdm from utils import is_main_process logging.basicConfig( @@ -454,7 +447,6 @@ def main(): # Note: We loop infinitely over epochs, termination is handled via iteration count while True: - thread = None if not args.resume_from_checkpoint or epoch > 0 or args.phase2: files = [ os.path.join(args.input_dir, f) diff --git a/orttraining/tools/scripts/opset12_model_transform.py b/orttraining/tools/scripts/opset12_model_transform.py index 73111c0e40a4a..453f2bd2a250e 100644 --- a/orttraining/tools/scripts/opset12_model_transform.py +++ b/orttraining/tools/scripts/opset12_model_transform.py @@ -16,7 +16,7 @@ import numpy as np import onnx -from onnx import TensorProto, helper, numpy_helper, shape_inference +from onnx import numpy_helper if len(sys.argv) < 2: print("Please give model path...") diff --git a/orttraining/tools/scripts/pipeline_model_split.py b/orttraining/tools/scripts/pipeline_model_split.py index e9b441454ebde..6cb33603d57da 100644 --- a/orttraining/tools/scripts/pipeline_model_split.py +++ b/orttraining/tools/scripts/pipeline_model_split.py @@ -1,8 +1,7 @@ import os -import sys import onnx -from onnx import OperatorSetIdProto, TensorProto, helper +from onnx import TensorProto, helper # Edge that needs to be cut for the split. # If the edge is feeding into more than one nodes, and not all the nodes belong to the same cut, @@ -300,7 +299,7 @@ def generate_subgraph(model, start_nodes, identity_node_list): outputs0 = [] while stack0: node = stack0.pop() - if not node in visited0: + if node not in visited0: tranversed_node += 1 visited0.append(node) all_visited_nodes.append(node) diff --git a/orttraining/tools/scripts/watch_experiment.py b/orttraining/tools/scripts/watch_experiment.py index 310dc408431e3..5af7a11eeab67 100644 --- a/orttraining/tools/scripts/watch_experiment.py +++ b/orttraining/tools/scripts/watch_experiment.py @@ -2,10 +2,10 @@ import os import sys from concurrent.futures import ThreadPoolExecutor -from threading import Event, Thread +from threading import Event from azureml._run_impl.run_watcher import RunWatcher -from azureml.core import Experiment, Run, Workspace +from azureml.core import Experiment, Workspace from requests import Session parser = argparse.ArgumentParser() diff --git a/tools/python/gen_contrib_doc.py b/tools/python/gen_contrib_doc.py index 6604a433ac528..b8a5a943ab698 100644 --- a/tools/python/gen_contrib_doc.py +++ b/tools/python/gen_contrib_doc.py @@ -2,7 +2,6 @@ # This file is copied and adapted from https://github.com/onnx/onnx repository. # There was no copyright statement on the file at the time of copying. 
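The `pipeline_model_split.py` hunk above applies pycodestyle E713: write membership tests as `x not in y` rather than `not x in y`. Both are equivalent at runtime, so the rewrite is purely about readability:

```python
visited = ["a", "b"]
node = "c"

# `not node in visited` parses as `not (node in visited)`, so the two
# spellings agree -- E713 is a readability rule, not a behavior change.
assert (not node in visited) == (node not in visited)  # noqa: E713

if node not in visited:
    visited.append(node)
print(visited)  # ['a', 'b', 'c']
```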
-from __future__ import absolute_import, division, print_function, unicode_literals import argparse import io @@ -10,9 +9,9 @@ import pathlib import sys from collections import defaultdict -from typing import Any, Dict, List, Sequence, Set, Text, Tuple +from typing import Any, DefaultDict, List, Sequence, Set, Text, Tuple -import numpy as np # type: ignore +import numpy as np from onnx import AttributeProto, FunctionProto import onnxruntime.capi.onnxruntime_pybind11_state as rtpy @@ -29,13 +28,13 @@ ext = ".md" -def display_number(v): # type: (int) -> Text +def display_number(v: int) -> str: if OpSchema.is_infinite(v): return "∞" return Text(v) -def should_render_domain(domain, domain_filter): # type: (Text) -> bool +def should_render_domain(domain, domain_filter) -> bool: if domain == ONNX_DOMAIN or domain == "" or domain == ONNX_ML_DOMAIN or domain == "ai.onnx.ml": return False @@ -45,18 +44,18 @@ def should_render_domain(domain, domain_filter): # type: (Text) -> bool return True -def format_name_with_domain(domain, schema_name): # type: (Text, Text) -> Text +def format_name_with_domain(domain: str, schema_name: str) -> str: if domain: return "{}.{}".format(domain, schema_name) else: return schema_name -def format_name_with_version(schema_name, version): # type: (Text, Text) -> Text +def format_name_with_version(schema_name: str, version: str) -> str: return "{}-{}".format(schema_name, version) -def display_attr_type(v): # type: (OpSchema.AttrType) -> Text +def display_attr_type(v: OpSchema.AttrType) -> str: assert isinstance(v, OpSchema.AttrType) s = Text(v) s = s[s.rfind(".") + 1 :].lower() @@ -65,33 +64,33 @@ def display_attr_type(v): # type: (OpSchema.AttrType) -> Text return s -def display_domain(domain): # type: (Text) -> Text +def display_domain(domain: str) -> str: if domain: return "the '{}' operator set".format(domain) else: return "the default ONNX operator set" -def display_domain_short(domain): # type: (Text) -> Text +def display_domain_short(domain: str) -> str: if domain: return domain else: return "ai.onnx (default)" -def display_version_link(name, version): # type: (Text, int) -> Text +def display_version_link(name: str, version: int) -> str: changelog_md = "Changelog" + ext name_with_ver = "{}-{}".format(name, version) return '{}'.format(changelog_md, name_with_ver, name_with_ver) -def display_function_version_link(name, version): # type: (Text, int) -> Text +def display_function_version_link(name: str, version: int) -> str: changelog_md = "FunctionsChangelog" + ext name_with_ver = "{}-{}".format(name, version) return '{}'.format(changelog_md, name_with_ver, name_with_ver) -def get_attribute_value(attr): # type: (AttributeProto) -> Any +def get_attribute_value(attr: AttributeProto) -> Any: if attr.HasField("f"): return attr.f elif attr.HasField("i"): @@ -116,7 +115,7 @@ def get_attribute_value(attr): # type: (AttributeProto) -> Any raise ValueError("Unsupported ONNX attribute: {}".format(attr)) -def display_schema(schema, versions): # type: (OpSchema, Sequence[OpSchema]) -> Text +def display_schema(schema: OpSchema, versions: Sequence[OpSchema]) -> str: s = "" # doc @@ -163,7 +162,7 @@ def display_schema(schema, versions): # type: (OpSchema, Sequence[OpSchema]) -> elif hasattr(attr, "default_value") and attr.default_value.name: default_value = get_attribute_value(attr.default_value) - def format_value(value): # type: (Any) -> Text + def format_value(value: Any) -> str: if isinstance(value, float): value = np.round(value, 5) if isinstance(value, (bytes, bytearray)) and 
sys.version_info[0] == 3: @@ -247,7 +246,7 @@ def format_value(value): # type: (Any) -> Text return s -def display_function(function, versions, domain=ONNX_DOMAIN): # type: (FunctionProto, List[int], Text) -> Text +def display_function(function: FunctionProto, versions: List[int], domain: str = ONNX_DOMAIN) -> str: s = "" if domain: @@ -303,15 +302,10 @@ def display_function(function, versions, domain=ONNX_DOMAIN): # type: (Function return s -def support_level_str(level): # type: (OpSchema.SupportType) -> Text +def support_level_str(level: OpSchema.SupportType) -> str: return "experimental " if level == OpSchema.SupportType.EXPERIMENTAL else "" -# def function_status_str(status=OperatorStatus.Value("EXPERIMENTAL")): # type: ignore -# return \ -# "experimental " if status == OperatorStatus.Value('EXPERIMENTAL') else "" # type: ignore - - def main(output_path: str, domain_filter: [str]): with io.open(output_path, "w", newline="", encoding="utf-8") as fout: @@ -323,9 +317,9 @@ def main(output_path: str, domain_filter: [str]): ) # domain -> support level -> name -> [schema] - index = defaultdict( + index: DefaultDict[Text, DefaultDict[int, DefaultDict[Text, List[OpSchema]]]] = defaultdict( lambda: defaultdict(lambda: defaultdict(list)) - ) # type: Dict[Text, Dict[int, Dict[Text, List[OpSchema]]]] # noqa: E501 + ) for schema in rtpy.get_all_operator_schema(): index[schema.domain][int(schema.support_level)][schema.name].append(schema) @@ -334,10 +328,8 @@ def main(output_path: str, domain_filter: [str]): # Preprocess the Operator Schemas # [(domain, [(support_level, [(schema name, current schema, all versions schemas)])])] - operator_schemas = ( - list() - ) # type: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]] # noqa: E501 - exsting_ops = set() # type: Set[Text] + operator_schemas: List[Tuple[Text, List[Tuple[int, List[Tuple[Text, OpSchema, List[OpSchema]]]]]]] = [] + exsting_ops: Set[str] = set() for domain, _supportmap in sorted(index.items()): if not should_render_domain(domain, domain_filter): continue diff --git a/tools/python/sparsify_initializers.py b/tools/python/sparsify_initializers.py index 17bddae6bbe40..7a5fba7429166 100644 --- a/tools/python/sparsify_initializers.py +++ b/tools/python/sparsify_initializers.py @@ -55,8 +55,8 @@ def setup_logging(verbose): # type: (bool) -> None def convert_tensor_to_sparse( - tensor, sparsity_threshold, tolerance -): # type: (TensorProto, float, float) -> Tuple[SparseTensorProto, float] + tensor: TensorProto, sparsity_threshold: float, tolerance: float +) -> Tuple[SparseTensorProto, float]: """returns a tuple of sparse_tensor and sparsity level""" values = [] indices = [] @@ -141,8 +141,8 @@ def convert_tensor_to_sparse( def convert_initializers( - model, exclude_names, sparsity_threshold, tolerance -): # type: (ModelProto, List[str], float, float) -> None + model: ModelProto, exclude_names: List[str], sparsity_threshold: float, tolerance: float +) -> None: graph = model.graph converted_sparse = [] remaining_initializers = [] From 86babedc19a4707c32b888e13bee1891bfb54b31 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Mon, 16 Jan 2023 18:12:09 +0000 Subject: [PATCH 22/33] Restore imports with side effects Signed-off-by: Justin Chu --- orttraining/orttraining/test/external_custom_ops/test.py | 6 +++--- .../test/python/orttraining_test_ortmodule_api.py | 8 -------- .../test/python/orttraining_test_orttrainer_frontend.py | 2 ++ .../test/python/orttraining_transformer_trainer.py | 2 ++ 
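The `gen_contrib_doc.py` and `sparsify_initializers.py` hunks above convert Python 2-era type comments into inline annotations. The two forms mean the same thing; the function below is an illustrative stand-in, not code from the repo:

```python
from typing import List, Tuple


# Python 2-era form: the type lives in a comment the runtime never sees.
def keep_sparse(values, threshold):  # type: (List[float], float) -> Tuple[List[float], float]
    kept = [v for v in values if abs(v) > threshold]
    return kept, len(kept) / max(1, len(values))


# Annotated form, as adopted in the diff: identical meaning, natively checkable.
def keep_sparse_annotated(values: List[float], threshold: float) -> Tuple[List[float], float]:
    kept = [v for v in values if abs(v) > threshold]
    return kept, len(kept) / max(1, len(values))


print(keep_sparse_annotated([0.0, 0.5, 1.0], 0.1))  # ([0.5, 1.0], 0.666...)
```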
orttraining/tools/scripts/gpt2_model_transform.py | 2 +- 5 files changed, 8 insertions(+), 12 deletions(-) diff --git a/orttraining/orttraining/test/external_custom_ops/test.py b/orttraining/orttraining/test/external_custom_ops/test.py index f7a2d38c4f185..18b37f9305e20 100644 --- a/orttraining/orttraining/test/external_custom_ops/test.py +++ b/orttraining/orttraining/test/external_custom_ops/test.py @@ -4,14 +4,14 @@ import numpy as np +# Restore dlopen flags. +import orttraining_external_custom_ops + # Expose available (onnx::* and protobuf::*) symbols from onnxruntime to resolve references in # the custom ops shared library. Deepbind flag is required to avoid conflicts with other # instances of onnx/protobuf libraries. import onnxruntime -# Restore dlopen flags. - - so = onnxruntime.SessionOptions() sess = onnxruntime.InferenceSession("testdata/model.onnx", so) input = np.random.rand(2, 2).astype(np.float32) diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 753d28345315e..146f7724430e5 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -3605,14 +3605,6 @@ def forward(self, pos_0, pos_1, *args, kw_0=None, kw_1=None, **kwargs): model = KwargsNet(input_size=D_in, hidden_size=H, num_classes=D_out).to(device) model = ORTModule(model) - # Dummy inputs used - torch.randn(N, D_in, device=device) - torch.randn(N, D_in, device=device) - torch.randn(N, D_in, device=device) - torch.randn(N, D_in, device=device) - [torch.randn(N, D_in, device=device)] * 2 - {"kwargs_0": torch.randn(N, D_in, device=device), "kwargs_1": torch.randn(D_in, D_in, device=device)} - # Training step prediction = eval(forward_statement) assert prediction is not None diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py index 34feb83eaad41..6517ca2b6b455 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py @@ -59,6 +59,8 @@ def testORTTrainerOptionsDefaultValues(test_input): "sliced_tensor_names": [], }, "allreduce_post_accumulation": False, + "data_parallel_size": 1, + "horizontal_parallel_size": 1, "deepspeed_zero_optimization": { "stage": 0, }, diff --git a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py index 486fb543d9aa5..6da43001c80dc 100644 --- a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py +++ b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py @@ -22,10 +22,12 @@ from onnxruntime.training import orttrainer_options as orttrainer_options try: + from torch.utils.tensorboard import SummaryWriter _has_tensorboard = True except ImportError: try: + from tensorboardX import SummaryWriter _has_tensorboard = True except ImportError: diff --git a/orttraining/tools/scripts/gpt2_model_transform.py b/orttraining/tools/scripts/gpt2_model_transform.py index a5b43d3a39969..b9efea22066f1 100644 --- a/orttraining/tools/scripts/gpt2_model_transform.py +++ b/orttraining/tools/scripts/gpt2_model_transform.py @@ -111,7 +111,7 @@ def process_concat(model): skip = True input_nodes.append(concat_input_node) - if skip is True: + if skip: continue # figure 
out target shape From cc1667311c4c92fa50c291cc8d78148d3b70471e Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 00:23:39 +0000 Subject: [PATCH 23/33] Fix wrap_exception import --- .../orttraining/python/training/ortmodule/__init__.py | 5 +++-- .../ortmodule/_custom_autograd_function_exporter.py | 2 +- .../ortmodule/_custom_autograd_function_runner.py | 2 +- .../training/ortmodule/_graph_execution_manager.py | 4 ++-- .../python/training/ortmodule/_inference_manager.py | 3 ++- .../orttraining/python/training/ortmodule/_io.py | 2 +- .../python/training/ortmodule/_torch_module_ort.py | 3 ++- .../python/training/ortmodule/_training_manager.py | 3 ++- .../orttraining/python/training/ortmodule/ortmodule.py | 3 ++- .../test/python/orttraining_test_ortmodule_api.py | 10 +--------- 10 files changed, 17 insertions(+), 20 deletions(-) diff --git a/orttraining/orttraining/python/training/ortmodule/__init__.py b/orttraining/orttraining/python/training/ortmodule/__init__.py index f6ed8827bded3..88aa71387b821 100644 --- a/orttraining/orttraining/python/training/ortmodule/__init__.py +++ b/orttraining/orttraining/python/training/ortmodule/__init__.py @@ -13,7 +13,8 @@ from onnxruntime import set_seed from onnxruntime.capi import build_and_package_info as ort_info -from ._fallback import ORTModuleFallbackException, ORTModuleInitException, _FallbackPolicy, wrap_exception +from ._fallback import _FallbackPolicy +from ._fallback_exceptions import ORTModuleFallbackException, ORTModuleInitException, wrap_exception from .torch_cpp_extensions import is_installed as is_torch_cpp_extensions_installed @@ -86,7 +87,7 @@ def _defined_from_envvar(name, default_value, warn=True): ), ) -# Initalized ORT's random seed with pytorch's initial seed +# Initialize ORT's random seed with pytorch's initial seed # in case user has set pytorch seed before importing ORTModule set_seed((torch.initial_seed() % sys.maxsize)) diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py index 887066d2a3dbc..250bb1c251fba 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_exporter.py @@ -15,7 +15,7 @@ from onnxruntime.training import ortmodule from . import _logger -from ._fallback import ORTModuleONNXModelException, wrap_exception +from ._fallback_exceptions import ORTModuleONNXModelException, wrap_exception # Some autograd.Function's shouldn't be exported as PythonOp. 
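The `external_custom_ops/test.py` hunk above moves an import whose only purpose is its side effect (restoring dlopen flags) back ahead of `import onnxruntime`, where import sorting had stranded it. A minimal sketch of that pattern using the real `sys` hooks; POSIX-only, and the guarded import is a placeholder rather than a real module:

```python
import os
import sys

# Module-level side effect: widen dlopen flags so the *next* extension import
# resolves its shared-library symbols globally, then restore the old flags.
_previous_flags = sys.getdlopenflags()
sys.setdlopenflags(os.RTLD_GLOBAL | os.RTLD_NOW)
try:
    pass  # e.g. `import some_custom_ops_extension` -- illustrative name only
finally:
    sys.setdlopenflags(_previous_flags)
```

Because the side effect happens at import time, the relative order of such imports is load-bearing, which is why the formatter's reordering had to be undone.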
# If CheckpointFunction is exported as PythonOp, the checkpointed computation diff --git a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py index 8c678181c5c7b..5b57417c42cef 100644 --- a/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py +++ b/orttraining/orttraining/python/training/ortmodule/_custom_autograd_function_runner.py @@ -10,7 +10,7 @@ from onnxruntime.training.ortmodule.torch_cpp_extensions import torch_interop_utils -from ._fallback import ORTModuleFallbackException, ORTModuleIOError, wrap_exception +from ._fallback_exceptions import ORTModuleFallbackException, ORTModuleIOError, wrap_exception def wrap_as_dlpack_or_not(grad_flag, tensor_flag, inplace_flag, training_mode_flag, arg): diff --git a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py index 4a61b2d9dfd0a..fe4bf43d94c56 100644 --- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py @@ -23,11 +23,11 @@ from . import _are_deterministic_algorithms_enabled, _io, _logger, _onnx_models, _utils from ._custom_autograd_function_exporter import _post_process_after_export -from ._fallback import ( +from ._fallback import _FallbackManager +from ._fallback_exceptions import ( ORTModuleDeviceException, ORTModuleONNXModelException, ORTModuleTorchModelException, - _FallbackManager, wrap_exception, ) from ._gradient_accumulation_manager import GradientAccumulationManager diff --git a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py index e72601efdd431..fb4a3ea9580aa 100644 --- a/orttraining/orttraining/python/training/ortmodule/_inference_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_inference_manager.py @@ -12,7 +12,8 @@ from . import _are_deterministic_algorithms_enabled, _io, _logger, _use_deterministic_algorithms, _utils from ._execution_agent import InferenceAgent -from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy +from ._fallback import _FallbackManager, _FallbackPolicy +from ._fallback_exceptions import ORTModuleFallbackException from ._graph_execution_manager import GraphExecutionManager, _RunStateInfo, _SkipCheck from .debug_options import DebugOptions diff --git a/orttraining/orttraining/python/training/ortmodule/_io.py b/orttraining/orttraining/python/training/ortmodule/_io.py index cf24a4241df6c..4c11b3d670ecd 100644 --- a/orttraining/orttraining/python/training/ortmodule/_io.py +++ b/orttraining/orttraining/python/training/ortmodule/_io.py @@ -11,7 +11,7 @@ import torch -from ._fallback import ORTModuleIOError, ORTModuleONNXModelException, wrap_exception +from ._fallback_exceptions import ORTModuleIOError, ORTModuleONNXModelException, wrap_exception from ._utils import warn_of_constant_inputs diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py index dfca706f76aa8..d78c2aca0ac86 100644 --- a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py @@ -8,7 +8,8 @@ import torch from . 
import _io, _utils -from ._fallback import ORTModuleTorchModelException, _FallbackManager, wrap_exception +from ._fallback import _FallbackManager +from ._fallback_exceptions import ORTModuleTorchModelException, wrap_exception from ._graph_execution_manager_factory import GraphExecutionManagerFactory from ._torch_module_interface import TorchModuleInterface from .debug_options import DebugOptions diff --git a/orttraining/orttraining/python/training/ortmodule/_training_manager.py b/orttraining/orttraining/python/training/ortmodule/_training_manager.py index 6c73866aa6bc6..01717a1ad7328 100644 --- a/orttraining/orttraining/python/training/ortmodule/_training_manager.py +++ b/orttraining/orttraining/python/training/ortmodule/_training_manager.py @@ -13,7 +13,8 @@ from . import _are_deterministic_algorithms_enabled, _io, _logger, _use_deterministic_algorithms, _utils from ._execution_agent import TrainingAgent -from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy +from ._fallback import _FallbackManager, _FallbackPolicy +from ._fallback_exceptions import ORTModuleFallbackException from ._graph_execution_manager import GraphExecutionManager, _RunStateInfo, _SkipCheck from .debug_options import DebugOptions diff --git a/orttraining/orttraining/python/training/ortmodule/ortmodule.py b/orttraining/orttraining/python/training/ortmodule/ortmodule.py index 4540406ddfb66..7c54c33f1fc6e 100644 --- a/orttraining/orttraining/python/training/ortmodule/ortmodule.py +++ b/orttraining/orttraining/python/training/ortmodule/ortmodule.py @@ -13,7 +13,8 @@ from . import _utils from ._custom_gradient_registry import CustomGradientRegistry from ._custom_op_symbolic_registry import CustomOpSymbolicRegistry -from ._fallback import ORTModuleFallbackException, _FallbackManager, _FallbackPolicy +from ._fallback import _FallbackManager, _FallbackPolicy +from ._fallback_exceptions import ORTModuleFallbackException from ._torch_module_factory import TorchModuleFactory from ._torch_module_ort import TorchModuleORT from .debug_options import DebugOptions diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 146f7724430e5..eb4315a7a7625 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -28,15 +28,7 @@ import onnxruntime.training.ortmodule as ortmodule_module from onnxruntime.training.optim import AdamWMode, FusedAdam -from onnxruntime.training.ortmodule import ( - DebugOptions, - LogLevel, - ORTModule, - _fallback, - _graph_execution_manager, - _io, - _utils, -) +from onnxruntime.training.ortmodule import DebugOptions, LogLevel, _fallback, _graph_execution_manager, _io, _utils from onnxruntime.training.ortmodule._custom_gradient_registry import register_gradient DEFAULT_OPSET = 15 From ac38c1d150b4ddf5838f86521c0c5f0a96f8735d Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 00:30:24 +0000 Subject: [PATCH 24/33] Missing imports --- .../python/training/ortmodule/_torch_module_ort.py | 2 +- .../python/training/ortmodule/ortmodule.py | 14 +++++++------- .../test/python/orttraining_test_ortmodule_api.py | 10 +++++++++- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py index d78c2aca0ac86..fd03ede82e104 100644 --- 
a/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py +++ b/orttraining/orttraining/python/training/ortmodule/_torch_module_ort.py @@ -147,7 +147,7 @@ def _replicate_for_data_parallel(self): ), ) - def add_module(self, name: str, module: Optional["Module"]) -> None: + def add_module(self, name: str, module: Optional["torch.nn.Module"]) -> None: raise wrap_exception( ORTModuleTorchModelException, NotImplementedError("ORTModule does not support adding modules to it.") ) diff --git a/orttraining/orttraining/python/training/ortmodule/ortmodule.py b/orttraining/orttraining/python/training/ortmodule/ortmodule.py index 7c54c33f1fc6e..f57940637468b 100644 --- a/orttraining/orttraining/python/training/ortmodule/ortmodule.py +++ b/orttraining/orttraining/python/training/ortmodule/ortmodule.py @@ -3,7 +3,7 @@ # Licensed under the MIT License. # -------------------------------------------------------------------------- -from typing import Callable, Iterator, Optional, Tuple, TypeVar +from typing import Callable, Iterator, Optional, OrderedDict, Tuple, TypeVar import torch @@ -20,7 +20,7 @@ from .debug_options import DebugOptions # Needed to override PyTorch methods -T = TypeVar("T", bound="Module") +T = TypeVar("T", bound="torch.nn.Module") class ORTModule(torch.nn.Module): @@ -145,7 +145,7 @@ def _replicate_for_data_parallel(self): return self._torch_module._replicate_for_data_parallel() - def add_module(self, name: str, module: Optional["Module"]) -> None: + def add_module(self, name: str, module: Optional["torch.nn.Module"]) -> None: """Raises a ORTModuleTorchModelException exception since ORTModule does not support adding modules to it""" self._torch_module.add_module(name, module) @@ -176,7 +176,7 @@ def _apply(self, fn): self._torch_module._apply(fn) return self - def apply(self: T, fn: Callable[["Module"], None]) -> T: + def apply(self: T, fn: Callable[["torch.nn.Module"], None]) -> T: """Override :meth:`~torch.nn.Module.apply` to delegate execution to ONNX Runtime""" self._torch_module.apply(fn) @@ -203,7 +203,7 @@ def state_dict(self, destination=None, prefix="", keep_vars=False): return self._torch_module.state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars) - def load_state_dict(self, state_dict: "OrderedDict[str, Tensor]", strict: bool = True): + def load_state_dict(self, state_dict: OrderedDict[str, torch.Tensor], strict: bool = True): """Override :meth:`~torch.nn.Module.load_state_dict` to delegate execution to ONNX Runtime""" return self._torch_module.load_state_dict(state_dict, strict=strict) @@ -257,12 +257,12 @@ def _load_from_state_dict( state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ) - def named_children(self) -> Iterator[Tuple[str, "Module"]]: + def named_children(self) -> Iterator[Tuple[str, "torch.nn.Module"]]: """Override :meth:`~torch.nn.Module.named_children`""" yield from self._torch_module.named_children() - def modules(self) -> Iterator["Module"]: + def modules(self) -> Iterator["torch.nn.Module"]: """Override :meth:`~torch.nn.Module.modules`""" yield from self._torch_module.modules() diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index eb4315a7a7625..146f7724430e5 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -28,7 +28,15 @@ import onnxruntime.training.ortmodule as 
ortmodule_module from onnxruntime.training.optim import AdamWMode, FusedAdam -from onnxruntime.training.ortmodule import DebugOptions, LogLevel, _fallback, _graph_execution_manager, _io, _utils +from onnxruntime.training.ortmodule import ( + DebugOptions, + LogLevel, + ORTModule, + _fallback, + _graph_execution_manager, + _io, + _utils, +) from onnxruntime.training.ortmodule._custom_gradient_registry import register_gradient DEFAULT_OPSET = 15 From a33b1f883b4e4b7f94da004504507812827a4fa7 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 00:40:26 +0000 Subject: [PATCH 25/33] Remove import stars (uhh) --- .../python/transformers/test_parity_gelu.py | 13 +++-- .../transformers/test_parity_layernorm.py | 12 ++--- .../python/training/postprocess.py | 1 - .../orttraining_test_ortmodule_autograd.py | 47 ++++++++++--------- .../tools/scripts/layer_norm_transform.py | 7 ++- 5 files changed, 38 insertions(+), 42 deletions(-) diff --git a/onnxruntime/test/python/transformers/test_parity_gelu.py b/onnxruntime/test/python/transformers/test_parity_gelu.py index 7fe42dc76f193..791d2702a1959 100644 --- a/onnxruntime/test/python/transformers/test_parity_gelu.py +++ b/onnxruntime/test/python/transformers/test_parity_gelu.py @@ -27,8 +27,8 @@ import os import unittest +import parity_utilities import torch -from parity_utilities import * from torch import nn @@ -36,6 +36,7 @@ class Gelu(nn.Module): def __init__(self, formula=4, fp32_gelu_op=False): super().__init__() self.formula = formula + # FIXME(justinchuby): fp32_gelu_op is always True self.fp32_gelu_op = True def gelu(self, x): @@ -97,12 +98,12 @@ def run( # Do not re-use onnx file from previous test since weights of model are random. onnx_model_path = "./temp/gelu_{}_{}.onnx".format(formula, "fp16" if float16 else "fp32") - export_onnx(model, onnx_model_path, float16, hidden_size, device) + parity_utilities.export_onnx(model, onnx_model_path, float16, hidden_size, device) if optimized: optimized_onnx_path = "./temp/gelu_{}_opt_{}.onnx".format(formula, "fp16" if float16 else "fp32") use_gpu = float16 and not fp32_gelu_op - optimize_onnx( + parity_utilities.optimize_onnx( onnx_model_path, optimized_onnx_path, Gelu.get_fused_op(formula), @@ -113,7 +114,7 @@ def run( else: onnx_path = onnx_model_path - num_failure = run_parity( + num_failure = parity_utilities.run_parity( model, onnx_path, batch_size, @@ -217,9 +218,7 @@ def test_cpu(self): def test_cuda(self): if not torch.cuda.is_available(): - import pytest - - pytest.skip("test requires GPU and torch+cuda") + self.skipTest("test requires GPU and torch+cuda") else: gpu = torch.device("cuda") for i in self.formula_to_test: diff --git a/onnxruntime/test/python/transformers/test_parity_layernorm.py b/onnxruntime/test/python/transformers/test_parity_layernorm.py index 01122b4830bfa..e0ab8f4801e08 100644 --- a/onnxruntime/test/python/transformers/test_parity_layernorm.py +++ b/onnxruntime/test/python/transformers/test_parity_layernorm.py @@ -8,8 +8,8 @@ import unittest import onnx +import parity_utilities import torch -from parity_utilities import * from torch import nn if find_transformers_source(): @@ -150,12 +150,12 @@ def run( # Do not re-use onnx file from previous test since weights of model are random. 
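The `ortmodule.py` hunk earlier in this patch replaces bare `"Module"` strings with `"torch.nn.Module"` so the forward references name something resolvable. A small sketch of the pattern, assuming `torch` is available:

```python
from typing import Callable, TypeVar

import torch

# A string bound is a forward reference stored unevaluated, so it must spell
# a resolvable dotted name -- bare "Module" is undefined here, while
# "torch.nn.Module" resolves through the imported package.
T = TypeVar("T", bound="torch.nn.Module")


def apply_to_module(module: T, fn: Callable[["torch.nn.Module"], None]) -> T:
    fn(module)
    return module


apply_to_module(torch.nn.Linear(2, 2), lambda m: print(type(m).__name__))  # Linear
```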
onnx_model_path = "./temp/layer_norm_{}_formula{}.onnx".format("fp16" if float16 else "fp32", formula) - export_onnx(model, onnx_model_path, float16, hidden_size, device) + parity_utilities.export_onnx(model, onnx_model_path, float16, hidden_size, device) if optimized: optimized_onnx_path = "./temp/layer_norm_{}_formula{}_opt.onnx".format("fp16" if float16 else "fp32", formula) if (not float16) or cast_fp16: - optimize_onnx( + parity_utilities.optimize_onnx( onnx_model_path, optimized_onnx_path, expected_op=LayerNorm.get_fused_op(), @@ -170,7 +170,7 @@ def run( else: onnx_path = onnx_model_path - num_failure = run_parity( + num_failure = parity_utilities.run_parity( model, onnx_path, batch_size, @@ -295,9 +295,7 @@ def test_cpu(self): def test_cuda(self): if not torch.cuda.is_available(): - import pytest - - pytest.skip("test requires GPU and torch+cuda") + self.skipTest("test requires GPU and torch+cuda") else: gpu = torch.device("cuda") self.run_one(self.optimized, gpu, hidden_size=self.hidden_size, run_extra_tests=True) diff --git a/orttraining/orttraining/python/training/postprocess.py b/orttraining/orttraining/python/training/postprocess.py index 3108880c06f46..12ae8ed34181f 100644 --- a/orttraining/orttraining/python/training/postprocess.py +++ b/orttraining/orttraining/python/training/postprocess.py @@ -1,7 +1,6 @@ import struct import onnx -from onnx import * from onnx import helper diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py index bf7c1b266f986..e8a5be0d624d6 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py @@ -5,11 +5,12 @@ # pylint: disable=C0103 # pylint: disable=W0212 -import pytest -import torch +import os # Import ORT modules. -from _test_helpers import * +import _test_helpers +import pytest +import torch from packaging.version import Version from torch.nn.parameter import Parameter @@ -86,7 +87,7 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) def test_GeLU_custom_func_rets_not_as_module_output(): @@ -144,7 +145,7 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) def test_GeLU_multiple_forward_runs(): @@ -196,7 +197,7 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input, run_forward_twice=True) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input, run_forward_twice=True) def test_MegatronF(): @@ -236,7 +237,7 @@ def input_generator(): # generate a label that have same shape as forward output. 
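The `postprocess.py` hunk above drops `from onnx import *` in favor of the module import it already has, and the test files get the same treatment for `parity_utilities` and `_test_helpers`. The contrast, sketched with `onnx` (assumed installed):

```python
import onnx

# With `from onnx import *`, a name like `helper` appears from nowhere and
# can silently shadow (or be shadowed by) a local; qualified access keeps
# every name's origin explicit.
graph = onnx.helper.make_graph(nodes=[], name="empty", inputs=[], outputs=[])
model = onnx.helper.make_model(graph)
print(model.graph.name)  # empty
```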
label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) def test_ScalarAndTuple(): @@ -283,7 +284,7 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) def test_ScalarAndTupleReordered(): @@ -330,7 +331,7 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) @pytest.mark.skip( @@ -380,7 +381,7 @@ def input_generator(): label_input = torch.ones([output_size]) # Test when input is in-place updated, but does not require gradient. - run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) @pytest.mark.skip( @@ -429,7 +430,7 @@ def input_generator(): # which is a duplicated computation with the PythonOp. # So for the weights that are used twice BUT SHOULD only used once, the gradients are almost 2x than PyTorch's grad, # this is the reason we ignore the gradient compare here. - run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) @pytest.mark.skip(reason="disable due to exporter bug https://github.com/microsoft/onnx-converters-private/issues/37.") @@ -476,7 +477,7 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) @pytest.mark.skip( @@ -528,7 +529,7 @@ def input_generator(): # duplicated computation with the PythonOp. Thus, for the weights that are used twice BUT SHOULD # only used once, the gradients are almost 2x than PyTorch's grad, this is the reason we # ignore the gradient compare here. - run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) @pytest.mark.skip( @@ -580,7 +581,7 @@ def input_generator(): # should reuse the input torch tensor @140214095996104, 140212816617984 but actually not." It seems # if we don't have mark_dirty() in auto grad forward, the result is not using the input_, # (maybe a view of it, because data address is same) - run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input, ignore_grad_compare=True) ########################################################################################## @@ -630,7 +631,7 @@ def input_generator(): # generate a label that have same shape as forward output. 
label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) def test_EvalTest(): @@ -736,7 +737,7 @@ def input_generator(): label_input = torch.ones([output_size]) # Test multi-input and multi-output custom function. - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) def test_InnerModuleCall(): @@ -860,9 +861,9 @@ def input_generator_with_requires_grad(): label_input = torch.ones([output_size]) # Test multi-input and multi-output custom function. - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) - run_training_test_and_compare(model_builder, input_generator_with_requires_grad, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator_with_requires_grad, label_input) def test_MultipleStream_InForwardFunction(): @@ -908,7 +909,7 @@ def input_generator(): label_input = torch.ones([output_size]) # Test multi-input and multi-output custom function. - run_training_test_and_compare( + _test_helpers.run_training_test_and_compare( model_builder, input_generator, label_input, expected_outputs=[torch.tensor([0.224, 0.272])] ) @@ -956,7 +957,7 @@ def input_generator(): label_input = torch.ones([output_size]) # Test multi-input and multi-output custom function. - run_training_test_and_compare( + _test_helpers.run_training_test_and_compare( model_builder, input_generator, label_input, expected_outputs=[torch.tensor([0.224, 0.272])] ) @@ -1003,7 +1004,7 @@ def input_generator(): label_input = torch.ones([output_size]) # Test multi-input and multi-output custom function. - run_training_test_and_compare( + _test_helpers.run_training_test_and_compare( model_builder, input_generator, label_input, expected_outputs=[torch.tensor([0.224, 0.272])] ) @@ -1052,7 +1053,7 @@ def input_generator(): label_input = torch.ones([output_size]) # Test multi-input and multi-output custom function. 
- run_training_test_and_compare( + _test_helpers.run_training_test_and_compare( model_builder, input_generator, label_input, expected_outputs=[torch.tensor([0.224, 0.272])] ) @@ -1333,4 +1334,4 @@ def input_generator(): return torch.randn(output_size, output_size, dtype=torch.float).requires_grad_() label_input = torch.ones([output_size]) - run_training_test_and_compare(model_builder, input_generator, label_input) + _test_helpers.run_training_test_and_compare(model_builder, input_generator, label_input) diff --git a/orttraining/tools/scripts/layer_norm_transform.py b/orttraining/tools/scripts/layer_norm_transform.py index 21b53ddfe44e7..92db4a5149e3b 100644 --- a/orttraining/tools/scripts/layer_norm_transform.py +++ b/orttraining/tools/scripts/layer_norm_transform.py @@ -3,7 +3,6 @@ import numpy as np import onnx -from onnx import * def find_node(graph_proto, op_type): @@ -18,10 +17,10 @@ def find_node(graph_proto, op_type): def gen_attribute(key, value): - attr = AttributeProto() + attr = onnx.AttributeProto() attr.name = key attr.ints.extend(int(v) for v in value) - attr.type = AttributeProto.INTS + attr.type = onnx.AttributeProto.INTS return attr @@ -121,7 +120,7 @@ def main(): layer_norm_output.append("saved_mean_" + str(id)) id = id + 1 layer_norm_output.append("saved_inv_std_var_" + str(id)) - layer_norm = helper.make_node( + layer_norm = onnx.helper.make_node( "LayerNormalization", layer_norm_input, layer_norm_output, From 049838f31a3784103455bb3ae5836d347aaeb6a3 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 00:43:55 +0000 Subject: [PATCH 26/33] undef --- .../python/transformers/test_parity_layernorm.py | 2 +- .../testdata/model_with_external_initializers.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/onnxruntime/test/python/transformers/test_parity_layernorm.py b/onnxruntime/test/python/transformers/test_parity_layernorm.py index e0ab8f4801e08..d0ddfe23fac73 100644 --- a/onnxruntime/test/python/transformers/test_parity_layernorm.py +++ b/onnxruntime/test/python/transformers/test_parity_layernorm.py @@ -12,7 +12,7 @@ import torch from torch import nn -if find_transformers_source(): +if parity_utilities.find_transformers_source(): from onnx_model import OnnxModel else: from onnxruntime.transformers.onnx_model import OnnxModel diff --git a/onnxruntime/test/testdata/model_with_external_initializers.py b/onnxruntime/test/testdata/model_with_external_initializers.py index 8b591549963fd..0413efc7e1017 100644 --- a/onnxruntime/test/testdata/model_with_external_initializers.py +++ b/onnxruntime/test/testdata/model_with_external_initializers.py @@ -1,14 +1,17 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
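The `layer_norm_transform.py` hunk above now builds its INTS attribute through qualified `onnx.AttributeProto` names; for reference, `onnx.helper.make_attribute` produces the same proto in one call. A quick equivalence check:

```python
import onnx

# Hand-built INTS attribute, as in the rewritten gen_attribute:
manual = onnx.AttributeProto()
manual.name = "axes"
manual.ints.extend([0, 1])
manual.type = onnx.AttributeProto.INTS

# onnx.helper.make_attribute builds the same proto in one call:
via_helper = onnx.helper.make_attribute("axes", [0, 1])
assert manual == via_helper  # protobuf messages compare field-by-field
```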
+import os +from typing import Any, List + import numpy as np import onnx -from onnx import TensorProto, helper +from onnx import helper from onnx.external_data_helper import set_external_data from onnx.numpy_helper import from_array -def create_external_data_tensor(value, tensor_name): # type: (List[Any], Text) -> TensorProto +def create_external_data_tensor(value, tensor_name): # type: (List[Any], str) -> onnx.TensorProto tensor = from_array(np.array(value)) tensor.name = tensor_name tensor_filename = "{}.bin".format(tensor_name) @@ -23,13 +26,13 @@ def create_external_data_tensor(value, tensor_name): # type: (List[Any], Text) def GenerateModel(model_name): # Create one input (ValueInfoProto) - X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 2]) + X = helper.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [1, 2]) # Create second input (ValueInfoProto) - Pads = helper.make_tensor_value_info("Pads", TensorProto.INT64, [4]) + Pads = helper.make_tensor_value_info("Pads", onnx.TensorProto.INT64, [4]) # Create one output (ValueInfoProto) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4]) + Y = helper.make_tensor_value_info("Y", onnx.TensorProto.FLOAT, [1, 4]) # Create a node (NodeProto) node_def = helper.make_node( From 5282648f23fcf6125cb87a4ce44f594e375463e6 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 00:56:00 +0000 Subject: [PATCH 27/33] Fix undefined vars --- onnxruntime/__init__.py | 2 ++ .../python/tools/tensorrt/perf/benchmark_wrapper.py | 1 + orttraining/orttraining/python/training/checkpoint.py | 2 +- .../orttraining/python/training/optim/lr_scheduler.py | 2 +- orttraining/orttraining/python/training/orttrainer.py | 2 +- .../test/python/orttraining_test_layer_norm_transform.py | 4 ++-- .../test/python/orttraining_test_ortmodule_autograd.py | 8 ++++---- .../orttraining/test/python/orttraining_test_utils.py | 5 +++-- 8 files changed, 15 insertions(+), 11 deletions(-) diff --git a/onnxruntime/__init__.py b/onnxruntime/__init__.py index c5c5ecd6aeb4d..b22ee24787399 100644 --- a/onnxruntime/__init__.py +++ b/onnxruntime/__init__.py @@ -61,6 +61,8 @@ OrtValue, SparseTensor, ) + +# FIXME: Remove star imports from onnxruntime.capi.training import * # noqa: F403 # TODO: thiagofc: Temporary experimental namespace for new PyTorch front-end diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py b/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py index 031cb293d0d78..c6f334de266b1 100644 --- a/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py +++ b/onnxruntime/python/tools/tensorrt/perf/benchmark_wrapper.py @@ -3,6 +3,7 @@ import pprint import re +# FIXME: Remove star imports from benchmark import * from perf_utils import * diff --git a/orttraining/orttraining/python/training/checkpoint.py b/orttraining/orttraining/python/training/checkpoint.py index d9f8a0904b877..2fbd402410016 100644 --- a/orttraining/orttraining/python/training/checkpoint.py +++ b/orttraining/orttraining/python/training/checkpoint.py @@ -423,7 +423,7 @@ def _aggregate_over_ranks( assert ( ranks[i] == rank_state_dict[_utils.state_dict_trainer_options_key()][world_rank] ), "Unexpected rank in file at path {}. 
Expected {}, got {}".format( - path, rank, rank_state_dict[_utils.state_dict_trainer_options_key()][world_rank] + path, ranks[i], rank_state_dict[_utils.state_dict_trainer_options_key()][world_rank] ) if loaded_mixed_precision is None: loaded_mixed_precision = rank_state_dict[_utils.state_dict_trainer_options_key()][mixed_precision] diff --git a/orttraining/orttraining/python/training/optim/lr_scheduler.py b/orttraining/orttraining/python/training/optim/lr_scheduler.py index cbe013d32f310..17b024693babd 100644 --- a/orttraining/orttraining/python/training/optim/lr_scheduler.py +++ b/orttraining/orttraining/python/training/optim/lr_scheduler.py @@ -276,7 +276,7 @@ def _warmup_poly(self, train_step_info): assert ( train_step_info.optimizer_config.lr > self.lr_end - ), f"lr_end ({lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})" + ), f"lr_end ({self.lr_end}) must be be smaller than initial lr ({train_step_info.optimizer_config.lr})" if train_step_info.optimization_step < self._num_warmup_steps: return float(train_step_info.optimization_step) / float(max(1, self._num_warmup_steps)) diff --git a/orttraining/orttraining/python/training/orttrainer.py b/orttraining/orttraining/python/training/orttrainer.py index ee9c59472bedd..8f7b0842f1ce6 100644 --- a/orttraining/orttraining/python/training/orttrainer.py +++ b/orttraining/orttraining/python/training/orttrainer.py @@ -729,7 +729,7 @@ def get_providers(provider_options): if gpu_ep_name not in providers: raise RuntimeError( "ORTTrainer options specify a CUDA device but the {} provider is unavailable.".format( - cuda_ep_name + gpu_ep_name ) ) diff --git a/orttraining/orttraining/test/python/orttraining_test_layer_norm_transform.py b/orttraining/orttraining/test/python/orttraining_test_layer_norm_transform.py index 241a963e28498..370fa52e6d2cd 100644 --- a/orttraining/orttraining/test/python/orttraining_test_layer_norm_transform.py +++ b/orttraining/orttraining/test/python/orttraining_test_layer_norm_transform.py @@ -14,10 +14,10 @@ def find_node(graph_proto, op_type): def gen_attribute(key, value): - attr = AttributeProto() + attr = onnx.AttributeProto() attr.name = key attr.ints.extend(int(v) for v in value) - attr.type = AttributeProto.INTS + attr.type = onnx.AttributeProto.INTS return attr diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py index e8a5be0d624d6..a760b1ed168a1 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_autograd.py @@ -672,8 +672,8 @@ def input_generator(): # generate a label that have same shape as forward output. label_input = torch.ones([output_size]) - # Test pure inferencing scenarios, when inputs don't requires_grad. - run_evaluate_test_and_compare(model_builder, input_generator, label_input) + # Test pure inference scenarios, when inputs don't requires_grad. 
+ _test_helpers.run_evaluate_test_and_compare(model_builder, input_generator, label_input) @pytest.mark.skipif( @@ -803,12 +803,12 @@ def get_inner_module_call_result(x, device, use_ort): # Test indirect ORTModule call from custom function result_pth = get_inner_module_call_result(x.detach(), "cuda:0", False) result_ort = get_inner_module_call_result(x.detach(), "cuda:0", True) - compare_tensor_list(result_ort, result_pth) + _test_helpers.compare_tensor_list(result_ort, result_pth) # Test indirect ORTModule call from custom function result_ort = get_inner_module_call_result(x.detach(), "cpu", True) result_pth = get_inner_module_call_result(x.detach(), "cpu", False) - compare_tensor_list(result_ort, result_pth) + _test_helpers.compare_tensor_list(result_ort, result_pth) @pytest.mark.skipif( diff --git a/orttraining/orttraining/test/python/orttraining_test_utils.py b/orttraining/orttraining/test/python/orttraining_test_utils.py index db38139ea53b2..62dd3a4504a40 100644 --- a/orttraining/orttraining/test/python/orttraining_test_utils.py +++ b/orttraining/orttraining/test/python/orttraining_test_utils.py @@ -1,9 +1,10 @@ +import math + import torch from orttraining_test_data_loader import BatchArgsOption, create_ort_test_dataloader, split_batch from onnxruntime.capi.ort_trainer import IODescription, ORTTrainer -from onnxruntime.training import amp, optim, orttrainer -from onnxruntime.training import orttrainer_options as orttrainer_options +from onnxruntime.training import amp, optim, orttrainer, orttrainer_options from onnxruntime.training.optim import _LRScheduler From 32041c95c43e48c3a32e8d0658801492c675ac3f Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 01:01:01 +0000 Subject: [PATCH 28/33] Remove unused --- .../generate_tiny_keras2onnx_bert_models.py | 1 - .../orttraining/test/python/orttraining_test_debuggability.py | 1 - .../orttraining/test/python/orttraining_test_ortmodule_api.py | 2 -- .../test/python/orttraining_test_orttrainer_bert_toy_onnx.py | 1 - .../test/python/orttraining_test_orttrainer_frontend.py | 3 +-- .../orttraining/test/python/orttraining_transformer_trainer.py | 1 - 6 files changed, 1 insertion(+), 8 deletions(-) diff --git a/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py b/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py index 2a2f17852652d..0ffd70d01815d 100644 --- a/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py +++ b/onnxruntime/test/python/transformers/test_data/bert_squad_tensorflow2.1_keras2onnx_opset11/generate_tiny_keras2onnx_bert_models.py @@ -301,7 +301,6 @@ def use_dynamic_axes(self, dynamic_batch_dim="batch_size", seq_len=7): """ Update input and output shape to use dynamic axes. 
""" - {} for input in self.model.graph.input: dim_proto = input.type.tensor_type.shape.dim[0] dim_proto.dim_param = dynamic_batch_dim diff --git a/orttraining/orttraining/test/python/orttraining_test_debuggability.py b/orttraining/orttraining/test/python/orttraining_test_debuggability.py index 44c06287ffa88..499f0ba7a1ff5 100644 --- a/orttraining/orttraining/test/python/orttraining_test_debuggability.py +++ b/orttraining/orttraining/test/python/orttraining_test_debuggability.py @@ -4,7 +4,6 @@ from onnxruntime import set_seed from onnxruntime.training import optim, orttrainer -from onnxruntime.training import orttrainer_options as orttrainer_options ############################################################################### # Testing starts here ######################################################### diff --git a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py index 146f7724430e5..adbf7da6fd9f2 100644 --- a/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py +++ b/orttraining/orttraining/test/python/orttraining_test_ortmodule_api.py @@ -4242,7 +4242,6 @@ def __init__(self): self.dummy = torch.nn.Parameter(torch.FloatTensor([0])) def forward(self, batch): - batch[0] b = batch[1] return self.dummy + b @@ -4497,7 +4496,6 @@ def __init__(self): self.dummy = torch.nn.Parameter(torch.FloatTensor([0])) def forward(self, batch): - batch["b"] a = batch["a"] return self.dummy + a diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py index 5795594312011..feabea05b8e79 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_bert_toy_onnx.py @@ -14,7 +14,6 @@ from onnxruntime.capi.ort_trainer import ModelDescription as Legacy_ModelDescription from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer from onnxruntime.training import amp, optim, orttrainer -from onnxruntime.training import orttrainer_options as orttrainer_options ############################################################################### # Helper functions ############################################################ diff --git a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py index 6517ca2b6b455..7d788d1308cd9 100644 --- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py +++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py @@ -17,8 +17,7 @@ from onnxruntime.capi.ort_trainer import ORTTrainer as Legacy_ORTTrainer from onnxruntime.training import PropagateCastOpsStrategy, TrainStepInfo, _utils, amp from onnxruntime.training import model_desc_validation as md_val -from onnxruntime.training import optim, orttrainer -from onnxruntime.training import orttrainer_options as orttrainer_options +from onnxruntime.training import optim, orttrainer, orttrainer_options ############################################################################### # Testing starts here ######################################################### diff --git a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py index 6da43001c80dc..b80c3ef5bba38 100644 --- 
a/orttraining/orttraining/test/python/orttraining_transformer_trainer.py +++ b/orttraining/orttraining/test/python/orttraining_transformer_trainer.py @@ -19,7 +19,6 @@ import onnxruntime from onnxruntime.training import amp, optim, orttrainer -from onnxruntime.training import orttrainer_options as orttrainer_options try: from torch.utils.tensorboard import SummaryWriter From 711de92353874929d0d4af7c58047a7768becade Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 01:10:23 +0000 Subject: [PATCH 29/33] codeql --- orttraining/orttraining/test/python/orttraining_test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orttraining/orttraining/test/python/orttraining_test_utils.py b/orttraining/orttraining/test/python/orttraining_test_utils.py index 62dd3a4504a40..af1022c4f0e6b 100644 --- a/orttraining/orttraining/test/python/orttraining_test_utils.py +++ b/orttraining/orttraining/test/python/orttraining_test_utils.py @@ -4,7 +4,7 @@ from orttraining_test_data_loader import BatchArgsOption, create_ort_test_dataloader, split_batch from onnxruntime.capi.ort_trainer import IODescription, ORTTrainer -from onnxruntime.training import amp, optim, orttrainer, orttrainer_options +from onnxruntime.training import amp, optim, orttrainer from onnxruntime.training.optim import _LRScheduler From baf25880e17e80e51e215ee7952af83c57b376c7 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 01:34:55 +0000 Subject: [PATCH 30/33] rename to optional --- .github/workflows/lint.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index de733e2637296..9f8209ff78f59 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,8 +8,8 @@ on: pull_request: jobs: - lint-python: - name: Lint Python + optional-lint: + name: Optional Lint runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From 5d1ef153c8d0ef59a3b7daf186f4aa653bc20040 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Tue, 17 Jan 2023 07:44:54 -0800 Subject: [PATCH 31/33] Delete ReformatSourcePython.bat --- onnxruntime/ReformatSourcePython.bat | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 onnxruntime/ReformatSourcePython.bat diff --git a/onnxruntime/ReformatSourcePython.bat b/onnxruntime/ReformatSourcePython.bat deleted file mode 100644 index ca92964b934ac..0000000000000 --- a/onnxruntime/ReformatSourcePython.bat +++ /dev/null @@ -1,14 +0,0 @@ -:: Copyright (c) Microsoft Corporation. All rights reserved. -:: Licensed under the MIT License. 
- -:: Before running this, please make sure python.exe is in path, and black is installed like the following -:: pip install --upgrade black isort - -:: For more info about black, see https://github.com/psf/black - -python -m isort ./python -python -m isort ./test -python -m black ./python -python -m black ./test - -if errorlevel 1 echo please install python, then pip install --upgrade black isort From 6ee3cf2fe39fcea15db8d6e24afc75efb6ab19c5 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Thu, 16 Feb 2023 14:18:05 -0800 Subject: [PATCH 32/33] Update .lintrunner.toml --- .lintrunner.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 41a65bfebfaf2..bfa0276e5cfce 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -14,7 +14,7 @@ # To lint local changes: # # ```bash -# lintrunner +# lintrunner -m main # ``` # # To lint all files: @@ -26,7 +26,7 @@ # To format files: # # ```bash -# lintrunner -a --all-files +# lintrunner f --all-files # ``` # # To read more about lintrunner, see [wiki](https://github.com/pytorch/pytorch/wiki/lintrunner). @@ -51,8 +51,6 @@ exclude_patterns = [ 'onnxruntime/test/**', # TODO enable 'orttraining/**', - # FIXME(#7032): ignore server code for now - 'server/**', # FIXME: DUO106 'tools/nuget/generate_nuspec_for_native_nuget.py', # FIXME: DUO116 From d34e7904c6f635e484a1441a8593083cf2f13103 Mon Sep 17 00:00:00 2001 From: Justin Chu Date: Thu, 16 Feb 2023 14:18:55 -0800 Subject: [PATCH 33/33] Update onnxruntime/python/tools/symbolic_shape_infer.py --- onnxruntime/python/tools/symbolic_shape_infer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onnxruntime/python/tools/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py index ed92677e22cdd..d504efa6cc58a 100755 --- a/onnxruntime/python/tools/symbolic_shape_infer.py +++ b/onnxruntime/python/tools/symbolic_shape_infer.py @@ -1056,7 +1056,6 @@ def _infer_GatherND(self, node): data_shape = self._get_shape(node, 0) data_rank = len(data_shape) indices_shape = self._get_shape(node, 1) - len(indices_shape) last_index_dimension = indices_shape[-1] assert is_literal(last_index_dimension) and last_index_dimension <= data_rank new_shape = indices_shape[:-1] + data_shape[last_index_dimension:]
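A note on this final hunk: the deleted `len(indices_shape)` was a no-op expression statement, and the lines that remain implement the standard GatherND shape rule. Below is a standalone sketch of that rule, assuming `batch_dims=0` and concrete integer dimensions (the real `_infer_GatherND` also handles symbolic dims, and the function name here is illustrative, not part of the codebase):

```python
from typing import List


def gathernd_output_shape(data_shape: List[int], indices_shape: List[int]) -> List[int]:
    """GatherND with batch_dims=0: each k-tuple in indices selects into the
    first k axes of data, so output = indices_shape[:-1] + data_shape[k:]."""
    last_index_dimension = indices_shape[-1]  # k, the length of each index tuple
    assert last_index_dimension <= len(data_shape)
    return indices_shape[:-1] + data_shape[last_index_dimension:]


# Gathering 2-tuples from a [2, 3, 4] tensor with indices of shape [5, 2]
# yields [5] + [4] == [5, 4].
assert gathernd_output_shape([2, 3, 4], [5, 2]) == [5, 4]
```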
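More broadly, the fix that recurs through patches 26-29 is one discipline: import the module, not its names, so pyflakes and the other linters wired into `.lintrunner.toml` can resolve every reference statically. A minimal sketch of the before/after, adapted from the `gen_attribute` hunks above (the helper name `gen_int_attribute` is illustrative, not part of these patches):

```python
import onnx

# With `from onnx import *`, pyflakes cannot tell a real definition from a
# typo; it can only emit blanket warnings (F403/F405). With a qualified
# import, every name below is statically resolvable, so a missing or
# misspelled import fails at lint time instead of as a NameError at runtime.


def gen_int_attribute(key, values):
    """Build an INTS AttributeProto from an iterable of ints."""
    attr = onnx.AttributeProto()
    attr.name = key
    attr.ints.extend(int(v) for v in values)
    attr.type = onnx.AttributeProto.INTS
    return attr
```

For comparison, `onnx.helper.make_attribute(key, values)` produces an equivalent proto for a list of ints; the hand-rolled helper above simply mirrors the code these patches touch.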