From 2ac5a46d801a92f03004fa00fb96bbc27a8e2653 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Tue, 8 Oct 2024 14:52:10 +0800 Subject: [PATCH] lang/rust: Migrate to avro-rs (#3199) --- .asf.yaml | 1 - .devcontainer/devcontainer.json | 6 - .github/dependabot.yml | 6 - .github/labeler.yml | 3 - .github/workflows/test-lang-rust-audit.yml | 73 - .github/workflows/test-lang-rust-ci-ARM.yml | 78 - .github/workflows/test-lang-rust-ci.yml | 254 - .github/workflows/test-lang-rust-clippy.yml | 57 - BUILD.md | 1 - DIST_README.txt | 2 +- README.md | 16 - build.sh | 5 - lang/rust/.activate.sh | 20 - lang/rust/.cargo-rdme.toml | 19 - lang/rust/.deactivate.sh | 22 - lang/rust/.gitignore | 8 - lang/rust/.pre-commit-config.yaml | 33 - lang/rust/.requirements-precommit.txt | 1 - lang/rust/CHANGELOG.md | 166 - lang/rust/Cargo.lock | 1735 ----- lang/rust/Cargo.toml | 51 - lang/rust/LICENSE | 202 - lang/rust/Makefile | 105 - lang/rust/README.md | 47 +- lang/rust/README.tpl | 19 - lang/rust/avro/Cargo.toml | 99 - lang/rust/avro/README.md | 743 -- lang/rust/avro/benches/quickstop-null.avro | Bin 166053 -> 0 bytes lang/rust/avro/benches/serde.rs | 335 - lang/rust/avro/benches/serde_json.rs | 79 - lang/rust/avro/benches/single.rs | 193 - lang/rust/avro/examples/benchmark.rs | 155 - .../avro/examples/generate_interop_data.rs | 112 - lang/rust/avro/examples/test_interop_data.rs | 80 - .../test_interop_single_object_encoding.rs | 75 - lang/rust/avro/examples/to_value.rs | 29 - lang/rust/avro/src/bigdecimal.rs | 200 - lang/rust/avro/src/bytes.rs | 684 -- lang/rust/avro/src/codec.rs | 366 - lang/rust/avro/src/de.rs | 1559 ---- lang/rust/avro/src/decimal.rs | 146 - lang/rust/avro/src/decode.rs | 912 --- lang/rust/avro/src/duration.rs | 145 - lang/rust/avro/src/encode.rs | 919 --- lang/rust/avro/src/error.rs | 567 -- lang/rust/avro/src/lib.rs | 1068 --- lang/rust/avro/src/rabin.rs | 164 - lang/rust/avro/src/reader.rs | 1030 --- lang/rust/avro/src/schema.rs | 6823 ----------------- 
lang/rust/avro/src/schema_compatibility.rs | 1774 ----- lang/rust/avro/src/schema_equality.rs | 590 -- lang/rust/avro/src/ser.rs | 1041 --- lang/rust/avro/src/types.rs | 3222 -------- lang/rust/avro/src/util.rs | 288 - lang/rust/avro/src/validator.rs | 318 - lang/rust/avro/src/writer.rs | 1447 ---- lang/rust/avro/tests/append_to_existing.rs | 111 - lang/rust/avro/tests/avro-3786.rs | 886 --- lang/rust/avro/tests/avro-3787.rs | 279 - lang/rust/avro/tests/big_decimal.rs | 23 - lang/rust/avro/tests/bigdec.avro | Bin 189 -> 0 bytes lang/rust/avro/tests/codecs.rs | 89 - lang/rust/avro/tests/io.rs | 476 -- lang/rust/avro/tests/schema.rs | 2019 ----- lang/rust/avro/tests/shared.rs | 150 - .../avro/tests/to_from_avro_datum_schemata.rs | 88 - lang/rust/avro/tests/union_schema.rs | 342 - lang/rust/avro/tests/uuids.rs | 26 - lang/rust/avro/tests/validators.rs | 85 - lang/rust/avro_derive/Cargo.toml | 49 - lang/rust/avro_derive/README.md | 69 - lang/rust/avro_derive/src/lib.rs | 650 -- lang/rust/avro_derive/tests/derive.rs | 1598 ---- lang/rust/avro_test_helper/Cargo.toml | 38 - lang/rust/avro_test_helper/README.md | 51 - lang/rust/avro_test_helper/src/data.rs | 636 -- lang/rust/avro_test_helper/src/lib.rs | 71 - lang/rust/avro_test_helper/src/logger.rs | 96 - lang/rust/build.sh | 69 - lang/rust/deny.toml | 124 - lang/rust/fuzz/.gitignore | 4 - lang/rust/fuzz/Cargo.toml | 49 - .../fuzz/corpus/roundtrip/infinite_iteration | Bin 132 -> 0 bytes .../corpus/roundtrip/negate_with_overflow | Bin 17 -> 0 bytes .../fuzz/corpus/roundtrip/unchecked_resize | Bin 58 -> 0 bytes lang/rust/fuzz/fuzz_targets/roundtrip.rs | 117 - lang/rust/migration_guide.md | 107 - lang/rust/rustfmt.toml | 19 - lang/rust/wasm-demo/Cargo.toml | 44 - lang/rust/wasm-demo/README.md | 28 - lang/rust/wasm-demo/src/lib.rs | 16 - lang/rust/wasm-demo/tests/demos.rs | 86 - pom.xml | 6 +- 93 files changed, 7 insertions(+), 36587 deletions(-) delete mode 100644 .github/workflows/test-lang-rust-audit.yml delete mode 100644 
.github/workflows/test-lang-rust-ci-ARM.yml delete mode 100644 .github/workflows/test-lang-rust-ci.yml delete mode 100644 .github/workflows/test-lang-rust-clippy.yml delete mode 100644 lang/rust/.activate.sh delete mode 100644 lang/rust/.cargo-rdme.toml delete mode 100644 lang/rust/.deactivate.sh delete mode 100644 lang/rust/.gitignore delete mode 100644 lang/rust/.pre-commit-config.yaml delete mode 100644 lang/rust/.requirements-precommit.txt delete mode 100644 lang/rust/CHANGELOG.md delete mode 100644 lang/rust/Cargo.lock delete mode 100644 lang/rust/Cargo.toml delete mode 100644 lang/rust/LICENSE delete mode 100644 lang/rust/Makefile delete mode 100644 lang/rust/README.tpl delete mode 100644 lang/rust/avro/Cargo.toml delete mode 100644 lang/rust/avro/README.md delete mode 100644 lang/rust/avro/benches/quickstop-null.avro delete mode 100644 lang/rust/avro/benches/serde.rs delete mode 100644 lang/rust/avro/benches/serde_json.rs delete mode 100644 lang/rust/avro/benches/single.rs delete mode 100644 lang/rust/avro/examples/benchmark.rs delete mode 100644 lang/rust/avro/examples/generate_interop_data.rs delete mode 100644 lang/rust/avro/examples/test_interop_data.rs delete mode 100644 lang/rust/avro/examples/test_interop_single_object_encoding.rs delete mode 100644 lang/rust/avro/examples/to_value.rs delete mode 100644 lang/rust/avro/src/bigdecimal.rs delete mode 100644 lang/rust/avro/src/bytes.rs delete mode 100644 lang/rust/avro/src/codec.rs delete mode 100644 lang/rust/avro/src/de.rs delete mode 100644 lang/rust/avro/src/decimal.rs delete mode 100644 lang/rust/avro/src/decode.rs delete mode 100644 lang/rust/avro/src/duration.rs delete mode 100644 lang/rust/avro/src/encode.rs delete mode 100644 lang/rust/avro/src/error.rs delete mode 100644 lang/rust/avro/src/lib.rs delete mode 100644 lang/rust/avro/src/rabin.rs delete mode 100644 lang/rust/avro/src/reader.rs delete mode 100644 lang/rust/avro/src/schema.rs delete mode 100644 
lang/rust/avro/src/schema_compatibility.rs delete mode 100644 lang/rust/avro/src/schema_equality.rs delete mode 100644 lang/rust/avro/src/ser.rs delete mode 100644 lang/rust/avro/src/types.rs delete mode 100644 lang/rust/avro/src/util.rs delete mode 100644 lang/rust/avro/src/validator.rs delete mode 100644 lang/rust/avro/src/writer.rs delete mode 100644 lang/rust/avro/tests/append_to_existing.rs delete mode 100644 lang/rust/avro/tests/avro-3786.rs delete mode 100644 lang/rust/avro/tests/avro-3787.rs delete mode 100644 lang/rust/avro/tests/big_decimal.rs delete mode 100644 lang/rust/avro/tests/bigdec.avro delete mode 100644 lang/rust/avro/tests/codecs.rs delete mode 100644 lang/rust/avro/tests/io.rs delete mode 100644 lang/rust/avro/tests/schema.rs delete mode 100644 lang/rust/avro/tests/shared.rs delete mode 100644 lang/rust/avro/tests/to_from_avro_datum_schemata.rs delete mode 100644 lang/rust/avro/tests/union_schema.rs delete mode 100644 lang/rust/avro/tests/uuids.rs delete mode 100644 lang/rust/avro/tests/validators.rs delete mode 100644 lang/rust/avro_derive/Cargo.toml delete mode 100644 lang/rust/avro_derive/README.md delete mode 100644 lang/rust/avro_derive/src/lib.rs delete mode 100644 lang/rust/avro_derive/tests/derive.rs delete mode 100644 lang/rust/avro_test_helper/Cargo.toml delete mode 100644 lang/rust/avro_test_helper/README.md delete mode 100644 lang/rust/avro_test_helper/src/data.rs delete mode 100644 lang/rust/avro_test_helper/src/lib.rs delete mode 100644 lang/rust/avro_test_helper/src/logger.rs delete mode 100755 lang/rust/build.sh delete mode 100644 lang/rust/deny.toml delete mode 100644 lang/rust/fuzz/.gitignore delete mode 100644 lang/rust/fuzz/Cargo.toml delete mode 100644 lang/rust/fuzz/corpus/roundtrip/infinite_iteration delete mode 100644 lang/rust/fuzz/corpus/roundtrip/negate_with_overflow delete mode 100644 lang/rust/fuzz/corpus/roundtrip/unchecked_resize delete mode 100644 lang/rust/fuzz/fuzz_targets/roundtrip.rs delete mode 100644 
lang/rust/migration_guide.md delete mode 100644 lang/rust/rustfmt.toml delete mode 100644 lang/rust/wasm-demo/Cargo.toml delete mode 100644 lang/rust/wasm-demo/README.md delete mode 100644 lang/rust/wasm-demo/src/lib.rs delete mode 100644 lang/rust/wasm-demo/tests/demos.rs diff --git a/.asf.yaml b/.asf.yaml index 3f81aa7ee94..00662352a5e 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -33,7 +33,6 @@ github: - php - python - ruby - - rust enabled_merge_buttons: merge: false diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bb261cfd8c1..2196d458ce5 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -16,12 +16,6 @@ "ms-vscode.cpptools", // C# "ms-dotnettools.csharp", - // Rust - "vadimcn.vscode-lldb", - "mutantdino.resourcemonitor", - "matklad.rust-analyzer", - "tamasfe.even-better-toml", - "serayuzgur.crates", // Java "vscjava.vscode-java-pack", // Shell script diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 14d2f29b768..b11db2b271d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -79,12 +79,6 @@ updates: day: "sunday" open-pull-requests-limit: 20 - - package-ecosystem: "cargo" - directory: "/lang/rust/" - schedule: - interval: "daily" - open-pull-requests-limit: 20 - - package-ecosystem: "github-actions" directory: "/" schedule: diff --git a/.github/labeler.yml b/.github/labeler.yml index d694c7e6d65..a6184f05386 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -45,9 +45,6 @@ Python: Ruby: - changed-files: - any-glob-to-any-file: "lang/ruby/**/*" -Rust: - - changed-files: - - any-glob-to-any-file: "lang/rust/**/*" build: - changed-files: - any-glob-to-any-file: ["**/*Dockerfile*", "**/*.sh", "**/*pom.xml", ".github/**/*"] diff --git a/.github/workflows/test-lang-rust-audit.yml b/.github/workflows/test-lang-rust-audit.yml deleted file mode 100644 index d4bbd4b6931..00000000000 --- a/.github/workflows/test-lang-rust-audit.yml +++ /dev/null @@ -1,73 +0,0 
@@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Rust Security Audit -on: - workflow_dispatch: - push: - branches: [ main ] - paths: - - .github/workflows/test-lang-rust-audit.yml - - lang/rust/**/Cargo.toml - - lang/rust/Cargo.lock - - lang/rust/deny.toml - pull_request: - branches: [ main ] - paths: - - .github/workflows/test-lang-rust-audit.yml - - lang/rust/**/Cargo.toml - - lang/rust/Cargo.lock - - lang/rust/deny.toml - -permissions: - contents: read - -env: - RUSTFLAGS: -Dwarnings - -defaults: - run: - working-directory: lang/rust - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - audit: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Dependency Review - if: github.event_name == 'pull_request' - uses: actions/dependency-review-action@v4 - - name: Install Cargo Audit - run: cargo install cargo-audit - - name: Audit - run: cargo audit - - deny: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Install Cargo Deny - run: cargo install cargo-deny - - name: Check - run: cargo deny check - \ No newline at end of file diff --git a/.github/workflows/test-lang-rust-ci-ARM.yml 
b/.github/workflows/test-lang-rust-ci-ARM.yml deleted file mode 100644 index 3ac8e0bfdd8..00000000000 --- a/.github/workflows/test-lang-rust-ci-ARM.yml +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -name: 'Rust Continuous Integration on ARM' -on: - workflow_dispatch: - push: - branches: [ main ] - pull_request: - branches: [ main ] - paths: - - .github/workflows/test-lang-rust-ci.yml - - lang/rust/** - -permissions: - contents: read - -env: - RUSTFLAGS: -Dwarnings - -defaults: - run: - working-directory: lang/rust - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - arm64: - name: Rust on Linux ARM64 - runs-on: ["self-hosted", "asf-arm"] - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Cache Cargo - uses: actions/cache@v4 - with: - # these represent dependencies downloaded by cargo - # and thus do not depend on the OS, arch nor rust version. - path: ~/.cargo - key: ${{ runner.os }}-target-arm64-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Rust dependencies - uses: actions/cache@v4 - with: - # these represent compiled steps of both dependencies and avro - # and thus are specific for a particular OS, arch and rust version. 
- path: lang/rust/target - key: ${{ runner.os }}-target-cache1-stable- - - - name: Rust Toolchain - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: stable - components: rustfmt - targets: x86_64-unknown-linux-gnu - - - name: Build - run: | - set -x - ./build.sh test diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml deleted file mode 100644 index bdea0be9bc3..00000000000 --- a/.github/workflows/test-lang-rust-ci.yml +++ /dev/null @@ -1,254 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: Rust Continuous Integration -on: - workflow_dispatch: - push: - branches: [ main ] - pull_request: - branches: [ main ] - paths: - - .github/workflows/test-lang-rust-ci.yml - - lang/rust/** - -permissions: - contents: read - -env: - RUSTFLAGS: -Dwarnings - -defaults: - run: - working-directory: lang/rust - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - ci: - runs-on: ubuntu-latest - strategy: - matrix: - rust: - - 'stable' - - 'beta' - - 'nightly' - - '1.73.0' # MSRV - target: - - x86_64-unknown-linux-gnu - - wasm32-unknown-unknown - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Cache Cargo - uses: actions/cache@v4 - with: - # these represent dependencies downloaded by cargo - # and thus do not depend on the OS, arch nor rust version. - path: ~/.cargo - key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }} - - name: Cache Rust dependencies - uses: actions/cache@v4 - with: - # these represent compiled steps of both dependencies and avro - # and thus are specific for a particular OS, arch and rust version. - path: lang/rust/target - key: ${{ runner.os }}-target-cache1-${{ matrix.rust }}-${{ hashFiles('**/Cargo.lock') }} - - - name: Rust Toolchain - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: ${{ matrix.rust }} - components: rustfmt - targets: ${{ matrix.target }} - - - name: Cache cargo-rdme - if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' - uses: actions/cache@v4 - with: - path: ~/.cargo-${{ matrix.rust }}/cargo-rdme - key: cargo-rdme- - - # Check if the doc cumment in avro/src/lib.rs and avro/README.md are in sync. - - name: Run cargo-rdme - # The result is environment independent so one test pattern is enough. 
- if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' - run: | - cargo install --root ~/.cargo-${{ matrix.rust }}/cargo-rdme --locked cargo-rdme - export PATH=$PATH:~/.cargo-${{ matrix.rust }}/cargo-rdme/bin - cargo rdme --check - - - name: Rust Format - if: matrix.target != 'wasm32-unknown-unknown' - run: cargo fmt --all -- --check - - - name: Rust Build - run: cargo build --all-features --all-targets - - - name: Rust Test - if: matrix.target != 'wasm32-unknown-unknown' - run: cargo test --all-features --target ${{ matrix.target }} - - - name: Rust Test AVRO-3549 - if: matrix.target != 'wasm32-unknown-unknown' - run: cargo test --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec - - # because of https://github.com/rust-lang/cargo/issues/6669 - - name: Rust Test docs - if: matrix.target != 'wasm32-unknown-unknown' - run: cargo test --doc - - interop: - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Rust Toolchain - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: stable - - - name: Cache Cargo - uses: actions/cache@v4 - with: - # these represent dependencies downloaded by cargo - # and thus do not depend on the OS, arch nor rust version. - path: ~/.cargo - key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }} - - name: Cache Rust dependencies - uses: actions/cache@v4 - with: - # these represent compiled steps of both dependencies and avro - # and thus are specific for a particular OS, arch and rust version. 
- path: lang/rust/target - key: ${{ runner.os }}-target-cache1-stable-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Local Maven Repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-maven- - - - name: 'Setup Temurin JDK 8, 11, 17 & 21' - uses: actions/setup-java@6a0805fcefea3d4657a47ac4c165951e33482018 # v4.2.2 - with: - distribution: 'temurin' - java-version: | - 8 - 11 - 17 - 21 - - - name: 'Setup Maven 3.9.6' - uses: stCarolas/setup-maven@d6af6abeda15e98926a57b5aa970a96bb37f97d1 # v5 - with: - maven-version: 3.9.6 - - - name: Install Java Avro for Interop Test - working-directory: . - run: mvn -B install -PskipQuality - - - name: Create Interop Data Directory - working-directory: . - run: mkdir -p build/interop/data - - - name: Generate Interop Resources - working-directory: lang/java/avro - run: mvn -B -P interop-data-generate generate-resources - - - name: Generate interop data - run: ./build.sh interop-data-generate - - - name: Rust reads interop files created by Java and Rust - run: ./build.sh interop-data-test - - - uses: shogo82148/actions-setup-perl@v1 - with: - perl-version: 5.32 - - - name: Install Dependencies - run: | - sudo apt-get -qqy install --no-install-recommends libcompress-raw-zlib-perl \ - libcpan-uploader-perl \ - libencode-perl \ - libio-string-perl \ - libjansson-dev \ - libjson-xs-perl \ - libmodule-install-perl \ - libmodule-install-readmefrompod-perl \ - libobject-tiny-perl \ - libsnappy-dev \ - libtest-exception-perl \ - libtest-pod-perl - cpanm --mirror https://www.cpan.org/ install Compress::Zstd \ - Error::Simple \ - Module::Install::Repository \ - Object::Tiny \ - Regexp::Common \ - Try::Tiny \ - inc::Module::Install - - - - name: Perl reads interop files created by Java and Rust - working-directory: lang/perl - run: ./build.sh interop-data-test - - web-assembly: - runs-on: ubuntu-latest - - steps: - - name: 
Checkout - uses: actions/checkout@v4 - - - name: Rust Toolchain - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: stable - targets: wasm32-unknown-unknown - - - name: Cache Cargo - uses: actions/cache@v4 - with: - # these represent dependencies downloaded by cargo - # and thus do not depend on the OS, arch nor rust version. - path: ~/.cargo - key: ${{ runner.os }}-target-cache1-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Rust dependencies - uses: actions/cache@v4 - with: - # these represent compiled steps of both dependencies and avro - # and thus are specific for a particular OS, arch and rust version. - path: lang/rust/target - key: ${{ runner.os }}-target-cache1-stable-${{ hashFiles('**/Cargo.lock') }} - - - name: Install wasm-pack - run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - - - name: Build the Web Assembly demo app - run: wasm-pack build wasm-demo - - - name: Test the Web Assembly demo app - run: RUST_BACKTRACE=1 wasm-pack test --headless --firefox wasm-demo diff --git a/.github/workflows/test-lang-rust-clippy.yml b/.github/workflows/test-lang-rust-clippy.yml deleted file mode 100644 index e7e0ba85448..00000000000 --- a/.github/workflows/test-lang-rust-clippy.yml +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -name: Rust Clippy Check -on: - workflow_dispatch: - push: - branches: [ main ] - pull_request: - branches: [ main ] - paths: - - .github/workflows/test-lang-rust-clippy.yml - - lang/rust/** - -permissions: - contents: read - -env: - RUSTFLAGS: -Dwarnings - -defaults: - run: - working-directory: lang/rust - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - clippy_check: - runs-on: ubuntu-latest - strategy: - matrix: - rust: - - 'stable' - - '1.73.0' # MSRV - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@nightly - with: - toolchain: ${{ matrix.rust }} - components: clippy - - run: cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports diff --git a/BUILD.md b/BUILD.md index 2c3a192899c..5943abefe6a 100644 --- a/BUILD.md +++ b/BUILD.md @@ -17,7 +17,6 @@ The following packages must be installed before Avro can be built: Math::BigInt, JSON::MaybeXS, Try::Tiny, Regexp::Common, Encode, Object::Tiny, Compress::ZLib, Error::Simple, Test::More, Test::Exception, Test::Pod - - Rust: rustc and Cargo 1.73.0 or greater - Apache Ant 1.7 - md5sum, sha1sum, used by top-level dist target diff --git a/DIST_README.txt b/DIST_README.txt index 9c68790ee86..279909f483e 100644 --- a/DIST_README.txt +++ b/DIST_README.txt @@ -9,6 +9,6 @@ This distribution contains the following files: - avro-doc-x.y.z.tar.gz contains Avro's pre-built documentation. - - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/, rust/ and ruby/ + - the c/, cpp/, csharp/, java/, js/, perl/, php/, py/ and ruby/ subdirectories contain pre-built, language-specific binaries, bundles, etc. as conveniences. 
diff --git a/README.md b/README.md index 517a15c7027..7954d27015c 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,6 @@ [![test python][test python img]][test python] [![test php][test php img]][test php] -[![rust continuous integration][rust continuous integration img]][rust continuous integration] -[![rust clippy check][rust clippy check img]][rust clippy check] -[![rust security audit][rust security audit img]][rust security audit] - ### Current CI status (ARM based servers) [![test c ARM][test c ARM img]][test c ARM] [![test c# ARM][test c# ARM img]][test c# ARM] @@ -31,7 +27,6 @@ [![test ruby ARM][test ruby ARM img]][test ruby ARM] [![test python ARM][test python ARM img]][test python ARM] [![test php ARM][test php ARM img]][test php ARM] -[![rust continuous integration ARM][rust continuous integration ARM img]][rust continuous integration ARM] ### Current CodeQL status [![codeql c#][codeql c# img]][codeql c#] @@ -74,11 +69,6 @@ To contribute to Avro, please read: [test python ARM]: https://github.com/apache/avro/actions/workflows/test-lang-py-ARM.yml [test php ARM]: https://github.com/apache/avro/actions/workflows/test-lang-php-ARM.yml -[rust continuous integration]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml -[rust continuous integration ARM]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci-ARM.yml -[rust clippy check]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml -[rust security audit]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml - [codeql c#]: https://github.com/apache/avro/actions/workflows/codeql-csharp-analysis.yml [codeql java]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml [codeql javascript]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml @@ -104,12 +94,6 @@ To contribute to Avro, please read: [test python ARM img]: 
https://github.com/apache/avro/actions/workflows/test-lang-py-ARM.yml/badge.svg [test php ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-php-ARM.yml/badge.svg -[rust continuous integration img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg -[rust clippy check img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-clippy.yml/badge.svg -[rust security audit img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-audit.yml/badge.svg - -[rust continuous integration ARM img]: https://github.com/apache/avro/actions/workflows/test-lang-rust-ci-ARM.yml/badge.svg - [codeql c# img]: https://github.com/apache/avro/actions/workflows/codeql-csharp-analysis.yml/badge.svg [codeql java img]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml/badge.svg [codeql javascript img]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml/badge.svg diff --git a/build.sh b/build.sh index 0f31805a63e..26a87b1a3ef 100755 --- a/build.sh +++ b/build.sh @@ -107,7 +107,6 @@ do (cd lang/ruby; ./build.sh lint test) (cd lang/php; ./build.sh lint test) (cd lang/perl; ./build.sh lint test) - (cd lang/rust; ./build.sh lint test) (cd lang/py; ./build.sh interop-data-generate) (cd lang/c; ./build.sh interop-data-generate) @@ -176,7 +175,6 @@ do (cd lang/js; ./build.sh dist) (cd lang/ruby; ./build.sh dist) (cd lang/php; ./build.sh dist) - (cd lang/rust; ./build.sh dist) mkdir -p dist/perl (cd lang/perl; ./build.sh dist) @@ -252,7 +250,6 @@ do (cd lang/perl; ./build.sh clean) - (cd lang/rust; ./build.sh clean) ;; veryclean) @@ -280,8 +277,6 @@ do (cd lang/perl; ./build.sh clean) - (cd lang/rust; ./build.sh clean) - rm -rf lang/c++/build rm -rf lang/js/node_modules rm -rf lang/perl/inc/ diff --git a/lang/rust/.activate.sh b/lang/rust/.activate.sh deleted file mode 100644 index 9ff85b2fa8b..00000000000 --- a/lang/rust/.activate.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -# 
Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -make install-hooks diff --git a/lang/rust/.cargo-rdme.toml b/lang/rust/.cargo-rdme.toml deleted file mode 100644 index 3f27313be86..00000000000 --- a/lang/rust/.cargo-rdme.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -readme-path = "avro/README.md" -workspace-project = "apache-avro" \ No newline at end of file diff --git a/lang/rust/.deactivate.sh b/lang/rust/.deactivate.sh deleted file mode 100644 index 1b80d592d09..00000000000 --- a/lang/rust/.deactivate.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -if [[ "$VIRTUAL_ENV" != "" ]]; then - deactivate -fi diff --git a/lang/rust/.gitignore b/lang/rust/.gitignore deleted file mode 100644 index 875c6ff7096..00000000000 --- a/lang/rust/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -/target/ -**/*.rs.bk -*.swp -.idea/ -*.iml -precommit_venv/ -fleet.toml -**/.cargo/config.toml diff --git a/lang/rust/.pre-commit-config.yaml b/lang/rust/.pre-commit-config.yaml deleted file mode 100644 index 1bf8aad4fbf..00000000000 --- a/lang/rust/.pre-commit-config.yaml +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -- repo: local - hooks: - - id: rust-linting - name: Rust linting - description: Run cargo fmt on files included in the commit. rustfmt should be installed before-hand. - entry: cargo fmt --all -- - pass_filenames: true - types: [file, rust] - language: system - - id: rust-clippy - name: Rust clippy - description: Run cargo clippy on files included in the commit. clippy should be installed before-hand. - entry: cargo clippy --all-targets --all-features -- -Dclippy::all - pass_filenames: false - types: [file, rust] - language: system diff --git a/lang/rust/.requirements-precommit.txt b/lang/rust/.requirements-precommit.txt deleted file mode 100644 index 57ee4613a97..00000000000 --- a/lang/rust/.requirements-precommit.txt +++ /dev/null @@ -1 +0,0 @@ -pre-commit==1.14.4 diff --git a/lang/rust/CHANGELOG.md b/lang/rust/CHANGELOG.md deleted file mode 100644 index 47b959e02c2..00000000000 --- a/lang/rust/CHANGELOG.md +++ /dev/null @@ -1,166 +0,0 @@ - - -# Changelog - -This file has been used by [avro-rs](https://github.com/flavray/avro-rs) before donating the project to Apache Avro. -Apache Avro uses [JIRA](https://issues.apache.org/jira/browse/AVRO) for issue tracking and changelog! - - -The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
- -## Unreleased - -## [0.13.0] - 2021-01-29 -### Added -- Support for parsing a list of schemas which may have cross dependencies (#173) - -### Changed -- Allow Value::Bytes to be assigned to Schema::Fixed (#171) - -### Fixed -- Allow resolution of union schemas with logical types (#176) - - -## [0.12.0] - 2020-11-27 -### Added -- Added support for the Rabin fingerprint (#157) - -### Fixed -- Strip more fields in PCF and fix panic (#164) - -## [0.11.0] - 2020-08-13 -### Changed -- Introduce custom Error enum to replace all existing errors (backward-incompatible) (#135) -- Swapped failure for thiserror (backward-incompatible) (#135) -- Update digest crate and digest::Digest trait to 0.9 (backward-incompatible with digest::Digest 0.8) (#133) -- Replace some manual from_str implementations with strum (#136) -- Handle logical types in canonical form schemas (#144) -- Move to specific error variants for errors (#146) - -### Added -- Support to convert avro value to json value (#155) -- Implement deserialize for Uuid (#153) - -## Deprecated -- Deprecate ToAvro in favor of From for Value implementations (#137) - -## [0.10.0] - 2020-05-31 -### Changed -- Writer::into_inner() now calls flush() and returns a Result (backward-incompatible) - -### Added -- Add utility for schema compatibility check - -## [0.9.1] - 2020-05-02 -### Changed -- Port benchmarks to criterion - -### Fixed -- Fix bug in the reader buffer length - -## [0.9.0] - 2020-04-24 -### Added -- Add support for logical types -- Make writer block size configurable via builder pattern - -## [0.8.0] - 2020-04-15 -### Added -- Partial rust enum serialization/deserialization support - -## [0.7.0] - 2020-02-16 -### Added -- Export de::Error and ser::Error as DeError and SerError - -### Fixed -- Fix union resolution of default values - -## [0.6.6] - 2019-12-22 -### Fixed -- Negative block lengths are not handled - -## [0.6.5] - 2019-03-09 -### Fixed -- Allow Array(Int) to be converted to Bytes -- Fix enum type 
deserialization bug - -## [0.6.4] - 2018-12-24 -### Fixed -- Variable-length encoding for big i64 numbers - -## [0.6.3]- 2018-12-19 -### Added -- Schema fingerprint (md5, sha256) generation - -## [0.6.2]- 2018-12-04 -### Fixed -- Snappy codec - -## [0.6.1]- 2018-10-07 -### Fixed -- Encoding of i32/i64 - -## [0.6.0]- 2018-08-11 -### Added -- impl Send+Sync for Schema (backwards-incompatible) - -## [0.5.0] - 2018-08-06 -### Added -- A maximum allocation size when decoding -- Support for Parsing Canonical Form -- `to_value` to serialize anything that implements Serialize into a Value -- Full support for union types (non-backwards compatible) -### Fixed -- Encoding of empty containers (array/map) - -## [0.4.1] - 2018-06-17 -### Changed -- Implemented clippy suggestions - -## [0.4.0] - 2018-06-17 -### Changed -- Many performance improvements to both encoding and decoding -### Added -- New public method extend_from_slice for Writer -- serde_json benchmark for comparison -- bench_from_file function and a file from the goavro repository for comparison - -## [0.3.2] - 2018-06-07 -### Added -- Some missing serialization fields for Schema::Record - -## [0.3.1] - 2018-06-02 -### Fixed -- Encode/decode Union values with a leading zig-zag long - -## [0.3.0] - 2018-05-29 -### Changed -- Move from string as errors to custom fail types - -### Fixed -- Avoid reading the first item over and over in Reader - -## [0.2.0] - 2018-05-22 -### Added -- `from_avro_datum` to decode Avro-encoded bytes into a `Value` -- Documentation for `from_value` - -## [0.1.1] - 2018-05-16 -- Initial release diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock deleted file mode 100644 index 97d5772fb8f..00000000000 --- a/lang/rust/Cargo.lock +++ /dev/null @@ -1,1735 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "addr2line" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "adler32" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" - -[[package]] -name = "ahash" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - -[[package]] -name = "aho-corasick" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" -dependencies = [ - "memchr", -] - -[[package]] -name = "allocator-api2" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" - -[[package]] -name = "anyhow" -version = "1.0.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" - -[[package]] -name = "apache-avro" -version = "0.18.0" 
-dependencies = [ - "anyhow", - "apache-avro-derive", - "apache-avro-test-helper", - "bigdecimal", - "bzip2", - "crc32fast", - "criterion", - "digest", - "hex-literal", - "libflate", - "log", - "md-5", - "num-bigint", - "paste", - "pretty_assertions", - "quad-rand", - "rand", - "regex-lite", - "rstest", - "serde", - "serde_bytes", - "serde_json", - "serial_test", - "sha2", - "snap", - "strum", - "strum_macros", - "thiserror", - "typed-builder", - "uuid", - "xz2", - "zstd", -] - -[[package]] -name = "apache-avro-derive" -version = "0.18.0" -dependencies = [ - "apache-avro", - "darling", - "proc-macro2", - "proptest", - "quote", - "serde", - "serde_json", - "syn", -] - -[[package]] -name = "apache-avro-test-helper" -version = "0.18.0" -dependencies = [ - "anyhow", - "better-panic", - "ctor", - "env_logger", - "log", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "backtrace" -version = "0.3.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "better-panic" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fa9e1d11a268684cbd90ed36370d7577afb6c62d912ddff5c15fc34343e5036" -dependencies = [ - "backtrace", - "console", -] - -[[package]] -name = "bigdecimal" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d712318a27c7150326677b321a5fa91b55f6d9034ffd67f20319e147d40cee" -dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", - "serde", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bitflags" -version = "2.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "bumpalo" -version = "3.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" - -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.0.83" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" -dependencies = [ - "jobserver", - "libc", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "ciborium" -version = "0.2.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" - -[[package]] -name = "console" -version = "0.15.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" -dependencies = [ - "encode_unicode", - "lazy_static", - "libc", - "windows-sys", -] - -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if", - "wasm-bindgen", -] - -[[package]] -name = "core2" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" -dependencies = [ - "memchr", -] - -[[package]] -name = "cpufeatures" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "is-terminal", - "itertools", - "num-traits", - "once_cell", - "oorandom", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "ctor" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" -dependencies = [ - "quote", - "syn", -] - -[[package]] -name = "darling" 
-version = "0.20.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" -dependencies = [ - "darling_core", - "darling_macro", -] - -[[package]] -name = "darling_core" -version = "0.20.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "darling_macro" -version = "0.20.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" -dependencies = [ - "darling_core", - "quote", - "syn", -] - -[[package]] -name = "dary_heap" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" - -[[package]] -name = "diff" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - -[[package]] -name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" - -[[package]] -name = "env_filter" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" -dependencies = [ - "log", -] - -[[package]] -name = "env_logger" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" -dependencies = [ - "env_filter", - "log", -] - -[[package]] -name = "errno" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "futures" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-executor" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" - -[[package]] -name = "glob" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" - -[[package]] -name = "half" -version = "2.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" -dependencies = [ - "cfg-if", - "crunchy", -] 
- -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "hello-wasm" -version = "0.1.0" -dependencies = [ - "apache-avro", - "console_error_panic_hook", - "serde", - "wasm-bindgen", - "wasm-bindgen-test", -] - -[[package]] -name = "hermit-abi" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c62115964e08cb8039170eb33c1d0e2388a256930279edca206fff675f82c3" - -[[package]] -name = "hex-literal" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" - -[[package]] -name = "is-terminal" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" -dependencies = [ - "hermit-abi", - "rustix", - "windows-sys", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" - -[[package]] -name = "jobserver" -version = "0.1.27" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" -dependencies = [ - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.70" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.153" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" - -[[package]] -name = "libflate" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" -dependencies = [ - "adler32", - "core2", - "crc32fast", - "dary_heap", - "libflate_lz77", -] - -[[package]] -name = "libflate_lz77" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" -dependencies = [ - "core2", - "hashbrown", - "rle-decode-fast", -] - -[[package]] -name = "libm" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" - -[[package]] -name = "linux-raw-sys" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" - -[[package]] -name = "lock_api" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" -dependencies = [ - "autocfg", - 
"scopeguard", -] - -[[package]] -name = "log" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" - -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "memchr" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" - -[[package]] -name = "minicov" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c71e683cd655513b99affab7d317deb690528255a0d5f717f1024093c12b169" -dependencies = [ - "cc", - "walkdir", -] - -[[package]] -name = "miniz_oxide" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" -dependencies = [ - "adler", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", - "serde", -] - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "object" -version = "0.32.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - -[[package]] -name = "oorandom" -version = "11.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.48.5", -] - -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pin-project-lite" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.29" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "pretty_assertions" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" -dependencies = [ - "diff", - "yansi", -] - -[[package]] -name = "proc-macro2" -version = "1.0.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "proptest" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" -dependencies = [ - "bitflags 2.4.2", - "lazy_static", - "num-traits", - "rand", - "rand_chacha", - "rand_xorshift", - "regex-syntax", - "unarray", -] - -[[package]] -name = "quad-rand" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b76f1009795ca44bb5aaae8fd3f18953e209259c33d9b059b1f53d58ab7511db" - -[[package]] -name = "quote" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_xorshift" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" -dependencies = [ - "rand_core", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "regex" -version = "1.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-lite" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" - -[[package]] -name = "regex-syntax" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" - -[[package]] -name = "relative-path" -version = "1.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" 
- -[[package]] -name = "rle-decode-fast" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" - -[[package]] -name = "rstest" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" -dependencies = [ - "rstest_macros", - "rustc_version", -] - -[[package]] -name = "rstest_macros" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" -dependencies = [ - "cfg-if", - "glob", - "proc-macro2", - "quote", - "regex", - "relative-path", - "rustc_version", - "syn", - "unicode-ident", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - -[[package]] -name = "rustix" -version = "0.38.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" -dependencies = [ - "bitflags 2.4.2", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - -[[package]] -name = "rustversion" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" - -[[package]] -name = "ryu" -version = "1.0.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" - -[[package]] -name = "same-file" -version = "1.0.6" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scc" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96560eea317a9cc4e0bb1f6a2c93c09a19b8c4fc5cb3fcc0ec1c094cd783e2" -dependencies = [ - "sdd", -] - -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "sdd" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84345e4c9bd703274a082fb80caaa99b7612be48dfaa1dd9266577ec412309d" - -[[package]] -name = "semver" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" - -[[package]] -name = "serde" -version = "1.0.210" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_bytes" -version = "0.11.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" -dependencies = [ - "serde", -] - -[[package]] -name = "serde_derive" -version = "1.0.210" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.128" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "serial_test" -version = "3.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b4b487fe2acf240a021cf57c6b2b4903b1e78ca0ecd862a71b71d2a51fed77d" -dependencies = [ - "futures", - "log", - "once_cell", - "parking_lot", - "scc", - "serial_test_derive", -] - -[[package]] -name = "serial_test_derive" -version = "3.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" - -[[package]] -name = "snap" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "syn" -version = "2.0.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "thiserror" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "typed-builder" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e14ed59dc8b7b26cacb2a92bad2e8b1f098806063898ab42a3bd121d7d45e75" -dependencies = [ - "typed-builder-macro", -] - -[[package]] -name = "typed-builder-macro" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "unarray" -version = "0.1.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" - -[[package]] -name = "unicode-ident" -version = "1.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "uuid" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" -dependencies = [ - "serde", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "walkdir" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" -dependencies = [ - "cfg-if", - "once_cell", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.93" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" - -[[package]] -name = "wasm-bindgen-test" -version = "0.3.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68497a05fb21143a08a7d24fc81763384a3072ee43c44e86aad1744d6adef9d9" -dependencies = [ - "console_error_panic_hook", - "js-sys", - "minicov", - "scoped-tls", - "wasm-bindgen", - "wasm-bindgen-futures", - "wasm-bindgen-test-macro", -] - -[[package]] -name = "wasm-bindgen-test-macro" -version = "0.3.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8220be1fa9e4c889b30fd207d4906657e7e90b12e0e6b0c8b8d8709f5de021" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "web-sys" -version = "0.3.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets 0.52.0", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - -[[package]] -name = "windows-targets" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" -dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", -] - -[[package]] -name 
= "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" - -[[package]] -name = "zerocopy" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zstd" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml deleted file mode 100644 index 51d3b107ec3..00000000000 --- a/lang/rust/Cargo.toml +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[workspace] -members = [ - "avro_test_helper", - "avro_derive", - "avro", - "wasm-demo" -] -exclude = [ - "fuzz" -] - -resolver = "2" - -[workspace.package] -version = "0.18.0" -authors = ["Apache Avro team "] -license = "Apache-2.0" -repository = "https://github.com/apache/avro" -edition = "2021" -rust-version = "1.73.0" -keywords = ["avro", "data", "serialization"] -categories = ["encoding"] -documentation = "https://docs.rs/apache-avro" - -# dependencies used by more than one members -[workspace.dependencies] -log = { default-features = false, version = "0.4.22" } -serde = { default-features = false, version = "1.0.210", features = ["derive"] } -serde_bytes = { default-features = false, version = "0.11.15", features = ["std"] } -serde_json = { default-features = false, version = "1.0.128", features = ["std"] } - -[profile.release.package.hello-wasm] -# Tell `rustc` to optimize for small code size. -opt-level = "s" diff --git a/lang/rust/LICENSE b/lang/rust/LICENSE deleted file mode 100644 index 62589edd12a..00000000000 --- a/lang/rust/LICENSE +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - https://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/lang/rust/Makefile b/lang/rust/Makefile deleted file mode 100644 index 4a903c1c6d7..00000000000 --- a/lang/rust/Makefile +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -VENV := precommit_venv -HOOKS := .git/hooks/pre-commit - -# PRE-COMMIT HOOKS - -$(VENV): .requirements-precommit.txt - virtualenv -p python3 $(VENV) - $(VENV)/bin/pip install -r .requirements-precommit.txt - -.PHONY: env -env: $(VENV) - -.PHONY: clean-env -clean-env: - rm -rf $(VENV) - -$(HOOKS): $(VENV) .pre-commit-config.yaml - $(VENV)/bin/pre-commit install -f --install-hooks - cargo fmt --help > /dev/null || rustup component add rustfmt - cargo clippy --help > /dev/null || rustup component add clippy - cargo readme --help > /dev/null || cargo install cargo-readme - -.PHONY: install-hooks -install-hooks: $(HOOKS) - -.PHONY: clean-hooks -clean-hooks: - rm -rf $(HOOKS) - -# LINTING - -.PHONY: lint -lint: - cargo fmt - -.PHONY: clean-lint -clean-lint: - find . -type f -name *.rs.bk -delete - -.PHONY: clippy -clippy: install-hooks - cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports - -# TESTING - -.PHONY: test -test: install-hooks - cargo test --all-features --all-targets - # because of https://github.com/rust-lang/cargo/issues/6669 - cargo test --doc - $(VENV)/bin/pre-commit run --all-files - -# BENCHMARKING - -.PHONY: benchmark -benchmark: - cargo bench - -# DOCS - -.PHONY: doc -doc: - cargo doc --no-deps --all-features - -.PHONY: doc-local -doc-local: - cargo doc --no-deps --all-features --open - -.PHONY: readme -readme: - cargo rdme - - -# BUILDING - -.PHONY: build -build: - cargo build --all-features - -.PHONY: release -release: - cargo build --all-features --release - -# CLEAN -# -.PHONY: clean -clean: clean-env clean-hooks clean-lint - cargo clean diff --git a/lang/rust/README.md b/lang/rust/README.md index 58d9afcba8f..40c005ecfa1 100644 --- a/lang/rust/README.md +++ b/lang/rust/README.md @@ -1,46 +1,9 @@ - +We apologize for the inconvenience and greatly appreciate your contributions! 
-# apache-avro +For more information about this change, please visit: -Apache Avro Rust SDK - -[![Current Crates.io Version](https://img.shields.io/crates/v/apache_avro.svg)](https://crates.io/crates/apache-avro) -[![Documentation](https://img.shields.io/badge/docs-latest-blue)](https://docs.rs/apache-avro/latest/apache_avro/) -[![CI](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg)](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml) -![Rust Version](https://img.shields.io/crates/msrv/apache_avro.svg?label=MSRV&color=red) -![license](https://shields.io/badge/license-Apache--2.0-blue) - -# Subprojects - -## Avro - -See [avro/README.md](./avro/README.md) - -## Avro derive - -See [avro_derive/README.md](./avro_derive/README.md) - -## Avro test helper - -See [avro_test_helper/README.md](./avro_test_helper/README.md) - -## WebAssembly demo application - -See [wasm-demo/README.md](./wasm-demo/README.md) +- [[VOTE] Extract the Rust SDK into its own Git repository](https://lists.apache.org/thread/rpvgxrgsc7obv3qnj5zsmnkp4z112g0g) \ No newline at end of file diff --git a/lang/rust/README.tpl b/lang/rust/README.tpl deleted file mode 100644 index 9b3c370a056..00000000000 --- a/lang/rust/README.tpl +++ /dev/null @@ -1,19 +0,0 @@ -# {{crate}} - -[![Latest Version](https://img.shields.io/crates/v/apache-avro.svg)](https://crates.io/crates/apache-avro) -[![Rust Continuous Integration](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg)](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml) -[![Latest Documentation](https://docs.rs/apache-avro/badge.svg)](https://docs.rs/apache-avro) -[![Apache License 2.0](https://img.shields.io/badge/license-Apache%202-blue.svg](https://github.com/apache/avro/blob/main/LICENSE.txt) - -{{readme}} - -## License -This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/main/LICENSE.txt). 
- -## Contributing -Everyone is encouraged to contribute! You can contribute by forking the GitHub repo and making a pull request or opening an issue. -All contributions will be licensed under [Apache License 2.0](https://github.com/apache/avro/blob/main/LICENSE.txt). - -Please consider adding documentation and tests! -If you introduce a backward-incompatible change, please consider adding instruction to migrate in the [Migration Guide](migration_guide.md) -If you modify the crate documentation in `lib.rs`, run `make readme` to sync the README file. diff --git a/lang/rust/avro/Cargo.toml b/lang/rust/avro/Cargo.toml deleted file mode 100644 index 7043f622870..00000000000 --- a/lang/rust/avro/Cargo.toml +++ /dev/null @@ -1,99 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "apache-avro" -description = "A library for working with Apache Avro in Rust" -readme = "README.md" -version.workspace = true -authors.workspace = true -license.workspace = true -repository.workspace = true -edition.workspace = true -rust-version.workspace = true -keywords.workspace = true -categories.workspace = true -documentation.workspace = true - -[features] -bzip = ["dep:bzip2"] -derive = ["dep:apache-avro-derive"] -snappy = ["dep:crc32fast", "dep:snap"] -xz = ["dep:xz2"] -zstandard = ["dep:zstd"] - -[lib] -# disable benchmarks to allow passing criterion arguments to `cargo bench` -bench = false -path = "src/lib.rs" - -[[bench]] -harness = false -name = "serde" - -[[bench]] -harness = false -name = "serde_json" - -[[bench]] -harness = false -name = "single" - -[dependencies] -apache-avro-derive = { default-features = false, version = "0.18.0", path = "../avro_derive", optional = true } -bigdecimal = { default-features = false, version = "0.4.5", features = ["std", "serde"] } -bzip2 = { default-features = false, version = "0.4.4", optional = true } -crc32fast = { default-features = false, version = "1.4.2", optional = true } -digest = { default-features = false, version = "0.10.7", features = ["core-api"] } -libflate = { default-features = false, version = "2.1.0", features = ["std"] } -log = { workspace = true } -num-bigint = { default-features = false, version = "0.4.6", features = ["std", "serde"] } -regex-lite = { default-features = false, version = "0.1.6", features = ["std", "string"] } -serde = { workspace = true } -serde_bytes = { workspace = true } -serde_json = { workspace = true } -snap = { default-features = false, version = "1.1.0", optional = true } -strum = { default-features = false, version = "0.26.3" } -strum_macros = { default-features = false, version = "0.26.4" } -thiserror = { default-features = false, version = "1.0.63" } -typed-builder = { default-features = false, version = "0.20.0" } -uuid = { default-features = 
false, version = "1.10.0", features = ["serde", "std"] } -xz2 = { default-features = false, version = "0.1.7", optional = true } -zstd = { default-features = false, version = "0.13.2", optional = true } - - -[target.'cfg(target_arch = "wasm32")'.dependencies] -quad-rand = { default-features = false, version = "0.2.2" } - -[target.'cfg(not(target_arch = "wasm32"))'.dependencies] -rand = { default-features = false, version = "0.8.5", features = ["default"] } - -[dev-dependencies] -anyhow = { default-features = false, version = "1.0.89", features = ["std"] } -apache-avro-test-helper = { default-features = false, version = "0.18.0", path = "../avro_test_helper" } -criterion = { default-features = false, version = "0.5.1" } -hex-literal = { default-features = false, version = "0.4.1" } -md-5 = { default-features = false, version = "0.10.6" } -pretty_assertions = { default-features = false, version = "1.4.1", features = ["std"] } -serial_test = "3.1.1" -sha2 = { default-features = false, version = "0.10.8" } -paste = { default-features = false, version = "1.0.15" } -rstest = { default-features = false, version = "0.22.0" } - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] diff --git a/lang/rust/avro/README.md b/lang/rust/avro/README.md deleted file mode 100644 index 986dd318c7e..00000000000 --- a/lang/rust/avro/README.md +++ /dev/null @@ -1,743 +0,0 @@ - - -# apache-avro - -[![Latest Version](https://img.shields.io/crates/v/apache-avro.svg)](https://crates.io/crates/apache-avro) -[![Rust Continuous Integration](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml/badge.svg)](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml) -[![Latest Documentation](https://docs.rs/apache-avro/badge.svg)](https://docs.rs/apache-avro) -[![Apache License 2.0](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://github.com/apache/avro/blob/main/LICENSE.txt) - - - -A library for working with [Apache 
Avro](https://avro.apache.org/) in Rust. - -Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference. - -**[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich -data structures and a compact, fast, binary data format. - -All data in Avro is schematized, as in the following example: - -```json -{ - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] -} -``` - -There are basically two ways of handling Avro data in Rust: - -* **as Avro-specialized data types** based on an Avro schema; -* **as generic Rust serde-compatible types** implementing/deriving `Serialize` and `Deserialize`; - -**apache-avro** provides a way to read and write both these data representations easily and -efficiently. - -## Installing the library - - -Add to your `Cargo.toml`: - -```toml -[dependencies] -apache-avro = "x.y" -``` - -Or in case you want to leverage the **Snappy** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["snappy"] -``` - -Or in case you want to leverage the **Zstandard** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["zstandard"] -``` - -Or in case you want to leverage the **Bzip2** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["bzip"] -``` - -Or in case you want to leverage the **Xz** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["xz"] -``` - -## Upgrading to a newer minor version - -The library is still in beta, so there might be backward-incompatible changes between minor -versions. If you have troubles upgrading, check the [version upgrade guide](https://github.com/apache/avro/blob/main/lang/rust/migration_guide.md). - -## Defining a schema - -An Avro data cannot exist without an Avro schema. 
Schemas **must** be used while writing and -**can** be used while reading and they carry the information regarding the type of data we are -handling. Avro schemas are used for both schema validation and resolution of Avro data. - -Avro schemas are defined in **JSON** format and can just be parsed out of a raw string: - -```rust -use apache_avro::Schema; - -let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } -"#; - -// if the schema is not valid, this function will return an error -let schema = Schema::parse_str(raw_schema).unwrap(); - -// schemas can be printed for debugging -println!("{:?}", schema); -``` - -Additionally, a list of definitions (which may depend on each other) can be given and all of -them will be parsed into the corresponding schemas. - -```rust -use apache_avro::Schema; - -let raw_schema_1 = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - -// This definition depends on the definition of A above -let raw_schema_2 = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": "A"} - ] - }"#; - -// if the schemas are not valid, this function will return an error -let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap(); - -// schemas can be printed for debugging -println!("{:?}", schemas); -``` -*N.B.* It is important to note that the composition of schema definitions requires schemas with names. -For this reason, only schemas of type Record, Enum, and Fixed should be input into this function. - -The library provides also a programmatic interface to define schemas without encoding them in -JSON (for advanced use), but we highly recommend the JSON interface. Please read the API -reference in case you are interested.
- -For more information about schemas and what kind of information you can encapsulate in them, -please refer to the appropriate section of the -[Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration). - -## Writing data - -Once we have defined a schema, we are ready to serialize data in Avro, validating them against -the provided schema in the process. As mentioned before, there are two ways of handling Avro -data in Rust. - -**NOTE:** The library also provides a low-level interface for encoding a single datum in Avro -bytecode without generating markers and headers (for advanced use), but we highly recommend the -`Writer` interface to be totally Avro-compatible. Please read the API reference in case you are -interested. - -### The avro way - -Given that the schema we defined above is that of an Avro *Record*, we are going to use the -associated type provided by the library to specify the data we want to serialize: - -```rust -use apache_avro::types::Record; -use apache_avro::Writer; -// a writer needs a schema and something to write to -let mut writer = Writer::new(&schema, Vec::new()); - -// the Record type models our Record schema -let mut record = Record::new(writer.schema()).unwrap(); -record.put("a", 27i64); -record.put("b", "foo"); - -// schema validation happens here -writer.append(record).unwrap(); - -// this is how to get back the resulting avro bytecode -// this performs a flush operation to make sure data has been written, so it can fail -// you can also call `writer.flush()` yourself without consuming the writer -let encoded = writer.into_inner().unwrap(); -``` - -The vast majority of the times, schemas tend to define a record as a top-level container -encapsulating all the values to convert as fields and providing documentation for them, but in -case we want to directly define an Avro value, the library offers that capability via the -`Value` interface. 
- -```rust -use apache_avro::types::Value; - -let mut value = Value::String("foo".to_string()); -``` - -### The serde way - -Given that the schema we defined above is an Avro *Record*, we can directly use a Rust struct -deriving `Serialize` to model our data: - -```rust -use apache_avro::Writer; - -#[derive(Debug, Serialize)] -struct Test { - a: i64, - b: String, -} - -// a writer needs a schema and something to write to -let mut writer = Writer::new(&schema, Vec::new()); - -// the structure models our Record schema -let test = Test { - a: 27, - b: "foo".to_owned(), -}; - -// schema validation happens here -writer.append_ser(test).unwrap(); - -// this is how to get back the resulting avro bytecode -// this performs a flush operation to make sure data is written, so it can fail -// you can also call `writer.flush()` yourself without consuming the writer -let encoded = writer.into_inner(); -``` - -The vast majority of the times, schemas tend to define a record as a top-level container -encapsulating all the values to convert as fields and providing documentation for them, but in -case we want to directly define an Avro value, any type implementing `Serialize` should work. - -```rust -let mut value = "foo".to_string(); -``` - -### Using codecs to compress data - -Avro supports six different compression codecs when encoding data: - -* **Null**: leaves data uncompressed; -* **Deflate**: writes the data block using the deflate algorithm as specified in RFC 1951, and - typically implemented using the zlib library. Note that this format (unlike the "zlib format" in - RFC 1950) does not have a checksum. -* **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each - compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in - the block. You must enable the `snappy` feature to use this codec. -* **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library.
- You must enable the `zstandard` feature to use this codec. -* **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library. - You must enable the `bzip` feature to use this codec. -* **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. - You must enable the `xz` feature to use this codec. - -To specify a codec to use to compress data, just specify it while creating a `Writer`: -```rust -use apache_avro::Writer; -use apache_avro::Codec; -let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); -``` - -## Reading data - -As far as reading Avro encoded data goes, we can just use the schema encoded with the data to -read them. The library will do it automatically for us, as it already does for the compression -codec: - -```rust -use apache_avro::Reader; -// reader creation can fail in case the input to read from is not Avro-compatible or malformed -let reader = Reader::new(&input[..]).unwrap(); -``` - -In case, instead, we want to specify a different (but compatible) reader schema from the schema -the data has been written with, we can just do as the following: -```rust -use apache_avro::Schema; -use apache_avro::Reader; - -let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - {"name": "c", "type": "long", "default": 43} - ] - } -"#; - -let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - -// reader creation can fail in case the input to read from is not Avro-compatible or malformed -let reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); -``` - -The library will also automatically perform schema resolution while reading the data. - -For more information about schema compatibility and resolution, please refer to the -[Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration). 
- -As usual, there are two ways to handle Avro data in Rust, as you can see below. - -**NOTE:** The library also provides a low-level interface for decoding a single datum in Avro -bytecode without markers and header (for advanced use), but we highly recommend the `Reader` -interface to leverage all Avro features. Please read the API reference in case you are -interested. - - -### The avro way - -We can just read directly instances of `Value` out of the `Reader` iterator: - -```rust -use apache_avro::Reader; -let reader = Reader::new(&input[..]).unwrap(); - -// value is a Result of an Avro Value in case the read operation fails -for value in reader { - println!("{:?}", value.unwrap()); -} - -``` - -### The serde way - -Alternatively, we can use a Rust type implementing `Deserialize` and representing our schema to -read the data into: - -```rust -use apache_avro::Reader; -use apache_avro::from_value; - -#[derive(Debug, Deserialize)] -struct Test { - a: i64, - b: String, -} - -let reader = Reader::new(&input[..]).unwrap(); - -// value is a Result in case the read operation fails -for value in reader { - println!("{:?}", from_value::<Test>(&value.unwrap())); -} -``` - -## Putting everything together - -The following is an example of how to combine everything shown so far and it is meant to be a -quick reference of the library interface: - -```rust -use apache_avro::{Codec, Reader, Schema, Writer, from_value, types::Record, Error}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Deserialize, Serialize)] -struct Test { - a: i64, - b: String, -} - -fn main() -> Result<(), Error> { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#; - - let schema = Schema::parse_str(raw_schema)?; - - println!("{:?}", schema); - - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); - - let mut record = Record::new(writer.schema()).unwrap(); -
record.put("a", 27i64); - record.put("b", "foo"); - - writer.append(record)?; - - let test = Test { - a: 27, - b: "foo".to_owned(), - }; - - writer.append_ser(test)?; - - let input = writer.into_inner()?; - let reader = Reader::with_schema(&schema, &input[..])?; - - for record in reader { - println!("{:?}", from_value::<Test>(&record?)); - } - Ok(()) -} -``` - -`apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/specification/#logical-types): - -1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/latest/num_bigint) crate -1. UUID using the [`uuid`](https://docs.rs/uuid/latest/uuid) crate -1. Date, Time (milli) as `i32` and Time (micro) as `i64` -1. Timestamp (milli and micro) as `i64` -1. Local timestamp (milli and micro) as `i64` -1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` - -Note that the on-disk representation is identical to the underlying primitive/complex type.
- -#### Read and write logical types - -```rust -use apache_avro::{ - types::Record, types::Value, Codec, Days, Decimal, Duration, Millis, Months, Reader, Schema, - Writer, Error, -}; -use num_bigint::ToBigInt; - -fn main() -> Result<(), Error> { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "decimal_fixed", - "type": { - "type": "fixed", - "size": 2, - "name": "decimal" - }, - "logicalType": "decimal", - "precision": 4, - "scale": 2 - }, - { - "name": "decimal_var", - "type": "bytes", - "logicalType": "decimal", - "precision": 10, - "scale": 3 - }, - { - "name": "uuid", - "type": "string", - "logicalType": "uuid" - }, - { - "name": "date", - "type": "int", - "logicalType": "date" - }, - { - "name": "time_millis", - "type": "int", - "logicalType": "time-millis" - }, - { - "name": "time_micros", - "type": "long", - "logicalType": "time-micros" - }, - { - "name": "timestamp_millis", - "type": "long", - "logicalType": "timestamp-millis" - }, - { - "name": "timestamp_micros", - "type": "long", - "logicalType": "timestamp-micros" - }, - { - "name": "local_timestamp_millis", - "type": "long", - "logicalType": "local-timestamp-millis" - }, - { - "name": "local_timestamp_micros", - "type": "long", - "logicalType": "local-timestamp-micros" - }, - { - "name": "duration", - "type": { - "type": "fixed", - "size": 12, - "name": "duration" - }, - "logicalType": "duration" - } - ] - } - "#; - - let schema = Schema::parse_str(raw_schema)?; - - println!("{:?}", schema); - - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); - - let mut record = Record::new(writer.schema()).unwrap(); - record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be())); - record.put("decimal_var", Decimal::from(((-32442).to_bigint().unwrap()).to_signed_bytes_be())); - record.put("uuid", uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap()); - record.put("date", Value::Date(1)); - 
record.put("time_millis", Value::TimeMillis(2)); - record.put("time_micros", Value::TimeMicros(3)); - record.put("timestamp_millis", Value::TimestampMillis(4)); - record.put("timestamp_micros", Value::TimestampMicros(5)); - record.put("timestamp_nanos", Value::TimestampNanos(6)); - record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); - record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); - record.put("local_timestamp_nanos", Value::LocalTimestampMicros(6)); - record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); - - writer.append(record)?; - - let input = writer.into_inner()?; - let reader = Reader::with_schema(&schema, &input[..])?; - - for record in reader { - println!("{:?}", record?); - } - Ok(()) -} -``` - -### Calculate Avro schema fingerprint - -This library supports calculating the following fingerprints: - - - SHA-256 - - MD5 - - Rabin - -An example of fingerprinting for the supported fingerprints: - -```rust -use apache_avro::rabin::Rabin; -use apache_avro::{Schema, Error}; -use md5::Md5; -use sha2::Sha256; - -fn main() -> Result<(), Error> { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#; - let schema = Schema::parse_str(raw_schema)?; - println!("{}", schema.fingerprint::<Sha256>()); - println!("{}", schema.fingerprint::<Md5>()); - println!("{}", schema.fingerprint::<Rabin>()); - Ok(()) -} -``` - -### Ill-formed data - -In order to ease decoding, the Binary Encoding specification of Avro data -requires some fields to have their length encoded alongside the data. - -If encoded data passed to a `Reader` has been ill-formed, it can happen that -the bytes meant to contain the length of data are bogus and could result -in extravagant memory allocation. - -To shield users from ill-formed data, `apache-avro` sets a limit (default: 512MB) -to any allocation it will perform when decoding data.
- -If you expect some of your data fields to be larger than this limit, be sure -to make use of the `max_allocation_bytes` function before reading **any** data -(we leverage Rust's [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html) -mechanism to initialize this value, if -any call to decode is made before a call to `max_allocation_bytes`, the limit -will be 512MB throughout the lifetime of the program). - - -```rust -use apache_avro::max_allocation_bytes; - -max_allocation_bytes(2 * 1024 * 1024 * 1024); // 2GB - -// ... happily decode large data - -``` - -### Check schemas compatibility - -This library supports checking for schemas compatibility. - -Examples of checking for compatibility: - -1. Compatible schemas - -Explanation: an int array schema can be read by a long array schema- an int -(32bit signed integer) fits into a long (64bit signed integer) - -```rust -use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; - -let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); -let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap(); -assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_ok()); -``` - -2. Incompatible schemas (a long array schema cannot be read by an int array schema) - -Explanation: a long array schema cannot be read by an int array schema- a -long (64bit signed integer) does not fit into an int (32bit signed integer) - -```rust -use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; - -let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap(); -let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); -assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); -``` -### Custom names validators - -By default the library follows the rules by the -[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)! 
- -Some of the other Apache Avro language SDKs are not that strict and allow more -characters in names. For interoperability with those SDKs, the library provides -a way to customize the names validation. - -```rust -use apache_avro::AvroResult; -use apache_avro::schema::Namespace; -use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; - -struct MyCustomValidator; - -impl SchemaNameValidator for MyCustomValidator { - fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { - todo!() - } -} - -// don't parse any schema before registering the custom validator(s) ! - -set_schema_name_validator(Box::new(MyCustomValidator)); - -// ... use the library -``` - -Similar logic could be applied to the schema namespace, enum symbols and field names validation. - -**Note**: the library allows to set a validator only once per the application lifetime! -If the application parses schemas before setting a validator, the default validator will be -registered and used! - -### Custom schema equality comparators - -The library provides two implementations of schema equality comparators: -1. `SpecificationEq` - a comparator that serializes the schemas to their - canonical forms (i.e. JSON) and compares them as strings. It is the only implementation - until apache_avro 0.16.0. - See the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas) - for more information! -2. `StructFieldEq` - a comparator that compares the schemas structurally. - It is faster than the `SpecificationEq` because it returns `false` as soon as a difference - is found and is recommended for use! - It is the default comparator since apache_avro 0.17.0. 
- -To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the -`set_schemata_equality_comparator` function: - -```rust -use apache_avro::{AvroResult, Schema}; -use apache_avro::schema::Namespace; -use apache_avro::schema_equality::{SchemataEq, set_schemata_equality_comparator}; - -#[derive(Debug)] -struct MyCustomSchemataEq; - -impl SchemataEq for MyCustomSchemataEq { - fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool { - todo!() - } -} - -// don't parse any schema before registering the custom comparator ! - -set_schemata_equality_comparator(Box::new(MyCustomSchemataEq)); - -// ... use the library -``` -**Note**: the library allows to set a comparator only once per the application lifetime! -If the application parses schemas before setting a comparator, the default comparator will be -registered and used! - - - -## Minimal supported Rust version - -1.73.0 - -## License -This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/main/LICENSE.txt). - -## Contributing -Everyone is encouraged to contribute! You can contribute by forking the GitHub repo and making a pull request or opening an issue. -All contributions will be licensed under [Apache License 2.0](https://github.com/apache/avro/blob/main/LICENSE.txt). - -Please consider adding documentation and tests! -If you introduce a backward-incompatible change, please consider adding instruction to migrate in the [Migration Guide](migration_guide.md) -If you modify the crate documentation in `lib.rs`, run `make readme` to sync the README file. 
diff --git a/lang/rust/avro/benches/quickstop-null.avro b/lang/rust/avro/benches/quickstop-null.avro deleted file mode 100644 index 5b3bc7369aef55d0d897f02c78b94c2b448361fb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 166053 zcma*wd1&f;zUKQ|r>0%=JkPtvRkf>XueGwK8h6d}JWrRqPo3T8RNv}dhh23Jx9_>9 zvBehKmLMV`;*}s`2N4kw5eXq}i-<^CL_|Umksu+25JCtcgm54G-gDOP^ZQ<&&*!g% z_(fg`gg0LjAAj)I|Mo>MdF!+P_`;t)_2=LJ-E;r$+2xD>>B)cdhv)wIH-CQeGr#$# zC!c@z-#z!_XP*4lSD$?9$-n&jzsKkQ^#6T)@{fOd{%@cB=YM|cUk`o#4=?=rFYdWN z|K){0_)dQJS>wSU{@tIRf6nxjU;5kspL2hB{x1;z?x}zIHh%cr3xEF8^Z)vMPrmTn z@Bj3|-#q!xZ~mV@fZzY=Z=U=86VLy{AOHBj{Ks$q=l}FS{`(WLr~bo#8@}_uyzaj) zemDBR-~3mv{OYsM|K+(?e)A8%|KI=ovX4D|>zS9l`p3^c|2NP6@zvjW;n{zD?$7W3 z*u8uAZoU8Zox7jBcmKh|&%NyRKY#9pKRy44-#`2M@BZl@{`T))c=mbEk*6O$eDLsz z*M8;MKm6s7e|Y|hFTe2IbDl@<+`V)6{wrVh*2+ul!Y zPZHaQ9^3BRTKkB!-M#h9$GGj|y6q`y`xLP~{n&Q**4iCuTf6nl8n@loZ4asKGsO0} z$F{XwYhRGId$*o>hTFcZ+rCO|UnjP2KDOPvwf1dkyMODM?{eGsb=wcA?Z?FS)5o^^ zx7L0xZ4Yie^Gj~~wQl<@wf(Qe_P;&0J-D^@*V6Xz)-!*@ZU08M{T;RaTVng;W81@9 zYtKvDqg&7XJ-5A}+x|jr|43~A$79>0TWkNP*Y?E6ZX*o;7svev9T#{J)dgQfc%c{l zi|>DG*1|9H36Y1$i#Rd*BHb9{#&u?ldyR-h!f!+-l8P~9Fs8XN!;D$45wXbmjmX7@ zV$2(i1#T=d~UkC83$e?VzTQuA}2$|I5HT=+&E#zsn>|8?D>tz%D!Tp8H{soTrlH- z*NC_*{YK>FP%$1EjK|z~!i=Y0BO-I=Gu}aE&K2W@!Fb7ySIl_rF~YNU<2Rx+fxqIN z3I3Jdnb2R+7KZJyfPTC zx$%Y>122BunY-vr@Wt}Zbr+opy_mN!{9?U@kr&ezMqf-?81ov@nRUMrorx>Pgu$5P z#uPKAy+(8<<2Rx+S;d$$7&o{v&x{4H5uGXejp)p#Vk{YqWp1o6W7TU!XKH>UI#XAS z4TG`CjV)%}@*2^ZZNCwn*-?ybgR#SnU1scgjp$6@Z$xJXigDLq9CG7`8OL5DIy3Pb z(V3}Y+%p*WxpBsfbFUGdS@?`==*)p)TpEmr+<3%{#~vd*YbSmqI&-QR&kV+MZoFW| zORo`~x$+y)nQO&(V=x9@LOT&zJU z8qt}A--ym66=TX^Omky~8M9s^I+ODo(U}d!m^T;;+*oABO|KE1Dfx})Oj$8j48|%q z)|j#GHKH>OzY(2jD#n(JgHRWa5K#yU4Pn6c?KqBAYO5uMpmjN1m|4mY-$vEwzOGhM$Co#`pYzQH)) z#$9F{dX4DJ$Y;Ed&Wsh~#9*9q;~q2adyMd`&HP4mX08|)2IB!YE}8MrYeZ*`{6=)< zSTUX$jHldq#*F7)BRX^8H=;9_it)-|yynImW(>USacAzMGr^b1&fG_5LNDVj3>*Go zG4e9n!syFL3u9g*I` 
zzY(3;RE#BqvCNGXW~_RR=uFLTL}%)Xv0*Saxv|BJTV5kNv+XycGdqg0Z7_DYvCE7- zuMwT;`-~6JnSo;5H5iB7IAX@J#|Y2b#BW4rriyXTVBF`%88gnkMs#N3H=;8Kig9T$ z9&+OmGah@5=*)@Vh|ZiU#xsNQoEtBg@zQHVXRiE4bmm$y-WZI5m($J!Urss`dimoP zK0s%}FPANRfX+l-&RZCLxqdHozpxMapduMwR&@*B~aW5sx4FrISb88e=Hjp)pU--ym!D#j~=@tPZN zm@)8*$DMhE&IDf}JM##g3B7{1F#HO=g^^d#dtvkyq=hlB5uI808_}7#VoVr}Np4Ir zW7=y(XEJ^xI+In5IfHS78}rOq@EXyXqTh(lY%0c*!C2kT%p+s*MjuOE+P?Sr9@{lW!nDSUCQJfP;iR7Fr$}@xVoGUMw z@=_?#oGV9(=v*tx8-p_NN*bLY+vy0s@?XNZ`W;7g!msp}arH%HC-O?3#^@{cG{(4c zohjo&iS8sECBln_Z93|S*RFo})a*HdsnQ})c(Vn)WM0+}lvTIQGxU$cb1EEBFb{!?! zGgOo#gL2H36Q-OBCEBy+DAAsMML9Dl=Ulm9$^)T9dzOw8?KxDGM+W6FSDrBCsZgRl zXO0r>Iaiby2IVDJUNPmhP@+9IjuPz&yo$Fc_$s|Up;ysDhF?XlWsz5T3V9ptiN4BL z$lGX7Oi`{IlyR<1FlACG(Vmo}M0?VTGGkC?xiZI;8$yZpi967A_J%DzE4 z;L2U59111cGjfz@&sb4T49Y22?lI-QP@+9EM~U{#73IR9JmAVDQyvN>+H>S6(Vk;P zd16qWa^)FQo(m<~bKxk_o=Zh}Wl&yo^g_S`{xLa*k948K}0 zWaQPfkkMC@LdJv=?OAt}Xir>GCJf3XSEiUUEtF_a#;;tB31k&z&Y;}j$~;pRyvo&@ zK+#d6J)4TMWKfp5vci;Ap+tLXjuP#uE6RpJ+2qO=Q*H?*+OzE_(ViVe*)}LUT-jyH zo=~DaeMgD*3>4+AK{@2g5mSzZ6788dO0;LHDEAD?eXg7_r;75-pgiZw3#PmjO0?(7QKCK9it@&w47`T6C-@rDp3rMNx3jxw zPxv*yLf%DtBCp|vjJ`%MWQ;4^e%cXQ(Jg2IZJ5CrmjNO0;LsQKCKjigIR9&be~Ilm|kI z_ADJG+H>HE=uH0qHp-`edBS(q$j1}d?pqz5$9#ifMCE7D{lxWXf zQ7#P11Fl>$<)KibJx7ib?KxJICkEvySDrEDxlp1#7mgC`xm1)_2IVzZ-Y{k0b)NRz zLwka+^WDx?LjqsGEUgc_uVBJxoJ#j^uFesB;nPSScP@+8< zM~U`i6=lw#+~CSQQx=30?I}7+v}aRMmJG@=S5}y^DwJqX%~7H~bw$}QD4Sf_V#+O{ zM0>U!CEBy2DBA{Qhby~G*%L~%r|&4yo`Is=H7JK%IbzDOP@+8(M~U`K73H2mxzCj| zrko2U+Ou$!XwQM7TpE;zTzSNl$3ltroH$Cf=TuRi8IvBN+!0E&r|l@wo{pmI8k9Y*>@(#+DAAr>M~U_f73IjF9CPJ_DW^h-_Ut)I zv}a#Y&J4;qS1y?HKq%3krK3c94i)8*L3zxTCro)NlxWYHU%47AI9HSx2IVDJUNPmh zSGig&xN($dPv8ywb{2et-k#7KXd%OIAcc&)!BfbGXixMFzCu1kdt!=m-JpzfWr8V_ zLW%aI93|S5R+JfoGRu`YrrZ!pv?uQ<(Vl{$EE<%XTv=kuvQVNu6-SBoR25~-psaIc zgDIOriT1P{CEBy4D7OvD9j|u^Ot~+VXwS@1qCIm(xiBaXxN^yqheC<=9Ql>25rbnzd16qWa^)FQo_m$66@v>$ ziS}G7$}5BNnk#RZGVn%Edmf=Z!8iKa^9bz;y^$9(Y5u!(|43;&p=V`8k9q>95Lls 
zDAAsYqeOeAigM4O+~>*}Q_h7F?OFJgcUHRw2a0lOP#$vS5mO#}lq=BMiK9e&P8H>u zL3z%V7fg96lxWYDqeOeI73GaV8F&+IPw-8oJ)t)}F65o3(Vp;|WFhZ7jrK&|#0wdH zlYWPcapgKw#)T5?NjOThC#fh?24$KnGfbHkO0*~EDAAq`MVU7!3tU-b%1xm}drFQH z?I|nDia}ZB${JJFg%a&)I7+mqsVG|p^e%cXQ(Jg2IZJ5CrmjNO0;LsuUzdK>?_KdK{@Bj1yde)m8)HYrK3c94i)8*L3zxT zCro)NlxWYHqeOeo73GCNdC8SmOnEJoXwQwKM0)~n=Isf-S#MA1&9so=HD6-vcZ&1p+tLHjuP$JQk2^UY2h8q6Fe+A~*_3xo22E0;`pD3oZ= zk)uR=juqvJL3zrRXH0o6lxWX|qeOcy73GyddCiqKOc{8Kr#*Mjp5R-2?YV>YgxC=%DGUYJqt&P_8ch6 zr9pYfl}Ai@ER<-^iK9e&P8H>uL3z%V7fg96lxWYDqeOeI73GaV8F(vgPw=gzJ)yUH z3V9dp3BT1>$h&AyF=)6-u-x=P1#h z4MmwZC<|O!WXesUM0-k(674A~%8Ef*<;ogU)`b%7X*f!>r>Q7g2IUr4ZZqYMP@+9; zzjC!}&{33KgR;k!eWo0Em8)HYT}O%b3>D?bpd54Mgej*&iT3O{O0;KRQO*p?Iae;2 z@<1rjo~5Hidkz)lkwJOPl_yMjDwJr?nWIE|&K2c_L3zoQS4?>=lxWY5qeOcGZ{zI= zzD;jW=xwx+;kS`OM&9NrUwiJMJ)yVrLWbY27c%m8TFB_zNg-oGiT12JO0*}gC=&){k}Fe8nHEa4 zC*vs5o~)wG8I&7bnPN76W>B7U%$vH~2XG2lu4ax#n7MXHWDAAshU%A>fC@ac}L0RR>8dKK2%GItx z!%?C=O-0!i967A_J%DzE4;L2U59111cGjfz@ z&sb4T49Y22?lI-QP@+9EM~U{#73IR9JmAVDQyvN>+H>S6(Vk;Pd16qWa^)FQo(m<~ zbKxk_o=Zh}Wl&yoM4(VkO9 zd1g?abL9n7UJ51JbLA+}o@+&UV^9X(P1_TEH)&7k-JU`|LVLpR_7(CG+7o#rt*iYdJ@W_G~E1yg^yu$|6&43MJZ8 za+GLKSy5ID$|_gZn6fUEXivjYqCHJT*)k}%xN@5*cZ3q{X*)`^r=uvl24#;c`%F0y zO0;L!QKCIVML9Ak$6PsK%BfJIJ$sH4?b%n9GlO!@l?$dk5K6RX=_t{jLq&OHP#$yT z2~(a5CE9c5DAAsCMR{RRUUKCXQ(g-t+H>P5(VoD2czc5H(c2Sx4=rT)J*1G4_dG7- z-KWu>=zC-#@2++YVv2Izpp0{6f+>?;iS`r} zWznGAZDx24$Tq8%)_0O0=itDAArRMY(NI?r>$BDLX=m_H-R3 z+S5~%eS>nqmAgzi6iT#bS4r*VCTcXixCHzV@tk4MOkb zg$%z}FJ$Dsw2;yFl0wG3%GIvHx}!vU;)*h1P$s!D#gu8GM0+xh679(<%A7&D!IgQY zEC?mqQ*@MQ&!(a*8I)zNtT1I&DAAspqeOe^in3u)Ho3CJlv_fH_G~*!v}Z?AwhhV- zS9Y1QCzNPU-%+AH14X%OP!73r#FS&9M0+NV6788P$~}W}pDSlfITuQ_XW=N(o&!a> zG$;?b@`x#qg%a&Kag=D!siHhHD9^d_f+;VB679M2D_6S)*NXDSpbWf^wkP;L(w@-! 
zJcV5C8ie2HE970YC-OdC$msj@LdLjqohjo&iS{HMCEAlzlqrKU&6OFZ%nBvilXH}4 z&xWGR8sC-i<=$ng6~AtUei6mkviiN4=g$ThSlrYP49$~adhm@+ApXiv&fqCIIvnK3A{ zT$y9a4WUGP@{SViDJaUKLAlA5C8jJ3CE8PQlxR;?QPvE~I#)KBvMH2kPs>rFJzI)$ z+o0Ux$~IGWgc9xPI!d&srzrad<$x=9nQ|zUXwS$|qCI0pIWZ`wT)D@T`$CEK%p4`! zGgp)ggYtkYmrQvmlxWY9U%A>fI98M=2IVPNo-yUQSGn3XxNwwc&!wWgGAOUP@`fn` zAMmv29@-Q9fUiCG(4No-cp<|d&$hk_R~|9tu~)g;H8^pUXwRvlJToZIx$=T3FNG5AxpI_f z&$XhwF(?C1()I+OB<%@3=_%xWv?u(euaNiAp2(BDkkKdgLdLjqohjo&iS{HMCEAlz zlqrKU&6OFZ%nBvilXH}4&xWGR8s;Aj%BE1FJuOFx_G~H2ZG&=$E89%j5lXbD>nPElo}%m|u^Ou6q>u67M(juP#eE6RmIdBBxRraTl%wCBiCqCLlo^2DG#<;pXrJQqr| z=fY8?`CWv?uamUdZT& z^+LwDa-AvTLW%Yy93|S5RFo-$GR>74rpyW@+LLpXXwQbC%o~&it}HU;rck0iB}a+& zloe&gpsaFbjVbFwiS{%cCEC+elr4jDiz~O8az`l9p0-c9w%Rr5D9Wxu+2hJSQw}`J z6=-eOQKCIVML9Ak$6PsK%BfJIJ$sH4?b%n9GlO!@l?$dk5K6RX=_t{jLq&OHP#$yT z2~(a5CE9c5DAAsCMR{RRUUKCXQ(g-t+H>P5(VoCZczc2$(c2UH2rXpzBcza#k325q z+S6!H^dqv6Yfqy+F-5s675MV%8Wso<;omWZU`mXlXsM8PeD-@ z4a!ZfEHPzSDAAsZqeOeEin3-<*1595lue;Tds=?wYS&;(QEnTQJ6zdj%8pmL+BN7p zO0=h^DEkKGfGc;IawwE&&&W}tJ!3^VF({{8xyO|ILW%aw93|Q_SCk8b@_;LsOnE4j zXwQ+OM0<`E<%vOg%9UqKc`lS_&xNBzdoC5_l|gyUl{ZWo_^78nx6z*9M}6(NjrN2- z$_p9(s9wm(M`s12_@RI?kLfoxS~uLlu52kF=bjP(VmQ>M0>J|GG|b3aAlq; z3qpzZ6dfhnv#BUc24$HmD@<7xO0=iuSFUyq>WZ>qP&T=;#gtoK4l7OP*PyH@D+XniD{D+y_bOMr1`S7v_B0h`%b?ui%5A3H z5lXbD?I_Wnj-u=uls&HOGvz=i(VksLiS`T?<;b8MbLE67r$UML>^VxbXJ1jy49YoI zE|~H_DAAs!qeOcS73GmZdCZk3OnEAlXwR9WM0?H^<%L0c$(2`3c`cM^&yAx*djhw3 zdxE$0_JnTHLWXaVLPlMT2saD@#mS_9|Dq1{Ft%_EZ&R&7iDvWrHc3LW%aY93|Sb zr6{)z${nt3Gi668(VniOM0Q$@LF zQ0{Z(j49_riS{fUCE9bKD3=E1Ay*zT<*`trJtvM5?KxGHX9ndtS6(pXrBI?hSB?_x zxmJ`n24&zAv^~L3koJT=;VI-jv?u%tUm@?IJ&{lFLPkHK7c$0`>r5FJO0*~8SFUyq zl8Q2AP^P&u!<1RCaq3e4 zG#n+`(^Ql#gK~>2x0!NBDAAs_qeOc;in41^_PDaolmnqedv+Zq+A~y?BZG3xl@q3% z3MJaJ=P1#heMLDlDCb8u3R$Zp-`edM~)KhIaZV>2IVPNo-yUQP@+8- zjuP#;RFqc+$h< zrW^|;+B0#KXwOtp?irN(TsdRPxlp1#3rC6e94N}AL3zlPM@)GvlxWY1qeOd773G;h zdCrv=OnE7kXwQ{jx!N_jR+KjeW#Ch^J;6_r_Jls=DdcL`Ap9v`As?YVkx%hLMn9z& 
zGRBqbOc@tSv?t*x(VnEDOc|7EuFNoHRw&V)oTEg0HWX#vpe%4@ktsKY674BDO0=h} zC@ThKl`CsZSr?_KdK{@Bj1yddfCEBxelxWYPqC7GvkGb-MDNltG z?K$%)@2z$X&K2c_L3zoQS4?^BQLaF1H;xkR34EHjC-`Z-J)uw2LWVz03K{wI<3iqh z8tsXGS{Cx&(`ZjjQLY=5ajr}-Wl|{7o|L0Rd(w(BV^C(fGRKq~LW%a|9VOaRP?SZ3 za+51dOj#C6w5Q@I(VnWJtQnMbu52)6Qz+4%mZL;_wiM;ILAk?~ZKmu9CEC+vh$)Y~%GIvHiK9e&P8H>uL3z%V7fg96lxWYDqeOeI73GaV8MsZ`6TD5@6T0mw zSFST7?f46tTAO>DAAsVqeOd}in3);ZgJ%{Q|<^Q+S7KF zXirB`b`8oNSN555Ae3m&uA@YIhKh1zP>#8B!jw~?M0@u9%GIvHzM`BNlyk0JFy(<) zx!N^YI!d(XP*ENkl*e3o!jz{%iT0d1O0?%(QC=97mt1+pl-ELu_S`s1v?p+fw675MV%8Wso<;omW zZU`mXlXsM8PeD-@4a!ZfEHPzSDAAsZqeOeEin3-<*1595lue;Tds>bX?b%Y4+Xm$h zSGJk5Ba~=Q*HNN9Jw@3!Cub*%+7r6V3mLwv7cz2}7BYI56f!21XwSN%M0?_jGGS0AxiZC+X`w`WGL91M z$tud6LAk+|d8RA~CE8PTlxWYUqAVGdWv;9+WmPEAo|>aXd+Lg^VNf=?vc;5JLW%Zl zJ4&=?M^Uy7$_`g{nX)I8Xiwj-Tf#IpoR_Q;xmL)vm$BQKCIlMY(5C?sMgg zDd$3o_ADGF+H;^Nmj>k_R~|9tu~4EtCyo;BIaQQr2IV4l7O8dKJV676X?O0=h`C|d^Q z7FTXF<&IFIJ#D{owQJB(lwE_e$CZ7i9C($hU4vamiS`T?<;b8MbLE67r$UML>^Vxb zXJ1jy49YoIE|~H_DAAs!qeOcS73GmZdCZk3OnEAlXwR9WM0?H^<%L0c$(2`3c`cM^ z&yAx*djj`(dxH1$_Jr=yLWb{=LPqX+3V9#xiQe-S@;=%VQu9i678utO0=h{C~F2~ohutm*%V5& zr{!0!b`7=^<+ee&!9LWb|_g^b*%g^b=Og^UR$+OzH`(Vn=XOc<0& zu1qmyS}4(;jH5(*vWhZiP;PK#o+%4LiS`s7CEBy8C`$%qnJX(ySrtmOr{-6#b`9!^ zvSCm*xw6HSTVCaA*I?UGqCGo`vTaayxU$QXJ)uN<`i>Iq87RtKgL24~Bc>b+CE7D_ zlxWXXQSKR(`&>C=%DGUYJqt&P_8ch6r9pYfl}Ai@ER<-^iK9e&P8H>uL3z%V7fg96 zlxWYDqeOeI73GaV8F)b36MR706MEn&YphiFga0WW0qfnLZMSFSTf+Kv+K=_tysLD}QVK2r{a67AV_lxWXTQH~7C zF;`BQaw?Q)&z_@1d-fIO%%Gfe<$@^>gc9vpI!d(XP*ENkl*e3o!jz{%iT0d1O0?%( zQC=97mt1+pl-ELu_S`s1v?uV8w675MV%8Wso<;omWZU`mXllLj_uXYUzin3@>ZgORbDa#(^3ba;n zlxR;?QPvE~I#)KBvMH2kPs>rFJzI)$+o0Ux$~IGWgc9xPI!d&srzrad<$x=9nQ|zU zXwS$|qCI0pIWZ`wT)D@T`$CEK%p4`!Ggp)ggYtkYmrQvmlxWY9qeOd-73GORdCHY% zOnEMpXwQYCM0+k3<&{Bs&6PJy8F=)#J@=nRdxDQ-d+tAt_JkhsLWUpdg^WC+g^WHT zg^UR$+OzH`(Vn=XOc<0&u1qmyS}4(;j9Iq z87RtKgL24~Bc>b+CE7D_lxWXXQSKR(`&>C=%DGUYJqt&P_8ch6r9pYfl}Ai@ER<-^ ziK9e&P8H>uL3z%V7fg96lxWYDqeOeI73GaV8TbrsPw+FOJ)zHd3V9pt34g{{$lGX7 z>89k 
zuIw}AKq%3kT}O%b3>D?bpd54Mgej*&iT3O{O0;KRQO*p?Iae;2@<1rjo~5Hidkz)l zkwJOPl_yMjDwJr?nWIE|&K2c_L3zoQS4?>=lxWY5qeOcGpXKcdepYW!=(Dtt;m?vn zMn3B)f zx{ea<=_$&-K{?>cU8WogCE7D`lxWXbQBDlXDOc_><-Sm&Ju^p%_RJOK!k|3h$|X}C z3MJZeC-^yEdse#! zq0jL`hCinlGV(cE$mr)tA!AQ&E--$}(40n6fIAXiv>iqCIs**)S-ZT-jpEEulnvwjCwfv!f{6 z24#mUyG+>=O0=i%DAAsQqTDqohg>;g%CS(QJrhTX_DmJ!oD3%o-@C4wQF#$C@&1kORl_P%4@H3wQF$WDAAt4 z7kGPuU(nkV`T{Lv_zR?vkuP`(c^~bGe!*AB`)E&0QLY=5ajr}-Wl|{7o|L0Rd(w(B zV^C(fGRKq~LW%a|9VOaRP?SZ3a+51dOj#C6w5Q@I(VnWJtQnMbu52)6Qz+4%mZL;_ zwiM;ILAk?~ZKmu9CEC+Q zD_cytC6s8-wxdLQb`)jXpzLsEmnnNfiT3mzCE7Dkl)DDykSj+_ITlK^XW}T)o~feT zGbs1Da>kT%p+tKYe&uS{;6PC>4a!5VJYvdYuX43(aN;P@o>N76W>B7UF=)6-u-x=P1#h4MmwZC<|O!WXesUM0-k(674A~%8Ef*<;ogU)`b%7 zX*f!>r>Q7g2IUr4ZZqYMP@+9;M~U`y6lK?->~UqEDF;G{_Ut-Jv}dR&M+W7XD<@1j z6-u;c&#zqV8tf~|nL#<{$^}y%c$KSNgQcTHdkz)lkwJOPl_yMjDwJr?nWIE|&K2c_ zL3zoQS4?>=lxWY5qeOcGU*hcveo1dn=u5Pa;V+RwM!w`JAE7-lMY(QJ z#@${bT}2qoH+ca&&PK~WYB%1y2;F=bgO(VmKqO2H{Rj#ZtWnC!Io`$1Ddzy-}Wl(N$&rng049YQAPMC5klxWYMqeOf573IvJoO9)ZDG!7a?O8fXwC7M! 
z9vPI!TzSHjr$UMLoHdBv31LW%a=I7+l9@KxTP;8*qbguY4(8U89M zWaO)!Lf%1pqF?nD@($V)Qu9i678utO0=h{C~F2~ohutm*%V5&r{!0!b`7=^<+ee&!4l7ZjTSQcHB!i!P@+BSjuP#OE6RjHndHh8Q>KLy?a4Swv?r@5a|Y!G zSLT_rAe3lN(NUs3n~JhzP?ou}!jx5^M0;v}r5FJO0*~8DAAszqD&c-X|BvLWmYKB zo}8mZdo~nh-k>aSWsxa2g%a&4`IW0(gR-Km7?f46tTAQXt6c3GG#n+`(^Ql#gK~>2 zx0!NBDAAs_qeOc;in41^_PDaolmnqedv+Zq+A~y?BZG3xl@q3%3MJaJ=P1#heMLDl zDCbT-jjCrck0i zEk}v=Y$?iZgK~!}+f3OJO0=izDAAstqU;-#1Fqa<%AruAJtIen_KX$f#GssV?q2%LD}KTE>re|67A_bO0;L7D0dCYAy$674xvlxGIzIagjV<)u)f zJy(tr?YUNzHwIsQMNkIn5qCIg%nJ_4mT$y6Zv{0fw8AplsWEEx3pxoffJX02g674BEO0;KFQI-tK zGFMiZvMQ8lPt8%HJ#|IdFesZ`*<#8qp+tMO9VObcqbS=3Wrr)fOxY7kw5RVV(Vl^# z+%+hNTsdOOu~4Et6Gw^mOcmvxLAlSBGp3viCEBxalxWX^qFfr3hg^BYl*dAe_MA9M zwC7Y&o*9(qTzSEimqLm5T=|u&U4v^yd1FupzDwH^{4Qxv=(~>#`Ea#s5dN;OkhjsE z$ai@mquYYcv!y7v4ayy^Y%^s?DAAs- zqeOdpin4D|4!ClcDThLd_KX}Q+A~&^6N7Tfm3vINFO+D{%u%8}b49r@C=a-D$&`me ziS``%m8)HYV?}vlP@Zz-8B?Bnm8)HY3rC6eTq?>dgYudyZQ&E--$}(40n6fIAXiv>iqCIs**)S-ZT-jpEEulnvwjCwfv!f{6 z24#mUyG+>=O0=i%DAAsQqTDqohg>;g%CS(QJrhTX_DmJ!os;Aj%BE1FJuOFx z_G~H2ZG&=$E89%j5lXbD>nPElo}%m|u^Ou6q> zu67M(juP#eE6RmIdBBxRraTl%wCBiCqCLlo^2DG#<;pXrJQqr|=fY8?`C0v?uaoUdZT=^+LwDa-AvTLW%Yy z93|S5RFo-$GR>74rpyW@+LLpXXwQbC%o~&it}HU;rck0iB}a+&loe&gpsaFbjVbFw ziS{%cCEC+elr4jDiz~O8az`l9p0;1P+BN7X%C14#TwS$C9ZPh3$Z49X-|rkFAKw6^Ui(ViVe*)}LUT-jyHo=~DaeMgD* z3>4+AK{@2g5mSzZ6788dO0;LHDEAD?eXg7_r;75-pgiZw3#PmjO0?(7QKCK9it@&w4E&6?C-^hcp3u)87xK~5XixZOvXGCS zMtdSZvc{HE1|Ww5O>kTL$G8S8g-qj!>dKZAXdrbQERR zpzLvFpD71IiT3O|O0;LFC`Sh6m@6ktITcE@XU|chJ^PAsW>C(#a>0}bLW%Y)9VOax zs3?yN%44oPVaijXM0?I0CE9bYC@&1kORl_P%4?xSdu|*h+7tLWZ%^>&dV4}Yr-cmv zoD?$hb59{}qdn1|`wDp*?TIPMb%Qd_l?kRy3MJZ;a+GLKT2W>U$}Csrm~ulX(Vo0t zx!N@-D9WNixyh9!rYw7vt6hVNqeOeEin3-<*1595lue;Tds>bX?b%Y4+Xm$hSGJk5 zBa~=Q*HNN9Jw@3!CY1b^Xc z&mFWU^b20d@Gta2Mt(sH8T|z*WK1a0o^?lw_QVxs!k|oYWr``&LW%Zd{L0m?K~_=b z49X3z%rj-dt6c3G6dfhnv#BUc24$HmD@<7xO0=iuDAAs}qHGwHO|EP)<(5#QJ==~F z?b%V3ZG*DIm0hOn2_@Roca&()KvC`*ltZo@G38h&(VmH;M0=)+a?haL=gJvV&V>@~ 
zSvX3x=Ri>|4a!5VJYvdYp+tL593|RwswmG4%5$!~V9HCOM0>6rCE9bXC~pkPz%OZg zg1;o~3H{Pj$h&Ay_?Nyy-bH&NzvP9C{!%Ywj4RifGA@*8Pr|QU?HVK%Wy+vTb7h7p zvtH$D*C6L8(Vh)OnKvj4Tv=qwO`$}4N{$lkDJ#l~L0RR>8dKJV676X?O0=h`C|d^Q z7FTXF<&IFIJ#9yc_H-0w*P!fiWuGYrLW%b5I!d%>s3=DU<(MlcOgR-wv}eyzqCNYH za%ND@xpKjj2SSPVEFC4m@QS9*IwzoLZ<|B4hc@+(gv*U+BmuY84E?Ha@s<+?!`=gI_ACcVnlu0hIC zqCIIvnK3A{T$y9a4WUGP@{SViDJaUKLAlA5C8jJ3CE8PQlxR;?QPvE~I#)KBvMH2k zPs>rFJzI)$+o0Ux$~IGWgc9xPI!d&srzrad<$x=9nQ|zUXwS$|qCI0pIWZ`wT)D@T z`$CEK%p4`!Ggp)ggYtkYmrQvmlxWY9qeOd-73GORdCHY%OnEMpXwQYCM0+k3<&{Bs z&6PJy8ThrQJ@?R_;IDn{S?wBxe$5LR{^hlU0;CgK~o_^GsO~O0=iwDAArxMOiW^%UoGu%BoPJJvB#(_S6++ z!=P+(Ws51dgc9x9c9dw(j-qTElpU_@GG$LF(Vo7eM0*B`a@U|7a^;9A$3ltrOdKWJ zGgXv(2IW3i&X{s8lxWYwQKCHuigIaC9&+UoQyvQ?+H>M4(VkO9d1g?abL9n7UJ51J zbLCgAb`7o-<&8lZ_zi7O@HeDAq2G84x!N@d|HfCy2WU^^H@uM1-{^&mapgKw#)T5? zNjOThC#fh?24$KnGfbHkO0*~EDAAq`MVU7!3tU-b%1xm}drFQH?I|nDia}ZB${JJF zg%a&)I7+mqsVG|p^e%cXQ(Jg2IZJ5 zCrmjNO0;LsQKCKjigIR9&be~Ilm|kI_ADJG+HD6-vcZ&1p+tLHjuP$JQk2^Uz<`_A39 gd-uQaAHV&d|I`2Y?@z>@`VaqY_|E_Gy8rtB0L9uwtpET3 diff --git a/lang/rust/avro/benches/serde.rs b/lang/rust/avro/benches/serde.rs deleted file mode 100644 index 141f7603b14..00000000000 --- a/lang/rust/avro/benches/serde.rs +++ /dev/null @@ -1,335 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{ - schema::Schema, - types::{Record, Value}, - AvroResult, Reader, Writer, -}; -use criterion::{criterion_group, criterion_main, Criterion}; -use std::time::Duration; - -const RAW_SMALL_SCHEMA: &str = r#" -{ - "namespace": "test", - "type": "record", - "name": "Test", - "fields": [ - { - "type": { - "type": "string" - }, - "name": "field" - } - ] -} -"#; - -const RAW_BIG_SCHEMA: &str = r#" -{ - "namespace": "my.example", - "type": "record", - "name": "userInfo", - "fields": [ - { - "default": "NONE", - "type": "string", - "name": "username" - }, - { - "default": -1, - "type": "int", - "name": "age" - }, - { - "default": "NONE", - "type": "string", - "name": "phone" - }, - { - "default": "NONE", - "type": "string", - "name": "housenum" - }, - { - "default": {}, - "type": { - "fields": [ - { - "default": "NONE", - "type": "string", - "name": "street" - }, - { - "default": "NONE", - "type": "string", - "name": "city" - }, - { - "default": "NONE", - "type": "string", - "name": "state_prov" - }, - { - "default": "NONE", - "type": "string", - "name": "country" - }, - { - "default": "NONE", - "type": "string", - "name": "zip" - } - ], - "type": "record", - "name": "mailing_address" - }, - "name": "address" - } - ] -} -"#; - -const RAW_ADDRESS_SCHEMA: &str = r#" -{ - "fields": [ - { - "default": "NONE", - "type": "string", - "name": "street" - }, - { - "default": "NONE", - "type": "string", - "name": "city" - }, - { - "default": "NONE", - "type": "string", - "name": "state_prov" - }, - { - "default": "NONE", - "type": "string", - "name": "country" - }, - { - "default": "NONE", - "type": "string", - "name": "zip" - } - ], - "type": "record", - "name": "mailing_address" -} -"#; - -fn make_small_record() -> anyhow::Result<(Schema, Value)> { - let small_schema = Schema::parse_str(RAW_SMALL_SCHEMA)?; - let small_record = { - let mut small_record = 
Record::new(&small_schema).unwrap(); - small_record.put("field", "foo"); - small_record.into() - }; - Ok((small_schema, small_record)) -} - -fn make_big_record() -> anyhow::Result<(Schema, Value)> { - let big_schema = Schema::parse_str(RAW_BIG_SCHEMA)?; - let address_schema = Schema::parse_str(RAW_ADDRESS_SCHEMA)?; - let mut address = Record::new(&address_schema).unwrap(); - address.put("street", "street"); - address.put("city", "city"); - address.put("state_prov", "state_prov"); - address.put("country", "country"); - address.put("zip", "zip"); - - let big_record = { - let mut big_record = Record::new(&big_schema).unwrap(); - big_record.put("username", "username"); - big_record.put("age", 10i32); - big_record.put("phone", "000000000"); - big_record.put("housenum", "0000"); - big_record.put("address", address); - big_record.into() - }; - - Ok((big_schema, big_record)) -} - -fn make_records(record: Value, count: usize) -> Vec { - std::iter::repeat(record).take(count).collect() -} - -fn write(schema: &Schema, records: &[Value]) -> AvroResult> { - let mut writer = Writer::new(schema, Vec::new()); - writer.extend_from_slice(records).unwrap(); - writer.into_inner() -} - -fn read(schema: &Schema, bytes: &[u8]) -> anyhow::Result<()> { - let reader = Reader::with_schema(schema, bytes)?; - - for record in reader { - let _ = record?; - } - Ok(()) -} - -fn read_schemaless(bytes: &[u8]) -> anyhow::Result<()> { - let reader = Reader::new(bytes)?; - - for record in reader { - let _ = record?; - } - Ok(()) -} - -fn bench_write( - c: &mut Criterion, - make_record: impl Fn() -> anyhow::Result<(Schema, Value)>, - n_records: usize, - name: &str, -) -> anyhow::Result<()> { - let (schema, record) = make_record()?; - let records = make_records(record, n_records); - c.bench_function(name, |b| b.iter(|| write(&schema, &records))); - Ok(()) -} - -fn bench_read( - c: &mut Criterion, - make_record: impl Fn() -> anyhow::Result<(Schema, Value)>, - n_records: usize, - name: &str, -) -> 
anyhow::Result<()> { - let (schema, record) = make_record()?; - let records = make_records(record, n_records); - let bytes = write(&schema, &records).unwrap(); - c.bench_function(name, |b| b.iter(|| read(&schema, &bytes))); - Ok(()) -} - -fn bench_from_file(c: &mut Criterion, file_path: &str, name: &str) -> anyhow::Result<()> { - let bytes = std::fs::read(file_path)?; - c.bench_function(name, |b| b.iter(|| read_schemaless(&bytes))); - Ok(()) -} - -fn bench_small_schema_write_1_record(c: &mut Criterion) { - bench_write(c, make_small_record, 1, "small schema, write 1 record").unwrap(); -} - -fn bench_small_schema_write_100_record(c: &mut Criterion) { - bench_write(c, make_small_record, 100, "small schema, write 100 records").unwrap(); -} - -fn bench_small_schema_write_10_000_record(c: &mut Criterion) { - bench_write( - c, - make_small_record, - 10_000, - "small schema, write 10k records", - ) - .unwrap(); -} - -fn bench_small_schema_read_1_record(c: &mut Criterion) { - bench_read(c, make_small_record, 1, "small schema, read 1 record").unwrap(); -} - -fn bench_small_schema_read_100_record(c: &mut Criterion) { - bench_read(c, make_small_record, 100, "small schema, read 100 records").unwrap(); -} - -fn bench_small_schema_read_10_000_record(c: &mut Criterion) { - bench_read( - c, - make_small_record, - 10_000, - "small schema, read 10k records", - ) - .unwrap(); -} - -fn bench_big_schema_write_1_record(c: &mut Criterion) { - bench_write(c, make_big_record, 1, "big schema, write 1 record").unwrap(); -} - -fn bench_big_schema_write_100_record(c: &mut Criterion) { - bench_write(c, make_big_record, 100, "big schema, write 100 records").unwrap(); -} - -fn bench_big_schema_write_10_000_record(c: &mut Criterion) { - bench_write(c, make_big_record, 10_000, "big schema, write 10k records").unwrap(); -} - -fn bench_big_schema_read_1_record(c: &mut Criterion) { - bench_read(c, make_big_record, 1, "big schema, read 1 record").unwrap(); -} - -fn bench_big_schema_read_100_record(c: 
&mut Criterion) { - bench_read(c, make_big_record, 100, "big schema, read 100 records").unwrap(); -} - -fn bench_big_schema_read_10_000_record(c: &mut Criterion) { - bench_read(c, make_big_record, 10_000, "big schema, read 10k records").unwrap(); -} - -fn bench_big_schema_read_100_000_record(c: &mut Criterion) { - bench_read(c, make_big_record, 100_000, "big schema, read 100k records").unwrap(); -} - -// This benchmark reads from the `benches/quickstop-null.avro` file, which was pulled from -// the `goavro` project benchmarks: -// https://github.com/linkedin/goavro/blob/master/fixtures/quickstop-null.avro -// This was done for the sake of comparing this crate against the `goavro` implementation. -fn bench_file_quickstop_null(c: &mut Criterion) { - bench_from_file(c, "benches/quickstop-null.avro", "quickstop null file").unwrap(); -} - -criterion_group!( - benches, - bench_small_schema_write_1_record, - bench_small_schema_write_100_record, - bench_small_schema_read_1_record, - bench_small_schema_read_100_record, - bench_big_schema_write_1_record, - bench_big_schema_write_100_record, - bench_big_schema_read_1_record, - bench_big_schema_read_100_record, -); - -criterion_group!( - name = long_benches; - config = Criterion::default().sample_size(20).measurement_time(Duration::from_secs(10)); - targets = - bench_file_quickstop_null, - bench_small_schema_write_10_000_record, - bench_small_schema_read_10_000_record, - bench_big_schema_read_10_000_record, - bench_big_schema_write_10_000_record -); - -criterion_group!( - name = very_long_benches; - config = Criterion::default().sample_size(10).measurement_time(Duration::from_secs(20)); - targets = - bench_big_schema_read_100_000_record, -); - -criterion_main!(benches, long_benches, very_long_benches); diff --git a/lang/rust/avro/benches/serde_json.rs b/lang/rust/avro/benches/serde_json.rs deleted file mode 100644 index 780de2b2a08..00000000000 --- a/lang/rust/avro/benches/serde_json.rs +++ /dev/null @@ -1,79 +0,0 @@ -// 
Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use criterion::{criterion_group, criterion_main, Criterion}; -use serde_json::Value; -use std::collections::HashMap; - -fn make_big_json_record() -> Value { - let address = HashMap::<_, _>::from_iter(vec![ - ("street", "street"), - ("city", "city"), - ("state_prov", "state_prov"), - ("country", "country"), - ("zip", "zip"), - ]); - let address_json = serde_json::to_value(address).unwrap(); - let big_record = HashMap::<_, _>::from_iter(vec![ - ("username", serde_json::to_value("username").unwrap()), - ("age", serde_json::to_value(10i32).unwrap()), - ("phone", serde_json::to_value("000000000").unwrap()), - ("housenum", serde_json::to_value("0000").unwrap()), - ("address", address_json), - ]); - serde_json::to_value(big_record).unwrap() -} - -fn write_json(records: &[Value]) -> Vec { - serde_json::to_vec(records).unwrap() -} - -fn read_json(bytes: &[u8]) { - let reader: serde_json::Value = serde_json::from_slice(bytes).unwrap(); - for record in reader.as_array().unwrap() { - let _ = record; - } -} - -fn bench_read_json( - c: &mut Criterion, - make_record: impl Fn() -> Value, - n_records: usize, - name: &str, -) { - let records = std::iter::repeat(make_record()) - 
.take(n_records) - .collect::>(); - let bytes = write_json(&records); - c.bench_function(name, |b| b.iter(|| read_json(&bytes))); -} - -fn bench_big_schema_json_read_10_000_record(c: &mut Criterion) { - bench_read_json( - c, - make_big_json_record, - 10_000, - "big schema, read 10k JSON records", - ); -} - -criterion_group!( - name = benches; - config = Criterion::default().sample_size(10); - targets = bench_big_schema_json_read_10_000_record, -); -criterion_main!(benches); diff --git a/lang/rust/avro/benches/single.rs b/lang/rust/avro/benches/single.rs deleted file mode 100644 index 39d6c9c8641..00000000000 --- a/lang/rust/avro/benches/single.rs +++ /dev/null @@ -1,193 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use apache_avro::{ - schema::Schema, - to_avro_datum, - types::{Record, Value}, -}; -use criterion::{criterion_group, criterion_main, Criterion}; - -const RAW_SMALL_SCHEMA: &str = r#" -{ - "namespace": "test", - "type": "record", - "name": "Test", - "fields": [ - { - "type": { - "type": "string" - }, - "name": "field" - } - ] -} -"#; - -const RAW_BIG_SCHEMA: &str = r#" -{ - "namespace": "my.example", - "type": "record", - "name": "userInfo", - "fields": [ - { - "default": "NONE", - "type": "string", - "name": "username" - }, - { - "default": -1, - "type": "int", - "name": "age" - }, - { - "default": "NONE", - "type": "string", - "name": "phone" - }, - { - "default": "NONE", - "type": "string", - "name": "housenum" - }, - { - "default": {}, - "type": { - "fields": [ - { - "default": "NONE", - "type": "string", - "name": "street" - }, - { - "default": "NONE", - "type": "string", - "name": "city" - }, - { - "default": "NONE", - "type": "string", - "name": "state_prov" - }, - { - "default": "NONE", - "type": "string", - "name": "country" - }, - { - "default": "NONE", - "type": "string", - "name": "zip" - } - ], - "type": "record", - "name": "mailing_address" - }, - "name": "address" - } - ] -} -"#; - -const RAW_ADDRESS_SCHEMA: &str = r#" -{ - "fields": [ - { - "default": "NONE", - "type": "string", - "name": "street" - }, - { - "default": "NONE", - "type": "string", - "name": "city" - }, - { - "default": "NONE", - "type": "string", - "name": "state_prov" - }, - { - "default": "NONE", - "type": "string", - "name": "country" - }, - { - "default": "NONE", - "type": "string", - "name": "zip" - } - ], - "type": "record", - "name": "mailing_address" -} -"#; - -fn make_small_record() -> anyhow::Result<(Schema, Value)> { - let small_schema = Schema::parse_str(RAW_SMALL_SCHEMA)?; - let small_record = { - let mut small_record = Record::new(&small_schema).unwrap(); - small_record.put("field", "foo"); - small_record.into() - }; - - Ok((small_schema, small_record)) -} - -fn 
make_big_record() -> anyhow::Result<(Schema, Value)> { - let big_schema = Schema::parse_str(RAW_BIG_SCHEMA)?; - let address_schema = Schema::parse_str(RAW_ADDRESS_SCHEMA)?; - let mut address = Record::new(&address_schema).unwrap(); - address.put("street", "street"); - address.put("city", "city"); - address.put("state_prov", "state_prov"); - address.put("country", "country"); - address.put("zip", "zip"); - - let big_record = { - let mut big_record = Record::new(&big_schema).unwrap(); - big_record.put("username", "username"); - big_record.put("age", 10i32); - big_record.put("phone", "000000000"); - big_record.put("housenum", "0000"); - big_record.put("address", address); - big_record.into() - }; - - Ok((big_schema, big_record)) -} - -fn bench_small_schema_write_record(c: &mut Criterion) { - let (schema, record) = make_small_record().unwrap(); - c.bench_function("small record", |b| { - b.iter(|| to_avro_datum(&schema, record.clone())) - }); -} - -fn bench_big_schema_write_record(c: &mut Criterion) { - let (schema, record) = make_big_record().unwrap(); - c.bench_function("big record", |b| { - b.iter(|| to_avro_datum(&schema, record.clone())) - }); -} - -criterion_group!( - benches, - bench_small_schema_write_record, - bench_big_schema_write_record -); -criterion_main!(benches); diff --git a/lang/rust/avro/examples/benchmark.rs b/lang/rust/avro/examples/benchmark.rs deleted file mode 100644 index 53dfb1ddbda..00000000000 --- a/lang/rust/avro/examples/benchmark.rs +++ /dev/null @@ -1,155 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{ - schema::Schema, - types::{Record, Value}, - Reader, Writer, -}; -use apache_avro_test_helper::TestResult; -use std::{ - io::{BufReader, BufWriter}, - time::{Duration, Instant}, -}; - -fn nanos(duration: Duration) -> u64 { - duration.as_secs() * 1_000_000_000 + duration.subsec_nanos() as u64 -} - -fn seconds(nanos: u64) -> f64 { - (nanos as f64) / 1_000_000_000f64 -} - -/* -fn duration(nanos: u64) -> Duration { - Duration::new(nanos / 1_000_000_000, (nanos % 1_000_000_000) as u32) -} -*/ - -fn benchmark( - schema: &Schema, - record: &Value, - big_or_small: &str, - count: usize, - runs: usize, -) -> TestResult { - let mut records = Vec::new(); - for __ in 0..count { - records.push(record.clone()); - } - - let mut durations = Vec::with_capacity(runs); - - let mut bytes = None; - for _ in 0..runs { - let records = records.clone(); - - let start = Instant::now(); - let mut writer = Writer::new(schema, BufWriter::new(Vec::new())); - writer.extend(records)?; - - let duration = Instant::now().duration_since(start); - durations.push(duration); - - bytes = Some(writer.into_inner()?.into_inner()?); - } - - let total_duration_write = durations.into_iter().fold(0u64, |a, b| a + nanos(b)); - - // println!("Write: {} {} {:?}", count, runs, seconds(total_duration)); - - let bytes = bytes.unwrap(); - - let mut durations = Vec::with_capacity(runs); - - for _ in 0..runs { - let start = Instant::now(); - let reader = Reader::with_schema(schema, BufReader::new(&bytes[..]))?; - - let mut read_records = Vec::with_capacity(count); - for record in 
reader { - read_records.push(record); - } - - let duration = Instant::now().duration_since(start); - durations.push(duration); - - assert_eq!(count, read_records.len()); - } - - let total_duration_read = durations.into_iter().fold(0u64, |a, b| a + nanos(b)); - - // println!("Read: {} {} {:?}", count, runs, seconds(total_duration)); - let (total_write_secs, total_read_secs) = - (seconds(total_duration_write), seconds(total_duration_read)); - - println!("{count}\t\t{runs}\t\t{big_or_small}\t\t{total_write_secs}\t\t{total_read_secs}"); - Ok(()) -} - -fn main() -> TestResult { - let raw_small_schema = r#" - {"namespace": "test", "type": "record", "name": "Test", "fields": [{"type": {"type": "string"}, "name": "field"}]} - "#; - - let raw_big_schema = r#" - {"namespace": "my.example", "type": "record", "name": "userInfo", "fields": [{"default": "NONE", "type": "string", "name": "username"}, {"default": -1, "type": "int", "name": "age"}, {"default": "NONE", "type": "string", "name": "phone"}, {"default": "NONE", "type": "string", "name": "housenum"}, {"default": {}, "type": {"fields": [{"default": "NONE", "type": "string", "name": "street"}, {"default": "NONE", "type": "string", "name": "city"}, {"default": "NONE", "type": "string", "name": "state_prov"}, {"default": "NONE", "type": "string", "name": "country"}, {"default": "NONE", "type": "string", "name": "zip"}], "type": "record", "name": "mailing_address"}, "name": "address"}]} - "#; - - let small_schema = Schema::parse_str(raw_small_schema)?; - let big_schema = Schema::parse_str(raw_big_schema)?; - - println!("{small_schema:?}"); - println!("{big_schema:?}"); - - let mut small_record = Record::new(&small_schema).unwrap(); - small_record.put("field", "foo"); - let small_record = small_record.into(); - - let raw_address_schema = r#"{"fields": [{"default": "NONE", "type": "string", "name": "street"}, {"default": "NONE", "type": "string", "name": "city"}, {"default": "NONE", "type": "string", "name": "state_prov"}, 
{"default": "NONE", "type": "string", "name": "country"}, {"default": "NONE", "type": "string", "name": "zip"}], "type": "record", "name": "mailing_address"}"#; - let address_schema = Schema::parse_str(raw_address_schema).unwrap(); - let mut address = Record::new(&address_schema).unwrap(); - address.put("street", "street"); - address.put("city", "city"); - address.put("state_prov", "state_prov"); - address.put("country", "country"); - address.put("zip", "zip"); - - let mut big_record = Record::new(&big_schema).unwrap(); - big_record.put("username", "username"); - big_record.put("age", 10i32); - big_record.put("phone", "000000000"); - big_record.put("housenum", "0000"); - big_record.put("address", address); - let big_record = big_record.into(); - - println!(); - println!("Count\t\tRuns\t\tBig/Small\tTotal write secs\tTotal read secs"); - - benchmark(&small_schema, &small_record, "Small", 10_000, 1)?; - benchmark(&big_schema, &big_record, "Big", 10_000, 1)?; - - benchmark(&small_schema, &small_record, "Small", 1, 100_000)?; - benchmark(&small_schema, &small_record, "Small", 100, 1000)?; - benchmark(&small_schema, &small_record, "Small", 10_000, 10)?; - - benchmark(&big_schema, &big_record, "Big", 1, 100_000)?; - benchmark(&big_schema, &big_record, "Big", 100, 1000)?; - benchmark(&big_schema, &big_record, "Big", 10_000, 10)?; - - Ok(()) -} diff --git a/lang/rust/avro/examples/generate_interop_data.rs b/lang/rust/avro/examples/generate_interop_data.rs deleted file mode 100644 index 29d50a144de..00000000000 --- a/lang/rust/avro/examples/generate_interop_data.rs +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{ - schema::Schema, - types::{Record, Value}, - Codec, Writer, -}; -use std::{ - collections::HashMap, - error::Error, - io::{BufWriter, Write}, -}; -use strum::IntoEnumIterator; - -fn create_datum(schema: &Schema) -> Record { - let mut datum = Record::new(schema).unwrap(); - datum.put("intField", 12_i32); - datum.put("longField", 15234324_i64); - datum.put("stringField", "hey"); - datum.put("boolField", true); - datum.put("floatField", 1234.0_f32); - datum.put("doubleField", -1234.0_f64); - datum.put("bytesField", b"12312adf".to_vec()); - datum.put("nullField", Value::Null); - datum.put( - "arrayField", - Value::Array(vec![ - Value::Double(5.0), - Value::Double(0.0), - Value::Double(12.0), - ]), - ); - let mut map = HashMap::new(); - map.insert( - "a".into(), - Value::Record(vec![("label".into(), Value::String("a".into()))]), - ); - map.insert( - "bee".into(), - Value::Record(vec![("label".into(), Value::String("cee".into()))]), - ); - datum.put("mapField", Value::Map(map)); - datum.put("unionField", Value::Union(1, Box::new(Value::Double(12.0)))); - datum.put("enumField", Value::Enum(2, "C".to_owned())); - datum.put("fixedField", Value::Fixed(16, b"1019181716151413".to_vec())); - datum.put( - "recordField", - Value::Record(vec![ - ("label".into(), Value::String("outer".into())), - ( - "children".into(), - Value::Array(vec![Value::Record(vec![ - ("label".into(), 
Value::String("inner".into())), - ("children".into(), Value::Array(vec![])), - ])]), - ), - ]), - ); - - datum -} - -fn main() -> Result<(), Box> { - let schema_str = std::fs::read_to_string("../../share/test/schemas/interop.avsc") - .expect("Unable to read the interop Avro schema"); - let schema = Schema::parse_str(schema_str.as_str())?; - let data_folder = "../../build/interop/data"; - std::fs::create_dir_all(data_folder)?; - - for codec in Codec::iter() { - let codec_name = <&str>::from(codec); - let suffix = if codec_name == "null" { - "".to_owned() - } else { - format!("_{codec_name}") - }; - - let file_name = format!("{data_folder}/rust{suffix}.avro"); - let output_file = std::fs::File::create(&file_name)?; - - let mut writer = Writer::with_codec(&schema, BufWriter::new(output_file), codec); - write_user_metadata(&mut writer)?; - - let datum = create_datum(&schema); - writer.append(datum)?; - writer.flush()?; - println!("Wrote {file_name}"); - } - - Ok(()) -} - -fn write_user_metadata(writer: &mut Writer>) -> Result<(), Box> { - writer.add_user_metadata("user_metadata".to_string(), b"someByteArray")?; - - Ok(()) -} diff --git a/lang/rust/avro/examples/test_interop_data.rs b/lang/rust/avro/examples/test_interop_data.rs deleted file mode 100644 index 39c97d0649e..00000000000 --- a/lang/rust/avro/examples/test_interop_data.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::Reader; -use std::{ - collections::HashMap, - error::Error, - ffi::OsStr, - io::{BufReader, Read}, -}; - -fn main() -> Result<(), Box> { - let mut expected_user_metadata: HashMap> = HashMap::new(); - expected_user_metadata.insert("user_metadata".to_string(), b"someByteArray".to_vec()); - - let data_dir = std::fs::read_dir("../../build/interop/data/") - .expect("Unable to list the interop data directory"); - - let mut errors = Vec::new(); - - for entry in data_dir { - let path = entry - .expect("Unable to read the interop data directory's files") - .path(); - - if path.is_file() { - let ext = path.extension().and_then(OsStr::to_str).unwrap(); - - if ext == "avro" { - println!("Checking {:?}", &path); - let content = std::fs::File::open(&path)?; - let reader = Reader::new(BufReader::new(&content))?; - - test_user_metadata(&reader, &expected_user_metadata); - - for value in reader { - if let Err(e) = value { - errors.push(format!( - "There is a problem with reading of '{:?}', \n {:?}\n", - &path, e - )); - } - } - } - } - } - - if errors.is_empty() { - Ok(()) - } else { - panic!( - "There were errors reading some .avro files:\n{}", - errors.join(", ") - ); - } -} - -fn test_user_metadata( - reader: &Reader>, - expected_user_metadata: &HashMap>, -) { - let user_metadata = reader.user_metadata(); - if !user_metadata.is_empty() { - assert_eq!(user_metadata, expected_user_metadata); - } -} diff --git a/lang/rust/avro/examples/test_interop_single_object_encoding.rs b/lang/rust/avro/examples/test_interop_single_object_encoding.rs deleted 
file mode 100644 index ef13465d772..00000000000 --- a/lang/rust/avro/examples/test_interop_single_object_encoding.rs +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{schema::AvroSchema, types::Value}; - -const RESOURCES_FOLDER: &str = "../../share/test/data/messageV1"; - -struct InteropMessage; - -impl AvroSchema for InteropMessage { - fn get_schema() -> apache_avro::Schema { - let schema = std::fs::read_to_string(format!("{RESOURCES_FOLDER}/test_schema.avsc")) - .expect("File should exist with schema inside"); - apache_avro::Schema::parse_str(schema.as_str()) - .expect("File should exist with schema inside") - } -} - -impl From for Value { - fn from(_: InteropMessage) -> Value { - Value::Record(vec![ - ("id".into(), 42i64.into()), - ("name".into(), "Bill".into()), - ( - "tags".into(), - Value::Array( - vec!["dog_lover", "cat_hater"] - .into_iter() - .map(|s| s.into()) - .collect(), - ), - ), - ]) - } -} - -fn main() { - let single_object = std::fs::read(format!("{RESOURCES_FOLDER}/test_message.bin")) - .expect("File with single object not found or error occurred while reading it."); - test_write(&single_object); - test_read(single_object); -} - -fn 
test_write(expected: &[u8]) { - let mut encoded: Vec = Vec::new(); - apache_avro::SpecificSingleObjectWriter::::with_capacity(1024) - .expect("Resolving failed") - .write_value(InteropMessage, &mut encoded) - .expect("Encoding failed"); - assert_eq!(expected, &encoded) -} - -fn test_read(encoded: Vec) { - let mut encoded = &encoded[..]; - let read_message = apache_avro::GenericSingleObjectReader::new(InteropMessage::get_schema()) - .expect("Resolving failed") - .read_value(&mut encoded) - .expect("Decoding failed"); - let expected_value: Value = InteropMessage.into(); - assert_eq!(expected_value, read_message) -} diff --git a/lang/rust/avro/examples/to_value.rs b/lang/rust/avro/examples/to_value.rs deleted file mode 100644 index 4a78383e224..00000000000 --- a/lang/rust/avro/examples/to_value.rs +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#[derive(Debug, serde::Serialize)] -struct Test { - a: i64, - b: &'static str, -} - -fn main() -> anyhow::Result<()> { - let test = Test { a: 27, b: "foo" }; - let value = apache_avro::to_value(test)?; - println!("{value:?}"); - Ok(()) -} diff --git a/lang/rust/avro/src/bigdecimal.rs b/lang/rust/avro/src/bigdecimal.rs deleted file mode 100644 index a4e503b1377..00000000000 --- a/lang/rust/avro/src/bigdecimal.rs +++ /dev/null @@ -1,200 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use crate::{ - decode::{decode_len, decode_long}, - encode::{encode_bytes, encode_long}, - types::Value, - Error, -}; -pub use bigdecimal::BigDecimal; -use num_bigint::BigInt; -use std::io::Read; - -pub(crate) fn serialize_big_decimal(decimal: &BigDecimal) -> Vec { - // encode big decimal, without global size - let mut buffer: Vec = Vec::new(); - let (big_int, exponent): (BigInt, i64) = decimal.as_bigint_and_exponent(); - let big_endian_value: Vec = big_int.to_signed_bytes_be(); - encode_bytes(&big_endian_value, &mut buffer); - encode_long(exponent, &mut buffer); - - // encode global size and content - let mut final_buffer: Vec = Vec::new(); - encode_bytes(&buffer, &mut final_buffer); - final_buffer -} - -pub(crate) fn deserialize_big_decimal(bytes: &Vec) -> Result { - let mut bytes: &[u8] = bytes.as_slice(); - let mut big_decimal_buffer = match decode_len(&mut bytes) { - Ok(size) => vec![0u8; size], - Err(err) => return Err(Error::BigDecimalLen(Box::new(err))), - }; - - bytes - .read_exact(&mut big_decimal_buffer[..]) - .map_err(Error::ReadDouble)?; - - match decode_long(&mut bytes) { - Ok(Value::Long(scale_value)) => { - let big_int: BigInt = BigInt::from_signed_bytes_be(&big_decimal_buffer); - let decimal = BigDecimal::new(big_int, scale_value); - Ok(decimal) - } - _ => Err(Error::BigDecimalScale), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{types::Record, Codec, Reader, Schema, Writer}; - use apache_avro_test_helper::TestResult; - use bigdecimal::{One, Zero}; - use pretty_assertions::assert_eq; - use std::{ - fs::File, - io::BufReader, - ops::{Div, Mul}, - str::FromStr, - }; - - #[test] - fn test_avro_3779_bigdecimal_serial() -> TestResult { - let value: BigDecimal = - bigdecimal::BigDecimal::from(-1421).div(bigdecimal::BigDecimal::from(2)); - let mut current: BigDecimal = BigDecimal::one(); - - for iter in 1..180 { - let buffer: Vec = serialize_big_decimal(¤t); - - let mut as_slice = buffer.as_slice(); - decode_long(&mut as_slice)?; - 
- let mut result: Vec = Vec::new(); - result.extend_from_slice(as_slice); - - let deserialize_big_decimal: Result = - deserialize_big_decimal(&result); - assert!( - deserialize_big_decimal.is_ok(), - "can't deserialize for iter {iter}" - ); - assert_eq!(current, deserialize_big_decimal?, "not equals for {iter}"); - current = current.mul(&value); - } - - let buffer: Vec = serialize_big_decimal(&BigDecimal::zero()); - let mut as_slice = buffer.as_slice(); - decode_long(&mut as_slice)?; - - let mut result: Vec = Vec::new(); - result.extend_from_slice(as_slice); - - let deserialize_big_decimal: Result = deserialize_big_decimal(&result); - assert!( - deserialize_big_decimal.is_ok(), - "can't deserialize for zero" - ); - assert_eq!( - BigDecimal::zero(), - deserialize_big_decimal?, - "not equals for zero" - ); - - Ok(()) - } - - #[test] - fn test_avro_3779_record_with_bg() -> TestResult { - let schema_str = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "field_name", - "type": "bytes", - "logicalType": "big-decimal" - } - ] - } - "#; - let schema = Schema::parse_str(schema_str)?; - - // build record with big decimal value - let mut record = Record::new(&schema).unwrap(); - let val = BigDecimal::new(BigInt::from(12), 2); - record.put("field_name", val.clone()); - - // write a record - let codec = Codec::Null; - let mut writer = Writer::builder() - .schema(&schema) - .codec(codec) - .writer(Vec::new()) - .build(); - - writer.append(record.clone())?; - writer.flush()?; - - // read record - let wrote_data = writer.into_inner()?; - let mut reader = Reader::new(&wrote_data[..])?; - - let value = reader.next().unwrap()?; - - // extract field value - let big_decimal_value: &Value = match value { - Value::Record(ref fields) => Ok(&fields[0].1), - other => Err(format!("Expected a Value::Record, got: {other:?}")), - }?; - - let x1res: &BigDecimal = match big_decimal_value { - Value::BigDecimal(ref s) => Ok(s), - other => Err(format!("Expected 
Value::BigDecimal, got: {other:?}")), - }?; - assert_eq!(&val, x1res); - - Ok(()) - } - - #[test] - fn test_avro_3779_from_java_file() -> TestResult { - // Open file generated with Java code to ensure compatibility - // with Java big decimal logical type. - let file: File = File::open("./tests/bigdec.avro")?; - let mut reader = Reader::new(BufReader::new(&file))?; - let next_element = reader.next(); - assert!(next_element.is_some()); - let value = next_element.unwrap()?; - let bg = match value { - Value::Record(ref fields) => Ok(&fields[0].1), - other => Err(format!("Expected a Value::Record, got: {other:?}")), - }?; - let value_big_decimal = match bg { - Value::BigDecimal(val) => Ok(val), - other => Err(format!("Expected a Value::BigDecimal, got: {other:?}")), - }?; - - let ref_value = BigDecimal::from_str("2.24")?; - assert_eq!(&ref_value, value_big_decimal); - - Ok(()) - } -} diff --git a/lang/rust/avro/src/bytes.rs b/lang/rust/avro/src/bytes.rs deleted file mode 100644 index 5c10df27b68..00000000000 --- a/lang/rust/avro/src/bytes.rs +++ /dev/null @@ -1,684 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::cell::Cell; - -thread_local! 
{ - /// A thread local that is used to decide how to serialize Rust bytes into an Avro - /// `types::Value` of type bytes. - /// - /// Relies on the fact that serde's serialization process is single-threaded. - pub(crate) static SER_BYTES_TYPE: Cell = const { Cell::new(BytesType::Bytes) }; - - /// A thread local that is used to decide how to deserialize an Avro `types::Value` - /// of type bytes into Rust bytes. - /// - /// Relies on the fact that serde's deserialization process is single-threaded. - pub(crate) static DE_BYTES_BORROWED: Cell = const { Cell::new(false) }; -} - -#[derive(Debug, Clone, Copy)] -pub(crate) enum BytesType { - Bytes, - Fixed, -} - -/// Efficient (de)serialization of Avro bytes values. -/// -/// This module is intended to be used through the Serde `with` attribute. Use -/// [`serde_avro_bytes_opt`](crate::serde_avro_bytes_opt) for optional bytes. -/// -/// See usage with below example: -/// ```rust -/// use apache_avro::{serde_avro_bytes, serde_avro_fixed}; -/// use serde::{Deserialize, Serialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct StructWithBytes { -/// #[serde(with = "serde_avro_bytes")] -/// vec_field: Vec, -/// -/// #[serde(with = "serde_avro_fixed")] -/// fixed_field: [u8; 6], -/// } -/// ``` -pub mod serde_avro_bytes { - use serde::{Deserializer, Serializer}; - - pub fn serialize(bytes: &[u8], serializer: S) -> Result - where - S: Serializer, - { - serde_bytes::serialize(bytes, serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - serde_bytes::deserialize(deserializer) - } -} - -/// Efficient (de)serialization of optional Avro bytes values. -/// -/// This module is intended to be used through the Serde `with` attribute. Use -/// [`serde_avro_bytes`](crate::serde_avro_bytes) for non optional bytes. 
-/// -/// See usage with below example: -/// ```rust -/// use apache_avro::{serde_avro_bytes_opt, serde_avro_fixed_opt}; -/// use serde::{Deserialize, Serialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct StructWithBytes { -/// #[serde(with = "serde_avro_bytes_opt")] -/// vec_field: Option>, -/// -/// #[serde(with = "serde_avro_fixed_opt")] -/// fixed_field: Option<[u8; 6]>, -/// } -/// ``` -pub mod serde_avro_bytes_opt { - use serde::{Deserializer, Serializer}; - use std::borrow::Borrow; - - pub fn serialize(bytes: &Option, serializer: S) -> Result - where - S: Serializer, - B: Borrow<[u8]> + serde_bytes::Serialize, - { - serde_bytes::serialize(bytes, serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result>, D::Error> - where - D: Deserializer<'de>, - { - serde_bytes::deserialize(deserializer) - } -} - -/// Efficient (de)serialization of Avro fixed values. -/// -/// This module is intended to be used through the Serde `with` attribute. Use -/// [`serde_avro_fixed_opt`](crate::serde_avro_fixed_opt) for optional fixed values. 
-/// -/// See usage with below example: -/// ```rust -/// use apache_avro::{serde_avro_bytes, serde_avro_fixed}; -/// use serde::{Deserialize, Serialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct StructWithBytes { -/// #[serde(with = "serde_avro_bytes")] -/// vec_field: Vec, -/// -/// #[serde(with = "serde_avro_fixed")] -/// fixed_field: [u8; 6], -/// } -/// ``` -pub mod serde_avro_fixed { - use super::{BytesType, SER_BYTES_TYPE}; - use serde::{Deserializer, Serializer}; - - pub fn serialize(bytes: &[u8], serializer: S) -> Result - where - S: Serializer, - { - SER_BYTES_TYPE.set(BytesType::Fixed); - let res = serde_bytes::serialize(bytes, serializer); - SER_BYTES_TYPE.set(BytesType::Bytes); - res - } - - pub fn deserialize<'de, D, const N: usize>(deserializer: D) -> Result<[u8; N], D::Error> - where - D: Deserializer<'de>, - { - serde_bytes::deserialize(deserializer) - } -} - -/// Efficient (de)serialization of optional Avro fixed values. -/// -/// This module is intended to be used through the Serde `with` attribute. Use -/// [`serde_avro_fixed`](crate::serde_avro_fixed) for non optional fixed values. 
-/// -/// See usage with below example: -/// ```rust -/// use apache_avro::{serde_avro_bytes_opt, serde_avro_fixed_opt}; -/// use serde::{Deserialize, Serialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct StructWithBytes { -/// #[serde(with = "serde_avro_bytes_opt")] -/// vec_field: Option>, -/// -/// #[serde(with = "serde_avro_fixed_opt")] -/// fixed_field: Option<[u8; 6]>, -/// } -/// ``` -pub mod serde_avro_fixed_opt { - use super::{BytesType, SER_BYTES_TYPE}; - use serde::{Deserializer, Serializer}; - use std::borrow::Borrow; - - pub fn serialize(bytes: &Option, serializer: S) -> Result - where - S: Serializer, - B: Borrow<[u8]> + serde_bytes::Serialize, - { - SER_BYTES_TYPE.set(BytesType::Fixed); - let res = serde_bytes::serialize(bytes, serializer); - SER_BYTES_TYPE.set(BytesType::Bytes); - res - } - - pub fn deserialize<'de, D, const N: usize>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - serde_bytes::deserialize(deserializer) - } -} - -/// Efficient (de)serialization of Avro bytes/fixed borrowed values. -/// -/// This module is intended to be used through the Serde `with` attribute. Note that -/// `bytes: &[u8]` are always serialized as -/// [`Value::Bytes`](crate::types::Value::Bytes). However, both -/// [`Value::Bytes`](crate::types::Value::Bytes) and -/// [`Value::Fixed`](crate::types::Value::Fixed) can be deserialized as `bytes: -/// &[u8]`. Use [`serde_avro_slice_opt`](crate::serde_avro_slice_opt) for optional -/// bytes/fixed borrowed values. 
-/// -/// See usage with below example: -/// ```rust -/// use apache_avro::serde_avro_slice; -/// use serde::{Deserialize, Serialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct StructWithBytes<'a> { -/// #[serde(with = "serde_avro_slice")] -/// slice_field: &'a [u8], -/// } -/// ``` -pub mod serde_avro_slice { - use super::DE_BYTES_BORROWED; - use serde::{Deserializer, Serializer}; - - pub fn serialize(bytes: &[u8], serializer: S) -> Result - where - S: Serializer, - { - serde_bytes::serialize(bytes, serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result<&'de [u8], D::Error> - where - D: Deserializer<'de>, - { - DE_BYTES_BORROWED.set(true); - let res = serde_bytes::deserialize(deserializer); - DE_BYTES_BORROWED.set(false); - res - } -} - -/// Efficient (de)serialization of optional Avro bytes/fixed borrowed values. -/// -/// This module is intended to be used through the Serde `with` attribute. Note that -/// `bytes: &[u8]` are always serialized as -/// [`Value::Bytes`](crate::types::Value::Bytes). However, both -/// [`Value::Bytes`](crate::types::Value::Bytes) and -/// [`Value::Fixed`](crate::types::Value::Fixed) can be deserialized as `bytes: -/// &[u8]`. Use [`serde_avro_slice`](crate::serde_avro_slice) for non optional -/// bytes/fixed borrowed values. 
-/// -/// See usage with below example: -/// ```rust -/// use apache_avro::serde_avro_slice_opt; -/// use serde::{Deserialize, Serialize}; -/// -/// #[derive(Serialize, Deserialize)] -/// struct StructWithBytes<'a> { -/// #[serde(with = "serde_avro_slice_opt")] -/// slice_field: Option<&'a [u8]>, -/// } -/// ``` -pub mod serde_avro_slice_opt { - use super::DE_BYTES_BORROWED; - use serde::{Deserializer, Serializer}; - use std::borrow::Borrow; - - pub fn serialize(bytes: &Option, serializer: S) -> Result - where - S: Serializer, - B: Borrow<[u8]> + serde_bytes::Serialize, - { - serde_bytes::serialize(&bytes, serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - DE_BYTES_BORROWED.set(true); - let res = serde_bytes::deserialize(deserializer); - DE_BYTES_BORROWED.set(false); - res - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{from_value, to_value, types::Value, Schema}; - use serde::{Deserialize, Serialize}; - - #[test] - fn avro_3631_validate_schema_for_struct_with_byte_types() { - #[derive(Debug, Serialize)] - struct TestStructWithBytes<'a> { - #[serde(with = "serde_avro_bytes")] - vec_field: Vec, - #[serde(with = "serde_avro_bytes_opt")] - vec_field_opt: Option>, - - #[serde(with = "serde_avro_fixed")] - fixed_field: [u8; 6], - #[serde(with = "serde_avro_fixed_opt")] - fixed_field_opt: Option<[u8; 7]>, - - #[serde(with = "serde_avro_slice")] - slice_field: &'a [u8], - #[serde(with = "serde_avro_slice_opt")] - slice_field_opt: Option<&'a [u8]>, - } - - let test = TestStructWithBytes { - vec_field: vec![2, 3, 4], - vec_field_opt: Some(vec![2, 3, 4]), - fixed_field: [1; 6], - fixed_field_opt: Some([1; 7]), - slice_field: &[1, 2, 3], - slice_field_opt: Some(&[1, 2, 3]), - }; - let value: Value = to_value(test).unwrap(); - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "TestStructWithBytes", - "fields": [ { - "name": "vec_field", - "type": "bytes" - }, { - 
"name": "vec_field_opt", - "type": ["null", "bytes"] - }, { - "name": "fixed_field", - "type": { - "name": "ByteData", - "type": "fixed", - "size": 6 - } - }, { - "name": "fixed_field_opt", - "type": ["null", { - "name": "ByteData2", - "type": "fixed", - "size": 7 - } ] - }, { - "name": "slice_field", - "type": "bytes" - }, { - "name": "slice_field_opt", - "type": ["null", "bytes"] - } ] - }"#, - ) - .unwrap(); - assert!(value.validate(&schema)); - } - - #[test] - fn avro_3631_deserialize_value_to_struct_with_byte_types() { - #[derive(Debug, Deserialize, PartialEq)] - struct TestStructWithBytes<'a> { - #[serde(with = "serde_avro_bytes")] - vec_field: Vec, - #[serde(with = "serde_avro_bytes_opt")] - vec_field_opt: Option>, - #[serde(with = "serde_avro_bytes_opt")] - vec_field_opt2: Option>, - - #[serde(with = "serde_avro_fixed")] - fixed_field: [u8; 6], - #[serde(with = "serde_avro_fixed_opt")] - fixed_field_opt: Option<[u8; 7]>, - #[serde(with = "serde_avro_fixed_opt")] - fixed_field_opt2: Option<[u8; 8]>, - - #[serde(with = "serde_avro_slice")] - slice_bytes_field: &'a [u8], - #[serde(with = "serde_avro_slice_opt")] - slice_bytes_field_opt: Option<&'a [u8]>, - #[serde(with = "serde_avro_slice_opt")] - slice_bytes_field_opt2: Option<&'a [u8]>, - - #[serde(with = "serde_avro_slice")] - slice_fixed_field: &'a [u8], - #[serde(with = "serde_avro_slice_opt")] - slice_fixed_field_opt: Option<&'a [u8]>, - #[serde(with = "serde_avro_slice_opt")] - slice_fixed_field_opt2: Option<&'a [u8]>, - } - - let expected = TestStructWithBytes { - vec_field: vec![3, 33], - vec_field_opt: Some(vec![4, 44]), - vec_field_opt2: None, - fixed_field: [1; 6], - fixed_field_opt: Some([7; 7]), - fixed_field_opt2: None, - slice_bytes_field: &[1, 11, 111], - slice_bytes_field_opt: Some(&[5, 5, 5, 5, 5]), - slice_bytes_field_opt2: None, - slice_fixed_field: &[2, 22, 222], - slice_fixed_field_opt: Some(&[3, 3, 3]), - slice_fixed_field_opt2: None, - }; - - let value = Value::Record(vec![ - ( - 
"vec_field".to_owned(), - Value::Bytes(expected.vec_field.clone()), - ), - ( - "vec_field_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Bytes( - expected.vec_field_opt.as_ref().unwrap().clone(), - )), - ), - ), - ( - "vec_field_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "fixed_field".to_owned(), - Value::Fixed(expected.fixed_field.len(), expected.fixed_field.to_vec()), - ), - ( - "fixed_field_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Fixed( - expected.fixed_field_opt.as_ref().unwrap().len(), - expected.fixed_field_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "fixed_field_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "slice_bytes_field".to_owned(), - Value::Bytes(expected.slice_bytes_field.to_vec()), - ), - ( - "slice_bytes_field_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Bytes( - expected.slice_bytes_field_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "slice_bytes_field_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "slice_fixed_field".to_owned(), - Value::Fixed( - expected.slice_fixed_field.len(), - expected.slice_fixed_field.to_vec(), - ), - ), - ( - "slice_fixed_field_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Fixed( - expected.slice_fixed_field_opt.as_ref().unwrap().len(), - expected.slice_fixed_field_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "slice_fixed_field_opt2".to_owned(), - Value::Union(1, Box::new(Value::Null)), - ), - ]); - assert_eq!(expected, from_value(&value).unwrap()); - } - - #[test] - fn avro_3631_serialize_struct_to_value_with_byte_types() { - #[derive(Debug, Serialize)] - struct TestStructWithBytes<'a> { - array_field: &'a [u8], - vec_field: Vec, - - #[serde(with = "serde_avro_fixed")] - vec_field2: Vec, - #[serde(with = "serde_avro_fixed_opt")] - vec_field2_opt: Option>, - #[serde(with = "serde_avro_fixed_opt")] - vec_field2_opt2: Option>, - - #[serde(with = "serde_avro_bytes")] - 
vec_field3: Vec, - #[serde(with = "serde_avro_bytes_opt")] - vec_field3_opt: Option>, - #[serde(with = "serde_avro_bytes_opt")] - vec_field3_opt2: Option>, - - #[serde(with = "serde_avro_fixed")] - fixed_field: [u8; 6], - #[serde(with = "serde_avro_fixed_opt")] - fixed_field_opt: Option<[u8; 5]>, - #[serde(with = "serde_avro_fixed_opt")] - fixed_field_opt2: Option<[u8; 4]>, - - #[serde(with = "serde_avro_fixed")] - fixed_field2: &'a [u8], - #[serde(with = "serde_avro_fixed_opt")] - fixed_field2_opt: Option<&'a [u8]>, - #[serde(with = "serde_avro_fixed_opt")] - fixed_field2_opt2: Option<&'a [u8]>, - - #[serde(with = "serde_avro_bytes")] - bytes_field: &'a [u8], - #[serde(with = "serde_avro_bytes_opt")] - bytes_field_opt: Option<&'a [u8]>, - #[serde(with = "serde_avro_bytes_opt")] - bytes_field_opt2: Option<&'a [u8]>, - - #[serde(with = "serde_avro_bytes")] - bytes_field2: [u8; 6], - #[serde(with = "serde_avro_bytes_opt")] - bytes_field2_opt: Option<[u8; 7]>, - #[serde(with = "serde_avro_bytes_opt")] - bytes_field2_opt2: Option<[u8; 8]>, - } - - let test = TestStructWithBytes { - array_field: &[1, 11, 111], - vec_field: vec![3, 33], - vec_field2: vec![4, 44], - vec_field2_opt: Some(vec![14, 144]), - vec_field2_opt2: None, - vec_field3: vec![5, 55], - vec_field3_opt: Some(vec![15, 155]), - vec_field3_opt2: None, - fixed_field: [1; 6], - fixed_field_opt: Some([6; 5]), - fixed_field_opt2: None, - fixed_field2: &[6, 66], - fixed_field2_opt: Some(&[7, 77]), - fixed_field2_opt2: None, - bytes_field: &[2, 22, 222], - bytes_field_opt: Some(&[3, 33, 233]), - bytes_field_opt2: None, - bytes_field2: [2; 6], - bytes_field2_opt: Some([2; 7]), - bytes_field2_opt2: None, - }; - let expected = Value::Record(vec![ - ( - "array_field".to_owned(), - Value::Array( - test.array_field - .iter() - .map(|&i| Value::Int(i as i32)) - .collect(), - ), - ), - ( - "vec_field".to_owned(), - Value::Array( - test.vec_field - .iter() - .map(|&i| Value::Int(i as i32)) - .collect(), - ), - ), - ( - 
"vec_field2".to_owned(), - Value::Fixed(test.vec_field2.len(), test.vec_field2.clone()), - ), - ( - "vec_field2_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Fixed( - test.vec_field2_opt.as_ref().unwrap().len(), - test.vec_field2_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "vec_field2_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "vec_field3".to_owned(), - Value::Bytes(test.vec_field3.clone()), - ), - ( - "vec_field3_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Bytes(test.vec_field3_opt.as_ref().unwrap().clone())), - ), - ), - ( - "vec_field3_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "fixed_field".to_owned(), - Value::Fixed(test.fixed_field.len(), test.fixed_field.to_vec()), - ), - ( - "fixed_field_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Fixed( - test.fixed_field_opt.as_ref().unwrap().len(), - test.fixed_field_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "fixed_field_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "fixed_field2".to_owned(), - Value::Fixed(test.fixed_field2.len(), test.fixed_field2.to_vec()), - ), - ( - "fixed_field2_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Fixed( - test.fixed_field2_opt.as_ref().unwrap().len(), - test.fixed_field2_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "fixed_field2_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "bytes_field".to_owned(), - Value::Bytes(test.bytes_field.to_vec()), - ), - ( - "bytes_field_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Bytes( - test.bytes_field_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - "bytes_field_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ( - "bytes_field2".to_owned(), - Value::Bytes(test.bytes_field2.to_vec()), - ), - ( - "bytes_field2_opt".to_owned(), - Value::Union( - 1, - Box::new(Value::Bytes( - test.bytes_field2_opt.as_ref().unwrap().to_vec(), - )), - ), - ), - ( - 
"bytes_field2_opt2".to_owned(), - Value::Union(0, Box::new(Value::Null)), - ), - ]); - assert_eq!(expected, to_value(test).unwrap()); - } -} diff --git a/lang/rust/avro/src/codec.rs b/lang/rust/avro/src/codec.rs deleted file mode 100644 index 9ec9d88c690..00000000000 --- a/lang/rust/avro/src/codec.rs +++ /dev/null @@ -1,366 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic for all supported compression codecs in Avro. -use crate::{types::Value, AvroResult, Error}; -use libflate::deflate::{Decoder, Encoder}; -use std::io::{Read, Write}; -use strum_macros::{EnumIter, EnumString, IntoStaticStr}; - -/// The compression codec used to compress blocks. -#[derive(Clone, Copy, Debug, Eq, PartialEq, EnumIter, EnumString, IntoStaticStr)] -#[strum(serialize_all = "kebab_case")] -pub enum Codec { - /// The `Null` codec simply passes through data uncompressed. - Null, - /// The `Deflate` codec writes the data block using the deflate algorithm - /// as specified in RFC 1951, and typically implemented using the zlib library. - /// Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. 
- Deflate, - #[cfg(feature = "snappy")] - /// The `Snappy` codec uses Google's [Snappy](http://google.github.io/snappy/) - /// compression library. Each compressed block is followed by the 4-byte, big-endian - /// CRC32 checksum of the uncompressed data in the block. - Snappy, - #[cfg(feature = "zstandard")] - /// The `Zstandard` codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) - Zstandard(zstandard::ZstandardSettings), - #[cfg(feature = "bzip")] - /// The `BZip2` codec uses [BZip2](https://sourceware.org/bzip2/) - /// compression library. - Bzip2(bzip::Bzip2Settings), - #[cfg(feature = "xz")] - /// The `Xz` codec uses [Xz utils](https://tukaani.org/xz/) - /// compression library. - Xz(xz::XzSettings), -} - -impl From for Value { - fn from(value: Codec) -> Self { - Self::Bytes(<&str>::from(value).as_bytes().to_vec()) - } -} - -impl Codec { - /// Compress a stream of bytes in-place. - pub fn compress(self, stream: &mut Vec) -> AvroResult<()> { - match self { - Codec::Null => (), - Codec::Deflate => { - let mut encoder = Encoder::new(Vec::new()); - encoder.write_all(stream).map_err(Error::DeflateCompress)?; - // Deflate errors seem to just be io::Error - *stream = encoder - .finish() - .into_result() - .map_err(Error::DeflateCompressFinish)?; - } - #[cfg(feature = "snappy")] - Codec::Snappy => { - let mut encoded: Vec = vec![0; snap::raw::max_compress_len(stream.len())]; - let compressed_size = snap::raw::Encoder::new() - .compress(&stream[..], &mut encoded[..]) - .map_err(Error::SnappyCompress)?; - - let mut hasher = crc32fast::Hasher::new(); - hasher.update(&stream[..]); - let checksum = hasher.finalize(); - let checksum_as_bytes = checksum.to_be_bytes(); - let checksum_len = checksum_as_bytes.len(); - encoded.truncate(compressed_size + checksum_len); - encoded[compressed_size..].copy_from_slice(&checksum_as_bytes); - - *stream = encoded; - } - #[cfg(feature = "zstandard")] - Codec::Zstandard(settings) => { - let mut encoder = - 
zstd::Encoder::new(Vec::new(), settings.compression_level as i32).unwrap(); - encoder.write_all(stream).map_err(Error::ZstdCompress)?; - *stream = encoder.finish().unwrap(); - } - #[cfg(feature = "bzip")] - Codec::Bzip2(settings) => { - use bzip2::read::BzEncoder; - - let mut encoder = BzEncoder::new(&stream[..], settings.compression()); - let mut buffer = Vec::new(); - encoder.read_to_end(&mut buffer).unwrap(); - *stream = buffer; - } - #[cfg(feature = "xz")] - Codec::Xz(settings) => { - use xz2::read::XzEncoder; - - let mut encoder = XzEncoder::new(&stream[..], settings.compression_level as u32); - let mut buffer = Vec::new(); - encoder.read_to_end(&mut buffer).unwrap(); - *stream = buffer; - } - }; - - Ok(()) - } - - /// Decompress a stream of bytes in-place. - pub fn decompress(self, stream: &mut Vec) -> AvroResult<()> { - *stream = match self { - Codec::Null => return Ok(()), - Codec::Deflate => { - let mut decoded = Vec::new(); - let mut decoder = Decoder::new(&stream[..]); - decoder - .read_to_end(&mut decoded) - .map_err(Error::DeflateDecompress)?; - decoded - } - #[cfg(feature = "snappy")] - Codec::Snappy => { - let decompressed_size = snap::raw::decompress_len(&stream[..stream.len() - 4]) - .map_err(Error::GetSnappyDecompressLen)?; - let mut decoded = vec![0; decompressed_size]; - snap::raw::Decoder::new() - .decompress(&stream[..stream.len() - 4], &mut decoded[..]) - .map_err(Error::SnappyDecompress)?; - - let mut last_four: [u8; 4] = [0; 4]; - last_four.copy_from_slice(&stream[(stream.len() - 4)..]); - let expected: u32 = u32::from_be_bytes(last_four); - - let mut hasher = crc32fast::Hasher::new(); - hasher.update(&decoded); - let actual = hasher.finalize(); - - if expected != actual { - return Err(Error::SnappyCrc32 { expected, actual }); - } - decoded - } - #[cfg(feature = "zstandard")] - Codec::Zstandard(_settings) => { - use std::io::BufReader; - use zstd::zstd_safe; - - let mut decoded = Vec::new(); - let buffer_size = zstd_safe::DCtx::in_size(); - 
let buffer = BufReader::with_capacity(buffer_size, &stream[..]); - let mut decoder = zstd::Decoder::new(buffer).unwrap(); - std::io::copy(&mut decoder, &mut decoded).map_err(Error::ZstdDecompress)?; - decoded - } - #[cfg(feature = "bzip")] - Codec::Bzip2(_) => { - use bzip2::read::BzDecoder; - - let mut decoder = BzDecoder::new(&stream[..]); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - decoded - } - #[cfg(feature = "xz")] - Codec::Xz(_) => { - use xz2::read::XzDecoder; - - let mut decoder = XzDecoder::new(&stream[..]); - let mut decoded: Vec = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - decoded - } - }; - Ok(()) - } -} - -#[cfg(feature = "bzip")] -pub mod bzip { - use bzip2::Compression; - - #[derive(Clone, Copy, Eq, PartialEq, Debug)] - pub struct Bzip2Settings { - pub compression_level: u8, - } - - impl Bzip2Settings { - pub fn new(compression_level: u8) -> Self { - Self { compression_level } - } - - pub(crate) fn compression(&self) -> Compression { - Compression::new(self.compression_level as u32) - } - } - - impl Default for Bzip2Settings { - fn default() -> Self { - Bzip2Settings::new(Compression::best().level() as u8) - } - } -} - -#[cfg(feature = "zstandard")] -pub mod zstandard { - #[derive(Clone, Copy, Eq, PartialEq, Debug)] - pub struct ZstandardSettings { - pub compression_level: u8, - } - - impl ZstandardSettings { - pub fn new(compression_level: u8) -> Self { - Self { compression_level } - } - } - - impl Default for ZstandardSettings { - fn default() -> Self { - Self::new(0) - } - } -} - -#[cfg(feature = "xz")] -pub mod xz { - #[derive(Clone, Copy, Eq, PartialEq, Debug)] - pub struct XzSettings { - pub compression_level: u8, - } - - impl XzSettings { - pub fn new(compression_level: u8) -> Self { - Self { compression_level } - } - } - - impl Default for XzSettings { - fn default() -> Self { - XzSettings::new(9) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use 
apache_avro_test_helper::TestResult; - use pretty_assertions::{assert_eq, assert_ne}; - - const INPUT: &[u8] = b"theanswertolifetheuniverseandeverythingis42theanswertolifetheuniverseandeverythingis4theanswertolifetheuniverseandeverythingis2"; - - #[test] - fn null_compress_and_decompress() -> TestResult { - let codec = Codec::Null; - let mut stream = INPUT.to_vec(); - codec.compress(&mut stream)?; - assert_eq!(INPUT, stream.as_slice()); - codec.decompress(&mut stream)?; - assert_eq!(INPUT, stream.as_slice()); - Ok(()) - } - - #[test] - fn deflate_compress_and_decompress() -> TestResult { - compress_and_decompress(Codec::Deflate) - } - - #[cfg(feature = "snappy")] - #[test] - fn snappy_compress_and_decompress() -> TestResult { - compress_and_decompress(Codec::Snappy) - } - - #[cfg(feature = "zstandard")] - #[test] - fn zstd_compress_and_decompress() -> TestResult { - compress_and_decompress(Codec::Zstandard(zstandard::ZstandardSettings::default())) - } - - #[cfg(feature = "bzip")] - #[test] - fn bzip_compress_and_decompress() -> TestResult { - compress_and_decompress(Codec::Bzip2(bzip::Bzip2Settings::default())) - } - - #[cfg(feature = "xz")] - #[test] - fn xz_compress_and_decompress() -> TestResult { - compress_and_decompress(Codec::Xz(xz::XzSettings::default())) - } - - fn compress_and_decompress(codec: Codec) -> TestResult { - let mut stream = INPUT.to_vec(); - codec.compress(&mut stream)?; - assert_ne!(INPUT, stream.as_slice()); - assert!(INPUT.len() > stream.len()); - codec.decompress(&mut stream)?; - assert_eq!(INPUT, stream.as_slice()); - Ok(()) - } - - #[test] - fn codec_to_str() { - assert_eq!(<&str>::from(Codec::Null), "null"); - assert_eq!(<&str>::from(Codec::Deflate), "deflate"); - - #[cfg(feature = "snappy")] - assert_eq!(<&str>::from(Codec::Snappy), "snappy"); - - #[cfg(feature = "zstandard")] - assert_eq!( - <&str>::from(Codec::Zstandard(zstandard::ZstandardSettings::default())), - "zstandard" - ); - - #[cfg(feature = "bzip")] - assert_eq!( - 
<&str>::from(Codec::Bzip2(bzip::Bzip2Settings::default())), - "bzip2" - ); - - #[cfg(feature = "xz")] - assert_eq!(<&str>::from(Codec::Xz(xz::XzSettings::default())), "xz"); - } - - #[test] - fn codec_from_str() { - use std::str::FromStr; - - assert_eq!(Codec::from_str("null").unwrap(), Codec::Null); - assert_eq!(Codec::from_str("deflate").unwrap(), Codec::Deflate); - - #[cfg(feature = "snappy")] - assert_eq!(Codec::from_str("snappy").unwrap(), Codec::Snappy); - - #[cfg(feature = "zstandard")] - assert_eq!( - Codec::from_str("zstandard").unwrap(), - Codec::Zstandard(zstandard::ZstandardSettings::default()) - ); - - #[cfg(feature = "bzip")] - assert_eq!( - Codec::from_str("bzip2").unwrap(), - Codec::Bzip2(bzip::Bzip2Settings::default()) - ); - - #[cfg(feature = "xz")] - assert_eq!( - Codec::from_str("xz").unwrap(), - Codec::Xz(xz::XzSettings::default()) - ); - - assert!(Codec::from_str("not a codec").is_err()); - } -} diff --git a/lang/rust/avro/src/de.rs b/lang/rust/avro/src/de.rs deleted file mode 100644 index 2dcfc401ac2..00000000000 --- a/lang/rust/avro/src/de.rs +++ /dev/null @@ -1,1559 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic for serde-compatible deserialization. 
-use crate::{bytes::DE_BYTES_BORROWED, types::Value, Error}; -use serde::{ - de::{self, DeserializeSeed, Visitor}, - forward_to_deserialize_any, Deserialize, -}; -use std::{ - collections::{ - hash_map::{Keys, Values}, - HashMap, - }, - slice::Iter, -}; - -pub struct Deserializer<'de> { - input: &'de Value, -} - -struct SeqDeserializer<'de> { - input: Iter<'de, Value>, -} - -struct MapDeserializer<'de> { - input_keys: Keys<'de, String, Value>, - input_values: Values<'de, String, Value>, -} - -struct RecordDeserializer<'de> { - input: Iter<'de, (String, Value)>, - value: Option<&'de Value>, -} - -pub struct EnumUnitDeserializer<'a> { - input: &'a str, -} - -pub struct EnumDeserializer<'de> { - input: &'de [(String, Value)], -} - -impl<'de> Deserializer<'de> { - pub fn new(input: &'de Value) -> Self { - Deserializer { input } - } -} - -impl<'de> SeqDeserializer<'de> { - pub fn new(input: &'de [Value]) -> Self { - SeqDeserializer { - input: input.iter(), - } - } -} - -impl<'de> MapDeserializer<'de> { - pub fn new(input: &'de HashMap) -> Self { - MapDeserializer { - input_keys: input.keys(), - input_values: input.values(), - } - } -} - -impl<'de> RecordDeserializer<'de> { - pub fn new(input: &'de [(String, Value)]) -> Self { - RecordDeserializer { - input: input.iter(), - value: None, - } - } -} - -impl<'a> EnumUnitDeserializer<'a> { - pub fn new(input: &'a str) -> Self { - EnumUnitDeserializer { input } - } -} - -impl<'de> EnumDeserializer<'de> { - pub fn new(input: &'de [(String, Value)]) -> Self { - EnumDeserializer { input } - } -} - -impl<'de> de::EnumAccess<'de> for EnumUnitDeserializer<'de> { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> - where - V: DeserializeSeed<'de>, - { - Ok(( - seed.deserialize(StringDeserializer { - input: self.input.to_owned(), - })?, - self, - )) - } -} - -impl<'de> de::VariantAccess<'de> for EnumUnitDeserializer<'de> { - type Error = Error; - - fn 
unit_variant(self) -> Result<(), Error> { - Ok(()) - } - - fn newtype_variant_seed(self, _seed: T) -> Result - where - T: DeserializeSeed<'de>, - { - Err(de::Error::custom("Unexpected Newtype variant")) - } - - fn tuple_variant(self, _len: usize, _visitor: V) -> Result - where - V: Visitor<'de>, - { - Err(de::Error::custom("Unexpected tuple variant")) - } - - fn struct_variant( - self, - _fields: &'static [&'static str], - _visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - Err(de::Error::custom("Unexpected struct variant")) - } -} - -impl<'de> de::EnumAccess<'de> for EnumDeserializer<'de> { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> - where - V: DeserializeSeed<'de>, - { - self.input.first().map_or( - Err(de::Error::custom("A record must have a least one field")), - |item| match (item.0.as_ref(), &item.1) { - ("type", Value::String(x)) | ("type", Value::Enum(_, x)) => Ok(( - seed.deserialize(StringDeserializer { - input: x.to_owned(), - })?, - self, - )), - (field, Value::String(_)) => Err(de::Error::custom(format!( - "Expected first field named 'type': got '{field}' instead" - ))), - (_, _) => Err(de::Error::custom( - "Expected first field of type String or Enum for the type name".to_string(), - )), - }, - ) - } -} - -impl<'de> de::VariantAccess<'de> for EnumDeserializer<'de> { - type Error = Error; - - fn unit_variant(self) -> Result<(), Error> { - Ok(()) - } - - fn newtype_variant_seed(self, seed: T) -> Result - where - T: DeserializeSeed<'de>, - { - self.input.get(1).map_or( - Err(de::Error::custom( - "Expected a newtype variant, got nothing instead.", - )), - |item| seed.deserialize(&Deserializer::new(&item.1)), - ) - } - - fn tuple_variant(self, _len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.input.get(1).map_or( - Err(de::Error::custom( - "Expected a tuple variant, got nothing instead.", - )), - |item| 
de::Deserializer::deserialize_seq(&Deserializer::new(&item.1), visitor), - ) - } - - fn struct_variant( - self, - fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.input.get(1).map_or( - Err(de::Error::custom("Expected a struct variant, got nothing")), - |item| { - de::Deserializer::deserialize_struct( - &Deserializer::new(&item.1), - "", - fields, - visitor, - ) - }, - ) - } -} - -impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match self.input { - Value::Null => visitor.visit_unit(), - &Value::Boolean(b) => visitor.visit_bool(b), - Value::Int(i) | Value::Date(i) | Value::TimeMillis(i) => visitor.visit_i32(*i), - Value::Long(i) - | Value::TimeMicros(i) - | Value::TimestampMillis(i) - | Value::TimestampMicros(i) - | Value::TimestampNanos(i) - | Value::LocalTimestampMillis(i) - | Value::LocalTimestampMicros(i) - | Value::LocalTimestampNanos(i) => visitor.visit_i64(*i), - &Value::Float(f) => visitor.visit_f32(f), - &Value::Double(d) => visitor.visit_f64(d), - Value::Union(_i, u) => match **u { - Value::Null => visitor.visit_unit(), - Value::Boolean(b) => visitor.visit_bool(b), - Value::Int(i) | Value::Date(i) | Value::TimeMillis(i) => visitor.visit_i32(i), - Value::Long(i) - | Value::TimeMicros(i) - | Value::TimestampMillis(i) - | Value::TimestampMicros(i) - | Value::TimestampNanos(i) - | Value::LocalTimestampMillis(i) - | Value::LocalTimestampMicros(i) - | Value::LocalTimestampNanos(i) => visitor.visit_i64(i), - Value::Float(f) => visitor.visit_f32(f), - Value::Double(d) => visitor.visit_f64(d), - Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), - Value::Array(ref fields) => visitor.visit_seq(SeqDeserializer::new(fields)), - Value::String(ref s) => visitor.visit_borrowed_str(s), - Value::Uuid(uuid) => visitor.visit_str(&uuid.to_string()), - Value::Map(ref items) => 
visitor.visit_map(MapDeserializer::new(items)), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes), - Value::Decimal(ref d) => visitor.visit_bytes(&d.to_vec()?), - _ => Err(de::Error::custom(format!( - "unsupported union: {:?}", - self.input - ))), - }, - Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), - Value::Array(ref fields) => visitor.visit_seq(SeqDeserializer::new(fields)), - Value::String(ref s) => visitor.visit_borrowed_str(s), - Value::Uuid(uuid) => visitor.visit_str(&uuid.to_string()), - Value::Map(ref items) => visitor.visit_map(MapDeserializer::new(items)), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes), - Value::Decimal(ref d) => visitor.visit_bytes(&d.to_vec()?), - value => Err(de::Error::custom(format!( - "incorrect value of type: {:?}", - crate::schema::SchemaKind::from(value) - ))), - } - } - - forward_to_deserialize_any! { - bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 - } - - fn deserialize_char(self, _: V) -> Result - where - V: Visitor<'de>, - { - Err(de::Error::custom("avro does not support char")) - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::String(ref s) => visitor.visit_borrowed_str(s), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => ::std::str::from_utf8(bytes) - .map_err(|e| de::Error::custom(e.to_string())) - .and_then(|s| visitor.visit_borrowed_str(s)), - Value::Uuid(ref u) => visitor.visit_str(&u.to_string()), - _ => Err(de::Error::custom(format!( - "Expected a String|Bytes|Fixed|Uuid, but got {:?}", - self.input - ))), - } - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::Enum(_, ref s) | Value::String(ref s) => visitor.visit_borrowed_str(s), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => { - String::from_utf8(bytes.to_owned()) - .map_err(|e| 
de::Error::custom(e.to_string())) - .and_then(|s| visitor.visit_string(s)) - } - Value::Uuid(ref u) => visitor.visit_str(&u.to_string()), - Value::Union(_i, ref x) => match **x { - Value::String(ref s) => visitor.visit_borrowed_str(s), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => { - String::from_utf8(bytes.to_owned()) - .map_err(|e| de::Error::custom(e.to_string())) - .and_then(|s| visitor.visit_string(s)) - } - Value::Uuid(ref u) => visitor.visit_str(&u.to_string()), - _ => Err(de::Error::custom(format!( - "Expected a String|Bytes|Fixed|Uuid, but got {x:?}" - ))), - }, - _ => Err(de::Error::custom(format!( - "Expected a String|Bytes|Fixed|Uuid|Union|Enum, but got {:?}", - self.input - ))), - } - } - - fn deserialize_bytes(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::String(ref s) => visitor.visit_bytes(s.as_bytes()), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => { - if DE_BYTES_BORROWED.get() { - visitor.visit_borrowed_bytes(bytes) - } else { - visitor.visit_bytes(bytes) - } - } - Value::Uuid(ref u) => visitor.visit_bytes(u.as_bytes()), - Value::Decimal(ref d) => visitor.visit_bytes(&d.to_vec()?), - _ => Err(de::Error::custom(format!( - "Expected a String|Bytes|Fixed|Uuid|Decimal, but got {:?}", - self.input - ))), - } - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::String(ref s) => visitor.visit_byte_buf(s.clone().into_bytes()), - Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => { - visitor.visit_byte_buf(bytes.to_owned()) - } - _ => Err(de::Error::custom(format!( - "Expected a String|Bytes|Fixed, but got {:?}", - self.input - ))), - } - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::Union(_i, ref inner) if inner.as_ref() == &Value::Null => visitor.visit_none(), - Value::Union(_i, ref inner) => visitor.visit_some(&Deserializer::new(inner)), - _ 
=> Err(de::Error::custom(format!( - "Expected a Union, but got {:?}", - self.input - ))), - } - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::Null => visitor.visit_unit(), - Value::Union(_i, ref x) => match **x { - Value::Null => visitor.visit_unit(), - _ => Err(de::Error::custom(format!( - "Expected a Null, but got {:?}", - self.input - ))), - }, - _ => Err(de::Error::custom(format!( - "Expected a Null|Union, but got {:?}", - self.input - ))), - } - } - - fn deserialize_unit_struct( - self, - _struct_name: &'static str, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_unit(visitor) - } - - fn deserialize_newtype_struct( - self, - _struct_name: &'static str, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::Array(ref items) => visitor.visit_seq(SeqDeserializer::new(items)), - Value::Union(_i, ref inner) => match **inner { - Value::Array(ref items) => visitor.visit_seq(SeqDeserializer::new(items)), - Value::Null => visitor.visit_seq(SeqDeserializer::new(&[])), - _ => Err(de::Error::custom(format!( - "Expected an Array or Null, but got: {inner:?}" - ))), - }, - _ => Err(de::Error::custom(format!( - "Expected an Array or Union, but got: {:?}", - self.input - ))), - } - } - - fn deserialize_tuple(self, _: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_tuple_struct( - self, - _struct_name: &'static str, - _len: usize, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::Map(ref items) => visitor.visit_map(MapDeserializer::new(items)), - Value::Record(ref fields) => 
visitor.visit_map(RecordDeserializer::new(fields)), - _ => Err(de::Error::custom(format_args!( - "Expected a record or a map. Got: {:?}", - &self.input - ))), - } - } - - fn deserialize_struct( - self, - _struct_name: &'static str, - _fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - match *self.input { - Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), - Value::Union(_i, ref inner) => match **inner { - Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), - Value::Null => visitor.visit_map(RecordDeserializer::new(&[])), - _ => Err(de::Error::custom(format!( - "Expected a Record or Null, got: {inner:?}" - ))), - }, - _ => Err(de::Error::custom(format!( - "Expected a Record or Union, got: {:?}", - self.input - ))), - } - } - - fn deserialize_enum( - self, - _enum_name: &'static str, - _variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - match *self.input { - // This branch can be anything... 
- Value::Record(ref fields) => visitor.visit_enum(EnumDeserializer::new(fields)), - Value::String(ref field) => visitor.visit_enum(EnumUnitDeserializer::new(field)), - // This has to be a unit Enum - Value::Enum(_index, ref field) => visitor.visit_enum(EnumUnitDeserializer::new(field)), - _ => Err(de::Error::custom(format!( - "Expected a Record|Enum, but got {:?}", - self.input - ))), - } - } - - fn deserialize_identifier(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_str(visitor) - } - - fn deserialize_ignored_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_any(visitor) - } - - fn is_human_readable(&self) -> bool { - crate::util::is_human_readable() - } -} - -impl<'de> de::SeqAccess<'de> for SeqDeserializer<'de> { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> - where - T: DeserializeSeed<'de>, - { - match self.input.next() { - Some(item) => seed.deserialize(&Deserializer::new(item)).map(Some), - None => Ok(None), - } - } -} - -impl<'de> de::MapAccess<'de> for MapDeserializer<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> - where - K: DeserializeSeed<'de>, - { - match self.input_keys.next() { - Some(key) => seed - .deserialize(StringDeserializer { - input: (*key).clone(), - }) - .map(Some), - None => Ok(None), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - match self.input_values.next() { - Some(value) => seed.deserialize(&Deserializer::new(value)), - None => Err(de::Error::custom("should not happen - too many values")), - } - } -} - -impl<'de> de::MapAccess<'de> for RecordDeserializer<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> - where - K: DeserializeSeed<'de>, - { - match self.input.next() { - Some(item) => { - let (ref field, ref value) = *item; - self.value = Some(value); - 
seed.deserialize(StringDeserializer { - input: field.clone(), - }) - .map(Some) - } - None => Ok(None), - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: DeserializeSeed<'de>, - { - match self.value.take() { - Some(value) => seed.deserialize(&Deserializer::new(value)), - None => Err(de::Error::custom("should not happen - too many values")), - } - } -} - -#[derive(Clone)] -struct StringDeserializer { - input: String, -} - -impl<'de> de::Deserializer<'de> for StringDeserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_string(self.input) - } - - forward_to_deserialize_any! { - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit option - seq bytes byte_buf map unit_struct newtype_struct - tuple_struct struct tuple enum identifier ignored_any - } -} - -/// Interpret a `Value` as an instance of type `D`. -/// -/// This conversion can fail if the structure of the `Value` does not match the -/// structure expected by `D`. 
-pub fn from_value<'de, D: Deserialize<'de>>(value: &'de Value) -> Result { - let de = Deserializer::new(value); - D::deserialize(&de) -} - -#[cfg(test)] -mod tests { - use num_bigint::BigInt; - use pretty_assertions::assert_eq; - use serde::{Deserialize, Serialize}; - use serial_test::serial; - use std::sync::atomic::Ordering; - use uuid::Uuid; - - use apache_avro_test_helper::TestResult; - - use crate::Decimal; - - use super::*; - - #[derive(PartialEq, Eq, Serialize, Deserialize, Debug)] - pub struct StringEnum { - pub source: String, - } - - #[test] - fn avro_3955_decode_enum() -> TestResult { - let schema_content = r#" -{ - "name": "AccessLog", - "namespace": "com.clevercloud.accesslogs.common.avro", - "type": "record", - "fields": [ - { - "name": "source", - "type": { - "type": "enum", - "name": "SourceType", - "items": "string", - "symbols": ["SOZU", "HAPROXY", "HAPROXY_TCP"] - } - } - ] -} -"#; - - let schema = crate::Schema::parse_str(schema_content)?; - let data = StringEnum { - source: "SOZU".to_string(), - }; - - // encode into avro - let value = crate::to_value(&data)?; - - let mut buf = std::io::Cursor::new(crate::to_avro_datum(&schema, value)?); - - // decode from avro - let value = crate::from_avro_datum(&schema, &mut buf, None)?; - - let decoded_data: StringEnum = crate::from_value(&value)?; - - assert_eq!(decoded_data, data); - - Ok(()) - } - - #[test] - fn avro_3955_encode_enum_data_with_wrong_content() -> TestResult { - let schema_content = r#" -{ - "name": "AccessLog", - "namespace": "com.clevercloud.accesslogs.common.avro", - "type": "record", - "fields": [ - { - "name": "source", - "type": { - "type": "enum", - "name": "SourceType", - "items": "string", - "symbols": ["SOZU", "HAPROXY", "HAPROXY_TCP"] - } - } - ] -} -"#; - - let schema = crate::Schema::parse_str(schema_content)?; - let data = StringEnum { - source: "WRONG_ITEM".to_string(), - }; - - // encode into avro - let value = crate::to_value(data)?; - - // The following sentence have to 
fail has the data is wrong. - let encoded_data = crate::to_avro_datum(&schema, value); - - assert!(encoded_data.is_err()); - - Ok(()) - } - - #[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] - struct Test { - a: i64, - b: String, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - struct TestInner { - a: Test, - b: i32, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - struct TestUnitExternalEnum { - a: UnitExternalEnum, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - enum UnitExternalEnum { - Val1, - Val2, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - struct TestUnitInternalEnum { - a: UnitInternalEnum, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - #[serde(tag = "t")] - enum UnitInternalEnum { - Val1, - Val2, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - struct TestUnitAdjacentEnum { - a: UnitAdjacentEnum, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - #[serde(tag = "t", content = "v")] - enum UnitAdjacentEnum { - Val1, - Val2, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - struct TestUnitUntaggedEnum { - a: UnitUntaggedEnum, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - #[serde(untagged)] - enum UnitUntaggedEnum { - Val1, - Val2, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct TestSingleValueExternalEnum { - a: SingleValueExternalEnum, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - enum SingleValueExternalEnum { - Double(f64), - String(String), - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct TestStructExternalEnum { - a: StructExternalEnum, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - enum StructExternalEnum { - Val1 { x: f32, y: f32 }, - Val2 { x: f32, y: f32 }, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - struct TestTupleExternalEnum { - a: TupleExternalEnum, - } - - #[derive(Debug, Serialize, 
Deserialize, PartialEq)] - enum TupleExternalEnum { - Val1(f32, f32), - Val2(f32, f32, f32), - } - - #[test] - fn test_from_value() -> TestResult { - let test = Value::Record(vec![ - ("a".to_owned(), Value::Long(27)), - ("b".to_owned(), Value::String("foo".to_owned())), - ]); - let expected = Test { - a: 27, - b: "foo".to_owned(), - }; - let final_value: Test = from_value(&test)?; - assert_eq!(final_value, expected); - - let test_inner = Value::Record(vec![ - ( - "a".to_owned(), - Value::Record(vec![ - ("a".to_owned(), Value::Long(27)), - ("b".to_owned(), Value::String("foo".to_owned())), - ]), - ), - ("b".to_owned(), Value::Int(35)), - ]); - - let expected_inner = TestInner { a: expected, b: 35 }; - let final_value: TestInner = from_value(&test_inner)?; - assert_eq!(final_value, expected_inner); - - Ok(()) - } - - #[test] - fn test_from_value_unit_enum() -> TestResult { - let expected = TestUnitExternalEnum { - a: UnitExternalEnum::Val1, - }; - - let test = Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]); - let final_value: TestUnitExternalEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "Error deserializing unit external enum" - ); - - let expected = TestUnitInternalEnum { - a: UnitInternalEnum::Val1, - }; - - let test = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), - )]); - let final_value: TestUnitInternalEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "Error deserializing unit internal enum" - ); - let expected = TestUnitAdjacentEnum { - a: UnitAdjacentEnum::Val1, - }; - - let test = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), - )]); - let final_value: TestUnitAdjacentEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "Error deserializing unit adjacent enum" - ); - let expected = TestUnitUntaggedEnum { - a: UnitUntaggedEnum::Val1, - }; - - let 
test = Value::Record(vec![("a".to_owned(), Value::Null)]); - let final_value: TestUnitUntaggedEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "Error deserializing unit untagged enum" - ); - Ok(()) - } - - #[test] - fn avro_3645_3646_test_from_value_enum() -> TestResult { - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - struct TestNullExternalEnum { - a: NullExternalEnum, - } - - #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] - enum NullExternalEnum { - Val1, - Val2(), - Val3(()), - Val4(u64), - } - - let data = vec![ - ( - TestNullExternalEnum { - a: NullExternalEnum::Val1, - }, - Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]), - ), - ( - TestNullExternalEnum { - a: NullExternalEnum::Val2(), - }, - Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(1, "Val2".to_owned())), - ("value".to_owned(), Value::Union(1, Box::new(Value::Null))), - ]), - )]), - ), - ( - TestNullExternalEnum { - a: NullExternalEnum::Val2(), - }, - Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(1, "Val2".to_owned())), - ("value".to_owned(), Value::Array(vec![])), - ]), - )]), - ), - ( - TestNullExternalEnum { - a: NullExternalEnum::Val3(()), - }, - Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(2, "Val3".to_owned())), - ("value".to_owned(), Value::Union(2, Box::new(Value::Null))), - ]), - )]), - ), - ( - TestNullExternalEnum { - a: NullExternalEnum::Val4(123), - }, - Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(3, "Val4".to_owned())), - ("value".to_owned(), Value::Union(3, Value::Long(123).into())), - ]), - )]), - ), - ]; - - for (expected, test) in data.iter() { - let actual: TestNullExternalEnum = from_value(test)?; - assert_eq!(actual, *expected); - } - - Ok(()) - } - - #[test] - fn test_from_value_single_value_enum() -> TestResult { - let 
expected = TestSingleValueExternalEnum { - a: SingleValueExternalEnum::Double(64.0), - }; - - let test = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::String("Double".to_owned())), - ( - "value".to_owned(), - Value::Union(1, Box::new(Value::Double(64.0))), - ), - ]), - )]); - let final_value: TestSingleValueExternalEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "Error deserializing single value external enum(union)" - ); - - Ok(()) - } - - #[test] - fn test_from_value_struct_enum() -> TestResult { - let expected = TestStructExternalEnum { - a: StructExternalEnum::Val1 { x: 1.0, y: 2.0 }, - }; - - let test = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::String("Val1".to_owned())), - ( - "value".to_owned(), - Value::Union( - 0, - Box::new(Value::Record(vec![ - ("x".to_owned(), Value::Float(1.0)), - ("y".to_owned(), Value::Float(2.0)), - ])), - ), - ), - ]), - )]); - let final_value: TestStructExternalEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "error deserializing struct external enum(union)" - ); - - Ok(()) - } - - #[test] - fn test_avro_3692_from_value_struct_flatten() -> TestResult { - #[derive(Deserialize, PartialEq, Debug)] - struct S1 { - f1: String, - #[serde(flatten)] - inner: S2, - } - #[derive(Deserialize, PartialEq, Debug)] - struct S2 { - f2: String, - } - let expected = S1 { - f1: "Hello".to_owned(), - inner: S2 { - f2: "World".to_owned(), - }, - }; - - let test = Value::Record(vec![ - ("f1".to_owned(), "Hello".into()), - ("f2".to_owned(), "World".into()), - ]); - let final_value: S1 = from_value(&test)?; - assert_eq!(final_value, expected); - - Ok(()) - } - - #[test] - fn test_from_value_tuple_enum() -> TestResult { - let expected = TestTupleExternalEnum { - a: TupleExternalEnum::Val1(1.0, 2.0), - }; - - let test = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), 
Value::String("Val1".to_owned())), - ( - "value".to_owned(), - Value::Union( - 0, - Box::new(Value::Array(vec![Value::Float(1.0), Value::Float(2.0)])), - ), - ), - ]), - )]); - let final_value: TestTupleExternalEnum = from_value(&test)?; - assert_eq!( - final_value, expected, - "error serializing tuple external enum(union)" - ); - - Ok(()) - } - - #[test] - fn test_date() -> TestResult { - let raw_value = 1; - let value = Value::Date(raw_value); - let result = crate::from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_time_millis() -> TestResult { - let raw_value = 1; - let value = Value::TimeMillis(raw_value); - let result = crate::from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_time_micros() -> TestResult { - let raw_value = 1; - let value = Value::TimeMicros(raw_value); - let result = crate::from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_timestamp_millis() -> TestResult { - let raw_value = 1; - let value = Value::TimestampMillis(raw_value); - let result = crate::from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_timestamp_micros() -> TestResult { - let raw_value = 1; - let value = Value::TimestampMicros(raw_value); - let result = from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_avro_3916_timestamp_nanos() -> TestResult { - let raw_value = 1; - let value = Value::TimestampNanos(raw_value); - let result = from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_avro_3853_local_timestamp_millis() -> TestResult { - let raw_value = 1; - let value = Value::LocalTimestampMillis(raw_value); - let result = from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_avro_3853_local_timestamp_micros() -> TestResult { - let raw_value = 1; - let value = Value::LocalTimestampMicros(raw_value); - let result = 
crate::from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_avro_3916_local_timestamp_nanos() -> TestResult { - let raw_value = 1; - let value = Value::LocalTimestampNanos(raw_value); - let result = crate::from_value::(&value)?; - assert_eq!(result, raw_value); - Ok(()) - } - - #[test] - fn test_from_value_uuid_str() -> TestResult { - let raw_value = "9ec535ff-3e2a-45bd-91d3-0a01321b5a49"; - let value = Value::Uuid(Uuid::parse_str(raw_value)?); - let result = from_value::(&value)?; - assert_eq!(result.to_string(), raw_value); - Ok(()) - } - - #[test] - fn test_from_value_uuid_slice() -> TestResult { - let raw_value = &[4, 54, 67, 12, 43, 2, 2, 76, 32, 50, 87, 5, 1, 33, 43, 87]; - let value = Value::Uuid(Uuid::from_slice(raw_value)?); - let result = crate::from_value::(&value)?; - assert_eq!(result.as_bytes(), raw_value); - Ok(()) - } - - #[test] - fn test_from_value_with_union() -> TestResult { - // AVRO-3232 test for deserialize_any on missing fields on the destination struct: - // Error: DeserializeValue("Unsupported union") - // Error: DeserializeValue("incorrect value of type: String") - #[derive(Debug, Deserialize, PartialEq, Eq)] - struct RecordInUnion { - record_in_union: i32, - } - - #[derive(Debug, Deserialize, PartialEq, Eq)] - struct StructWithMissingFields { - a_string: String, - a_record: Option, - an_array: Option<[bool; 2]>, - a_union_map: Option>, - } - - let raw_map: HashMap = [ - ("long_one".to_string(), 1), - ("long_two".to_string(), 2), - ("long_three".to_string(), 3), - ("time_micros_a".to_string(), 123), - ("timestamp_millis_b".to_string(), 234), - ("timestamp_micros_c".to_string(), 345), - ("timestamp_nanos_d".to_string(), 345_001), - ("local_timestamp_millis_d".to_string(), 678), - ("local_timestamp_micros_e".to_string(), 789), - ("local_timestamp_nanos_f".to_string(), 345_002), - ] - .iter() - .cloned() - .collect(); - - let value_map = raw_map - .iter() - .map(|(k, v)| match k { - key if 
key.starts_with("long_") => (k.clone(), Value::Long(*v)), - key if key.starts_with("time_micros_") => (k.clone(), Value::TimeMicros(*v)), - key if key.starts_with("timestamp_millis_") => { - (k.clone(), Value::TimestampMillis(*v)) - } - key if key.starts_with("timestamp_micros_") => { - (k.clone(), Value::TimestampMicros(*v)) - } - key if key.starts_with("timestamp_nanos_") => { - (k.clone(), Value::TimestampNanos(*v)) - } - key if key.starts_with("local_timestamp_millis_") => { - (k.clone(), Value::LocalTimestampMillis(*v)) - } - key if key.starts_with("local_timestamp_micros_") => { - (k.clone(), Value::LocalTimestampMicros(*v)) - } - key if key.starts_with("local_timestamp_nanos_") => { - (k.clone(), Value::LocalTimestampNanos(*v)) - } - _ => unreachable!("unexpected key: {:?}", k), - }) - .collect(); - - let record = Value::Record(vec![ - ( - "a_string".to_string(), - Value::String("a valid message field".to_string()), - ), - ( - "a_non_existing_string".to_string(), - Value::String("a string".to_string()), - ), - ( - "a_union_string".to_string(), - Value::Union(0, Box::new(Value::String("a union string".to_string()))), - ), - ( - "a_union_long".to_string(), - Value::Union(0, Box::new(Value::Long(412))), - ), - ( - "a_union_long".to_string(), - Value::Union(0, Box::new(Value::Long(412))), - ), - ( - "a_time_micros".to_string(), - Value::Union(0, Box::new(Value::TimeMicros(123))), - ), - ( - "a_non_existing_time_micros".to_string(), - Value::Union(0, Box::new(Value::TimeMicros(-123))), - ), - ( - "a_timestamp_millis".to_string(), - Value::Union(0, Box::new(Value::TimestampMillis(234))), - ), - ( - "a_non_existing_timestamp_millis".to_string(), - Value::Union(0, Box::new(Value::TimestampMillis(-234))), - ), - ( - "a_timestamp_micros".to_string(), - Value::Union(0, Box::new(Value::TimestampMicros(345))), - ), - ( - "a_non_existing_timestamp_micros".to_string(), - Value::Union(0, Box::new(Value::TimestampMicros(-345))), - ), - ( - "a_timestamp_nanos".to_string(), - 
Value::Union(0, Box::new(Value::TimestampNanos(345))), - ), - ( - "a_non_existing_timestamp_nanos".to_string(), - Value::Union(0, Box::new(Value::TimestampNanos(-345))), - ), - ( - "a_local_timestamp_millis".to_string(), - Value::Union(0, Box::new(Value::LocalTimestampMillis(678))), - ), - ( - "a_non_existing_local_timestamp_millis".to_string(), - Value::Union(0, Box::new(Value::LocalTimestampMillis(-678))), - ), - ( - "a_local_timestamp_micros".to_string(), - Value::Union(0, Box::new(Value::LocalTimestampMicros(789))), - ), - ( - "a_non_existing_local_timestamp_micros".to_string(), - Value::Union(0, Box::new(Value::LocalTimestampMicros(-789))), - ), - ( - "a_local_timestamp_nanos".to_string(), - Value::Union(0, Box::new(Value::LocalTimestampNanos(789))), - ), - ( - "a_non_existing_local_timestamp_nanos".to_string(), - Value::Union(0, Box::new(Value::LocalTimestampNanos(-789))), - ), - ( - "a_record".to_string(), - Value::Union( - 0, - Box::new(Value::Record(vec![( - "record_in_union".to_string(), - Value::Int(-2), - )])), - ), - ), - ( - "a_non_existing_record".to_string(), - Value::Union( - 0, - Box::new(Value::Record(vec![("blah".to_string(), Value::Int(-22))])), - ), - ), - ( - "an_array".to_string(), - Value::Union( - 0, - Box::new(Value::Array(vec![ - Value::Boolean(true), - Value::Boolean(false), - ])), - ), - ), - ( - "a_non_existing_array".to_string(), - Value::Union( - 0, - Box::new(Value::Array(vec![ - Value::Boolean(false), - Value::Boolean(true), - ])), - ), - ), - ( - "a_union_map".to_string(), - Value::Union(0, Box::new(Value::Map(value_map))), - ), - ( - "a_non_existing_union_map".to_string(), - Value::Union(0, Box::new(Value::Map(HashMap::new()))), - ), - ]); - - let deserialized: StructWithMissingFields = crate::from_value(&record)?; - let reference = StructWithMissingFields { - a_string: "a valid message field".to_string(), - a_record: Some(RecordInUnion { - record_in_union: -2, - }), - an_array: Some([true, false]), - a_union_map: Some(raw_map), 
- }; - assert_eq!(deserialized, reference); - Ok(()) - } - - #[test] - #[serial(avro_3747)] - fn avro_3747_human_readable_false() -> TestResult { - use serde::de::Deserializer as SerdeDeserializer; - - let is_human_readable = false; - crate::util::SERDE_HUMAN_READABLE.store(is_human_readable, Ordering::Release); - - let deser = &Deserializer::new(&Value::Null); - - assert_eq!(deser.is_human_readable(), is_human_readable); - - Ok(()) - } - - #[test] - #[serial(avro_3747)] - fn avro_3747_human_readable_true() -> TestResult { - use serde::de::Deserializer as SerdeDeserializer; - - crate::util::SERDE_HUMAN_READABLE.store(true, Ordering::Release); - - let deser = &Deserializer::new(&Value::Null); - - assert!(deser.is_human_readable()); - - Ok(()) - } - - #[test] - fn test_avro_3892_deserialize_string_from_bytes() -> TestResult { - let raw_value = vec![1, 2, 3, 4]; - let value = Value::Bytes(raw_value.clone()); - let result = from_value::(&value)?; - assert_eq!(result, String::from_utf8(raw_value)?); - Ok(()) - } - - #[test] - fn test_avro_3892_deserialize_str_from_bytes() -> TestResult { - let raw_value = &[1, 2, 3, 4]; - let value = Value::Bytes(raw_value.to_vec()); - let result = from_value::<&str>(&value)?; - assert_eq!(result, std::str::from_utf8(raw_value)?); - Ok(()) - } - - #[derive(Debug)] - struct Bytes(Vec); - - impl<'de> Deserialize<'de> for Bytes { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - struct BytesVisitor; - impl<'de> serde::de::Visitor<'de> for BytesVisitor { - type Value = Bytes; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("a byte array") - } - - fn visit_bytes(self, v: &[u8]) -> Result - where - E: serde::de::Error, - { - Ok(Bytes(v.to_vec())) - } - } - deserializer.deserialize_bytes(BytesVisitor) - } - } - - #[test] - fn test_avro_3892_deserialize_bytes_from_decimal() -> TestResult { - let expected_bytes = 
BigInt::from(123456789).to_signed_bytes_be(); - let value = Value::Decimal(Decimal::from(&expected_bytes)); - let raw_bytes = from_value::(&value)?; - assert_eq!(raw_bytes.0, expected_bytes); - - let value = Value::Union(0, Box::new(Value::Decimal(Decimal::from(&expected_bytes)))); - let raw_bytes = from_value::>(&value)?; - assert_eq!(raw_bytes.unwrap().0, expected_bytes); - Ok(()) - } - - #[test] - fn test_avro_3892_deserialize_bytes_from_uuid() -> TestResult { - let uuid_str = "10101010-2020-2020-2020-101010101010"; - let expected_bytes = Uuid::parse_str(uuid_str)?.as_bytes().to_vec(); - let value = Value::Uuid(Uuid::parse_str(uuid_str)?); - let raw_bytes = from_value::(&value)?; - assert_eq!(raw_bytes.0, expected_bytes); - - let value = Value::Union(0, Box::new(Value::Uuid(Uuid::parse_str(uuid_str)?))); - let raw_bytes = from_value::>(&value)?; - assert_eq!(raw_bytes.unwrap().0, expected_bytes); - Ok(()) - } - - #[test] - fn test_avro_3892_deserialize_bytes_from_fixed() -> TestResult { - let expected_bytes = vec![1, 2, 3, 4]; - let value = Value::Fixed(4, expected_bytes.clone()); - let raw_bytes = from_value::(&value)?; - assert_eq!(raw_bytes.0, expected_bytes); - - let value = Value::Union(0, Box::new(Value::Fixed(4, expected_bytes.clone()))); - let raw_bytes = from_value::>(&value)?; - assert_eq!(raw_bytes.unwrap().0, expected_bytes); - Ok(()) - } - - #[test] - fn test_avro_3892_deserialize_bytes_from_bytes() -> TestResult { - let expected_bytes = vec![1, 2, 3, 4]; - let value = Value::Bytes(expected_bytes.clone()); - let raw_bytes = from_value::(&value)?; - assert_eq!(raw_bytes.0, expected_bytes); - - let value = Value::Union(0, Box::new(Value::Bytes(expected_bytes.clone()))); - let raw_bytes = from_value::>(&value)?; - assert_eq!(raw_bytes.unwrap().0, expected_bytes); - Ok(()) - } -} diff --git a/lang/rust/avro/src/decimal.rs b/lang/rust/avro/src/decimal.rs deleted file mode 100644 index 6854f3424d9..00000000000 --- a/lang/rust/avro/src/decimal.rs +++ 
/dev/null @@ -1,146 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::{AvroResult, Error}; -use num_bigint::{BigInt, Sign}; - -#[derive(Debug, Clone, Eq, serde::Serialize, serde::Deserialize)] -pub struct Decimal { - value: BigInt, - len: usize, -} - -// We only care about value equality, not byte length. Can two equal `BigInt`s have two different -// byte lengths? 
-impl PartialEq for Decimal { - fn eq(&self, other: &Self) -> bool { - self.value == other.value - } -} - -impl Decimal { - pub(crate) fn len(&self) -> usize { - self.len - } - - pub(crate) fn to_vec(&self) -> AvroResult> { - self.to_sign_extended_bytes_with_len(self.len) - } - - pub(crate) fn to_sign_extended_bytes_with_len(&self, len: usize) -> AvroResult> { - let sign_byte = 0xFF * u8::from(self.value.sign() == Sign::Minus); - let mut decimal_bytes = vec![sign_byte; len]; - let raw_bytes = self.value.to_signed_bytes_be(); - let num_raw_bytes = raw_bytes.len(); - let start_byte_index = len.checked_sub(num_raw_bytes).ok_or(Error::SignExtend { - requested: len, - needed: num_raw_bytes, - })?; - decimal_bytes[start_byte_index..].copy_from_slice(&raw_bytes); - Ok(decimal_bytes) - } -} - -impl From for BigInt { - fn from(decimal: Decimal) -> Self { - decimal.value - } -} - -/// Gets the internal byte array representation of a referenced decimal. -/// Usage: -/// ``` -/// use apache_avro::Decimal; -/// use std::convert::TryFrom; -/// -/// let decimal = Decimal::from(vec![1, 24]); -/// let maybe_bytes = >::try_from(&decimal); -/// ``` -impl std::convert::TryFrom<&Decimal> for Vec { - type Error = Error; - - fn try_from(decimal: &Decimal) -> Result { - decimal.to_vec() - } -} - -/// Gets the internal byte array representation of an owned decimal. 
-/// Usage: -/// ``` -/// use apache_avro::Decimal; -/// use std::convert::TryFrom; -/// -/// let decimal = Decimal::from(vec![1, 24]); -/// let maybe_bytes = >::try_from(decimal); -/// ``` -impl std::convert::TryFrom for Vec { - type Error = Error; - - fn try_from(decimal: Decimal) -> Result { - decimal.to_vec() - } -} - -impl> From for Decimal { - fn from(bytes: T) -> Self { - let bytes_ref = bytes.as_ref(); - Self { - value: BigInt::from_signed_bytes_be(bytes_ref), - len: bytes_ref.len(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use apache_avro_test_helper::TestResult; - use pretty_assertions::assert_eq; - - #[test] - fn test_decimal_from_bytes_from_ref_decimal() -> TestResult { - let input = vec![1, 24]; - let d = Decimal::from(&input); - - let output = >::try_from(&d)?; - assert_eq!(output, input); - - Ok(()) - } - - #[test] - fn test_decimal_from_bytes_from_owned_decimal() -> TestResult { - let input = vec![1, 24]; - let d = Decimal::from(&input); - - let output = >::try_from(d)?; - assert_eq!(output, input); - - Ok(()) - } - - #[test] - fn avro_3949_decimal_serde() -> TestResult { - let decimal = Decimal::from(&[1, 2, 3]); - - let ser = serde_json::to_string(&decimal)?; - let de = serde_json::from_str(&ser)?; - std::assert_eq!(decimal, de); - - Ok(()) - } -} diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs deleted file mode 100644 index 46e3381f3c2..00000000000 --- a/lang/rust/avro/src/decode.rs +++ /dev/null @@ -1,912 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::{ - bigdecimal::deserialize_big_decimal, - decimal::Decimal, - duration::Duration, - encode::encode_long, - schema::{ - DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, - Schema, - }, - types::Value, - util::{safe_len, zag_i32, zag_i64}, - AvroResult, Error, -}; -use std::{ - borrow::Borrow, - collections::HashMap, - io::{ErrorKind, Read}, - str::FromStr, -}; -use uuid::Uuid; - -#[inline] -pub(crate) fn decode_long(reader: &mut R) -> AvroResult { - zag_i64(reader).map(Value::Long) -} - -#[inline] -fn decode_int(reader: &mut R) -> AvroResult { - zag_i32(reader).map(Value::Int) -} - -#[inline] -pub(crate) fn decode_len(reader: &mut R) -> AvroResult { - let len = zag_i64(reader)?; - safe_len(usize::try_from(len).map_err(|e| Error::ConvertI64ToUsize(e, len))?) -} - -/// Decode the length of a sequence. -/// -/// Maps and arrays are 0-terminated, 0i64 is also encoded as 0 in Avro reading a length of 0 means -/// the end of the map or array. -fn decode_seq_len(reader: &mut R) -> AvroResult { - let raw_len = zag_i64(reader)?; - safe_len( - usize::try_from(match raw_len.cmp(&0) { - std::cmp::Ordering::Equal => return Ok(0), - std::cmp::Ordering::Less => { - let _size = zag_i64(reader)?; - raw_len.checked_neg().ok_or(Error::IntegerOverflow)? - } - std::cmp::Ordering::Greater => raw_len, - }) - .map_err(|e| Error::ConvertI64ToUsize(e, raw_len))?, - ) -} - -/// Decode a `Value` from avro format given its `Schema`. 
-pub fn decode(schema: &Schema, reader: &mut R) -> AvroResult { - let rs = ResolvedSchema::try_from(schema)?; - decode_internal(schema, rs.get_names(), &None, reader) -} - -pub(crate) fn decode_internal>( - schema: &Schema, - names: &HashMap, - enclosing_namespace: &Namespace, - reader: &mut R, -) -> AvroResult { - match *schema { - Schema::Null => Ok(Value::Null), - Schema::Boolean => { - let mut buf = [0u8; 1]; - match reader.read_exact(&mut buf[..]) { - Ok(_) => match buf[0] { - 0u8 => Ok(Value::Boolean(false)), - 1u8 => Ok(Value::Boolean(true)), - _ => Err(Error::BoolValue(buf[0])), - }, - Err(io_err) => { - if let ErrorKind::UnexpectedEof = io_err.kind() { - Ok(Value::Null) - } else { - Err(Error::ReadBoolean(io_err)) - } - } - } - } - Schema::Decimal(DecimalSchema { ref inner, .. }) => match &**inner { - Schema::Fixed { .. } => { - match decode_internal(inner, names, enclosing_namespace, reader)? { - Value::Fixed(_, bytes) => Ok(Value::Decimal(Decimal::from(bytes))), - value => Err(Error::FixedValue(value)), - } - } - Schema::Bytes => match decode_internal(inner, names, enclosing_namespace, reader)? { - Value::Bytes(bytes) => Ok(Value::Decimal(Decimal::from(bytes))), - value => Err(Error::BytesValue(value)), - }, - schema => Err(Error::ResolveDecimalSchema(schema.into())), - }, - Schema::BigDecimal => { - match decode_internal(&Schema::Bytes, names, enclosing_namespace, reader)? 
{ - Value::Bytes(bytes) => deserialize_big_decimal(&bytes).map(Value::BigDecimal), - value => Err(Error::BytesValue(value)), - } - } - Schema::Uuid => { - let len = decode_len(reader)?; - let mut bytes = vec![0u8; len]; - reader.read_exact(&mut bytes).map_err(Error::ReadIntoBuf)?; - - // use a Vec to be able re-read the bytes more than once if needed - let mut reader = Vec::with_capacity(len + 1); - encode_long(len as i64, &mut reader); - reader.extend_from_slice(&bytes); - - let decode_from_string = |reader| match decode_internal( - &Schema::String, - names, - enclosing_namespace, - reader, - )? { - Value::String(ref s) => Uuid::from_str(s).map_err(Error::ConvertStrToUuid), - value => Err(Error::GetUuidFromStringValue(value)), - }; - - let uuid: Uuid = if len == 16 { - // most probably a Fixed schema - let fixed_result = decode_internal( - &Schema::Fixed(FixedSchema { - size: 16, - name: "uuid".into(), - aliases: None, - doc: None, - default: None, - attributes: Default::default(), - }), - names, - enclosing_namespace, - &mut bytes.as_slice(), - ); - if fixed_result.is_ok() { - match fixed_result? { - Value::Fixed(ref size, ref bytes) => { - if *size != 16 { - return Err(Error::ConvertFixedToUuid(*size)); - } - Uuid::from_slice(bytes).map_err(Error::ConvertSliceToUuid)? - } - _ => decode_from_string(&mut reader.as_slice())?, - } - } else { - // try to decode as string - decode_from_string(&mut reader.as_slice())? - } - } else { - // definitely a string - decode_from_string(&mut reader.as_slice())? 
- }; - Ok(Value::Uuid(uuid)) - } - Schema::Int => decode_int(reader), - Schema::Date => zag_i32(reader).map(Value::Date), - Schema::TimeMillis => zag_i32(reader).map(Value::TimeMillis), - Schema::Long => decode_long(reader), - Schema::TimeMicros => zag_i64(reader).map(Value::TimeMicros), - Schema::TimestampMillis => zag_i64(reader).map(Value::TimestampMillis), - Schema::TimestampMicros => zag_i64(reader).map(Value::TimestampMicros), - Schema::TimestampNanos => zag_i64(reader).map(Value::TimestampNanos), - Schema::LocalTimestampMillis => zag_i64(reader).map(Value::LocalTimestampMillis), - Schema::LocalTimestampMicros => zag_i64(reader).map(Value::LocalTimestampMicros), - Schema::LocalTimestampNanos => zag_i64(reader).map(Value::LocalTimestampNanos), - Schema::Duration => { - let mut buf = [0u8; 12]; - reader.read_exact(&mut buf).map_err(Error::ReadDuration)?; - Ok(Value::Duration(Duration::from(buf))) - } - Schema::Float => { - let mut buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut buf[..]).map_err(Error::ReadFloat)?; - Ok(Value::Float(f32::from_le_bytes(buf))) - } - Schema::Double => { - let mut buf = [0u8; std::mem::size_of::()]; - reader.read_exact(&mut buf[..]).map_err(Error::ReadDouble)?; - Ok(Value::Double(f64::from_le_bytes(buf))) - } - Schema::Bytes => { - let len = decode_len(reader)?; - let mut buf = vec![0u8; len]; - reader.read_exact(&mut buf).map_err(Error::ReadBytes)?; - Ok(Value::Bytes(buf)) - } - Schema::String => { - let len = decode_len(reader)?; - let mut buf = vec![0u8; len]; - match reader.read_exact(&mut buf) { - Ok(_) => Ok(Value::String( - String::from_utf8(buf).map_err(Error::ConvertToUtf8)?, - )), - Err(io_err) => { - if let ErrorKind::UnexpectedEof = io_err.kind() { - Ok(Value::Null) - } else { - Err(Error::ReadString(io_err)) - } - } - } - } - Schema::Fixed(FixedSchema { size, .. 
}) => { - let mut buf = vec![0u8; size]; - reader - .read_exact(&mut buf) - .map_err(|e| Error::ReadFixed(e, size))?; - Ok(Value::Fixed(size, buf)) - } - Schema::Array(ref inner) => { - let mut items = Vec::new(); - - loop { - let len = decode_seq_len(reader)?; - if len == 0 { - break; - } - - items.reserve(len); - for _ in 0..len { - items.push(decode_internal( - &inner.items, - names, - enclosing_namespace, - reader, - )?); - } - } - - Ok(Value::Array(items)) - } - Schema::Map(ref inner) => { - let mut items = HashMap::new(); - - loop { - let len = decode_seq_len(reader)?; - if len == 0 { - break; - } - - items.reserve(len); - for _ in 0..len { - match decode_internal(&Schema::String, names, enclosing_namespace, reader)? { - Value::String(key) => { - let value = - decode_internal(&inner.types, names, enclosing_namespace, reader)?; - items.insert(key, value); - } - value => return Err(Error::MapKeyType(value.into())), - } - } - } - - Ok(Value::Map(items)) - } - Schema::Union(ref inner) => match zag_i64(reader) { - Ok(index) => { - let variants = inner.variants(); - let variant = variants - .get(usize::try_from(index).map_err(|e| Error::ConvertI64ToUsize(e, index))?) - .ok_or(Error::GetUnionVariant { - index, - num_variants: variants.len(), - })?; - let value = decode_internal(variant, names, enclosing_namespace, reader)?; - Ok(Value::Union(index as u32, Box::new(value))) - } - Err(Error::ReadVariableIntegerBytes(io_err)) => { - if let ErrorKind::UnexpectedEof = io_err.kind() { - Ok(Value::Union(0, Box::new(Value::Null))) - } else { - Err(Error::ReadVariableIntegerBytes(io_err)) - } - } - Err(io_err) => Err(io_err), - }, - Schema::Record(RecordSchema { - ref name, - ref fields, - .. - }) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - // Benchmarks indicate ~10% improvement using this method. - let mut items = Vec::with_capacity(fields.len()); - for field in fields { - // TODO: This clone is also expensive. 
See if we can do away with it... - items.push(( - field.name.clone(), - decode_internal( - &field.schema, - names, - &fully_qualified_name.namespace, - reader, - )?, - )); - } - Ok(Value::Record(items)) - } - Schema::Enum(EnumSchema { ref symbols, .. }) => { - Ok(if let Value::Int(raw_index) = decode_int(reader)? { - let index = usize::try_from(raw_index) - .map_err(|e| Error::ConvertI32ToUsize(e, raw_index))?; - if (0..symbols.len()).contains(&index) { - let symbol = symbols[index].clone(); - Value::Enum(raw_index as u32, symbol) - } else { - return Err(Error::GetEnumValue { - index, - nsymbols: symbols.len(), - }); - } - } else { - return Err(Error::GetEnumUnknownIndexValue); - }) - } - Schema::Ref { ref name } => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if let Some(resolved) = names.get(&fully_qualified_name) { - decode_internal( - resolved.borrow(), - names, - &fully_qualified_name.namespace, - reader, - ) - } else { - Err(Error::SchemaResolutionError(fully_qualified_name)) - } - } - } -} - -#[cfg(test)] -#[allow(clippy::expect_fun_call)] -mod tests { - use crate::{ - decode::decode, - encode::{encode, tests::success}, - schema::{DecimalSchema, FixedSchema, Schema}, - types::{ - Value, - Value::{Array, Int, Map}, - }, - Decimal, - }; - use apache_avro_test_helper::TestResult; - use pretty_assertions::assert_eq; - use std::collections::HashMap; - use uuid::Uuid; - - #[test] - fn test_decode_array_without_size() -> TestResult { - let mut input: &[u8] = &[6, 2, 4, 6, 0]; - let result = decode(&Schema::array(Schema::Int), &mut input); - assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); - - Ok(()) - } - - #[test] - fn test_decode_array_with_size() -> TestResult { - let mut input: &[u8] = &[5, 6, 2, 4, 6, 0]; - let result = decode(&Schema::array(Schema::Int), &mut input); - assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); - - Ok(()) - } - - #[test] - fn test_decode_map_without_size() -> TestResult { - let mut 
input: &[u8] = &[0x02, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; - let result = decode(&Schema::map(Schema::Int), &mut input); - let mut expected = HashMap::new(); - expected.insert(String::from("test"), Int(1)); - assert_eq!(Map(expected), result?); - - Ok(()) - } - - #[test] - fn test_decode_map_with_size() -> TestResult { - let mut input: &[u8] = &[0x01, 0x0C, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; - let result = decode(&Schema::map(Schema::Int), &mut input); - let mut expected = HashMap::new(); - expected.insert(String::from("test"), Int(1)); - assert_eq!(Map(expected), result?); - - Ok(()) - } - - #[test] - fn test_negative_decimal_value() -> TestResult { - use crate::{encode::encode, schema::Name}; - use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed(FixedSchema { - size: 2, - doc: None, - name: Name::new("decimal")?, - aliases: None, - default: None, - attributes: Default::default(), - })); - let schema = Schema::Decimal(DecimalSchema { - inner, - precision: 4, - scale: 2, - }); - let bigint = (-423).to_bigint().unwrap(); - let value = Value::Decimal(Decimal::from(bigint.to_signed_bytes_be())); - - let mut buffer = Vec::new(); - encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); - - let mut bytes = &buffer[..]; - let result = decode(&schema, &mut bytes)?; - assert_eq!(result, value); - - Ok(()) - } - - #[test] - fn test_decode_decimal_with_bigger_than_necessary_size() -> TestResult { - use crate::{encode::encode, schema::Name}; - use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed(FixedSchema { - size: 13, - name: Name::new("decimal")?, - aliases: None, - doc: None, - default: None, - attributes: Default::default(), - })); - let schema = Schema::Decimal(DecimalSchema { - inner, - precision: 4, - scale: 2, - }); - let value = Value::Decimal(Decimal::from( - ((-423).to_bigint().unwrap()).to_signed_bytes_be(), - )); - let mut buffer = Vec::::new(); - - encode(&value, &schema, &mut buffer).expect(&success(&value, 
&schema)); - let mut bytes: &[u8] = &buffer[..]; - let result = decode(&schema, &mut bytes)?; - assert_eq!(result, value); - - Ok(()) - } - - #[test] - fn test_avro_3448_recursive_definition_decode_union() -> TestResult { - // if encoding fails in this test check the corresponding test in encode - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":[ "null", { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - }] - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value1 = Value::Record(vec![ - ("a".into(), Value::Union(1, Box::new(inner_value1))), - ("b".into(), inner_value2.clone()), - ]); - let mut buf = Vec::new(); - encode(&outer_value1, &schema, &mut buf).expect(&success(&outer_value1, &schema)); - assert!(!buf.is_empty()); - let mut bytes = &buf[..]; - assert_eq!( - outer_value1, - decode(&schema, &mut bytes).expect(&format!( - "Failed to decode using recursive definitions with schema:\n {:?}\n", - &schema - )) - ); - - let mut buf = Vec::new(); - let outer_value2 = Value::Record(vec![ - ("a".into(), Value::Union(0, Box::new(Value::Null))), - ("b".into(), inner_value2), - ]); - encode(&outer_value2, &schema, &mut buf).expect(&success(&outer_value2, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_value2, - decode(&schema, &mut bytes).expect(&format!( - "Failed to decode using recursive definitions with schema:\n {:?}\n", - &schema - )) - ); - - Ok(()) - } - - #[test] - fn test_avro_3448_recursive_definition_decode_array() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"array", - "items": { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - 
"type":"int" - }] - } - } - }, - { - "name":"b", - "type": "Inner" - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ("a".into(), Value::Array(vec![inner_value1])), - ("b".into(), inner_value2), - ]); - let mut buf = Vec::new(); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_value, - decode(&schema, &mut bytes).expect(&format!( - "Failed to decode using recursive definitions with schema:\n {:?}\n", - &schema - )) - ); - - Ok(()) - } - - #[test] - fn test_avro_3448_recursive_definition_decode_map() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"map", - "values": { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - } - }, - { - "name":"b", - "type": "Inner" - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ( - "a".into(), - Value::Map(vec![("akey".into(), inner_value1)].into_iter().collect()), - ), - ("b".into(), inner_value2), - ]); - let mut buf = Vec::new(); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_value, - decode(&schema, &mut bytes).expect(&format!( - "Failed to decode using recursive definitions with schema:\n {:?}\n", - &schema - )) - ); - - Ok(()) - } - - #[test] - fn test_avro_3448_proper_multi_level_decoding_middle_namespace() -> TestResult { - // if encoding fails in this test check the corresponding test in encode - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - 
"name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "middle_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - let mut buf = Vec::new(); - encode(&outer_record_variation_1, &schema, &mut buf) - .expect(&success(&outer_record_variation_1, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_record_variation_1, - decode(&schema, &mut bytes).expect(&format!( - "Failed to Decode with recursively defined namespace with schema:\n {:?}\n", - &schema - )) - ); - - let mut buf = Vec::new(); - encode(&outer_record_variation_2, &schema, &mut buf) - .expect(&success(&outer_record_variation_2, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_record_variation_2, 
- decode(&schema, &mut bytes).expect(&format!( - "Failed to Decode with recursively defined namespace with schema:\n {:?}\n", - &schema - )) - ); - - let mut buf = Vec::new(); - encode(&outer_record_variation_3, &schema, &mut buf) - .expect(&success(&outer_record_variation_3, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_record_variation_3, - decode(&schema, &mut bytes).expect(&format!( - "Failed to Decode with recursively defined namespace with schema:\n {:?}\n", - &schema - )) - ); - - Ok(()) - } - - #[test] - fn test_avro_3448_proper_multi_level_decoding_inner_namespace() -> TestResult { - // if encoding fails in this test check the corresponding test in encode - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "namespace":"inner_namespace", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, 
Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - let mut buf = Vec::new(); - encode(&outer_record_variation_1, &schema, &mut buf) - .expect(&success(&outer_record_variation_1, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_record_variation_1, - decode(&schema, &mut bytes).expect(&format!( - "Failed to Decode with recursively defined namespace with schema:\n {:?}\n", - &schema - )) - ); - - let mut buf = Vec::new(); - encode(&outer_record_variation_2, &schema, &mut buf) - .expect(&success(&outer_record_variation_2, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_record_variation_2, - decode(&schema, &mut bytes).expect(&format!( - "Failed to Decode with recursively defined namespace with schema:\n {:?}\n", - &schema - )) - ); - - let mut buf = Vec::new(); - encode(&outer_record_variation_3, &schema, &mut buf) - .expect(&success(&outer_record_variation_3, &schema)); - let mut bytes = &buf[..]; - assert_eq!( - outer_record_variation_3, - decode(&schema, &mut bytes).expect(&format!( - "Failed to Decode with recursively defined namespace with schema:\n {:?}\n", - &schema - )) - ); - - Ok(()) - } - - #[test] - fn avro_3926_encode_decode_uuid_to_string() -> TestResult { - use crate::encode::encode; - - let schema = Schema::String; - let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); - - let mut buffer = Vec::new(); - encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); - - let result = decode(&Schema::Uuid, &mut &buffer[..])?; - assert_eq!(result, value); - - Ok(()) - } - - #[test] - fn avro_3926_encode_decode_uuid_to_fixed() -> TestResult { - use crate::encode::encode; - - let schema = Schema::Fixed(FixedSchema { - size: 16, - name: "uuid".into(), - 
aliases: None, - doc: None, - default: None, - attributes: Default::default(), - }); - let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); - - let mut buffer = Vec::new(); - encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); - - let result = decode(&Schema::Uuid, &mut &buffer[..])?; - assert_eq!(result, value); - - Ok(()) - } -} diff --git a/lang/rust/avro/src/duration.rs b/lang/rust/avro/src/duration.rs deleted file mode 100644 index 4aa6bd53a0c..00000000000 --- a/lang/rust/avro/src/duration.rs +++ /dev/null @@ -1,145 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -/// A struct representing duration that hides the details of endianness and conversion between -/// platform-native u32 and byte arrays. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Duration { - months: Months, - days: Days, - millis: Millis, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Months(u32); - -impl Months { - pub fn new(months: u32) -> Self { - Self(months) - } - - fn as_bytes(&self) -> [u8; 4] { - self.0.to_le_bytes() - } -} - -impl From for u32 { - fn from(days: Months) -> Self { - days.0 - } -} - -impl From<[u8; 4]> for Months { - fn from(bytes: [u8; 4]) -> Self { - Self(u32::from_le_bytes(bytes)) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Days(u32); - -impl Days { - pub fn new(days: u32) -> Self { - Self(days) - } - - fn as_bytes(&self) -> [u8; 4] { - self.0.to_le_bytes() - } -} - -impl From for u32 { - fn from(days: Days) -> Self { - days.0 - } -} - -impl From<[u8; 4]> for Days { - fn from(bytes: [u8; 4]) -> Self { - Self(u32::from_le_bytes(bytes)) - } -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Millis(u32); - -impl Millis { - pub fn new(millis: u32) -> Self { - Self(millis) - } - - fn as_bytes(&self) -> [u8; 4] { - self.0.to_le_bytes() - } -} - -impl From for u32 { - fn from(days: Millis) -> Self { - days.0 - } -} - -impl From<[u8; 4]> for Millis { - fn from(bytes: [u8; 4]) -> Self { - Self(u32::from_le_bytes(bytes)) - } -} - -impl Duration { - /// Construct a new `Duration`. - pub fn new(months: Months, days: Days, millis: Millis) -> Self { - Self { - months, - days, - millis, - } - } - - /// Return the number of months in this duration. - pub fn months(&self) -> Months { - self.months - } - - /// Return the number of days in this duration. - pub fn days(&self) -> Days { - self.days - } - - /// Return the number of milliseconds in this duration. 
- pub fn millis(&self) -> Millis { - self.millis - } -} - -impl From for [u8; 12] { - fn from(duration: Duration) -> Self { - let mut bytes = [0u8; 12]; - bytes[0..4].copy_from_slice(&duration.months.as_bytes()); - bytes[4..8].copy_from_slice(&duration.days.as_bytes()); - bytes[8..12].copy_from_slice(&duration.millis.as_bytes()); - bytes - } -} - -impl From<[u8; 12]> for Duration { - fn from(bytes: [u8; 12]) -> Self { - Self { - months: Months::from([bytes[0], bytes[1], bytes[2], bytes[3]]), - days: Days::from([bytes[4], bytes[5], bytes[6], bytes[7]]), - millis: Millis::from([bytes[8], bytes[9], bytes[10], bytes[11]]), - } - } -} diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs deleted file mode 100644 index 214673ec9d4..00000000000 --- a/lang/rust/avro/src/encode.rs +++ /dev/null @@ -1,919 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use crate::{ - bigdecimal::serialize_big_decimal, - schema::{ - DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, - Schema, SchemaKind, UnionSchema, - }, - types::{Value, ValueKind}, - util::{zig_i32, zig_i64}, - AvroResult, Error, -}; -use std::{borrow::Borrow, collections::HashMap}; - -/// Encode a `Value` into avro format. -/// -/// **NOTE** This will not perform schema validation. The value is assumed to -/// be valid with regards to the schema. Schema are needed only to guide the -/// encoding for complex type values. -pub fn encode(value: &Value, schema: &Schema, buffer: &mut Vec) -> AvroResult<()> { - let rs = ResolvedSchema::try_from(schema)?; - encode_internal(value, schema, rs.get_names(), &None, buffer) -} - -pub(crate) fn encode_bytes + ?Sized>(s: &B, buffer: &mut Vec) { - let bytes = s.as_ref(); - encode_long(bytes.len() as i64, buffer); - buffer.extend_from_slice(bytes); -} - -pub(crate) fn encode_long(i: i64, buffer: &mut Vec) { - zig_i64(i, buffer) -} - -fn encode_int(i: i32, buffer: &mut Vec) { - zig_i32(i, buffer) -} - -pub(crate) fn encode_internal>( - value: &Value, - schema: &Schema, - names: &HashMap, - enclosing_namespace: &Namespace, - buffer: &mut Vec, -) -> AvroResult<()> { - if let Schema::Ref { ref name } = schema { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - let resolved = names - .get(&fully_qualified_name) - .ok_or(Error::SchemaResolutionError(fully_qualified_name))?; - return encode_internal(value, resolved.borrow(), names, enclosing_namespace, buffer); - } - - match value { - Value::Null => { - if let Schema::Union(union) = schema { - match union.schemas.iter().position(|sch| *sch == Schema::Null) { - None => { - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Null, - supported_schema: vec![SchemaKind::Null, SchemaKind::Union], - }) - } - Some(p) => encode_long(p as i64, buffer), - } - } // else {()} - } - Value::Boolean(b) => 
buffer.push(u8::from(*b)), - // Pattern | Pattern here to signify that these _must_ have the same encoding. - Value::Int(i) | Value::Date(i) | Value::TimeMillis(i) => encode_int(*i, buffer), - Value::Long(i) - | Value::TimestampMillis(i) - | Value::TimestampMicros(i) - | Value::TimestampNanos(i) - | Value::LocalTimestampMillis(i) - | Value::LocalTimestampMicros(i) - | Value::LocalTimestampNanos(i) - | Value::TimeMicros(i) => encode_long(*i, buffer), - Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()), - Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()), - Value::Decimal(decimal) => match schema { - Schema::Decimal(DecimalSchema { inner, .. }) => match *inner.clone() { - Schema::Fixed(FixedSchema { size, .. }) => { - let bytes = decimal.to_sign_extended_bytes_with_len(size).unwrap(); - let num_bytes = bytes.len(); - if num_bytes != size { - return Err(Error::EncodeDecimalAsFixedError(num_bytes, size)); - } - encode(&Value::Fixed(size, bytes), inner, buffer)? - } - Schema::Bytes => encode(&Value::Bytes(decimal.try_into()?), inner, buffer)?, - _ => { - return Err(Error::ResolveDecimalSchema(SchemaKind::from( - *inner.clone(), - ))); - } - }, - _ => { - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Decimal, - supported_schema: vec![SchemaKind::Decimal], - }); - } - }, - &Value::Duration(duration) => { - let slice: [u8; 12] = duration.into(); - buffer.extend_from_slice(&slice); - } - Value::Uuid(uuid) => match *schema { - Schema::Uuid | Schema::String => encode_bytes( - // we need the call .to_string() to properly convert ASCII to UTF-8 - #[allow(clippy::unnecessary_to_owned)] - &uuid.to_string(), - buffer, - ), - Schema::Fixed(FixedSchema { size, .. 
}) => { - if size != 16 { - return Err(Error::ConvertFixedToUuid(size)); - } - - let bytes = uuid.as_bytes(); - encode_bytes(bytes, buffer) - } - _ => { - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Uuid, - supported_schema: vec![SchemaKind::Uuid, SchemaKind::Fixed], - }); - } - }, - Value::BigDecimal(bg) => { - let buf: Vec = serialize_big_decimal(bg); - buffer.extend_from_slice(buf.as_slice()); - } - Value::Bytes(bytes) => match *schema { - Schema::Bytes => encode_bytes(bytes, buffer), - Schema::Fixed { .. } => buffer.extend(bytes), - _ => { - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Bytes, - supported_schema: vec![SchemaKind::Bytes, SchemaKind::Fixed], - }); - } - }, - Value::String(s) => match *schema { - Schema::String | Schema::Uuid => { - encode_bytes(s, buffer); - } - Schema::Enum(EnumSchema { ref symbols, .. }) => { - if let Some(index) = symbols.iter().position(|item| item == s) { - encode_int(index as i32, buffer); - } else { - error!("Invalid symbol string {:?}.", &s[..]); - return Err(Error::GetEnumSymbol(s.clone())); - } - } - _ => { - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::String, - supported_schema: vec![SchemaKind::String, SchemaKind::Enum], - }); - } - }, - Value::Fixed(_, bytes) => buffer.extend(bytes), - Value::Enum(i, _) => encode_int(*i as i32, buffer), - Value::Union(idx, item) => { - if let Schema::Union(ref inner) = *schema { - let inner_schema = inner - .schemas - .get(*idx as usize) - .expect("Invalid Union validation occurred"); - encode_long(*idx as i64, buffer); - encode_internal(item, inner_schema, names, enclosing_namespace, buffer)?; - } else { - error!("invalid schema type for Union: {:?}", schema); - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Union, - supported_schema: vec![SchemaKind::Union], - }); - } - } - Value::Array(items) => { - if let Schema::Array(ref inner) = *schema { - if !items.is_empty() { - 
encode_long(items.len() as i64, buffer); - for item in items.iter() { - encode_internal(item, &inner.items, names, enclosing_namespace, buffer)?; - } - } - buffer.push(0u8); - } else { - error!("invalid schema type for Array: {:?}", schema); - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Array, - supported_schema: vec![SchemaKind::Array], - }); - } - } - Value::Map(items) => { - if let Schema::Map(ref inner) = *schema { - if !items.is_empty() { - encode_long(items.len() as i64, buffer); - for (key, value) in items { - encode_bytes(key, buffer); - encode_internal(value, &inner.types, names, enclosing_namespace, buffer)?; - } - } - buffer.push(0u8); - } else { - error!("invalid schema type for Map: {:?}", schema); - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Map, - supported_schema: vec![SchemaKind::Map], - }); - } - } - Value::Record(value_fields) => { - if let Schema::Record(RecordSchema { - ref name, - fields: ref schema_fields, - .. - }) = *schema - { - let record_namespace = name.fully_qualified_name(enclosing_namespace).namespace; - - let mut lookup = HashMap::new(); - value_fields.iter().for_each(|(name, field)| { - lookup.insert(name, field); - }); - - for schema_field in schema_fields.iter() { - let name = &schema_field.name; - let value_opt = lookup.get(name).or_else(|| { - if let Some(aliases) = &schema_field.aliases { - aliases.iter().find_map(|alias| lookup.get(alias)) - } else { - None - } - }); - - if let Some(value) = value_opt { - encode_internal( - value, - &schema_field.schema, - names, - &record_namespace, - buffer, - )?; - } else { - return Err(Error::NoEntryInLookupTable( - name.clone(), - format!("{lookup:?}"), - )); - } - } - } else if let Schema::Union(UnionSchema { schemas, .. 
}) = schema { - let original_size = buffer.len(); - for (index, schema) in schemas.iter().enumerate() { - encode_long(index as i64, buffer); - match encode_internal(value, schema, names, enclosing_namespace, buffer) { - Ok(_) => return Ok(()), - Err(_) => { - buffer.truncate(original_size); //undo any partial encoding - } - } - } - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Record, - supported_schema: vec![SchemaKind::Record, SchemaKind::Union], - }); - } else { - error!("invalid schema type for Record: {:?}", schema); - return Err(Error::EncodeValueAsSchemaError { - value_kind: ValueKind::Record, - supported_schema: vec![SchemaKind::Record, SchemaKind::Union], - }); - } - } - }; - Ok(()) -} - -pub fn encode_to_vec(value: &Value, schema: &Schema) -> AvroResult> { - let mut buffer = Vec::new(); - encode(value, schema, &mut buffer)?; - Ok(buffer) -} - -#[cfg(test)] -#[allow(clippy::expect_fun_call)] -pub(crate) mod tests { - use super::*; - use apache_avro_test_helper::TestResult; - use pretty_assertions::assert_eq; - use uuid::Uuid; - - pub(crate) fn success(value: &Value, schema: &Schema) -> String { - format!( - "Value: {:?}\n should encode with schema:\n{:?}", - &value, &schema - ) - } - - #[test] - fn test_encode_empty_array() { - let mut buf = Vec::new(); - let empty: Vec = Vec::new(); - encode( - &Value::Array(empty.clone()), - &Schema::array(Schema::Int), - &mut buf, - ) - .expect(&success(&Value::Array(empty), &Schema::array(Schema::Int))); - assert_eq!(vec![0u8], buf); - } - - #[test] - fn test_encode_empty_map() { - let mut buf = Vec::new(); - let empty: HashMap = HashMap::new(); - encode( - &Value::Map(empty.clone()), - &Schema::map(Schema::Int), - &mut buf, - ) - .expect(&success(&Value::Map(empty), &Schema::map(Schema::Int))); - assert_eq!(vec![0u8], buf); - } - - #[test] - fn test_avro_3433_recursive_definition_encode_record() { - let mut buf = Vec::new(); - let schema = Schema::parse_str( - r#" - { - "type":"record", - 
"name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - ) - .unwrap(); - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = - Value::Record(vec![("a".into(), inner_value1), ("b".into(), inner_value2)]); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3433_recursive_definition_encode_array() { - let mut buf = Vec::new(); - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"array", - "items": { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - } - }, - { - "name":"b", - "type": { - "type":"map", - "values":"Inner" - } - } - ] - }"#, - ) - .unwrap(); - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ("a".into(), Value::Array(vec![inner_value1])), - ( - "b".into(), - Value::Map(vec![("akey".into(), inner_value2)].into_iter().collect()), - ), - ]); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3433_recursive_definition_encode_map() { - let mut buf = Vec::new(); - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type": { - "type":"map", - "values":"Inner" - } - } - ] - }"#, - ) - .unwrap(); - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let 
inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ("a".into(), inner_value1), - ( - "b".into(), - Value::Map(vec![("akey".into(), inner_value2)].into_iter().collect()), - ), - ]); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3433_recursive_definition_encode_record_wrapper() { - let mut buf = Vec::new(); - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type": { - "type":"record", - "name": "InnerWrapper", - "fields": [ { - "name":"j", - "type":"Inner" - }] - } - } - ] - }"#, - ) - .unwrap(); - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![( - "j".into(), - Value::Record(vec![("z".into(), Value::Int(6))]), - )]); - let outer_value = - Value::Record(vec![("a".into(), inner_value1), ("b".into(), inner_value2)]); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3433_recursive_definition_encode_map_and_array() { - let mut buf = Vec::new(); - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"map", - "values": { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - } - }, - { - "name":"b", - "type": { - "type":"array", - "items":"Inner" - } - } - ] - }"#, - ) - .unwrap(); - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ( - "a".into(), - Value::Map(vec![("akey".into(), inner_value2)].into_iter().collect()), - ), - 
("b".into(), Value::Array(vec![inner_value1])), - ]); - encode(&outer_value, &schema, &mut buf).expect(&success(&outer_value, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3433_recursive_definition_encode_union() { - let mut buf = Vec::new(); - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":["null", { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - }] - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - ) - .unwrap(); - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value1 = Value::Record(vec![ - ("a".into(), Value::Union(1, Box::new(inner_value1))), - ("b".into(), inner_value2.clone()), - ]); - encode(&outer_value1, &schema, &mut buf).expect(&success(&outer_value1, &schema)); - assert!(!buf.is_empty()); - - buf.drain(..); - let outer_value2 = Value::Record(vec![ - ("a".into(), Value::Union(0, Box::new(Value::Null))), - ("b".into(), inner_value2), - ]); - encode(&outer_value2, &schema, &mut buf).expect(&success(&outer_value1, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3448_proper_multi_level_encoding_outer_namespace() { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "space.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let 
middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - let mut buf = Vec::new(); - encode(&outer_record_variation_1, &schema, &mut buf) - .expect(&success(&outer_record_variation_1, &schema)); - assert!(!buf.is_empty()); - buf.drain(..); - encode(&outer_record_variation_2, &schema, &mut buf) - .expect(&success(&outer_record_variation_2, &schema)); - assert!(!buf.is_empty()); - buf.drain(..); - encode(&outer_record_variation_3, &schema, &mut buf) - .expect(&success(&outer_record_variation_3, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3448_proper_multi_level_encoding_middle_namespace() { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "middle_namespace.inner_record_name" - } - ] - } - "#; - let schema = 
Schema::parse_str(schema).unwrap(); - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - let mut buf = Vec::new(); - encode(&outer_record_variation_1, &schema, &mut buf) - .expect(&success(&outer_record_variation_1, &schema)); - assert!(!buf.is_empty()); - buf.drain(..); - encode(&outer_record_variation_2, &schema, &mut buf) - .expect(&success(&outer_record_variation_2, &schema)); - assert!(!buf.is_empty()); - buf.drain(..); - encode(&outer_record_variation_3, &schema, &mut buf) - .expect(&success(&outer_record_variation_3, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3448_proper_multi_level_encoding_inner_namespace() { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "namespace":"inner_namespace", - "fields":[ - { - "name":"inner_field_1", - 
"type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - let mut buf = Vec::new(); - encode(&outer_record_variation_1, &schema, &mut buf) - .expect(&success(&outer_record_variation_1, &schema)); - assert!(!buf.is_empty()); - buf.drain(..); - encode(&outer_record_variation_2, &schema, &mut buf) - .expect(&success(&outer_record_variation_2, &schema)); - assert!(!buf.is_empty()); - buf.drain(..); - encode(&outer_record_variation_3, &schema, &mut buf) - .expect(&success(&outer_record_variation_3, &schema)); - assert!(!buf.is_empty()); - } - - #[test] - fn test_avro_3585_encode_uuids() { - let value = Value::String(String::from("00000000-0000-0000-0000-000000000000")); - let schema = Schema::Uuid; - let mut buffer = Vec::new(); - let encoded = encode(&value, &schema, &mut buffer); - assert!(encoded.is_ok()); - assert!(!buffer.is_empty()); - } - - #[test] - fn 
avro_3926_encode_decode_uuid_to_fixed_wrong_schema_size() -> TestResult { - let schema = Schema::Fixed(FixedSchema { - size: 15, - name: "uuid".into(), - aliases: None, - doc: None, - default: None, - attributes: Default::default(), - }); - let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?); - - let mut buffer = Vec::new(); - match encode(&value, &schema, &mut buffer) { - Err(Error::ConvertFixedToUuid(actual)) => { - assert_eq!(actual, 15); - } - _ => panic!("Expected Error::ConvertFixedToUuid"), - } - - Ok(()) - } -} diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs deleted file mode 100644 index d92daa48e17..00000000000 --- a/lang/rust/avro/src/error.rs +++ /dev/null @@ -1,567 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use crate::{ - schema::{Name, Schema, SchemaKind}, - types::{Value, ValueKind}, -}; -use std::{error::Error as _, fmt}; - -#[derive(thiserror::Error)] -pub enum Error { - #[error("Bad Snappy CRC32; expected {expected:x} but got {actual:x}")] - SnappyCrc32 { expected: u32, actual: u32 }, - - #[error("Invalid u8 for bool: {0}")] - BoolValue(u8), - - #[error("Not a fixed value, required for decimal with fixed schema: {0:?}")] - FixedValue(Value), - - #[error("Not a bytes value, required for decimal with bytes schema: {0:?}")] - BytesValue(Value), - - #[error("Not a string value, required for uuid: {0:?}")] - GetUuidFromStringValue(Value), - - #[error("Two schemas with the same fullname were given: {0:?}")] - NameCollision(String), - - #[error("Not a fixed or bytes type, required for decimal schema, got: {0:?}")] - ResolveDecimalSchema(SchemaKind), - - #[error("Invalid utf-8 string")] - ConvertToUtf8(#[source] std::string::FromUtf8Error), - - #[error("Invalid utf-8 string")] - ConvertToUtf8Error(#[source] std::str::Utf8Error), - - /// Describes errors happened while validating Avro data. - #[error("Value does not match schema")] - Validation, - - /// Describes errors happened while validating Avro data. 
- #[error("Value {value:?} does not match schema {schema:?}: Reason: {reason}")] - ValidationWithReason { - value: Value, - schema: Schema, - reason: String, - }, - - #[error("Unable to allocate {desired} bytes (maximum allowed: {maximum})")] - MemoryAllocation { desired: usize, maximum: usize }, - - /// Describe a specific error happening with decimal representation - #[error("Number of bytes requested for decimal sign extension {requested} is less than the number of bytes needed to decode {needed}")] - SignExtend { requested: usize, needed: usize }, - - #[error("Failed to read boolean bytes: {0}")] - ReadBoolean(#[source] std::io::Error), - - #[error("Failed to read bytes: {0}")] - ReadBytes(#[source] std::io::Error), - - #[error("Failed to read string: {0}")] - ReadString(#[source] std::io::Error), - - #[error("Failed to read double: {0}")] - ReadDouble(#[source] std::io::Error), - - #[error("Failed to read float: {0}")] - ReadFloat(#[source] std::io::Error), - - #[error("Failed to read duration: {0}")] - ReadDuration(#[source] std::io::Error), - - #[error("Failed to read fixed number of bytes '{1}': : {0}")] - ReadFixed(#[source] std::io::Error, usize), - - #[error("Failed to convert &str to UUID: {0}")] - ConvertStrToUuid(#[source] uuid::Error), - - #[error("Failed to convert Fixed bytes to UUID. 
It must be exactly 16 bytes, got {0}")] - ConvertFixedToUuid(usize), - - #[error("Failed to convert Fixed bytes to UUID: {0}")] - ConvertSliceToUuid(#[source] uuid::Error), - - #[error("Map key is not a string; key type is {0:?}")] - MapKeyType(ValueKind), - - #[error("Union index {index} out of bounds: {num_variants}")] - GetUnionVariant { index: i64, num_variants: usize }, - - #[error("Enum symbol index out of bounds: {num_variants}")] - EnumSymbolIndex { index: usize, num_variants: usize }, - - #[error("Enum symbol not found {0}")] - GetEnumSymbol(String), - - #[error("Unable to decode enum index")] - GetEnumUnknownIndexValue, - - #[error("Scale {scale} is greater than precision {precision}")] - GetScaleAndPrecision { scale: usize, precision: usize }, - - #[error( - "Fixed type number of bytes {size} is not large enough to hold decimal values of precision {precision}" - )] - GetScaleWithFixedSize { size: usize, precision: usize }, - - #[error("Expected Value::Uuid, got: {0:?}")] - GetUuid(Value), - - #[error("Expected Value::BigDecimal, got: {0:?}")] - GetBigDecimal(Value), - - #[error("Fixed bytes of size 12 expected, got Fixed of size {0}")] - GetDecimalFixedBytes(usize), - - #[error("Expected Value::Duration or Value::Fixed(12), got: {0:?}")] - ResolveDuration(Value), - - #[error("Expected Value::Decimal, Value::Bytes or Value::Fixed, got: {0:?}")] - ResolveDecimal(Value), - - #[error("Missing field in record: {0:?}")] - GetField(String), - - #[error("Unable to convert to u8, got {0:?}")] - GetU8(Value), - - #[error("Precision {precision} too small to hold decimal values with {num_bytes} bytes")] - ComparePrecisionAndSize { precision: usize, num_bytes: usize }, - - #[error("Cannot convert length to i32: {1}")] - ConvertLengthToI32(#[source] std::num::TryFromIntError, usize), - - #[error("Expected Value::Date or Value::Int, got: {0:?}")] - GetDate(Value), - - #[error("Expected Value::TimeMillis or Value::Int, got: {0:?}")] - GetTimeMillis(Value), - - 
#[error("Expected Value::TimeMicros, Value::Long or Value::Int, got: {0:?}")] - GetTimeMicros(Value), - - #[error("Expected Value::TimestampMillis, Value::Long or Value::Int, got: {0:?}")] - GetTimestampMillis(Value), - - #[error("Expected Value::TimestampMicros, Value::Long or Value::Int, got: {0:?}")] - GetTimestampMicros(Value), - - #[error("Expected Value::TimestampNanos, Value::Long or Value::Int, got: {0:?}")] - GetTimestampNanos(Value), - - #[error("Expected Value::LocalTimestampMillis, Value::Long or Value::Int, got: {0:?}")] - GetLocalTimestampMillis(Value), - - #[error("Expected Value::LocalTimestampMicros, Value::Long or Value::Int, got: {0:?}")] - GetLocalTimestampMicros(Value), - - #[error("Expected Value::LocalTimestampNanos, Value::Long or Value::Int, got: {0:?}")] - GetLocalTimestampNanos(Value), - - #[error("Expected Value::Null, got: {0:?}")] - GetNull(Value), - - #[error("Expected Value::Boolean, got: {0:?}")] - GetBoolean(Value), - - #[error("Expected Value::Int, got: {0:?}")] - GetInt(Value), - - #[error("Expected Value::Long or Value::Int, got: {0:?}")] - GetLong(Value), - - #[error(r#"Expected Value::Double, Value::Float, Value::Int, Value::Long or Value::String ("NaN", "INF", "Infinity", "-INF" or "-Infinity"), got: {0:?}"#)] - GetDouble(Value), - - #[error(r#"Expected Value::Float, Value::Double, Value::Int, Value::Long or Value::String ("NaN", "INF", "Infinity", "-INF" or "-Infinity"), got: {0:?}"#)] - GetFloat(Value), - - #[error("Expected Value::Bytes, got: {0:?}")] - GetBytes(Value), - - #[error("Expected Value::String, Value::Bytes or Value::Fixed, got: {0:?}")] - GetString(Value), - - #[error("Expected Value::Enum, got: {0:?}")] - GetEnum(Value), - - #[error("Fixed size mismatch, expected: {size}, got: {n}")] - CompareFixedSizes { size: usize, n: usize }, - - #[error("String expected for fixed, got: {0:?}")] - GetStringForFixed(Value), - - #[error("Enum default {symbol:?} is not among allowed symbols {symbols:?}")] - GetEnumDefault { 
- symbol: String, - symbols: Vec, - }, - - #[error("Enum value index {index} is out of bounds {nsymbols}")] - GetEnumValue { index: usize, nsymbols: usize }, - - #[error("Key {0} not found in decimal metadata JSON")] - GetDecimalMetadataFromJson(&'static str), - - #[error("Could not find matching type in union")] - FindUnionVariant, - - #[error("Union type should not be empty")] - EmptyUnion, - - #[error("Array({expected:?}) expected, got {other:?}")] - GetArray { expected: SchemaKind, other: Value }, - - #[error("Map({expected:?}) expected, got {other:?}")] - GetMap { expected: SchemaKind, other: Value }, - - #[error("Record with fields {expected:?} expected, got {other:?}")] - GetRecord { - expected: Vec<(String, SchemaKind)>, - other: Value, - }, - - #[error("No `name` field")] - GetNameField, - - #[error("No `name` in record field")] - GetNameFieldFromRecord, - - #[error("Unions may not directly contain a union")] - GetNestedUnion, - - #[error("Unions cannot contain duplicate types")] - GetUnionDuplicate, - - #[error("One union type {0:?} must match the `default`'s value type {1:?}")] - GetDefaultUnion(SchemaKind, ValueKind), - - #[error("`default`'s value type of field {0:?} in {1:?} must be {2:?}")] - GetDefaultRecordField(String, String, String), - - #[error("JSON value {0} claims to be u64 but cannot be converted")] - GetU64FromJson(serde_json::Number), - - #[error("JSON value {0} claims to be i64 but cannot be converted")] - GetI64FromJson(serde_json::Number), - - #[error("Cannot convert u64 to usize: {1}")] - ConvertU64ToUsize(#[source] std::num::TryFromIntError, u64), - - #[error("Cannot convert u32 to usize: {1}")] - ConvertU32ToUsize(#[source] std::num::TryFromIntError, u32), - - #[error("Cannot convert i64 to usize: {1}")] - ConvertI64ToUsize(#[source] std::num::TryFromIntError, i64), - - #[error("Cannot convert i32 to usize: {1}")] - ConvertI32ToUsize(#[source] std::num::TryFromIntError, i32), - - #[error("Invalid JSON value for decimal 
precision/scale integer: {0}")] - GetPrecisionOrScaleFromJson(serde_json::Number), - - #[error("Failed to parse schema from JSON")] - ParseSchemaJson(#[source] serde_json::Error), - - #[error("Failed to read schema")] - ReadSchemaFromReader(#[source] std::io::Error), - - #[error("Must be a JSON string, object or array")] - ParseSchemaFromValidJson, - - #[error("Unknown primitive type: {0}")] - ParsePrimitive(String), - - #[error("invalid JSON for {key:?}: {value:?}")] - GetDecimalMetadataValueFromJson { - key: String, - value: serde_json::Value, - }, - - #[error("The decimal precision ({precision}) must be bigger or equal to the scale ({scale})")] - DecimalPrecisionLessThanScale { precision: usize, scale: usize }, - - #[error("The decimal precision ({precision}) must be a positive number")] - DecimalPrecisionMuBePositive { precision: usize }, - - #[error("Unreadable big decimal sign")] - BigDecimalSign, - - #[error("Unreadable length for big decimal inner bytes: {0}")] - BigDecimalLen(#[source] Box), - - #[error("Unreadable big decimal scale")] - BigDecimalScale, - - #[error("Unexpected `type` {0} variant for `logicalType`")] - GetLogicalTypeVariant(serde_json::Value), - - #[error("No `type` field found for `logicalType`")] - GetLogicalTypeField, - - #[error("logicalType must be a string, but is {0:?}")] - GetLogicalTypeFieldType(serde_json::Value), - - #[error("Unknown complex type: {0}")] - GetComplexType(serde_json::Value), - - #[error("No `type` in complex type")] - GetComplexTypeField, - - #[error("No `fields` in record")] - GetRecordFieldsJson, - - #[error("No `symbols` field in enum")] - GetEnumSymbolsField, - - #[error("Unable to parse `symbols` in enum")] - GetEnumSymbols, - - #[error("Invalid enum symbol name {0}")] - EnumSymbolName(String), - - #[error("Invalid field name {0}")] - FieldName(String), - - #[error("Duplicate field name {0}")] - FieldNameDuplicate(String), - - #[error("Invalid schema name {0}. 
It must match the regex '{1}'")] - InvalidSchemaName(String, &'static str), - - #[error("Invalid namespace {0}. It must match the regex '{1}'")] - InvalidNamespace(String, &'static str), - - #[error("Duplicate enum symbol {0}")] - EnumSymbolDuplicate(String), - - #[error("Default value for enum must be a string! Got: {0}")] - EnumDefaultWrongType(serde_json::Value), - - #[error("No `items` in array")] - GetArrayItemsField, - - #[error("No `values` in map")] - GetMapValuesField, - - #[error("Fixed schema `size` value must be a positive integer: {0}")] - GetFixedSizeFieldPositive(serde_json::Value), - - #[error("Fixed schema has no `size`")] - GetFixedSizeField, - - #[error("Fixed schema's default value length ({0}) does not match its size ({1})")] - FixedDefaultLenSizeMismatch(usize, u64), - - #[error("Failed to compress with flate: {0}")] - DeflateCompress(#[source] std::io::Error), - - #[error("Failed to finish flate compressor: {0}")] - DeflateCompressFinish(#[source] std::io::Error), - - #[error("Failed to decompress with flate: {0}")] - DeflateDecompress(#[source] std::io::Error), - - #[cfg(feature = "snappy")] - #[error("Failed to compress with snappy: {0}")] - SnappyCompress(#[source] snap::Error), - - #[cfg(feature = "snappy")] - #[error("Failed to get snappy decompression length: {0}")] - GetSnappyDecompressLen(#[source] snap::Error), - - #[cfg(feature = "snappy")] - #[error("Failed to decompress with snappy: {0}")] - SnappyDecompress(#[source] snap::Error), - - #[error("Failed to compress with zstd: {0}")] - ZstdCompress(#[source] std::io::Error), - - #[error("Failed to decompress with zstd: {0}")] - ZstdDecompress(#[source] std::io::Error), - - #[error("Failed to read header: {0}")] - ReadHeader(#[source] std::io::Error), - - #[error("wrong magic in header")] - HeaderMagic, - - #[error("Message Header mismatch. Expected: {0:?}. 
Actual: {1:?}")] - SingleObjectHeaderMismatch([u8; 10], [u8; 10]), - - #[error("Failed to get JSON from avro.schema key in map")] - GetAvroSchemaFromMap, - - #[error("no metadata in header")] - GetHeaderMetadata, - - #[error("Failed to read marker bytes: {0}")] - ReadMarker(#[source] std::io::Error), - - #[error("Failed to read block marker bytes: {0}")] - ReadBlockMarker(#[source] std::io::Error), - - #[error("Read into buffer failed: {0}")] - ReadIntoBuf(#[source] std::io::Error), - - #[error("block marker does not match header marker")] - GetBlockMarker, - - #[error("Overflow when decoding integer value")] - IntegerOverflow, - - #[error("Failed to read bytes for decoding variable length integer: {0}")] - ReadVariableIntegerBytes(#[source] std::io::Error), - - #[error("Decoded integer out of range for i32: {1}: {0}")] - ZagI32(#[source] std::num::TryFromIntError, i64), - - #[error("unable to read block")] - ReadBlock, - - #[error("Failed to serialize value into Avro value: {0}")] - SerializeValue(String), - - #[error("Failed to deserialize Avro value into value: {0}")] - DeserializeValue(String), - - #[error("Failed to write buffer bytes during flush: {0}")] - WriteBytes(#[source] std::io::Error), - - #[error("Failed to write marker: {0}")] - WriteMarker(#[source] std::io::Error), - - #[error("Failed to convert JSON to string: {0}")] - ConvertJsonToString(#[source] serde_json::Error), - - /// Error while converting float to json value - #[error("failed to convert avro float to json: {0}")] - ConvertF64ToJson(f64), - - /// Error while resolving Schema::Ref - #[error("Unresolved schema reference: {0}")] - SchemaResolutionError(Name), - - #[error("The file metadata is already flushed.")] - FileHeaderAlreadyWritten, - - #[error("Metadata keys starting with 'avro.' 
are reserved for internal usage: {0}.")] - InvalidMetadataKey(String), - - /// Error when two named schema have the same fully qualified name - #[error("Two named schema defined for same fullname: {0}.")] - AmbiguousSchemaDefinition(Name), - - #[error("Signed decimal bytes length {0} not equal to fixed schema size {1}.")] - EncodeDecimalAsFixedError(usize, usize), - - #[error("There is no entry for '{0}' in the lookup table: {1}.")] - NoEntryInLookupTable(String, String), - - #[error("Can only encode value type {value_kind:?} as one of {supported_schema:?}")] - EncodeValueAsSchemaError { - value_kind: ValueKind, - supported_schema: Vec, - }, - #[error( - "Internal buffer not drained properly. Re-initialize the single object writer struct!" - )] - IllegalSingleObjectWriterState, - - #[error("Codec '{0}' is not supported/enabled")] - CodecNotSupported(String), - - #[error("Invalid Avro data! Cannot read codec type from value that is not Value::Bytes.")] - BadCodecMetadata, -} - -#[derive(thiserror::Error, PartialEq)] -pub enum CompatibilityError { - #[error("Incompatible schema types! Writer schema is '{writer_schema_type}', but reader schema is '{reader_schema_type}'")] - WrongType { - writer_schema_type: String, - reader_schema_type: String, - }, - - #[error("Incompatible schema types! The {schema_type} should have been {expected_type:?}")] - TypeExpected { - schema_type: String, - expected_type: Vec, - }, - - #[error("Incompatible schemata! Field '{0}' in reader schema does not match the type in the writer schema")] - FieldTypeMismatch(String, #[source] Box), - - #[error("Incompatible schemata! Field '{0}' in reader schema must have a default value")] - MissingDefaultValue(String), - - #[error("Incompatible schemata! Reader's symbols must contain all writer's symbols")] - MissingSymbols, - - #[error("Incompatible schemata! All elements in union must match for both schemas")] - MissingUnionElements, - - #[error("Incompatible schemata! 
Name and size don't match for fixed")] - FixedMismatch, - - #[error("Incompatible schemata! The name must be the same for both schemas. Writer's name {writer_name} and reader's name {reader_name}")] - NameMismatch { - writer_name: String, - reader_name: String, - }, - - #[error( - "Incompatible schemata! Unknown type for '{0}'. Make sure that the type is a valid one" - )] - Inconclusive(String), -} - -impl serde::ser::Error for Error { - fn custom(msg: T) -> Self { - Error::SerializeValue(msg.to_string()) - } -} - -impl serde::de::Error for Error { - fn custom(msg: T) -> Self { - Error::DeserializeValue(msg.to_string()) - } -} - -impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut msg = self.to_string(); - if let Some(e) = self.source() { - msg.extend([": ", &e.to_string()]); - } - write!(f, "{}", msg) - } -} - -impl fmt::Debug for CompatibilityError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut msg = self.to_string(); - if let Some(e) = self.source() { - msg.extend([": ", &e.to_string()]); - } - write!(f, "{}", msg) - } -} diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs deleted file mode 100644 index 7f7f57f3bfd..00000000000 --- a/lang/rust/avro/src/lib.rs +++ /dev/null @@ -1,1068 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -//! A library for working with [Apache Avro](https://avro.apache.org/) in Rust. -//! -//! Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference. -//! -//! **[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich -//! data structures and a compact, fast, binary data format. -//! -//! All data in Avro is schematized, as in the following example: -//! -//! ```json -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! ``` -//! -//! There are basically two ways of handling Avro data in Rust: -//! -//! * **as Avro-specialized data types** based on an Avro schema; -//! * **as generic Rust serde-compatible types** implementing/deriving `Serialize` and `Deserialize`; -//! -//! **apache-avro** provides a way to read and write both these data representations easily and -//! efficiently. -//! -//! # Installing the library -//! -//! -//! Add to your `Cargo.toml`: -//! -//! ```toml -//! [dependencies] -//! apache-avro = "x.y" -//! ``` -//! -//! Or in case you want to leverage the **Snappy** codec: -//! -//! ```toml -//! [dependencies.apache-avro] -//! version = "x.y" -//! features = ["snappy"] -//! ``` -//! -//! Or in case you want to leverage the **Zstandard** codec: -//! -//! ```toml -//! [dependencies.apache-avro] -//! version = "x.y" -//! features = ["zstandard"] -//! ``` -//! -//! Or in case you want to leverage the **Bzip2** codec: -//! -//! ```toml -//! [dependencies.apache-avro] -//! version = "x.y" -//! features = ["bzip"] -//! ``` -//! -//! Or in case you want to leverage the **Xz** codec: -//! -//! ```toml -//! [dependencies.apache-avro] -//! version = "x.y" -//! features = ["xz"] -//! ``` -//! -//! # Upgrading to a newer minor version -//! -//! 
The library is still in beta, so there might be backward-incompatible changes between minor -//! versions. If you have troubles upgrading, check the [version upgrade guide](https://github.com/apache/avro/blob/main/lang/rust/migration_guide.md). -//! -//! # Defining a schema -//! -//! An Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and -//! **can** be used while reading and they carry the information regarding the type of data we are -//! handling. Avro schemas are used for both schema validation and resolution of Avro data. -//! -//! Avro schemas are defined in **JSON** format and can just be parsed out of a raw string: -//! -//! ``` -//! use apache_avro::Schema; -//! -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! "#; -//! -//! // if the schema is not valid, this function will return an error -//! let schema = Schema::parse_str(raw_schema).unwrap(); -//! -//! // schemas can be printed for debugging -//! println!("{:?}", schema); -//! ``` -//! -//! Additionally, a list of of definitions (which may depend on each other) can be given and all of -//! them will be parsed into the corresponding schemas. -//! -//! ``` -//! use apache_avro::Schema; -//! -//! let raw_schema_1 = r#"{ -//! "name": "A", -//! "type": "record", -//! "fields": [ -//! {"name": "field_one", "type": "float"} -//! ] -//! }"#; -//! -//! // This definition depends on the definition of A above -//! let raw_schema_2 = r#"{ -//! "name": "B", -//! "type": "record", -//! "fields": [ -//! {"name": "field_one", "type": "A"} -//! ] -//! }"#; -//! -//! // if the schemas are not valid, this function will return an error -//! let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap(); -//! -//! // schemas can be printed for debugging -//! println!("{:?}", schemas); -//! ``` -//! 
*N.B.* It is important to note that the composition of schema definitions requires schemas with names. -//! For this reason, only schemas of type Record, Enum, and Fixed should be input into this function. -//! -//! The library provides also a programmatic interface to define schemas without encoding them in -//! JSON (for advanced use), but we highly recommend the JSON interface. Please read the API -//! reference in case you are interested. -//! -//! For more information about schemas and what kind of information you can encapsulate in them, -//! please refer to the appropriate section of the -//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration). -//! -//! # Writing data -//! -//! Once we have defined a schema, we are ready to serialize data in Avro, validating them against -//! the provided schema in the process. As mentioned before, there are two ways of handling Avro -//! data in Rust. -//! -//! **NOTE:** The library also provides a low-level interface for encoding a single datum in Avro -//! bytecode without generating markers and headers (for advanced use), but we highly recommend the -//! `Writer` interface to be totally Avro-compatible. Please read the API reference in case you are -//! interested. -//! -//! ## The avro way -//! -//! Given that the schema we defined above is that of an Avro *Record*, we are going to use the -//! associated type provided by the library to specify the data we want to serialize: -//! -//! ``` -//! # use apache_avro::Schema; -//! use apache_avro::types::Record; -//! use apache_avro::Writer; -//! # -//! # let raw_schema = r#" -//! # { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! # {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! // a writer needs a schema and something to write to -//! 
let mut writer = Writer::new(&schema, Vec::new()); -//! -//! // the Record type models our Record schema -//! let mut record = Record::new(writer.schema()).unwrap(); -//! record.put("a", 27i64); -//! record.put("b", "foo"); -//! -//! // schema validation happens here -//! writer.append(record).unwrap(); -//! -//! // this is how to get back the resulting avro bytecode -//! // this performs a flush operation to make sure data has been written, so it can fail -//! // you can also call `writer.flush()` yourself without consuming the writer -//! let encoded = writer.into_inner().unwrap(); -//! ``` -//! -//! The vast majority of the times, schemas tend to define a record as a top-level container -//! encapsulating all the values to convert as fields and providing documentation for them, but in -//! case we want to directly define an Avro value, the library offers that capability via the -//! `Value` interface. -//! -//! ``` -//! use apache_avro::types::Value; -//! -//! let mut value = Value::String("foo".to_string()); -//! ``` -//! -//! ## The serde way -//! -//! Given that the schema we defined above is an Avro *Record*, we can directly use a Rust struct -//! deriving `Serialize` to model our data: -//! -//! ``` -//! # use apache_avro::Schema; -//! # use serde::Serialize; -//! use apache_avro::Writer; -//! -//! #[derive(Debug, Serialize)] -//! struct Test { -//! a: i64, -//! b: String, -//! } -//! -//! # let raw_schema = r#" -//! # { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! # {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! // a writer needs a schema and something to write to -//! let mut writer = Writer::new(&schema, Vec::new()); -//! -//! // the structure models our Record schema -//! let test = Test { -//! a: 27, -//! b: "foo".to_owned(), -//! }; -//! -//! // schema validation happens here -//! 
writer.append_ser(test).unwrap(); -//! -//! // this is how to get back the resulting avro bytecode -//! // this performs a flush operation to make sure data is written, so it can fail -//! // you can also call `writer.flush()` yourself without consuming the writer -//! let encoded = writer.into_inner(); -//! ``` -//! -//! The vast majority of the times, schemas tend to define a record as a top-level container -//! encapsulating all the values to convert as fields and providing documentation for them, but in -//! case we want to directly define an Avro value, any type implementing `Serialize` should work. -//! -//! ``` -//! let mut value = "foo".to_string(); -//! ``` -//! -//! ## Using codecs to compress data -//! -//! Avro supports three different compression codecs when encoding data: -//! -//! * **Null**: leaves data uncompressed; -//! * **Deflate**: writes the data block using the deflate algorithm as specified in RFC 1951, and -//! typically implemented using the zlib library. Note that this format (unlike the "zlib format" in -//! RFC 1950) does not have a checksum. -//! * **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each -//! compressed block is followed by the 4-byte, big-endianCRC32 checksum of the uncompressed data in -//! the block. You must enable the `snappy` feature to use this codec. -//! * **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. -//! You must enable the `zstandard` feature to use this codec. -//! * **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library. -//! You must enable the `bzip` feature to use this codec. -//! * **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. -//! You must enable the `xz` feature to use this codec. -//! -//! To specify a codec to use to compress data, just specify it while creating a `Writer`: -//! ``` -//! # use apache_avro::Schema; -//! use apache_avro::Writer; -//! 
use apache_avro::Codec; -//! # -//! # let raw_schema = r#" -//! # { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! # {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); -//! ``` -//! -//! # Reading data -//! -//! As far as reading Avro encoded data goes, we can just use the schema encoded with the data to -//! read them. The library will do it automatically for us, as it already does for the compression -//! codec: -//! -//! ``` -//! use apache_avro::Reader; -//! # use apache_avro::Schema; -//! # use apache_avro::types::Record; -//! # use apache_avro::Writer; -//! # -//! # let raw_schema = r#" -//! # { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! # {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! # let mut writer = Writer::new(&schema, Vec::new()); -//! # let mut record = Record::new(writer.schema()).unwrap(); -//! # record.put("a", 27i64); -//! # record.put("b", "foo"); -//! # writer.append(record).unwrap(); -//! # let input = writer.into_inner().unwrap(); -//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed -//! let reader = Reader::new(&input[..]).unwrap(); -//! ``` -//! -//! In case, instead, we want to specify a different (but compatible) reader schema from the schema -//! the data has been written with, we can just do as the following: -//! ``` -//! use apache_avro::Schema; -//! use apache_avro::Reader; -//! # use apache_avro::types::Record; -//! # use apache_avro::Writer; -//! # -//! # let writer_raw_schema = r#" -//! # { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! 
# {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); -//! # let mut writer = Writer::new(&writer_schema, Vec::new()); -//! # let mut record = Record::new(writer.schema()).unwrap(); -//! # record.put("a", 27i64); -//! # record.put("b", "foo"); -//! # writer.append(record).unwrap(); -//! # let input = writer.into_inner().unwrap(); -//! -//! let reader_raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"}, -//! {"name": "c", "type": "long", "default": 43} -//! ] -//! } -//! "#; -//! -//! let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); -//! -//! // reader creation can fail in case the input to read from is not Avro-compatible or malformed -//! let reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); -//! ``` -//! -//! The library will also automatically perform schema resolution while reading the data. -//! -//! For more information about schema compatibility and resolution, please refer to the -//! [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration). -//! -//! As usual, there are two ways to handle Avro data in Rust, as you can see below. -//! -//! **NOTE:** The library also provides a low-level interface for decoding a single datum in Avro -//! bytecode without markers and header (for advanced use), but we highly recommend the `Reader` -//! interface to leverage all Avro features. Please read the API reference in case you are -//! interested. -//! -//! -//! ## The avro way -//! -//! We can just read directly instances of `Value` out of the `Reader` iterator: -//! -//! ``` -//! # use apache_avro::Schema; -//! # use apache_avro::types::Record; -//! # use apache_avro::Writer; -//! use apache_avro::Reader; -//! # -//! # let raw_schema = r#" -//! 
# { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! # {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! # let mut writer = Writer::new(&schema, Vec::new()); -//! # let mut record = Record::new(writer.schema()).unwrap(); -//! # record.put("a", 27i64); -//! # record.put("b", "foo"); -//! # writer.append(record).unwrap(); -//! # let input = writer.into_inner().unwrap(); -//! let reader = Reader::new(&input[..]).unwrap(); -//! -//! // value is a Result of an Avro Value in case the read operation fails -//! for value in reader { -//! println!("{:?}", value.unwrap()); -//! } -//! -//! ``` -//! -//! ## The serde way -//! -//! Alternatively, we can use a Rust type implementing `Deserialize` and representing our schema to -//! read the data into: -//! -//! ``` -//! # use apache_avro::Schema; -//! # use apache_avro::Writer; -//! # use serde::{Deserialize, Serialize}; -//! use apache_avro::Reader; -//! use apache_avro::from_value; -//! -//! # #[derive(Serialize)] -//! #[derive(Debug, Deserialize)] -//! struct Test { -//! a: i64, -//! b: String, -//! } -//! -//! # let raw_schema = r#" -//! # { -//! # "type": "record", -//! # "name": "test", -//! # "fields": [ -//! # {"name": "a", "type": "long", "default": 42}, -//! # {"name": "b", "type": "string"} -//! # ] -//! # } -//! # "#; -//! # let schema = Schema::parse_str(raw_schema).unwrap(); -//! # let mut writer = Writer::new(&schema, Vec::new()); -//! # let test = Test { -//! # a: 27, -//! # b: "foo".to_owned(), -//! # }; -//! # writer.append_ser(test).unwrap(); -//! # let input = writer.into_inner().unwrap(); -//! let reader = Reader::new(&input[..]).unwrap(); -//! -//! // value is a Result in case the read operation fails -//! for value in reader { -//! println!("{:?}", from_value::(&value.unwrap())); -//! } -//! 
``` -//! -//! # Putting everything together -//! -//! The following is an example of how to combine everything showed so far and it is meant to be a -//! quick reference of the library interface: -//! -//! ``` -//! use apache_avro::{Codec, Reader, Schema, Writer, from_value, types::Record, Error}; -//! use serde::{Deserialize, Serialize}; -//! -//! #[derive(Debug, Deserialize, Serialize)] -//! struct Test { -//! a: i64, -//! b: String, -//! } -//! -//! fn main() -> Result<(), Error> { -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! "#; -//! -//! let schema = Schema::parse_str(raw_schema)?; -//! -//! println!("{:?}", schema); -//! -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); -//! -//! let mut record = Record::new(writer.schema()).unwrap(); -//! record.put("a", 27i64); -//! record.put("b", "foo"); -//! -//! writer.append(record)?; -//! -//! let test = Test { -//! a: 27, -//! b: "foo".to_owned(), -//! }; -//! -//! writer.append_ser(test)?; -//! -//! let input = writer.into_inner()?; -//! let reader = Reader::with_schema(&schema, &input[..])?; -//! -//! for record in reader { -//! println!("{:?}", from_value::(&record?)); -//! } -//! Ok(()) -//! } -//! ``` -//! -//! `apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/specification/#logical-types): -//! -//! 1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/latest/num_bigint) crate -//! 1. UUID using the [`uuid`](https://docs.rs/uuid/latest/uuid) crate -//! 1. Date, Time (milli) as `i32` and Time (micro) as `i64` -//! 1. Timestamp (milli and micro) as `i64` -//! 1. Local timestamp (milli and micro) as `i64` -//! 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` -//! -//! 
Note that the on-disk representation is identical to the underlying primitive/complex type. -//! -//! ### Read and write logical types -//! -//! ```rust -//! use apache_avro::{ -//! types::Record, types::Value, Codec, Days, Decimal, Duration, Millis, Months, Reader, Schema, -//! Writer, Error, -//! }; -//! use num_bigint::ToBigInt; -//! -//! fn main() -> Result<(), Error> { -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! { -//! "name": "decimal_fixed", -//! "type": { -//! "type": "fixed", -//! "size": 2, -//! "name": "decimal" -//! }, -//! "logicalType": "decimal", -//! "precision": 4, -//! "scale": 2 -//! }, -//! { -//! "name": "decimal_var", -//! "type": "bytes", -//! "logicalType": "decimal", -//! "precision": 10, -//! "scale": 3 -//! }, -//! { -//! "name": "uuid", -//! "type": "string", -//! "logicalType": "uuid" -//! }, -//! { -//! "name": "date", -//! "type": "int", -//! "logicalType": "date" -//! }, -//! { -//! "name": "time_millis", -//! "type": "int", -//! "logicalType": "time-millis" -//! }, -//! { -//! "name": "time_micros", -//! "type": "long", -//! "logicalType": "time-micros" -//! }, -//! { -//! "name": "timestamp_millis", -//! "type": "long", -//! "logicalType": "timestamp-millis" -//! }, -//! { -//! "name": "timestamp_micros", -//! "type": "long", -//! "logicalType": "timestamp-micros" -//! }, -//! { -//! "name": "local_timestamp_millis", -//! "type": "long", -//! "logicalType": "local-timestamp-millis" -//! }, -//! { -//! "name": "local_timestamp_micros", -//! "type": "long", -//! "logicalType": "local-timestamp-micros" -//! }, -//! { -//! "name": "duration", -//! "type": { -//! "type": "fixed", -//! "size": 12, -//! "name": "duration" -//! }, -//! "logicalType": "duration" -//! } -//! ] -//! } -//! "#; -//! -//! let schema = Schema::parse_str(raw_schema)?; -//! -//! println!("{:?}", schema); -//! -//! let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); -//! -//! 
let mut record = Record::new(writer.schema()).unwrap(); -//! record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be())); -//! record.put("decimal_var", Decimal::from(((-32442).to_bigint().unwrap()).to_signed_bytes_be())); -//! record.put("uuid", uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap()); -//! record.put("date", Value::Date(1)); -//! record.put("time_millis", Value::TimeMillis(2)); -//! record.put("time_micros", Value::TimeMicros(3)); -//! record.put("timestamp_millis", Value::TimestampMillis(4)); -//! record.put("timestamp_micros", Value::TimestampMicros(5)); -//! record.put("timestamp_nanos", Value::TimestampNanos(6)); -//! record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); -//! record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); -//! record.put("local_timestamp_nanos", Value::LocalTimestampMicros(6)); -//! record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); -//! -//! writer.append(record)?; -//! -//! let input = writer.into_inner()?; -//! let reader = Reader::with_schema(&schema, &input[..])?; -//! -//! for record in reader { -//! println!("{:?}", record?); -//! } -//! Ok(()) -//! } -//! ``` -//! -//! ## Calculate Avro schema fingerprint -//! -//! This library supports calculating the following fingerprints: -//! -//! - SHA-256 -//! - MD5 -//! - Rabin -//! -//! An example of fingerprinting for the supported fingerprints: -//! -//! ```rust -//! use apache_avro::rabin::Rabin; -//! use apache_avro::{Schema, Error}; -//! use md5::Md5; -//! use sha2::Sha256; -//! -//! fn main() -> Result<(), Error> { -//! let raw_schema = r#" -//! { -//! "type": "record", -//! "name": "test", -//! "fields": [ -//! {"name": "a", "type": "long", "default": 42}, -//! {"name": "b", "type": "string"} -//! ] -//! } -//! "#; -//! let schema = Schema::parse_str(raw_schema)?; -//! println!("{}", schema.fingerprint::()); -//! 
println!("{}", schema.fingerprint::()); -//! println!("{}", schema.fingerprint::()); -//! Ok(()) -//! } -//! ``` -//! -//! ## Ill-formed data -//! -//! In order to ease decoding, the Binary Encoding specification of Avro data -//! requires some fields to have their length encoded alongside the data. -//! -//! If encoded data passed to a `Reader` has been ill-formed, it can happen that -//! the bytes meant to contain the length of data are bogus and could result -//! in extravagant memory allocation. -//! -//! To shield users from ill-formed data, `apache-avro` sets a limit (default: 512MB) -//! to any allocation it will perform when decoding data. -//! -//! If you expect some of your data fields to be larger than this limit, be sure -//! to make use of the `max_allocation_bytes` function before reading **any** data -//! (we leverage Rust's [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html) -//! mechanism to initialize this value, if -//! any call to decode is made before a call to `max_allocation_bytes`, the limit -//! will be 512MB throughout the lifetime of the program). -//! -//! -//! ```rust -//! use apache_avro::max_allocation_bytes; -//! -//! max_allocation_bytes(2 * 1024 * 1024 * 1024); // 2GB -//! -//! // ... happily decode large data -//! -//! ``` -//! -//! ## Check schemas compatibility -//! -//! This library supports checking for schemas compatibility. -//! -//! Examples of checking for compatibility: -//! -//! 1. Compatible schemas -//! -//! Explanation: an int array schema can be read by a long array schema- an int -//! (32bit signed integer) fits into a long (64bit signed integer) -//! -//! ```rust -//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; -//! -//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); -//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap(); -//! 
assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_ok()); -//! ``` -//! -//! 2. Incompatible schemas (a long array schema cannot be read by an int array schema) -//! -//! Explanation: a long array schema cannot be read by an int array schema- a -//! long (64bit signed integer) does not fit into an int (32bit signed integer) -//! -//! ```rust -//! use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; -//! -//! let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap(); -//! let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); -//! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); -//! ``` -//! ## Custom names validators -//! -//! By default the library follows the rules by the -//! [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names)! -//! -//! Some of the other Apache Avro language SDKs are not that strict and allow more -//! characters in names. For interoperability with those SDKs, the library provides -//! a way to customize the names validation. -//! -//! ```rust -//! use apache_avro::AvroResult; -//! use apache_avro::schema::Namespace; -//! use apache_avro::validator::{SchemaNameValidator, set_schema_name_validator}; -//! -//! struct MyCustomValidator; -//! -//! impl SchemaNameValidator for MyCustomValidator { -//! fn validate(&self, name: &str) -> AvroResult<(String, Namespace)> { -//! todo!() -//! } -//! } -//! -//! // don't parse any schema before registering the custom validator(s) ! -//! -//! set_schema_name_validator(Box::new(MyCustomValidator)); -//! -//! // ... use the library -//! ``` -//! -//! Similar logic could be applied to the schema namespace, enum symbols and field names validation. -//! -//! **Note**: the library allows to set a validator only once per the application lifetime! -//! If the application parses schemas before setting a validator, the default validator will be -//! 
registered and used! -//! -//! ## Custom schema equality comparators -//! -//! The library provides two implementations of schema equality comparators: -//! 1. `SpecificationEq` - a comparator that serializes the schemas to their -//! canonical forms (i.e. JSON) and compares them as strings. It is the only implementation -//! until apache_avro 0.16.0. -//! See the [Avro specification](https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas) -//! for more information! -//! 2. `StructFieldEq` - a comparator that compares the schemas structurally. -//! It is faster than the `SpecificationEq` because it returns `false` as soon as a difference -//! is found and is recommended for use! -//! It is the default comparator since apache_avro 0.17.0. -//! -//! To use a custom comparator, you need to implement the `SchemataEq` trait and set it using the -//! `set_schemata_equality_comparator` function: -//! -//! ```rust -//! use apache_avro::{AvroResult, Schema}; -//! use apache_avro::schema::Namespace; -//! use apache_avro::schema_equality::{SchemataEq, set_schemata_equality_comparator}; -//! -//! #[derive(Debug)] -//! struct MyCustomSchemataEq; -//! -//! impl SchemataEq for MyCustomSchemataEq { -//! fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool { -//! todo!() -//! } -//! } -//! -//! // don't parse any schema before registering the custom comparator ! -//! -//! set_schemata_equality_comparator(Box::new(MyCustomSchemataEq)); -//! -//! // ... use the library -//! ``` -//! **Note**: the library allows to set a comparator only once per the application lifetime! -//! If the application parses schemas before setting a comparator, the default comparator will be -//! registered and used! -//! 
- -mod bigdecimal; -mod bytes; -mod codec; -mod de; -mod decimal; -mod decode; -mod duration; -mod encode; -mod error; -mod reader; -mod ser; -mod util; -mod writer; - -pub mod rabin; -pub mod schema; -pub mod schema_compatibility; -pub mod schema_equality; -pub mod types; -pub mod validator; - -pub use crate::{ - bigdecimal::BigDecimal, - bytes::{ - serde_avro_bytes, serde_avro_bytes_opt, serde_avro_fixed, serde_avro_fixed_opt, - serde_avro_slice, serde_avro_slice_opt, - }, -}; -#[cfg(feature = "bzip")] -pub use codec::bzip::Bzip2Settings; -#[cfg(feature = "xz")] -pub use codec::xz::XzSettings; -#[cfg(feature = "zstandard")] -pub use codec::zstandard::ZstandardSettings; -pub use codec::Codec; -pub use de::from_value; -pub use decimal::Decimal; -pub use duration::{Days, Duration, Millis, Months}; -pub use error::Error; -pub use reader::{ - from_avro_datum, from_avro_datum_schemata, read_marker, GenericSingleObjectReader, Reader, - SpecificSingleObjectReader, -}; -pub use schema::{AvroSchema, Schema}; -pub use ser::to_value; -pub use util::{max_allocation_bytes, set_serde_human_readable}; -pub use uuid::Uuid; -pub use writer::{ - to_avro_datum, to_avro_datum_schemata, GenericSingleObjectWriter, SpecificSingleObjectWriter, - Writer, -}; - -#[cfg(feature = "derive")] -pub use apache_avro_derive::*; - -#[macro_use] -extern crate log; - -/// A convenience type alias for `Result`s with `Error`s. 
-pub type AvroResult = Result; - -#[cfg(test)] -mod tests { - use crate::{ - from_avro_datum, - types::{Record, Value}, - Codec, Reader, Schema, Writer, - }; - use pretty_assertions::assert_eq; - - //TODO: move where it fits better - #[test] - fn test_enum_default() { - let writer_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#; - let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); - let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - writer.append(record).unwrap(); - let input = writer.into_inner().unwrap(); - let mut reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); - assert_eq!( - reader.next().unwrap().unwrap(), - Value::Record(vec![ - ("a".to_string(), Value::Long(27)), - ("b".to_string(), Value::String("foo".to_string())), - ("c".to_string(), Value::Enum(1, "spades".to_string())), - ]) - ); - assert!(reader.next().is_none()); - } - - //TODO: move where it fits better - #[test] - fn test_enum_string_value() { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let schema = 
Schema::parse_str(raw_schema).unwrap(); - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - record.put("c", "clubs"); - writer.append(record).unwrap(); - let input = writer.into_inner().unwrap(); - let mut reader = Reader::with_schema(&schema, &input[..]).unwrap(); - assert_eq!( - reader.next().unwrap().unwrap(), - Value::Record(vec![ - ("a".to_string(), Value::Long(27)), - ("b".to_string(), Value::String("foo".to_string())), - ("c".to_string(), Value::Enum(2, "clubs".to_string())), - ]) - ); - assert!(reader.next().is_none()); - } - - //TODO: move where it fits better - #[test] - fn test_enum_no_reader_schema() { - let writer_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); - let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - record.put("c", "clubs"); - writer.append(record).unwrap(); - let input = writer.into_inner().unwrap(); - let mut reader = Reader::new(&input[..]).unwrap(); - assert_eq!( - reader.next().unwrap().unwrap(), - Value::Record(vec![ - ("a".to_string(), Value::Long(27)), - ("b".to_string(), Value::String("foo".to_string())), - ("c".to_string(), Value::Enum(2, "clubs".to_string())), - ]) - ); - } - - #[test] - fn test_illformed_length() { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#; - - let schema = 
Schema::parse_str(raw_schema).unwrap(); - - // Would allocated 18446744073709551605 bytes - let illformed: &[u8] = &[0x3e, 0x15, 0xff, 0x1f, 0x15, 0xff]; - - let value = from_avro_datum(&schema, &mut &*illformed, None); - assert!(value.is_err()); - } -} diff --git a/lang/rust/avro/src/rabin.rs b/lang/rust/avro/src/rabin.rs deleted file mode 100644 index eb34477539f..00000000000 --- a/lang/rust/avro/src/rabin.rs +++ /dev/null @@ -1,164 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Implementation of the Rabin fingerprint algorithm -use digest::{ - consts::U8, core_api::OutputSizeUser, generic_array::GenericArray, FixedOutput, - FixedOutputReset, HashMarker, Output, Reset, Update, -}; -use std::sync::OnceLock; - -const EMPTY: i64 = -4513414715797952619; - -fn fp_table() -> &'static [i64; 256] { - static FPTABLE_ONCE: OnceLock<[i64; 256]> = OnceLock::new(); - FPTABLE_ONCE.get_or_init(|| { - let mut fp_table: [i64; 256] = [0; 256]; - for i in 0..256 { - let mut fp = i; - for _ in 0..8 { - fp = (fp as u64 >> 1) as i64 ^ (EMPTY & -(fp & 1)); - } - fp_table[i as usize] = fp; - } - fp_table - }) -} - -/// Implementation of the Rabin fingerprint algorithm using the Digest trait as described in [schema_fingerprints](https://avro.apache.org/docs/current/specification/#schema-fingerprints). -/// -/// The digest is returned as the 8-byte little-endian encoding of the Rabin hash. -/// This is what is used for avro [single object encoding](https://avro.apache.org/docs/current/specification/#single-object-encoding) -/// -/// ```rust -/// use apache_avro::rabin::Rabin; -/// use digest::Digest; -/// use hex_literal::hex; -/// -/// // create the Rabin hasher -/// let mut hasher = Rabin::new(); -/// -/// // add the data -/// hasher.update(b"hello world"); -/// -/// // read hash digest and consume hasher -/// let result = hasher.finalize(); -/// -/// assert_eq!(result[..], hex!("60335ba6d0415528")); -/// ``` -/// -/// To convert the digest to the commonly used 64-bit integer value, you can use the i64::from_le_bytes() function -/// -/// ```rust -/// # use apache_avro::rabin::Rabin; -/// # use digest::Digest; -/// # use hex_literal::hex; -/// -/// # let mut hasher = Rabin::new(); -/// -/// # hasher.update(b"hello world"); -/// -/// # let result = hasher.finalize(); -/// -/// # assert_eq!(result[..], hex!("60335ba6d0415528")); -/// -/// let i = i64::from_le_bytes(result.try_into().unwrap()); -/// -/// assert_eq!(i, 2906301498937520992) -/// ``` -#[derive(Clone)] 
-pub struct Rabin { - result: i64, -} - -impl Default for Rabin { - fn default() -> Self { - Rabin { result: EMPTY } - } -} - -impl Update for Rabin { - fn update(&mut self, data: &[u8]) { - for b in data { - self.result = (self.result as u64 >> 8) as i64 - ^ fp_table()[((self.result ^ *b as i64) & 0xff) as usize]; - } - } -} - -impl FixedOutput for Rabin { - fn finalize_into(self, out: &mut GenericArray) { - out.copy_from_slice(&self.result.to_le_bytes()); - } -} - -impl Reset for Rabin { - fn reset(&mut self) { - self.result = EMPTY; - } -} - -impl OutputSizeUser for Rabin { - // 8-byte little-endian form of the i64 - // See: https://avro.apache.org/docs/current/specification/#single-object-encoding - type OutputSize = U8; -} - -impl HashMarker for Rabin {} - -impl FixedOutputReset for Rabin { - fn finalize_into_reset(&mut self, out: &mut Output) { - out.copy_from_slice(&self.result.to_le_bytes()); - self.reset(); - } -} - -#[cfg(test)] -mod tests { - use super::Rabin; - use apache_avro_test_helper::TestResult; - use digest::Digest; - use pretty_assertions::assert_eq; - - // See: https://github.com/apache/avro/blob/main/share/test/data/schema-tests.txt - #[test] - fn test1() -> TestResult { - let data: &[(&str, i64)] = &[ - (r#""null""#, 7195948357588979594), - (r#""boolean""#, -6970731678124411036), - ( - r#"{"name":"foo","type":"fixed","size":15}"#, - 1756455273707447556, - ), - ( - r#"{"name":"PigValue","type":"record","fields":[{"name":"value","type":["null","int","long","PigValue"]}]}"#, - -1759257747318642341, - ), - ]; - - let mut hasher = Rabin::new(); - - for (s, fp) in data { - hasher.update(s.as_bytes()); - let res: &[u8] = &hasher.finalize_reset(); - let result = i64::from_le_bytes(res.try_into()?); - assert_eq!(*fp, result); - } - - Ok(()) - } -} diff --git a/lang/rust/avro/src/reader.rs b/lang/rust/avro/src/reader.rs deleted file mode 100644 index 1655dccfa79..00000000000 --- a/lang/rust/avro/src/reader.rs +++ /dev/null @@ -1,1030 +0,0 @@ -// 
Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic handling reading from Avro format at user level. -use crate::{ - decode::{decode, decode_internal}, - from_value, - rabin::Rabin, - schema::{ - resolve_names, resolve_names_with_schemata, AvroSchema, Names, ResolvedOwnedSchema, - ResolvedSchema, Schema, - }, - types::Value, - util, AvroResult, Codec, Error, -}; -use serde::de::DeserializeOwned; -use serde_json::from_slice; -use std::{ - collections::HashMap, - io::{ErrorKind, Read}, - marker::PhantomData, - str::FromStr, -}; - -/// Internal Block reader. -#[derive(Debug, Clone)] -struct Block<'r, R> { - reader: R, - /// Internal buffering to reduce allocation. - buf: Vec, - buf_idx: usize, - /// Number of elements expected to exist within this block. 
- message_count: usize, - marker: [u8; 16], - codec: Codec, - writer_schema: Schema, - schemata: Vec<&'r Schema>, - user_metadata: HashMap>, - names_refs: Names, -} - -impl<'r, R: Read> Block<'r, R> { - fn new(reader: R, schemata: Vec<&'r Schema>) -> AvroResult> { - let mut block = Block { - reader, - codec: Codec::Null, - writer_schema: Schema::Null, - schemata, - buf: vec![], - buf_idx: 0, - message_count: 0, - marker: [0; 16], - user_metadata: Default::default(), - names_refs: Default::default(), - }; - - block.read_header()?; - Ok(block) - } - - /// Try to read the header and to set the writer `Schema`, the `Codec` and the marker based on - /// its content. - fn read_header(&mut self) -> AvroResult<()> { - let mut buf = [0u8; 4]; - self.reader - .read_exact(&mut buf) - .map_err(Error::ReadHeader)?; - - if buf != [b'O', b'b', b'j', 1u8] { - return Err(Error::HeaderMagic); - } - - let meta_schema = Schema::map(Schema::Bytes); - if let Value::Map(metadata) = decode(&meta_schema, &mut self.reader)? { - self.read_writer_schema(&metadata)?; - self.codec = read_codec(&metadata)?; - - for (key, value) in metadata { - if key == "avro.schema" - || key == "avro.codec" - || key == "avro.codec.compression_level" - { - // already processed - } else if key.starts_with("avro.") { - warn!("Ignoring unknown metadata key: {}", key); - } else { - self.read_user_metadata(key, value); - } - } - } else { - return Err(Error::GetHeaderMetadata); - } - - self.reader - .read_exact(&mut self.marker) - .map_err(Error::ReadMarker) - } - - fn fill_buf(&mut self, n: usize) -> AvroResult<()> { - // The buffer needs to contain exactly `n` elements, otherwise codecs will potentially read - // invalid bytes. - // - // The are two cases to handle here: - // - // 1. `n > self.buf.len()`: - // In this case we call `Vec::resize`, which guarantees that `self.buf.len() == n`. - // 2. `n < self.buf.len()`: - // We need to resize to ensure that the buffer len is safe to read `n` elements. 
- // - // TODO: Figure out a way to avoid having to truncate for the second case. - self.buf.resize(util::safe_len(n)?, 0); - self.reader - .read_exact(&mut self.buf) - .map_err(Error::ReadIntoBuf)?; - self.buf_idx = 0; - Ok(()) - } - - /// Try to read a data block, also performing schema resolution for the objects contained in - /// the block. The objects are stored in an internal buffer to the `Reader`. - fn read_block_next(&mut self) -> AvroResult<()> { - assert!(self.is_empty(), "Expected self to be empty!"); - match util::read_long(&mut self.reader) { - Ok(block_len) => { - self.message_count = block_len as usize; - let block_bytes = util::read_long(&mut self.reader)?; - self.fill_buf(block_bytes as usize)?; - let mut marker = [0u8; 16]; - self.reader - .read_exact(&mut marker) - .map_err(Error::ReadBlockMarker)?; - - if marker != self.marker { - return Err(Error::GetBlockMarker); - } - - // NOTE (JAB): This doesn't fit this Reader pattern very well. - // `self.buf` is a growable buffer that is reused as the reader is iterated. - // For non `Codec::Null` variants, `decompress` will allocate a new `Vec` - // and replace `buf` with the new one, instead of reusing the same buffer. - // We can address this by using some "limited read" type to decode directly - // into the buffer. But this is fine, for now. 
- self.codec.decompress(&mut self.buf) - } - Err(Error::ReadVariableIntegerBytes(io_err)) => { - if let ErrorKind::UnexpectedEof = io_err.kind() { - // to not return any error in case we only finished to read cleanly from the stream - Ok(()) - } else { - Err(Error::ReadVariableIntegerBytes(io_err)) - } - } - Err(e) => Err(e), - } - } - - fn len(&self) -> usize { - self.message_count - } - - fn is_empty(&self) -> bool { - self.len() == 0 - } - - fn read_next(&mut self, read_schema: Option<&Schema>) -> AvroResult> { - if self.is_empty() { - self.read_block_next()?; - if self.is_empty() { - return Ok(None); - } - } - - let mut block_bytes = &self.buf[self.buf_idx..]; - let b_original = block_bytes.len(); - - let item = decode_internal( - &self.writer_schema, - &self.names_refs, - &None, - &mut block_bytes, - )?; - let item = match read_schema { - Some(schema) => item.resolve(schema)?, - None => item, - }; - - if b_original == block_bytes.len() { - // from_avro_datum did not consume any bytes, so return an error to avoid an infinite loop - return Err(Error::ReadBlock); - } - self.buf_idx += b_original - block_bytes.len(); - self.message_count -= 1; - Ok(Some(item)) - } - - fn read_writer_schema(&mut self, metadata: &HashMap) -> AvroResult<()> { - let json: serde_json::Value = metadata - .get("avro.schema") - .and_then(|bytes| { - if let Value::Bytes(ref bytes) = *bytes { - from_slice(bytes.as_ref()).ok() - } else { - None - } - }) - .ok_or(Error::GetAvroSchemaFromMap)?; - if !self.schemata.is_empty() { - let rs = ResolvedSchema::try_from(self.schemata.clone())?; - let names: Names = rs - .get_names() - .iter() - .map(|(name, schema)| (name.clone(), (*schema).clone())) - .collect(); - self.writer_schema = Schema::parse_with_names(&json, names)?; - resolve_names_with_schemata(&self.schemata, &mut self.names_refs, &None)?; - } else { - self.writer_schema = Schema::parse(&json)?; - resolve_names(&self.writer_schema, &mut self.names_refs, &None)?; - } - Ok(()) - } - - fn 
read_user_metadata(&mut self, key: String, value: Value) { - match value { - Value::Bytes(ref vec) => { - self.user_metadata.insert(key, vec.clone()); - } - wrong => { - warn!( - "User metadata values must be Value::Bytes, found {:?}", - wrong - ); - } - } - } -} - -fn read_codec(metadata: &HashMap) -> AvroResult { - let result = metadata - .get("avro.codec") - .map(|codec| { - if let Value::Bytes(ref bytes) = *codec { - match std::str::from_utf8(bytes.as_ref()) { - Ok(utf8) => Ok(utf8), - Err(utf8_error) => Err(Error::ConvertToUtf8Error(utf8_error)), - } - } else { - Err(Error::BadCodecMetadata) - } - }) - .map(|codec_res| match codec_res { - Ok(codec) => match Codec::from_str(codec) { - Ok(codec) => match codec { - #[cfg(feature = "bzip")] - Codec::Bzip2(_) => { - use crate::Bzip2Settings; - if let Some(Value::Bytes(bytes)) = - metadata.get("avro.codec.compression_level") - { - Ok(Codec::Bzip2(Bzip2Settings::new(bytes[0]))) - } else { - Ok(codec) - } - } - #[cfg(feature = "xz")] - Codec::Xz(_) => { - use crate::XzSettings; - if let Some(Value::Bytes(bytes)) = - metadata.get("avro.codec.compression_level") - { - Ok(Codec::Xz(XzSettings::new(bytes[0]))) - } else { - Ok(codec) - } - } - #[cfg(feature = "zstandard")] - Codec::Zstandard(_) => { - use crate::ZstandardSettings; - if let Some(Value::Bytes(bytes)) = - metadata.get("avro.codec.compression_level") - { - Ok(Codec::Zstandard(ZstandardSettings::new(bytes[0]))) - } else { - Ok(codec) - } - } - _ => Ok(codec), - }, - Err(_) => Err(Error::CodecNotSupported(codec.to_owned())), - }, - Err(err) => Err(err), - }); - - result.unwrap_or_else(|| Ok(Codec::Null)) -} - -/// Main interface for reading Avro formatted values. 
-/// -/// To be used as an iterator: -/// -/// ```no_run -/// # use apache_avro::Reader; -/// # use std::io::Cursor; -/// # let input = Cursor::new(Vec::::new()); -/// for value in Reader::new(input).unwrap() { -/// match value { -/// Ok(v) => println!("{:?}", v), -/// Err(e) => println!("Error: {}", e), -/// }; -/// } -/// ``` -pub struct Reader<'a, R> { - block: Block<'a, R>, - reader_schema: Option<&'a Schema>, - errored: bool, - should_resolve_schema: bool, -} - -impl<'a, R: Read> Reader<'a, R> { - /// Creates a `Reader` given something implementing the `io::Read` trait to read from. - /// No reader `Schema` will be set. - /// - /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. - pub fn new(reader: R) -> AvroResult> { - let block = Block::new(reader, vec![])?; - let reader = Reader { - block, - reader_schema: None, - errored: false, - should_resolve_schema: false, - }; - Ok(reader) - } - - /// Creates a `Reader` given a reader `Schema` and something implementing the `io::Read` trait - /// to read from. - /// - /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. - pub fn with_schema(schema: &'a Schema, reader: R) -> AvroResult> { - let block = Block::new(reader, vec![schema])?; - let mut reader = Reader { - block, - reader_schema: Some(schema), - errored: false, - should_resolve_schema: false, - }; - // Check if the reader and writer schemas disagree. - reader.should_resolve_schema = reader.writer_schema() != schema; - Ok(reader) - } - - /// Creates a `Reader` given a reader `Schema` and something implementing the `io::Read` trait - /// to read from. - /// - /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. 
- pub fn with_schemata( - schema: &'a Schema, - schemata: Vec<&'a Schema>, - reader: R, - ) -> AvroResult> { - let block = Block::new(reader, schemata)?; - let mut reader = Reader { - block, - reader_schema: Some(schema), - errored: false, - should_resolve_schema: false, - }; - // Check if the reader and writer schemas disagree. - reader.should_resolve_schema = reader.writer_schema() != schema; - Ok(reader) - } - - /// Get a reference to the writer `Schema`. - #[inline] - pub fn writer_schema(&self) -> &Schema { - &self.block.writer_schema - } - - /// Get a reference to the optional reader `Schema`. - #[inline] - pub fn reader_schema(&self) -> Option<&Schema> { - self.reader_schema - } - - /// Get a reference to the user metadata - #[inline] - pub fn user_metadata(&self) -> &HashMap> { - &self.block.user_metadata - } - - #[inline] - fn read_next(&mut self) -> AvroResult> { - let read_schema = if self.should_resolve_schema { - self.reader_schema - } else { - None - }; - - self.block.read_next(read_schema) - } -} - -impl<'a, R: Read> Iterator for Reader<'a, R> { - type Item = AvroResult; - - fn next(&mut self) -> Option { - // to prevent keep on reading after the first error occurs - if self.errored { - return None; - }; - match self.read_next() { - Ok(opt) => opt.map(Ok), - Err(e) => { - self.errored = true; - Some(Err(e)) - } - } - } -} - -/// Decode a `Value` encoded in Avro format given its `Schema` and anything implementing `io::Read` -/// to read from. -/// -/// In case a reader `Schema` is provided, schema resolution will also be performed. -/// -/// **NOTE** This function has a quite small niche of usage and does NOT take care of reading the -/// header and consecutive data blocks; use [`Reader`](struct.Reader.html) if you don't know what -/// you are doing, instead. 
-pub fn from_avro_datum( - writer_schema: &Schema, - reader: &mut R, - reader_schema: Option<&Schema>, -) -> AvroResult { - let value = decode(writer_schema, reader)?; - match reader_schema { - Some(schema) => value.resolve(schema), - None => Ok(value), - } -} - -/// Decode a `Value` encoded in Avro format given the provided `Schema` and anything implementing `io::Read` -/// to read from. -/// If the writer schema is incomplete, i.e. contains `Schema::Ref`s then it will use the provided -/// schemata to resolve any dependencies. -/// -/// In case a reader `Schema` is provided, schema resolution will also be performed. -pub fn from_avro_datum_schemata( - writer_schema: &Schema, - schemata: Vec<&Schema>, - reader: &mut R, - reader_schema: Option<&Schema>, -) -> AvroResult { - let rs = ResolvedSchema::try_from(schemata)?; - let value = decode_internal(writer_schema, rs.get_names(), &None, reader)?; - match reader_schema { - Some(schema) => value.resolve(schema), - None => Ok(value), - } -} - -pub struct GenericSingleObjectReader { - write_schema: ResolvedOwnedSchema, - expected_header: [u8; 10], -} - -impl GenericSingleObjectReader { - pub fn new(schema: Schema) -> AvroResult { - let fingerprint = schema.fingerprint::(); - let expected_header = [ - 0xC3, - 0x01, - fingerprint.bytes[0], - fingerprint.bytes[1], - fingerprint.bytes[2], - fingerprint.bytes[3], - fingerprint.bytes[4], - fingerprint.bytes[5], - fingerprint.bytes[6], - fingerprint.bytes[7], - ]; - Ok(GenericSingleObjectReader { - write_schema: ResolvedOwnedSchema::try_from(schema)?, - expected_header, - }) - } - - pub fn read_value(&self, reader: &mut R) -> AvroResult { - let mut header: [u8; 10] = [0; 10]; - match reader.read_exact(&mut header) { - Ok(_) => { - if self.expected_header == header { - decode_internal( - self.write_schema.get_root_schema(), - self.write_schema.get_names(), - &None, - reader, - ) - } else { - Err(Error::SingleObjectHeaderMismatch( - self.expected_header, - header, - )) - } - } - 
Err(io_error) => Err(Error::ReadHeader(io_error)), - } - } -} - -pub struct SpecificSingleObjectReader -where - T: AvroSchema, -{ - inner: GenericSingleObjectReader, - _model: PhantomData, -} - -impl SpecificSingleObjectReader -where - T: AvroSchema, -{ - pub fn new() -> AvroResult> { - Ok(SpecificSingleObjectReader { - inner: GenericSingleObjectReader::new(T::get_schema())?, - _model: PhantomData, - }) - } -} - -impl SpecificSingleObjectReader -where - T: AvroSchema + From, -{ - pub fn read_from_value(&self, reader: &mut R) -> AvroResult { - self.inner.read_value(reader).map(|v| v.into()) - } -} - -impl SpecificSingleObjectReader -where - T: AvroSchema + DeserializeOwned, -{ - pub fn read(&self, reader: &mut R) -> AvroResult { - from_value::(&self.inner.read_value(reader)?) - } -} - -/// Reads the marker bytes from Avro bytes generated earlier by a `Writer` -pub fn read_marker(bytes: &[u8]) -> [u8; 16] { - assert!( - bytes.len() > 16, - "The bytes are too short to read a marker from them" - ); - let mut marker = [0_u8; 16]; - marker.clone_from_slice(&bytes[(bytes.len() - 16)..]); - marker -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{encode::encode, types::Record}; - use apache_avro_test_helper::TestResult; - use pretty_assertions::assert_eq; - use serde::Deserialize; - use std::io::Cursor; - - const SCHEMA: &str = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "a", - "type": "long", - "default": 42 - }, - { - "name": "b", - "type": "string" - } - ] - } - "#; - const UNION_SCHEMA: &str = r#"["null", "long"]"#; - const ENCODED: &[u8] = &[ - 79u8, 98u8, 106u8, 1u8, 4u8, 22u8, 97u8, 118u8, 114u8, 111u8, 46u8, 115u8, 99u8, 104u8, - 101u8, 109u8, 97u8, 222u8, 1u8, 123u8, 34u8, 116u8, 121u8, 112u8, 101u8, 34u8, 58u8, 34u8, - 114u8, 101u8, 99u8, 111u8, 114u8, 100u8, 34u8, 44u8, 34u8, 110u8, 97u8, 109u8, 101u8, 34u8, - 58u8, 34u8, 116u8, 101u8, 115u8, 116u8, 34u8, 44u8, 34u8, 102u8, 105u8, 101u8, 108u8, - 100u8, 115u8, 34u8, 
58u8, 91u8, 123u8, 34u8, 110u8, 97u8, 109u8, 101u8, 34u8, 58u8, 34u8, - 97u8, 34u8, 44u8, 34u8, 116u8, 121u8, 112u8, 101u8, 34u8, 58u8, 34u8, 108u8, 111u8, 110u8, - 103u8, 34u8, 44u8, 34u8, 100u8, 101u8, 102u8, 97u8, 117u8, 108u8, 116u8, 34u8, 58u8, 52u8, - 50u8, 125u8, 44u8, 123u8, 34u8, 110u8, 97u8, 109u8, 101u8, 34u8, 58u8, 34u8, 98u8, 34u8, - 44u8, 34u8, 116u8, 121u8, 112u8, 101u8, 34u8, 58u8, 34u8, 115u8, 116u8, 114u8, 105u8, - 110u8, 103u8, 34u8, 125u8, 93u8, 125u8, 20u8, 97u8, 118u8, 114u8, 111u8, 46u8, 99u8, 111u8, - 100u8, 101u8, 99u8, 8u8, 110u8, 117u8, 108u8, 108u8, 0u8, 94u8, 61u8, 54u8, 221u8, 190u8, - 207u8, 108u8, 180u8, 158u8, 57u8, 114u8, 40u8, 173u8, 199u8, 228u8, 239u8, 4u8, 20u8, 54u8, - 6u8, 102u8, 111u8, 111u8, 84u8, 6u8, 98u8, 97u8, 114u8, 94u8, 61u8, 54u8, 221u8, 190u8, - 207u8, 108u8, 180u8, 158u8, 57u8, 114u8, 40u8, 173u8, 199u8, 228u8, 239u8, - ]; - - #[test] - fn test_from_avro_datum() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - let expected = record.into(); - - assert_eq!(from_avro_datum(&schema, &mut encoded, None)?, expected); - - Ok(()) - } - - #[test] - fn test_from_avro_datum_with_union_to_struct() -> TestResult { - const TEST_RECORD_SCHEMA_3240: &str = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "a", - "type": "long", - "default": 42 - }, - { - "name": "b", - "type": "string" - }, - { - "name": "a_nullable_array", - "type": ["null", {"type": "array", "items": {"type": "string"}}], - "default": null - }, - { - "name": "a_nullable_boolean", - "type": ["null", {"type": "boolean"}], - "default": null - }, - { - "name": "a_nullable_string", - "type": ["null", {"type": "string"}], - "default": null - } - ] - } - "#; - #[derive(Default, Debug, Deserialize, PartialEq, Eq)] - struct TestRecord3240 { - a: i64, - b: String, - 
a_nullable_array: Option>, - // we are missing the 'a_nullable_boolean' field to simulate missing keys - // a_nullable_boolean: Option, - a_nullable_string: Option, - } - - let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; - let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; - - let expected_record: TestRecord3240 = TestRecord3240 { - a: 27i64, - b: String::from("foo"), - a_nullable_array: None, - a_nullable_string: None, - }; - - let avro_datum = from_avro_datum(&schema, &mut encoded, None)?; - let parsed_record: TestRecord3240 = match &avro_datum { - Value::Record(_) => from_value::(&avro_datum)?, - unexpected => { - panic!("could not map avro data to struct, found unexpected: {unexpected:?}") - } - }; - - assert_eq!(parsed_record, expected_record); - - Ok(()) - } - - #[test] - fn test_null_union() -> TestResult { - let schema = Schema::parse_str(UNION_SCHEMA)?; - let mut encoded: &'static [u8] = &[2, 0]; - - assert_eq!( - from_avro_datum(&schema, &mut encoded, None)?, - Value::Union(1, Box::new(Value::Long(0))) - ); - - Ok(()) - } - - #[test] - fn test_reader_iterator() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let reader = Reader::with_schema(&schema, ENCODED)?; - - let mut record1 = Record::new(&schema).unwrap(); - record1.put("a", 27i64); - record1.put("b", "foo"); - - let mut record2 = Record::new(&schema).unwrap(); - record2.put("a", 42i64); - record2.put("b", "bar"); - - let expected = [record1.into(), record2.into()]; - - for (i, value) in reader.enumerate() { - assert_eq!(value?, expected[i]); - } - - Ok(()) - } - - #[test] - fn test_reader_invalid_header() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let invalid = ENCODED.iter().copied().skip(1).collect::>(); - assert!(Reader::with_schema(&schema, &invalid[..]).is_err()); - - Ok(()) - } - - #[test] - fn test_reader_invalid_block() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let invalid = ENCODED - .iter() - .copied() - .rev() - .skip(19) 
- .collect::>() - .into_iter() - .rev() - .collect::>(); - let reader = Reader::with_schema(&schema, &invalid[..])?; - for value in reader { - assert!(value.is_err()); - } - - Ok(()) - } - - #[test] - fn test_reader_empty_buffer() -> TestResult { - let empty = Cursor::new(Vec::new()); - assert!(Reader::new(empty).is_err()); - - Ok(()) - } - - #[test] - fn test_reader_only_header() -> TestResult { - let invalid = ENCODED.iter().copied().take(165).collect::>(); - let reader = Reader::new(&invalid[..])?; - for value in reader { - assert!(value.is_err()); - } - - Ok(()) - } - - #[test] - fn test_avro_3405_read_user_metadata_success() -> TestResult { - use crate::writer::Writer; - - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let mut user_meta_data: HashMap> = HashMap::new(); - user_meta_data.insert( - "stringKey".to_string(), - "stringValue".to_string().into_bytes(), - ); - user_meta_data.insert("bytesKey".to_string(), b"bytesValue".to_vec()); - user_meta_data.insert("vecKey".to_string(), vec![1, 2, 3]); - - for (k, v) in user_meta_data.iter() { - writer.add_user_metadata(k.to_string(), v)?; - } - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - - writer.append(record.clone())?; - writer.append(record.clone())?; - writer.flush()?; - let result = writer.into_inner()?; - - let reader = Reader::new(&result[..])?; - assert_eq!(reader.user_metadata(), &user_meta_data); - - Ok(()) - } - - #[derive(Deserialize, Clone, PartialEq, Debug)] - struct TestSingleObjectReader { - a: i64, - b: f64, - c: Vec, - } - - impl AvroSchema for TestSingleObjectReader { - fn get_schema() -> Schema { - let schema = r#" - { - "type":"record", - "name":"TestSingleObjectWrtierSerialize", - "fields":[ - { - "name":"a", - "type":"long" - }, - { - "name":"b", - "type":"double" - }, - { - "name":"c", - "type":{ - "type":"array", - "items":"string" - } - } - ] - } - "#; - 
Schema::parse_str(schema).unwrap() - } - } - - impl From for TestSingleObjectReader { - fn from(obj: Value) -> TestSingleObjectReader { - if let Value::Record(fields) = obj { - let mut a = None; - let mut b = None; - let mut c = vec![]; - for (field_name, v) in fields { - match (field_name.as_str(), v) { - ("a", Value::Long(i)) => a = Some(i), - ("b", Value::Double(d)) => b = Some(d), - ("c", Value::Array(v)) => { - for inner_val in v { - if let Value::String(s) = inner_val { - c.push(s); - } - } - } - (key, value) => panic!("Unexpected pair: {key:?} -> {value:?}"), - } - } - TestSingleObjectReader { - a: a.unwrap(), - b: b.unwrap(), - c, - } - } else { - panic!("Expected a Value::Record but was {obj:?}") - } - } - } - - impl From for Value { - fn from(obj: TestSingleObjectReader) -> Value { - Value::Record(vec![ - ("a".into(), obj.a.into()), - ("b".into(), obj.b.into()), - ( - "c".into(), - Value::Array(obj.c.into_iter().map(|s| s.into()).collect()), - ), - ]) - } - } - - #[test] - fn test_avro_3507_single_object_reader() -> TestResult { - let obj = TestSingleObjectReader { - a: 42, - b: 3.33, - c: vec!["cat".into(), "dog".into()], - }; - let mut to_read = Vec::::new(); - to_read.extend_from_slice(&[0xC3, 0x01]); - to_read.extend_from_slice( - &TestSingleObjectReader::get_schema() - .fingerprint::() - .bytes[..], - ); - encode( - &obj.clone().into(), - &TestSingleObjectReader::get_schema(), - &mut to_read, - ) - .expect("Encode should succeed"); - let mut to_read = &to_read[..]; - let generic_reader = GenericSingleObjectReader::new(TestSingleObjectReader::get_schema()) - .expect("Schema should resolve"); - let val = generic_reader - .read_value(&mut to_read) - .expect("Should read"); - let expected_value: Value = obj.into(); - assert_eq!(expected_value, val); - - Ok(()) - } - - #[test] - fn avro_3642_test_single_object_reader_incomplete_reads() -> TestResult { - let obj = TestSingleObjectReader { - a: 42, - b: 3.33, - c: vec!["cat".into(), "dog".into()], - }; - // 
The two-byte marker, to show that the message uses this single-record format - let to_read_1 = [0xC3, 0x01]; - let mut to_read_2 = Vec::::new(); - to_read_2.extend_from_slice( - &TestSingleObjectReader::get_schema() - .fingerprint::() - .bytes[..], - ); - let mut to_read_3 = Vec::::new(); - encode( - &obj.clone().into(), - &TestSingleObjectReader::get_schema(), - &mut to_read_3, - ) - .expect("Encode should succeed"); - let mut to_read = (&to_read_1[..]).chain(&to_read_2[..]).chain(&to_read_3[..]); - let generic_reader = GenericSingleObjectReader::new(TestSingleObjectReader::get_schema()) - .expect("Schema should resolve"); - let val = generic_reader - .read_value(&mut to_read) - .expect("Should read"); - let expected_value: Value = obj.into(); - assert_eq!(expected_value, val); - - Ok(()) - } - - #[test] - fn test_avro_3507_reader_parity() -> TestResult { - let obj = TestSingleObjectReader { - a: 42, - b: 3.33, - c: vec!["cat".into(), "dog".into()], - }; - - let mut to_read = Vec::::new(); - to_read.extend_from_slice(&[0xC3, 0x01]); - to_read.extend_from_slice( - &TestSingleObjectReader::get_schema() - .fingerprint::() - .bytes[..], - ); - encode( - &obj.clone().into(), - &TestSingleObjectReader::get_schema(), - &mut to_read, - ) - .expect("Encode should succeed"); - let generic_reader = GenericSingleObjectReader::new(TestSingleObjectReader::get_schema()) - .expect("Schema should resolve"); - let specific_reader = SpecificSingleObjectReader::::new() - .expect("schema should resolve"); - let mut to_read1 = &to_read[..]; - let mut to_read2 = &to_read[..]; - let mut to_read3 = &to_read[..]; - - let val = generic_reader - .read_value(&mut to_read1) - .expect("Should read"); - let read_obj1 = specific_reader - .read_from_value(&mut to_read2) - .expect("Should read from value"); - let read_obj2 = specific_reader - .read(&mut to_read3) - .expect("Should read from deserilize"); - let expected_value: Value = obj.clone().into(); - assert_eq!(obj, read_obj1); - 
assert_eq!(obj, read_obj2); - assert_eq!(val, expected_value); - - Ok(()) - } - - #[cfg(not(feature = "snappy"))] - #[test] - fn test_avro_3549_read_not_enabled_codec() { - let snappy_compressed_avro = vec![ - 79, 98, 106, 1, 4, 22, 97, 118, 114, 111, 46, 115, 99, 104, 101, 109, 97, 210, 1, 123, - 34, 102, 105, 101, 108, 100, 115, 34, 58, 91, 123, 34, 110, 97, 109, 101, 34, 58, 34, - 110, 117, 109, 34, 44, 34, 116, 121, 112, 101, 34, 58, 34, 115, 116, 114, 105, 110, - 103, 34, 125, 93, 44, 34, 110, 97, 109, 101, 34, 58, 34, 101, 118, 101, 110, 116, 34, - 44, 34, 110, 97, 109, 101, 115, 112, 97, 99, 101, 34, 58, 34, 101, 120, 97, 109, 112, - 108, 101, 110, 97, 109, 101, 115, 112, 97, 99, 101, 34, 44, 34, 116, 121, 112, 101, 34, - 58, 34, 114, 101, 99, 111, 114, 100, 34, 125, 20, 97, 118, 114, 111, 46, 99, 111, 100, - 101, 99, 12, 115, 110, 97, 112, 112, 121, 0, 213, 209, 241, 208, 200, 110, 164, 47, - 203, 25, 90, 235, 161, 167, 195, 177, 2, 20, 4, 12, 6, 49, 50, 51, 115, 38, 58, 0, 213, - 209, 241, 208, 200, 110, 164, 47, 203, 25, 90, 235, 161, 167, 195, 177, - ]; - - if let Err(err) = Reader::new(snappy_compressed_avro.as_slice()) { - assert_eq!("Codec 'snappy' is not supported/enabled", err.to_string()); - } else { - panic!("Expected an error in the reading of the codec!"); - } - } -} diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs deleted file mode 100644 index ad5d11e5ed5..00000000000 --- a/lang/rust/avro/src/schema.rs +++ /dev/null @@ -1,6823 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic for parsing and interacting with schemas in Avro format. -use crate::{ - error::Error, - schema_equality, types, - util::MapHelper, - validator::{ - validate_enum_symbol_name, validate_namespace, validate_record_field_name, - validate_schema_name, - }, - AvroResult, -}; -use digest::Digest; -use serde::{ - ser::{SerializeMap, SerializeSeq}, - Deserialize, Serialize, Serializer, -}; -use serde_json::{Map, Value}; -use std::{ - borrow::{Borrow, Cow}, - collections::{BTreeMap, HashMap, HashSet}, - fmt, - fmt::Debug, - hash::Hash, - io::Read, - str::FromStr, -}; -use strum_macros::{Display, EnumDiscriminants, EnumString}; - -/// Represents an Avro schema fingerprint -/// More information about Avro schema fingerprints can be found in the -/// [Avro Schema Fingerprint documentation](https://avro.apache.org/docs/current/specification/#schema-fingerprints) -pub struct SchemaFingerprint { - pub bytes: Vec, -} - -impl fmt::Display for SchemaFingerprint { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - self.bytes - .iter() - .map(|byte| format!("{byte:02x}")) - .collect::>() - .join("") - ) - } -} - -/// Represents any valid Avro schema -/// More information about Avro schemas can be found in the -/// [Avro Specification](https://avro.apache.org/docs/current/specification/#schema-declaration) -#[derive(Clone, Debug, EnumDiscriminants, Display)] -#[strum_discriminants(name(SchemaKind), derive(Hash, Ord, PartialOrd))] -pub enum Schema { - /// A `null` Avro schema. - Null, - /// A `boolean` Avro schema. 
- Boolean, - /// An `int` Avro schema. - Int, - /// A `long` Avro schema. - Long, - /// A `float` Avro schema. - Float, - /// A `double` Avro schema. - Double, - /// A `bytes` Avro schema. - /// `Bytes` represents a sequence of 8-bit unsigned bytes. - Bytes, - /// A `string` Avro schema. - /// `String` represents a unicode character sequence. - String, - /// A `array` Avro schema. Avro arrays are required to have the same type for each element. - /// This variant holds the `Schema` for the array element type. - Array(ArraySchema), - /// A `map` Avro schema. - /// `Map` holds a pointer to the `Schema` of its values, which must all be the same schema. - /// `Map` keys are assumed to be `string`. - Map(MapSchema), - /// A `union` Avro schema. - Union(UnionSchema), - /// A `record` Avro schema. - Record(RecordSchema), - /// An `enum` Avro schema. - Enum(EnumSchema), - /// A `fixed` Avro schema. - Fixed(FixedSchema), - /// Logical type which represents `Decimal` values. The underlying type is serialized and - /// deserialized as `Schema::Bytes` or `Schema::Fixed`. - Decimal(DecimalSchema), - /// Logical type which represents `Decimal` values without predefined scale. - /// The underlying type is serialized and deserialized as `Schema::Bytes` - BigDecimal, - /// A universally unique identifier, annotating a string. - Uuid, - /// Logical type which represents the number of days since the unix epoch. - /// Serialization format is `Schema::Int`. - Date, - /// The time of day in number of milliseconds after midnight with no reference any calendar, - /// time zone or date in particular. - TimeMillis, - /// The time of day in number of microseconds after midnight with no reference any calendar, - /// time zone or date in particular. - TimeMicros, - /// An instant in time represented as the number of milliseconds after the UNIX epoch. - TimestampMillis, - /// An instant in time represented as the number of microseconds after the UNIX epoch. 
- TimestampMicros, - /// An instant in time represented as the number of nanoseconds after the UNIX epoch. - TimestampNanos, - /// An instant in localtime represented as the number of milliseconds after the UNIX epoch. - LocalTimestampMillis, - /// An instant in local time represented as the number of microseconds after the UNIX epoch. - LocalTimestampMicros, - /// An instant in local time represented as the number of nanoseconds after the UNIX epoch. - LocalTimestampNanos, - /// An amount of time defined by a number of months, days and milliseconds. - Duration, - /// A reference to another schema. - Ref { name: Name }, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct MapSchema { - pub types: Box, - pub attributes: BTreeMap, -} - -#[derive(Clone, Debug, PartialEq)] -pub struct ArraySchema { - pub items: Box, - pub attributes: BTreeMap, -} - -impl PartialEq for Schema { - /// Assess equality of two `Schema` based on [Parsing Canonical Form]. - /// - /// [Parsing Canonical Form]: - /// https://avro.apache.org/docs/1.11.1/specification/#parsing-canonical-form-for-schemas - fn eq(&self, other: &Self) -> bool { - schema_equality::compare_schemata(self, other) - } -} - -impl SchemaKind { - pub fn is_primitive(self) -> bool { - matches!( - self, - SchemaKind::Null - | SchemaKind::Boolean - | SchemaKind::Int - | SchemaKind::Long - | SchemaKind::Double - | SchemaKind::Float - | SchemaKind::Bytes - | SchemaKind::String, - ) - } - - pub fn is_named(self) -> bool { - matches!( - self, - SchemaKind::Record | SchemaKind::Enum | SchemaKind::Fixed | SchemaKind::Ref - ) - } -} - -impl From<&types::Value> for SchemaKind { - fn from(value: &types::Value) -> Self { - use crate::types::Value; - match value { - Value::Null => Self::Null, - Value::Boolean(_) => Self::Boolean, - Value::Int(_) => Self::Int, - Value::Long(_) => Self::Long, - Value::Float(_) => Self::Float, - Value::Double(_) => Self::Double, - Value::Bytes(_) => Self::Bytes, - Value::String(_) => Self::String, - 
Value::Array(_) => Self::Array, - Value::Map(_) => Self::Map, - Value::Union(_, _) => Self::Union, - Value::Record(_) => Self::Record, - Value::Enum(_, _) => Self::Enum, - Value::Fixed(_, _) => Self::Fixed, - Value::Decimal { .. } => Self::Decimal, - Value::BigDecimal(_) => Self::BigDecimal, - Value::Uuid(_) => Self::Uuid, - Value::Date(_) => Self::Date, - Value::TimeMillis(_) => Self::TimeMillis, - Value::TimeMicros(_) => Self::TimeMicros, - Value::TimestampMillis(_) => Self::TimestampMillis, - Value::TimestampMicros(_) => Self::TimestampMicros, - Value::TimestampNanos(_) => Self::TimestampNanos, - Value::LocalTimestampMillis(_) => Self::LocalTimestampMillis, - Value::LocalTimestampMicros(_) => Self::LocalTimestampMicros, - Value::LocalTimestampNanos(_) => Self::LocalTimestampNanos, - Value::Duration { .. } => Self::Duration, - } - } -} - -/// Represents names for `record`, `enum` and `fixed` Avro schemas. -/// -/// Each of these `Schema`s have a `fullname` composed of two parts: -/// * a name -/// * a namespace -/// -/// `aliases` can also be defined, to facilitate schema evolution. -/// -/// More information about schema names can be found in the -/// [Avro specification](https://avro.apache.org/docs/current/specification/#names) -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct Name { - pub name: String, - pub namespace: Namespace, -} - -/// Represents documentation for complex Avro schemas. -pub type Documentation = Option; -/// Represents the aliases for Named Schema -pub type Aliases = Option>; -/// Represents Schema lookup within a schema env -pub(crate) type Names = HashMap; -/// Represents Schema lookup within a schema -pub type NamesRef<'a> = HashMap; -/// Represents the namespace for Named Schema -pub type Namespace = Option; - -impl Name { - /// Create a new `Name`. - /// Parses the optional `namespace` from the `name` string. - /// `aliases` will not be defined. 
- pub fn new(name: &str) -> AvroResult { - let (name, namespace) = Name::get_name_and_namespace(name)?; - Ok(Self { - name, - namespace: namespace.filter(|ns| !ns.is_empty()), - }) - } - - fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { - validate_schema_name(name) - } - - /// Parse a `serde_json::Value` into a `Name`. - pub(crate) fn parse( - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - let (name, namespace_from_name) = complex - .name() - .map(|name| Name::get_name_and_namespace(name.as_str()).unwrap()) - .ok_or(Error::GetNameField)?; - // FIXME Reading name from the type is wrong ! The name there is just a metadata (AVRO-3430) - let type_name = match complex.get("type") { - Some(Value::Object(complex_type)) => complex_type.name().or(None), - _ => None, - }; - - let namespace = namespace_from_name - .or_else(|| { - complex - .string("namespace") - .or_else(|| enclosing_namespace.clone()) - }) - .filter(|ns| !ns.is_empty()); - - if let Some(ref ns) = namespace { - validate_namespace(ns)?; - } - - Ok(Self { - name: type_name.unwrap_or(name), - namespace, - }) - } - - /// Return the `fullname` of this `Name` - /// - /// More information about fullnames can be found in the - /// [Avro specification](https://avro.apache.org/docs/current/specification/#names) - pub fn fullname(&self, default_namespace: Namespace) -> String { - if self.name.contains('.') { - self.name.clone() - } else { - let namespace = self.namespace.clone().or(default_namespace); - - match namespace { - Some(ref namespace) if !namespace.is_empty() => { - format!("{}.{}", namespace, self.name) - } - _ => self.name.clone(), - } - } - } - - /// Return the fully qualified name needed for indexing or searching for the schema within a schema/schema env context. 
Puts the enclosing namespace into the name's namespace for clarity in schema/schema env parsing - /// ```ignore - /// use apache_avro::schema::Name; - /// - /// assert_eq!( - /// Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())), - /// Name::new("some_namespace.some_name")? - /// ); - /// assert_eq!( - /// Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())), - /// Name::new("some_namespace.some_name")? - /// ); - /// ``` - pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) -> Name { - Name { - name: self.name.clone(), - namespace: self - .namespace - .clone() - .or_else(|| enclosing_namespace.clone().filter(|ns| !ns.is_empty())), - } - } -} - -impl From<&str> for Name { - fn from(name: &str) -> Self { - Name::new(name).unwrap() - } -} - -impl fmt::Display for Name { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.fullname(None)[..]) - } -} - -impl<'de> Deserialize<'de> for Name { - fn deserialize(deserializer: D) -> Result - where - D: serde::de::Deserializer<'de>, - { - Value::deserialize(deserializer).and_then(|value| { - use serde::de::Error; - if let Value::Object(json) = value { - Name::parse(&json, &None).map_err(Error::custom) - } else { - Err(Error::custom(format!("Expected a JSON object: {value:?}"))) - } - }) - } -} - -/// Newtype pattern for `Name` to better control the `serde_json::Value` representation. -/// Aliases are serialized as an array of plain strings in the JSON representation. 
-#[derive(Clone, Debug, Hash, PartialEq, Eq)] -pub struct Alias(Name); - -impl Alias { - pub fn new(name: &str) -> AvroResult { - Name::new(name).map(Self) - } - - pub fn name(&self) -> String { - self.0.name.clone() - } - - pub fn namespace(&self) -> Namespace { - self.0.namespace.clone() - } - - pub fn fullname(&self, default_namespace: Namespace) -> String { - self.0.fullname(default_namespace) - } - - pub fn fully_qualified_name(&self, default_namespace: &Namespace) -> Name { - self.0.fully_qualified_name(default_namespace) - } -} - -impl From<&str> for Alias { - fn from(name: &str) -> Self { - Alias::new(name).unwrap() - } -} - -impl Serialize for Alias { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(&self.fullname(None)) - } -} - -#[derive(Debug)] -pub struct ResolvedSchema<'s> { - names_ref: NamesRef<'s>, - schemata: Vec<&'s Schema>, -} - -impl<'s> TryFrom<&'s Schema> for ResolvedSchema<'s> { - type Error = Error; - - fn try_from(schema: &'s Schema) -> AvroResult { - let names = HashMap::new(); - let mut rs = ResolvedSchema { - names_ref: names, - schemata: vec![schema], - }; - rs.resolve(rs.get_schemata(), &None, None)?; - Ok(rs) - } -} - -impl<'s> TryFrom> for ResolvedSchema<'s> { - type Error = Error; - - fn try_from(schemata: Vec<&'s Schema>) -> AvroResult { - let names = HashMap::new(); - let mut rs = ResolvedSchema { - names_ref: names, - schemata, - }; - rs.resolve(rs.get_schemata(), &None, None)?; - Ok(rs) - } -} - -impl<'s> ResolvedSchema<'s> { - pub fn get_schemata(&self) -> Vec<&'s Schema> { - self.schemata.clone() - } - - pub fn get_names(&self) -> &NamesRef<'s> { - &self.names_ref - } - - /// Creates `ResolvedSchema` with some already known schemas. - /// - /// Those schemata would be used to resolve references if needed. 
- pub fn new_with_known_schemata<'n>( - schemata_to_resolve: Vec<&'s Schema>, - enclosing_namespace: &Namespace, - known_schemata: &'n NamesRef<'n>, - ) -> AvroResult { - let names = HashMap::new(); - let mut rs = ResolvedSchema { - names_ref: names, - schemata: schemata_to_resolve, - }; - rs.resolve(rs.get_schemata(), enclosing_namespace, Some(known_schemata))?; - Ok(rs) - } - - fn resolve<'n>( - &mut self, - schemata: Vec<&'s Schema>, - enclosing_namespace: &Namespace, - known_schemata: Option<&'n NamesRef<'n>>, - ) -> AvroResult<()> { - for schema in schemata { - match schema { - Schema::Array(schema) => { - self.resolve(vec![&schema.items], enclosing_namespace, known_schemata)? - } - Schema::Map(schema) => { - self.resolve(vec![&schema.types], enclosing_namespace, known_schemata)? - } - Schema::Union(UnionSchema { schemas, .. }) => { - for schema in schemas { - self.resolve(vec![schema], enclosing_namespace, known_schemata)? - } - } - Schema::Enum(EnumSchema { name, .. }) | Schema::Fixed(FixedSchema { name, .. }) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if self - .names_ref - .insert(fully_qualified_name.clone(), schema) - .is_some() - { - return Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)); - } - } - Schema::Record(RecordSchema { name, fields, .. }) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if self - .names_ref - .insert(fully_qualified_name.clone(), schema) - .is_some() - { - return Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)); - } else { - let record_namespace = fully_qualified_name.namespace; - for field in fields { - self.resolve(vec![&field.schema], &record_namespace, known_schemata)? - } - } - } - Schema::Ref { name } => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - // first search for reference in current schemata, then look into external references. 
- if !self.names_ref.contains_key(&fully_qualified_name) { - let is_resolved_with_known_schemas = known_schemata - .as_ref() - .map(|names| names.contains_key(&fully_qualified_name)) - .unwrap_or(false); - if !is_resolved_with_known_schemas { - return Err(Error::SchemaResolutionError(fully_qualified_name)); - } - } - } - _ => (), - } - } - Ok(()) - } -} - -pub(crate) struct ResolvedOwnedSchema { - names: Names, - root_schema: Schema, -} - -impl TryFrom for ResolvedOwnedSchema { - type Error = Error; - - fn try_from(schema: Schema) -> AvroResult { - let names = HashMap::new(); - let mut rs = ResolvedOwnedSchema { - names, - root_schema: schema, - }; - resolve_names(&rs.root_schema, &mut rs.names, &None)?; - Ok(rs) - } -} - -impl ResolvedOwnedSchema { - pub(crate) fn get_root_schema(&self) -> &Schema { - &self.root_schema - } - pub(crate) fn get_names(&self) -> &Names { - &self.names - } -} - -pub(crate) fn resolve_names( - schema: &Schema, - names: &mut Names, - enclosing_namespace: &Namespace, -) -> AvroResult<()> { - match schema { - Schema::Array(schema) => resolve_names(&schema.items, names, enclosing_namespace), - Schema::Map(schema) => resolve_names(&schema.types, names, enclosing_namespace), - Schema::Union(UnionSchema { schemas, .. }) => { - for schema in schemas { - resolve_names(schema, names, enclosing_namespace)? - } - Ok(()) - } - Schema::Enum(EnumSchema { name, .. }) | Schema::Fixed(FixedSchema { name, .. }) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names - .insert(fully_qualified_name.clone(), schema.clone()) - .is_some() - { - Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)) - } else { - Ok(()) - } - } - Schema::Record(RecordSchema { name, fields, .. 
}) => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names - .insert(fully_qualified_name.clone(), schema.clone()) - .is_some() - { - Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)) - } else { - let record_namespace = fully_qualified_name.namespace; - for field in fields { - resolve_names(&field.schema, names, &record_namespace)? - } - Ok(()) - } - } - Schema::Ref { name } => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - names - .get(&fully_qualified_name) - .map(|_| ()) - .ok_or(Error::SchemaResolutionError(fully_qualified_name)) - } - _ => Ok(()), - } -} - -pub(crate) fn resolve_names_with_schemata( - schemata: &Vec<&Schema>, - names: &mut Names, - enclosing_namespace: &Namespace, -) -> AvroResult<()> { - for schema in schemata { - resolve_names(schema, names, enclosing_namespace)?; - } - Ok(()) -} - -/// Represents a `field` in a `record` Avro schema. -#[derive(Clone, Debug, PartialEq)] -pub struct RecordField { - /// Name of the field. - pub name: String, - /// Documentation of the field. - pub doc: Documentation, - /// Aliases of the field's name. They have no namespace. - pub aliases: Option>, - /// Default value of the field. - /// This value will be used when reading Avro datum if schema resolution - /// is enabled. - pub default: Option, - /// Schema of the field. - pub schema: Schema, - /// Order of the field. - /// - /// **NOTE** This currently has no effect. - pub order: RecordFieldOrder, - /// Position of the field in the list of `field` of its parent `Schema` - pub position: usize, - /// A collection of all unknown fields in the record field. - pub custom_attributes: BTreeMap, -} - -/// Represents any valid order for a `field` in a `record` Avro schema. 
-#[derive(Clone, Debug, Eq, PartialEq, EnumString)] -#[strum(serialize_all = "kebab_case")] -pub enum RecordFieldOrder { - Ascending, - Descending, - Ignore, -} - -impl RecordField { - /// Parse a `serde_json::Value` into a `RecordField`. - fn parse( - field: &Map, - position: usize, - parser: &mut Parser, - enclosing_record: &Name, - ) -> AvroResult { - let name = field.name().ok_or(Error::GetNameFieldFromRecord)?; - - validate_record_field_name(&name)?; - - // TODO: "type" = "" - let schema = parser.parse_complex(field, &enclosing_record.namespace)?; - - let default = field.get("default").cloned(); - Self::resolve_default_value( - &schema, - &name, - &enclosing_record.fullname(None), - &parser.parsed_schemas, - &default, - )?; - - let aliases = field.get("aliases").and_then(|aliases| { - aliases.as_array().map(|aliases| { - aliases - .iter() - .flat_map(|alias| alias.as_str()) - .map(|alias| alias.to_string()) - .collect::>() - }) - }); - - let order = field - .get("order") - .and_then(|order| order.as_str()) - .and_then(|order| RecordFieldOrder::from_str(order).ok()) - .unwrap_or(RecordFieldOrder::Ascending); - - Ok(RecordField { - name, - doc: field.doc(), - default, - aliases, - order, - position, - custom_attributes: RecordField::get_field_custom_attributes(field, &schema), - schema, - }) - } - - fn resolve_default_value( - field_schema: &Schema, - field_name: &str, - record_name: &str, - names: &Names, - default: &Option, - ) -> AvroResult<()> { - if let Some(value) = default { - let avro_value = types::Value::from(value.clone()); - match field_schema { - Schema::Union(union_schema) => { - let schemas = &union_schema.schemas; - let resolved = schemas.iter().any(|schema| { - avro_value - .to_owned() - .resolve_internal(schema, names, &schema.namespace(), &None) - .is_ok() - }); - - if !resolved { - let schema: Option<&Schema> = schemas.first(); - return match schema { - Some(first_schema) => Err(Error::GetDefaultUnion( - SchemaKind::from(first_schema), - 
types::ValueKind::from(avro_value), - )), - None => Err(Error::EmptyUnion), - }; - } - } - _ => { - let resolved = avro_value - .resolve_internal(field_schema, names, &field_schema.namespace(), &None) - .is_ok(); - - if !resolved { - return Err(Error::GetDefaultRecordField( - field_name.to_string(), - record_name.to_string(), - field_schema.canonical_form(), - )); - } - } - }; - } - - Ok(()) - } - - fn get_field_custom_attributes( - field: &Map, - schema: &Schema, - ) -> BTreeMap { - let mut custom_attributes: BTreeMap = BTreeMap::new(); - for (key, value) in field { - match key.as_str() { - "type" | "name" | "doc" | "default" | "order" | "position" | "aliases" - | "logicalType" => continue, - key if key == "symbols" && matches!(schema, Schema::Enum(_)) => continue, - key if key == "size" && matches!(schema, Schema::Fixed(_)) => continue, - _ => custom_attributes.insert(key.clone(), value.clone()), - }; - } - custom_attributes - } - - /// Returns true if this `RecordField` is nullable, meaning the schema is a `UnionSchema` where the first variant is `Null`. - pub fn is_nullable(&self) -> bool { - match self.schema { - Schema::Union(ref inner) => inner.is_nullable(), - _ => false, - } - } -} - -/// A description of an Enum schema. -#[derive(Debug, Clone)] -pub struct RecordSchema { - /// The name of the schema - pub name: Name, - /// The aliases of the schema - pub aliases: Aliases, - /// The documentation of the schema - pub doc: Documentation, - /// The set of fields of the schema - pub fields: Vec, - /// The `lookup` table maps field names to their position in the `Vec` - /// of `fields`. - pub lookup: BTreeMap, - /// The custom attributes of the schema - pub attributes: BTreeMap, -} - -/// A description of an Enum schema. 
-#[derive(Debug, Clone)] -pub struct EnumSchema { - /// The name of the schema - pub name: Name, - /// The aliases of the schema - pub aliases: Aliases, - /// The documentation of the schema - pub doc: Documentation, - /// The set of symbols of the schema - pub symbols: Vec, - /// An optional default symbol used for compatibility - pub default: Option, - /// The custom attributes of the schema - pub attributes: BTreeMap, -} - -/// A description of a Union schema. -#[derive(Debug, Clone)] -pub struct FixedSchema { - /// The name of the schema - pub name: Name, - /// The aliases of the schema - pub aliases: Aliases, - /// The documentation of the schema - pub doc: Documentation, - /// The size of the fixed schema - pub size: usize, - /// An optional default symbol used for compatibility - pub default: Option, - /// The custom attributes of the schema - pub attributes: BTreeMap, -} - -impl FixedSchema { - fn serialize_to_map(&self, mut map: S::SerializeMap) -> Result - where - S: Serializer, - { - map.serialize_entry("type", "fixed")?; - if let Some(ref n) = self.name.namespace { - map.serialize_entry("namespace", n)?; - } - map.serialize_entry("name", &self.name.name)?; - if let Some(ref docstr) = self.doc { - map.serialize_entry("doc", docstr)?; - } - map.serialize_entry("size", &self.size)?; - - if let Some(ref aliases) = self.aliases { - map.serialize_entry("aliases", aliases)?; - } - - for attr in &self.attributes { - map.serialize_entry(attr.0, attr.1)?; - } - - Ok(map) - } -} - -/// A description of a Union schema. -/// -/// `scale` defaults to 0 and is an integer greater than or equal to 0 and `precision` is an -/// integer greater than 0. 
-#[derive(Debug, Clone)] -pub struct DecimalSchema { - /// The number of digits in the unscaled value - pub precision: DecimalMetadata, - /// The number of digits to the right of the decimal point - pub scale: DecimalMetadata, - /// The inner schema of the decimal (fixed or bytes) - pub inner: Box, -} - -/// A description of a Union schema -#[derive(Debug, Clone)] -pub struct UnionSchema { - /// The schemas that make up this union - pub(crate) schemas: Vec, - // Used to ensure uniqueness of schema inputs, and provide constant time finding of the - // schema index given a value. - // **NOTE** that this approach does not work for named types, and will have to be modified - // to support that. A simple solution is to also keep a mapping of the names used. - variant_index: BTreeMap, -} - -impl UnionSchema { - /// Creates a new UnionSchema from a vector of schemas. - pub fn new(schemas: Vec) -> AvroResult { - let mut vindex = BTreeMap::new(); - for (i, schema) in schemas.iter().enumerate() { - if let Schema::Union(_) = schema { - return Err(Error::GetNestedUnion); - } - let kind = SchemaKind::from(schema); - if !kind.is_named() && vindex.insert(kind, i).is_some() { - return Err(Error::GetUnionDuplicate); - } - } - Ok(UnionSchema { - schemas, - variant_index: vindex, - }) - } - - /// Returns a slice to all variants of this schema. - pub fn variants(&self) -> &[Schema] { - &self.schemas - } - - /// Returns true if the any of the variants of this `UnionSchema` is `Null`. - pub fn is_nullable(&self) -> bool { - self.schemas.iter().any(|x| matches!(x, Schema::Null)) - } - - /// Optionally returns a reference to the schema matched by this value, as well as its position - /// within this union. 
- #[deprecated( - since = "0.15.0", - note = "Please use `find_schema_with_known_schemata` instead" - )] - pub fn find_schema(&self, value: &types::Value) -> Option<(usize, &Schema)> { - self.find_schema_with_known_schemata::(value, None, &None) - } - - /// Optionally returns a reference to the schema matched by this value, as well as its position - /// within this union. - /// - /// Extra arguments: - /// - `known_schemata` - mapping between `Name` and `Schema` - if passed, additional external schemas would be used to resolve references. - pub fn find_schema_with_known_schemata + Debug>( - &self, - value: &types::Value, - known_schemata: Option<&HashMap>, - enclosing_namespace: &Namespace, - ) -> Option<(usize, &Schema)> { - let schema_kind = SchemaKind::from(value); - if let Some(&i) = self.variant_index.get(&schema_kind) { - // fast path - Some((i, &self.schemas[i])) - } else { - // slow path (required for matching logical or named types) - - // first collect what schemas we already know - let mut collected_names: HashMap = known_schemata - .map(|names| { - names - .iter() - .map(|(name, schema)| (name.clone(), schema.borrow())) - .collect() - }) - .unwrap_or_default(); - - self.schemas.iter().enumerate().find(|(_, schema)| { - let resolved_schema = ResolvedSchema::new_with_known_schemata( - vec![*schema], - enclosing_namespace, - &collected_names, - ) - .expect("Schema didn't successfully parse"); - let resolved_names = resolved_schema.names_ref; - - // extend known schemas with just resolved names - collected_names.extend(resolved_names); - let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone()); - - value - .clone() - .resolve_internal(schema, &collected_names, namespace, &None) - .is_ok() - }) - } - } -} - -// No need to compare variant_index, it is derivative of schemas. 
-impl PartialEq for UnionSchema { - fn eq(&self, other: &UnionSchema) -> bool { - self.schemas.eq(&other.schemas) - } -} - -type DecimalMetadata = usize; -pub(crate) type Precision = DecimalMetadata; -pub(crate) type Scale = DecimalMetadata; - -fn parse_json_integer_for_decimal(value: &serde_json::Number) -> Result { - Ok(if value.is_u64() { - let num = value - .as_u64() - .ok_or_else(|| Error::GetU64FromJson(value.clone()))?; - num.try_into() - .map_err(|e| Error::ConvertU64ToUsize(e, num))? - } else if value.is_i64() { - let num = value - .as_i64() - .ok_or_else(|| Error::GetI64FromJson(value.clone()))?; - num.try_into() - .map_err(|e| Error::ConvertI64ToUsize(e, num))? - } else { - return Err(Error::GetPrecisionOrScaleFromJson(value.clone())); - }) -} - -#[derive(Default)] -struct Parser { - input_schemas: HashMap, - /// A map of name -> Schema::Ref - /// Used to resolve cyclic references, i.e. when a - /// field's type is a reference to its record's type - resolving_schemas: Names, - input_order: Vec, - /// A map of name -> fully parsed Schema - /// Used to avoid parsing the same schema twice - parsed_schemas: Names, -} - -impl Schema { - /// Converts `self` into its [Parsing Canonical Form]. - /// - /// [Parsing Canonical Form]: - /// https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas - pub fn canonical_form(&self) -> String { - let json = serde_json::to_value(self) - .unwrap_or_else(|e| panic!("Cannot parse Schema from JSON: {e}")); - parsing_canonical_form(&json) - } - - /// Generate [fingerprint] of Schema's [Parsing Canonical Form]. 
- /// - /// [Parsing Canonical Form]: - /// https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas - /// [fingerprint]: - /// https://avro.apache.org/docs/current/specification/#schema-fingerprints - pub fn fingerprint(&self) -> SchemaFingerprint { - let mut d = D::new(); - d.update(self.canonical_form()); - SchemaFingerprint { - bytes: d.finalize().to_vec(), - } - } - - /// Create a `Schema` from a string representing a JSON Avro schema. - pub fn parse_str(input: &str) -> Result { - let mut parser = Parser::default(); - parser.parse_str(input) - } - - /// Create a array of `Schema`'s from a list of named JSON Avro schemas (Record, Enum, and - /// Fixed). - /// - /// It is allowed that the schemas have cross-dependencies; these will be resolved - /// during parsing. - /// - /// If two of the input schemas have the same fullname, an Error will be returned. - pub fn parse_list(input: &[&str]) -> AvroResult> { - let mut input_schemas: HashMap = HashMap::with_capacity(input.len()); - let mut input_order: Vec = Vec::with_capacity(input.len()); - for js in input { - let schema: Value = serde_json::from_str(js).map_err(Error::ParseSchemaJson)?; - if let Value::Object(inner) = &schema { - let name = Name::parse(inner, &None)?; - let previous_value = input_schemas.insert(name.clone(), schema); - if previous_value.is_some() { - return Err(Error::NameCollision(name.fullname(None))); - } - input_order.push(name); - } else { - return Err(Error::GetNameField); - } - } - let mut parser = Parser { - input_schemas, - resolving_schemas: HashMap::default(), - input_order, - parsed_schemas: HashMap::with_capacity(input.len()), - }; - parser.parse_list() - } - - /// Create a `Schema` from a reader which implements [`Read`]. 
- pub fn parse_reader(reader: &mut (impl Read + ?Sized)) -> AvroResult { - let mut buf = String::new(); - match reader.read_to_string(&mut buf) { - Ok(_) => Self::parse_str(&buf), - Err(e) => Err(Error::ReadSchemaFromReader(e)), - } - } - - /// Parses an Avro schema from JSON. - pub fn parse(value: &Value) -> AvroResult { - let mut parser = Parser::default(); - parser.parse(value, &None) - } - - /// Parses an Avro schema from JSON. - /// Any `Schema::Ref`s must be known in the `names` map. - pub(crate) fn parse_with_names(value: &Value, names: Names) -> AvroResult { - let mut parser = Parser { - input_schemas: HashMap::with_capacity(1), - resolving_schemas: Names::default(), - input_order: Vec::with_capacity(1), - parsed_schemas: names, - }; - parser.parse(value, &None) - } - - /// Returns the custom attributes (metadata) if the schema supports them. - pub fn custom_attributes(&self) -> Option<&BTreeMap> { - match self { - Schema::Record(RecordSchema { attributes, .. }) - | Schema::Enum(EnumSchema { attributes, .. }) - | Schema::Fixed(FixedSchema { attributes, .. }) - | Schema::Array(ArraySchema { attributes, .. }) - | Schema::Map(MapSchema { attributes, .. }) => Some(attributes), - _ => None, - } - } - - /// Returns the name of the schema if it has one. - pub fn name(&self) -> Option<&Name> { - match self { - Schema::Ref { name, .. } - | Schema::Record(RecordSchema { name, .. }) - | Schema::Enum(EnumSchema { name, .. }) - | Schema::Fixed(FixedSchema { name, .. }) => Some(name), - _ => None, - } - } - - /// Returns the namespace of the schema if it has one. - pub fn namespace(&self) -> Namespace { - self.name().and_then(|n| n.namespace.clone()) - } - - /// Returns the aliases of the schema if it has ones. - pub fn aliases(&self) -> Option<&Vec> { - match self { - Schema::Record(RecordSchema { aliases, .. }) - | Schema::Enum(EnumSchema { aliases, .. }) - | Schema::Fixed(FixedSchema { aliases, .. 
}) => aliases.as_ref(), - _ => None, - } - } - - /// Returns the doc of the schema if it has one. - pub fn doc(&self) -> Option<&String> { - match self { - Schema::Record(RecordSchema { doc, .. }) - | Schema::Enum(EnumSchema { doc, .. }) - | Schema::Fixed(FixedSchema { doc, .. }) => doc.as_ref(), - _ => None, - } - } - - /// Returns a Schema::Map with the given types. - pub fn map(types: Schema) -> Self { - Schema::Map(MapSchema { - types: Box::new(types), - attributes: Default::default(), - }) - } - - /// Returns a Schema::Map with the given types and custom attributes. - pub fn map_with_attributes(types: Schema, attributes: BTreeMap) -> Self { - Schema::Map(MapSchema { - types: Box::new(types), - attributes, - }) - } - - /// Returns a Schema::Array with the given items. - pub fn array(items: Schema) -> Self { - Schema::Array(ArraySchema { - items: Box::new(items), - attributes: Default::default(), - }) - } - - /// Returns a Schema::Array with the given items and custom attributes. - pub fn array_with_attributes(items: Schema, attributes: BTreeMap) -> Self { - Schema::Array(ArraySchema { - items: Box::new(items), - attributes, - }) - } -} - -impl Parser { - /// Create a `Schema` from a string representing a JSON Avro schema. - fn parse_str(&mut self, input: &str) -> Result { - let value = serde_json::from_str(input).map_err(Error::ParseSchemaJson)?; - self.parse(&value, &None) - } - - /// Create an array of `Schema`'s from an iterator of JSON Avro schemas. It is allowed that - /// the schemas have cross-dependencies; these will be resolved during parsing. 
- fn parse_list(&mut self) -> Result, Error> { - while !self.input_schemas.is_empty() { - let next_name = self - .input_schemas - .keys() - .next() - .expect("Input schemas unexpectedly empty") - .to_owned(); - let (name, value) = self - .input_schemas - .remove_entry(&next_name) - .expect("Key unexpectedly missing"); - let parsed = self.parse(&value, &None)?; - self.parsed_schemas - .insert(get_schema_type_name(name, value), parsed); - } - - let mut parsed_schemas = Vec::with_capacity(self.parsed_schemas.len()); - for name in self.input_order.drain(0..) { - let parsed = self - .parsed_schemas - .remove(&name) - .expect("One of the input schemas was unexpectedly not parsed"); - parsed_schemas.push(parsed); - } - Ok(parsed_schemas) - } - - /// Create a `Schema` from a `serde_json::Value` representing a JSON Avro - /// schema. - fn parse(&mut self, value: &Value, enclosing_namespace: &Namespace) -> AvroResult { - match *value { - Value::String(ref t) => self.parse_known_schema(t.as_str(), enclosing_namespace), - Value::Object(ref data) => self.parse_complex(data, enclosing_namespace), - Value::Array(ref data) => self.parse_union(data, enclosing_namespace), - _ => Err(Error::ParseSchemaFromValidJson), - } - } - - /// Parse a `serde_json::Value` representing an Avro type whose Schema is known into a - /// `Schema`. A Schema for a `serde_json::Value` is known if it is primitive or has - /// been parsed previously by the parsed and stored in its map of parsed_schemas. 
- fn parse_known_schema( - &mut self, - name: &str, - enclosing_namespace: &Namespace, - ) -> AvroResult { - match name { - "null" => Ok(Schema::Null), - "boolean" => Ok(Schema::Boolean), - "int" => Ok(Schema::Int), - "long" => Ok(Schema::Long), - "double" => Ok(Schema::Double), - "float" => Ok(Schema::Float), - "bytes" => Ok(Schema::Bytes), - "string" => Ok(Schema::String), - _ => self.fetch_schema_ref(name, enclosing_namespace), - } - } - - /// Given a name, tries to retrieve the parsed schema from `parsed_schemas`. - /// If a parsed schema is not found, it checks if a currently resolving - /// schema with that name exists. - /// If a resolving schema is not found, it checks if a json with that name exists - /// in `input_schemas` and then parses it (removing it from `input_schemas`) - /// and adds the parsed schema to `parsed_schemas`. - /// - /// This method allows schemas definitions that depend on other types to - /// parse their dependencies (or look them up if already parsed). - fn fetch_schema_ref( - &mut self, - name: &str, - enclosing_namespace: &Namespace, - ) -> AvroResult { - fn get_schema_ref(parsed: &Schema) -> Schema { - match &parsed { - Schema::Record(RecordSchema { ref name, .. }) - | Schema::Enum(EnumSchema { ref name, .. }) - | Schema::Fixed(FixedSchema { ref name, .. 
}) => Schema::Ref { name: name.clone() }, - _ => parsed.clone(), - } - } - - let name = Name::new(name)?; - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - - if self.parsed_schemas.contains_key(&fully_qualified_name) { - return Ok(Schema::Ref { - name: fully_qualified_name, - }); - } - if let Some(resolving_schema) = self.resolving_schemas.get(&fully_qualified_name) { - return Ok(resolving_schema.clone()); - } - - let value = self - .input_schemas - .remove(&fully_qualified_name) - // TODO make a better descriptive error message here that conveys that a named schema cannot be found - .ok_or_else(|| Error::ParsePrimitive(fully_qualified_name.fullname(None)))?; - - // parsing a full schema from inside another schema. Other full schema will not inherit namespace - let parsed = self.parse(&value, &None)?; - self.parsed_schemas - .insert(get_schema_type_name(name, value), parsed.clone()); - - Ok(get_schema_ref(&parsed)) - } - - fn parse_precision_and_scale( - complex: &Map, - ) -> Result<(Precision, Scale), Error> { - fn get_decimal_integer( - complex: &Map, - key: &'static str, - ) -> Result { - match complex.get(key) { - Some(Value::Number(value)) => parse_json_integer_for_decimal(value), - None => { - if key == "scale" { - Ok(0) - } else { - Err(Error::GetDecimalMetadataFromJson(key)) - } - } - Some(value) => Err(Error::GetDecimalMetadataValueFromJson { - key: key.into(), - value: value.clone(), - }), - } - } - let precision = get_decimal_integer(complex, "precision")?; - let scale = get_decimal_integer(complex, "scale")?; - - if precision < 1 { - return Err(Error::DecimalPrecisionMuBePositive { precision }); - } - - if precision < scale { - Err(Error::DecimalPrecisionLessThanScale { precision, scale }) - } else { - Ok((precision, scale)) - } - } - - /// Parse a `serde_json::Value` representing a complex Avro type into a - /// `Schema`. - /// - /// Avro supports "recursive" definition of types. 
- /// e.g: {"type": {"type": "string"}} - fn parse_complex( - &mut self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - // Try to parse this as a native complex type. - fn parse_as_native_complex( - complex: &Map, - parser: &mut Parser, - enclosing_namespace: &Namespace, - ) -> AvroResult { - match complex.get("type") { - Some(value) => match value { - Value::String(s) if s == "fixed" => { - parser.parse_fixed(complex, enclosing_namespace) - } - _ => parser.parse(value, enclosing_namespace), - }, - None => Err(Error::GetLogicalTypeField), - } - } - - // This crate support some logical types natively, and this function tries to convert - // a native complex type with a logical type attribute to these logical types. - // This function: - // 1. Checks whether the native complex type is in the supported kinds. - // 2. If it is, using the convert function to convert the native complex type to - // a logical type. - fn try_convert_to_logical_type( - logical_type: &str, - schema: Schema, - supported_schema_kinds: &[SchemaKind], - convert: F, - ) -> AvroResult - where - F: Fn(Schema) -> AvroResult, - { - let kind = SchemaKind::from(schema.clone()); - if supported_schema_kinds.contains(&kind) { - convert(schema) - } else { - warn!( - "Ignoring unknown logical type '{}' for schema of type: {:?}!", - logical_type, schema - ); - Ok(schema) - } - } - - match complex.get("logicalType") { - Some(Value::String(t)) => match t.as_str() { - "decimal" => { - return try_convert_to_logical_type( - "decimal", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Fixed, SchemaKind::Bytes], - |inner| -> AvroResult { - match Self::parse_precision_and_scale(complex) { - Ok((precision, scale)) => Ok(Schema::Decimal(DecimalSchema { - precision, - scale, - inner: Box::new(inner), - })), - Err(err) => { - warn!("Ignoring invalid decimal logical type: {}", err); - Ok(inner) - } - } - }, - ); - } - "big-decimal" => { - return 
try_convert_to_logical_type( - "big-decimal", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Bytes], - |_| -> AvroResult { Ok(Schema::BigDecimal) }, - ); - } - "uuid" => { - return try_convert_to_logical_type( - "uuid", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::String, SchemaKind::Fixed], - |schema| match schema { - Schema::String => Ok(Schema::Uuid), - Schema::Fixed(FixedSchema { size: 16, .. }) => Ok(Schema::Uuid), - Schema::Fixed(FixedSchema { size, .. }) => { - warn!("Ignoring uuid logical type for a Fixed schema because its size ({size:?}) is not 16! Schema: {:?}", schema); - Ok(schema) - } - _ => { - warn!( - "Ignoring invalid uuid logical type for schema: {:?}", - schema - ); - Ok(schema) - } - }, - ); - } - "date" => { - return try_convert_to_logical_type( - "date", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Int], - |_| -> AvroResult { Ok(Schema::Date) }, - ); - } - "time-millis" => { - return try_convert_to_logical_type( - "date", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Int], - |_| -> AvroResult { Ok(Schema::TimeMillis) }, - ); - } - "time-micros" => { - return try_convert_to_logical_type( - "time-micros", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::TimeMicros) }, - ); - } - "timestamp-millis" => { - return try_convert_to_logical_type( - "timestamp-millis", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::TimestampMillis) }, - ); - } - "timestamp-micros" => { - return try_convert_to_logical_type( - "timestamp-micros", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::TimestampMicros) }, - ); - } - "timestamp-nanos" => { - return try_convert_to_logical_type( - "timestamp-nanos", - 
parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::TimestampNanos) }, - ); - } - "local-timestamp-millis" => { - return try_convert_to_logical_type( - "local-timestamp-millis", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::LocalTimestampMillis) }, - ); - } - "local-timestamp-micros" => { - return try_convert_to_logical_type( - "local-timestamp-micros", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::LocalTimestampMicros) }, - ); - } - "local-timestamp-nanos" => { - return try_convert_to_logical_type( - "local-timestamp-nanos", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Long], - |_| -> AvroResult { Ok(Schema::LocalTimestampNanos) }, - ); - } - "duration" => { - return try_convert_to_logical_type( - "duration", - parse_as_native_complex(complex, self, enclosing_namespace)?, - &[SchemaKind::Fixed], - |_| -> AvroResult { Ok(Schema::Duration) }, - ); - } - // In this case, of an unknown logical type, we just pass through the underlying - // type. - _ => {} - }, - // The spec says to ignore invalid logical types and just pass through the - // underlying type. It is unclear whether that applies to this case or not, where the - // `logicalType` is not a string. 
- Some(value) => return Err(Error::GetLogicalTypeFieldType(value.clone())), - _ => {} - } - match complex.get("type") { - Some(Value::String(t)) => match t.as_str() { - "record" => self.parse_record(complex, enclosing_namespace), - "enum" => self.parse_enum(complex, enclosing_namespace), - "array" => self.parse_array(complex, enclosing_namespace), - "map" => self.parse_map(complex, enclosing_namespace), - "fixed" => self.parse_fixed(complex, enclosing_namespace), - other => self.parse_known_schema(other, enclosing_namespace), - }, - Some(Value::Object(data)) => self.parse_complex(data, enclosing_namespace), - Some(Value::Array(variants)) => self.parse_union(variants, enclosing_namespace), - Some(unknown) => Err(Error::GetComplexType(unknown.clone())), - None => Err(Error::GetComplexTypeField), - } - } - - fn register_resolving_schema(&mut self, name: &Name, aliases: &Aliases) { - let resolving_schema = Schema::Ref { name: name.clone() }; - self.resolving_schemas - .insert(name.clone(), resolving_schema.clone()); - - let namespace = &name.namespace; - - if let Some(ref aliases) = aliases { - aliases.iter().for_each(|alias| { - let alias_fullname = alias.fully_qualified_name(namespace); - self.resolving_schemas - .insert(alias_fullname, resolving_schema.clone()); - }); - } - } - - fn register_parsed_schema( - &mut self, - fully_qualified_name: &Name, - schema: &Schema, - aliases: &Aliases, - ) { - // FIXME, this should be globally aware, so if there is something overwriting something - // else then there is an ambiguous schema definition. 
An appropriate error should be thrown - self.parsed_schemas - .insert(fully_qualified_name.clone(), schema.clone()); - self.resolving_schemas.remove(fully_qualified_name); - - let namespace = &fully_qualified_name.namespace; - - if let Some(ref aliases) = aliases { - aliases.iter().for_each(|alias| { - let alias_fullname = alias.fully_qualified_name(namespace); - self.resolving_schemas.remove(&alias_fullname); - self.parsed_schemas.insert(alias_fullname, schema.clone()); - }); - } - } - - /// Returns already parsed schema or a schema that is currently being resolved. - fn get_already_seen_schema( - &self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> Option<&Schema> { - match complex.get("type") { - Some(Value::String(ref typ)) => { - let name = Name::new(typ.as_str()) - .unwrap() - .fully_qualified_name(enclosing_namespace); - self.resolving_schemas - .get(&name) - .or_else(|| self.parsed_schemas.get(&name)) - } - _ => None, - } - } - - /// Parse a `serde_json::Value` representing a Avro record type into a - /// `Schema`. 
- fn parse_record( - &mut self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - let fields_opt = complex.get("fields"); - - if fields_opt.is_none() { - if let Some(seen) = self.get_already_seen_schema(complex, enclosing_namespace) { - return Ok(seen.clone()); - } - } - - let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; - let aliases = fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); - - let mut lookup = BTreeMap::new(); - - self.register_resolving_schema(&fully_qualified_name, &aliases); - - debug!("Going to parse record schema: {:?}", &fully_qualified_name); - - let fields: Vec = fields_opt - .and_then(|fields| fields.as_array()) - .ok_or(Error::GetRecordFieldsJson) - .and_then(|fields| { - fields - .iter() - .filter_map(|field| field.as_object()) - .enumerate() - .map(|(position, field)| { - RecordField::parse(field, position, self, &fully_qualified_name) - }) - .collect::>() - })?; - - for field in &fields { - if let Some(_old) = lookup.insert(field.name.clone(), field.position) { - return Err(Error::FieldNameDuplicate(field.name.clone())); - } - - if let Some(ref field_aliases) = field.aliases { - for alias in field_aliases { - lookup.insert(alias.clone(), field.position); - } - } - } - - let schema = Schema::Record(RecordSchema { - name: fully_qualified_name.clone(), - aliases: aliases.clone(), - doc: complex.doc(), - fields, - lookup, - attributes: self.get_custom_attributes(complex, vec!["fields"]), - }); - - self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); - Ok(schema) - } - - fn get_custom_attributes( - &self, - complex: &Map, - excluded: Vec<&'static str>, - ) -> BTreeMap { - let mut custom_attributes: BTreeMap = BTreeMap::new(); - for (key, value) in complex { - match key.as_str() { - "type" | "name" | "namespace" | "doc" | "aliases" => continue, - candidate if excluded.contains(&candidate) => continue, - _ => custom_attributes.insert(key.clone(), 
value.clone()), - }; - } - custom_attributes - } - - /// Parse a `serde_json::Value` representing a Avro enum type into a - /// `Schema`. - fn parse_enum( - &mut self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - let symbols_opt = complex.get("symbols"); - - if symbols_opt.is_none() { - if let Some(seen) = self.get_already_seen_schema(complex, enclosing_namespace) { - return Ok(seen.clone()); - } - } - - let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; - let aliases = fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); - - let symbols: Vec = symbols_opt - .and_then(|v| v.as_array()) - .ok_or(Error::GetEnumSymbolsField) - .and_then(|symbols| { - symbols - .iter() - .map(|symbol| symbol.as_str().map(|s| s.to_string())) - .collect::>() - .ok_or(Error::GetEnumSymbols) - })?; - - let mut existing_symbols: HashSet<&String> = HashSet::with_capacity(symbols.len()); - for symbol in symbols.iter() { - validate_enum_symbol_name(symbol)?; - - // Ensure there are no duplicate symbols - if existing_symbols.contains(&symbol) { - return Err(Error::EnumSymbolDuplicate(symbol.to_string())); - } - - existing_symbols.insert(symbol); - } - - let mut default: Option = None; - if let Some(value) = complex.get("default") { - if let Value::String(ref s) = *value { - default = Some(s.clone()); - } else { - return Err(Error::EnumDefaultWrongType(value.clone())); - } - } - - if let Some(ref value) = default { - let resolved = types::Value::from(value.clone()) - .resolve_enum(&symbols, &Some(value.to_string()), &None) - .is_ok(); - if !resolved { - return Err(Error::GetEnumDefault { - symbol: value.to_string(), - symbols, - }); - } - } - - let schema = Schema::Enum(EnumSchema { - name: fully_qualified_name.clone(), - aliases: aliases.clone(), - doc: complex.doc(), - symbols, - default, - attributes: self.get_custom_attributes(complex, vec!["symbols"]), - }); - - self.register_parsed_schema(&fully_qualified_name, &schema, 
&aliases); - - Ok(schema) - } - - /// Parse a `serde_json::Value` representing a Avro array type into a - /// `Schema`. - fn parse_array( - &mut self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - complex - .get("items") - .ok_or(Error::GetArrayItemsField) - .and_then(|items| self.parse(items, enclosing_namespace)) - .map(|items| { - Schema::array_with_attributes( - items, - self.get_custom_attributes(complex, vec!["items"]), - ) - }) - } - - /// Parse a `serde_json::Value` representing a Avro map type into a - /// `Schema`. - fn parse_map( - &mut self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - complex - .get("values") - .ok_or(Error::GetMapValuesField) - .and_then(|items| self.parse(items, enclosing_namespace)) - .map(|items| { - Schema::map_with_attributes( - items, - self.get_custom_attributes(complex, vec!["values"]), - ) - }) - } - - /// Parse a `serde_json::Value` representing a Avro union type into a - /// `Schema`. - fn parse_union( - &mut self, - items: &[Value], - enclosing_namespace: &Namespace, - ) -> AvroResult { - items - .iter() - .map(|v| self.parse(v, enclosing_namespace)) - .collect::, _>>() - .and_then(|schemas| { - if schemas.is_empty() { - error!( - "Union schemas should have at least two members! \ - Please enable debug logging to find out which Record schema \ - declares the union with 'RUST_LOG=apache_avro::schema=debug'." - ); - } else if schemas.len() == 1 { - warn!( - "Union schema with just one member! Consider dropping the union! \ - Please enable debug logging to find out which Record schema \ - declares the union with 'RUST_LOG=apache_avro::schema=debug'." - ); - } - Ok(Schema::Union(UnionSchema::new(schemas)?)) - }) - } - - /// Parse a `serde_json::Value` representing a Avro fixed type into a - /// `Schema`. 
- fn parse_fixed( - &mut self, - complex: &Map, - enclosing_namespace: &Namespace, - ) -> AvroResult { - let size_opt = complex.get("size"); - if size_opt.is_none() { - if let Some(seen) = self.get_already_seen_schema(complex, enclosing_namespace) { - return Ok(seen.clone()); - } - } - - let doc = complex.get("doc").and_then(|v| match &v { - Value::String(ref docstr) => Some(docstr.clone()), - _ => None, - }); - - let size = match size_opt { - Some(size) => size - .as_u64() - .ok_or_else(|| Error::GetFixedSizeFieldPositive(size.clone())), - None => Err(Error::GetFixedSizeField), - }?; - - let default = complex.get("default").and_then(|v| match &v { - Value::String(ref default) => Some(default.clone()), - _ => None, - }); - - if default.is_some() { - let len = default.clone().unwrap().len(); - if len != size as usize { - return Err(Error::FixedDefaultLenSizeMismatch(len, size)); - } - } - - let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; - let aliases = fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); - - let schema = Schema::Fixed(FixedSchema { - name: fully_qualified_name.clone(), - aliases: aliases.clone(), - doc, - size: size as usize, - default, - attributes: self.get_custom_attributes(complex, vec!["size"]), - }); - - self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); - - Ok(schema) - } -} - -// A type alias may be specified either as a fully namespace-qualified, or relative -// to the namespace of the name it is an alias for. For example, if a type named "a.b" -// has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" -// and "x.y". 
-// https://avro.apache.org/docs/current/specification/#aliases -fn fix_aliases_namespace(aliases: Option>, namespace: &Namespace) -> Aliases { - aliases.map(|aliases| { - aliases - .iter() - .map(|alias| { - if alias.find('.').is_none() { - match namespace { - Some(ref ns) => format!("{ns}.{alias}"), - None => alias.clone(), - } - } else { - alias.clone() - } - }) - .map(|alias| Alias::new(alias.as_str()).unwrap()) - .collect() - }) -} - -fn get_schema_type_name(name: Name, value: Value) -> Name { - match value.get("type") { - Some(Value::Object(complex_type)) => match complex_type.name() { - Some(name) => Name::new(name.as_str()).unwrap(), - _ => name, - }, - _ => name, - } -} - -impl Serialize for Schema { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match *self { - Schema::Ref { ref name } => serializer.serialize_str(&name.fullname(None)), - Schema::Null => serializer.serialize_str("null"), - Schema::Boolean => serializer.serialize_str("boolean"), - Schema::Int => serializer.serialize_str("int"), - Schema::Long => serializer.serialize_str("long"), - Schema::Float => serializer.serialize_str("float"), - Schema::Double => serializer.serialize_str("double"), - Schema::Bytes => serializer.serialize_str("bytes"), - Schema::String => serializer.serialize_str("string"), - Schema::Array(ref inner) => { - let mut map = serializer.serialize_map(Some(2 + inner.attributes.len()))?; - map.serialize_entry("type", "array")?; - map.serialize_entry("items", &*inner.items.clone())?; - for attr in &inner.attributes { - map.serialize_entry(attr.0, attr.1)?; - } - map.end() - } - Schema::Map(ref inner) => { - let mut map = serializer.serialize_map(Some(2 + inner.attributes.len()))?; - map.serialize_entry("type", "map")?; - map.serialize_entry("values", &*inner.types.clone())?; - for attr in &inner.attributes { - map.serialize_entry(attr.0, attr.1)?; - } - map.end() - } - Schema::Union(ref inner) => { - let variants = inner.variants(); - let mut seq 
= serializer.serialize_seq(Some(variants.len()))?; - for v in variants { - seq.serialize_element(v)?; - } - seq.end() - } - Schema::Record(RecordSchema { - ref name, - ref aliases, - ref doc, - ref fields, - ref attributes, - .. - }) => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "record")?; - if let Some(ref n) = name.namespace { - map.serialize_entry("namespace", n)?; - } - map.serialize_entry("name", &name.name)?; - if let Some(ref docstr) = doc { - map.serialize_entry("doc", docstr)?; - } - if let Some(ref aliases) = aliases { - map.serialize_entry("aliases", aliases)?; - } - map.serialize_entry("fields", fields)?; - for attr in attributes { - map.serialize_entry(attr.0, attr.1)?; - } - map.end() - } - Schema::Enum(EnumSchema { - ref name, - ref symbols, - ref aliases, - ref attributes, - .. - }) => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "enum")?; - if let Some(ref n) = name.namespace { - map.serialize_entry("namespace", n)?; - } - map.serialize_entry("name", &name.name)?; - map.serialize_entry("symbols", symbols)?; - - if let Some(ref aliases) = aliases { - map.serialize_entry("aliases", aliases)?; - } - for attr in attributes { - map.serialize_entry(attr.0, attr.1)?; - } - map.end() - } - Schema::Fixed(ref fixed_schema) => { - let mut map = serializer.serialize_map(None)?; - map = fixed_schema.serialize_to_map::(map)?; - map.end() - } - Schema::Decimal(DecimalSchema { - ref scale, - ref precision, - ref inner, - }) => { - let mut map = serializer.serialize_map(None)?; - match inner.as_ref() { - Schema::Fixed(fixed_schema) => { - map = fixed_schema.serialize_to_map::(map)?; - } - Schema::Bytes => { - map.serialize_entry("type", "bytes")?; - } - others => { - return Err(serde::ser::Error::custom(format!( - "DecimalSchema inner type must be Fixed or Bytes, got {:?}", - SchemaKind::from(others) - ))); - } - } - map.serialize_entry("logicalType", "decimal")?; - 
map.serialize_entry("scale", scale)?; - map.serialize_entry("precision", precision)?; - map.end() - } - - Schema::BigDecimal => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "bytes")?; - map.serialize_entry("logicalType", "big-decimal")?; - map.end() - } - Schema::Uuid => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "string")?; - map.serialize_entry("logicalType", "uuid")?; - map.end() - } - Schema::Date => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "int")?; - map.serialize_entry("logicalType", "date")?; - map.end() - } - Schema::TimeMillis => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "int")?; - map.serialize_entry("logicalType", "time-millis")?; - map.end() - } - Schema::TimeMicros => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "time-micros")?; - map.end() - } - Schema::TimestampMillis => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "timestamp-millis")?; - map.end() - } - Schema::TimestampMicros => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "timestamp-micros")?; - map.end() - } - Schema::TimestampNanos => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "timestamp-nanos")?; - map.end() - } - Schema::LocalTimestampMillis => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "local-timestamp-millis")?; - map.end() - } - Schema::LocalTimestampMicros => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "local-timestamp-micros")?; - map.end() - } 
- Schema::LocalTimestampNanos => { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("type", "long")?; - map.serialize_entry("logicalType", "local-timestamp-nanos")?; - map.end() - } - Schema::Duration => { - let mut map = serializer.serialize_map(None)?; - - // the Avro doesn't indicate what the name of the underlying fixed type of a - // duration should be or typically is. - let inner = Schema::Fixed(FixedSchema { - name: Name::new("duration").unwrap(), - aliases: None, - doc: None, - size: 12, - default: None, - attributes: Default::default(), - }); - map.serialize_entry("type", &inner)?; - map.serialize_entry("logicalType", "duration")?; - map.end() - } - } - } -} - -impl Serialize for RecordField { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut map = serializer.serialize_map(None)?; - map.serialize_entry("name", &self.name)?; - map.serialize_entry("type", &self.schema)?; - - if let Some(ref default) = self.default { - map.serialize_entry("default", default)?; - } - - if let Some(ref aliases) = self.aliases { - map.serialize_entry("aliases", aliases)?; - } - - for attr in &self.custom_attributes { - map.serialize_entry(attr.0, attr.1)?; - } - - map.end() - } -} - -/// Parses a **valid** avro schema into the Parsing Canonical Form. -/// https://avro.apache.org/docs/current/specification/#parsing-canonical-form-for-schemas -fn parsing_canonical_form(schema: &Value) -> String { - match schema { - Value::Object(map) => pcf_map(map), - Value::String(s) => pcf_string(s), - Value::Array(v) => pcf_array(v), - json => panic!("got invalid JSON value for canonical form of schema: {json}"), - } -} - -fn pcf_map(schema: &Map) -> String { - // Look for the namespace variant up front. - let ns = schema.get("namespace").and_then(|v| v.as_str()); - let typ = schema.get("type").and_then(|v| v.as_str()); - let mut fields = Vec::new(); - for (k, v) in schema { - // Reduce primitive types to their simple form. 
([PRIMITIVE] rule) - if schema.len() == 1 && k == "type" { - // Invariant: function is only callable from a valid schema, so this is acceptable. - if let Value::String(s) = v { - return pcf_string(s); - } - } - - // Strip out unused fields ([STRIP] rule) - if field_ordering_position(k).is_none() - || k == "default" - || k == "doc" - || k == "aliases" - || k == "logicalType" - { - continue; - } - - // Fully qualify the name, if it isn't already ([FULLNAMES] rule). - if k == "name" { - // Invariant: Only valid schemas. Must be a string. - let name = v.as_str().unwrap(); - let n = match ns { - Some(namespace) if is_named_type(typ) && !name.contains('.') => { - Cow::Owned(format!("{namespace}.{name}")) - } - _ => Cow::Borrowed(name), - }; - - fields.push((k, format!("{}:{}", pcf_string(k), pcf_string(&n)))); - continue; - } - - // Strip off quotes surrounding "size" type, if they exist ([INTEGERS] rule). - if k == "size" || k == "precision" || k == "scale" { - let i = match v.as_str() { - Some(s) => s.parse::().expect("Only valid schemas are accepted!"), - None => v.as_i64().unwrap(), - }; - fields.push((k, format!("{}:{}", pcf_string(k), i))); - continue; - } - - // For anything else, recursively process the result. - fields.push(( - k, - format!("{}:{}", pcf_string(k), parsing_canonical_form(v)), - )); - } - - // Sort the fields by their canonical ordering ([ORDER] rule). 
- fields.sort_unstable_by_key(|(k, _)| field_ordering_position(k).unwrap()); - let inter = fields - .into_iter() - .map(|(_, v)| v) - .collect::>() - .join(","); - format!("{{{inter}}}") -} - -fn is_named_type(typ: Option<&str>) -> bool { - matches!( - typ, - Some("record") | Some("enum") | Some("fixed") | Some("ref") - ) -} - -fn pcf_array(arr: &[Value]) -> String { - let inter = arr - .iter() - .map(parsing_canonical_form) - .collect::>() - .join(","); - format!("[{inter}]") -} - -fn pcf_string(s: &str) -> String { - format!("\"{s}\"") -} - -const RESERVED_FIELDS: &[&str] = &[ - "name", - "type", - "fields", - "symbols", - "items", - "values", - "size", - "logicalType", - "order", - "doc", - "aliases", - "default", - "precision", - "scale", -]; - -// Used to define the ordering and inclusion of fields. -fn field_ordering_position(field: &str) -> Option { - RESERVED_FIELDS - .iter() - .position(|&f| f == field) - .map(|pos| pos + 1) -} - -/// Trait for types that serve as an Avro data model. Derive implementation available -/// through `derive` feature. Do not implement directly! -/// Implement `apache_avro::schema::derive::AvroSchemaComponent` to get this trait -/// through a blanket implementation. -pub trait AvroSchema { - fn get_schema() -> Schema; -} - -#[cfg(feature = "derive")] -pub mod derive { - use super::*; - - /// Trait for types that serve as fully defined components inside an Avro data model. Derive - /// implementation available through `derive` feature. This is what is implemented by - /// the `derive(AvroSchema)` macro. - /// - /// # Implementation guide - /// - ///### Simple implementation - /// To construct a non named simple schema, it is possible to ignore the input argument making the - /// general form implementation look like - /// ```ignore - /// impl AvroSchemaComponent for AType { - /// fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { - /// Schema::? 
- /// } - ///} - /// ``` - /// ### Passthrough implementation - /// To construct a schema for a Type that acts as in "inner" type, such as for smart pointers, simply - /// pass through the arguments to the inner type - /// ```ignore - /// impl AvroSchemaComponent for PassthroughType { - /// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { - /// InnerType::get_schema_in_ctxt(names, enclosing_namespace) - /// } - ///} - /// ``` - ///### Complex implementation - /// To implement this for Named schema there is a general form needed to avoid creating invalid - /// schemas or infinite loops. - /// ```ignore - /// impl AvroSchemaComponent for ComplexType { - /// fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) -> Schema { - /// // Create the fully qualified name for your type given the enclosing namespace - /// let name = apache_avro::schema::Name::new("MyName") - /// .expect("Unable to parse schema name") - /// .fully_qualified_name(enclosing_namespace); - /// let enclosing_namespace = &name.namespace; - /// // Check, if your name is already defined, and if so, return a ref to that name - /// if named_schemas.contains_key(&name) { - /// apache_avro::schema::Schema::Ref{name: name.clone()} - /// } else { - /// named_schemas.insert(name.clone(), apache_avro::schema::Schema::Ref{name: name.clone()}); - /// // YOUR SCHEMA DEFINITION HERE with the name equivalent to "MyName". - /// // For non-simple sub types delegate to their implementation of AvroSchemaComponent - /// } - /// } - ///} - /// ``` - pub trait AvroSchemaComponent { - fn get_schema_in_ctxt(named_schemas: &mut Names, enclosing_namespace: &Namespace) - -> Schema; - } - - impl AvroSchema for T - where - T: AvroSchemaComponent, - { - fn get_schema() -> Schema { - T::get_schema_in_ctxt(&mut HashMap::default(), &None) - } - } - - macro_rules! 
impl_schema ( - ($type:ty, $variant_constructor:expr) => ( - impl AvroSchemaComponent for $type { - fn get_schema_in_ctxt(_: &mut Names, _: &Namespace) -> Schema { - $variant_constructor - } - } - ); - ); - - impl_schema!(bool, Schema::Boolean); - impl_schema!(i8, Schema::Int); - impl_schema!(i16, Schema::Int); - impl_schema!(i32, Schema::Int); - impl_schema!(i64, Schema::Long); - impl_schema!(u8, Schema::Int); - impl_schema!(u16, Schema::Int); - impl_schema!(u32, Schema::Long); - impl_schema!(f32, Schema::Float); - impl_schema!(f64, Schema::Double); - impl_schema!(String, Schema::String); - impl_schema!(uuid::Uuid, Schema::Uuid); - impl_schema!(core::time::Duration, Schema::Duration); - - impl AvroSchemaComponent for Vec - where - T: AvroSchemaComponent, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - Schema::array(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) - } - } - - impl AvroSchemaComponent for Option - where - T: AvroSchemaComponent, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - let inner_schema = T::get_schema_in_ctxt(named_schemas, enclosing_namespace); - Schema::Union(UnionSchema { - schemas: vec![Schema::Null, inner_schema.clone()], - variant_index: vec![Schema::Null, inner_schema] - .iter() - .enumerate() - .map(|(idx, s)| (SchemaKind::from(s), idx)) - .collect(), - }) - } - } - - impl AvroSchemaComponent for Map - where - T: AvroSchemaComponent, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) - } - } - - impl AvroSchemaComponent for HashMap - where - T: AvroSchemaComponent, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - Schema::map(T::get_schema_in_ctxt(named_schemas, enclosing_namespace)) - } - } - - impl 
AvroSchemaComponent for Box - where - T: AvroSchemaComponent, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - T::get_schema_in_ctxt(named_schemas, enclosing_namespace) - } - } - - impl AvroSchemaComponent for std::sync::Mutex - where - T: AvroSchemaComponent, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - T::get_schema_in_ctxt(named_schemas, enclosing_namespace) - } - } - - impl AvroSchemaComponent for Cow<'_, T> - where - T: AvroSchemaComponent + Clone, - { - fn get_schema_in_ctxt( - named_schemas: &mut Names, - enclosing_namespace: &Namespace, - ) -> Schema { - T::get_schema_in_ctxt(named_schemas, enclosing_namespace) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::rabin::Rabin; - use apache_avro_test_helper::{ - logger::{assert_logged, assert_not_logged}, - TestResult, - }; - use serde_json::json; - - #[test] - fn test_invalid_schema() { - assert!(Schema::parse_str("invalid").is_err()); - } - - #[test] - fn test_primitive_schema() -> TestResult { - assert_eq!(Schema::Null, Schema::parse_str("\"null\"")?); - assert_eq!(Schema::Int, Schema::parse_str("\"int\"")?); - assert_eq!(Schema::Double, Schema::parse_str("\"double\"")?); - Ok(()) - } - - #[test] - fn test_array_schema() -> TestResult { - let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#)?; - assert_eq!(Schema::array(Schema::String), schema); - Ok(()) - } - - #[test] - fn test_map_schema() -> TestResult { - let schema = Schema::parse_str(r#"{"type": "map", "values": "double"}"#)?; - assert_eq!(Schema::map(Schema::Double), schema); - Ok(()) - } - - #[test] - fn test_union_schema() -> TestResult { - let schema = Schema::parse_str(r#"["null", "int"]"#)?; - assert_eq!( - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), - schema - ); - Ok(()) - } - - #[test] - fn test_union_unsupported_schema() { - let schema = 
Schema::parse_str(r#"["null", ["null", "int"], "string"]"#); - assert!(schema.is_err()); - } - - #[test] - fn test_multi_union_schema() -> TestResult { - let schema = Schema::parse_str(r#"["null", "int", "float", "string", "bytes"]"#); - assert!(schema.is_ok()); - let schema = schema?; - assert_eq!(SchemaKind::from(&schema), SchemaKind::Union); - let union_schema = match schema { - Schema::Union(u) => u, - _ => unreachable!(), - }; - assert_eq!(union_schema.variants().len(), 5); - let mut variants = union_schema.variants().iter(); - assert_eq!(SchemaKind::from(variants.next().unwrap()), SchemaKind::Null); - assert_eq!(SchemaKind::from(variants.next().unwrap()), SchemaKind::Int); - assert_eq!( - SchemaKind::from(variants.next().unwrap()), - SchemaKind::Float - ); - assert_eq!( - SchemaKind::from(variants.next().unwrap()), - SchemaKind::String - ); - assert_eq!( - SchemaKind::from(variants.next().unwrap()), - SchemaKind::Bytes - ); - assert_eq!(variants.next(), None); - - Ok(()) - } - - #[test] - fn test_avro_3621_nullable_record_field() -> TestResult { - let nullable_record_field = RecordField { - name: "next".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Union(UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, - }, - ])?), - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }; - - assert!(nullable_record_field.is_nullable()); - - let non_nullable_record_field = RecordField { - name: "next".to_string(), - doc: None, - default: Some(json!(2)), - aliases: None, - schema: Schema::Long, - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }; - - assert!(!non_nullable_record_field.is_nullable()); - Ok(()) - } - - // AVRO-3248 - #[test] - fn test_union_of_records() -> TestResult { - use std::iter::FromIterator; - - // A and B are the same except the name. 
- let schema_str_a = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - - let schema_str_b = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - - // we get Error::GetNameField if we put ["A", "B"] directly here. - let schema_str_c = r#"{ - "name": "C", - "type": "record", - "fields": [ - {"name": "field_one", "type": ["A", "B"]} - ] - }"#; - - let schema_c = Schema::parse_list(&[schema_str_a, schema_str_b, schema_str_c])? - .last() - .unwrap() - .clone(); - - let schema_c_expected = Schema::Record(RecordSchema { - name: Name::new("C")?, - aliases: None, - doc: None, - fields: vec![RecordField { - name: "field_one".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Union(UnionSchema::new(vec![ - Schema::Ref { - name: Name::new("A")?, - }, - Schema::Ref { - name: Name::new("B")?, - }, - ])?), - order: RecordFieldOrder::Ignore, - position: 0, - custom_attributes: Default::default(), - }], - lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]), - attributes: Default::default(), - }); - - assert_eq!(schema_c, schema_c_expected); - Ok(()) - } - - #[test] - fn avro_3584_test_recursion_records() -> TestResult { - // A and B are the same except the name. - let schema_str_a = r#"{ - "name": "A", - "type": "record", - "fields": [ {"name": "field_one", "type": "B"} ] - }"#; - - let schema_str_b = r#"{ - "name": "B", - "type": "record", - "fields": [ {"name": "field_one", "type": "A"} ] - }"#; - - let list = Schema::parse_list(&[schema_str_a, schema_str_b])?; - - let schema_a = list.first().unwrap().clone(); - - match schema_a { - Schema::Record(RecordSchema { fields, .. 
}) => { - let f1 = fields.first(); - - let ref_schema = Schema::Ref { - name: Name::new("B")?, - }; - assert_eq!(ref_schema, f1.unwrap().schema); - } - _ => panic!("Expected a record schema!"), - } - - Ok(()) - } - - #[test] - fn test_avro_3248_nullable_record() -> TestResult { - use std::iter::FromIterator; - - let schema_str_a = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - - // we get Error::GetNameField if we put ["null", "B"] directly here. - let schema_str_option_a = r#"{ - "name": "OptionA", - "type": "record", - "fields": [ - {"name": "field_one", "type": ["null", "A"], "default": null} - ] - }"#; - - let schema_option_a = Schema::parse_list(&[schema_str_a, schema_str_option_a])? - .last() - .unwrap() - .clone(); - - let schema_option_a_expected = Schema::Record(RecordSchema { - name: Name::new("OptionA")?, - aliases: None, - doc: None, - fields: vec![RecordField { - name: "field_one".to_string(), - doc: None, - default: Some(Value::Null), - aliases: None, - schema: Schema::Union(UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name::new("A")?, - }, - ])?), - order: RecordFieldOrder::Ignore, - position: 0, - custom_attributes: Default::default(), - }], - lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]), - attributes: Default::default(), - }); - - assert_eq!(schema_option_a, schema_option_a_expected); - - Ok(()) - } - - #[test] - fn test_record_schema() -> TestResult { - let parsed = Schema::parse_str( - r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#, - )?; - - let mut lookup = BTreeMap::new(); - lookup.insert("a".to_owned(), 0); - lookup.insert("b".to_owned(), 1); - - let expected = Schema::Record(RecordSchema { - name: Name::new("test")?, - aliases: None, - doc: None, - fields: vec![ - RecordField { - name: "a".to_string(), - doc: None, - default: 
Some(Value::Number(42i64.into())), - aliases: None, - schema: Schema::Long, - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "b".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::String, - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - ], - lookup, - attributes: Default::default(), - }); - - assert_eq!(parsed, expected); - - Ok(()) - } - - #[test] - fn test_avro_3302_record_schema_with_currently_parsing_schema() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "test", - "fields": [{ - "name": "recordField", - "type": { - "type": "record", - "name": "Node", - "fields": [ - {"name": "label", "type": "string"}, - {"name": "children", "type": {"type": "array", "items": "Node"}} - ] - } - }] - } - "#, - )?; - - let mut lookup = BTreeMap::new(); - lookup.insert("recordField".to_owned(), 0); - - let mut node_lookup = BTreeMap::new(); - node_lookup.insert("children".to_owned(), 1); - node_lookup.insert("label".to_owned(), 0); - - let expected = Schema::Record(RecordSchema { - name: Name::new("test")?, - aliases: None, - doc: None, - fields: vec![RecordField { - name: "recordField".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Record(RecordSchema { - name: Name::new("Node")?, - aliases: None, - doc: None, - fields: vec![ - RecordField { - name: "label".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::String, - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "children".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::array(Schema::Ref { - name: Name::new("Node")?, - }), - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - ], - lookup: node_lookup, - attributes: 
Default::default(), - }), - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }], - lookup, - attributes: Default::default(), - }); - assert_eq!(schema, expected); - - let canonical_form = &schema.canonical_form(); - let expected = r#"{"name":"test","type":"record","fields":[{"name":"recordField","type":{"name":"Node","type":"record","fields":[{"name":"label","type":"string"},{"name":"children","type":{"type":"array","items":"Node"}}]}}]}"#; - assert_eq!(canonical_form, &expected); - - Ok(()) - } - - // https://github.com/flavray/avro-rs/pull/99#issuecomment-1016948451 - #[test] - fn test_parsing_of_recursive_type_enum() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "User", - "namespace": "office", - "fields": [ - { - "name": "details", - "type": [ - { - "type": "record", - "name": "Employee", - "fields": [ - { - "name": "gender", - "type": { - "type": "enum", - "name": "Gender", - "symbols": [ - "male", - "female" - ] - }, - "default": "female" - } - ] - }, - { - "type": "record", - "name": "Manager", - "fields": [ - { - "name": "gender", - "type": "Gender" - } - ] - } - ] - } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - let schema_str = schema.canonical_form(); - let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"office.Employee","type":"record","fields":[{"name":"gender","type":{"name":"office.Gender","type":"enum","symbols":["male","female"]}}]},{"name":"office.Manager","type":"record","fields":[{"name":"gender","type":"office.Gender"}]}]}]}"#; - assert_eq!(schema_str, expected); - - Ok(()) - } - - #[test] - fn test_parsing_of_recursive_type_fixed() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "User", - "namespace": "office", - "fields": [ - { - "name": "details", - "type": [ - { - "type": "record", - "name": "Employee", - "fields": [ - { - "name": "id", - "type": { - "type": "fixed", - "name": 
"EmployeeId", - "size": 16 - }, - "default": "female" - } - ] - }, - { - "type": "record", - "name": "Manager", - "fields": [ - { - "name": "id", - "type": "EmployeeId" - } - ] - } - ] - } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - let schema_str = schema.canonical_form(); - let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"office.Employee","type":"record","fields":[{"name":"id","type":{"name":"office.EmployeeId","type":"fixed","size":16}}]},{"name":"office.Manager","type":"record","fields":[{"name":"id","type":"office.EmployeeId"}]}]}]}"#; - assert_eq!(schema_str, expected); - - Ok(()) - } - - #[test] - fn test_avro_3302_record_schema_with_currently_parsing_schema_aliases() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "LongList", - "aliases": ["LinkedLongs"], - "fields" : [ - {"name": "value", "type": "long"}, - {"name": "next", "type": ["null", "LinkedLongs"]} - ] - } - "#, - )?; - - let mut lookup = BTreeMap::new(); - lookup.insert("value".to_owned(), 0); - lookup.insert("next".to_owned(), 1); - - let expected = Schema::Record(RecordSchema { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, - aliases: Some(vec![Alias::new("LinkedLongs").unwrap()]), - doc: None, - fields: vec![ - RecordField { - name: "value".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Long, - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "next".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Union(UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, - }, - ])?), - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - ], - lookup, - attributes: Default::default(), - }); - assert_eq!(schema, expected); - - let 
canonical_form = &schema.canonical_form(); - let expected = r#"{"name":"LongList","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":["null","LongList"]}]}"#; - assert_eq!(canonical_form, &expected); - - Ok(()) - } - - #[test] - fn test_avro_3370_record_schema_with_currently_parsing_schema_named_record() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type" : "record", - "name" : "record", - "fields" : [ - { "name" : "value", "type" : "long" }, - { "name" : "next", "type" : "record" } - ] - } - "#, - )?; - - let mut lookup = BTreeMap::new(); - lookup.insert("value".to_owned(), 0); - lookup.insert("next".to_owned(), 1); - - let expected = Schema::Record(RecordSchema { - name: Name { - name: "record".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - fields: vec![ - RecordField { - name: "value".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Long, - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "next".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Ref { - name: Name { - name: "record".to_owned(), - namespace: None, - }, - }, - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - ], - lookup, - attributes: Default::default(), - }); - assert_eq!(schema, expected); - - let canonical_form = &schema.canonical_form(); - let expected = r#"{"name":"record","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":"record"}]}"#; - assert_eq!(canonical_form, &expected); - - Ok(()) - } - - #[test] - fn test_avro_3370_record_schema_with_currently_parsing_schema_named_enum() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type" : "record", - "name" : "record", - "fields" : [ - { - "type" : "enum", - "name" : "enum", - "symbols": ["one", "two", "three"] - }, - { "name" : "next", "type" : "enum" } - ] - } - 
"#, - )?; - - let mut lookup = BTreeMap::new(); - lookup.insert("enum".to_owned(), 0); - lookup.insert("next".to_owned(), 1); - - let expected = Schema::Record(RecordSchema { - name: Name { - name: "record".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - fields: vec![ - RecordField { - name: "enum".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Enum(EnumSchema { - name: Name { - name: "enum".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()], - default: None, - attributes: Default::default(), - }), - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "next".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Enum(EnumSchema { - name: Name { - name: "enum".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()], - default: None, - attributes: Default::default(), - }), - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - ], - lookup, - attributes: Default::default(), - }); - assert_eq!(schema, expected); - - let canonical_form = &schema.canonical_form(); - let expected = r#"{"name":"record","type":"record","fields":[{"name":"enum","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}},{"name":"next","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}}]}"#; - assert_eq!(canonical_form, &expected); - - Ok(()) - } - - #[test] - fn test_avro_3370_record_schema_with_currently_parsing_schema_named_fixed() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type" : "record", - "name" : "record", - "fields" : [ - { - "type" : "fixed", - "name" : "fixed", - "size": 456 - }, - { "name" : "next", "type" : "fixed" } - ] - } - "#, - )?; - - let mut lookup = BTreeMap::new(); 
- lookup.insert("fixed".to_owned(), 0); - lookup.insert("next".to_owned(), 1); - - let expected = Schema::Record(RecordSchema { - name: Name { - name: "record".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - fields: vec![ - RecordField { - name: "fixed".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Fixed(FixedSchema { - name: Name { - name: "fixed".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - size: 456, - default: None, - attributes: Default::default(), - }), - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "next".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Fixed(FixedSchema { - name: Name { - name: "fixed".to_owned(), - namespace: None, - }, - aliases: None, - doc: None, - size: 456, - default: None, - attributes: Default::default(), - }), - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - ], - lookup, - attributes: Default::default(), - }); - assert_eq!(schema, expected); - - let canonical_form = &schema.canonical_form(); - let expected = r#"{"name":"record","type":"record","fields":[{"name":"fixed","type":{"name":"fixed","type":"fixed","size":456}},{"name":"next","type":{"name":"fixed","type":"fixed","size":456}}]}"#; - assert_eq!(canonical_form, &expected); - - Ok(()) - } - - #[test] - fn test_enum_schema() -> TestResult { - let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "hearts"]}"#, - )?; - - let expected = Schema::Enum(EnumSchema { - name: Name::new("Suit")?, - aliases: None, - doc: None, - symbols: vec![ - "diamonds".to_owned(), - "spades".to_owned(), - "clubs".to_owned(), - "hearts".to_owned(), - ], - default: None, - attributes: Default::default(), - }); - - assert_eq!(expected, schema); - - Ok(()) - } - - #[test] - fn test_enum_schema_duplicate() -> TestResult 
{ - // Duplicate "diamonds" - let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "diamonds"]}"#, - ); - assert!(schema.is_err()); - - Ok(()) - } - - #[test] - fn test_enum_schema_name() -> TestResult { - // Invalid name "0000" does not match [A-Za-z_][A-Za-z0-9_]* - let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Enum", "symbols": ["0000", "variant"]}"#, - ); - assert!(schema.is_err()); - - Ok(()) - } - - #[test] - fn test_fixed_schema() -> TestResult { - let schema = Schema::parse_str(r#"{"type": "fixed", "name": "test", "size": 16}"#)?; - - let expected = Schema::Fixed(FixedSchema { - name: Name::new("test")?, - aliases: None, - doc: None, - size: 16_usize, - default: None, - attributes: Default::default(), - }); - - assert_eq!(expected, schema); - - Ok(()) - } - - #[test] - fn test_fixed_schema_with_documentation() -> TestResult { - let schema = Schema::parse_str( - r#"{"type": "fixed", "name": "test", "size": 16, "doc": "FixedSchema documentation"}"#, - )?; - - let expected = Schema::Fixed(FixedSchema { - name: Name::new("test")?, - aliases: None, - doc: Some(String::from("FixedSchema documentation")), - size: 16_usize, - default: None, - attributes: Default::default(), - }); - - assert_eq!(expected, schema); - - Ok(()) - } - - #[test] - fn test_no_documentation() -> TestResult { - let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Coin", "symbols": ["heads", "tails"]}"#, - )?; - - let doc = match schema { - Schema::Enum(EnumSchema { doc, .. }) => doc, - _ => unreachable!(), - }; - - assert!(doc.is_none()); - - Ok(()) - } - - #[test] - fn test_documentation() -> TestResult { - let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Coin", "doc": "Some documentation", "symbols": ["heads", "tails"]}"#, - )?; - - let doc = match schema { - Schema::Enum(EnumSchema { doc, .. 
}) => doc, - _ => None, - }; - - assert_eq!("Some documentation".to_owned(), doc.unwrap()); - - Ok(()) - } - - // Tests to ensure Schema is Send + Sync. These tests don't need to _do_ anything, if they can - // compile, they pass. - #[test] - fn test_schema_is_send() { - fn send(_s: S) {} - - let schema = Schema::Null; - send(schema); - } - - #[test] - fn test_schema_is_sync() { - fn sync(_s: S) {} - - let schema = Schema::Null; - sync(&schema); - sync(schema); - } - - #[test] - fn test_schema_fingerprint() -> TestResult { - use crate::rabin::Rabin; - use md5::Md5; - use sha2::Sha256; - - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - {"name": "c", "type": "long", "logicalType": "timestamp-micros"} - ] - } -"#; - - let schema = Schema::parse_str(raw_schema)?; - assert_eq!( - "7eb3b28d73dfc99bdd9af1848298b40804a2f8ad5d2642be2ecc2ad34842b987", - format!("{}", schema.fingerprint::()) - ); - - assert_eq!( - "cb11615e412ee5d872620d8df78ff6ae", - format!("{}", schema.fingerprint::()) - ); - assert_eq!( - "92f2ccef718c6754", - format!("{}", schema.fingerprint::()) - ); - - Ok(()) - } - - #[test] - fn test_logical_types() -> TestResult { - let schema = Schema::parse_str(r#"{"type": "int", "logicalType": "date"}"#)?; - assert_eq!(schema, Schema::Date); - - let schema = Schema::parse_str(r#"{"type": "long", "logicalType": "timestamp-micros"}"#)?; - assert_eq!(schema, Schema::TimestampMicros); - - Ok(()) - } - - #[test] - fn test_nullable_logical_type() -> TestResult { - let schema = Schema::parse_str( - r#"{"type": ["null", {"type": "long", "logicalType": "timestamp-micros"}]}"#, - )?; - assert_eq!( - schema, - Schema::Union(UnionSchema::new(vec![ - Schema::Null, - Schema::TimestampMicros, - ])?) 
- ); - - Ok(()) - } - - #[test] - fn record_field_order_from_str() -> TestResult { - use std::str::FromStr; - - assert_eq!( - RecordFieldOrder::from_str("ascending").unwrap(), - RecordFieldOrder::Ascending - ); - assert_eq!( - RecordFieldOrder::from_str("descending").unwrap(), - RecordFieldOrder::Descending - ); - assert_eq!( - RecordFieldOrder::from_str("ignore").unwrap(), - RecordFieldOrder::Ignore - ); - assert!(RecordFieldOrder::from_str("not an ordering").is_err()); - - Ok(()) - } - - #[test] - fn test_avro_3374_preserve_namespace_for_primitive() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type" : "record", - "name" : "ns.int", - "fields" : [ - {"name" : "value", "type" : "int"}, - {"name" : "next", "type" : [ "null", "ns.int" ]} - ] - } - "#, - )?; - - let json = schema.canonical_form(); - assert_eq!( - json, - r#"{"name":"ns.int","type":"record","fields":[{"name":"value","type":"int"},{"name":"next","type":["null","ns.int"]}]}"# - ); - - Ok(()) - } - - #[test] - fn test_avro_3433_preserve_schema_refs_in_json() -> TestResult { - let schema = r#" - { - "name": "test.test", - "type": "record", - "fields": [ - { - "name": "bar", - "type": { "name": "test.foo", "type": "record", "fields": [{ "name": "id", "type": "long" }] } - }, - { "name": "baz", "type": "test.foo" } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - - let expected = r#"{"name":"test.test","type":"record","fields":[{"name":"bar","type":{"name":"test.foo","type":"record","fields":[{"name":"id","type":"long"}]}},{"name":"baz","type":"test.foo"}]}"#; - assert_eq!(schema.canonical_form(), expected); - - Ok(()) - } - - #[test] - fn test_read_namespace_from_name() -> TestResult { - let schema = r#" - { - "name": "space.name", - "type": "record", - "fields": [ - { - "name": "num", - "type": "int" - } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - if let Schema::Record(RecordSchema { name, .. 
}) = schema { - assert_eq!(name.name, "name"); - assert_eq!(name.namespace, Some("space".to_string())); - } else { - panic!("Expected a record schema!"); - } - - Ok(()) - } - - #[test] - fn test_namespace_from_name_has_priority_over_from_field() -> TestResult { - let schema = r#" - { - "name": "space1.name", - "namespace": "space2", - "type": "record", - "fields": [ - { - "name": "num", - "type": "int" - } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - if let Schema::Record(RecordSchema { name, .. }) = schema { - assert_eq!(name.namespace, Some("space1".to_string())); - } else { - panic!("Expected a record schema!"); - } - - Ok(()) - } - - #[test] - fn test_namespace_from_field() -> TestResult { - let schema = r#" - { - "name": "name", - "namespace": "space2", - "type": "record", - "fields": [ - { - "name": "num", - "type": "int" - } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - if let Schema::Record(RecordSchema { name, .. }) = schema { - assert_eq!(name.namespace, Some("space2".to_string())); - } else { - panic!("Expected a record schema!"); - } - - Ok(()) - } - - #[test] - /// Zero-length namespace is considered as no-namespace. - fn test_namespace_from_name_with_empty_value() -> TestResult { - let name = Name::new(".name")?; - assert_eq!(name.name, "name"); - assert_eq!(name.namespace, None); - - Ok(()) - } - - #[test] - /// Whitespace is not allowed in the name. - fn test_name_with_whitespace_value() { - match Name::new(" ") { - Err(Error::InvalidSchemaName(_, _)) => {} - _ => panic!("Expected an Error::InvalidSchemaName!"), - } - } - - #[test] - /// The name must be non-empty. 
- fn test_name_with_no_name_part() { - match Name::new("space.") { - Err(Error::InvalidSchemaName(_, _)) => {} - _ => panic!("Expected an Error::InvalidSchemaName!"), - } - } - - #[test] - fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_record_qualified_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "space.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_enum_inherited_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - 
"null", - { - "type":"enum", - "name":"inner_enum_name", - "symbols":["Extensive","Testing"] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_enum_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_enum_qualified_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"enum", - "name":"inner_enum_name", - "symbols":["Extensive","Testing"] - } - ] - }, - { - "name": "outer_field_2", - "type" : "space.inner_enum_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_fixed_inherited_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"fixed", - "name":"inner_fixed_name", - "size": 16 - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_fixed_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn 
avro_3448_test_proper_resolution_inner_fixed_qualified_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"fixed", - "name":"inner_fixed_name", - "size": 16 - } - ] - }, - { - "name": "outer_field_2", - "type" : "space.inner_fixed_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_record_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"record", - "name":"inner_record_name", - "namespace":"inner_space", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_space.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_enum_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"enum", - "name":"inner_enum_name", - "namespace": "inner_space", - "symbols":["Extensive","Testing"] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_space.inner_enum_name" - } - 
] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_resolution_inner_fixed_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"fixed", - "name":"inner_fixed_name", - "namespace": "inner_space", - "size": 16 - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_space.inner_fixed_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_outer_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"record", - "name":"middle_record_name", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "space.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 3); - for s in &[ - "space.record_name", - "space.middle_record_name", - "space.inner_record_name", - ] { - 
assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_middle_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"record", - "name":"middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "middle_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 3); - for s in &[ - "space.record_name", - "middle_namespace.middle_record_name", - "middle_namespace.inner_record_name", - ] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"record", - "name":"middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "namespace":"inner_namespace", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 3); - for s in &[ 
- "space.record_name", - "middle_namespace.middle_record_name", - "inner_namespace.inner_record_name", - ] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_in_array_resolution_inherited_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": { - "type":"array", - "items":{ - "type":"record", - "name":"in_array_record", - "fields": [ - { - "name":"array_record_field", - "type":"string" - } - ] - } - } - }, - { - "name":"outer_field_2", - "type":"in_array_record" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.in_array_record"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3448_test_proper_in_map_resolution_inherited_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": { - "type":"map", - "values":{ - "type":"record", - "name":"in_map_record", - "fields": [ - { - "name":"map_record_field", - "type":"string" - } - ] - } - } - }, - { - "name":"outer_field_2", - "type":"in_map_record" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "space.in_map_record"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - Ok(()) - } - - #[test] - fn avro_3466_test_to_json_inner_enum_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - 
"null", - { - "type":"enum", - "name":"inner_enum_name", - "namespace": "inner_space", - "symbols":["Extensive","Testing"] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_space.inner_enum_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - - // confirm we have expected 2 full-names - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - // convert Schema back to JSON string - let schema_str = serde_json::to_string(&schema).expect("test failed"); - let _schema = Schema::parse_str(&schema_str).expect("test failed"); - assert_eq!(schema, _schema); - - Ok(()) - } - - #[test] - fn avro_3466_test_to_json_inner_fixed_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type":"fixed", - "name":"inner_fixed_name", - "namespace": "inner_space", - "size":54 - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_space.inner_fixed_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); - - // confirm we have expected 2 full-names - assert_eq!(rs.get_names().len(), 2); - for s in &["space.record_name", "inner_space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s)?)); - } - - // convert Schema back to JSON string - let schema_str = serde_json::to_string(&schema).expect("test failed"); - let _schema = Schema::parse_str(&schema_str).expect("test failed"); - assert_eq!(schema, _schema); - - Ok(()) - } - - fn assert_avro_3512_aliases(aliases: &Aliases) { - match aliases { - Some(aliases) => { - assert_eq!(aliases.len(), 3); - assert_eq!(aliases[0], Alias::new("space.b").unwrap()); 
- assert_eq!(aliases[1], Alias::new("x.y").unwrap()); - assert_eq!(aliases[2], Alias::new(".c").unwrap()); - } - None => { - panic!("'aliases' must be Some"); - } - } - } - - #[test] - fn avro_3512_alias_with_null_namespace_record() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "a", - "namespace": "space", - "aliases": ["b", "x.y", ".c"], - "fields" : [ - {"name": "time", "type": "long"} - ] - } - "#, - )?; - - if let Schema::Record(RecordSchema { ref aliases, .. }) = schema { - assert_avro_3512_aliases(aliases); - } else { - panic!("The Schema should be a record: {schema:?}"); - } - - Ok(()) - } - - #[test] - fn avro_3512_alias_with_null_namespace_enum() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "enum", - "name": "a", - "namespace": "space", - "aliases": ["b", "x.y", ".c"], - "symbols" : [ - "symbol1", "symbol2" - ] - } - "#, - )?; - - if let Schema::Enum(EnumSchema { ref aliases, .. }) = schema { - assert_avro_3512_aliases(aliases); - } else { - panic!("The Schema should be an enum: {schema:?}"); - } - - Ok(()) - } - - #[test] - fn avro_3512_alias_with_null_namespace_fixed() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "fixed", - "name": "a", - "namespace": "space", - "aliases": ["b", "x.y", ".c"], - "size" : 12 - } - "#, - )?; - - if let Schema::Fixed(FixedSchema { ref aliases, .. 
}) = schema { - assert_avro_3512_aliases(aliases); - } else { - panic!("The Schema should be a fixed: {schema:?}"); - } - - Ok(()) - } - - #[test] - fn avro_3518_serialize_aliases_record() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "a", - "namespace": "space", - "aliases": ["b", "x.y", ".c"], - "fields" : [ - { - "name": "time", - "type": "long", - "doc": "The documentation is not serialized", - "default": 123, - "aliases": ["time1", "ns.time2"] - } - ] - } - "#, - )?; - - let value = serde_json::to_value(&schema)?; - let serialized = serde_json::to_string(&value)?; - assert_eq!( - r#"{"aliases":["space.b","x.y","c"],"fields":[{"aliases":["time1","ns.time2"],"default":123,"name":"time","type":"long"}],"name":"a","namespace":"space","type":"record"}"#, - &serialized - ); - assert_eq!(schema, Schema::parse_str(&serialized)?); - - Ok(()) - } - - #[test] - fn avro_3518_serialize_aliases_enum() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "enum", - "name": "a", - "namespace": "space", - "aliases": ["b", "x.y", ".c"], - "symbols" : [ - "symbol1", "symbol2" - ] - } - "#, - )?; - - let value = serde_json::to_value(&schema)?; - let serialized = serde_json::to_string(&value)?; - assert_eq!( - r#"{"aliases":["space.b","x.y","c"],"name":"a","namespace":"space","symbols":["symbol1","symbol2"],"type":"enum"}"#, - &serialized - ); - assert_eq!(schema, Schema::parse_str(&serialized)?); - - Ok(()) - } - - #[test] - fn avro_3518_serialize_aliases_fixed() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "fixed", - "name": "a", - "namespace": "space", - "aliases": ["b", "x.y", ".c"], - "size" : 12 - } - "#, - )?; - - let value = serde_json::to_value(&schema)?; - let serialized = serde_json::to_string(&value)?; - assert_eq!( - r#"{"aliases":["space.b","x.y","c"],"name":"a","namespace":"space","size":12,"type":"fixed"}"#, - &serialized - ); - assert_eq!(schema, Schema::parse_str(&serialized)?); 
- - Ok(()) - } - - #[test] - fn avro_3130_parse_anonymous_union_type() -> TestResult { - let schema_str = r#" - { - "type": "record", - "name": "AccountEvent", - "fields": [ - {"type": - ["null", - { "name": "accountList", - "type": { - "type": "array", - "items": "long" - } - } - ], - "name":"NullableLongArray" - } - ] - } - "#; - let schema = Schema::parse_str(schema_str)?; - - if let Schema::Record(RecordSchema { name, fields, .. }) = schema { - assert_eq!(name, Name::new("AccountEvent")?); - - let field = &fields[0]; - assert_eq!(&field.name, "NullableLongArray"); - - if let Schema::Union(ref union) = field.schema { - assert_eq!(union.schemas[0], Schema::Null); - - if let Schema::Array(ref array_schema) = union.schemas[1] { - if let Schema::Long = *array_schema.items { - // OK - } else { - panic!("Expected a Schema::Array of type Long"); - } - } else { - panic!("Expected Schema::Array"); - } - } else { - panic!("Expected Schema::Union"); - } - } else { - panic!("Expected Schema::Record"); - } - - Ok(()) - } - - #[test] - fn avro_custom_attributes_schema_without_attributes() -> TestResult { - let schemata_str = [ - r#" - { - "type": "record", - "name": "Rec", - "doc": "A Record schema without custom attributes", - "fields": [] - } - "#, - r#" - { - "type": "enum", - "name": "Enum", - "doc": "An Enum schema without custom attributes", - "symbols": [] - } - "#, - r#" - { - "type": "fixed", - "name": "Fixed", - "doc": "A Fixed schema without custom attributes", - "size": 0 - } - "#, - ]; - for schema_str in schemata_str.iter() { - let schema = Schema::parse_str(schema_str)?; - assert_eq!(schema.custom_attributes(), Some(&Default::default())); - } - - Ok(()) - } - - const CUSTOM_ATTRS_SUFFIX: &str = r#" - "string_key": "value", - "number_key": 1.23, - "null_key": null, - "array_key": [1, 2, 3], - "object_key": { - "key": "value" - } - "#; - - #[test] - fn avro_3609_custom_attributes_schema_with_attributes() -> TestResult { - let schemata_str = [ - r#" - { - "type": 
"record", - "name": "Rec", - "namespace": "ns", - "doc": "A Record schema with custom attributes", - "fields": [], - {{{}}} - } - "#, - r#" - { - "type": "enum", - "name": "Enum", - "namespace": "ns", - "doc": "An Enum schema with custom attributes", - "symbols": [], - {{{}}} - } - "#, - r#" - { - "type": "fixed", - "name": "Fixed", - "namespace": "ns", - "doc": "A Fixed schema with custom attributes", - "size": 2, - {{{}}} - } - "#, - ]; - - for schema_str in schemata_str.iter() { - let schema = Schema::parse_str( - schema_str - .to_owned() - .replace("{{{}}}", CUSTOM_ATTRS_SUFFIX) - .as_str(), - )?; - - assert_eq!( - schema.custom_attributes(), - Some(&expected_custom_attributes()) - ); - } - - Ok(()) - } - - fn expected_custom_attributes() -> BTreeMap { - let mut expected_attributes: BTreeMap = Default::default(); - expected_attributes.insert("string_key".to_string(), Value::String("value".to_string())); - expected_attributes.insert("number_key".to_string(), json!(1.23)); - expected_attributes.insert("null_key".to_string(), Value::Null); - expected_attributes.insert( - "array_key".to_string(), - Value::Array(vec![json!(1), json!(2), json!(3)]), - ); - let mut object_value: HashMap = HashMap::new(); - object_value.insert("key".to_string(), Value::String("value".to_string())); - expected_attributes.insert("object_key".to_string(), json!(object_value)); - expected_attributes - } - - #[test] - fn avro_3609_custom_attributes_record_field_without_attributes() -> TestResult { - let schema_str = String::from( - r#" - { - "type": "record", - "name": "Rec", - "doc": "A Record schema without custom attributes", - "fields": [ - { - "name": "field_one", - "type": "float", - {{{}}} - } - ] - } - "#, - ); - - let schema = Schema::parse_str(schema_str.replace("{{{}}}", CUSTOM_ATTRS_SUFFIX).as_str())?; - - match schema { - Schema::Record(RecordSchema { name, fields, .. 
}) => { - assert_eq!(name, Name::new("Rec")?); - assert_eq!(fields.len(), 1); - let field = &fields[0]; - assert_eq!(&field.name, "field_one"); - assert_eq!(field.custom_attributes, expected_custom_attributes()); - } - _ => panic!("Expected Schema::Record"), - } - - Ok(()) - } - - #[test] - fn avro_3625_null_is_first() -> TestResult { - let schema_str = String::from( - r#" - { - "type": "record", - "name": "union_schema_test", - "fields": [ - {"name": "a", "type": ["null", "long"], "default": null} - ] - } - "#, - ); - - let schema = Schema::parse_str(&schema_str)?; - - match schema { - Schema::Record(RecordSchema { name, fields, .. }) => { - assert_eq!(name, Name::new("union_schema_test")?); - assert_eq!(fields.len(), 1); - let field = &fields[0]; - assert_eq!(&field.name, "a"); - assert_eq!(&field.default, &Some(Value::Null)); - match &field.schema { - Schema::Union(union) => { - assert_eq!(union.variants().len(), 2); - assert!(union.is_nullable()); - assert_eq!(union.variants()[0], Schema::Null); - assert_eq!(union.variants()[1], Schema::Long); - } - _ => panic!("Expected Schema::Union"), - } - } - _ => panic!("Expected Schema::Record"), - } - - Ok(()) - } - - #[test] - fn avro_3625_null_is_last() -> TestResult { - let schema_str = String::from( - r#" - { - "type": "record", - "name": "union_schema_test", - "fields": [ - {"name": "a", "type": ["long","null"], "default": 123} - ] - } - "#, - ); - - let schema = Schema::parse_str(&schema_str)?; - - match schema { - Schema::Record(RecordSchema { name, fields, .. 
}) => { - assert_eq!(name, Name::new("union_schema_test")?); - assert_eq!(fields.len(), 1); - let field = &fields[0]; - assert_eq!(&field.name, "a"); - assert_eq!(&field.default, &Some(json!(123))); - match &field.schema { - Schema::Union(union) => { - assert_eq!(union.variants().len(), 2); - assert_eq!(union.variants()[0], Schema::Long); - assert_eq!(union.variants()[1], Schema::Null); - } - _ => panic!("Expected Schema::Union"), - } - } - _ => panic!("Expected Schema::Record"), - } - - Ok(()) - } - - #[test] - fn avro_3625_null_is_the_middle() -> TestResult { - let schema_str = String::from( - r#" - { - "type": "record", - "name": "union_schema_test", - "fields": [ - {"name": "a", "type": ["long","null","int"], "default": 123} - ] - } - "#, - ); - - let schema = Schema::parse_str(&schema_str)?; - - match schema { - Schema::Record(RecordSchema { name, fields, .. }) => { - assert_eq!(name, Name::new("union_schema_test")?); - assert_eq!(fields.len(), 1); - let field = &fields[0]; - assert_eq!(&field.name, "a"); - assert_eq!(&field.default, &Some(json!(123))); - match &field.schema { - Schema::Union(union) => { - assert_eq!(union.variants().len(), 3); - assert_eq!(union.variants()[0], Schema::Long); - assert_eq!(union.variants()[1], Schema::Null); - assert_eq!(union.variants()[2], Schema::Int); - } - _ => panic!("Expected Schema::Union"), - } - } - _ => panic!("Expected Schema::Record"), - } - - Ok(()) - } - - #[test] - fn avro_3649_default_notintfirst() -> TestResult { - let schema_str = String::from( - r#" - { - "type": "record", - "name": "union_schema_test", - "fields": [ - {"name": "a", "type": ["string", "int"], "default": 123} - ] - } - "#, - ); - - let schema = Schema::parse_str(&schema_str)?; - - match schema { - Schema::Record(RecordSchema { name, fields, .. 
}) => { - assert_eq!(name, Name::new("union_schema_test")?); - assert_eq!(fields.len(), 1); - let field = &fields[0]; - assert_eq!(&field.name, "a"); - assert_eq!(&field.default, &Some(json!(123))); - match &field.schema { - Schema::Union(union) => { - assert_eq!(union.variants().len(), 2); - assert_eq!(union.variants()[0], Schema::String); - assert_eq!(union.variants()[1], Schema::Int); - } - _ => panic!("Expected Schema::Union"), - } - } - _ => panic!("Expected Schema::Record"), - } - - Ok(()) - } - - #[test] - fn avro_3709_parsing_of_record_field_aliases() -> TestResult { - let schema = r#" - { - "name": "rec", - "type": "record", - "fields": [ - { - "name": "num", - "type": "int", - "aliases": ["num1", "num2"] - } - ] - } - "#; - - let schema = Schema::parse_str(schema)?; - if let Schema::Record(RecordSchema { fields, .. }) = schema { - let num_field = &fields[0]; - assert_eq!(num_field.name, "num"); - assert_eq!(num_field.aliases, Some(vec!("num1".into(), "num2".into()))); - } else { - panic!("Expected a record schema!"); - } - - Ok(()) - } - - #[test] - fn avro_3735_parse_enum_namespace() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "Foo", - "namespace": "name.space", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1" - ] - } - }, - { - "name": "barUse", - "type": "Bar" - } - ] - } - "#; - - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - } - - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barInit")] - pub bar_init: Bar, - #[serde(rename = "barUse")] - pub bar_use: Bar, - } - - let schema = Schema::parse_str(schema)?; - - let foo = Foo { - bar_init: Bar::Bar0, - bar_use: Bar::Bar1, - }; - - let avro_value = crate::to_value(foo)?; - 
assert!(avro_value.validate(&schema)); - - let mut writer = crate::Writer::new(&schema, Vec::new()); - - // schema validation happens here - writer.append(avro_value)?; - - Ok(()) - } - - #[test] - fn avro_3755_deserialize() -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barInit")] - pub bar_init: Bar, - #[serde(rename = "barUse")] - pub bar_use: Bar, - } - - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1" - ] - } - }, - { - "name": "barUse", - "type": "Bar" - } - ] - }"#; - - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "name.space", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ] - } - }, - { - "name": "barUse", - "type": "Bar" - } - ] - }"#; - - let writer_schema = Schema::parse_str(writer_schema)?; - let foo = Foo { - bar_init: Bar::Bar0, - bar_use: Bar::Bar1, - }; - let avro_value = crate::to_value(foo)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = crate::to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 2); - assert_eq!(fields[0].0, "barInit"); - assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); - assert_eq!(fields[1].0, "barUse"); - assert_eq!(fields[1].1, 
types::Value::Enum(1, "bar1".to_string())); - } - _ => panic!("Expected Value::Record"), - } - - Ok(()) - } - - #[test] - fn test_avro_3780_decimal_schema_type_with_fixed() -> TestResult { - let schema = json!( - { - "type": "record", - "name": "recordWithDecimal", - "fields": [ - { - "name": "decimal", - "type": "fixed", - "name": "nestedFixed", - "size": 8, - "logicalType": "decimal", - "precision": 4 - } - ] - }); - - let parse_result = Schema::parse(&schema); - assert!( - parse_result.is_ok(), - "parse result must be ok, got: {:?}", - parse_result - ); - - Ok(()) - } - - #[test] - fn test_avro_3772_enum_default_wrong_type() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"], - "default": 123 - } - } - ] - } - "#; - - match Schema::parse_str(schema) { - Err(err) => { - assert_eq!( - err.to_string(), - "Default value for enum must be a string! 
Got: 123" - ); - } - _ => panic!("Expected an error"), - } - Ok(()) - } - - #[test] - fn test_avro_3812_handle_null_namespace_properly() -> TestResult { - let schema_str = r#" - { - "namespace": "", - "type": "record", - "name": "my_schema", - "fields": [ - { - "name": "a", - "type": { - "type": "enum", - "name": "my_enum", - "namespace": "", - "symbols": ["a", "b"] - } - }, { - "name": "b", - "type": { - "type": "fixed", - "name": "my_fixed", - "namespace": "", - "size": 10 - } - } - ] - } - "#; - - let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"a","type":{"name":"my_enum","type":"enum","symbols":["a","b"]}},{"name":"b","type":{"name":"my_fixed","type":"fixed","size":10}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - let name = Name::new("my_name")?; - let fullname = name.fullname(Some("".to_string())); - assert_eq!(fullname, "my_name"); - let qname = name.fully_qualified_name(&Some("".to_string())).to_string(); - assert_eq!(qname, "my_name"); - - Ok(()) - } - - #[test] - fn test_avro_3818_inherit_enclosing_namespace() -> TestResult { - // Enclosing namespace is specified but inner namespaces are not. 
- let schema_str = r#" - { - "namespace": "my_ns", - "type": "record", - "name": "my_schema", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a"] - } - }, { - "name": "f2", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 1 - } - } - ] - } - "#; - - let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"my_ns.enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"my_ns.fixed1","type":"fixed","size":1}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - // Enclosing namespace and inner namespaces are specified - // but inner namespaces are "" - let schema_str = r#" - { - "namespace": "my_ns", - "type": "record", - "name": "my_schema", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "namespace": "", - "symbols": ["a"] - } - }, { - "name": "f2", - "type": { - "name": "fixed1", - "type": "fixed", - "namespace": "", - "size": 1 - } - } - ] - } - "#; - - let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fixed1","type":"fixed","size":1}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - // Enclosing namespace is "" and inner non-empty namespaces are specified. 
- let schema_str = r#" - { - "namespace": "", - "type": "record", - "name": "my_schema", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "namespace": "f1.ns", - "symbols": ["a"] - } - }, { - "name": "f2", - "type": { - "name": "f2.ns.fixed1", - "type": "fixed", - "size": 1 - } - } - ] - } - "#; - - let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"f1","type":{"name":"f1.ns.enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"f2.ns.fixed1","type":"fixed","size":1}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - // Nested complex types with non-empty enclosing namespace. - let schema_str = r#" - { - "type": "record", - "name": "my_ns.my_schema", - "fields": [ - { - "name": "f1", - "type": { - "name": "inner_record1", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a"] - } - } - ] - } - }, { - "name": "f2", - "type": { - "name": "inner_record2", - "type": "record", - "namespace": "inner_ns", - "fields": [ - { - "name": "f2_1", - "type": { - "name": "enum2", - "type": "enum", - "symbols": ["a"] - } - } - ] - } - } - ] - } - "#; - - let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"my_ns.inner_record1","type":"record","fields":[{"name":"f1_1","type":{"name":"my_ns.enum1","type":"enum","symbols":["a"]}}]}},{"name":"f2","type":{"name":"inner_ns.inner_record2","type":"record","fields":[{"name":"f2_1","type":{"name":"inner_ns.enum2","type":"enum","symbols":["a"]}}]}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - Ok(()) - } - - #[test] - fn test_avro_3779_bigdecimal_schema() -> TestResult { - let schema = json!( - { - "name": "decimal", - "type": "bytes", - "logicalType": "big-decimal" - } - 
); - - let parse_result = Schema::parse(&schema); - assert!( - parse_result.is_ok(), - "parse result must be ok, got: {:?}", - parse_result - ); - match parse_result? { - Schema::BigDecimal => (), - other => panic!("Expected Schema::BigDecimal but got: {other:?}"), - } - - Ok(()) - } - - #[test] - fn test_avro_3820_deny_invalid_field_names() -> TestResult { - let schema_str = r#" - { - "name": "my_record", - "type": "record", - "fields": [ - { - "name": "f1.x", - "type": { - "name": "my_enum", - "type": "enum", - "symbols": ["a"] - } - }, { - "name": "f2", - "type": { - "name": "my_fixed", - "type": "fixed", - "size": 1 - } - } - ] - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::FieldName(x)) if x == "f1.x" => Ok(()), - other => Err(format!("Expected Error::FieldName, got {other:?}").into()), - } - } - - #[test] - fn test_avro_3827_disallow_duplicate_field_names() -> TestResult { - let schema_str = r#" - { - "name": "my_schema", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "a", - "type": "record", - "fields": [] - } - }, { - "name": "f1", - "type": { - "name": "b", - "type": "record", - "fields": [] - } - } - ] - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::FieldNameDuplicate(_)) => (), - other => { - return Err(format!("Expected Error::FieldNameDuplicate, got {other:?}").into()); - } - }; - - let schema_str = r#" - { - "name": "my_schema", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "a", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "b", - "type": "record", - "fields": [] - } - } - ] - } - } - ] - } - "#; - - let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"f1","type":{"name":"a","type":"record","fields":[{"name":"f1","type":{"name":"b","type":"record","fields":[]}}]}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - 
Ok(()) - } - - #[test] - fn test_avro_3830_null_namespace_in_fully_qualified_names() -> TestResult { - // Check whether all the named types don't refer to the namespace field - // if their name starts with a dot. - let schema_str = r#" - { - "name": ".record1", - "namespace": "ns1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": ".enum1", - "namespace": "ns2", - "type": "enum", - "symbols": ["a"] - } - }, { - "name": "f2", - "type": { - "name": ".fxed1", - "namespace": "ns3", - "type": "fixed", - "size": 1 - } - } - ] - } - "#; - - let expected = r#"{"name":"record1","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fxed1","type":"fixed","size":1}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - // Check whether inner types don't inherit ns1. - let schema_str = r#" - { - "name": ".record1", - "namespace": "ns1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a"] - } - }, { - "name": "f2", - "type": { - "name": "fxed1", - "type": "fixed", - "size": 1 - } - } - ] - } - "#; - - let expected = r#"{"name":"record1","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fxed1","type":"fixed","size":1}}]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - let name = Name::new(".my_name")?; - let fullname = name.fullname(None); - assert_eq!(fullname, "my_name"); - let qname = name.fully_qualified_name(&None).to_string(); - assert_eq!(qname, "my_name"); - - Ok(()) - } - - #[test] - fn test_avro_3814_schema_resolution_failure() -> TestResult { - // Define a reader schema: a nested record with an optional field. 
- let reader_schema = json!( - { - "type": "record", - "name": "MyOuterRecord", - "fields": [ - { - "name": "inner_record", - "type": [ - "null", - { - "type": "record", - "name": "MyRecord", - "fields": [ - {"name": "a", "type": "string"} - ] - } - ], - "default": null - } - ] - } - ); - - // Define a writer schema: a nested record with an optional field, which - // may optionally contain an enum. - let writer_schema = json!( - { - "type": "record", - "name": "MyOuterRecord", - "fields": [ - { - "name": "inner_record", - "type": [ - "null", - { - "type": "record", - "name": "MyRecord", - "fields": [ - {"name": "a", "type": "string"}, - { - "name": "b", - "type": [ - "null", - { - "type": "enum", - "name": "MyEnum", - "symbols": ["A", "B", "C"], - "default": "C" - } - ], - "default": null - }, - ] - } - ] - } - ], - "default": null - } - ); - - // Use different structs to represent the "Reader" and the "Writer" - // to mimic two different versions of a producer & consumer application. - #[derive(Serialize, Deserialize, Debug)] - struct MyInnerRecordReader { - a: String, - } - - #[derive(Serialize, Deserialize, Debug)] - struct MyRecordReader { - inner_record: Option, - } - - #[derive(Serialize, Deserialize, Debug)] - enum MyEnum { - A, - B, - C, - } - - #[derive(Serialize, Deserialize, Debug)] - struct MyInnerRecordWriter { - a: String, - b: Option, - } - - #[derive(Serialize, Deserialize, Debug)] - struct MyRecordWriter { - inner_record: Option, - } - - let s = MyRecordWriter { - inner_record: Some(MyInnerRecordWriter { - a: "foo".to_string(), - b: None, - }), - }; - - // Serialize using the writer schema. - let writer_schema = Schema::parse(&writer_schema)?; - let avro_value = crate::to_value(s)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = crate::to_avro_datum(&writer_schema, avro_value)?; - - // Now, attempt to deserialize using the reader schema. 
- let reader_schema = Schema::parse(&reader_schema)?; - let mut x = &datum[..]; - - // Deserialization should succeed and we should be able to resolve the schema. - let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - assert!(deser_value.validate(&reader_schema)); - - // Verify that we can read a field from the record. - let d: MyRecordReader = crate::from_value(&deser_value)?; - assert_eq!(d.inner_record.unwrap().a, "foo".to_string()); - Ok(()) - } - - #[test] - fn test_avro_3837_disallow_invalid_namespace() -> TestResult { - // Valid namespace #1 (Single name portion) - let schema_str = r#" - { - "name": "record1", - "namespace": "ns1", - "type": "record", - "fields": [] - } - "#; - - let expected = r#"{"name":"ns1.record1","type":"record","fields":[]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - // Valid namespace #2 (multiple name portions). - let schema_str = r#" - { - "name": "enum1", - "namespace": "ns1.foo.bar", - "type": "enum", - "symbols": ["a"] - } - "#; - - let expected = r#"{"name":"ns1.foo.bar.enum1","type":"enum","symbols":["a"]}"#; - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - assert_eq!(canonical_form, expected); - - // Invalid namespace #1 (a name portion starts with dot) - let schema_str = r#" - { - "name": "fixed1", - "namespace": ".ns1.a.b", - "type": "fixed", - "size": 1 - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::InvalidNamespace(_, _)) => (), - other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), - }; - - // Invalid namespace #2 (invalid character in a name portion) - let schema_str = r#" - { - "name": "record1", - "namespace": "ns1.a*b.c", - "type": "record", - "fields": [] - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::InvalidNamespace(_, _)) => (), - other => return 
Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), - }; - - // Invalid namespace #3 (a name portion starts with a digit) - let schema_str = r#" - { - "name": "fixed1", - "namespace": "ns1.1a.b", - "type": "fixed", - "size": 1 - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::InvalidNamespace(_, _)) => (), - other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), - }; - - // Invalid namespace #4 (a name portion is missing - two dots in a row) - let schema_str = r#" - { - "name": "fixed1", - "namespace": "ns1..a", - "type": "fixed", - "size": 1 - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::InvalidNamespace(_, _)) => (), - other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), - }; - - // Invalid namespace #5 (a name portion is missing - ends with a dot) - let schema_str = r#" - { - "name": "fixed1", - "namespace": "ns1.a.", - "type": "fixed", - "size": 1 - } - "#; - - match Schema::parse_str(schema_str) { - Err(Error::InvalidNamespace(_, _)) => (), - other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), - }; - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_simple_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int", - "default": "invalid" - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - "f1".to_string(), - "ns.record1".to_string(), - r#""int""#.to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_nested_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - 
"type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - }, - "default": "invalid" - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - "f1".to_string(), - "ns.record1".to_string(), - r#"{"name":"ns.record2","type":"record","fields":[{"name":"f1_1","type":"int"}]}"# - .to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_enum_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a", "b", "c"] - }, - "default": "invalid" - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - "f1".to_string(), - "ns.record1".to_string(), - r#"{"name":"ns.enum1","type":"enum","symbols":["a","b","c"]}"#.to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_fixed_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 3 - }, - "default": 100 - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - "f1".to_string(), - "ns.record1".to_string(), - r#"{"name":"ns.fixed1","type":"fixed","size":3}"#.to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = 
result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_array_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "array", - "items": "int", - "default": "invalid" - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - "f1".to_string(), - "ns.record1".to_string(), - r#"{"type":"array","items":"int"}"#.to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_map_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "map", - "values": "string", - "default": "invalid" - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - "f1".to_string(), - "ns.record1".to_string(), - r#"{"type":"map","values":"string"}"#.to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_ref_record_field() -> TestResult { - let schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - }, { - "name": "f2", - "type": "ns.record2", - "default": { "f1_1": true } - } - ] - } - "#; - let expected = Error::GetDefaultRecordField( - 
"f2".to_string(), - "ns.record1".to_string(), - r#""ns.record2""#.to_string(), - ) - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3851_validate_default_value_of_enum() -> TestResult { - let schema_str = r#" - { - "name": "enum1", - "namespace": "ns", - "type": "enum", - "symbols": ["a", "b", "c"], - "default": 100 - } - "#; - let expected = Error::EnumDefaultWrongType(100.into()).to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - let schema_str = r#" - { - "name": "enum1", - "namespace": "ns", - "type": "enum", - "symbols": ["a", "b", "c"], - "default": "d" - } - "#; - let expected = Error::GetEnumDefault { - symbol: "d".to_string(), - symbols: vec!["a".to_string(), "b".to_string(), "c".to_string()], - } - .to_string(); - let result = Schema::parse_str(schema_str); - assert!(result.is_err()); - let err = result - .map_err(|e| e.to_string()) - .err() - .unwrap_or_else(|| "unexpected".to_string()); - assert_eq!(expected, err); - - Ok(()) - } - - #[test] - fn test_avro_3862_get_aliases() -> TestResult { - // Test for Record - let schema_str = r#" - { - "name": "record1", - "namespace": "ns1", - "type": "record", - "aliases": ["r1", "ns2.r2"], - "fields": [ - { "name": "f1", "type": "int" }, - { "name": "f2", "type": "string" } - ] - } - "#; - let schema = Schema::parse_str(schema_str)?; - let expected = vec![Alias::new("ns1.r1")?, Alias::new("ns2.r2")?]; - match schema.aliases() { - Some(aliases) => assert_eq!(aliases, &expected), - None => panic!("Expected Some({:?}), got None", expected), - } - - let schema_str = r#" - { - "name": "record1", - "namespace": "ns1", - "type": 
"record", - "fields": [ - { "name": "f1", "type": "int" }, - { "name": "f2", "type": "string" } - ] - } - "#; - let schema = Schema::parse_str(schema_str)?; - match schema.aliases() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - // Test for Enum - let schema_str = r#" - { - "name": "enum1", - "namespace": "ns1", - "type": "enum", - "aliases": ["en1", "ns2.en2"], - "symbols": ["a", "b", "c"] - } - "#; - let schema = Schema::parse_str(schema_str)?; - let expected = vec![Alias::new("ns1.en1")?, Alias::new("ns2.en2")?]; - match schema.aliases() { - Some(aliases) => assert_eq!(aliases, &expected), - None => panic!("Expected Some({:?}), got None", expected), - } - - let schema_str = r#" - { - "name": "enum1", - "namespace": "ns1", - "type": "enum", - "symbols": ["a", "b", "c"] - } - "#; - let schema = Schema::parse_str(schema_str)?; - match schema.aliases() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - // Test for Fixed - let schema_str = r#" - { - "name": "fixed1", - "namespace": "ns1", - "type": "fixed", - "aliases": ["fx1", "ns2.fx2"], - "size": 10 - } - "#; - let schema = Schema::parse_str(schema_str)?; - let expected = vec![Alias::new("ns1.fx1")?, Alias::new("ns2.fx2")?]; - match schema.aliases() { - Some(aliases) => assert_eq!(aliases, &expected), - None => panic!("Expected Some({:?}), got None", expected), - } - - let schema_str = r#" - { - "name": "fixed1", - "namespace": "ns1", - "type": "fixed", - "size": 10 - } - "#; - let schema = Schema::parse_str(schema_str)?; - match schema.aliases() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - // Test for non-named type - let schema = Schema::Int; - match schema.aliases() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - Ok(()) - } - - #[test] - fn test_avro_3862_get_doc() -> TestResult { - // Test for Record - let schema_str = r#" - { - "name": "record1", - "type": "record", - "doc": "Record Document", - "fields": [ - 
{ "name": "f1", "type": "int" }, - { "name": "f2", "type": "string" } - ] - } - "#; - let schema = Schema::parse_str(schema_str)?; - let expected = "Record Document"; - match schema.doc() { - Some(doc) => assert_eq!(doc, expected), - None => panic!("Expected Some({:?}), got None", expected), - } - - let schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { "name": "f1", "type": "int" }, - { "name": "f2", "type": "string" } - ] - } - "#; - let schema = Schema::parse_str(schema_str)?; - match schema.doc() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - // Test for Enum - let schema_str = r#" - { - "name": "enum1", - "type": "enum", - "doc": "Enum Document", - "symbols": ["a", "b", "c"] - } - "#; - let schema = Schema::parse_str(schema_str)?; - let expected = "Enum Document"; - match schema.doc() { - Some(doc) => assert_eq!(doc, expected), - None => panic!("Expected Some({:?}), got None", expected), - } - - let schema_str = r#" - { - "name": "enum1", - "type": "enum", - "symbols": ["a", "b", "c"] - } - "#; - let schema = Schema::parse_str(schema_str)?; - match schema.doc() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - // Test for Fixed - let schema_str = r#" - { - "name": "fixed1", - "type": "fixed", - "doc": "Fixed Document", - "size": 10 - } - "#; - let schema = Schema::parse_str(schema_str)?; - let expected = "Fixed Document"; - match schema.doc() { - Some(doc) => assert_eq!(doc, expected), - None => panic!("Expected Some({:?}), got None", expected), - } - - let schema_str = r#" - { - "name": "fixed1", - "type": "fixed", - "size": 10 - } - "#; - let schema = Schema::parse_str(schema_str)?; - match schema.doc() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - // Test for non-named type - let schema = Schema::Int; - match schema.doc() { - None => (), - some => panic!("Expected None, got {some:?}"), - } - - Ok(()) - } - - #[test] - fn avro_3886_serialize_attributes() 
-> TestResult { - let attributes = BTreeMap::from([ - ("string_key".into(), "value".into()), - ("number_key".into(), 1.23.into()), - ("null_key".into(), Value::Null), - ( - "array_key".into(), - Value::Array(vec![1.into(), 2.into(), 3.into()]), - ), - ("object_key".into(), Value::Object(Map::default())), - ]); - - // Test serialize enum attributes - let schema = Schema::Enum(EnumSchema { - name: Name::new("a")?, - aliases: None, - doc: None, - symbols: vec![], - default: None, - attributes: attributes.clone(), - }); - let serialized = serde_json::to_string(&schema)?; - assert_eq!( - r#"{"type":"enum","name":"a","symbols":[],"array_key":[1,2,3],"null_key":null,"number_key":1.23,"object_key":{},"string_key":"value"}"#, - &serialized - ); - - // Test serialize fixed custom_attributes - let schema = Schema::Fixed(FixedSchema { - name: Name::new("a")?, - aliases: None, - doc: None, - size: 1, - default: None, - attributes: attributes.clone(), - }); - let serialized = serde_json::to_string(&schema)?; - assert_eq!( - r#"{"type":"fixed","name":"a","size":1,"array_key":[1,2,3],"null_key":null,"number_key":1.23,"object_key":{},"string_key":"value"}"#, - &serialized - ); - - // Test serialize record custom_attributes - let schema = Schema::Record(RecordSchema { - name: Name::new("a")?, - aliases: None, - doc: None, - fields: vec![], - lookup: BTreeMap::new(), - attributes, - }); - let serialized = serde_json::to_string(&schema)?; - assert_eq!( - r#"{"type":"record","name":"a","fields":[],"array_key":[1,2,3],"null_key":null,"number_key":1.23,"object_key":{},"string_key":"value"}"#, - &serialized - ); - - Ok(()) - } - - /// A test cases showing that names and namespaces can be constructed - /// entirely by underscores. 
- #[test] - fn test_avro_3897_funny_valid_names_and_namespaces() -> TestResult { - for funny_name in ["_", "_._", "__._", "_.__", "_._._"] { - let name = Name::new(funny_name); - assert!(name.is_ok()); - } - Ok(()) - } - - #[test] - fn test_avro_3896_decimal_schema() -> TestResult { - // bytes decimal, represented as native logical type. - let schema = json!( - { - "type": "bytes", - "name": "BytesDecimal", - "logicalType": "decimal", - "size": 38, - "precision": 9, - "scale": 2 - }); - let parse_result = Schema::parse(&schema)?; - assert!(matches!( - parse_result, - Schema::Decimal(DecimalSchema { - precision: 9, - scale: 2, - .. - }) - )); - - // long decimal, represents as native complex type. - let schema = json!( - { - "type": "long", - "name": "LongDecimal", - "logicalType": "decimal" - }); - let parse_result = Schema::parse(&schema)?; - // assert!(matches!(parse_result, Schema::Long)); - assert_eq!(parse_result, Schema::Long); - - Ok(()) - } - - #[test] - fn avro_3896_uuid_schema_for_string() -> TestResult { - // string uuid, represents as native logical type. - let schema = json!( - { - "type": "string", - "name": "StringUUID", - "logicalType": "uuid" - }); - let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::Uuid); - - Ok(()) - } - - #[test] - fn avro_3926_uuid_schema_for_fixed_with_size_16() -> TestResult { - let schema = json!( - { - "type": "fixed", - "name": "FixedUUID", - "size": 16, - "logicalType": "uuid" - }); - let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::Uuid); - assert_not_logged( - r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! 
Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#, - ); - - Ok(()) - } - - #[test] - fn avro_3926_uuid_schema_for_fixed_with_size_different_than_16() -> TestResult { - let schema = json!( - { - "type": "fixed", - "name": "FixedUUID", - "size": 6, - "logicalType": "uuid" - }); - let parse_result = Schema::parse(&schema)?; - - assert_eq!( - parse_result, - Schema::Fixed(FixedSchema { - name: Name::new("FixedUUID")?, - aliases: None, - doc: None, - size: 6, - default: None, - attributes: BTreeMap::from([("logicalType".to_string(), "uuid".into())]), - }) - ); - assert_logged( - r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, default: None, attributes: {"logicalType": String("uuid")} })"#, - ); - - Ok(()) - } - - #[test] - fn test_avro_3896_timestamp_millis_schema() -> TestResult { - // long timestamp-millis, represents as native logical type. - let schema = json!( - { - "type": "long", - "name": "LongTimestampMillis", - "logicalType": "timestamp-millis" - }); - let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::TimestampMillis); - - // int timestamp-millis, represents as native complex type. - let schema = json!( - { - "type": "int", - "name": "IntTimestampMillis", - "logicalType": "timestamp-millis" - }); - let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::Int); - - Ok(()) - } - - #[test] - fn test_avro_3896_custom_bytes_schema() -> TestResult { - // log type, represents as complex type. 
- let schema = json!( - { - "type": "bytes", - "name": "BytesLog", - "logicalType": "custom" - }); - let parse_result = Schema::parse(&schema)?; - assert_eq!(parse_result, Schema::Bytes); - assert_eq!(parse_result.custom_attributes(), None); - - Ok(()) - } - - #[test] - fn test_avro_3899_parse_decimal_type() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "name": "InvalidDecimal", - "type": "fixed", - "size": 16, - "logicalType": "decimal", - "precision": 2, - "scale": 3 - }"#, - )?; - match schema { - Schema::Fixed(fixed_schema) => { - let attrs = fixed_schema.attributes; - let precision = attrs - .get("precision") - .expect("The 'precision' attribute is missing"); - let scale = attrs - .get("scale") - .expect("The 'scale' attribute is missing"); - assert_logged(&format!("Ignoring invalid decimal logical type: The decimal precision ({}) must be bigger or equal to the scale ({})", precision, scale)); - } - _ => unreachable!("Expected Schema::Fixed, got {:?}", schema), - } - - let schema = Schema::parse_str( - r#"{ - "name": "ValidDecimal", - "type": "bytes", - "logicalType": "decimal", - "precision": 3, - "scale": 2 - }"#, - )?; - match schema { - Schema::Decimal(_) => { - assert_not_logged("Ignoring invalid decimal logical type: The decimal precision (2) must be bigger or equal to the scale (3)"); - } - _ => unreachable!("Expected Schema::Decimal, got {:?}", schema), - } - - Ok(()) - } - - #[test] - fn avro_3920_serialize_record_with_custom_attributes() -> TestResult { - let expected = { - let mut lookup = BTreeMap::new(); - lookup.insert("value".to_owned(), 0); - Schema::Record(RecordSchema { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, - aliases: Some(vec![Alias::new("LinkedLongs").unwrap()]), - doc: None, - fields: vec![RecordField { - name: "value".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Long, - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: 
BTreeMap::from([("field-id".to_string(), 1.into())]), - }], - lookup, - attributes: BTreeMap::from([("custom-attribute".to_string(), "value".into())]), - }) - }; - - let value = serde_json::to_value(&expected)?; - let serialized = serde_json::to_string(&value)?; - assert_eq!( - r#"{"aliases":["LinkedLongs"],"custom-attribute":"value","fields":[{"field-id":1,"name":"value","type":"long"}],"name":"LongList","type":"record"}"#, - &serialized - ); - assert_eq!(expected, Schema::parse_str(&serialized)?); - - Ok(()) - } - - #[test] - fn test_avro_3925_serialize_decimal_inner_fixed() -> TestResult { - let schema = Schema::Decimal(DecimalSchema { - precision: 36, - scale: 10, - inner: Box::new(Schema::Fixed(FixedSchema { - name: Name::new("decimal_36_10").unwrap(), - aliases: None, - doc: None, - size: 16, - default: None, - attributes: Default::default(), - })), - }); - - let serialized_json = serde_json::to_string_pretty(&schema)?; - - let expected_json = r#"{ - "type": "fixed", - "name": "decimal_36_10", - "size": 16, - "logicalType": "decimal", - "scale": 10, - "precision": 36 -}"#; - - assert_eq!(serialized_json, expected_json); - - Ok(()) - } - - #[test] - fn test_avro_3925_serialize_decimal_inner_bytes() -> TestResult { - let schema = Schema::Decimal(DecimalSchema { - precision: 36, - scale: 10, - inner: Box::new(Schema::Bytes), - }); - - let serialized_json = serde_json::to_string_pretty(&schema)?; - - let expected_json = r#"{ - "type": "bytes", - "logicalType": "decimal", - "scale": 10, - "precision": 36 -}"#; - - assert_eq!(serialized_json, expected_json); - - Ok(()) - } - - #[test] - fn test_avro_3925_serialize_decimal_inner_invalid() -> TestResult { - let schema = Schema::Decimal(DecimalSchema { - precision: 36, - scale: 10, - inner: Box::new(Schema::String), - }); - - let serialized_json = serde_json::to_string_pretty(&schema); - - assert!(serialized_json.is_err()); - - Ok(()) - } - - #[test] - fn test_avro_3927_serialize_array_with_custom_attributes() -> 
TestResult { - let expected = Schema::array_with_attributes( - Schema::Long, - BTreeMap::from([("field-id".to_string(), "1".into())]), - ); - - let value = serde_json::to_value(&expected)?; - let serialized = serde_json::to_string(&value)?; - assert_eq!( - r#"{"field-id":"1","items":"long","type":"array"}"#, - &serialized - ); - let actual_schema = Schema::parse_str(&serialized)?; - assert_eq!(expected, actual_schema); - assert_eq!( - expected.custom_attributes(), - actual_schema.custom_attributes() - ); - - Ok(()) - } - - #[test] - fn test_avro_3927_serialize_map_with_custom_attributes() -> TestResult { - let expected = Schema::map_with_attributes( - Schema::Long, - BTreeMap::from([("field-id".to_string(), "1".into())]), - ); - - let value = serde_json::to_value(&expected)?; - let serialized = serde_json::to_string(&value)?; - assert_eq!( - r#"{"field-id":"1","type":"map","values":"long"}"#, - &serialized - ); - let actual_schema = Schema::parse_str(&serialized)?; - assert_eq!(expected, actual_schema); - assert_eq!( - expected.custom_attributes(), - actual_schema.custom_attributes() - ); - - Ok(()) - } - - #[test] - fn avro_3928_parse_int_based_schema_with_default() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "DateLogicalType", - "fields": [ { - "name": "birthday", - "type": {"type": "int", "logicalType": "date"}, - "default": 1681601653 - } ] - }"#; - - match Schema::parse_str(schema)? 
{ - Schema::Record(record_schema) => { - assert_eq!(record_schema.fields.len(), 1); - let field = record_schema.fields.first().unwrap(); - assert_eq!(field.name, "birthday"); - assert_eq!(field.schema, Schema::Date); - assert_eq!( - types::Value::from(field.default.clone().unwrap()), - types::Value::Int(1681601653) - ); - } - _ => unreachable!("Expected Schema::Record"), - } - - Ok(()) - } - - #[test] - fn avro_3946_union_with_single_type() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "Issue", - "namespace": "invalid.example", - "fields": [ - { - "name": "myField", - "type": ["long"] - } - ] - }"#; - - let _ = Schema::parse_str(schema)?; - - assert_logged( - "Union schema with just one member! Consider dropping the union! \ - Please enable debug logging to find out which Record schema \ - declares the union with 'RUST_LOG=apache_avro::schema=debug'.", - ); - - Ok(()) - } - - #[test] - fn avro_3946_union_without_any_types() -> TestResult { - let schema = r#" - { - "type": "record", - "name": "Issue", - "namespace": "invalid.example", - "fields": [ - { - "name": "myField", - "type": [] - } - ] - }"#; - - let _ = Schema::parse_str(schema)?; - - assert_logged( - "Union schemas should have at least two members! 
\ - Please enable debug logging to find out which Record schema \ - declares the union with 'RUST_LOG=apache_avro::schema=debug'.", - ); - - Ok(()) - } - - #[test] - fn avro_3965_fixed_schema_with_default_bigger_than_size() -> TestResult { - match Schema::parse_str( - r#"{ - "type": "fixed", - "name": "test", - "size": 1, - "default": "123456789" - }"#, - ) { - Ok(_schema) => panic!("Must fail!"), - Err(err) => { - assert_eq!( - err.to_string(), - "Fixed schema's default value length (9) does not match its size (1)" - ); - } - } - - Ok(()) - } - - #[test] - fn avro_4004_canonical_form_strip_logical_types() -> TestResult { - let schema_str = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42, "doc": "The field a"}, - {"name": "b", "type": "string", "namespace": "test.a"}, - {"name": "c", "type": "long", "logicalType": "timestamp-micros"} - ] - }"#; - - let schema = Schema::parse_str(schema_str)?; - let canonical_form = schema.canonical_form(); - let fp_rabin = schema.fingerprint::(); - assert_eq!( - r#"{"name":"test","type":"record","fields":[{"name":"a","type":"long"},{"name":"b","type":"string"},{"name":"c","type":{"type":"long"}}]}"#, - canonical_form - ); - assert_eq!("92f2ccef718c6754", fp_rabin.to_string()); - Ok(()) - } -} diff --git a/lang/rust/avro/src/schema_compatibility.rs b/lang/rust/avro/src/schema_compatibility.rs deleted file mode 100644 index 5df7c520145..00000000000 --- a/lang/rust/avro/src/schema_compatibility.rs +++ /dev/null @@ -1,1774 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic for checking schema compatibility -use crate::{ - error::CompatibilityError, - schema::{EnumSchema, FixedSchema, RecordSchema, Schema, SchemaKind}, -}; -use std::{ - collections::{hash_map::DefaultHasher, HashSet}, - hash::Hasher, - ptr, -}; - -fn match_ref_schemas( - writers_schema: &Schema, - readers_schema: &Schema, -) -> Result<(), CompatibilityError> { - match (readers_schema, writers_schema) { - (Schema::Ref { name: r_name }, Schema::Ref { name: w_name }) => { - if r_name == w_name { - Ok(()) - } else { - Err(CompatibilityError::NameMismatch { - writer_name: w_name.fullname(None), - reader_name: r_name.fullname(None), - }) - } - } - _ => Err(CompatibilityError::WrongType { - writer_schema_type: format!("{:#?}", writers_schema), - reader_schema_type: format!("{:#?}", readers_schema), - }), - } -} - -pub struct SchemaCompatibility; - -struct Checker { - recursion: HashSet<(u64, u64)>, -} - -impl Checker { - /// Create a new checker, with recursion set to an empty set. 
- pub(crate) fn new() -> Self { - Self { - recursion: HashSet::new(), - } - } - - pub(crate) fn can_read( - &mut self, - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - self.full_match_schemas(writers_schema, readers_schema) - } - - pub(crate) fn full_match_schemas( - &mut self, - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - if self.recursion_in_progress(writers_schema, readers_schema) { - return Ok(()); - } - - SchemaCompatibility::match_schemas(writers_schema, readers_schema)?; - - let w_type = SchemaKind::from(writers_schema); - let r_type = SchemaKind::from(readers_schema); - - if w_type != SchemaKind::Union - && (r_type.is_primitive() - || r_type == SchemaKind::Fixed - || r_type == SchemaKind::Uuid - || r_type == SchemaKind::Date - || r_type == SchemaKind::TimeMillis - || r_type == SchemaKind::TimeMicros - || r_type == SchemaKind::TimestampMillis - || r_type == SchemaKind::TimestampMicros - || r_type == SchemaKind::TimestampNanos - || r_type == SchemaKind::LocalTimestampMillis - || r_type == SchemaKind::LocalTimestampMicros - || r_type == SchemaKind::LocalTimestampNanos) - { - return Ok(()); - } - - match r_type { - SchemaKind::Ref => match_ref_schemas(writers_schema, readers_schema), - SchemaKind::Record => self.match_record_schemas(writers_schema, readers_schema), - SchemaKind::Map => { - if let Schema::Map(w_m) = writers_schema { - match readers_schema { - Schema::Map(r_m) => self.full_match_schemas(&w_m.types, &r_m.types), - _ => Err(CompatibilityError::WrongType { - writer_schema_type: format!("{:#?}", writers_schema), - reader_schema_type: format!("{:#?}", readers_schema), - }), - } - } else { - Err(CompatibilityError::TypeExpected { - schema_type: String::from("writers_schema"), - expected_type: vec![SchemaKind::Record], - }) - } - } - SchemaKind::Array => { - if let Schema::Array(w_a) = writers_schema { - match readers_schema { - Schema::Array(r_a) => 
self.full_match_schemas(&w_a.items, &r_a.items), - _ => Err(CompatibilityError::WrongType { - writer_schema_type: format!("{:#?}", writers_schema), - reader_schema_type: format!("{:#?}", readers_schema), - }), - } - } else { - Err(CompatibilityError::TypeExpected { - schema_type: String::from("writers_schema"), - expected_type: vec![SchemaKind::Array], - }) - } - } - SchemaKind::Union => self.match_union_schemas(writers_schema, readers_schema), - SchemaKind::Enum => { - // reader's symbols must contain all writer's symbols - if let Schema::Enum(EnumSchema { - symbols: w_symbols, .. - }) = writers_schema - { - if let Schema::Enum(EnumSchema { - symbols: r_symbols, .. - }) = readers_schema - { - if w_symbols.iter().all(|e| r_symbols.contains(e)) { - return Ok(()); - } - } - } - Err(CompatibilityError::MissingSymbols) - } - _ => { - if w_type == SchemaKind::Union { - if let Schema::Union(r) = writers_schema { - if r.schemas.len() == 1 { - return self.full_match_schemas(&r.schemas[0], readers_schema); - } - } - } - Err(CompatibilityError::Inconclusive(String::from( - "writers_schema", - ))) - } - } - } - - fn match_record_schemas( - &mut self, - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - let w_type = SchemaKind::from(writers_schema); - - if w_type == SchemaKind::Union { - return Err(CompatibilityError::TypeExpected { - schema_type: String::from("writers_schema"), - expected_type: vec![SchemaKind::Record], - }); - } - - if let Schema::Record(RecordSchema { - fields: w_fields, - lookup: w_lookup, - .. - }) = writers_schema - { - if let Schema::Record(RecordSchema { - fields: r_fields, .. 
- }) = readers_schema - { - for field in r_fields.iter() { - // get all field names in a vector (field.name + aliases) - let mut fields_names = vec![&field.name]; - if let Some(ref aliases) = field.aliases { - for alias in aliases { - fields_names.push(alias); - } - } - - // Check whether any of the possible fields names are in the writer schema. - // If the field was found, then it must have the exact same name with the writer field, - // otherwise we would have a false positive with the writers aliases - let position = fields_names.iter().find_map(|field_name| { - if let Some(pos) = w_lookup.get(*field_name) { - if &w_fields[*pos].name == *field_name { - return Some(pos); - } - } - None - }); - - match position { - Some(pos) => { - if let Err(err) = - self.full_match_schemas(&w_fields[*pos].schema, &field.schema) - { - return Err(CompatibilityError::FieldTypeMismatch( - field.name.clone(), - Box::new(err), - )); - } - } - _ => { - if field.default.is_none() { - return Err(CompatibilityError::MissingDefaultValue( - field.name.clone(), - )); - } - } - } - } - } - } - Ok(()) - } - - fn match_union_schemas( - &mut self, - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - if let Schema::Union(u) = writers_schema { - if u.schemas - .iter() - .all(|schema| self.full_match_schemas(schema, readers_schema).is_ok()) - { - return Ok(()); - } else { - return Err(CompatibilityError::MissingUnionElements); - } - } else if let Schema::Union(u) = readers_schema { - // This check is needed because the writer_schema can be not union - // but the type can be contain in the union of the reader schema - // e.g. 
writer_schema is string and reader_schema is [string, int] - if u.schemas - .iter() - .any(|schema| self.full_match_schemas(writers_schema, schema).is_ok()) - { - return Ok(()); - } - } - Err(CompatibilityError::MissingUnionElements) - } - - fn recursion_in_progress(&mut self, writers_schema: &Schema, readers_schema: &Schema) -> bool { - let mut hasher = DefaultHasher::new(); - ptr::hash(writers_schema, &mut hasher); - let w_hash = hasher.finish(); - - hasher = DefaultHasher::new(); - ptr::hash(readers_schema, &mut hasher); - let r_hash = hasher.finish(); - - let key = (w_hash, r_hash); - // This is a shortcut to add if not exists *and* return false. It will return true - // if it was able to insert. - !self.recursion.insert(key) - } -} - -impl SchemaCompatibility { - /// `can_read` performs a full, recursive check that a datum written using the - /// writers_schema can be read using the readers_schema. - pub fn can_read( - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - let mut c = Checker::new(); - c.can_read(writers_schema, readers_schema) - } - - /// `mutual_read` performs a full, recursive check that a datum written using either - /// the writers_schema or the readers_schema can be read using the other schema. - pub fn mutual_read( - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - SchemaCompatibility::can_read(writers_schema, readers_schema)?; - SchemaCompatibility::can_read(readers_schema, writers_schema) - } - - /// `match_schemas` performs a basic check that a datum written with the - /// writers_schema could be read using the readers_schema. This check only includes - /// matching the types, including schema promotion, and matching the full name for - /// named types. Aliases for named types are not supported here, and the rust - /// implementation of Avro in general does not include support for aliases (I think). 
- pub(crate) fn match_schemas( - writers_schema: &Schema, - readers_schema: &Schema, - ) -> Result<(), CompatibilityError> { - fn check_reader_type_multi( - reader_type: SchemaKind, - allowed_reader_types: Vec, - writer_type: SchemaKind, - ) -> Result<(), CompatibilityError> { - if allowed_reader_types.iter().any(|&t| t == reader_type) { - Ok(()) - } else { - let mut allowed_types: Vec = vec![writer_type]; - allowed_types.extend_from_slice(allowed_reader_types.as_slice()); - Err(CompatibilityError::TypeExpected { - schema_type: String::from("readers_schema"), - expected_type: allowed_types, - }) - } - } - - fn check_reader_type( - reader_type: SchemaKind, - allowed_reader_type: SchemaKind, - writer_type: SchemaKind, - ) -> Result<(), CompatibilityError> { - if reader_type == allowed_reader_type { - Ok(()) - } else { - Err(CompatibilityError::TypeExpected { - schema_type: String::from("readers_schema"), - expected_type: vec![writer_type, allowed_reader_type], - }) - } - } - - fn check_writer_type( - writers_schema: &Schema, - allowed_schema: &Schema, - expected_schema_types: Vec, - ) -> Result<(), CompatibilityError> { - if *allowed_schema == *writers_schema { - Ok(()) - } else { - Err(CompatibilityError::TypeExpected { - schema_type: String::from("writers_schema"), - expected_type: expected_schema_types, - }) - } - } - - let w_type = SchemaKind::from(writers_schema); - let r_type = SchemaKind::from(readers_schema); - - if w_type == SchemaKind::Union || r_type == SchemaKind::Union { - return Ok(()); - } - - if w_type == r_type { - if r_type.is_primitive() { - return Ok(()); - } - - match r_type { - SchemaKind::Record | SchemaKind::Enum => { - let msg = format!("A {} type must always has a name", readers_schema); - let writers_name = writers_schema.name().expect(&msg); - let readers_name = readers_schema.name().expect(&msg); - - if writers_name.name == readers_name.name { - return Ok(()); - } - - return Err(CompatibilityError::NameMismatch { - writer_name: 
writers_name.name.clone(), - reader_name: readers_name.name.clone(), - }); - } - SchemaKind::Fixed => { - if let Schema::Fixed(FixedSchema { - name: w_name, - aliases: _, - doc: _w_doc, - size: w_size, - default: _w_default, - attributes: _, - }) = writers_schema - { - if let Schema::Fixed(FixedSchema { - name: r_name, - aliases: _, - doc: _r_doc, - size: r_size, - default: _r_default, - attributes: _, - }) = readers_schema - { - return (w_name.name == r_name.name && w_size == r_size) - .then_some(()) - .ok_or(CompatibilityError::FixedMismatch); - } - } - } - SchemaKind::Map => { - if let Schema::Map(w_m) = writers_schema { - if let Schema::Map(r_m) = readers_schema { - return SchemaCompatibility::match_schemas(&w_m.types, &r_m.types); - } - } - } - SchemaKind::Array => { - if let Schema::Array(w_a) = writers_schema { - if let Schema::Array(r_a) = readers_schema { - return SchemaCompatibility::match_schemas(&w_a.items, &r_a.items); - } - } - } - SchemaKind::Uuid => { - return check_writer_type( - writers_schema, - readers_schema, - vec![r_type, SchemaKind::String], - ); - } - SchemaKind::Date | SchemaKind::TimeMillis => { - return check_writer_type( - writers_schema, - readers_schema, - vec![r_type, SchemaKind::Int], - ); - } - SchemaKind::TimeMicros - | SchemaKind::TimestampNanos - | SchemaKind::TimestampMillis - | SchemaKind::TimestampMicros - | SchemaKind::LocalTimestampMillis - | SchemaKind::LocalTimestampMicros - | SchemaKind::LocalTimestampNanos => { - return check_writer_type( - writers_schema, - readers_schema, - vec![r_type, SchemaKind::Long], - ); - } - SchemaKind::Duration => { - return Ok(()); - } - SchemaKind::Ref => return match_ref_schemas(writers_schema, readers_schema), - _ => { - return Err(CompatibilityError::Inconclusive(String::from( - "readers_schema", - ))) - } - }; - } - - // Here are the checks for primitive types - match w_type { - SchemaKind::Int => check_reader_type_multi( - r_type, - vec![ - SchemaKind::Long, - SchemaKind::Float, - 
SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis, - ], - w_type, - ), - SchemaKind::Long => check_reader_type_multi( - r_type, - vec![ - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::TimeMicros, - SchemaKind::TimestampMillis, - SchemaKind::TimestampMicros, - SchemaKind::TimestampNanos, - SchemaKind::LocalTimestampMillis, - SchemaKind::LocalTimestampMicros, - SchemaKind::LocalTimestampNanos, - ], - w_type, - ), - SchemaKind::Float => { - check_reader_type_multi(r_type, vec![SchemaKind::Float, SchemaKind::Double], w_type) - } - SchemaKind::String => { - check_reader_type_multi(r_type, vec![SchemaKind::Bytes, SchemaKind::Uuid], w_type) - } - SchemaKind::Bytes => check_reader_type(r_type, SchemaKind::String, w_type), - SchemaKind::Uuid => check_reader_type(r_type, SchemaKind::String, w_type), - SchemaKind::Date | SchemaKind::TimeMillis => { - check_reader_type(r_type, SchemaKind::Int, w_type) - } - SchemaKind::TimeMicros - | SchemaKind::TimestampMicros - | SchemaKind::TimestampMillis - | SchemaKind::TimestampNanos - | SchemaKind::LocalTimestampMillis - | SchemaKind::LocalTimestampMicros - | SchemaKind::LocalTimestampNanos => { - check_reader_type(r_type, SchemaKind::Long, w_type) - } - _ => Err(CompatibilityError::Inconclusive(String::from( - "writers_schema", - ))), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - types::{Record, Value}, - Codec, Reader, Writer, - }; - use apache_avro_test_helper::TestResult; - use rstest::*; - - fn int_array_schema() -> Schema { - Schema::parse_str(r#"{"type":"array", "items":"int"}"#).unwrap() - } - - fn long_array_schema() -> Schema { - Schema::parse_str(r#"{"type":"array", "items":"long"}"#).unwrap() - } - - fn string_array_schema() -> Schema { - Schema::parse_str(r#"{"type":"array", "items":"string"}"#).unwrap() - } - - fn int_map_schema() -> Schema { - Schema::parse_str(r#"{"type":"map", "values":"int"}"#).unwrap() - } - - fn long_map_schema() -> Schema { - 
Schema::parse_str(r#"{"type":"map", "values":"long"}"#).unwrap() - } - - fn string_map_schema() -> Schema { - Schema::parse_str(r#"{"type":"map", "values":"string"}"#).unwrap() - } - - fn enum1_ab_schema() -> Schema { - Schema::parse_str(r#"{"type":"enum", "name":"Enum1", "symbols":["A","B"]}"#).unwrap() - } - - fn enum1_abc_schema() -> Schema { - Schema::parse_str(r#"{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}"#).unwrap() - } - - fn enum1_bc_schema() -> Schema { - Schema::parse_str(r#"{"type":"enum", "name":"Enum1", "symbols":["B","C"]}"#).unwrap() - } - - fn enum2_ab_schema() -> Schema { - Schema::parse_str(r#"{"type":"enum", "name":"Enum2", "symbols":["A","B"]}"#).unwrap() - } - - fn empty_record1_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"Record1", "fields":[]}"#).unwrap() - } - - fn empty_record2_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"Record2", "fields": []}"#).unwrap() - } - - fn a_int_record1_schema() -> Schema { - Schema::parse_str( - r#"{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}"#, - ) - .unwrap() - } - - fn a_long_record1_schema() -> Schema { - Schema::parse_str( - r#"{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}"#, - ) - .unwrap() - } - - fn a_int_b_int_record1_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}"#).unwrap() - } - - fn a_dint_record1_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}"#).unwrap() - } - - fn a_int_b_dint_record1_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}"#).unwrap() - } - - fn a_dint_b_dint_record1_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"Record1", "fields":[{"name":"a", 
"type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}"#).unwrap() - } - - fn nested_record() -> Schema { - Schema::parse_str(r#"{"type":"record","name":"parent","fields":[{"name":"attribute","type":{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}}]}"#).unwrap() - } - - fn nested_optional_record() -> Schema { - Schema::parse_str(r#"{"type":"record","name":"parent","fields":[{"name":"attribute","type":["null",{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}],"default":null}]}"#).unwrap() - } - - fn int_list_record_schema() -> Schema { - Schema::parse_str(r#"{"type":"record", "name":"List", "fields": [{"name": "head", "type": "int"},{"name": "tail", "type": "array", "items": "int"}]}"#).unwrap() - } - - fn long_list_record_schema() -> Schema { - Schema::parse_str( - r#" - { - "type":"record", "name":"List", "fields": [ - {"name": "head", "type": "long"}, - {"name": "tail", "type": "array", "items": "long"} - ]} -"#, - ) - .unwrap() - } - - fn union_schema(schemas: Vec) -> Schema { - let schema_string = schemas - .iter() - .map(|s| s.canonical_form()) - .collect::>() - .join(","); - Schema::parse_str(&format!("[{schema_string}]")).unwrap() - } - - fn empty_union_schema() -> Schema { - union_schema(vec![]) - } - - // unused - // fn null_union_schema() -> Schema { union_schema(vec![Schema::Null]) } - - fn int_union_schema() -> Schema { - union_schema(vec![Schema::Int]) - } - - fn long_union_schema() -> Schema { - union_schema(vec![Schema::Long]) - } - - fn string_union_schema() -> Schema { - union_schema(vec![Schema::String]) - } - - fn int_string_union_schema() -> Schema { - union_schema(vec![Schema::Int, Schema::String]) - } - - fn string_int_union_schema() -> Schema { - union_schema(vec![Schema::String, Schema::Int]) - } - - #[test] - fn test_broken() { - assert_eq!( - CompatibilityError::MissingUnionElements, - SchemaCompatibility::can_read(&int_string_union_schema(), &int_union_schema()) - 
.unwrap_err() - ) - } - - #[test] - fn test_incompatible_reader_writer_pairs() { - let incompatible_schemas = vec![ - // null - (Schema::Null, Schema::Int), - (Schema::Null, Schema::Long), - // boolean - (Schema::Boolean, Schema::Int), - // int - (Schema::Int, Schema::Null), - (Schema::Int, Schema::Boolean), - (Schema::Int, Schema::Long), - (Schema::Int, Schema::Float), - (Schema::Int, Schema::Double), - // long - (Schema::Long, Schema::Float), - (Schema::Long, Schema::Double), - // float - (Schema::Float, Schema::Double), - // string - (Schema::String, Schema::Boolean), - (Schema::String, Schema::Int), - // bytes - (Schema::Bytes, Schema::Null), - (Schema::Bytes, Schema::Int), - // logical types - (Schema::TimeMicros, Schema::Int), - (Schema::TimestampMillis, Schema::Int), - (Schema::TimestampMicros, Schema::Int), - (Schema::TimestampNanos, Schema::Int), - (Schema::LocalTimestampMillis, Schema::Int), - (Schema::LocalTimestampMicros, Schema::Int), - (Schema::LocalTimestampNanos, Schema::Int), - (Schema::Date, Schema::Long), - (Schema::TimeMillis, Schema::Long), - // array and maps - (int_array_schema(), long_array_schema()), - (int_map_schema(), int_array_schema()), - (int_array_schema(), int_map_schema()), - (int_map_schema(), long_map_schema()), - // enum - (enum1_ab_schema(), enum1_abc_schema()), - (enum1_bc_schema(), enum1_abc_schema()), - (enum1_ab_schema(), enum2_ab_schema()), - (Schema::Int, enum2_ab_schema()), - (enum2_ab_schema(), Schema::Int), - //union - (int_union_schema(), int_string_union_schema()), - (string_union_schema(), int_string_union_schema()), - //record - (empty_record2_schema(), empty_record1_schema()), - (a_int_record1_schema(), empty_record1_schema()), - (a_int_b_dint_record1_schema(), empty_record1_schema()), - (int_list_record_schema(), long_list_record_schema()), - (nested_record(), nested_optional_record()), - ]; - - assert!(incompatible_schemas - .iter() - .any(|(reader, writer)| SchemaCompatibility::can_read(writer, 
reader).is_err())); - } - - #[rstest] - // Record type test - #[case( - r#"{"type": "record", "name": "record_a", "fields": [{"type": "long", "name": "date"}]}"#, - r#"{"type": "record", "name": "record_a", "fields": [{"type": "long", "name": "date", "default": 18181}]}"# - )] - // Fixed type test - #[case( - r#"{"type": "fixed", "name": "EmployeeId", "size": 16}"#, - r#"{"type": "fixed", "name": "EmployeeId", "size": 16, "default": "u00ffffffffffffx"}"# - )] - // Enum type test - #[case( - r#"{"type": "enum", "name":"Enum1", "symbols": ["A","B"]}"#, - r#"{"type": "enum", "name":"Enum1", "symbols": ["A","B", "C"], "default": "C"}"# - )] - // Map type test - #[case( - r#"{"type": "map", "values": "int"}"#, - r#"{"type": "map", "values": "long"}"# - )] - // Date type - #[case(r#"{"type": "int"}"#, r#"{"type": "int", "logicalType": "date"}"#)] - // time-millis type - #[case( - r#"{"type": "int"}"#, - r#"{"type": "int", "logicalType": "time-millis"}"# - )] - // time-millis type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "time-micros"}"# - )] - // timetimestamp-nanos type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "timestamp-nanos"}"# - )] - // timestamp-millis type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "timestamp-millis"}"# - )] - // timestamp-micros type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "timestamp-micros"}"# - )] - // local-timestamp-millis type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "local-timestamp-millis"}"# - )] - // local-timestamp-micros type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "local-timestamp-micros"}"# - )] - // local-timestamp-nanos type - #[case( - r#"{"type": "long"}"#, - r#"{"type": "long", "logicalType": "local-timestamp-nanos"}"# - )] - // Array type test - #[case( - r#"{"type": "array", "items": "int"}"#, - r#"{"type": "array", "items": "long"}"# - )] - fn 
test_avro_3950_match_schemas_ok( - #[case] writer_schema_str: &str, - #[case] reader_schema_str: &str, - ) { - let writer_schema = Schema::parse_str(writer_schema_str).unwrap(); - let reader_schema = Schema::parse_str(reader_schema_str).unwrap(); - - assert!(SchemaCompatibility::match_schemas(&writer_schema, &reader_schema).is_ok()); - } - - #[rstest] - // Record type test - #[case( - r#"{"type": "record", "name":"record_a", "fields": [{"type": "long", "name": "date"}]}"#, - r#"{"type": "record", "name":"record_b", "fields": [{"type": "long", "name": "date"}]}"#, - CompatibilityError::NameMismatch{writer_name: String::from("record_a"), reader_name: String::from("record_b")} - )] - // Fixed type test - #[case( - r#"{"type": "fixed", "name": "EmployeeId", "size": 16}"#, - r#"{"type": "fixed", "name": "EmployeeId", "size": 20}"#, - CompatibilityError::FixedMismatch - )] - // Enum type test - #[case( - r#"{"type": "enum", "name": "Enum1", "symbols": ["A","B"]}"#, - r#"{"type": "enum", "name": "Enum2", "symbols": ["A","B"]}"#, - CompatibilityError::NameMismatch{writer_name: String::from("Enum1"), reader_name: String::from("Enum2")} - )] - // Map type test - #[case( - r#"{"type":"map", "values": "long"}"#, - r#"{"type":"map", "values": "int"}"#, - CompatibilityError::TypeExpected {schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::TimeMicros, - SchemaKind::TimestampMillis, - SchemaKind::TimestampMicros, - SchemaKind::TimestampNanos, - SchemaKind::LocalTimestampMillis, - SchemaKind::LocalTimestampMicros, - SchemaKind::LocalTimestampNanos, - ]} - )] - // Array type test - #[case( - r#"{"type": "array", "items": "long"}"#, - r#"{"type": "array", "items": "int"}"#, - CompatibilityError::TypeExpected {schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::TimeMicros, - SchemaKind::TimestampMillis, - 
SchemaKind::TimestampMicros, - SchemaKind::TimestampNanos, - SchemaKind::LocalTimestampMillis, - SchemaKind::LocalTimestampMicros, - SchemaKind::LocalTimestampNanos, - ]} - )] - // Date type test - #[case( - r#"{"type": "string"}"#, - r#"{"type": "int", "logicalType": "date"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::String, - SchemaKind::Bytes, - SchemaKind::Uuid, - ]} - )] - // time-millis type - #[case( - r#"{"type": "string"}"#, - r#"{"type": "int", "logicalType": "time-millis"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::String, - SchemaKind::Bytes, - SchemaKind::Uuid, - ]} - )] - // time-millis type - #[case( - r#"{"type": "int"}"#, - r#"{"type": "long", "logicalType": "time-micros"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Int, - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis - ]} - )] - // timestamp-nanos type. 
This test should fail because it is not supported on schema parse_complex - // #[case( - // r#"{"type": "string"}"#, - // r#"{"type": "long", "logicalType": "timestamp-nanos"}"#, - // CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - // SchemaKind::Int, - // SchemaKind::Long, - // SchemaKind::Float, - // SchemaKind::Double, - // SchemaKind::Date, - // SchemaKind::TimeMillis - // ]} - // )] - // timestamp-millis type - #[case( - r#"{"type": "int"}"#, - r#"{"type": "long", "logicalType": "timestamp-millis"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Int, - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis - ]} - )] - // timestamp-micros type - #[case( - r#"{"type": "int"}"#, - r#"{"type": "long", "logicalType": "timestamp-micros"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Int, - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis - ]} - )] - // local-timestamp-millis type - #[case( - r#"{"type": "int"}"#, - r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Int, - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis - ]} - )] - // local-timestamp-micros type - #[case( - r#"{"type": "int"}"#, - r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#, - CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - SchemaKind::Int, - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis - ]} - )] - // local-timestamp-nanos type. 
This test should fail because it is not supported on schema parse_complex - // #[case( - // r#"{"type": "int"}"#, - // r#"{"type": "long", "logicalType": "local-timestamp-nanos"}"#, - // CompatibilityError::TypeExpected{schema_type: String::from("readers_schema"), expected_type: vec![ - // SchemaKind::Int, - // SchemaKind::Long, - // SchemaKind::Float, - // SchemaKind::Double, - // SchemaKind::Date, - // SchemaKind::TimeMillis - // ]} - // )] - // When comparing different types we always get Inconclusive - #[case( - r#"{"type": "record", "name":"record_b", "fields": [{"type": "long", "name": "date"}]}"#, - r#"{"type": "fixed", "name": "EmployeeId", "size": 16}"#, - CompatibilityError::Inconclusive(String::from("writers_schema")) - )] - fn test_avro_3950_match_schemas_error( - #[case] writer_schema_str: &str, - #[case] reader_schema_str: &str, - #[case] expected_error: CompatibilityError, - ) { - let writer_schema = Schema::parse_str(writer_schema_str).unwrap(); - let reader_schema = Schema::parse_str(reader_schema_str).unwrap(); - - assert_eq!( - expected_error, - SchemaCompatibility::match_schemas(&writer_schema, &reader_schema).unwrap_err() - ) - } - - #[test] - fn test_compatible_reader_writer_pairs() { - let compatible_schemas = vec![ - (Schema::Null, Schema::Null), - (Schema::Long, Schema::Int), - (Schema::Float, Schema::Int), - (Schema::Float, Schema::Long), - (Schema::Double, Schema::Long), - (Schema::Double, Schema::Int), - (Schema::Double, Schema::Float), - (Schema::String, Schema::Bytes), - (Schema::Bytes, Schema::String), - // logical types - (Schema::Uuid, Schema::Uuid), - (Schema::Uuid, Schema::String), - (Schema::Date, Schema::Int), - (Schema::TimeMillis, Schema::Int), - (Schema::TimeMicros, Schema::Long), - (Schema::TimestampMillis, Schema::Long), - (Schema::TimestampMicros, Schema::Long), - (Schema::TimestampNanos, Schema::Long), - (Schema::LocalTimestampMillis, Schema::Long), - (Schema::LocalTimestampMicros, Schema::Long), - 
(Schema::LocalTimestampNanos, Schema::Long), - (Schema::String, Schema::Uuid), - (Schema::Int, Schema::Date), - (Schema::Int, Schema::TimeMillis), - (Schema::Long, Schema::TimeMicros), - (Schema::Long, Schema::TimestampMillis), - (Schema::Long, Schema::TimestampMicros), - (Schema::Long, Schema::TimestampNanos), - (Schema::Long, Schema::LocalTimestampMillis), - (Schema::Long, Schema::LocalTimestampMicros), - (Schema::Long, Schema::LocalTimestampNanos), - (int_array_schema(), int_array_schema()), - (long_array_schema(), int_array_schema()), - (int_map_schema(), int_map_schema()), - (long_map_schema(), int_map_schema()), - (enum1_ab_schema(), enum1_ab_schema()), - (enum1_abc_schema(), enum1_ab_schema()), - (empty_union_schema(), empty_union_schema()), - (int_union_schema(), int_union_schema()), - (int_string_union_schema(), string_int_union_schema()), - (int_union_schema(), empty_union_schema()), - (long_union_schema(), int_union_schema()), - (int_union_schema(), Schema::Int), - (Schema::Int, int_union_schema()), - (empty_record1_schema(), empty_record1_schema()), - (empty_record1_schema(), a_int_record1_schema()), - (a_int_record1_schema(), a_int_record1_schema()), - (a_dint_record1_schema(), a_int_record1_schema()), - (a_dint_record1_schema(), a_dint_record1_schema()), - (a_int_record1_schema(), a_dint_record1_schema()), - (a_long_record1_schema(), a_int_record1_schema()), - (a_int_record1_schema(), a_int_b_int_record1_schema()), - (a_dint_record1_schema(), a_int_b_int_record1_schema()), - (a_int_b_dint_record1_schema(), a_int_record1_schema()), - (a_dint_b_dint_record1_schema(), empty_record1_schema()), - (a_dint_b_dint_record1_schema(), a_int_record1_schema()), - (a_int_b_int_record1_schema(), a_dint_b_dint_record1_schema()), - (int_list_record_schema(), int_list_record_schema()), - (long_list_record_schema(), long_list_record_schema()), - (long_list_record_schema(), int_list_record_schema()), - (nested_optional_record(), nested_record()), - ]; - - 
assert!(compatible_schemas - .iter() - .all(|(reader, writer)| SchemaCompatibility::can_read(writer, reader).is_ok())); - } - - fn writer_schema() -> Schema { - Schema::parse_str( - r#" - {"type":"record", "name":"Record", "fields":[ - {"name":"oldfield1", "type":"int"}, - {"name":"oldfield2", "type":"string"} - ]} -"#, - ) - .unwrap() - } - - #[test] - fn test_missing_field() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - {"type":"record", "name":"Record", "fields":[ - {"name":"oldfield1", "type":"int"} - ]} -"#, - )?; - assert!(SchemaCompatibility::can_read(&writer_schema(), &reader_schema,).is_ok()); - assert_eq!( - CompatibilityError::MissingDefaultValue(String::from("oldfield2")), - SchemaCompatibility::can_read(&reader_schema, &writer_schema()).unwrap_err() - ); - - Ok(()) - } - - #[test] - fn test_missing_second_field() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - {"type":"record", "name":"Record", "fields":[ - {"name":"oldfield2", "type":"string"} - ]} -"#, - )?; - assert!(SchemaCompatibility::can_read(&writer_schema(), &reader_schema).is_ok()); - assert_eq!( - CompatibilityError::MissingDefaultValue(String::from("oldfield1")), - SchemaCompatibility::can_read(&reader_schema, &writer_schema()).unwrap_err() - ); - - Ok(()) - } - - #[test] - fn test_all_fields() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - {"type":"record", "name":"Record", "fields":[ - {"name":"oldfield1", "type":"int"}, - {"name":"oldfield2", "type":"string"} - ]} -"#, - )?; - assert!(SchemaCompatibility::can_read(&writer_schema(), &reader_schema).is_ok()); - assert!(SchemaCompatibility::can_read(&reader_schema, &writer_schema()).is_ok()); - - Ok(()) - } - - #[test] - fn test_new_field_with_default() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - {"type":"record", "name":"Record", "fields":[ - {"name":"oldfield1", "type":"int"}, - {"name":"newfield1", "type":"int", "default":42} - ]} -"#, - )?; - 
assert!(SchemaCompatibility::can_read(&writer_schema(), &reader_schema).is_ok()); - assert_eq!( - CompatibilityError::MissingDefaultValue(String::from("oldfield2")), - SchemaCompatibility::can_read(&reader_schema, &writer_schema()).unwrap_err() - ); - - Ok(()) - } - - #[test] - fn test_new_field() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - {"type":"record", "name":"Record", "fields":[ - {"name":"oldfield1", "type":"int"}, - {"name":"newfield1", "type":"int"} - ]} -"#, - )?; - assert_eq!( - CompatibilityError::MissingDefaultValue(String::from("newfield1")), - SchemaCompatibility::can_read(&writer_schema(), &reader_schema).unwrap_err() - ); - assert_eq!( - CompatibilityError::MissingDefaultValue(String::from("oldfield2")), - SchemaCompatibility::can_read(&reader_schema, &writer_schema()).unwrap_err() - ); - - Ok(()) - } - - #[test] - fn test_array_writer_schema() { - let valid_reader = string_array_schema(); - let invalid_reader = string_map_schema(); - - assert!(SchemaCompatibility::can_read(&string_array_schema(), &valid_reader).is_ok()); - assert_eq!( - CompatibilityError::Inconclusive(String::from("writers_schema")), - SchemaCompatibility::can_read(&string_array_schema(), &invalid_reader).unwrap_err() - ); - } - - #[test] - fn test_primitive_writer_schema() { - let valid_reader = Schema::String; - assert!(SchemaCompatibility::can_read(&Schema::String, &valid_reader).is_ok()); - assert_eq!( - CompatibilityError::TypeExpected { - schema_type: String::from("readers_schema"), - expected_type: vec![ - SchemaKind::Int, - SchemaKind::Long, - SchemaKind::Float, - SchemaKind::Double, - SchemaKind::Date, - SchemaKind::TimeMillis - ], - }, - SchemaCompatibility::can_read(&Schema::Int, &Schema::String).unwrap_err() - ); - } - - #[test] - fn test_union_reader_writer_subset_incompatibility() { - // reader union schema must contain all writer union branches - let union_writer = union_schema(vec![Schema::Int, Schema::String]); - let union_reader = 
union_schema(vec![Schema::String]); - - assert_eq!( - CompatibilityError::MissingUnionElements, - SchemaCompatibility::can_read(&union_writer, &union_reader).unwrap_err() - ); - assert!(SchemaCompatibility::can_read(&union_reader, &union_writer).is_ok()); - } - - #[test] - fn test_incompatible_record_field() -> TestResult { - let string_schema = Schema::parse_str( - r#" - {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [ - {"name":"field1", "type":"string"} - ]} - "#, - )?; - - let int_schema = Schema::parse_str( - r#" - {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [ - {"name":"field1", "type":"int"} - ]} - "#, - )?; - - assert_eq!( - CompatibilityError::FieldTypeMismatch( - "field1".to_owned(), - Box::new(CompatibilityError::TypeExpected { - schema_type: "readers_schema".to_owned(), - expected_type: vec![SchemaKind::String, SchemaKind::Bytes, SchemaKind::Uuid] - }) - ), - SchemaCompatibility::can_read(&string_schema, &int_schema).unwrap_err() - ); - - Ok(()) - } - - #[test] - fn test_enum_symbols() -> TestResult { - let enum_schema1 = Schema::parse_str( - r#" - {"type":"enum", "name":"MyEnum", "symbols":["A","B"]} -"#, - )?; - let enum_schema2 = - Schema::parse_str(r#"{"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}"#)?; - assert_eq!( - CompatibilityError::MissingSymbols, - SchemaCompatibility::can_read(&enum_schema2, &enum_schema1).unwrap_err() - ); - assert!(SchemaCompatibility::can_read(&enum_schema1, &enum_schema2).is_ok()); - - Ok(()) - } - - fn point_2d_schema() -> Schema { - Schema::parse_str( - r#" - {"type":"record", "name":"Point2D", "fields":[ - {"name":"x", "type":"double"}, - {"name":"y", "type":"double"} - ]} - "#, - ) - .unwrap() - } - - fn point_2d_fullname_schema() -> Schema { - Schema::parse_str( - r#" - {"type":"record", "name":"Point", "namespace":"written", "fields":[ - {"name":"x", "type":"double"}, - {"name":"y", "type":"double"} - ]} - "#, - ) - .unwrap() - } - - fn point_3d_no_default_schema() 
-> Schema { - Schema::parse_str( - r#" - {"type":"record", "name":"Point", "fields":[ - {"name":"x", "type":"double"}, - {"name":"y", "type":"double"}, - {"name":"z", "type":"double"} - ]} - "#, - ) - .unwrap() - } - - fn point_3d_schema() -> Schema { - Schema::parse_str( - r#" - {"type":"record", "name":"Point3D", "fields":[ - {"name":"x", "type":"double"}, - {"name":"y", "type":"double"}, - {"name":"z", "type":"double", "default": 0.0} - ]} - "#, - ) - .unwrap() - } - - fn point_3d_match_name_schema() -> Schema { - Schema::parse_str( - r#" - {"type":"record", "name":"Point", "fields":[ - {"name":"x", "type":"double"}, - {"name":"y", "type":"double"}, - {"name":"z", "type":"double", "default": 0.0} - ]} - "#, - ) - .unwrap() - } - - #[test] - fn test_union_resolution_no_structure_match() { - // short name match, but no structure match - let read_schema = union_schema(vec![Schema::Null, point_3d_no_default_schema()]); - assert_eq!( - CompatibilityError::MissingUnionElements, - SchemaCompatibility::can_read(&point_2d_fullname_schema(), &read_schema).unwrap_err() - ); - } - - #[test] - fn test_union_resolution_first_structure_match_2d() { - // multiple structure matches with no name matches - let read_schema = union_schema(vec![ - Schema::Null, - point_3d_no_default_schema(), - point_2d_schema(), - point_3d_schema(), - ]); - assert_eq!( - CompatibilityError::MissingUnionElements, - SchemaCompatibility::can_read(&point_2d_fullname_schema(), &read_schema).unwrap_err() - ); - } - - #[test] - fn test_union_resolution_first_structure_match_3d() { - // multiple structure matches with no name matches - let read_schema = union_schema(vec![ - Schema::Null, - point_3d_no_default_schema(), - point_3d_schema(), - point_2d_schema(), - ]); - assert_eq!( - CompatibilityError::MissingUnionElements, - SchemaCompatibility::can_read(&point_2d_fullname_schema(), &read_schema).unwrap_err() - ); - } - - #[test] - fn test_union_resolution_named_structure_match() { - // multiple structure 
matches with a short name match - let read_schema = union_schema(vec![ - Schema::Null, - point_2d_schema(), - point_3d_match_name_schema(), - point_3d_schema(), - ]); - assert_eq!( - CompatibilityError::MissingUnionElements, - SchemaCompatibility::can_read(&point_2d_fullname_schema(), &read_schema).unwrap_err() - ); - } - - #[test] - fn test_union_resolution_full_name_match() { - // there is a full name match that should be chosen - let read_schema = union_schema(vec![ - Schema::Null, - point_2d_schema(), - point_3d_match_name_schema(), - point_3d_schema(), - point_2d_fullname_schema(), - ]); - assert!(SchemaCompatibility::can_read(&point_2d_fullname_schema(), &read_schema).is_ok()); - } - - #[test] - fn test_avro_3772_enum_default() -> TestResult { - let writer_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"], - "default": "spades" - } - } - ] - } - "#; - - let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "ninja", "hearts"], - "default": "spades" - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_raw_schema)?; - let reader_schema = Schema::parse_str(reader_raw_schema)?; - let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - record.put("c", "clubs"); - writer.append(record).unwrap(); - let input = writer.into_inner()?; - let mut reader = Reader::with_schema(&reader_schema, &input[..])?; - assert_eq!( - reader.next().unwrap().unwrap(), - Value::Record(vec![ - 
("a".to_string(), Value::Long(27)), - ("b".to_string(), Value::String("foo".to_string())), - ("c".to_string(), Value::Enum(1, "spades".to_string())), - ]) - ); - assert!(reader.next().is_none()); - - Ok(()) - } - - #[test] - fn test_avro_3772_enum_default_less_symbols() -> TestResult { - let writer_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"], - "default": "spades" - } - } - ] - } - "#; - - let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["hearts", "spades"], - "default": "spades" - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_raw_schema)?; - let reader_schema = Schema::parse_str(reader_raw_schema)?; - let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - record.put("c", "hearts"); - writer.append(record).unwrap(); - let input = writer.into_inner()?; - let mut reader = Reader::with_schema(&reader_schema, &input[..])?; - assert_eq!( - reader.next().unwrap().unwrap(), - Value::Record(vec![ - ("a".to_string(), Value::Long(27)), - ("b".to_string(), Value::String("foo".to_string())), - ("c".to_string(), Value::Enum(0, "hearts".to_string())), - ]) - ); - assert!(reader.next().is_none()); - - Ok(()) - } - - #[test] - fn avro_3894_take_aliases_into_account_when_serializing_for_schema_compatibility() -> TestResult - { - let schema_v1 = Schema::parse_str( - r#" - { - "type": "record", - "name": "Conference", - "namespace": "advdaba", - "fields": [ - {"type": "string", "name": "name"}, - {"type": 
"long", "name": "date"} - ] - }"#, - )?; - - let schema_v2 = Schema::parse_str( - r#" - { - "type": "record", - "name": "Conference", - "namespace": "advdaba", - "fields": [ - {"type": "string", "name": "name"}, - {"type": "long", "name": "date", "aliases" : [ "time" ]} - ] - }"#, - )?; - - assert!(SchemaCompatibility::mutual_read(&schema_v1, &schema_v2).is_ok()); - - Ok(()) - } - - #[test] - fn avro_3917_take_aliases_into_account_for_schema_compatibility() -> TestResult { - let schema_v1 = Schema::parse_str( - r#" - { - "type": "record", - "name": "Conference", - "namespace": "advdaba", - "fields": [ - {"type": "string", "name": "name"}, - {"type": "long", "name": "date", "aliases" : [ "time" ]} - ] - }"#, - )?; - - let schema_v2 = Schema::parse_str( - r#" - { - "type": "record", - "name": "Conference", - "namespace": "advdaba", - "fields": [ - {"type": "string", "name": "name"}, - {"type": "long", "name": "time"} - ] - }"#, - )?; - - assert!(SchemaCompatibility::can_read(&schema_v2, &schema_v1).is_ok()); - assert_eq!( - CompatibilityError::MissingDefaultValue(String::from("time")), - SchemaCompatibility::can_read(&schema_v1, &schema_v2).unwrap_err() - ); - - Ok(()) - } - - #[test] - fn test_avro_3898_record_schemas_match_by_unqualified_name() -> TestResult { - let schemas = [ - // Record schemas - ( - Schema::parse_str( - r#"{ - "type": "record", - "name": "Statistics", - "fields": [ - { "name": "success", "type": "int" }, - { "name": "fail", "type": "int" }, - { "name": "time", "type": "string" }, - { "name": "max", "type": "int", "default": 0 } - ] - }"#, - )?, - Schema::parse_str( - r#"{ - "type": "record", - "name": "Statistics", - "namespace": "my.namespace", - "fields": [ - { "name": "success", "type": "int" }, - { "name": "fail", "type": "int" }, - { "name": "time", "type": "string" }, - { "name": "average", "type": "int", "default": 0} - ] - }"#, - )?, - ), - // Enum schemas - ( - Schema::parse_str( - r#"{ - "type": "enum", - "name": "Suit", - "symbols": 
["diamonds", "spades", "clubs"] - }"#, - )?, - Schema::parse_str( - r#"{ - "type": "enum", - "name": "Suit", - "namespace": "my.namespace", - "symbols": ["diamonds", "spades", "clubs", "hearts"] - }"#, - )?, - ), - // Fixed schemas - ( - Schema::parse_str( - r#"{ - "type": "fixed", - "name": "EmployeeId", - "size": 16 - }"#, - )?, - Schema::parse_str( - r#"{ - "type": "fixed", - "name": "EmployeeId", - "namespace": "my.namespace", - "size": 16 - }"#, - )?, - ), - ]; - - for (schema_1, schema_2) in schemas { - assert!(SchemaCompatibility::can_read(&schema_1, &schema_2).is_ok()); - } - - Ok(()) - } - - #[test] - fn test_can_read_compatibility_errors() -> TestResult { - let schemas = [ - ( - Schema::parse_str( - r#"{ - "type": "record", - "name": "StatisticsMap", - "fields": [ - {"name": "average", "type": "int", "default": 0}, - {"name": "success", "type": {"type": "map", "values": "int"}} - ] - }"#)?, - Schema::parse_str( - r#"{ - "type": "record", - "name": "StatisticsMap", - "fields": [ - {"name": "average", "type": "int", "default": 0}, - {"name": "success", "type": ["null", {"type": "map", "values": "int"}], "default": null} - ] - }"#)?, - "Incompatible schemata! Field 'success' in reader schema does not match the type in the writer schema" - ), - ( - Schema::parse_str( - r#"{ - "type": "record", - "name": "StatisticsArray", - "fields": [ - {"name": "max_values", "type": {"type": "array", "items": "int"}} - ] - }"#)?, - Schema::parse_str( - r#"{ - "type": "record", - "name": "StatisticsArray", - "fields": [ - {"name": "max_values", "type": ["null", {"type": "array", "items": "int"}], "default": null} - ] - }"#)?, - "Incompatible schemata! 
Field 'max_values' in reader schema does not match the type in the writer schema" - ) - ]; - - for (schema_1, schema_2, error) in schemas { - assert!(SchemaCompatibility::can_read(&schema_1, &schema_2).is_ok()); - assert_eq!( - error, - SchemaCompatibility::can_read(&schema_2, &schema_1) - .unwrap_err() - .to_string() - ); - } - - Ok(()) - } - - #[test] - fn avro_3974_can_read_schema_references() -> TestResult { - let schema_strs = vec![ - r#"{ - "type": "record", - "name": "Child", - "namespace": "avro", - "fields": [ - { - "name": "val", - "type": "int" - } - ] - } - "#, - r#"{ - "type": "record", - "name": "Parent", - "namespace": "avro", - "fields": [ - { - "name": "child", - "type": "avro.Child" - } - ] - } - "#, - ]; - - let schemas = Schema::parse_list(&schema_strs).unwrap(); - SchemaCompatibility::can_read(&schemas[1], &schemas[1])?; - - Ok(()) - } -} diff --git a/lang/rust/avro/src/schema_equality.rs b/lang/rust/avro/src/schema_equality.rs deleted file mode 100644 index c9eaa2a1173..00000000000 --- a/lang/rust/avro/src/schema_equality.rs +++ /dev/null @@ -1,590 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use crate::{ - schema::{ - ArraySchema, DecimalSchema, EnumSchema, FixedSchema, MapSchema, RecordField, RecordSchema, - UnionSchema, - }, - Schema, -}; -use std::{fmt::Debug, sync::OnceLock}; - -/// A trait that compares two schemata for equality. -/// To register a custom one use [set_schemata_equality_comparator]. -pub trait SchemataEq: Debug + Send + Sync { - /// Compares two schemata for equality. - fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool; -} - -/// Compares two schemas according to the Avro specification by using -/// their canonical forms. -/// See -#[derive(Debug)] -pub struct SpecificationEq; -impl SchemataEq for SpecificationEq { - fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool { - schema_one.canonical_form() == schema_two.canonical_form() - } -} - -/// Compares two schemas for equality field by field, using only the fields that -/// are used to construct their canonical forms. -/// See -#[derive(Debug)] -pub struct StructFieldEq { - /// Whether to include custom attributes in the comparison. - /// The custom attributes are not used to construct the canonical form of the schema! - pub include_attributes: bool, -} - -impl SchemataEq for StructFieldEq { - fn compare(&self, schema_one: &Schema, schema_two: &Schema) -> bool { - macro_rules! 
compare_primitive { - ($primitive:ident) => { - if let Schema::$primitive = schema_one { - if let Schema::$primitive = schema_two { - return true; - } - return false; - } - }; - } - - if schema_one.name() != schema_two.name() { - return false; - } - - compare_primitive!(Null); - compare_primitive!(Boolean); - compare_primitive!(Int); - compare_primitive!(Int); - compare_primitive!(Long); - compare_primitive!(Float); - compare_primitive!(Double); - compare_primitive!(Bytes); - compare_primitive!(String); - compare_primitive!(Uuid); - compare_primitive!(BigDecimal); - compare_primitive!(Date); - compare_primitive!(Duration); - compare_primitive!(TimeMicros); - compare_primitive!(TimeMillis); - compare_primitive!(TimestampMicros); - compare_primitive!(TimestampMillis); - compare_primitive!(TimestampNanos); - compare_primitive!(LocalTimestampMicros); - compare_primitive!(LocalTimestampMillis); - compare_primitive!(LocalTimestampNanos); - - if self.include_attributes - && schema_one.custom_attributes() != schema_two.custom_attributes() - { - return false; - } - - if let Schema::Record(RecordSchema { - fields: fields_one, .. - }) = schema_one - { - if let Schema::Record(RecordSchema { - fields: fields_two, .. - }) = schema_two - { - return self.compare_fields(fields_one, fields_two); - } - return false; - } - - if let Schema::Enum(EnumSchema { - symbols: symbols_one, - .. - }) = schema_one - { - if let Schema::Enum(EnumSchema { - symbols: symbols_two, - .. - }) = schema_two - { - return symbols_one == symbols_two; - } - return false; - } - - if let Schema::Fixed(FixedSchema { size: size_one, .. }) = schema_one { - if let Schema::Fixed(FixedSchema { size: size_two, .. }) = schema_two { - return size_one == size_two; - } - return false; - } - - if let Schema::Union(UnionSchema { - schemas: schemas_one, - .. - }) = schema_one - { - if let Schema::Union(UnionSchema { - schemas: schemas_two, - .. 
- }) = schema_two - { - return schemas_one.len() == schemas_two.len() - && schemas_one - .iter() - .zip(schemas_two.iter()) - .all(|(s1, s2)| self.compare(s1, s2)); - } - return false; - } - - if let Schema::Decimal(DecimalSchema { - precision: precision_one, - scale: scale_one, - .. - }) = schema_one - { - if let Schema::Decimal(DecimalSchema { - precision: precision_two, - scale: scale_two, - .. - }) = schema_two - { - return precision_one == precision_two && scale_one == scale_two; - } - return false; - } - - if let Schema::Array(ArraySchema { - items: items_one, .. - }) = schema_one - { - if let Schema::Array(ArraySchema { - items: items_two, .. - }) = schema_two - { - return items_one == items_two; - } - return false; - } - - if let Schema::Map(MapSchema { - types: types_one, .. - }) = schema_one - { - if let Schema::Map(MapSchema { - types: types_two, .. - }) = schema_two - { - return self.compare(types_one, types_two); - } - return false; - } - - if let Schema::Ref { name: name_one } = schema_one { - if let Schema::Ref { name: name_two } = schema_two { - return name_one == name_two; - } - return false; - } - - error!( - "This is a bug in schema_equality.rs! The following schemata types are not checked! \ - Please report it to the Avro library maintainers! \ - \n{:?}\n\n{:?}", - schema_one, schema_two - ); - false - } -} - -impl StructFieldEq { - fn compare_fields(&self, fields_one: &[RecordField], fields_two: &[RecordField]) -> bool { - fields_one.len() == fields_two.len() - && fields_one - .iter() - .zip(fields_two.iter()) - .all(|(f1, f2)| self.compare(&f1.schema, &f2.schema)) - } -} - -static SCHEMATA_COMPARATOR_ONCE: OnceLock> = OnceLock::new(); - -/// Sets a custom schemata equality comparator. -/// -/// Returns a unit if the registration was successful or the already -/// registered comparator if the registration failed. 
-/// -/// **Note**: This function must be called before parsing any schema because this will -/// register the default comparator and the registration is one time only! -pub fn set_schemata_equality_comparator( - comparator: Box, -) -> Result<(), Box> { - debug!( - "Setting a custom schemata equality comparator: {:?}.", - comparator - ); - SCHEMATA_COMPARATOR_ONCE.set(comparator) -} - -pub(crate) fn compare_schemata(schema_one: &Schema, schema_two: &Schema) -> bool { - SCHEMATA_COMPARATOR_ONCE - .get_or_init(|| { - debug!("Going to use the default schemata equality comparator: SpecificationEq.",); - Box::new(StructFieldEq { - include_attributes: false, - }) - }) - .compare(schema_one, schema_two) -} - -#[cfg(test)] -#[allow(non_snake_case)] -mod tests { - use super::*; - use crate::schema::{Name, RecordFieldOrder}; - use apache_avro_test_helper::TestResult; - use serde_json::Value; - use std::collections::BTreeMap; - - const SPECIFICATION_EQ: SpecificationEq = SpecificationEq; - const STRUCT_FIELD_EQ: StructFieldEq = StructFieldEq { - include_attributes: false, - }; - - macro_rules! test_primitives { - ($primitive:ident) => { - paste::item! 
{ - #[test] - fn []() { - let specification_eq_res = SPECIFICATION_EQ.compare(&Schema::$primitive, &Schema::$primitive); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&Schema::$primitive, &Schema::$primitive); - assert_eq!(specification_eq_res, struct_field_eq_res) - } - } - }; - } - - test_primitives!(Null); - test_primitives!(Boolean); - test_primitives!(Int); - test_primitives!(Long); - test_primitives!(Float); - test_primitives!(Double); - test_primitives!(Bytes); - test_primitives!(String); - test_primitives!(Uuid); - test_primitives!(BigDecimal); - test_primitives!(Date); - test_primitives!(Duration); - test_primitives!(TimeMicros); - test_primitives!(TimeMillis); - test_primitives!(TimestampMicros); - test_primitives!(TimestampMillis); - test_primitives!(TimestampNanos); - test_primitives!(LocalTimestampMicros); - test_primitives!(LocalTimestampMillis); - test_primitives!(LocalTimestampNanos); - - #[test] - fn test_avro_3939_compare_named_schemata_with_different_names() { - let schema_one = Schema::Ref { - name: Name::from("name1"), - }; - - let schema_two = Schema::Ref { - name: Name::from("name2"), - }; - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - assert!(!specification_eq_res); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!(!struct_field_eq_res); - - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_schemata_not_including_attributes() { - let schema_one = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key1".to_string(), Value::Bool(true))]), - ); - let schema_two = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key2".to_string(), Value::Bool(true))]), - ); - // STRUCT_FIELD_EQ does not include attributes ! 
- assert!(STRUCT_FIELD_EQ.compare(&schema_one, &schema_two)); - } - - #[test] - fn test_avro_3939_compare_schemata_including_attributes() { - let struct_field_eq = StructFieldEq { - include_attributes: true, - }; - let schema_one = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key1".to_string(), Value::Bool(true))]), - ); - let schema_two = Schema::map_with_attributes( - Schema::Boolean, - BTreeMap::from_iter([("key2".to_string(), Value::Bool(true))]), - ); - assert!(!struct_field_eq.compare(&schema_one, &schema_two)); - } - - #[test] - fn test_avro_3939_compare_map_schemata() { - let schema_one = Schema::map(Schema::Boolean); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::map(Schema::Boolean); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Map failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Map failed!" - ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_array_schemata() { - let schema_one = Schema::array(Schema::Boolean); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::array(Schema::Boolean); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Array failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Array failed!" 
- ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_decimal_schemata() { - let schema_one = Schema::Decimal(DecimalSchema { - precision: 10, - scale: 2, - inner: Box::new(Schema::Bytes), - }); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::Decimal(DecimalSchema { - precision: 10, - scale: 2, - inner: Box::new(Schema::Bytes), - }); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Decimal failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Decimal failed!" - ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_fixed_schemata() { - let schema_one = Schema::Fixed(FixedSchema { - name: Name::from("fixed"), - doc: None, - size: 10, - default: None, - aliases: None, - attributes: BTreeMap::new(), - }); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::Fixed(FixedSchema { - name: Name::from("fixed"), - doc: None, - size: 10, - default: None, - aliases: None, - attributes: BTreeMap::new(), - }); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Fixed failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Fixed failed!" 
- ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_enum_schemata() { - let schema_one = Schema::Enum(EnumSchema { - name: Name::from("enum"), - doc: None, - symbols: vec!["A".to_string(), "B".to_string()], - default: None, - aliases: None, - attributes: BTreeMap::new(), - }); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::Enum(EnumSchema { - name: Name::from("enum"), - doc: None, - symbols: vec!["A".to_string(), "B".to_string()], - default: None, - aliases: None, - attributes: BTreeMap::new(), - }); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Enum failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Enum failed!" - ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_ref_schemata() { - let schema_one = Schema::Ref { - name: Name::from("ref"), - }; - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::Ref { - name: Name::from("ref"), - }; - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Ref failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Ref failed!" 
- ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_record_schemata() { - let schema_one = Schema::Record(RecordSchema { - name: Name::from("record"), - doc: None, - fields: vec![RecordField { - name: "field".to_string(), - doc: None, - default: None, - schema: Schema::Boolean, - order: RecordFieldOrder::Ignore, - aliases: None, - custom_attributes: BTreeMap::new(), - position: 0, - }], - aliases: None, - attributes: BTreeMap::new(), - lookup: Default::default(), - }); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::Record(RecordSchema { - name: Name::from("record"), - doc: None, - fields: vec![RecordField { - name: "field".to_string(), - doc: None, - default: None, - schema: Schema::Boolean, - order: RecordFieldOrder::Ignore, - aliases: None, - custom_attributes: BTreeMap::new(), - position: 0, - }], - aliases: None, - attributes: BTreeMap::new(), - lookup: Default::default(), - }); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Record failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Record failed!" 
- ); - assert_eq!(specification_eq_res, struct_field_eq_res); - } - - #[test] - fn test_avro_3939_compare_union_schemata() -> TestResult { - let schema_one = Schema::Union(UnionSchema::new(vec![Schema::Boolean, Schema::Int])?); - assert!(!SPECIFICATION_EQ.compare(&schema_one, &Schema::Boolean)); - assert!(!STRUCT_FIELD_EQ.compare(&schema_one, &Schema::Boolean)); - - let schema_two = Schema::Union(UnionSchema::new(vec![Schema::Boolean, Schema::Int])?); - - let specification_eq_res = SPECIFICATION_EQ.compare(&schema_one, &schema_two); - let struct_field_eq_res = STRUCT_FIELD_EQ.compare(&schema_one, &schema_two); - assert!( - specification_eq_res, - "SpecificationEq: Equality of two Schema::Union failed!" - ); - assert!( - struct_field_eq_res, - "StructFieldEq: Equality of two Schema::Union failed!" - ); - assert_eq!(specification_eq_res, struct_field_eq_res); - Ok(()) - } -} diff --git a/lang/rust/avro/src/ser.rs b/lang/rust/avro/src/ser.rs deleted file mode 100644 index 519306a4d0f..00000000000 --- a/lang/rust/avro/src/ser.rs +++ /dev/null @@ -1,1041 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic for serde-compatible serialization. 
-use crate::{ - bytes::{BytesType, SER_BYTES_TYPE}, - types::Value, - Error, -}; -use serde::{ser, Serialize}; -use std::{collections::HashMap, iter::once}; - -#[derive(Clone, Default)] -pub struct Serializer {} - -pub struct SeqSerializer { - items: Vec, -} - -pub struct SeqVariantSerializer<'a> { - index: u32, - variant: &'a str, - items: Vec, -} - -pub struct MapSerializer { - indices: HashMap, - values: Vec, -} - -pub struct StructSerializer { - fields: Vec<(String, Value)>, -} - -pub struct StructVariantSerializer<'a> { - index: u32, - variant: &'a str, - fields: Vec<(String, Value)>, -} - -impl SeqSerializer { - pub fn new(len: Option) -> SeqSerializer { - let items = match len { - Some(len) => Vec::with_capacity(len), - None => Vec::new(), - }; - - SeqSerializer { items } - } -} - -impl<'a> SeqVariantSerializer<'a> { - pub fn new(index: u32, variant: &'a str, len: Option) -> SeqVariantSerializer<'a> { - let items = match len { - Some(len) => Vec::with_capacity(len), - None => Vec::new(), - }; - SeqVariantSerializer { - index, - variant, - items, - } - } -} - -impl MapSerializer { - pub fn new(len: Option) -> MapSerializer { - let (indices, values) = match len { - Some(len) => (HashMap::with_capacity(len), Vec::with_capacity(len)), - None => (HashMap::new(), Vec::new()), - }; - - MapSerializer { indices, values } - } -} - -impl StructSerializer { - pub fn new(len: usize) -> StructSerializer { - StructSerializer { - fields: Vec::with_capacity(len), - } - } -} - -impl<'a> StructVariantSerializer<'a> { - pub fn new(index: u32, variant: &'a str, len: usize) -> StructVariantSerializer<'a> { - StructVariantSerializer { - index, - variant, - fields: Vec::with_capacity(len), - } - } -} - -impl<'b> ser::Serializer for &'b mut Serializer { - type Ok = Value; - type Error = Error; - type SerializeSeq = SeqSerializer; - type SerializeTuple = SeqSerializer; - type SerializeTupleStruct = SeqSerializer; - type SerializeTupleVariant = SeqVariantSerializer<'b>; - type 
SerializeMap = MapSerializer; - type SerializeStruct = StructSerializer; - type SerializeStructVariant = StructVariantSerializer<'b>; - - fn serialize_bool(self, v: bool) -> Result { - Ok(Value::Boolean(v)) - } - - fn serialize_i8(self, v: i8) -> Result { - self.serialize_i32(i32::from(v)) - } - - fn serialize_i16(self, v: i16) -> Result { - self.serialize_i32(i32::from(v)) - } - - fn serialize_i32(self, v: i32) -> Result { - Ok(Value::Int(v)) - } - - fn serialize_i64(self, v: i64) -> Result { - Ok(Value::Long(v)) - } - - fn serialize_u8(self, v: u8) -> Result { - self.serialize_i32(i32::from(v)) - } - - fn serialize_u16(self, v: u16) -> Result { - self.serialize_i32(i32::from(v)) - } - - fn serialize_u32(self, v: u32) -> Result { - if v <= i32::MAX as u32 { - self.serialize_i32(v as i32) - } else { - self.serialize_i64(i64::from(v)) - } - } - - fn serialize_u64(self, v: u64) -> Result { - if v <= i64::MAX as u64 { - self.serialize_i64(v as i64) - } else { - Err(ser::Error::custom("u64 is too large")) - } - } - - fn serialize_f32(self, v: f32) -> Result { - Ok(Value::Float(v)) - } - - fn serialize_f64(self, v: f64) -> Result { - Ok(Value::Double(v)) - } - - fn serialize_char(self, v: char) -> Result { - self.serialize_str(&once(v).collect::()) - } - - fn serialize_str(self, v: &str) -> Result { - Ok(Value::String(v.to_owned())) - } - - fn serialize_bytes(self, v: &[u8]) -> Result { - match SER_BYTES_TYPE.get() { - BytesType::Bytes => Ok(Value::Bytes(v.to_owned())), - BytesType::Fixed => Ok(Value::Fixed(v.len(), v.to_owned())), - } - } - - fn serialize_none(self) -> Result { - Ok(Value::from(None::)) - } - - fn serialize_some(self, value: &T) -> Result - where - T: Serialize + ?Sized, - { - let v = value.serialize(&mut Serializer::default())?; - Ok(Value::from(Some(v))) - } - - fn serialize_unit(self) -> Result { - Ok(Value::Null) - } - - fn serialize_unit_struct(self, _: &'static str) -> Result { - self.serialize_unit() - } - - fn serialize_unit_variant( - self, - 
_: &'static str, - _variant_index: u32, - variant: &'static str, - ) -> Result { - Ok(Value::String(variant.to_string())) - } - - fn serialize_newtype_struct( - self, - _: &'static str, - value: &T, - ) -> Result - where - T: Serialize + ?Sized, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _: &'static str, - index: u32, - variant: &'static str, - value: &T, - ) -> Result - where - T: Serialize + ?Sized, - { - Ok(Value::Record(vec![ - ("type".to_owned(), Value::Enum(index, variant.to_owned())), - ( - "value".to_owned(), - Value::Union(index, Box::new(value.serialize(self)?)), - ), - ])) - } - - fn serialize_seq(self, len: Option) -> Result { - Ok(SeqSerializer::new(len)) - } - - fn serialize_tuple(self, len: usize) -> Result { - self.serialize_seq(Some(len)) - } - - fn serialize_tuple_struct( - self, - _: &'static str, - len: usize, - ) -> Result { - self.serialize_seq(Some(len)) - } - - fn serialize_tuple_variant( - self, - _: &'static str, - index: u32, - variant: &'static str, - len: usize, - ) -> Result { - Ok(SeqVariantSerializer::new(index, variant, Some(len))) - } - - fn serialize_map(self, len: Option) -> Result { - Ok(MapSerializer::new(len)) - } - - fn serialize_struct( - self, - _: &'static str, - len: usize, - ) -> Result { - Ok(StructSerializer::new(len)) - } - - fn serialize_struct_variant( - self, - _: &'static str, - index: u32, - variant: &'static str, - len: usize, - ) -> Result { - Ok(StructVariantSerializer::new(index, variant, len)) - } - - fn is_human_readable(&self) -> bool { - crate::util::is_human_readable() - } -} - -impl ser::SerializeSeq for SeqSerializer { - type Ok = Value; - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - self.items - .push(value.serialize(&mut Serializer::default())?); - Ok(()) - } - - fn end(self) -> Result { - Ok(Value::Array(self.items)) - } -} - -impl ser::SerializeTuple for SeqSerializer { - type Ok 
= Value; - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - ser::SerializeSeq::serialize_element(self, value) - } - - fn end(self) -> Result { - ser::SerializeSeq::end(self) - } -} - -impl ser::SerializeTupleStruct for SeqSerializer { - type Ok = Value; - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - ser::SerializeSeq::serialize_element(self, value) - } - - fn end(self) -> Result { - ser::SerializeSeq::end(self) - } -} - -impl<'a> ser::SerializeSeq for SeqVariantSerializer<'a> { - type Ok = Value; - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - self.items.push(Value::Union( - self.index, - Box::new(value.serialize(&mut Serializer::default())?), - )); - Ok(()) - } - - fn end(self) -> Result { - Ok(Value::Record(vec![ - ( - "type".to_owned(), - Value::Enum(self.index, self.variant.to_owned()), - ), - ("value".to_owned(), Value::Array(self.items)), - ])) - } -} - -impl<'a> ser::SerializeTupleVariant for SeqVariantSerializer<'a> { - type Ok = Value; - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - ser::SerializeSeq::serialize_element(self, value) - } - - fn end(self) -> Result { - ser::SerializeSeq::end(self) - } -} - -impl ser::SerializeMap for MapSerializer { - type Ok = Value; - type Error = Error; - - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - let key = key.serialize(&mut Serializer::default())?; - - if let Value::String(key) = key { - self.indices.insert(key, self.values.len()); - Ok(()) - } else { - Err(ser::Error::custom("map key is not a string")) - } - } - - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - 
self.values - .push(value.serialize(&mut Serializer::default())?); - Ok(()) - } - - fn end(self) -> Result { - let mut items = HashMap::new(); - for (key, index) in self.indices { - if let Some(value) = self.values.get(index) { - items.insert(key, value.clone()); - } - } - - Ok(Value::Map(items)) - } -} - -impl ser::SerializeStruct for StructSerializer { - type Ok = Value; - type Error = Error; - - fn serialize_field(&mut self, name: &'static str, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - self.fields.push(( - name.to_owned(), - value.serialize(&mut Serializer::default())?, - )); - Ok(()) - } - - fn end(self) -> Result { - Ok(Value::Record(self.fields)) - } -} - -impl<'a> ser::SerializeStructVariant for StructVariantSerializer<'a> { - type Ok = Value; - type Error = Error; - - fn serialize_field(&mut self, name: &'static str, value: &T) -> Result<(), Self::Error> - where - T: Serialize + ?Sized, - { - self.fields.push(( - name.to_owned(), - value.serialize(&mut Serializer::default())?, - )); - Ok(()) - } - - fn end(self) -> Result { - Ok(Value::Record(vec![ - ( - "type".to_owned(), - Value::Enum(self.index, self.variant.to_owned()), - ), - ( - "value".to_owned(), - Value::Union(self.index, Box::new(Value::Record(self.fields))), - ), - ])) - } -} - -/// Interpret a serializeable instance as a `Value`. -/// -/// This conversion can fail if the value is not valid as per the Avro specification. 
-/// e.g: HashMap with non-string keys -pub fn to_value(value: S) -> Result { - let mut serializer = Serializer::default(); - value.serialize(&mut serializer) -} - -#[cfg(test)] -mod tests { - use super::*; - use apache_avro_test_helper::TestResult; - use pretty_assertions::assert_eq; - use serde::{Deserialize, Serialize}; - use serial_test::serial; - use std::sync::atomic::Ordering; - - #[derive(Debug, Deserialize, Serialize, Clone)] - struct Test { - a: i64, - b: String, - } - - #[derive(Debug, Deserialize, Serialize)] - struct TestInner { - a: Test, - b: i32, - } - - #[derive(Debug, Deserialize, Serialize)] - struct TestUnitExternalEnum { - a: UnitExternalEnum, - } - - #[derive(Debug, Deserialize, Serialize)] - enum UnitExternalEnum { - Val1, - Val2, - } - - #[derive(Debug, Deserialize, Serialize)] - struct TestUnitInternalEnum { - a: UnitInternalEnum, - } - - #[derive(Debug, Deserialize, Serialize)] - #[serde(tag = "t")] - enum UnitInternalEnum { - Val1, - Val2, - } - - #[derive(Debug, Deserialize, Serialize)] - struct TestUnitAdjacentEnum { - a: UnitAdjacentEnum, - } - - #[derive(Debug, Deserialize, Serialize)] - #[serde(tag = "t", content = "v")] - enum UnitAdjacentEnum { - Val1, - Val2, - } - - #[derive(Debug, Deserialize, Serialize)] - struct TestUnitUntaggedEnum { - a: UnitUntaggedEnum, - } - - #[derive(Debug, Deserialize, Serialize)] - #[serde(untagged)] - enum UnitUntaggedEnum { - Val1, - Val2, - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestSingleValueExternalEnum { - a: SingleValueExternalEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - enum SingleValueExternalEnum { - Double(f64), - String(String), - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestSingleValueInternalEnum { - a: SingleValueInternalEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(tag = "t")] - enum SingleValueInternalEnum { - Double(f64), - String(String), - } - - #[derive(Debug, Serialize, Deserialize)] - struct 
TestSingleValueAdjacentEnum { - a: SingleValueAdjacentEnum, - } - #[derive(Debug, Serialize, Deserialize)] - #[serde(tag = "t", content = "v")] - enum SingleValueAdjacentEnum { - Double(f64), - String(String), - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestSingleValueUntaggedEnum { - a: SingleValueUntaggedEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(untagged)] - enum SingleValueUntaggedEnum { - Double(f64), - String(String), - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestStructExternalEnum { - a: StructExternalEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - enum StructExternalEnum { - Val1 { x: f32, y: f32 }, - Val2 { x: f32, y: f32 }, - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestStructInternalEnum { - a: StructInternalEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(tag = "type")] - enum StructInternalEnum { - Val1 { x: f32, y: f32 }, - Val2 { x: f32, y: f32 }, - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestStructAdjacentEnum { - a: StructAdjacentEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(tag = "t", content = "v")] - enum StructAdjacentEnum { - Val1 { x: f32, y: f32 }, - Val2 { x: f32, y: f32 }, - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestStructUntaggedEnum { - a: StructUntaggedEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(untagged)] - enum StructUntaggedEnum { - Val1 { x: f32, y: f32 }, - Val2 { x: f32, y: f32, z: f32 }, - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestTupleExternalEnum { - a: TupleExternalEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - enum TupleExternalEnum { - Val1(f32, f32), - Val2(f32, f32, f32), - } - - // Tuple Internal Enum cannot be instantiated - - #[derive(Debug, Serialize, Deserialize)] - struct TestTupleAdjacentEnum { - a: TupleAdjacentEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(tag = "t", content = "v")] - enum 
TupleAdjacentEnum { - Val1(f32, f32), - Val2(f32, f32, f32), - } - - #[derive(Debug, Serialize, Deserialize)] - struct TestTupleUntaggedEnum { - a: TupleUntaggedEnum, - } - - #[derive(Debug, Serialize, Deserialize)] - #[serde(untagged)] - enum TupleUntaggedEnum { - Val1(f32, f32), - Val2(f32, f32, f32), - } - - #[test] - fn test_to_value() -> TestResult { - let test = Test { - a: 27, - b: "foo".to_owned(), - }; - let expected = Value::Record(vec![ - ("a".to_owned(), Value::Long(27)), - ("b".to_owned(), Value::String("foo".to_owned())), - ]); - - assert_eq!(to_value(test.clone())?, expected); - - let test_inner = TestInner { a: test, b: 35 }; - - let expected_inner = Value::Record(vec![ - ( - "a".to_owned(), - Value::Record(vec![ - ("a".to_owned(), Value::Long(27)), - ("b".to_owned(), Value::String("foo".to_owned())), - ]), - ), - ("b".to_owned(), Value::Int(35)), - ]); - - assert_eq!(to_value(test_inner)?, expected_inner); - - Ok(()) - } - - #[test] - fn test_to_value_unit_enum() -> TestResult { - let test = TestUnitExternalEnum { - a: UnitExternalEnum::Val1, - }; - - let expected = Value::Record(vec![("a".to_owned(), Value::String("Val1".to_owned()))]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing unit external enum" - ); - - let test = TestUnitInternalEnum { - a: UnitInternalEnum::Val1, - }; - - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing unit internal enum" - ); - - let test = TestUnitAdjacentEnum { - a: UnitAdjacentEnum::Val1, - }; - - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing unit adjacent enum" - ); - - let test = TestUnitUntaggedEnum { - a: UnitUntaggedEnum::Val1, - }; - - let expected = 
Value::Record(vec![("a".to_owned(), Value::Null)]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing unit untagged enum" - ); - - Ok(()) - } - - #[test] - fn test_to_value_single_value_enum() -> TestResult { - let test = TestSingleValueExternalEnum { - a: SingleValueExternalEnum::Double(64.0), - }; - - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(0, "Double".to_owned())), - ( - "value".to_owned(), - Value::Union(0, Box::new(Value::Double(64.0))), - ), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing single value external enum" - ); - - // It is not possible to serialize an internal Single Value enum... - let test = TestSingleValueInternalEnum { - a: SingleValueInternalEnum::Double(64.0), - }; - - assert!(to_value(test).is_err(), "{}", true); - - let test = TestSingleValueAdjacentEnum { - a: SingleValueAdjacentEnum::Double(64.0), - }; - - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("t".to_owned(), Value::String("Double".to_owned())), - ("v".to_owned(), Value::Double(64.0)), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing single value adjacent enum" - ); - - let test = TestSingleValueUntaggedEnum { - a: SingleValueUntaggedEnum::Double(64.0), - }; - - let expected = Value::Record(vec![("a".to_owned(), Value::Double(64.0))]); - - assert_eq!( - to_value(test)?, - expected, - "Error serializing single value untagged enum" - ); - - Ok(()) - } - - #[test] - fn test_to_value_struct_enum() -> TestResult { - let test = TestStructExternalEnum { - a: StructExternalEnum::Val1 { x: 1.0, y: 2.0 }, - }; - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(0, "Val1".to_owned())), - ( - "value".to_owned(), - Value::Union( - 0, - Box::new(Value::Record(vec![ - ("x".to_owned(), Value::Float(1.0)), - ("y".to_owned(), Value::Float(2.0)), - ])), - ), 
- ), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing struct external enum" - ); - - // I don't think that this is feasible in avro - - let test = TestStructInternalEnum { - a: StructInternalEnum::Val1 { x: 1.0, y: 2.0 }, - }; - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::String("Val1".to_owned())), - ("x".to_owned(), Value::Float(1.0)), - ("y".to_owned(), Value::Float(2.0)), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing struct internal enum" - ); - - let test = TestStructAdjacentEnum { - a: StructAdjacentEnum::Val1 { x: 1.0, y: 2.0 }, - }; - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("t".to_owned(), Value::String("Val1".to_owned())), - ( - "v".to_owned(), - Value::Record(vec![ - ("x".to_owned(), Value::Float(1.0)), - ("y".to_owned(), Value::Float(2.0)), - ]), - ), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing struct adjacent enum" - ); - - let test = TestStructUntaggedEnum { - a: StructUntaggedEnum::Val1 { x: 1.0, y: 2.0 }, - }; - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("x".to_owned(), Value::Float(1.0)), - ("y".to_owned(), Value::Float(2.0)), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing struct untagged enum" - ); - - let test = TestStructUntaggedEnum { - a: StructUntaggedEnum::Val2 { - x: 1.0, - y: 2.0, - z: 3.0, - }, - }; - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("x".to_owned(), Value::Float(1.0)), - ("y".to_owned(), Value::Float(2.0)), - ("z".to_owned(), Value::Float(3.0)), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing struct untagged enum variant" - ); - - Ok(()) - } - - #[test] - fn test_to_value_tuple_enum() -> TestResult { - let test = TestTupleExternalEnum { - a: TupleExternalEnum::Val2(1.0, 2.0, 3.0), - }; - - let 
expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::Enum(1, "Val2".to_owned())), - ( - "value".to_owned(), - Value::Array(vec![ - Value::Union(1, Box::new(Value::Float(1.0))), - Value::Union(1, Box::new(Value::Float(2.0))), - Value::Union(1, Box::new(Value::Float(3.0))), - ]), - ), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing tuple external enum" - ); - - let test = TestTupleAdjacentEnum { - a: TupleAdjacentEnum::Val1(1.0, 2.0), - }; - - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("t".to_owned(), Value::String("Val1".to_owned())), - ( - "v".to_owned(), - Value::Array(vec![Value::Float(1.0), Value::Float(2.0)]), - ), - ]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing tuple adjacent enum" - ); - - let test = TestTupleUntaggedEnum { - a: TupleUntaggedEnum::Val1(1.0, 2.0), - }; - - let expected = Value::Record(vec![( - "a".to_owned(), - Value::Array(vec![Value::Float(1.0), Value::Float(2.0)]), - )]); - - assert_eq!( - to_value(test)?, - expected, - "error serializing tuple untagged enum" - ); - - Ok(()) - } - - #[test] - #[serial(avro_3747)] - fn avro_3747_human_readable_false() { - use serde::ser::Serializer as SerdeSerializer; - - crate::util::SERDE_HUMAN_READABLE.store(false, Ordering::Release); - - let ser = &mut Serializer {}; - - assert_eq!(ser.is_human_readable(), false); - } - - #[test] - #[serial(avro_3747)] - fn avro_3747_human_readable_true() { - use serde::ser::Serializer as SerdeSerializer; - - crate::util::SERDE_HUMAN_READABLE.store(true, Ordering::Release); - - let ser = &mut Serializer {}; - - assert!(ser.is_human_readable()); - } -} diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs deleted file mode 100644 index 7afacf0a139..00000000000 --- a/lang/rust/avro/src/types.rs +++ /dev/null @@ -1,3222 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic handling the intermediate representation of Avro values. -use crate::{ - bigdecimal::{deserialize_big_decimal, serialize_big_decimal}, - decimal::Decimal, - duration::Duration, - schema::{ - DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, Precision, RecordField, - RecordSchema, ResolvedSchema, Scale, Schema, SchemaKind, UnionSchema, - }, - AvroResult, Error, -}; -use bigdecimal::BigDecimal; -use serde_json::{Number, Value as JsonValue}; -use std::{ - borrow::Borrow, - collections::{BTreeMap, HashMap}, - fmt::Debug, - hash::BuildHasher, - str::FromStr, -}; -use uuid::Uuid; - -/// Compute the maximum decimal value precision of a byte array of length `len` could hold. -fn max_prec_for_len(len: usize) -> Result { - let len = i32::try_from(len).map_err(|e| Error::ConvertLengthToI32(e, len))?; - Ok((2.0_f64.powi(8 * len - 1) - 1.0).log10().floor() as usize) -} - -/// A valid Avro value. -/// -/// More information about Avro values can be found in the [Avro -/// Specification](https://avro.apache.org/docs/current/specification/#schema-declaration) -#[derive(Clone, Debug, PartialEq, strum_macros::EnumDiscriminants)] -#[strum_discriminants(name(ValueKind))] -pub enum Value { - /// A `null` Avro value. 
- Null, - /// A `boolean` Avro value. - Boolean(bool), - /// A `int` Avro value. - Int(i32), - /// A `long` Avro value. - Long(i64), - /// A `float` Avro value. - Float(f32), - /// A `double` Avro value. - Double(f64), - /// A `bytes` Avro value. - Bytes(Vec), - /// A `string` Avro value. - String(String), - /// A `fixed` Avro value. - /// The size of the fixed value is represented as a `usize`. - Fixed(usize, Vec), - /// An `enum` Avro value. - /// - /// An Enum is represented by a symbol and its position in the symbols list - /// of its corresponding schema. - /// This allows schema-less encoding, as well as schema resolution while - /// reading values. - Enum(u32, String), - /// An `union` Avro value. - /// - /// A Union is represented by the value it holds and its position in the type list - /// of its corresponding schema - /// This allows schema-less encoding, as well as schema resolution while - /// reading values. - Union(u32, Box), - /// An `array` Avro value. - Array(Vec), - /// A `map` Avro value. - Map(HashMap), - /// A `record` Avro value. - /// - /// A Record is represented by a vector of (``, `value`). - /// This allows schema-less encoding. - /// - /// See [Record](types.Record) for a more user-friendly support. - Record(Vec<(String, Value)>), - /// A date value. - /// - /// Serialized and deserialized as `i32` directly. Can only be deserialized properly with a - /// schema. - Date(i32), - /// An Avro Decimal value. Bytes are in big-endian order, per the Avro spec. - Decimal(Decimal), - /// An Avro Decimal value. - BigDecimal(BigDecimal), - /// Time in milliseconds. - TimeMillis(i32), - /// Time in microseconds. - TimeMicros(i64), - /// Timestamp in milliseconds. - TimestampMillis(i64), - /// Timestamp in microseconds. - TimestampMicros(i64), - /// Timestamp in nanoseconds. - TimestampNanos(i64), - /// Local timestamp in milliseconds. - LocalTimestampMillis(i64), - /// Local timestamp in microseconds. 
- LocalTimestampMicros(i64), - /// Local timestamp in nanoseconds. - LocalTimestampNanos(i64), - /// Avro Duration. An amount of time defined by months, days and milliseconds. - Duration(Duration), - /// Universally unique identifier. - Uuid(Uuid), -} - -/// Any structure implementing the [ToAvro](trait.ToAvro.html) trait will be usable -/// from a [Writer](../writer/struct.Writer.html). -#[deprecated( - since = "0.11.0", - note = "Please use Value::from, Into::into or value.into() instead" -)] -pub trait ToAvro { - /// Transforms this value into an Avro-compatible [Value](enum.Value.html). - fn avro(self) -> Value; -} - -#[allow(deprecated)] -impl> ToAvro for T { - fn avro(self) -> Value { - self.into() - } -} - -macro_rules! to_value( - ($type:ty, $variant_constructor:expr) => ( - impl From<$type> for Value { - fn from(value: $type) -> Self { - $variant_constructor(value) - } - } - ); -); - -to_value!(bool, Value::Boolean); -to_value!(i32, Value::Int); -to_value!(i64, Value::Long); -to_value!(f32, Value::Float); -to_value!(f64, Value::Double); -to_value!(String, Value::String); -to_value!(Vec, Value::Bytes); -to_value!(uuid::Uuid, Value::Uuid); -to_value!(Decimal, Value::Decimal); -to_value!(BigDecimal, Value::BigDecimal); -to_value!(Duration, Value::Duration); - -impl From<()> for Value { - fn from(_: ()) -> Self { - Self::Null - } -} - -impl From for Value { - fn from(value: usize) -> Self { - i64::try_from(value) - .expect("cannot convert usize to i64") - .into() - } -} - -impl From<&str> for Value { - fn from(value: &str) -> Self { - Self::String(value.to_owned()) - } -} - -impl From<&[u8]> for Value { - fn from(value: &[u8]) -> Self { - Self::Bytes(value.to_owned()) - } -} - -impl From> for Value -where - T: Into, -{ - fn from(value: Option) -> Self { - // FIXME: this is incorrect in case first type in union is not "none" - Self::Union( - value.is_some() as u32, - Box::new(value.map_or_else(|| Self::Null, Into::into)), - ) - } -} - -impl From> for Value 
-where - K: Into, - V: Into, - S: BuildHasher, -{ - fn from(value: HashMap) -> Self { - Self::Map( - value - .into_iter() - .map(|(key, value)| (key.into(), value.into())) - .collect(), - ) - } -} - -/// Utility interface to build `Value::Record` objects. -#[derive(Debug, Clone)] -pub struct Record<'a> { - /// List of fields contained in the record. - /// Ordered according to the fields in the schema given to create this - /// `Record` object. Any unset field defaults to `Value::Null`. - pub fields: Vec<(String, Value)>, - schema_lookup: &'a BTreeMap, -} - -impl<'a> Record<'a> { - /// Create a `Record` given a `Schema`. - /// - /// If the `Schema` is not a `Schema::Record` variant, `None` will be returned. - pub fn new(schema: &Schema) -> Option { - match *schema { - Schema::Record(RecordSchema { - fields: ref schema_fields, - lookup: ref schema_lookup, - .. - }) => { - let mut fields = Vec::with_capacity(schema_fields.len()); - for schema_field in schema_fields.iter() { - fields.push((schema_field.name.clone(), Value::Null)); - } - - Some(Record { - fields, - schema_lookup, - }) - } - _ => None, - } - } - - /// Put a compatible value (implementing the `ToAvro` trait) in the - /// `Record` for a given `field` name. - /// - /// **NOTE** Only ensure that the field name is present in the `Schema` given when creating - /// this `Record`. Does not perform any schema validation. 
- pub fn put(&mut self, field: &str, value: V) - where - V: Into, - { - if let Some(&position) = self.schema_lookup.get(field) { - self.fields[position].1 = value.into() - } - } -} - -impl<'a> From> for Value { - fn from(value: Record<'a>) -> Self { - Self::Record(value.fields) - } -} - -impl From for Value { - fn from(value: JsonValue) -> Self { - match value { - JsonValue::Null => Self::Null, - JsonValue::Bool(b) => b.into(), - JsonValue::Number(ref n) if n.is_i64() => { - let n = n.as_i64().unwrap(); - if n >= i32::MIN as i64 && n <= i32::MAX as i64 { - Value::Int(n as i32) - } else { - Value::Long(n) - } - } - JsonValue::Number(ref n) if n.is_f64() => Value::Double(n.as_f64().unwrap()), - JsonValue::Number(n) => Value::Long(n.as_u64().unwrap() as i64), // TODO: Not so great - JsonValue::String(s) => s.into(), - JsonValue::Array(items) => Value::Array(items.into_iter().map(Value::from).collect()), - JsonValue::Object(items) => Value::Map( - items - .into_iter() - .map(|(key, value)| (key, value.into())) - .collect(), - ), - } - } -} - -/// Convert Avro values to Json values -impl TryFrom for JsonValue { - type Error = crate::error::Error; - fn try_from(value: Value) -> AvroResult { - match value { - Value::Null => Ok(Self::Null), - Value::Boolean(b) => Ok(Self::Bool(b)), - Value::Int(i) => Ok(Self::Number(i.into())), - Value::Long(l) => Ok(Self::Number(l.into())), - Value::Float(f) => Number::from_f64(f.into()) - .map(Self::Number) - .ok_or_else(|| Error::ConvertF64ToJson(f.into())), - Value::Double(d) => Number::from_f64(d) - .map(Self::Number) - .ok_or(Error::ConvertF64ToJson(d)), - Value::Bytes(bytes) => Ok(Self::Array(bytes.into_iter().map(|b| b.into()).collect())), - Value::String(s) => Ok(Self::String(s)), - Value::Fixed(_size, items) => { - Ok(Self::Array(items.into_iter().map(|v| v.into()).collect())) - } - Value::Enum(_i, s) => Ok(Self::String(s)), - Value::Union(_i, b) => Self::try_from(*b), - Value::Array(items) => items - .into_iter() - 
.map(Self::try_from) - .collect::, _>>() - .map(Self::Array), - Value::Map(items) => items - .into_iter() - .map(|(key, value)| Self::try_from(value).map(|v| (key, v))) - .collect::, _>>() - .map(|v| Self::Object(v.into_iter().collect())), - Value::Record(items) => items - .into_iter() - .map(|(key, value)| Self::try_from(value).map(|v| (key, v))) - .collect::, _>>() - .map(|v| Self::Object(v.into_iter().collect())), - Value::Date(d) => Ok(Self::Number(d.into())), - Value::Decimal(ref d) => >::try_from(d) - .map(|vec| Self::Array(vec.into_iter().map(|v| v.into()).collect())), - Value::BigDecimal(ref bg) => { - let vec1: Vec = serialize_big_decimal(bg); - Ok(Self::Array(vec1.into_iter().map(|b| b.into()).collect())) - } - Value::TimeMillis(t) => Ok(Self::Number(t.into())), - Value::TimeMicros(t) => Ok(Self::Number(t.into())), - Value::TimestampMillis(t) => Ok(Self::Number(t.into())), - Value::TimestampMicros(t) => Ok(Self::Number(t.into())), - Value::TimestampNanos(t) => Ok(Self::Number(t.into())), - Value::LocalTimestampMillis(t) => Ok(Self::Number(t.into())), - Value::LocalTimestampMicros(t) => Ok(Self::Number(t.into())), - Value::LocalTimestampNanos(t) => Ok(Self::Number(t.into())), - Value::Duration(d) => Ok(Self::Array( - <[u8; 12]>::from(d).iter().map(|&v| v.into()).collect(), - )), - Value::Uuid(uuid) => Ok(Self::String(uuid.as_hyphenated().to_string())), - } - } -} - -impl Value { - /// Validate the value against the given [Schema](../schema/enum.Schema.html). - /// - /// See the [Avro specification](https://avro.apache.org/docs/current/specification) - /// for the full set of rules of schema validation. 
- pub fn validate(&self, schema: &Schema) -> bool { - self.validate_schemata(vec![schema]) - } - - pub fn validate_schemata(&self, schemata: Vec<&Schema>) -> bool { - let rs = ResolvedSchema::try_from(schemata.clone()) - .expect("Schemata didn't successfully resolve"); - let schemata_len = schemata.len(); - schemata.iter().any(|schema| { - let enclosing_namespace = schema.namespace(); - - match self.validate_internal(schema, rs.get_names(), &enclosing_namespace) { - Some(reason) => { - let log_message = format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - self, schema, reason - ); - if schemata_len == 1 { - error!("{}", log_message); - } else { - debug!("{}", log_message); - }; - false - } - None => true, - } - }) - } - - fn accumulate(accumulator: Option, other: Option) -> Option { - match (accumulator, other) { - (None, None) => None, - (None, s @ Some(_)) => s, - (s @ Some(_), None) => s, - (Some(reason1), Some(reason2)) => Some(format!("{reason1}\n{reason2}")), - } - } - - /// Validates the value against the provided schema. - pub(crate) fn validate_internal + Debug>( - &self, - schema: &Schema, - names: &HashMap, - enclosing_namespace: &Namespace, - ) -> Option { - match (self, schema) { - (_, Schema::Ref { name }) => { - let name = name.fully_qualified_name(enclosing_namespace); - names.get(&name).map_or_else( - || { - Some(format!( - "Unresolved schema reference: '{:?}'. 
Parsed names: {:?}", - name, - names.keys() - )) - }, - |s| self.validate_internal(s.borrow(), names, &name.namespace), - ) - } - (&Value::Null, &Schema::Null) => None, - (&Value::Boolean(_), &Schema::Boolean) => None, - (&Value::Int(_), &Schema::Int) => None, - (&Value::Int(_), &Schema::Date) => None, - (&Value::Int(_), &Schema::TimeMillis) => None, - (&Value::Int(_), &Schema::Long) => None, - (&Value::Long(_), &Schema::Long) => None, - (&Value::Long(_), &Schema::TimeMicros) => None, - (&Value::Long(_), &Schema::TimestampMillis) => None, - (&Value::Long(_), &Schema::TimestampMicros) => None, - (&Value::Long(_), &Schema::LocalTimestampMillis) => None, - (&Value::Long(_), &Schema::LocalTimestampMicros) => None, - (&Value::TimestampMicros(_), &Schema::TimestampMicros) => None, - (&Value::TimestampMillis(_), &Schema::TimestampMillis) => None, - (&Value::TimestampNanos(_), &Schema::TimestampNanos) => None, - (&Value::LocalTimestampMicros(_), &Schema::LocalTimestampMicros) => None, - (&Value::LocalTimestampMillis(_), &Schema::LocalTimestampMillis) => None, - (&Value::LocalTimestampNanos(_), &Schema::LocalTimestampNanos) => None, - (&Value::TimeMicros(_), &Schema::TimeMicros) => None, - (&Value::TimeMillis(_), &Schema::TimeMillis) => None, - (&Value::Date(_), &Schema::Date) => None, - (&Value::Decimal(_), &Schema::Decimal { .. }) => None, - (&Value::BigDecimal(_), &Schema::BigDecimal) => None, - (&Value::Duration(_), &Schema::Duration) => None, - (&Value::Uuid(_), &Schema::Uuid) => None, - (&Value::Float(_), &Schema::Float) => None, - (&Value::Float(_), &Schema::Double) => None, - (&Value::Double(_), &Schema::Double) => None, - (&Value::Bytes(_), &Schema::Bytes) => None, - (&Value::Bytes(_), &Schema::Decimal { .. }) => None, - (&Value::String(_), &Schema::String) => None, - (&Value::String(_), &Schema::Uuid) => None, - (&Value::Fixed(n, _), &Schema::Fixed(FixedSchema { size, .. 
})) => { - if n != size { - Some(format!( - "The value's size ({n}) is different than the schema's size ({size})" - )) - } else { - None - } - } - (Value::Bytes(b), &Schema::Fixed(FixedSchema { size, .. })) => { - if b.len() != size { - Some(format!( - "The bytes' length ({}) is different than the schema's size ({})", - b.len(), - size - )) - } else { - None - } - } - (&Value::Fixed(n, _), &Schema::Duration) => { - if n != 12 { - Some(format!( - "The value's size ('{n}') must be exactly 12 to be a Duration" - )) - } else { - None - } - } - // TODO: check precision against n - (&Value::Fixed(_n, _), &Schema::Decimal { .. }) => None, - (Value::String(s), Schema::Enum(EnumSchema { symbols, .. })) => { - if !symbols.contains(s) { - Some(format!("'{s}' is not a member of the possible symbols")) - } else { - None - } - } - ( - &Value::Enum(i, ref s), - Schema::Enum(EnumSchema { - symbols, default, .. - }), - ) => symbols - .get(i as usize) - .map(|ref symbol| { - if symbol != &s { - Some(format!("Symbol '{s}' is not at position '{i}'")) - } else { - None - } - }) - .unwrap_or_else(|| match default { - Some(_) => None, - None => Some(format!("No symbol at position '{i}'")), - }), - // (&Value::Union(None), &Schema::Union(_)) => None, - (&Value::Union(i, ref value), Schema::Union(inner)) => inner - .variants() - .get(i as usize) - .map(|schema| value.validate_internal(schema, names, enclosing_namespace)) - .unwrap_or_else(|| Some(format!("No schema in the union at position '{i}'"))), - (v, Schema::Union(inner)) => { - match inner.find_schema_with_known_schemata(v, Some(names), enclosing_namespace) { - Some(_) => None, - None => Some("Could not find matching type in union".to_string()), - } - } - (Value::Array(items), Schema::Array(inner)) => items.iter().fold(None, |acc, item| { - Value::accumulate( - acc, - item.validate_internal(&inner.items, names, enclosing_namespace), - ) - }), - (Value::Map(items), Schema::Map(inner)) => { - items.iter().fold(None, |acc, (_, value)| 
{ - Value::accumulate( - acc, - value.validate_internal(&inner.types, names, enclosing_namespace), - ) - }) - } - ( - Value::Record(record_fields), - Schema::Record(RecordSchema { - fields, - lookup, - name, - .. - }), - ) => { - let non_nullable_fields_count = - fields.iter().filter(|&rf| !rf.is_nullable()).count(); - - // If the record contains fewer fields as required fields by the schema, it is invalid. - if record_fields.len() < non_nullable_fields_count { - return Some(format!( - "The value's records length ({}) doesn't match the schema ({} non-nullable fields)", - record_fields.len(), - non_nullable_fields_count - )); - } else if record_fields.len() > fields.len() { - return Some(format!( - "The value's records length ({}) is greater than the schema's ({} fields)", - record_fields.len(), - fields.len(), - )); - } - - record_fields - .iter() - .fold(None, |acc, (field_name, record_field)| { - let record_namespace = if name.namespace.is_none() { - enclosing_namespace - } else { - &name.namespace - }; - match lookup.get(field_name) { - Some(idx) => { - let field = &fields[*idx]; - Value::accumulate( - acc, - record_field.validate_internal( - &field.schema, - names, - record_namespace, - ), - ) - } - None => Value::accumulate( - acc, - Some(format!("There is no schema field for field '{field_name}'")), - ), - } - }) - } - (Value::Map(items), Schema::Record(RecordSchema { fields, .. })) => { - fields.iter().fold(None, |acc, field| { - if let Some(item) = items.get(&field.name) { - let res = item.validate_internal(&field.schema, names, enclosing_namespace); - Value::accumulate(acc, res) - } else if !field.is_nullable() { - Value::accumulate( - acc, - Some(format!( - "Field with name '{:?}' is not a member of the map items", - field.name - )), - ) - } else { - acc - } - }) - } - (v, s) => Some(format!( - "Unsupported value-schema combination! 
Value: {:?}, schema: {:?}", - v, s - )), - } - } - - /// Attempt to perform schema resolution on the value, with the given - /// [Schema](../schema/enum.Schema.html). - /// - /// See [Schema Resolution](https://avro.apache.org/docs/current/specification/#schema-resolution) - /// in the Avro specification for the full set of rules of schema - /// resolution. - pub fn resolve(self, schema: &Schema) -> AvroResult { - let enclosing_namespace = schema.namespace(); - let rs = ResolvedSchema::try_from(schema)?; - self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None) - } - - /// Attempt to perform schema resolution on the value, with the given - /// [Schema](../schema/enum.Schema.html) and set of schemas to use for Refs resolution. - /// - /// See [Schema Resolution](https://avro.apache.org/docs/current/specification/#schema-resolution) - /// in the Avro specification for the full set of rules of schema - /// resolution. - pub fn resolve_schemata(self, schema: &Schema, schemata: Vec<&Schema>) -> AvroResult { - let enclosing_namespace = schema.namespace(); - let rs = ResolvedSchema::try_from(schemata)?; - self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None) - } - - pub(crate) fn resolve_internal + Debug>( - mut self, - schema: &Schema, - names: &HashMap, - enclosing_namespace: &Namespace, - field_default: &Option, - ) -> AvroResult { - // Check if this schema is a union, and if the reader schema is not. - if SchemaKind::from(&self) == SchemaKind::Union - && SchemaKind::from(schema) != SchemaKind::Union - { - // Pull out the Union, and attempt to resolve against it. 
- let v = match self { - Value::Union(_i, b) => *b, - _ => unreachable!(), - }; - self = v; - } - match *schema { - Schema::Ref { ref name } => { - let name = name.fully_qualified_name(enclosing_namespace); - - if let Some(resolved) = names.get(&name) { - debug!("Resolved {:?}", name); - self.resolve_internal(resolved.borrow(), names, &name.namespace, field_default) - } else { - error!("Failed to resolve schema {:?}", name); - Err(Error::SchemaResolutionError(name.clone())) - } - } - Schema::Null => self.resolve_null(), - Schema::Boolean => self.resolve_boolean(), - Schema::Int => self.resolve_int(), - Schema::Long => self.resolve_long(), - Schema::Float => self.resolve_float(), - Schema::Double => self.resolve_double(), - Schema::Bytes => self.resolve_bytes(), - Schema::String => self.resolve_string(), - Schema::Fixed(FixedSchema { size, .. }) => self.resolve_fixed(size), - Schema::Union(ref inner) => { - self.resolve_union(inner, names, enclosing_namespace, field_default) - } - Schema::Enum(EnumSchema { - ref symbols, - ref default, - .. - }) => self.resolve_enum(symbols, default, field_default), - Schema::Array(ref inner) => { - self.resolve_array(&inner.items, names, enclosing_namespace) - } - Schema::Map(ref inner) => self.resolve_map(&inner.types, names, enclosing_namespace), - Schema::Record(RecordSchema { ref fields, .. 
}) => { - self.resolve_record(fields, names, enclosing_namespace) - } - Schema::Decimal(DecimalSchema { - scale, - precision, - ref inner, - }) => self.resolve_decimal(precision, scale, inner), - Schema::BigDecimal => self.resolve_bigdecimal(), - Schema::Date => self.resolve_date(), - Schema::TimeMillis => self.resolve_time_millis(), - Schema::TimeMicros => self.resolve_time_micros(), - Schema::TimestampMillis => self.resolve_timestamp_millis(), - Schema::TimestampMicros => self.resolve_timestamp_micros(), - Schema::TimestampNanos => self.resolve_timestamp_nanos(), - Schema::LocalTimestampMillis => self.resolve_local_timestamp_millis(), - Schema::LocalTimestampMicros => self.resolve_local_timestamp_micros(), - Schema::LocalTimestampNanos => self.resolve_local_timestamp_nanos(), - Schema::Duration => self.resolve_duration(), - Schema::Uuid => self.resolve_uuid(), - } - } - - fn resolve_uuid(self) -> Result { - Ok(match self { - uuid @ Value::Uuid(_) => uuid, - Value::String(ref string) => { - Value::Uuid(Uuid::from_str(string).map_err(Error::ConvertStrToUuid)?) - } - other => return Err(Error::GetUuid(other)), - }) - } - - fn resolve_bigdecimal(self) -> Result { - Ok(match self { - bg @ Value::BigDecimal(_) => bg, - Value::Bytes(b) => Value::BigDecimal(deserialize_big_decimal(&b).unwrap()), - other => return Err(Error::GetBigDecimal(other)), - }) - } - - fn resolve_duration(self) -> Result { - Ok(match self { - duration @ Value::Duration { .. 
} => duration, - Value::Fixed(size, bytes) => { - if size != 12 { - return Err(Error::GetDecimalFixedBytes(size)); - } - Value::Duration(Duration::from([ - bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], - bytes[8], bytes[9], bytes[10], bytes[11], - ])) - } - other => return Err(Error::ResolveDuration(other)), - }) - } - - fn resolve_decimal( - self, - precision: Precision, - scale: Scale, - inner: &Schema, - ) -> Result { - if scale > precision { - return Err(Error::GetScaleAndPrecision { scale, precision }); - } - match inner { - &Schema::Fixed(FixedSchema { size, .. }) => { - if max_prec_for_len(size)? < precision { - return Err(Error::GetScaleWithFixedSize { size, precision }); - } - } - Schema::Bytes => (), - _ => return Err(Error::ResolveDecimalSchema(inner.into())), - }; - match self { - Value::Decimal(num) => { - let num_bytes = num.len(); - if max_prec_for_len(num_bytes)? < precision { - Err(Error::ComparePrecisionAndSize { - precision, - num_bytes, - }) - } else { - Ok(Value::Decimal(num)) - } - // check num.bits() here - } - Value::Fixed(_, bytes) | Value::Bytes(bytes) => { - if max_prec_for_len(bytes.len())? < precision { - Err(Error::ComparePrecisionAndSize { - precision, - num_bytes: bytes.len(), - }) - } else { - // precision and scale match, can we assume the underlying type can hold the data? 
- Ok(Value::Decimal(Decimal::from(bytes))) - } - } - other => Err(Error::ResolveDecimal(other)), - } - } - - fn resolve_date(self) -> Result { - match self { - Value::Date(d) | Value::Int(d) => Ok(Value::Date(d)), - other => Err(Error::GetDate(other)), - } - } - - fn resolve_time_millis(self) -> Result { - match self { - Value::TimeMillis(t) | Value::Int(t) => Ok(Value::TimeMillis(t)), - other => Err(Error::GetTimeMillis(other)), - } - } - - fn resolve_time_micros(self) -> Result { - match self { - Value::TimeMicros(t) | Value::Long(t) => Ok(Value::TimeMicros(t)), - Value::Int(t) => Ok(Value::TimeMicros(i64::from(t))), - other => Err(Error::GetTimeMicros(other)), - } - } - - fn resolve_timestamp_millis(self) -> Result { - match self { - Value::TimestampMillis(ts) | Value::Long(ts) => Ok(Value::TimestampMillis(ts)), - Value::Int(ts) => Ok(Value::TimestampMillis(i64::from(ts))), - other => Err(Error::GetTimestampMillis(other)), - } - } - - fn resolve_timestamp_micros(self) -> Result { - match self { - Value::TimestampMicros(ts) | Value::Long(ts) => Ok(Value::TimestampMicros(ts)), - Value::Int(ts) => Ok(Value::TimestampMicros(i64::from(ts))), - other => Err(Error::GetTimestampMicros(other)), - } - } - - fn resolve_timestamp_nanos(self) -> Result { - match self { - Value::TimestampNanos(ts) | Value::Long(ts) => Ok(Value::TimestampNanos(ts)), - Value::Int(ts) => Ok(Value::TimestampNanos(i64::from(ts))), - other => Err(Error::GetTimestampNanos(other)), - } - } - - fn resolve_local_timestamp_millis(self) -> Result { - match self { - Value::LocalTimestampMillis(ts) | Value::Long(ts) => { - Ok(Value::LocalTimestampMillis(ts)) - } - Value::Int(ts) => Ok(Value::LocalTimestampMillis(i64::from(ts))), - other => Err(Error::GetLocalTimestampMillis(other)), - } - } - - fn resolve_local_timestamp_micros(self) -> Result { - match self { - Value::LocalTimestampMicros(ts) | Value::Long(ts) => { - Ok(Value::LocalTimestampMicros(ts)) - } - Value::Int(ts) => 
Ok(Value::LocalTimestampMicros(i64::from(ts))), - other => Err(Error::GetLocalTimestampMicros(other)), - } - } - - fn resolve_local_timestamp_nanos(self) -> Result { - match self { - Value::LocalTimestampNanos(ts) | Value::Long(ts) => Ok(Value::LocalTimestampNanos(ts)), - Value::Int(ts) => Ok(Value::LocalTimestampNanos(i64::from(ts))), - other => Err(Error::GetLocalTimestampNanos(other)), - } - } - - fn resolve_null(self) -> Result { - match self { - Value::Null => Ok(Value::Null), - other => Err(Error::GetNull(other)), - } - } - - fn resolve_boolean(self) -> Result { - match self { - Value::Boolean(b) => Ok(Value::Boolean(b)), - other => Err(Error::GetBoolean(other)), - } - } - - fn resolve_int(self) -> Result { - match self { - Value::Int(n) => Ok(Value::Int(n)), - Value::Long(n) => Ok(Value::Int(n as i32)), - other => Err(Error::GetInt(other)), - } - } - - fn resolve_long(self) -> Result { - match self { - Value::Int(n) => Ok(Value::Long(i64::from(n))), - Value::Long(n) => Ok(Value::Long(n)), - other => Err(Error::GetLong(other)), - } - } - - fn resolve_float(self) -> Result { - match self { - Value::Int(n) => Ok(Value::Float(n as f32)), - Value::Long(n) => Ok(Value::Float(n as f32)), - Value::Float(x) => Ok(Value::Float(x)), - Value::Double(x) => Ok(Value::Float(x as f32)), - Value::String(ref x) => match Self::parse_special_float(x) { - Some(f) => Ok(Value::Float(f)), - None => Err(Error::GetFloat(self)), - }, - other => Err(Error::GetFloat(other)), - } - } - - fn resolve_double(self) -> Result { - match self { - Value::Int(n) => Ok(Value::Double(f64::from(n))), - Value::Long(n) => Ok(Value::Double(n as f64)), - Value::Float(x) => Ok(Value::Double(f64::from(x))), - Value::Double(x) => Ok(Value::Double(x)), - Value::String(ref x) => match Self::parse_special_float(x) { - Some(f) => Ok(Value::Double(f64::from(f))), - None => Err(Error::GetDouble(self)), - }, - other => Err(Error::GetDouble(other)), - } - } - - /// IEEE 754 NaN and infinities are not valid JSON 
numbers. - /// So they are represented in JSON as strings. - fn parse_special_float(value: &str) -> Option { - match value { - "NaN" => Some(f32::NAN), - "INF" | "Infinity" => Some(f32::INFINITY), - "-INF" | "-Infinity" => Some(f32::NEG_INFINITY), - _ => None, - } - } - - fn resolve_bytes(self) -> Result { - match self { - Value::Bytes(bytes) => Ok(Value::Bytes(bytes)), - Value::String(s) => Ok(Value::Bytes(s.into_bytes())), - Value::Array(items) => Ok(Value::Bytes( - items - .into_iter() - .map(Value::try_u8) - .collect::, _>>()?, - )), - other => Err(Error::GetBytes(other)), - } - } - - fn resolve_string(self) -> Result { - match self { - Value::String(s) => Ok(Value::String(s)), - Value::Bytes(bytes) | Value::Fixed(_, bytes) => Ok(Value::String( - String::from_utf8(bytes).map_err(Error::ConvertToUtf8)?, - )), - other => Err(Error::GetString(other)), - } - } - - fn resolve_fixed(self, size: usize) -> Result { - match self { - Value::Fixed(n, bytes) => { - if n == size { - Ok(Value::Fixed(n, bytes)) - } else { - Err(Error::CompareFixedSizes { size, n }) - } - } - Value::String(s) => Ok(Value::Fixed(s.len(), s.into_bytes())), - Value::Bytes(s) => { - if s.len() == size { - Ok(Value::Fixed(size, s)) - } else { - Err(Error::CompareFixedSizes { size, n: s.len() }) - } - } - other => Err(Error::GetStringForFixed(other)), - } - } - - pub(crate) fn resolve_enum( - self, - symbols: &[String], - enum_default: &Option, - _field_default: &Option, - ) -> Result { - let validate_symbol = |symbol: String, symbols: &[String]| { - if let Some(index) = symbols.iter().position(|item| item == &symbol) { - Ok(Value::Enum(index as u32, symbol)) - } else { - match enum_default { - Some(default) => { - if let Some(index) = symbols.iter().position(|item| item == default) { - Ok(Value::Enum(index as u32, default.clone())) - } else { - Err(Error::GetEnumDefault { - symbol, - symbols: symbols.into(), - }) - } - } - _ => Err(Error::GetEnumDefault { - symbol, - symbols: symbols.into(), - }), 
- } - } - }; - - match self { - Value::Enum(_raw_index, s) => validate_symbol(s, symbols), - Value::String(s) => validate_symbol(s, symbols), - other => Err(Error::GetEnum(other)), - } - } - - fn resolve_union + Debug>( - self, - schema: &UnionSchema, - names: &HashMap, - enclosing_namespace: &Namespace, - field_default: &Option, - ) -> Result { - let v = match self { - // Both are unions case. - Value::Union(_i, v) => *v, - // Reader is a union, but writer is not. - v => v, - }; - let (i, inner) = schema - .find_schema_with_known_schemata(&v, Some(names), enclosing_namespace) - .ok_or(Error::FindUnionVariant)?; - - Ok(Value::Union( - i as u32, - Box::new(v.resolve_internal(inner, names, enclosing_namespace, field_default)?), - )) - } - - fn resolve_array + Debug>( - self, - schema: &Schema, - names: &HashMap, - enclosing_namespace: &Namespace, - ) -> Result { - match self { - Value::Array(items) => Ok(Value::Array( - items - .into_iter() - .map(|item| item.resolve_internal(schema, names, enclosing_namespace, &None)) - .collect::>()?, - )), - other => Err(Error::GetArray { - expected: schema.into(), - other, - }), - } - } - - fn resolve_map + Debug>( - self, - schema: &Schema, - names: &HashMap, - enclosing_namespace: &Namespace, - ) -> Result { - match self { - Value::Map(items) => Ok(Value::Map( - items - .into_iter() - .map(|(key, value)| { - value - .resolve_internal(schema, names, enclosing_namespace, &None) - .map(|value| (key, value)) - }) - .collect::>()?, - )), - other => Err(Error::GetMap { - expected: schema.into(), - other, - }), - } - } - - fn resolve_record + Debug>( - self, - fields: &[RecordField], - names: &HashMap, - enclosing_namespace: &Namespace, - ) -> Result { - let mut items = match self { - Value::Map(items) => Ok(items), - Value::Record(fields) => Ok(fields.into_iter().collect::>()), - other => Err(Error::GetRecord { - expected: fields - .iter() - .map(|field| (field.name.clone(), field.schema.clone().into())) - .collect(), - other, - }), 
- }?; - - let new_fields = fields - .iter() - .map(|field| { - let value = match items.remove(&field.name) { - Some(value) => value, - None => match field.default { - Some(ref value) => match field.schema { - Schema::Enum(EnumSchema { - ref symbols, - ref default, - .. - }) => Value::from(value.clone()).resolve_enum( - symbols, - default, - &field.default.clone(), - )?, - Schema::Union(ref union_schema) => { - let first = &union_schema.variants()[0]; - // NOTE: this match exists only to optimize null defaults for large - // backward-compatible schemas with many nullable fields - match first { - Schema::Null => Value::Union(0, Box::new(Value::Null)), - _ => Value::Union( - 0, - Box::new(Value::from(value.clone()).resolve_internal( - first, - names, - enclosing_namespace, - &field.default, - )?), - ), - } - } - _ => Value::from(value.clone()), - }, - None => { - return Err(Error::GetField(field.name.clone())); - } - }, - }; - value - .resolve_internal(&field.schema, names, enclosing_namespace, &field.default) - .map(|value| (field.name.clone(), value)) - }) - .collect::, _>>()?; - - Ok(Value::Record(new_fields)) - } - - fn try_u8(self) -> AvroResult { - let int = self.resolve(&Schema::Int)?; - if let Value::Int(n) = int { - if n >= 0 && n <= i32::from(u8::MAX) { - return Ok(n as u8); - } - } - - Err(Error::GetU8(int)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - duration::{Days, Millis, Months}, - schema::RecordFieldOrder, - }; - use apache_avro_test_helper::{ - logger::{assert_logged, assert_not_logged}, - TestResult, - }; - use num_bigint::BigInt; - use pretty_assertions::assert_eq; - use serde_json::json; - - #[test] - fn avro_3809_validate_nested_records_with_implicit_namespace() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": { - "type": "record", - "name": "middle_record_name", - "namespace": 
"middle_namespace", - "fields": [ - { - "name": "middle_field_1", - "type": { - "type": "record", - "name": "inner_record_name", - "fields": [ - { "name": "inner_field_1", "type": "double" } - ] - } - }, - { "name": "middle_field_2", "type": "inner_record_name" } - ] - } - } - ] - }"#, - )?; - let value = Value::Record(vec![( - "outer_field_1".into(), - Value::Record(vec![ - ( - "middle_field_1".into(), - Value::Record(vec![("inner_field_1".into(), Value::Double(1.2f64))]), - ), - ( - "middle_field_2".into(), - Value::Record(vec![("inner_field_1".into(), Value::Double(1.6f64))]), - ), - ]), - )]); - - assert!(value.validate(&schema)); - Ok(()) - } - - #[test] - fn validate() -> TestResult { - let value_schema_valid = vec![ - (Value::Int(42), Schema::Int, true, ""), - (Value::Int(43), Schema::Long, true, ""), - (Value::Float(43.2), Schema::Float, true, ""), - (Value::Float(45.9), Schema::Double, true, ""), - ( - Value::Int(42), - Schema::Boolean, - false, - "Invalid value: Int(42) for schema: Boolean. Reason: Unsupported value-schema combination! Value: Int(42), schema: Boolean", - ), - ( - Value::Union(0, Box::new(Value::Null)), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), - true, - "", - ), - ( - Value::Union(1, Box::new(Value::Int(42))), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), - true, - "", - ), - ( - Value::Union(0, Box::new(Value::Null)), - Schema::Union(UnionSchema::new(vec![Schema::Double, Schema::Int])?), - false, - "Invalid value: Union(0, Null) for schema: Union(UnionSchema { schemas: [Double, Int], variant_index: {Int: 1, Double: 0} }). Reason: Unsupported value-schema combination! 
Value: Null, schema: Double", - ), - ( - Value::Union(3, Box::new(Value::Int(42))), - Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Double, - Schema::String, - Schema::Int, - ]) - ?, - ), - true, - "", - ), - ( - Value::Union(1, Box::new(Value::Long(42i64))), - Schema::Union( - UnionSchema::new(vec![Schema::Null, Schema::TimestampMillis])?, - ), - true, - "", - ), - ( - Value::Union(2, Box::new(Value::Long(1_i64))), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), - false, - "Invalid value: Union(2, Long(1)) for schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }). Reason: No schema in the union at position '2'", - ), - ( - Value::Array(vec![Value::Long(42i64)]), - Schema::array(Schema::Long), - true, - "", - ), - ( - Value::Array(vec![Value::Boolean(true)]), - Schema::array(Schema::Long), - false, - "Invalid value: Array([Boolean(true)]) for schema: Array(ArraySchema { items: Long, attributes: {} }). Reason: Unsupported value-schema combination! Value: Boolean(true), schema: Long", - ), - (Value::Record(vec![]), Schema::Null, false, "Invalid value: Record([]) for schema: Null. Reason: Unsupported value-schema combination! Value: Record([]), schema: Null"), - ( - Value::Fixed(12, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), - Schema::Duration, - true, - "", - ), - ( - Value::Fixed(11, vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - Schema::Duration, - false, - "Invalid value: Fixed(11, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) for schema: Duration. 
Reason: The value's size ('11') must be exactly 12 to be a Duration", - ), - ( - Value::Record(vec![("unknown_field_name".to_string(), Value::Null)]), - Schema::Record(RecordSchema { - name: Name::new("record_name").unwrap(), - aliases: None, - doc: None, - fields: vec![RecordField { - name: "field_name".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Int, - order: RecordFieldOrder::Ignore, - position: 0, - custom_attributes: Default::default(), - }], - lookup: Default::default(), - attributes: Default::default(), - }), - false, - r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Int, order: Ignore, position: 0, custom_attributes: {} }], lookup: {}, attributes: {} }). Reason: There is no schema field for field 'unknown_field_name'"#, - ), - ( - Value::Record(vec![("field_name".to_string(), Value::Null)]), - Schema::Record(RecordSchema { - name: Name::new("record_name").unwrap(), - aliases: None, - doc: None, - fields: vec![RecordField { - name: "field_name".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Ref { - name: Name::new("missing").unwrap(), - }, - order: RecordFieldOrder::Ignore, - position: 0, - custom_attributes: Default::default(), - }], - lookup: [("field_name".to_string(), 0)].iter().cloned().collect(), - attributes: Default::default(), - }), - false, - r#"Invalid value: Record([("field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Ref { name: Name { name: "missing", namespace: None } }, order: Ignore, position: 0, custom_attributes: {} }], lookup: {"field_name": 0}, attributes: {} }). 
Reason: Unresolved schema reference: 'Name { name: "missing", namespace: None }'. Parsed names: []"#, - ), - ]; - - for (value, schema, valid, expected_err_message) in value_schema_valid.into_iter() { - let err_message = - value.validate_internal::(&schema, &HashMap::default(), &None); - assert_eq!(valid, err_message.is_none()); - if !valid { - let full_err_message = format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - value, - schema, - err_message.unwrap() - ); - assert_eq!(expected_err_message, full_err_message); - } - } - - Ok(()) - } - - #[test] - fn validate_fixed() -> TestResult { - let schema = Schema::Fixed(FixedSchema { - size: 4, - name: Name::new("some_fixed").unwrap(), - aliases: None, - doc: None, - default: None, - attributes: Default::default(), - }); - - assert!(Value::Fixed(4, vec![0, 0, 0, 0]).validate(&schema)); - let value = Value::Fixed(5, vec![0, 0, 0, 0, 0]); - assert!(!value.validate(&schema)); - assert_logged( - format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - value, schema, "The value's size (5) is different than the schema's size (4)" - ) - .as_str(), - ); - - assert!(Value::Bytes(vec![0, 0, 0, 0]).validate(&schema)); - let value = Value::Bytes(vec![0, 0, 0, 0, 0]); - assert!(!value.validate(&schema)); - assert_logged( - format!( - "Invalid value: {:?} for schema: {:?}. 
Reason: {}", - value, schema, "The bytes' length (5) is different than the schema's size (4)" - ) - .as_str(), - ); - - Ok(()) - } - - #[test] - fn validate_enum() -> TestResult { - let schema = Schema::Enum(EnumSchema { - name: Name::new("some_enum").unwrap(), - aliases: None, - doc: None, - symbols: vec![ - "spades".to_string(), - "hearts".to_string(), - "diamonds".to_string(), - "clubs".to_string(), - ], - default: None, - attributes: Default::default(), - }); - - assert!(Value::Enum(0, "spades".to_string()).validate(&schema)); - assert!(Value::String("spades".to_string()).validate(&schema)); - - let value = Value::Enum(1, "spades".to_string()); - assert!(!value.validate(&schema)); - assert_logged( - format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - value, schema, "Symbol 'spades' is not at position '1'" - ) - .as_str(), - ); - - let value = Value::Enum(1000, "spades".to_string()); - assert!(!value.validate(&schema)); - assert_logged( - format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - value, schema, "No symbol at position '1000'" - ) - .as_str(), - ); - - let value = Value::String("lorem".to_string()); - assert!(!value.validate(&schema)); - assert_logged( - format!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - value, schema, "'lorem' is not a member of the possible symbols" - ) - .as_str(), - ); - - let other_schema = Schema::Enum(EnumSchema { - name: Name::new("some_other_enum").unwrap(), - aliases: None, - doc: None, - symbols: vec![ - "hearts".to_string(), - "diamonds".to_string(), - "clubs".to_string(), - "spades".to_string(), - ], - default: None, - attributes: Default::default(), - }); - - let value = Value::Enum(0, "spades".to_string()); - assert!(!value.validate(&other_schema)); - assert_logged( - format!( - "Invalid value: {:?} for schema: {:?}. 
Reason: {}", - value, other_schema, "Symbol 'spades' is not at position '0'" - ) - .as_str(), - ); - - Ok(()) - } - - #[test] - fn validate_record() -> TestResult { - // { - // "type": "record", - // "fields": [ - // {"type": "long", "name": "a"}, - // {"type": "string", "name": "b"}, - // { - // "type": ["null", "int"] - // "name": "c", - // "default": null - // } - // ] - // } - let schema = Schema::Record(RecordSchema { - name: Name::new("some_record").unwrap(), - aliases: None, - doc: None, - fields: vec![ - RecordField { - name: "a".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::Long, - order: RecordFieldOrder::Ascending, - position: 0, - custom_attributes: Default::default(), - }, - RecordField { - name: "b".to_string(), - doc: None, - default: None, - aliases: None, - schema: Schema::String, - order: RecordFieldOrder::Ascending, - position: 1, - custom_attributes: Default::default(), - }, - RecordField { - name: "c".to_string(), - doc: None, - default: Some(JsonValue::Null), - aliases: None, - schema: Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), - order: RecordFieldOrder::Ascending, - position: 2, - custom_attributes: Default::default(), - }, - ], - lookup: [ - ("a".to_string(), 0), - ("b".to_string(), 1), - ("c".to_string(), 2), - ] - .iter() - .cloned() - .collect(), - attributes: Default::default(), - }); - - assert!(Value::Record(vec![ - ("a".to_string(), Value::Long(42i64)), - ("b".to_string(), Value::String("foo".to_string())), - ]) - .validate(&schema)); - - let value = Value::Record(vec![ - ("b".to_string(), Value::String("foo".to_string())), - ("a".to_string(), Value::Long(42i64)), - ]); - assert!(value.validate(&schema)); - - let value = Value::Record(vec![ - ("a".to_string(), Value::Boolean(false)), - ("b".to_string(), Value::String("foo".to_string())), - ]); - assert!(!value.validate(&schema)); - assert_logged( - r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for 
schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Unsupported value-schema combination! Value: Boolean(false), schema: Long"#, - ); - - let value = Value::Record(vec![ - ("a".to_string(), Value::Long(42i64)), - ("c".to_string(), Value::String("foo".to_string())), - ]); - assert!(!value.validate(&schema)); - assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Could not find matching type in union"#, - ); - assert_not_logged( - r#"Invalid value: String("foo") for schema: Int. 
Reason: Unsupported value-schema combination"#, - ); - - let value = Value::Record(vec![ - ("a".to_string(), Value::Long(42i64)), - ("d".to_string(), Value::String("foo".to_string())), - ]); - assert!(!value.validate(&schema)); - assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: There is no schema field for field 'd'"#, - ); - - let value = Value::Record(vec![ - ("a".to_string(), Value::Long(42i64)), - ("b".to_string(), Value::String("foo".to_string())), - ("c".to_string(), Value::Null), - ("d".to_string(), Value::Null), - ]); - assert!(!value.validate(&schema)); - assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], 
lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: The value's records length (4) is greater than the schema's (3 fields)"#, - ); - - assert!(Value::Map( - vec![ - ("a".to_string(), Value::Long(42i64)), - ("b".to_string(), Value::String("foo".to_string())), - ] - .into_iter() - .collect() - ) - .validate(&schema)); - - assert!(!Value::Map( - vec![("d".to_string(), Value::Long(123_i64)),] - .into_iter() - .collect() - ) - .validate(&schema)); - assert_logged( - r#"Invalid value: Map({"d": Long(123)}) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). 
Reason: Field with name '"a"' is not a member of the map items -Field with name '"b"' is not a member of the map items"#, - ); - - let union_schema = Schema::Union(UnionSchema::new(vec![Schema::Null, schema])?); - - assert!(Value::Union( - 1, - Box::new(Value::Record(vec![ - ("a".to_string(), Value::Long(42i64)), - ("b".to_string(), Value::String("foo".to_string())), - ])) - ) - .validate(&union_schema)); - - assert!(Value::Union( - 1, - Box::new(Value::Map( - vec![ - ("a".to_string(), Value::Long(42i64)), - ("b".to_string(), Value::String("foo".to_string())), - ] - .into_iter() - .collect() - )) - ) - .validate(&union_schema)); - - Ok(()) - } - - #[test] - fn resolve_bytes_ok() -> TestResult { - let value = Value::Array(vec![Value::Int(0), Value::Int(42)]); - assert_eq!( - value.resolve(&Schema::Bytes)?, - Value::Bytes(vec![0u8, 42u8]) - ); - - Ok(()) - } - - #[test] - fn resolve_string_from_bytes() -> TestResult { - let value = Value::Bytes(vec![97, 98, 99]); - assert_eq!( - value.resolve(&Schema::String)?, - Value::String("abc".to_string()) - ); - - Ok(()) - } - - #[test] - fn resolve_string_from_fixed() -> TestResult { - let value = Value::Fixed(3, vec![97, 98, 99]); - assert_eq!( - value.resolve(&Schema::String)?, - Value::String("abc".to_string()) - ); - - Ok(()) - } - - #[test] - fn resolve_bytes_failure() { - let value = Value::Array(vec![Value::Int(2000), Value::Int(-42)]); - assert!(value.resolve(&Schema::Bytes).is_err()); - } - - #[test] - fn resolve_decimal_bytes() -> TestResult { - let value = Value::Decimal(Decimal::from(vec![1, 2, 3, 4, 5])); - value.clone().resolve(&Schema::Decimal(DecimalSchema { - precision: 10, - scale: 4, - inner: Box::new(Schema::Bytes), - }))?; - assert!(value.resolve(&Schema::String).is_err()); - - Ok(()) - } - - #[test] - fn resolve_decimal_invalid_scale() { - let value = Value::Decimal(Decimal::from(vec![1, 2])); - assert!(value - .resolve(&Schema::Decimal(DecimalSchema { - precision: 2, - scale: 3, - inner: 
Box::new(Schema::Bytes), - })) - .is_err()); - } - - #[test] - fn resolve_decimal_invalid_precision_for_length() { - let value = Value::Decimal(Decimal::from((1u8..=8u8).rev().collect::>())); - assert!(value - .resolve(&Schema::Decimal(DecimalSchema { - precision: 1, - scale: 0, - inner: Box::new(Schema::Bytes), - })) - .is_ok()); - } - - #[test] - fn resolve_decimal_fixed() { - let value = Value::Decimal(Decimal::from(vec![1, 2, 3, 4, 5])); - assert!(value - .clone() - .resolve(&Schema::Decimal(DecimalSchema { - precision: 10, - scale: 1, - inner: Box::new(Schema::Fixed(FixedSchema { - name: Name::new("decimal").unwrap(), - aliases: None, - size: 20, - doc: None, - default: None, - attributes: Default::default(), - })) - })) - .is_ok()); - assert!(value.resolve(&Schema::String).is_err()); - } - - #[test] - fn resolve_date() { - let value = Value::Date(2345); - assert!(value.clone().resolve(&Schema::Date).is_ok()); - assert!(value.resolve(&Schema::String).is_err()); - } - - #[test] - fn resolve_time_millis() { - let value = Value::TimeMillis(10); - assert!(value.clone().resolve(&Schema::TimeMillis).is_ok()); - assert!(value.resolve(&Schema::TimeMicros).is_err()); - } - - #[test] - fn resolve_time_micros() { - let value = Value::TimeMicros(10); - assert!(value.clone().resolve(&Schema::TimeMicros).is_ok()); - assert!(value.resolve(&Schema::TimeMillis).is_err()); - } - - #[test] - fn resolve_timestamp_millis() { - let value = Value::TimestampMillis(10); - assert!(value.clone().resolve(&Schema::TimestampMillis).is_ok()); - assert!(value.resolve(&Schema::Float).is_err()); - - let value = Value::Float(10.0f32); - assert!(value.resolve(&Schema::TimestampMillis).is_err()); - } - - #[test] - fn resolve_timestamp_micros() { - let value = Value::TimestampMicros(10); - assert!(value.clone().resolve(&Schema::TimestampMicros).is_ok()); - assert!(value.resolve(&Schema::Int).is_err()); - - let value = Value::Double(10.0); - 
assert!(value.resolve(&Schema::TimestampMicros).is_err()); - } - - #[test] - fn test_avro_3914_resolve_timestamp_nanos() { - let value = Value::TimestampNanos(10); - assert!(value.clone().resolve(&Schema::TimestampNanos).is_ok()); - assert!(value.resolve(&Schema::Int).is_err()); - - let value = Value::Double(10.0); - assert!(value.resolve(&Schema::TimestampNanos).is_err()); - } - - #[test] - fn test_avro_3853_resolve_timestamp_millis() { - let value = Value::LocalTimestampMillis(10); - assert!(value.clone().resolve(&Schema::LocalTimestampMillis).is_ok()); - assert!(value.resolve(&Schema::Float).is_err()); - - let value = Value::Float(10.0f32); - assert!(value.resolve(&Schema::LocalTimestampMillis).is_err()); - } - - #[test] - fn test_avro_3853_resolve_timestamp_micros() { - let value = Value::LocalTimestampMicros(10); - assert!(value.clone().resolve(&Schema::LocalTimestampMicros).is_ok()); - assert!(value.resolve(&Schema::Int).is_err()); - - let value = Value::Double(10.0); - assert!(value.resolve(&Schema::LocalTimestampMicros).is_err()); - } - - #[test] - fn test_avro_3916_resolve_timestamp_nanos() { - let value = Value::LocalTimestampNanos(10); - assert!(value.clone().resolve(&Schema::LocalTimestampNanos).is_ok()); - assert!(value.resolve(&Schema::Int).is_err()); - - let value = Value::Double(10.0); - assert!(value.resolve(&Schema::LocalTimestampNanos).is_err()); - } - - #[test] - fn resolve_duration() { - let value = Value::Duration(Duration::new( - Months::new(10), - Days::new(5), - Millis::new(3000), - )); - assert!(value.clone().resolve(&Schema::Duration).is_ok()); - assert!(value.resolve(&Schema::TimestampMicros).is_err()); - assert!(Value::Long(1i64).resolve(&Schema::Duration).is_err()); - } - - #[test] - fn resolve_uuid() -> TestResult { - let value = Value::Uuid(Uuid::parse_str("1481531d-ccc9-46d9-a56f-5b67459c0537")?); - assert!(value.clone().resolve(&Schema::Uuid).is_ok()); - assert!(value.resolve(&Schema::TimestampMicros).is_err()); - - Ok(()) - } - - 
#[test] - fn avro_3678_resolve_float_to_double() { - let value = Value::Float(2345.1); - assert!(value.resolve(&Schema::Double).is_ok()); - } - - #[test] - fn test_avro_3621_resolve_to_nullable_union() -> TestResult { - let schema = Schema::parse_str( - r#"{ - "type": "record", - "name": "root", - "fields": [ - { - "name": "event", - "type": [ - "null", - { - "type": "record", - "name": "event", - "fields": [ - { - "name": "amount", - "type": "int" - }, - { - "name": "size", - "type": [ - "null", - "int" - ], - "default": null - } - ] - } - ], - "default": null - } - ] - }"#, - )?; - - let value = Value::Record(vec![( - "event".to_string(), - Value::Record(vec![("amount".to_string(), Value::Int(200))]), - )]); - assert!(value.resolve(&schema).is_ok()); - - let value = Value::Record(vec![( - "event".to_string(), - Value::Record(vec![("size".to_string(), Value::Int(1))]), - )]); - assert!(value.resolve(&schema).is_err()); - - Ok(()) - } - - #[test] - fn json_from_avro() -> TestResult { - assert_eq!(JsonValue::try_from(Value::Null)?, JsonValue::Null); - assert_eq!( - JsonValue::try_from(Value::Boolean(true))?, - JsonValue::Bool(true) - ); - assert_eq!( - JsonValue::try_from(Value::Int(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::Long(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::Float(1.0))?, - JsonValue::Number(Number::from_f64(1.0).unwrap()) - ); - assert_eq!( - JsonValue::try_from(Value::Double(1.0))?, - JsonValue::Number(Number::from_f64(1.0).unwrap()) - ); - assert_eq!( - JsonValue::try_from(Value::Bytes(vec![1, 2, 3]))?, - JsonValue::Array(vec![ - JsonValue::Number(1.into()), - JsonValue::Number(2.into()), - JsonValue::Number(3.into()) - ]) - ); - assert_eq!( - JsonValue::try_from(Value::String("test".into()))?, - JsonValue::String("test".into()) - ); - assert_eq!( - JsonValue::try_from(Value::Fixed(3, vec![1, 2, 3]))?, - JsonValue::Array(vec![ - JsonValue::Number(1.into()), - 
JsonValue::Number(2.into()), - JsonValue::Number(3.into()) - ]) - ); - assert_eq!( - JsonValue::try_from(Value::Enum(1, "test_enum".into()))?, - JsonValue::String("test_enum".into()) - ); - assert_eq!( - JsonValue::try_from(Value::Union(1, Box::new(Value::String("test_enum".into()))))?, - JsonValue::String("test_enum".into()) - ); - assert_eq!( - JsonValue::try_from(Value::Array(vec![ - Value::Int(1), - Value::Int(2), - Value::Int(3) - ]))?, - JsonValue::Array(vec![ - JsonValue::Number(1.into()), - JsonValue::Number(2.into()), - JsonValue::Number(3.into()) - ]) - ); - assert_eq!( - JsonValue::try_from(Value::Map( - vec![ - ("v1".to_string(), Value::Int(1)), - ("v2".to_string(), Value::Int(2)), - ("v3".to_string(), Value::Int(3)) - ] - .into_iter() - .collect() - ))?, - JsonValue::Object( - vec![ - ("v1".to_string(), JsonValue::Number(1.into())), - ("v2".to_string(), JsonValue::Number(2.into())), - ("v3".to_string(), JsonValue::Number(3.into())) - ] - .into_iter() - .collect() - ) - ); - assert_eq!( - JsonValue::try_from(Value::Record(vec![ - ("v1".to_string(), Value::Int(1)), - ("v2".to_string(), Value::Int(2)), - ("v3".to_string(), Value::Int(3)) - ]))?, - JsonValue::Object( - vec![ - ("v1".to_string(), JsonValue::Number(1.into())), - ("v2".to_string(), JsonValue::Number(2.into())), - ("v3".to_string(), JsonValue::Number(3.into())) - ] - .into_iter() - .collect() - ) - ); - assert_eq!( - JsonValue::try_from(Value::Date(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::Decimal(vec![1, 2, 3].into()))?, - JsonValue::Array(vec![ - JsonValue::Number(1.into()), - JsonValue::Number(2.into()), - JsonValue::Number(3.into()) - ]) - ); - assert_eq!( - JsonValue::try_from(Value::TimeMillis(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::TimeMicros(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::TimestampMillis(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - 
JsonValue::try_from(Value::TimestampMicros(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::TimestampNanos(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::LocalTimestampMillis(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::LocalTimestampMicros(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::LocalTimestampNanos(1))?, - JsonValue::Number(1.into()) - ); - assert_eq!( - JsonValue::try_from(Value::Duration( - [1u8, 2u8, 3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8, 10u8, 11u8, 12u8].into() - ))?, - JsonValue::Array(vec![ - JsonValue::Number(1.into()), - JsonValue::Number(2.into()), - JsonValue::Number(3.into()), - JsonValue::Number(4.into()), - JsonValue::Number(5.into()), - JsonValue::Number(6.into()), - JsonValue::Number(7.into()), - JsonValue::Number(8.into()), - JsonValue::Number(9.into()), - JsonValue::Number(10.into()), - JsonValue::Number(11.into()), - JsonValue::Number(12.into()), - ]) - ); - assert_eq!( - JsonValue::try_from(Value::Uuid(Uuid::parse_str( - "936DA01F-9ABD-4D9D-80C7-02AF85C822A8" - )?))?, - JsonValue::String("936da01f-9abd-4d9d-80c7-02af85c822a8".into()) - ); - - Ok(()) - } - - #[test] - fn test_avro_3433_recursive_resolves_record() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer = Value::Record(vec![("a".into(), inner_value1), ("b".into(), inner_value2)]); - outer - .resolve(&schema) - .expect("Record definition defined in one field must be available in other field"); - - Ok(()) - } - - #[test] - fn 
test_avro_3433_recursive_resolves_array() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"array", - "items": { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - } - }, - { - "name":"b", - "type": { - "type":"map", - "values":"Inner" - } - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ("a".into(), Value::Array(vec![inner_value1])), - ( - "b".into(), - Value::Map(vec![("akey".into(), inner_value2)].into_iter().collect()), - ), - ]); - outer_value - .resolve(&schema) - .expect("Record defined in array definition must be resolvable from map"); - - Ok(()) - } - - #[test] - fn test_avro_3433_recursive_resolves_map() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type": { - "type":"map", - "values":"Inner" - } - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ("a".into(), inner_value1), - ( - "b".into(), - Value::Map(vec![("akey".into(), inner_value2)].into_iter().collect()), - ), - ]); - outer_value - .resolve(&schema) - .expect("Record defined in record field must be resolvable from map field"); - - Ok(()) - } - - #[test] - fn test_avro_3433_recursive_resolves_record_wrapper() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" 
- }] - } - }, - { - "name":"b", - "type": { - "type":"record", - "name": "InnerWrapper", - "fields": [ { - "name":"j", - "type":"Inner" - }] - } - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![( - "j".into(), - Value::Record(vec![("z".into(), Value::Int(6))]), - )]); - let outer_value = - Value::Record(vec![("a".into(), inner_value1), ("b".into(), inner_value2)]); - outer_value.resolve(&schema).expect("Record schema defined in field must be resolvable in Record schema defined in other field"); - - Ok(()) - } - - #[test] - fn test_avro_3433_recursive_resolves_map_and_array() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"map", - "values": { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - } - }, - { - "name":"b", - "type": { - "type":"array", - "items":"Inner" - } - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); - let outer_value = Value::Record(vec![ - ( - "a".into(), - Value::Map(vec![("akey".into(), inner_value2)].into_iter().collect()), - ), - ("b".into(), Value::Array(vec![inner_value1])), - ]); - outer_value - .resolve(&schema) - .expect("Record defined in map definition must be resolvable from array"); - - Ok(()) - } - - #[test] - fn test_avro_3433_recursive_resolves_union() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":["null", { - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - }] - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - )?; - - let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value2 = Value::Record(vec![("z".into(), 
Value::Int(6))]); - let outer1 = Value::Record(vec![ - ("a".into(), inner_value1), - ("b".into(), inner_value2.clone()), - ]); - outer1 - .resolve(&schema) - .expect("Record definition defined in union must be resolved in other field"); - let outer2 = Value::Record(vec![("a".into(), Value::Null), ("b".into(), inner_value2)]); - outer2 - .resolve(&schema) - .expect("Record definition defined in union must be resolved in other field"); - - Ok(()) - } - - #[test] - fn test_avro_3461_test_multi_level_resolve_outer_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "space.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - 
("outer_field_2".into(), inner_record), - ]); - - outer_record_variation_1 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - outer_record_variation_2 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - outer_record_variation_3 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - - Ok(()) - } - - #[test] - fn test_avro_3461_test_multi_level_resolve_middle_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "middle_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, 
Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - outer_record_variation_1 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - outer_record_variation_2 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - outer_record_variation_3 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - - Ok(()) - } - - #[test] - fn test_avro_3461_test_multi_level_resolve_inner_namespace() -> TestResult { - let schema = r#" - { - "name": "record_name", - "namespace": "space", - "type": "record", - "fields": [ - { - "name": "outer_field_1", - "type": [ - "null", - { - "type": "record", - "name": "middle_record_name", - "namespace":"middle_namespace", - "fields":[ - { - "name":"middle_field_1", - "type":[ - "null", - { - "type":"record", - "name":"inner_record_name", - "namespace":"inner_namespace", - "fields":[ - { - "name":"inner_field_1", - "type":"double" - } - ] - } - ] - } - ] - } - ] - }, - { - "name": "outer_field_2", - "type" : "inner_namespace.inner_record_name" - } - ] - } - "#; - let schema = Schema::parse_str(schema)?; - - let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); - let middle_record_variation_1 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - )]); - let middle_record_variation_2 = Value::Record(vec![( - "middle_field_1".into(), - Value::Union(1, Box::new(inner_record.clone())), - )]); - let outer_record_variation_1 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(0, Box::new(Value::Null)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let outer_record_variation_2 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_1)), - ), - ("outer_field_2".into(), inner_record.clone()), - ]); - let 
outer_record_variation_3 = Value::Record(vec![ - ( - "outer_field_1".into(), - Value::Union(1, Box::new(middle_record_variation_2)), - ), - ("outer_field_2".into(), inner_record), - ]); - - outer_record_variation_1 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - outer_record_variation_2 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - outer_record_variation_3 - .resolve(&schema) - .expect("Should be able to resolve value to the schema that is it's definition"); - - Ok(()) - } - - #[test] - fn test_avro_3460_validation_with_refs() -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - )?; - - let inner_value_right = Value::Record(vec![("z".into(), Value::Int(3))]); - let inner_value_wrong1 = Value::Record(vec![("z".into(), Value::Null)]); - let inner_value_wrong2 = Value::Record(vec![("a".into(), Value::String("testing".into()))]); - let outer1 = Value::Record(vec![ - ("a".into(), inner_value_right.clone()), - ("b".into(), inner_value_wrong1), - ]); - - let outer2 = Value::Record(vec![ - ("a".into(), inner_value_right), - ("b".into(), inner_value_wrong2), - ]); - - assert!( - !outer1.validate(&schema), - "field b record is invalid against the schema" - ); // this should pass, but doesn't - assert!( - !outer2.validate(&schema), - "field b record is invalid against the schema" - ); // this should pass, but doesn't - - Ok(()) - } - - #[test] - fn test_avro_3460_validation_with_refs_real_struct() -> TestResult { - use crate::ser::Serializer; - use serde::Serialize; - - #[derive(Serialize, Clone)] - struct TestInner { - z: i32, - } - - #[derive(Serialize)] - struct TestRefSchemaStruct1 { - a: TestInner, - b: String, // 
could be literally anything - } - - #[derive(Serialize)] - struct TestRefSchemaStruct2 { - a: TestInner, - b: i32, // could be literally anything - } - - #[derive(Serialize)] - struct TestRefSchemaStruct3 { - a: TestInner, - b: Option, // could be literally anything - } - - let schema = Schema::parse_str( - r#" - { - "type":"record", - "name":"TestStruct", - "fields": [ - { - "name":"a", - "type":{ - "type":"record", - "name": "Inner", - "fields": [ { - "name":"z", - "type":"int" - }] - } - }, - { - "name":"b", - "type":"Inner" - } - ] - }"#, - )?; - - let test_inner = TestInner { z: 3 }; - let test_outer1 = TestRefSchemaStruct1 { - a: test_inner.clone(), - b: "testing".into(), - }; - let test_outer2 = TestRefSchemaStruct2 { - a: test_inner.clone(), - b: 24, - }; - let test_outer3 = TestRefSchemaStruct3 { - a: test_inner, - b: None, - }; - - let mut ser = Serializer::default(); - let test_outer1: Value = test_outer1.serialize(&mut ser)?; - let mut ser = Serializer::default(); - let test_outer2: Value = test_outer2.serialize(&mut ser)?; - let mut ser = Serializer::default(); - let test_outer3: Value = test_outer3.serialize(&mut ser)?; - - assert!( - !test_outer1.validate(&schema), - "field b record is invalid against the schema" - ); - assert!( - !test_outer2.validate(&schema), - "field b record is invalid against the schema" - ); - assert!( - !test_outer3.validate(&schema), - "field b record is invalid against the schema" - ); - - Ok(()) - } - - fn avro_3674_with_or_without_namespace(with_namespace: bool) -> TestResult { - use crate::ser::Serializer; - use serde::Serialize; - - let schema_str = r#"{ - "type": "record", - "name": "NamespacedMessage", - [NAMESPACE] - "fields": [ - { - "type": "record", - "name": "field_a", - "fields": [ - { - "name": "enum_a", - "type": { - "type": "enum", - "name": "EnumType", - "symbols": [ - "SYMBOL_1", - "SYMBOL_2" - ], - "default": "SYMBOL_1" - } - }, - { - "name": "enum_b", - "type": "EnumType" - } - ] - } - ] - }"#; - let 
schema_str = schema_str.replace( - "[NAMESPACE]", - if with_namespace { - r#""namespace": "com.domain","# - } else { - "" - }, - ); - - let schema = Schema::parse_str(&schema_str)?; - - #[derive(Serialize)] - enum EnumType { - #[serde(rename = "SYMBOL_1")] - Symbol1, - #[serde(rename = "SYMBOL_2")] - Symbol2, - } - - #[derive(Serialize)] - struct FieldA { - enum_a: EnumType, - enum_b: EnumType, - } - - #[derive(Serialize)] - struct NamespacedMessage { - field_a: FieldA, - } - - let msg = NamespacedMessage { - field_a: FieldA { - enum_a: EnumType::Symbol2, - enum_b: EnumType::Symbol1, - }, - }; - - let mut ser = Serializer::default(); - let test_value: Value = msg.serialize(&mut ser)?; - assert!(test_value.validate(&schema), "test_value should validate"); - assert!( - test_value.resolve(&schema).is_ok(), - "test_value should resolve" - ); - - Ok(()) - } - - #[test] - fn test_avro_3674_validate_no_namespace_resolution() -> TestResult { - avro_3674_with_or_without_namespace(false) - } - - #[test] - fn test_avro_3674_validate_with_namespace_resolution() -> TestResult { - avro_3674_with_or_without_namespace(true) - } - - fn avro_3688_schema_resolution_panic(set_field_b: bool) -> TestResult { - use crate::ser::Serializer; - use serde::{Deserialize, Serialize}; - - let schema_str = r#"{ - "type": "record", - "name": "Message", - "fields": [ - { - "name": "field_a", - "type": [ - "null", - { - "name": "Inner", - "type": "record", - "fields": [ - { - "name": "inner_a", - "type": "string" - } - ] - } - ], - "default": null - }, - { - "name": "field_b", - "type": [ - "null", - "Inner" - ], - "default": null - } - ] - }"#; - - #[derive(Serialize, Deserialize)] - struct Inner { - inner_a: String, - } - - #[derive(Serialize, Deserialize)] - struct Message { - field_a: Option, - field_b: Option, - } - - let schema = Schema::parse_str(schema_str)?; - - let msg = Message { - field_a: Some(Inner { - inner_a: "foo".to_string(), - }), - field_b: if set_field_b { - Some(Inner { - 
inner_a: "bar".to_string(), - }) - } else { - None - }, - }; - - let mut ser = Serializer::default(); - let test_value: Value = msg.serialize(&mut ser)?; - assert!(test_value.validate(&schema), "test_value should validate"); - assert!( - test_value.resolve(&schema).is_ok(), - "test_value should resolve" - ); - - Ok(()) - } - - #[test] - fn test_avro_3688_field_b_not_set() -> TestResult { - avro_3688_schema_resolution_panic(false) - } - - #[test] - fn test_avro_3688_field_b_set() -> TestResult { - avro_3688_schema_resolution_panic(true) - } - - #[test] - fn test_avro_3764_use_resolve_schemata() -> TestResult { - let referenced_schema = - r#"{"name": "enumForReference", "type": "enum", "symbols": ["A", "B"]}"#; - let main_schema = r#"{"name": "recordWithReference", "type": "record", "fields": [{"name": "reference", "type": "enumForReference"}]}"#; - - let value: serde_json::Value = serde_json::from_str( - r#" - { - "reference": "A" - } - "#, - )?; - - let avro_value = Value::from(value); - - let schemas = Schema::parse_list(&[main_schema, referenced_schema])?; - - let main_schema = schemas.first().unwrap(); - let schemata: Vec<_> = schemas.iter().skip(1).collect(); - - let resolve_result = avro_value.clone().resolve_schemata(main_schema, schemata); - - assert!( - resolve_result.is_ok(), - "result of resolving with schemata should be ok, got: {:?}", - resolve_result - ); - - let resolve_result = avro_value.resolve(main_schema); - assert!( - resolve_result.is_err(), - "result of resolving without schemata should be err, got: {:?}", - resolve_result - ); - - Ok(()) - } - - #[test] - fn test_avro_3767_union_resolve_complex_refs() -> TestResult { - let referenced_enum = - r#"{"name": "enumForReference", "type": "enum", "symbols": ["A", "B"]}"#; - let referenced_record = r#"{"name": "recordForReference", "type": "record", "fields": [{"name": "refInRecord", "type": "enumForReference"}]}"#; - let main_schema = r#"{"name": "recordWithReference", "type": "record", "fields": 
[{"name": "reference", "type": ["null", "recordForReference"]}]}"#; - - let value: serde_json::Value = serde_json::from_str( - r#" - { - "reference": { - "refInRecord": "A" - } - } - "#, - )?; - - let avro_value = Value::from(value); - - let schemata = Schema::parse_list(&[referenced_enum, referenced_record, main_schema])?; - - let main_schema = schemata.last().unwrap(); - let other_schemata: Vec<&Schema> = schemata.iter().take(2).collect(); - - let resolve_result = avro_value.resolve_schemata(main_schema, other_schemata); - - assert!( - resolve_result.is_ok(), - "result of resolving with schemata should be ok, got: {:?}", - resolve_result - ); - - assert!( - resolve_result?.validate_schemata(schemata.iter().collect()), - "result of validation with schemata should be true" - ); - - Ok(()) - } - - #[test] - fn test_avro_3782_incorrect_decimal_resolving() -> TestResult { - let schema = r#"{"name": "decimalSchema", "logicalType": "decimal", "type": "fixed", "precision": 8, "scale": 0, "size": 8}"#; - - let avro_value = Value::Decimal(Decimal::from( - BigInt::from(12345678u32).to_signed_bytes_be(), - )); - let schema = Schema::parse_str(schema)?; - let resolve_result = avro_value.resolve(&schema); - assert!( - resolve_result.is_ok(), - "resolve result must be ok, got: {resolve_result:?}" - ); - - Ok(()) - } - - #[test] - fn test_avro_3779_bigdecimal_resolving() -> TestResult { - let schema = - r#"{"name": "bigDecimalSchema", "logicalType": "big-decimal", "type": "bytes" }"#; - - let avro_value = Value::BigDecimal(BigDecimal::from(12345678u32)); - let schema = Schema::parse_str(schema)?; - let resolve_result: AvroResult = avro_value.resolve(&schema); - assert!( - resolve_result.is_ok(), - "resolve result must be ok, got: {resolve_result:?}" - ); - - Ok(()) - } - - #[test] - fn test_avro_3892_resolve_fixed_from_bytes() -> TestResult { - let value = Value::Bytes(vec![97, 98, 99]); - assert_eq!( - value.resolve(&Schema::Fixed(FixedSchema { - name: "test".into(), - aliases: 
None, - doc: None, - size: 3, - default: None, - attributes: Default::default() - }))?, - Value::Fixed(3, vec![97, 98, 99]) - ); - - let value = Value::Bytes(vec![97, 99]); - assert!(value - .resolve(&Schema::Fixed(FixedSchema { - name: "test".into(), - aliases: None, - doc: None, - size: 3, - default: None, - attributes: Default::default() - })) - .is_err(),); - - let value = Value::Bytes(vec![97, 98, 99, 100]); - assert!(value - .resolve(&Schema::Fixed(FixedSchema { - name: "test".into(), - aliases: None, - doc: None, - size: 3, - default: None, - attributes: Default::default() - })) - .is_err(),); - - Ok(()) - } - - #[test] - fn avro_3928_from_serde_value_to_types_value() { - assert_eq!(Value::from(serde_json::Value::Null), Value::Null); - assert_eq!(Value::from(json!(true)), Value::Boolean(true)); - assert_eq!(Value::from(json!(false)), Value::Boolean(false)); - assert_eq!(Value::from(json!(0)), Value::Int(0)); - assert_eq!(Value::from(json!(i32::MIN)), Value::Int(i32::MIN)); - assert_eq!(Value::from(json!(i32::MAX)), Value::Int(i32::MAX)); - assert_eq!( - Value::from(json!(i32::MIN as i64 - 1)), - Value::Long(i32::MIN as i64 - 1) - ); - assert_eq!( - Value::from(json!(i32::MAX as i64 + 1)), - Value::Long(i32::MAX as i64 + 1) - ); - assert_eq!(Value::from(json!(1.23)), Value::Double(1.23)); - assert_eq!(Value::from(json!(-1.23)), Value::Double(-1.23)); - assert_eq!(Value::from(json!(u64::MIN)), Value::Int(u64::MIN as i32)); - assert_eq!(Value::from(json!(u64::MAX)), Value::Long(u64::MAX as i64)); - assert_eq!( - Value::from(json!("some text")), - Value::String("some text".into()) - ); - assert_eq!( - Value::from(json!(["text1", "text2", "text3"])), - Value::Array(vec![ - Value::String("text1".into()), - Value::String("text2".into()), - Value::String("text3".into()) - ]) - ); - assert_eq!( - Value::from(json!({"key1": "value1", "key2": "value2"})), - Value::Map( - vec![ - ("key1".into(), Value::String("value1".into())), - ("key2".into(), 
Value::String("value2".into())) - ] - .into_iter() - .collect() - ) - ); - } - - #[test] - fn avro_4024_resolve_double_from_unknown_string_err() -> TestResult { - let schema = Schema::parse_str(r#"{"type": "double"}"#)?; - let value = Value::String("unknown".to_owned()); - match value.resolve(&schema) { - Err(err @ Error::GetDouble(_)) => { - assert_eq!( - format!("{err:?}"), - r#"Expected Value::Double, Value::Float, Value::Int, Value::Long or Value::String ("NaN", "INF", "Infinity", "-INF" or "-Infinity"), got: String("unknown")"# - ); - } - other => { - panic!("Expected Error::GetDouble, got {other:?}"); - } - } - Ok(()) - } - - #[test] - fn avro_4024_resolve_float_from_unknown_string_err() -> TestResult { - let schema = Schema::parse_str(r#"{"type": "float"}"#)?; - let value = Value::String("unknown".to_owned()); - match value.resolve(&schema) { - Err(err @ Error::GetFloat(_)) => { - assert_eq!( - format!("{err:?}"), - r#"Expected Value::Float, Value::Double, Value::Int, Value::Long or Value::String ("NaN", "INF", "Infinity", "-INF" or "-Infinity"), got: String("unknown")"# - ); - } - other => { - panic!("Expected Error::GetFloat, got {other:?}"); - } - } - Ok(()) - } - - #[test] - fn avro_4029_resolve_from_unsupported_err() -> TestResult { - let data: Vec<(&str, Value, &str)> = vec!( - (r#"{ "name": "NAME", "type": "int" }"#, Value::Float(123_f32), "Expected Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "fixed", "size": 3 }"#, Value::Float(123_f32), "String expected for fixed, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "bytes" }"#, Value::Float(123_f32), "Expected Value::Bytes, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "string", "logicalType": "uuid" }"#, Value::String("abc-1234".into()), "Failed to convert &str to UUID: invalid group count: expected 5, found 2"), - (r#"{ "name": "NAME", "type": "string", "logicalType": "uuid" }"#, Value::Float(123_f32), "Expected Value::Uuid, got: Float(123.0)"), - (r#"{ "name": "NAME", 
"type": "bytes", "logicalType": "big-decimal" }"#, Value::Float(123_f32), "Expected Value::BigDecimal, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "fixed", "size": 12, "logicalType": "duration" }"#, Value::Float(123_f32), "Expected Value::Duration or Value::Fixed(12), got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 3 }"#, Value::Float(123_f32), "Expected Value::Decimal, Value::Bytes or Value::Fixed, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "bytes" }"#, Value::Array(vec!(Value::Long(256_i64))), "Unable to convert to u8, got Int(256)"), - (r#"{ "name": "NAME", "type": "int", "logicalType": "date" }"#, Value::Float(123_f32), "Expected Value::Date or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "int", "logicalType": "time-millis" }"#, Value::Float(123_f32), "Expected Value::TimeMillis or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "time-micros" }"#, Value::Float(123_f32), "Expected Value::TimeMicros, Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "timestamp-millis" }"#, Value::Float(123_f32), "Expected Value::TimestampMillis, Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "timestamp-micros" }"#, Value::Float(123_f32), "Expected Value::TimestampMicros, Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "timestamp-nanos" }"#, Value::Float(123_f32), "Expected Value::TimestampNanos, Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "local-timestamp-millis" }"#, Value::Float(123_f32), "Expected Value::LocalTimestampMillis, Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "local-timestamp-micros" }"#, Value::Float(123_f32), "Expected Value::LocalTimestampMicros, 
Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long", "logicalType": "local-timestamp-nanos" }"#, Value::Float(123_f32), "Expected Value::LocalTimestampNanos, Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "null" }"#, Value::Float(123_f32), "Expected Value::Null, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "boolean" }"#, Value::Float(123_f32), "Expected Value::Boolean, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "int" }"#, Value::Float(123_f32), "Expected Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "long" }"#, Value::Float(123_f32), "Expected Value::Long or Value::Int, got: Float(123.0)"), - (r#"{ "name": "NAME", "type": "float" }"#, Value::Boolean(false), r#"Expected Value::Float, Value::Double, Value::Int, Value::Long or Value::String ("NaN", "INF", "Infinity", "-INF" or "-Infinity"), got: Boolean(false)"#), - (r#"{ "name": "NAME", "type": "double" }"#, Value::Boolean(false), r#"Expected Value::Double, Value::Float, Value::Int, Value::Long or Value::String ("NaN", "INF", "Infinity", "-INF" or "-Infinity"), got: Boolean(false)"#), - (r#"{ "name": "NAME", "type": "string" }"#, Value::Boolean(false), "Expected Value::String, Value::Bytes or Value::Fixed, got: Boolean(false)"), - (r#"{ "name": "NAME", "type": "enum", "symbols": ["one", "two"] }"#, Value::Boolean(false), "Expected Value::Enum, got: Boolean(false)"), - ); - - for (schema_str, value, expected_error) in data { - let schema = Schema::parse_str(schema_str)?; - match value.resolve(&schema) { - Err(error) => { - assert_eq!(format!("{error}"), expected_error); - } - other => { - panic!("Expected '{expected_error}', got {other:?}"); - } - } - } - Ok(()) - } -} diff --git a/lang/rust/avro/src/util.rs b/lang/rust/avro/src/util.rs deleted file mode 100644 index 869ea15668f..00000000000 --- a/lang/rust/avro/src/util.rs +++ /dev/null @@ -1,288 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under 
one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::{schema::Documentation, AvroResult, Error}; -use serde_json::{Map, Value}; -use std::{ - io::Read, - sync::{ - atomic::{AtomicBool, AtomicUsize, Ordering}, - Once, - }, -}; - -/// Maximum number of bytes that can be allocated when decoding -/// Avro-encoded values. This is a protection against ill-formed -/// data, whose length field might be interpreted as enormous. -/// See max_allocation_bytes to change this limit. -pub const DEFAULT_MAX_ALLOCATION_BYTES: usize = 512 * 1024 * 1024; -static MAX_ALLOCATION_BYTES: AtomicUsize = AtomicUsize::new(DEFAULT_MAX_ALLOCATION_BYTES); -static MAX_ALLOCATION_BYTES_ONCE: Once = Once::new(); - -/// Whether to set serialization & deserialization traits -/// as `human_readable` or not. -/// See [set_serde_human_readable] to change this value. 
-// crate-visible for testing -pub(crate) static SERDE_HUMAN_READABLE: AtomicBool = AtomicBool::new(true); -static SERDE_HUMAN_READABLE_ONCE: Once = Once::new(); - -pub trait MapHelper { - fn string(&self, key: &str) -> Option; - - fn name(&self) -> Option { - self.string("name") - } - - fn doc(&self) -> Documentation { - self.string("doc") - } - - fn aliases(&self) -> Option>; -} - -impl MapHelper for Map { - fn string(&self, key: &str) -> Option { - self.get(key) - .and_then(|v| v.as_str()) - .map(|v| v.to_string()) - } - - fn aliases(&self) -> Option> { - // FIXME no warning when aliases aren't a json array of json strings - self.get("aliases") - .and_then(|aliases| aliases.as_array()) - .and_then(|aliases| { - aliases - .iter() - .map(|alias| alias.as_str()) - .map(|alias| alias.map(|a| a.to_string())) - .collect::>() - }) - } -} - -pub fn read_long(reader: &mut R) -> AvroResult { - zag_i64(reader) -} - -pub fn zig_i32(n: i32, buffer: &mut Vec) { - zig_i64(n as i64, buffer) -} - -pub fn zig_i64(n: i64, buffer: &mut Vec) { - encode_variable(((n << 1) ^ (n >> 63)) as u64, buffer) -} - -pub fn zag_i32(reader: &mut R) -> AvroResult { - let i = zag_i64(reader)?; - i32::try_from(i).map_err(|e| Error::ZagI32(e, i)) -} - -pub fn zag_i64(reader: &mut R) -> AvroResult { - let z = decode_variable(reader)?; - Ok(if z & 0x1 == 0 { - (z >> 1) as i64 - } else { - !(z >> 1) as i64 - }) -} - -fn encode_variable(mut z: u64, buffer: &mut Vec) { - loop { - if z <= 0x7F { - buffer.push((z & 0x7F) as u8); - break; - } else { - buffer.push((0x80 | (z & 0x7F)) as u8); - z >>= 7; - } - } -} - -fn decode_variable(reader: &mut R) -> AvroResult { - let mut i = 0u64; - let mut buf = [0u8; 1]; - - let mut j = 0; - loop { - if j > 9 { - // if j * 7 > 64 - return Err(Error::IntegerOverflow); - } - reader - .read_exact(&mut buf[..]) - .map_err(Error::ReadVariableIntegerBytes)?; - i |= (u64::from(buf[0] & 0x7F)) << (j * 7); - if (buf[0] >> 7) == 0 { - break; - } else { - j += 1; - } - } - - 
Ok(i) -} - -/// Set a new maximum number of bytes that can be allocated when decoding data. -/// Once called, the limit cannot be changed. -/// -/// **NOTE** This function must be called before decoding **any** data. The -/// library leverages [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html) -/// to set the limit either when calling this method, or when decoding for -/// the first time. -pub fn max_allocation_bytes(num_bytes: usize) -> usize { - MAX_ALLOCATION_BYTES_ONCE.call_once(|| { - MAX_ALLOCATION_BYTES.store(num_bytes, Ordering::Release); - }); - MAX_ALLOCATION_BYTES.load(Ordering::Acquire) -} - -pub fn safe_len(len: usize) -> AvroResult { - let max_bytes = max_allocation_bytes(DEFAULT_MAX_ALLOCATION_BYTES); - - if len <= max_bytes { - Ok(len) - } else { - Err(Error::MemoryAllocation { - desired: len, - maximum: max_bytes, - }) - } -} - -/// Set whether serializing/deserializing is marked as human readable in serde traits. -/// This will adjust the return value of `is_human_readable()` for both. -/// Once called, the value cannot be changed. -/// -/// **NOTE** This function must be called before serializing/deserializing **any** data. The -/// library leverages [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html) -/// to set the limit either when calling this method, or when decoding for -/// the first time. 
-pub fn set_serde_human_readable(human_readable: bool) { - SERDE_HUMAN_READABLE_ONCE.call_once(|| { - SERDE_HUMAN_READABLE.store(human_readable, Ordering::Release); - }); -} - -pub(crate) fn is_human_readable() -> bool { - SERDE_HUMAN_READABLE.load(Ordering::Acquire) -} - -#[cfg(test)] -mod tests { - use super::*; - use apache_avro_test_helper::TestResult; - use pretty_assertions::assert_eq; - - #[test] - fn test_zigzag() { - let mut a = Vec::new(); - let mut b = Vec::new(); - zig_i32(42i32, &mut a); - zig_i64(42i64, &mut b); - assert_eq!(a, b); - } - - #[test] - fn test_zig_i64() { - let mut s = Vec::new(); - - zig_i64(0, &mut s); - assert_eq!(s, [0]); - - s.clear(); - zig_i64(-1, &mut s); - assert_eq!(s, [1]); - - s.clear(); - zig_i64(1, &mut s); - assert_eq!(s, [2]); - - s.clear(); - zig_i64(-64, &mut s); - assert_eq!(s, [127]); - - s.clear(); - zig_i64(64, &mut s); - assert_eq!(s, [128, 1]); - - s.clear(); - zig_i64(i32::MAX as i64, &mut s); - assert_eq!(s, [254, 255, 255, 255, 15]); - - s.clear(); - zig_i64(i32::MAX as i64 + 1, &mut s); - assert_eq!(s, [128, 128, 128, 128, 16]); - - s.clear(); - zig_i64(i32::MIN as i64, &mut s); - assert_eq!(s, [255, 255, 255, 255, 15]); - - s.clear(); - zig_i64(i32::MIN as i64 - 1, &mut s); - assert_eq!(s, [129, 128, 128, 128, 16]); - - s.clear(); - zig_i64(i64::MAX, &mut s); - assert_eq!(s, [254, 255, 255, 255, 255, 255, 255, 255, 255, 1]); - - s.clear(); - zig_i64(i64::MIN, &mut s); - assert_eq!(s, [255, 255, 255, 255, 255, 255, 255, 255, 255, 1]); - } - - #[test] - fn test_zig_i32() { - let mut s = Vec::new(); - zig_i32(i32::MAX / 2, &mut s); - assert_eq!(s, [254, 255, 255, 255, 7]); - - s.clear(); - zig_i32(i32::MIN / 2, &mut s); - assert_eq!(s, [255, 255, 255, 255, 7]); - - s.clear(); - zig_i32(-(i32::MIN / 2), &mut s); - assert_eq!(s, [128, 128, 128, 128, 8]); - - s.clear(); - zig_i32(i32::MIN / 2 - 1, &mut s); - assert_eq!(s, [129, 128, 128, 128, 8]); - - s.clear(); - zig_i32(i32::MAX, &mut s); - assert_eq!(s, [254, 
255, 255, 255, 15]); - - s.clear(); - zig_i32(i32::MIN, &mut s); - assert_eq!(s, [255, 255, 255, 255, 15]); - } - - #[test] - fn test_overflow() { - let causes_left_shift_overflow: &[u8] = &[0xe1, 0xe1, 0xe1, 0xe1, 0xe1]; - assert!(decode_variable(&mut &*causes_left_shift_overflow).is_err()); - } - - #[test] - fn test_safe_len() -> TestResult { - assert_eq!(42usize, safe_len(42usize)?); - assert!(safe_len(1024 * 1024 * 1024).is_err()); - - Ok(()) - } -} diff --git a/lang/rust/avro/src/validator.rs b/lang/rust/avro/src/validator.rs deleted file mode 100644 index 2b4967d7f8b..00000000000 --- a/lang/rust/avro/src/validator.rs +++ /dev/null @@ -1,318 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::{schema::Namespace, AvroResult, Error}; -use regex_lite::Regex; -use std::sync::OnceLock; - -/// A validator that validates names and namespaces according to the Avro specification. -struct SpecificationValidator; - -/// A trait that validates schema names. -/// To register a custom one use [set_schema_name_validator]. -pub trait SchemaNameValidator: Send + Sync { - /// Returns the regex used to validate the schema name - /// according to the Avro specification. 
- fn regex(&self) -> &'static Regex { - static SCHEMA_NAME_ONCE: OnceLock = OnceLock::new(); - SCHEMA_NAME_ONCE.get_or_init(|| { - Regex::new( - // An optional namespace (with optional dots) followed by a name without any dots in it. - r"^((?P([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$", - ) - .unwrap() - }) - } - - /// Validates the schema name and returns the name and the optional namespace, - /// or [Error::InvalidSchemaName] if it is invalid. - fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)>; -} - -impl SchemaNameValidator for SpecificationValidator { - fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { - let regex = SchemaNameValidator::regex(self); - let caps = regex - .captures(schema_name) - .ok_or_else(|| Error::InvalidSchemaName(schema_name.to_string(), regex.as_str()))?; - Ok(( - caps["name"].to_string(), - caps.name("namespace").map(|s| s.as_str().to_string()), - )) - } -} - -static NAME_VALIDATOR_ONCE: OnceLock> = OnceLock::new(); - -/// Sets a custom schema name validator. -/// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. -/// -/// **Note**: This function must be called before parsing any schema because this will -/// register the default validator and the registration is one time only! -pub fn set_schema_name_validator( - validator: Box, -) -> Result<(), Box> { - debug!("Setting a custom schema name validator."); - NAME_VALIDATOR_ONCE.set(validator) -} - -pub(crate) fn validate_schema_name(schema_name: &str) -> AvroResult<(String, Namespace)> { - NAME_VALIDATOR_ONCE - .get_or_init(|| { - debug!("Going to use the default name validator."); - Box::new(SpecificationValidator) - }) - .validate(schema_name) -} - -/// A trait that validates schema namespaces. -/// To register a custom one use [set_schema_namespace_validator]. 
-pub trait SchemaNamespaceValidator: Send + Sync { - /// Returns the regex used to validate the schema namespace - /// according to the Avro specification. - fn regex(&self) -> &'static Regex { - static NAMESPACE_ONCE: OnceLock = OnceLock::new(); - NAMESPACE_ONCE.get_or_init(|| { - Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap() - }) - } - - /// Validates the schema namespace or [Error::InvalidNamespace] if it is invalid. - fn validate(&self, namespace: &str) -> AvroResult<()>; -} - -impl SchemaNamespaceValidator for SpecificationValidator { - fn validate(&self, ns: &str) -> AvroResult<()> { - let regex = SchemaNamespaceValidator::regex(self); - if !regex.is_match(ns) { - return Err(Error::InvalidNamespace(ns.to_string(), regex.as_str())); - } else { - Ok(()) - } - } -} - -static NAMESPACE_VALIDATOR_ONCE: OnceLock> = - OnceLock::new(); - -/// Sets a custom schema namespace validator. -/// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. -/// -/// **Note**: This function must be called before parsing any schema because this will -/// register the default validator and the registration is one time only! -pub fn set_schema_namespace_validator( - validator: Box, -) -> Result<(), Box> { - NAMESPACE_VALIDATOR_ONCE.set(validator) -} - -pub(crate) fn validate_namespace(ns: &str) -> AvroResult<()> { - NAMESPACE_VALIDATOR_ONCE - .get_or_init(|| { - debug!("Going to use the default namespace validator."); - Box::new(SpecificationValidator) - }) - .validate(ns) -} - -/// A trait that validates enum symbol names. -/// To register a custom one use [set_enum_symbol_name_validator]. -pub trait EnumSymbolNameValidator: Send + Sync { - /// Returns the regex used to validate the symbols of enum schema - /// according to the Avro specification. 
- fn regex(&self) -> &'static Regex { - static ENUM_SYMBOL_NAME_ONCE: OnceLock = OnceLock::new(); - ENUM_SYMBOL_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) - } - - /// Validates the symbols of an Enum schema name and returns nothing (unit), - /// or [Error::EnumSymbolName] if it is invalid. - fn validate(&self, name: &str) -> AvroResult<()>; -} - -impl EnumSymbolNameValidator for SpecificationValidator { - fn validate(&self, symbol: &str) -> AvroResult<()> { - let regex = EnumSymbolNameValidator::regex(self); - if !regex.is_match(symbol) { - return Err(Error::EnumSymbolName(symbol.to_string())); - } - - Ok(()) - } -} - -static ENUM_SYMBOL_NAME_VALIDATOR_ONCE: OnceLock> = - OnceLock::new(); - -/// Sets a custom enum symbol name validator. -/// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. -/// -/// **Note**: This function must be called before parsing any schema because this will -/// register the default validator and the registration is one time only! -pub fn set_enum_symbol_name_validator( - validator: Box, -) -> Result<(), Box> { - ENUM_SYMBOL_NAME_VALIDATOR_ONCE.set(validator) -} - -pub(crate) fn validate_enum_symbol_name(symbol: &str) -> AvroResult<()> { - ENUM_SYMBOL_NAME_VALIDATOR_ONCE - .get_or_init(|| { - debug!("Going to use the default enum symbol name validator."); - Box::new(SpecificationValidator) - }) - .validate(symbol) -} - -/// A trait that validates record field names. -/// To register a custom one use [set_record_field_name_validator]. -pub trait RecordFieldNameValidator: Send + Sync { - /// Returns the regex used to validate the record field names - /// according to the Avro specification. 
- fn regex(&self) -> &'static Regex { - static FIELD_NAME_ONCE: OnceLock = OnceLock::new(); - FIELD_NAME_ONCE.get_or_init(|| Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap()) - } - - /// Validates the record field's names and returns nothing (unit), - /// or [Error::FieldName] if it is invalid. - fn validate(&self, name: &str) -> AvroResult<()>; -} - -impl RecordFieldNameValidator for SpecificationValidator { - fn validate(&self, field_name: &str) -> AvroResult<()> { - let regex = RecordFieldNameValidator::regex(self); - if !regex.is_match(field_name) { - return Err(Error::FieldName(field_name.to_string())); - } - - Ok(()) - } -} - -static RECORD_FIELD_NAME_VALIDATOR_ONCE: OnceLock> = - OnceLock::new(); - -/// Sets a custom record field name validator. -/// -/// Returns a unit if the registration was successful or the already -/// registered validator if the registration failed. -/// -/// **Note**: This function must be called before parsing any schema because this will -/// register the default validator and the registration is one time only! 
-pub fn set_record_field_name_validator( - validator: Box, -) -> Result<(), Box> { - RECORD_FIELD_NAME_VALIDATOR_ONCE.set(validator) -} - -pub(crate) fn validate_record_field_name(field_name: &str) -> AvroResult<()> { - RECORD_FIELD_NAME_VALIDATOR_ONCE - .get_or_init(|| { - debug!("Going to use the default record field name validator."); - Box::new(SpecificationValidator) - }) - .validate(field_name) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::schema::Name; - use apache_avro_test_helper::TestResult; - - #[test] - fn avro_3900_default_name_validator_with_valid_ns() -> TestResult { - validate_schema_name("example")?; - Ok(()) - } - - #[test] - fn avro_3900_default_name_validator_with_invalid_ns() -> TestResult { - assert!(validate_schema_name("com-example").is_err()); - Ok(()) - } - - #[test] - fn test_avro_3897_disallow_invalid_namespaces_in_fully_qualified_name() -> TestResult { - let full_name = "ns.0.record1"; - let name = Name::new(full_name); - assert!(name.is_err()); - let validator = SpecificationValidator; - let expected = Error::InvalidSchemaName( - full_name.to_string(), - SchemaNameValidator::regex(&validator).as_str(), - ) - .to_string(); - let err = name.map_err(|e| e.to_string()).err().unwrap(); - pretty_assertions::assert_eq!(expected, err); - - let full_name = "ns..record1"; - let name = Name::new(full_name); - assert!(name.is_err()); - let expected = Error::InvalidSchemaName( - full_name.to_string(), - SchemaNameValidator::regex(&validator).as_str(), - ) - .to_string(); - let err = name.map_err(|e| e.to_string()).err().unwrap(); - pretty_assertions::assert_eq!(expected, err); - Ok(()) - } - - #[test] - fn avro_3900_default_namespace_validator_with_valid_ns() -> TestResult { - validate_namespace("com.example")?; - Ok(()) - } - - #[test] - fn avro_3900_default_namespace_validator_with_invalid_ns() -> TestResult { - assert!(validate_namespace("com-example").is_err()); - Ok(()) - } - - #[test] - fn 
avro_3900_default_enum_symbol_validator_with_valid_symbol_name() -> TestResult { - validate_enum_symbol_name("spades")?; - Ok(()) - } - - #[test] - fn avro_3900_default_enum_symbol_validator_with_invalid_symbol_name() -> TestResult { - assert!(validate_enum_symbol_name("com-example").is_err()); - Ok(()) - } - - #[test] - fn avro_3900_default_record_field_validator_with_valid_name() -> TestResult { - validate_record_field_name("test")?; - Ok(()) - } - - #[test] - fn avro_3900_default_record_field_validator_with_invalid_name() -> TestResult { - assert!(validate_record_field_name("com-example").is_err()); - Ok(()) - } -} diff --git a/lang/rust/avro/src/writer.rs b/lang/rust/avro/src/writer.rs deleted file mode 100644 index 5010bfff552..00000000000 --- a/lang/rust/avro/src/writer.rs +++ /dev/null @@ -1,1447 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Logic handling writing in Avro format at user level. 
-use crate::{ - encode::{encode, encode_internal, encode_to_vec}, - rabin::Rabin, - schema::{AvroSchema, ResolvedOwnedSchema, ResolvedSchema, Schema}, - ser::Serializer, - types::Value, - AvroResult, Codec, Error, -}; -use serde::Serialize; -use std::{collections::HashMap, io::Write, marker::PhantomData}; - -const DEFAULT_BLOCK_SIZE: usize = 16000; -const AVRO_OBJECT_HEADER: &[u8] = b"Obj\x01"; - -/// Main interface for writing Avro formatted values. -#[derive(typed_builder::TypedBuilder)] -pub struct Writer<'a, W> { - schema: &'a Schema, - writer: W, - #[builder(default, setter(skip))] - resolved_schema: Option>, - #[builder(default = Codec::Null)] - codec: Codec, - #[builder(default = DEFAULT_BLOCK_SIZE)] - block_size: usize, - #[builder(default = Vec::with_capacity(block_size), setter(skip))] - buffer: Vec, - #[builder(default, setter(skip))] - serializer: Serializer, - #[builder(default = 0, setter(skip))] - num_values: usize, - #[builder(default = generate_sync_marker())] - marker: [u8; 16], - #[builder(default = false)] - has_header: bool, - #[builder(default)] - user_metadata: HashMap, -} - -impl<'a, W: Write> Writer<'a, W> { - /// Creates a `Writer` given a `Schema` and something implementing the `io::Write` trait to write - /// to. - /// No compression `Codec` will be used. - pub fn new(schema: &'a Schema, writer: W) -> Self { - Writer::with_codec(schema, writer, Codec::Null) - } - - /// Creates a `Writer` with a specific `Codec` given a `Schema` and something implementing the - /// `io::Write` trait to write to. - pub fn with_codec(schema: &'a Schema, writer: W, codec: Codec) -> Self { - let mut w = Self::builder() - .schema(schema) - .writer(writer) - .codec(codec) - .build(); - w.resolved_schema = ResolvedSchema::try_from(schema).ok(); - w - } - - /// Creates a `Writer` with a specific `Codec` given a `Schema` and something implementing the - /// `io::Write` trait to write to. - /// If the `schema` is incomplete, i.e. 
contains `Schema::Ref`s then all dependencies must - /// be provided in `schemata`. - pub fn with_schemata( - schema: &'a Schema, - schemata: Vec<&'a Schema>, - writer: W, - codec: Codec, - ) -> Self { - let mut w = Self::builder() - .schema(schema) - .writer(writer) - .codec(codec) - .build(); - w.resolved_schema = ResolvedSchema::try_from(schemata).ok(); - w - } - - /// Creates a `Writer` that will append values to already populated - /// `std::io::Write` using the provided `marker` - /// No compression `Codec` will be used. - pub fn append_to(schema: &'a Schema, writer: W, marker: [u8; 16]) -> Self { - Writer::append_to_with_codec(schema, writer, Codec::Null, marker) - } - - /// Creates a `Writer` that will append values to already populated - /// `std::io::Write` using the provided `marker` - pub fn append_to_with_codec( - schema: &'a Schema, - writer: W, - codec: Codec, - marker: [u8; 16], - ) -> Self { - let mut w = Self::builder() - .schema(schema) - .writer(writer) - .codec(codec) - .marker(marker) - .has_header(true) - .build(); - w.resolved_schema = ResolvedSchema::try_from(schema).ok(); - w - } - - /// Creates a `Writer` that will append values to already populated - /// `std::io::Write` using the provided `marker` - pub fn append_to_with_codec_schemata( - schema: &'a Schema, - schemata: Vec<&'a Schema>, - writer: W, - codec: Codec, - marker: [u8; 16], - ) -> Self { - let mut w = Self::builder() - .schema(schema) - .writer(writer) - .codec(codec) - .marker(marker) - .has_header(true) - .build(); - w.resolved_schema = ResolvedSchema::try_from(schemata).ok(); - w - } - - /// Get a reference to the `Schema` associated to a `Writer`. - pub fn schema(&self) -> &'a Schema { - self.schema - } - - /// Append a compatible value (implementing the `ToAvro` trait) to a `Writer`, also performing - /// schema validation. - /// - /// Return the number of bytes written (it might be 0, see below). 
- /// - /// **NOTE** This function is not guaranteed to perform any actual write, since it relies on - /// internal buffering for performance reasons. If you want to be sure the value has been - /// written, then call [`flush`](struct.Writer.html#method.flush). - pub fn append>(&mut self, value: T) -> AvroResult { - let n = self.maybe_write_header()?; - - let avro = value.into(); - self.append_value_ref(&avro).map(|m| m + n) - } - - /// Append a compatible value to a `Writer`, also performing schema validation. - /// - /// Return the number of bytes written (it might be 0, see below). - /// - /// **NOTE** This function is not guaranteed to perform any actual write, since it relies on - /// internal buffering for performance reasons. If you want to be sure the value has been - /// written, then call [`flush`](struct.Writer.html#method.flush). - pub fn append_value_ref(&mut self, value: &Value) -> AvroResult { - let n = self.maybe_write_header()?; - - // Lazy init for users using the builder pattern with error throwing - match self.resolved_schema { - Some(ref rs) => { - write_value_ref_resolved(self.schema, rs, value, &mut self.buffer)?; - self.num_values += 1; - - if self.buffer.len() >= self.block_size { - return self.flush().map(|b| b + n); - } - - Ok(n) - } - None => { - let rs = ResolvedSchema::try_from(self.schema)?; - self.resolved_schema = Some(rs); - self.append_value_ref(value) - } - } - } - - /// Append anything implementing the `Serialize` trait to a `Writer` for - /// [`serde`](https://docs.serde.rs/serde/index.html) compatibility, also performing schema - /// validation. - /// - /// Return the number of bytes written. - /// - /// **NOTE** This function is not guaranteed to perform any actual write, since it relies on - /// internal buffering for performance reasons. If you want to be sure the value has been - /// written, then call [`flush`](struct.Writer.html#method.flush). 
- pub fn append_ser(&mut self, value: S) -> AvroResult { - let avro_value = value.serialize(&mut self.serializer)?; - self.append(avro_value) - } - - /// Extend a `Writer` with an `Iterator` of compatible values (implementing the `ToAvro` - /// trait), also performing schema validation. - /// - /// Return the number of bytes written. - /// - /// **NOTE** This function forces the written data to be flushed (an implicit - /// call to [`flush`](struct.Writer.html#method.flush) is performed). - pub fn extend>(&mut self, values: I) -> AvroResult - where - I: IntoIterator, - { - /* - https://github.com/rust-lang/rfcs/issues/811 :( - let mut stream = values - .filter_map(|value| value.serialize(&mut self.serializer).ok()) - .map(|value| value.encode(self.schema)) - .collect::>>() - .ok_or_else(|| err_msg("value does not match given schema"))? - .into_iter() - .fold(Vec::new(), |mut acc, stream| { - num_values += 1; - acc.extend(stream); acc - }); - */ - - let mut num_bytes = 0; - for value in values { - num_bytes += self.append(value)?; - } - num_bytes += self.flush()?; - - Ok(num_bytes) - } - - /// Extend a `Writer` with an `Iterator` of anything implementing the `Serialize` trait for - /// [`serde`](https://docs.serde.rs/serde/index.html) compatibility, also performing schema - /// validation. - /// - /// Return the number of bytes written. - /// - /// **NOTE** This function forces the written data to be flushed (an implicit - /// call to [`flush`](struct.Writer.html#method.flush) is performed). - pub fn extend_ser(&mut self, values: I) -> AvroResult - where - I: IntoIterator, - { - /* - https://github.com/rust-lang/rfcs/issues/811 :( - let mut stream = values - .filter_map(|value| value.serialize(&mut self.serializer).ok()) - .map(|value| value.encode(self.schema)) - .collect::>>() - .ok_or_else(|| err_msg("value does not match given schema"))? 
- .into_iter() - .fold(Vec::new(), |mut acc, stream| { - num_values += 1; - acc.extend(stream); acc - }); - */ - - let mut num_bytes = 0; - for value in values { - num_bytes += self.append_ser(value)?; - } - num_bytes += self.flush()?; - - Ok(num_bytes) - } - - /// Extend a `Writer` by appending each `Value` from a slice, while also performing schema - /// validation on each value appended. - /// - /// Return the number of bytes written. - /// - /// **NOTE** This function forces the written data to be flushed (an implicit - /// call to [`flush`](struct.Writer.html#method.flush) is performed). - pub fn extend_from_slice(&mut self, values: &[Value]) -> AvroResult { - let mut num_bytes = 0; - for value in values { - num_bytes += self.append_value_ref(value)?; - } - num_bytes += self.flush()?; - - Ok(num_bytes) - } - - /// Flush the content appended to a `Writer`. Call this function to make sure all the content - /// has been written before releasing the `Writer`. - /// - /// Return the number of bytes written. - pub fn flush(&mut self) -> AvroResult { - if self.num_values == 0 { - return Ok(0); - } - - self.codec.compress(&mut self.buffer)?; - - let num_values = self.num_values; - let stream_len = self.buffer.len(); - - let num_bytes = self.append_raw(&num_values.into(), &Schema::Long)? - + self.append_raw(&stream_len.into(), &Schema::Long)? - + self - .writer - .write(self.buffer.as_ref()) - .map_err(Error::WriteBytes)? - + self.append_marker()?; - - self.buffer.clear(); - self.num_values = 0; - - Ok(num_bytes) - } - - /// Return what the `Writer` is writing to, consuming the `Writer` itself. - /// - /// **NOTE** This function forces the written data to be flushed (an implicit - /// call to [`flush`](struct.Writer.html#method.flush) is performed). - pub fn into_inner(mut self) -> AvroResult { - self.maybe_write_header()?; - self.flush()?; - Ok(self.writer) - } - - /// Generate and append synchronization marker to the payload. 
- fn append_marker(&mut self) -> AvroResult { - // using .writer.write directly to avoid mutable borrow of self - // with ref borrowing of self.marker - self.writer.write(&self.marker).map_err(Error::WriteMarker) - } - - /// Append a raw Avro Value to the payload avoiding to encode it again. - fn append_raw(&mut self, value: &Value, schema: &Schema) -> AvroResult { - self.append_bytes(encode_to_vec(value, schema)?.as_ref()) - } - - /// Append pure bytes to the payload. - fn append_bytes(&mut self, bytes: &[u8]) -> AvroResult { - self.writer.write(bytes).map_err(Error::WriteBytes) - } - - /// Adds custom metadata to the file. - /// This method could be used only before adding the first record to the writer. - pub fn add_user_metadata>(&mut self, key: String, value: T) -> AvroResult<()> { - if !self.has_header { - if key.starts_with("avro.") { - return Err(Error::InvalidMetadataKey(key)); - } - self.user_metadata - .insert(key, Value::Bytes(value.as_ref().to_vec())); - Ok(()) - } else { - Err(Error::FileHeaderAlreadyWritten) - } - } - - /// Create an Avro header based on schema, codec and sync marker. - fn header(&self) -> Result, Error> { - let schema_bytes = serde_json::to_string(self.schema) - .map_err(Error::ConvertJsonToString)? 
- .into_bytes(); - - let mut metadata = HashMap::with_capacity(2); - metadata.insert("avro.schema", Value::Bytes(schema_bytes)); - metadata.insert("avro.codec", self.codec.into()); - match self.codec { - #[cfg(feature = "bzip")] - Codec::Bzip2(settings) => { - metadata.insert( - "avro.codec.compression_level", - Value::Bytes(vec![settings.compression_level]), - ); - } - #[cfg(feature = "xz")] - Codec::Xz(settings) => { - metadata.insert( - "avro.codec.compression_level", - Value::Bytes(vec![settings.compression_level]), - ); - } - #[cfg(feature = "zstandard")] - Codec::Zstandard(settings) => { - metadata.insert( - "avro.codec.compression_level", - Value::Bytes(vec![settings.compression_level]), - ); - } - _ => {} - } - - for (k, v) in &self.user_metadata { - metadata.insert(k.as_str(), v.clone()); - } - - let mut header = Vec::new(); - header.extend_from_slice(AVRO_OBJECT_HEADER); - encode(&metadata.into(), &Schema::map(Schema::Bytes), &mut header)?; - header.extend_from_slice(&self.marker); - - Ok(header) - } - - fn maybe_write_header(&mut self) -> AvroResult { - if !self.has_header { - let header = self.header()?; - let n = self.append_bytes(header.as_ref())?; - self.has_header = true; - Ok(n) - } else { - Ok(0) - } - } -} - -/// Encode a compatible value (implementing the `ToAvro` trait) into Avro format, also performing -/// schema validation. -/// -/// This is an internal function which gets the bytes buffer where to write as parameter instead of -/// creating a new one like `to_avro_datum`. 
-fn write_avro_datum>( - schema: &Schema, - value: T, - buffer: &mut Vec, -) -> Result<(), Error> { - let avro = value.into(); - if !avro.validate(schema) { - return Err(Error::Validation); - } - encode(&avro, schema, buffer)?; - Ok(()) -} - -fn write_avro_datum_schemata>( - schema: &Schema, - schemata: Vec<&Schema>, - value: T, - buffer: &mut Vec, -) -> AvroResult<()> { - let avro = value.into(); - let rs = ResolvedSchema::try_from(schemata)?; - let names = rs.get_names(); - let enclosing_namespace = schema.namespace(); - if let Some(_err) = avro.validate_internal(schema, names, &enclosing_namespace) { - return Err(Error::Validation); - } - encode_internal(&avro, schema, names, &enclosing_namespace, buffer) -} - -/// Writer that encodes messages according to the single object encoding v1 spec -/// Uses an API similar to the current File Writer -/// Writes all object bytes at once, and drains internal buffer -pub struct GenericSingleObjectWriter { - buffer: Vec, - resolved: ResolvedOwnedSchema, -} - -impl GenericSingleObjectWriter { - pub fn new_with_capacity( - schema: &Schema, - initial_buffer_cap: usize, - ) -> AvroResult { - let fingerprint = schema.fingerprint::(); - let mut buffer = Vec::with_capacity(initial_buffer_cap); - let header = [ - 0xC3, - 0x01, - fingerprint.bytes[0], - fingerprint.bytes[1], - fingerprint.bytes[2], - fingerprint.bytes[3], - fingerprint.bytes[4], - fingerprint.bytes[5], - fingerprint.bytes[6], - fingerprint.bytes[7], - ]; - buffer.extend_from_slice(&header); - - Ok(GenericSingleObjectWriter { - buffer, - resolved: ResolvedOwnedSchema::try_from(schema.clone())?, - }) - } - - /// Write the referenced Value to the provided Write object. 
Returns a result with the number of bytes written including the header - pub fn write_value_ref(&mut self, v: &Value, writer: &mut W) -> AvroResult { - if self.buffer.len() != 10 { - Err(Error::IllegalSingleObjectWriterState) - } else { - write_value_ref_owned_resolved(&self.resolved, v, &mut self.buffer)?; - writer.write_all(&self.buffer).map_err(Error::WriteBytes)?; - let len = self.buffer.len(); - self.buffer.truncate(10); - Ok(len) - } - } - - /// Write the Value to the provided Write object. Returns a result with the number of bytes written including the header - pub fn write_value(&mut self, v: Value, writer: &mut W) -> AvroResult { - self.write_value_ref(&v, writer) - } -} - -/// Writer that encodes messages according to the single object encoding v1 spec -pub struct SpecificSingleObjectWriter -where - T: AvroSchema, -{ - inner: GenericSingleObjectWriter, - _model: PhantomData, -} - -impl SpecificSingleObjectWriter -where - T: AvroSchema, -{ - pub fn with_capacity(buffer_cap: usize) -> AvroResult> { - let schema = T::get_schema(); - Ok(SpecificSingleObjectWriter { - inner: GenericSingleObjectWriter::new_with_capacity(&schema, buffer_cap)?, - _model: PhantomData, - }) - } -} - -impl SpecificSingleObjectWriter -where - T: AvroSchema + Into, -{ - /// Write the `Into` to the provided Write object. Returns a result with the number - /// of bytes written including the header - pub fn write_value(&mut self, data: T, writer: &mut W) -> AvroResult { - let v: Value = data.into(); - self.inner.write_value_ref(&v, writer) - } -} - -impl SpecificSingleObjectWriter -where - T: AvroSchema + Serialize, -{ - /// Write the referenced Serialize object to the provided Write object. 
Returns a result with - /// the number of bytes written including the header - pub fn write_ref(&mut self, data: &T, writer: &mut W) -> AvroResult { - let mut serializer = Serializer::default(); - let v = data.serialize(&mut serializer)?; - self.inner.write_value_ref(&v, writer) - } - - /// Write the Serialize object to the provided Write object. Returns a result with the number - /// of bytes written including the header - pub fn write(&mut self, data: T, writer: &mut W) -> AvroResult { - self.write_ref(&data, writer) - } -} - -fn write_value_ref_resolved( - schema: &Schema, - resolved_schema: &ResolvedSchema, - value: &Value, - buffer: &mut Vec, -) -> AvroResult<()> { - match value.validate_internal(schema, resolved_schema.get_names(), &schema.namespace()) { - Some(reason) => Err(Error::ValidationWithReason { - value: value.clone(), - schema: schema.clone(), - reason, - }), - None => encode_internal( - value, - schema, - resolved_schema.get_names(), - &schema.namespace(), - buffer, - ), - } -} - -fn write_value_ref_owned_resolved( - resolved_schema: &ResolvedOwnedSchema, - value: &Value, - buffer: &mut Vec, -) -> AvroResult<()> { - let root_schema = resolved_schema.get_root_schema(); - if let Some(reason) = value.validate_internal( - root_schema, - resolved_schema.get_names(), - &root_schema.namespace(), - ) { - return Err(Error::ValidationWithReason { - value: value.clone(), - schema: root_schema.clone(), - reason, - }); - } - encode_internal( - value, - root_schema, - resolved_schema.get_names(), - &root_schema.namespace(), - buffer, - )?; - Ok(()) -} - -/// Encode a compatible value (implementing the `ToAvro` trait) into Avro format, also -/// performing schema validation. -/// -/// **NOTE** This function has a quite small niche of usage and does NOT generate headers and sync -/// markers; use [`Writer`](struct.Writer.html) to be fully Avro-compatible if you don't know what -/// you are doing, instead. 
-pub fn to_avro_datum>(schema: &Schema, value: T) -> AvroResult> { - let mut buffer = Vec::new(); - write_avro_datum(schema, value, &mut buffer)?; - Ok(buffer) -} - -/// Encode a compatible value (implementing the `ToAvro` trait) into Avro format, also -/// performing schema validation. -/// If the provided `schema` is incomplete then its dependencies must be -/// provided in `schemata` -pub fn to_avro_datum_schemata>( - schema: &Schema, - schemata: Vec<&Schema>, - value: T, -) -> AvroResult> { - let mut buffer = Vec::new(); - write_avro_datum_schemata(schema, schemata, value, &mut buffer)?; - Ok(buffer) -} - -#[cfg(not(target_arch = "wasm32"))] -fn generate_sync_marker() -> [u8; 16] { - let mut marker = [0_u8; 16]; - std::iter::repeat_with(rand::random) - .take(16) - .enumerate() - .for_each(|(i, n)| marker[i] = n); - marker -} - -#[cfg(target_arch = "wasm32")] -fn generate_sync_marker() -> [u8; 16] { - let mut marker = [0_u8; 16]; - std::iter::repeat_with(quad_rand::rand) - .take(4) - .flat_map(|i| i.to_be_bytes()) - .enumerate() - .for_each(|(i, n)| marker[i] = n); - marker -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - decimal::Decimal, - duration::{Days, Duration, Millis, Months}, - schema::{DecimalSchema, FixedSchema, Name}, - types::Record, - util::zig_i64, - Reader, - }; - use pretty_assertions::assert_eq; - use serde::{Deserialize, Serialize}; - - use apache_avro_test_helper::TestResult; - - const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len(); - - const SCHEMA: &str = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "a", - "type": "long", - "default": 42 - }, - { - "name": "b", - "type": "string" - } - ] - } - "#; - const UNION_SCHEMA: &str = r#"["null", "long"]"#; - - #[test] - fn test_to_avro_datum() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - - let mut expected = Vec::new(); - 
zig_i64(27, &mut expected); - zig_i64(3, &mut expected); - expected.extend([b'f', b'o', b'o']); - - assert_eq!(to_avro_datum(&schema, record)?, expected); - - Ok(()) - } - - #[test] - fn test_union_not_null() -> TestResult { - let schema = Schema::parse_str(UNION_SCHEMA)?; - let union = Value::Union(1, Box::new(Value::Long(3))); - - let mut expected = Vec::new(); - zig_i64(1, &mut expected); - zig_i64(3, &mut expected); - - assert_eq!(to_avro_datum(&schema, union)?, expected); - - Ok(()) - } - - #[test] - fn test_union_null() -> TestResult { - let schema = Schema::parse_str(UNION_SCHEMA)?; - let union = Value::Union(0, Box::new(Value::Null)); - - let mut expected = Vec::new(); - zig_i64(0, &mut expected); - - assert_eq!(to_avro_datum(&schema, union)?, expected); - - Ok(()) - } - - fn logical_type_test + Clone>( - schema_str: &'static str, - - expected_schema: &Schema, - value: Value, - - raw_schema: &Schema, - raw_value: T, - ) -> TestResult { - let schema = Schema::parse_str(schema_str)?; - assert_eq!(&schema, expected_schema); - // The serialized format should be the same as the schema. - let ser = to_avro_datum(&schema, value.clone())?; - let raw_ser = to_avro_datum(raw_schema, raw_value)?; - assert_eq!(ser, raw_ser); - - // Should deserialize from the schema into the logical type. 
- let mut r = ser.as_slice(); - let de = crate::from_avro_datum(&schema, &mut r, None)?; - assert_eq!(de, value); - Ok(()) - } - - #[test] - fn date() -> TestResult { - logical_type_test( - r#"{"type": "int", "logicalType": "date"}"#, - &Schema::Date, - Value::Date(1_i32), - &Schema::Int, - 1_i32, - ) - } - - #[test] - fn time_millis() -> TestResult { - logical_type_test( - r#"{"type": "int", "logicalType": "time-millis"}"#, - &Schema::TimeMillis, - Value::TimeMillis(1_i32), - &Schema::Int, - 1_i32, - ) - } - - #[test] - fn time_micros() -> TestResult { - logical_type_test( - r#"{"type": "long", "logicalType": "time-micros"}"#, - &Schema::TimeMicros, - Value::TimeMicros(1_i64), - &Schema::Long, - 1_i64, - ) - } - - #[test] - fn timestamp_millis() -> TestResult { - logical_type_test( - r#"{"type": "long", "logicalType": "timestamp-millis"}"#, - &Schema::TimestampMillis, - Value::TimestampMillis(1_i64), - &Schema::Long, - 1_i64, - ) - } - - #[test] - fn timestamp_micros() -> TestResult { - logical_type_test( - r#"{"type": "long", "logicalType": "timestamp-micros"}"#, - &Schema::TimestampMicros, - Value::TimestampMicros(1_i64), - &Schema::Long, - 1_i64, - ) - } - - #[test] - fn decimal_fixed() -> TestResult { - let size = 30; - let inner = Schema::Fixed(FixedSchema { - name: Name::new("decimal")?, - aliases: None, - doc: None, - size, - default: None, - attributes: Default::default(), - }); - let value = vec![0u8; size]; - logical_type_test( - r#"{"type": {"type": "fixed", "size": 30, "name": "decimal"}, "logicalType": "decimal", "precision": 20, "scale": 5}"#, - &Schema::Decimal(DecimalSchema { - precision: 20, - scale: 5, - inner: Box::new(inner.clone()), - }), - Value::Decimal(Decimal::from(value.clone())), - &inner, - Value::Fixed(size, value), - ) - } - - #[test] - fn decimal_bytes() -> TestResult { - let inner = Schema::Bytes; - let value = vec![0u8; 10]; - logical_type_test( - r#"{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 3}"#, - 
&Schema::Decimal(DecimalSchema { - precision: 4, - scale: 3, - inner: Box::new(inner.clone()), - }), - Value::Decimal(Decimal::from(value.clone())), - &inner, - value, - ) - } - - #[test] - fn duration() -> TestResult { - let inner = Schema::Fixed(FixedSchema { - name: Name::new("duration")?, - aliases: None, - doc: None, - size: 12, - default: None, - attributes: Default::default(), - }); - let value = Value::Duration(Duration::new( - Months::new(256), - Days::new(512), - Millis::new(1024), - )); - logical_type_test( - r#"{"type": {"type": "fixed", "name": "duration", "size": 12}, "logicalType": "duration"}"#, - &Schema::Duration, - value, - &inner, - Value::Fixed(12, vec![0, 1, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0]), - ) - } - - #[test] - fn test_writer_append() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - - let n1 = writer.append(record.clone())?; - let n2 = writer.append(record.clone())?; - let n3 = writer.flush()?; - let result = writer.into_inner()?; - - assert_eq!(n1 + n2 + n3, result.len()); - - let mut data = Vec::new(); - zig_i64(27, &mut data); - zig_i64(3, &mut data); - data.extend(b"foo"); - data.extend(data.clone()); - - // starts with magic - assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); - // ends with data and sync marker - let last_data_byte = result.len() - 16; - assert_eq!( - &result[last_data_byte - data.len()..last_data_byte], - data.as_slice() - ); - - Ok(()) - } - - #[test] - fn test_writer_extend() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - let record_copy = record.clone(); - let records = vec![record, record_copy]; - - let n1 = writer.extend(records)?; - let n2 = writer.flush()?; - let 
result = writer.into_inner()?; - - assert_eq!(n1 + n2, result.len()); - - let mut data = Vec::new(); - zig_i64(27, &mut data); - zig_i64(3, &mut data); - data.extend(b"foo"); - data.extend(data.clone()); - - // starts with magic - assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); - // ends with data and sync marker - let last_data_byte = result.len() - 16; - assert_eq!( - &result[last_data_byte - data.len()..last_data_byte], - data.as_slice() - ); - - Ok(()) - } - - #[derive(Debug, Clone, Deserialize, Serialize)] - struct TestSerdeSerialize { - a: i64, - b: String, - } - - #[test] - fn test_writer_append_ser() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let record = TestSerdeSerialize { - a: 27, - b: "foo".to_owned(), - }; - - let n1 = writer.append_ser(record)?; - let n2 = writer.flush()?; - let result = writer.into_inner()?; - - assert_eq!(n1 + n2, result.len()); - - let mut data = Vec::new(); - zig_i64(27, &mut data); - zig_i64(3, &mut data); - data.extend(b"foo"); - - // starts with magic - assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); - // ends with data and sync marker - let last_data_byte = result.len() - 16; - assert_eq!( - &result[last_data_byte - data.len()..last_data_byte], - data.as_slice() - ); - - Ok(()) - } - - #[test] - fn test_writer_extend_ser() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let record = TestSerdeSerialize { - a: 27, - b: "foo".to_owned(), - }; - let record_copy = record.clone(); - let records = vec![record, record_copy]; - - let n1 = writer.extend_ser(records)?; - let n2 = writer.flush()?; - let result = writer.into_inner()?; - - assert_eq!(n1 + n2, result.len()); - - let mut data = Vec::new(); - zig_i64(27, &mut data); - zig_i64(3, &mut data); - data.extend(b"foo"); - data.extend(data.clone()); - - // starts with magic - 
assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); - // ends with data and sync marker - let last_data_byte = result.len() - 16; - assert_eq!( - &result[last_data_byte - data.len()..last_data_byte], - data.as_slice() - ); - - Ok(()) - } - - fn make_writer_with_codec(schema: &Schema) -> Writer<'_, Vec> { - Writer::with_codec(schema, Vec::new(), Codec::Deflate) - } - - fn make_writer_with_builder(schema: &Schema) -> Writer<'_, Vec> { - Writer::builder() - .writer(Vec::new()) - .schema(schema) - .codec(Codec::Deflate) - .block_size(100) - .build() - } - - fn check_writer(mut writer: Writer<'_, Vec>, schema: &Schema) -> TestResult { - let mut record = Record::new(schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - - let n1 = writer.append(record.clone())?; - let n2 = writer.append(record.clone())?; - let n3 = writer.flush()?; - let result = writer.into_inner()?; - - assert_eq!(n1 + n2 + n3, result.len()); - - let mut data = Vec::new(); - zig_i64(27, &mut data); - zig_i64(3, &mut data); - data.extend(b"foo"); - data.extend(data.clone()); - Codec::Deflate.compress(&mut data)?; - - // starts with magic - assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); - // ends with data and sync marker - let last_data_byte = result.len() - 16; - assert_eq!( - &result[last_data_byte - data.len()..last_data_byte], - data.as_slice() - ); - - Ok(()) - } - - #[test] - fn test_writer_with_codec() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let writer = make_writer_with_codec(&schema); - check_writer(writer, &schema) - } - - #[test] - fn test_writer_with_builder() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let writer = make_writer_with_builder(&schema); - check_writer(writer, &schema) - } - - #[test] - fn test_logical_writer() -> TestResult { - const LOGICAL_TYPE_SCHEMA: &str = r#" - { - "type": "record", - "name": "logical_type_test", - "fields": [ - { - "name": "a", - "type": [ - "null", - { - "type": 
"long", - "logicalType": "timestamp-micros" - } - ] - } - ] - } - "#; - let codec = Codec::Deflate; - let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA)?; - let mut writer = Writer::builder() - .schema(&schema) - .codec(codec) - .writer(Vec::new()) - .build(); - - let mut record1 = Record::new(&schema).unwrap(); - record1.put( - "a", - Value::Union(1, Box::new(Value::TimestampMicros(1234_i64))), - ); - - let mut record2 = Record::new(&schema).unwrap(); - record2.put("a", Value::Union(0, Box::new(Value::Null))); - - let n1 = writer.append(record1)?; - let n2 = writer.append(record2)?; - let n3 = writer.flush()?; - let result = writer.into_inner()?; - - assert_eq!(n1 + n2 + n3, result.len()); - - let mut data = Vec::new(); - // byte indicating not null - zig_i64(1, &mut data); - zig_i64(1234, &mut data); - - // byte indicating null - zig_i64(0, &mut data); - codec.compress(&mut data)?; - - // starts with magic - assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); - // ends with data and sync marker - let last_data_byte = result.len() - 16; - assert_eq!( - &result[last_data_byte - data.len()..last_data_byte], - data.as_slice() - ); - - Ok(()) - } - - #[test] - fn test_avro_3405_writer_add_metadata_success() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - writer.add_user_metadata("stringKey".to_string(), String::from("stringValue"))?; - writer.add_user_metadata("strKey".to_string(), "strValue")?; - writer.add_user_metadata("bytesKey".to_string(), b"bytesValue")?; - writer.add_user_metadata("vecKey".to_string(), vec![1, 2, 3])?; - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - - writer.append(record.clone())?; - writer.append(record.clone())?; - writer.flush()?; - let result = writer.into_inner()?; - - assert_eq!(result.len(), 260); - - Ok(()) - } - - #[test] - fn test_avro_3881_metadata_empty_body() -> TestResult { - let schema = 
Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - writer.add_user_metadata("a".to_string(), "b")?; - let result = writer.into_inner()?; - - let reader = Reader::with_schema(&schema, &result[..])?; - let mut expected = HashMap::new(); - expected.insert("a".to_string(), vec![b'b']); - assert_eq!(reader.user_metadata(), &expected); - assert_eq!(reader.into_iter().count(), 0); - - Ok(()) - } - - #[test] - fn test_avro_3405_writer_add_metadata_failure() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - writer.append(record.clone())?; - - match writer.add_user_metadata("stringKey".to_string(), String::from("value2")) { - Err(e @ Error::FileHeaderAlreadyWritten) => { - assert_eq!(e.to_string(), "The file metadata is already flushed.") - } - Err(e) => panic!("Unexpected error occurred while writing user metadata: {e:?}"), - Ok(_) => panic!("Expected an error that metadata cannot be added after adding data"), - } - - Ok(()) - } - - #[test] - fn test_avro_3405_writer_add_metadata_reserved_prefix_failure() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let key = "avro.stringKey".to_string(); - match writer.add_user_metadata(key.clone(), "value") { - Err(ref e @ Error::InvalidMetadataKey(_)) => { - assert_eq!(e.to_string(), format!("Metadata keys starting with 'avro.' 
are reserved for internal usage: {key}.")) - } - Err(e) => panic!( - "Unexpected error occurred while writing user metadata with reserved prefix ('avro.'): {e:?}" - ), - Ok(_) => panic!("Expected an error that the metadata key cannot be prefixed with 'avro.'"), - } - - Ok(()) - } - - #[test] - fn test_avro_3405_writer_add_metadata_with_builder_api_success() -> TestResult { - let schema = Schema::parse_str(SCHEMA)?; - - let mut user_meta_data: HashMap = HashMap::new(); - user_meta_data.insert( - "stringKey".to_string(), - Value::String("stringValue".to_string()), - ); - user_meta_data.insert("bytesKey".to_string(), Value::Bytes(b"bytesValue".to_vec())); - user_meta_data.insert("vecKey".to_string(), Value::Bytes(vec![1, 2, 3])); - - let writer: Writer<'_, Vec> = Writer::builder() - .writer(Vec::new()) - .schema(&schema) - .user_metadata(user_meta_data.clone()) - .build(); - - assert_eq!(writer.user_metadata, user_meta_data); - - Ok(()) - } - - #[derive(Serialize, Clone)] - struct TestSingleObjectWriter { - a: i64, - b: f64, - c: Vec, - } - - impl AvroSchema for TestSingleObjectWriter { - fn get_schema() -> Schema { - let schema = r#" - { - "type":"record", - "name":"TestSingleObjectWrtierSerialize", - "fields":[ - { - "name":"a", - "type":"long" - }, - { - "name":"b", - "type":"double" - }, - { - "name":"c", - "type":{ - "type":"array", - "items":"string" - } - } - ] - } - "#; - Schema::parse_str(schema).unwrap() - } - } - - impl From for Value { - fn from(obj: TestSingleObjectWriter) -> Value { - Value::Record(vec![ - ("a".into(), obj.a.into()), - ("b".into(), obj.b.into()), - ( - "c".into(), - Value::Array(obj.c.into_iter().map(|s| s.into()).collect()), - ), - ]) - } - } - - #[test] - fn test_single_object_writer() -> TestResult { - let mut buf: Vec = Vec::new(); - let obj = TestSingleObjectWriter { - a: 300, - b: 34.555, - c: vec!["cat".into(), "dog".into()], - }; - let mut writer = GenericSingleObjectWriter::new_with_capacity( - 
&TestSingleObjectWriter::get_schema(), - 1024, - ) - .expect("Should resolve schema"); - let value = obj.into(); - let written_bytes = writer - .write_value_ref(&value, &mut buf) - .expect("Error serializing properly"); - - assert!(buf.len() > 10, "no bytes written"); - assert_eq!(buf.len(), written_bytes); - assert_eq!(buf[0], 0xC3); - assert_eq!(buf[1], 0x01); - assert_eq!( - &buf[2..10], - &TestSingleObjectWriter::get_schema() - .fingerprint::() - .bytes[..] - ); - let mut msg_binary = Vec::new(); - encode( - &value, - &TestSingleObjectWriter::get_schema(), - &mut msg_binary, - ) - .expect("encode should have failed by here as a dependency of any writing"); - assert_eq!(&buf[10..], &msg_binary[..]); - - Ok(()) - } - - #[test] - fn test_writer_parity() -> TestResult { - let obj1 = TestSingleObjectWriter { - a: 300, - b: 34.555, - c: vec!["cat".into(), "dog".into()], - }; - - let mut buf1: Vec = Vec::new(); - let mut buf2: Vec = Vec::new(); - let mut buf3: Vec = Vec::new(); - - let mut generic_writer = GenericSingleObjectWriter::new_with_capacity( - &TestSingleObjectWriter::get_schema(), - 1024, - ) - .expect("Should resolve schema"); - let mut specific_writer = - SpecificSingleObjectWriter::::with_capacity(1024) - .expect("Resolved should pass"); - specific_writer - .write(obj1.clone(), &mut buf1) - .expect("Serialization expected"); - specific_writer - .write_value(obj1.clone(), &mut buf2) - .expect("Serialization expected"); - generic_writer - .write_value(obj1.into(), &mut buf3) - .expect("Serialization expected"); - assert_eq!(buf1, buf2); - assert_eq!(buf1, buf3); - - Ok(()) - } - - #[test] - fn avro_3894_take_aliases_into_account_when_serializing() -> TestResult { - const SCHEMA: &str = r#" - { - "type": "record", - "name": "Conference", - "fields": [ - {"type": "string", "name": "name"}, - {"type": ["null", "long"], "name": "date", "aliases" : [ "time2", "time" ]} - ] - }"#; - - #[derive(Debug, PartialEq, Eq, Clone, Serialize)] - pub struct Conference { - 
pub name: String, - pub time: Option, - } - - let conf = Conference { - name: "RustConf".to_string(), - time: Some(1234567890), - }; - - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - let bytes = writer.append_ser(conf)?; - - assert_eq!(198, bytes); - - Ok(()) - } - - #[test] - fn avro_4014_validation_returns_a_detailed_error() -> TestResult { - const SCHEMA: &str = r#" - { - "type": "record", - "name": "Conference", - "fields": [ - {"type": "string", "name": "name"}, - {"type": ["null", "long"], "name": "date", "aliases" : [ "time2", "time" ]} - ] - }"#; - - #[derive(Debug, PartialEq, Clone, Serialize)] - pub struct Conference { - pub name: String, - pub time: Option, // wrong type: f64 instead of i64 - } - - let conf = Conference { - name: "RustConf".to_string(), - time: Some(12345678.90), - }; - - let schema = Schema::parse_str(SCHEMA)?; - let mut writer = Writer::new(&schema, Vec::new()); - - match writer.append_ser(conf) { - Ok(bytes) => panic!("Expected an error, but got {} bytes written", bytes), - Err(e) => { - assert_eq!( - e.to_string(), - r#"Value Record([("name", String("RustConf")), ("time", Union(1, Double(12345678.9)))]) does not match schema Record(RecordSchema { name: Name { name: "Conference", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "name", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "date", doc: None, aliases: Some(["time2", "time"]), default: None, schema: Union(UnionSchema { schemas: [Null, Long], variant_index: {Null: 0, Long: 1} }), order: Ascending, position: 1, custom_attributes: {} }], lookup: {"date": 1, "name": 0, "time": 1, "time2": 1}, attributes: {} }): Reason: Unsupported value-schema combination! 
Value: Double(12345678.9), schema: Long"# - ); - } - } - Ok(()) - } -} diff --git a/lang/rust/avro/tests/append_to_existing.rs b/lang/rust/avro/tests/append_to_existing.rs deleted file mode 100644 index d378ad68416..00000000000 --- a/lang/rust/avro/tests/append_to_existing.rs +++ /dev/null @@ -1,111 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use apache_avro::{ - read_marker, - types::{Record, Value}, - AvroResult, Reader, Schema, Writer, -}; -use apache_avro_test_helper::TestResult; - -const SCHEMA: &str = r#"{ - "type": "record", - "name": "append_to_existing_file", - "fields": [ - {"name": "a", "type": "int"} - ] -}"#; - -#[test] -fn avro_3630_append_to_an_existing_file() -> TestResult { - let schema = Schema::parse_str(SCHEMA).expect("Cannot parse the schema"); - - let bytes = get_avro_bytes(&schema); - - let marker = read_marker(&bytes[..]); - - let mut writer = Writer::append_to(&schema, bytes, marker); - - writer - .append(create_datum(&schema, 2)) - .expect("An error occurred while appending more data"); - - let new_bytes = writer.into_inner().expect("Cannot get the new bytes"); - - let reader = Reader::new(&*new_bytes).expect("Cannot read the new bytes"); - let mut i = 1; - for value in reader { - check(&value, i); - i += 1 - } - - Ok(()) -} - -#[test] -fn avro_4031_append_to_file_using_multiple_writers() -> TestResult { - let schema = Schema::parse_str(SCHEMA).expect("Cannot parse the schema"); - - let mut first_writer = Writer::builder().schema(&schema).writer(Vec::new()).build(); - first_writer.append(create_datum(&schema, -42))?; - let mut resulting_bytes = first_writer.into_inner()?; - let first_marker = read_marker(&resulting_bytes); - - let mut second_writer = Writer::builder() - .schema(&schema) - .has_header(true) - .marker(first_marker) - .writer(Vec::new()) - .build(); - second_writer.append(create_datum(&schema, 42))?; - resulting_bytes.append(&mut second_writer.into_inner()?); - - let values: Vec<_> = Reader::new(&resulting_bytes[..])?.collect(); - check(&values[0], -42); - check(&values[1], 42); - Ok(()) -} - -/// Simulates reading from a pre-existing .avro file and returns its bytes -fn get_avro_bytes(schema: &Schema) -> Vec { - let mut writer = Writer::new(schema, Vec::new()); - writer - .append(create_datum(schema, 1)) - .expect("An error while appending data"); - 
writer.into_inner().expect("Cannot get the Avro bytes") -} - -/// Creates a new datum to write -fn create_datum(schema: &Schema, value: i32) -> Record { - let mut datum = Record::new(schema).unwrap(); - datum.put("a", value); - datum -} - -/// Checks the read values -fn check(value: &AvroResult, expected: i32) { - match value { - Ok(value) => match value { - Value::Record(fields) => match &fields[0] { - (_, Value::Int(actual)) => assert_eq!(&expected, actual), - _ => panic!("The field value type must be an Int: {:?}!", &fields[0]), - }, - _ => panic!("The value type must be a Record: {value:?}!"), - }, - Err(e) => panic!("Error while reading the data: {e:?}"), - } -} diff --git a/lang/rust/avro/tests/avro-3786.rs b/lang/rust/avro/tests/avro-3786.rs deleted file mode 100644 index a0e7714f50a..00000000000 --- a/lang/rust/avro/tests/avro-3786.rs +++ /dev/null @@ -1,886 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use apache_avro::{from_avro_datum, to_avro_datum, to_value, types, Schema}; -use apache_avro_test_helper::TestResult; - -#[test] -fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult { - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct BarUseParent { - #[serde(rename = "barUse")] - pub bar_use: Bar, - } - - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barInit")] - pub bar_init: Bar, - #[serde(rename = "barUseParent")] - pub bar_use_parent: Option, - } - - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - }, - { - "name": "barUseParent", - "type": [ - "null", - { - "type": "record", - "name": "BarUseParent", - "fields": [ - { - "name": "barUse", - "type": "Bar" - } - ] - } - ] - } - ] - }"#; - - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar1" - ], - "default": "bar1" - } - }, - { - "name": "barUseParent", - "type": [ - "null", - { - "type": "record", - "name": "BarUseParent", - "fields": [ - { - "name": "barUse", - "type": "Bar" - } - ] - } - ] - } - ] - }"#; - - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_init: Bar::Bar1, - bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, 
avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 2); - assert_eq!(fields[0].0, "barInit"); - assert_eq!(fields[0].1, types::Value::Enum(0, "bar1".to_string())); - assert_eq!(fields[1].0, "barUseParent"); - assert_eq!( - fields[1].1, - types::Value::Union( - 1, - Box::new(types::Value::Record(vec![( - "barUse".to_string(), - types::Value::Enum(0, "bar1".to_string()) - )])) - ) - ); - } - _ => panic!("Expected Value::Record"), - } - Ok(()) -} - -#[test] -fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct BarParent { - pub bar: Bar, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barParent")] - pub bar_parent: Option, - } - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - } - ] - } - ] - } - ] - }"#; - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": 
"bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar2" - ], - "default": "bar0" - } - } - ] - } - ] - } - ] - }"#; - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_parent: Some(BarParent { bar: Bar::Bar0 }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 1); - assert_eq!(fields[0].0, "barParent"); - // TODO: better validation - } - _ => panic!("Expected Value::Record"), - } - Ok(()) -} - -#[test] -fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v1() -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct BarParent { - pub bar: Bar, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barParent")] - pub bar_parent: Option, - } - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - } - ] - } - ] - } - ] - }"#; - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": 
"com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar2" - ], - "default": "bar0" - } - } - ] - } - ] - } - ] - }"#; - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_parent: Some(BarParent { bar: Bar::Bar1 }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 1); - assert_eq!(fields[0].0, "barParent"); - // TODO: better validation - } - _ => panic!("Expected Value::Record"), - } - Ok(()) -} - -#[test] -fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v2() -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct BarParent { - pub bar: Bar, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barParent")] - pub bar_parent: Option, - } - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": 
"Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar2" - } - } - ] - } - ] - } - ] - }"#; - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar1", - "bar2" - ], - "default": "bar2" - } - } - ] - } - ] - } - ] - }"#; - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_parent: Some(BarParent { bar: Bar::Bar1 }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 1); - assert_eq!(fields[0].0, "barParent"); - // TODO: better validation - } - _ => panic!("Expected Value::Record"), - } - Ok(()) -} - -#[test] -fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct BarParent { - pub bar: Bar, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barParent")] - pub bar_parent: Option, - } - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - 
{ - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - } - ] - } - ] - } - ] - }"#; - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "name": "bar", - "type": { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar2" - ], - "default": "bar0" - } - } - ] - } - ] - } - ] - }"#; - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_parent: Some(BarParent { bar: Bar::Bar2 }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 1); - assert_eq!(fields[0].0, "barParent"); - // TODO: better validation - } - _ => panic!("Expected Value::Record"), - } - Ok(()) -} - -#[test] -fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_indices( -) -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum DefinedInRecord { - #[serde(rename = "val0")] - Val0, - #[serde(rename = "val1")] - Val1, - #[serde(rename = "val2")] - Val2, - #[serde(rename = "UNKNOWN")] - Unknown, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Parent { - pub date: i64, - #[serde(rename = "barUse")] - pub bar_use: Bar, - #[serde(rename = 
"bazUse")] - pub baz_use: Option>, - #[serde(rename = "definedInRecord")] - pub defined_in_record: DefinedInRecord, - #[serde(rename = "optionalString")] - pub optional_string: Option, - } - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Baz { - #[serde(rename = "baz0")] - Baz0, - #[serde(rename = "baz1")] - Baz1, - #[serde(rename = "baz2")] - Baz2, - } - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barInit")] - pub bar_init: Bar, - pub baz: Baz, - pub parent: Option, - } - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "fake", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - }, - { - "name": "baz", - "type": - { - "type": "enum", - "name": "Baz", - "symbols": - [ - "baz0", - "baz1", - "baz2" - ], - "default": "baz0" - } - }, - { - "name": "parent", - "type": [ - "null", - { - "type": "record", - "name": "Parent", - "fields": [ - { - "name": "date", - "type": { - "type": "long", - "avro.java.long": "Long" - } - }, - { - "name": "barUse", - "type": "Bar" - }, - { - "name": "bazUse", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "Baz" - } - } - ] - }, - { - "name": "definedInRecord", - "type": { - "name": "DefinedInRecord", - "type": "enum", - "symbols": [ - "val0", - "val1", - "val2", - "UNKNOWN" - ], - "default": "UNKNOWN" - } - }, - { - "name": "optionalString", - "type": [ - "null", - "string" - ] - } - ] - } - ] - } - ] - }"#; - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "fake", - 
"fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar2" - ], - "default": "bar0" - } - }, - { - "name": "baz", - "type": - { - "type": "enum", - "name": "Baz", - "symbols": - [ - "baz0", - "baz2" - ], - "default": "baz0" - } - }, - { - "name": "parent", - "type": [ - "null", - { - "type": "record", - "name": "Parent", - "fields": [ - { - "name": "date", - "type": { - "type": "long", - "avro.java.long": "Long" - } - }, - { - "name": "barUse", - "type": "Bar" - }, - { - "name": "bazUse", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "Baz" - } - } - ] - }, - { - "name": "definedInRecord", - "type": { - "name": "DefinedInRecord", - "type": "enum", - "symbols": [ - "val1", - "val2", - "UNKNOWN" - ], - "default": "UNKNOWN" - } - }, - { - "name": "optionalString", - "type": [ - "null", - "string" - ] - } - ] - } - ] - } - ] - }"#; - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_init: Bar::Bar0, - baz: Baz::Baz0, - parent: Some(Parent { - bar_use: Bar::Bar0, - baz_use: Some(vec![Baz::Baz0]), - optional_string: Some("test".to_string()), - date: 1689197893, - defined_in_record: DefinedInRecord::Val1, - }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 3); - assert_eq!(fields[0].0, "barInit"); - assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); - // TODO: better validation - } - _ => panic!("Expected Value::Record"), - } - Ok(()) -} diff --git a/lang/rust/avro/tests/avro-3787.rs b/lang/rust/avro/tests/avro-3787.rs deleted file mode 100644 index 
c08c3c6cce8..00000000000 --- a/lang/rust/avro/tests/avro-3787.rs +++ /dev/null @@ -1,279 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{from_avro_datum, to_avro_datum, to_value, types, Schema}; -use apache_avro_test_helper::TestResult; - -#[test] -fn avro_3787_deserialize_union_with_unknown_symbol() -> TestResult { - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct BarUseParent { - #[serde(rename = "barUse")] - pub bar_use: Bar, - } - - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barInit")] - pub bar_init: Bar, - #[serde(rename = "barUseParent")] - pub bar_use_parent: Option, - } - - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - }, - { - "name": "barUseParent", 
- "type": [ - "null", - { - "type": "record", - "name": "BarUseParent", - "fields": [ - { - "name": "barUse", - "type": "Bar" - } - ] - } - ] - } - ] - }"#; - - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1" - ], - "default": "bar0" - } - }, - { - "name": "barUseParent", - "type": [ - "null", - { - "type": "record", - "name": "BarUseParent", - "fields": [ - { - "name": "barUse", - "type": "Bar" - } - ] - } - ] - } - ] - }"#; - - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_init: Bar::Bar1, - bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar2 }), - }; - let avro_value = to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 2); - assert_eq!(fields[0].0, "barInit"); - assert_eq!(fields[0].1, types::Value::Enum(1, "bar1".to_string())); - assert_eq!(fields[1].0, "barUseParent"); - // TODO: test value - } - _ => panic!("Expected Value::Record"), - } - - Ok(()) -} - -#[test] -fn avro_3787_deserialize_union_with_unknown_symbol_no_ref() -> TestResult { - #[derive( - Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, - )] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - #[serde(default)] - pub struct BarParent { - #[serde(rename = "Bar")] - pub bar: Bar, - } - - #[inline(always)] - fn default_barparent_bar() -> Bar { - Bar::Bar0 - } - impl Default 
for BarParent { - fn default() -> BarParent { - BarParent { - bar: default_barparent_bar(), - } - } - } - - #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] - pub struct Foo { - #[serde(rename = "barParent")] - pub bar_parent: Option, - } - - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ], - "default": "bar0" - } - ] - } - ] - } - ] - }"#; - - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "fields": - [ - { - "name": "barParent", - "type": [ - "null", - { - "type": "record", - "name": "BarParent", - "fields": [ - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1" - ], - "default": "bar0" - } - ] - } - ] - } - ] - }"#; - - let writer_schema = Schema::parse_str(writer_schema)?; - let foo2 = Foo { - bar_parent: Some(BarParent { bar: Bar::Bar2 }), - }; - let avro_value = to_value(foo2)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - types::Value::Record(fields) => { - assert_eq!(fields.len(), 1); - // assert_eq!(fields[0].0, "barInit"); - // assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); - assert_eq!(fields[0].0, "barParent"); - // assert_eq!(fields[1].1, types::Value::Enum(1, "bar1".to_string())); - } - _ => panic!("Expected Value::Record"), - } - - Ok(()) -} diff --git a/lang/rust/avro/tests/big_decimal.rs b/lang/rust/avro/tests/big_decimal.rs deleted file mode 100644 index b05247103ce..00000000000 --- a/lang/rust/avro/tests/big_decimal.rs +++ /dev/null @@ -1,23 +0,0 @@ 
-// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::BigDecimal; - -#[test] -fn avro_3948_use_apache_avro_big_decimal() { - let _ = BigDecimal::from(1234567890123456789_i64); -} diff --git a/lang/rust/avro/tests/bigdec.avro b/lang/rust/avro/tests/bigdec.avro deleted file mode 100644 index 641db7e2a47553371ce17679e3ad14d6be08e1e1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 189 zcmeZI%3@>^ODrqO*DFrWNX<=bVX9UtsVqoUvQjEaP0lY$QPNS$OUy;#r{pICr9q^Q zQd(wePD-(oRdh8>8CWD9tVaiG6x_(9%97M#pst+!^vvYMoDhgbNtx-oDXGbsxrsSS kwY9Oe49_Y)-d~aba?y@Qi|4#zbx~pB;$UHTz=AFe0E!w$Q2+n{ diff --git a/lang/rust/avro/tests/codecs.rs b/lang/rust/avro/tests/codecs.rs deleted file mode 100644 index 5017d338966..00000000000 --- a/lang/rust/avro/tests/codecs.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{ - types::{Record, Value}, - Codec, Reader, Schema, Writer, -}; -use apache_avro_test_helper::TestResult; - -#[test] -fn avro_4032_null_codec_settings() -> TestResult { - avro_4032_codec_settings(Codec::Null) -} -#[test] -fn avro_4032_deflate_codec_settings() -> TestResult { - avro_4032_codec_settings(Codec::Deflate) -} - -#[test] -#[cfg(feature = "bzip")] -fn avro_4032_bzip_codec_settings() -> TestResult { - use apache_avro::Bzip2Settings; - use bzip2::Compression; - let codec = Codec::Bzip2(Bzip2Settings::new(Compression::fast().level() as u8)); - avro_4032_codec_settings(codec) -} - -#[test] -#[cfg(feature = "xz")] -fn avro_4032_xz_codec_settings() -> TestResult { - use apache_avro::XzSettings; - let codec = Codec::Xz(XzSettings::new(8)); - avro_4032_codec_settings(codec) -} - -#[test] -#[cfg(feature = "zstandard")] -fn avro_4032_zstandard_codec_settings() -> TestResult { - use apache_avro::ZstandardSettings; - let compression_level = 13; - let codec = Codec::Zstandard(ZstandardSettings::new(compression_level)); - avro_4032_codec_settings(codec) -} - -fn avro_4032_codec_settings(codec: Codec) -> TestResult { - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "Test", - "fields": [ - {"name": "f1", "type": "int"}, - {"name": "f2", "type": "string"} - ] - }"#, - )?; - - let mut writer = Writer::with_codec(&schema, Vec::new(), codec); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("f1", 27_i32); - record.put("f2", "foo"); - writer.append(record)?; - let input = writer.into_inner()?; 
- let mut reader = Reader::new(&input[..])?; - assert_eq!( - reader.next().unwrap()?, - Value::Record(vec![ - ("f1".to_string(), Value::Int(27)), - ("f2".to_string(), Value::String("foo".to_string())), - ]) - ); - assert!(reader.next().is_none()); - - Ok(()) -} diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs deleted file mode 100644 index c6589a978f1..00000000000 --- a/lang/rust/avro/tests/io.rs +++ /dev/null @@ -1,476 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Port of https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py -use apache_avro::{from_avro_datum, to_avro_datum, types::Value, Error, Schema}; -use apache_avro_test_helper::TestResult; -use pretty_assertions::assert_eq; -use std::{io::Cursor, sync::OnceLock}; - -fn schemas_to_validate() -> &'static Vec<(&'static str, Value)> { - static SCHEMAS_TO_VALIDATE_ONCE: OnceLock> = OnceLock::new(); - SCHEMAS_TO_VALIDATE_ONCE.get_or_init(|| { - vec![ - (r#""null""#, Value::Null), - (r#""boolean""#, Value::Boolean(true)), - ( - r#""string""#, - Value::String("adsfasdf09809dsf-=adsf".to_string()), - ), - ( - r#""bytes""#, - Value::Bytes("12345abcd".to_string().into_bytes()), - ), - (r#""int""#, Value::Int(1234)), - (r#""long""#, Value::Long(1234)), - (r#""float""#, Value::Float(1234.0)), - (r#""double""#, Value::Double(1234.0)), - ( - r#"{"type": "fixed", "name": "Test", "size": 1}"#, - Value::Fixed(1, vec![b'B']), - ), - ( - r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#, - Value::Enum(1, "B".to_string()), - ), - ( - r#"{"type": "array", "items": "long"}"#, - Value::Array(vec![Value::Long(1), Value::Long(3), Value::Long(2)]), - ), - ( - r#"{"type": "map", "values": "long"}"#, - Value::Map( - [ - ("a".to_string(), Value::Long(1i64)), - ("b".to_string(), Value::Long(3i64)), - ("c".to_string(), Value::Long(2i64)), - ] - .iter() - .cloned() - .collect(), - ), - ), - ( - r#"["string", "null", "long"]"#, - Value::Union(1, Box::new(Value::Null)), - ), - ( - r#"{"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}"#, - Value::Record(vec![("f".to_string(), Value::Long(1))]), - ), - ] - }) -} - -fn binary_encodings() -> &'static Vec<(i64, Vec)> { - static BINARY_ENCODINGS_ONCE: OnceLock)>> = OnceLock::new(); - BINARY_ENCODINGS_ONCE.get_or_init(|| { - vec![ - (0, vec![0x00]), - (-1, vec![0x01]), - (1, vec![0x02]), - (-2, vec![0x03]), - (2, vec![0x04]), - (-64, vec![0x7f]), - (64, vec![0x80, 0x01]), - (8192, vec![0x80, 0x80, 
0x01]), - (-8193, vec![0x81, 0x80, 0x01]), - ] - }) -} - -fn default_value_examples() -> &'static Vec<(&'static str, &'static str, Value)> { - static DEFAULT_VALUE_EXAMPLES_ONCE: OnceLock> = - OnceLock::new(); - DEFAULT_VALUE_EXAMPLES_ONCE.get_or_init(|| { - vec![ - (r#""null""#, "null", Value::Null), - (r#""boolean""#, "true", Value::Boolean(true)), - (r#""string""#, r#""foo""#, Value::String("foo".to_string())), - (r#""bytes""#, r#""a""#, Value::Bytes(vec![97])), // ASCII 'a' => one byte - (r#""bytes""#, r#""\u00FF""#, Value::Bytes(vec![195, 191])), // The value is between U+0080 and U+07FF => two bytes - (r#""int""#, "5", Value::Int(5)), - (r#""long""#, "5", Value::Long(5)), - (r#""float""#, "1.1", Value::Float(1.1)), - (r#""double""#, "1.1", Value::Double(1.1)), - (r#""float""#, r#""INF""#, Value::Float(f32::INFINITY)), - (r#""double""#, r#""INF""#, Value::Double(f64::INFINITY)), - (r#""float""#, r#""Infinity""#, Value::Float(f32::INFINITY)), - ( - r#""float""#, - r#""-Infinity""#, - Value::Float(f32::NEG_INFINITY), - ), - (r#""double""#, r#""Infinity""#, Value::Double(f64::INFINITY)), - ( - r#""double""#, - r#""-Infinity""#, - Value::Double(f64::NEG_INFINITY), - ), - (r#""float""#, r#""NaN""#, Value::Float(f32::NAN)), - (r#""double""#, r#""NaN""#, Value::Double(f64::NAN)), - ( - r#"{"type": "fixed", "name": "F", "size": 2}"#, - r#""a""#, - Value::Fixed(1, vec![97]), - ), // ASCII 'a' => one byte - ( - r#"{"type": "fixed", "name": "F", "size": 2}"#, - r#""\u00FF""#, - Value::Fixed(2, vec![195, 191]), - ), // The value is between U+0080 and U+07FF => two bytes - ( - r#"{"type": "enum", "name": "F", "symbols": ["FOO", "BAR"]}"#, - r#""FOO""#, - Value::Enum(0, "FOO".to_string()), - ), - ( - r#"{"type": "array", "items": "int"}"#, - "[1, 2, 3]", - Value::Array(vec![Value::Int(1), Value::Int(2), Value::Int(3)]), - ), - ( - r#"{"type": "map", "values": "int"}"#, - r#"{"a": 1, "b": 2}"#, - Value::Map( - [ - ("a".to_string(), Value::Int(1)), - ("b".to_string(), 
Value::Int(2)), - ] - .iter() - .cloned() - .collect(), - ), - ), - ( - r#"["int", "null"]"#, - "5", - Value::Union(0, Box::new(Value::Int(5))), - ), - ( - r#"{"type": "record", "name": "F", "fields": [{"name": "A", "type": "int"}]}"#, - r#"{"A": 5}"#, - Value::Record(vec![("A".to_string(), Value::Int(5))]), - ), - ( - r#"["null", "int"]"#, - "null", - Value::Union(0, Box::new(Value::Null)), - ), - ] - }) -} - -fn long_record_schema() -> &'static Schema { - static LONG_RECORD_SCHEMA_ONCE: OnceLock = OnceLock::new(); - LONG_RECORD_SCHEMA_ONCE.get_or_init(|| { - Schema::parse_str( - r#" -{ - "type": "record", - "name": "Test", - "fields": [ - {"name": "A", "type": "int"}, - {"name": "B", "type": "int"}, - {"name": "C", "type": "int"}, - {"name": "D", "type": "int"}, - {"name": "E", "type": "int"}, - {"name": "F", "type": "int"}, - {"name": "G", "type": "int"} - ] -} -"#, - ) - .unwrap() - }) -} - -fn long_record_datum() -> &'static Value { - static LONG_RECORD_DATUM_ONCE: OnceLock = OnceLock::new(); - LONG_RECORD_DATUM_ONCE.get_or_init(|| { - Value::Record(vec![ - ("A".to_string(), Value::Int(1)), - ("B".to_string(), Value::Int(2)), - ("C".to_string(), Value::Int(3)), - ("D".to_string(), Value::Int(4)), - ("E".to_string(), Value::Int(5)), - ("F".to_string(), Value::Int(6)), - ("G".to_string(), Value::Int(7)), - ]) - }) -} - -#[test] -fn test_validate() -> TestResult { - for (raw_schema, value) in schemas_to_validate().iter() { - let schema = Schema::parse_str(raw_schema)?; - assert!( - value.validate(&schema), - "value {value:?} does not validate schema: {raw_schema}" - ); - } - - Ok(()) -} - -#[test] -fn test_round_trip() -> TestResult { - for (raw_schema, value) in schemas_to_validate().iter() { - let schema = Schema::parse_str(raw_schema)?; - let encoded = to_avro_datum(&schema, value.clone()).unwrap(); - let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded), None).unwrap(); - assert_eq!(value, &decoded); - } - - Ok(()) -} - -#[test] -fn 
test_binary_int_encoding() -> TestResult { - for (number, hex_encoding) in binary_encodings().iter() { - let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32))?; - assert_eq!(&encoded, hex_encoding); - } - - Ok(()) -} - -#[test] -fn test_binary_long_encoding() -> TestResult { - for (number, hex_encoding) in binary_encodings().iter() { - let encoded = to_avro_datum(&Schema::Long, Value::Long(*number))?; - assert_eq!(&encoded, hex_encoding); - } - - Ok(()) -} - -#[test] -fn test_schema_promotion() -> TestResult { - // Each schema is present in order of promotion (int -> long, long -> float, float -> double) - // Each value represents the expected decoded value when promoting a value previously encoded with a promotable schema - let promotable_schemas = [r#""int""#, r#""long""#, r#""float""#, r#""double""#]; - let promotable_values = vec![ - Value::Int(219), - Value::Long(219), - Value::Float(219.0), - Value::Double(219.0), - ]; - for (i, writer_raw_schema) in promotable_schemas.iter().enumerate() { - let writer_schema = Schema::parse_str(writer_raw_schema)?; - let original_value = &promotable_values[i]; - for (j, reader_raw_schema) in promotable_schemas.iter().enumerate().skip(i + 1) { - let reader_schema = Schema::parse_str(reader_raw_schema)?; - let encoded = to_avro_datum(&writer_schema, original_value.clone())?; - let decoded = from_avro_datum( - &writer_schema, - &mut Cursor::new(encoded), - Some(&reader_schema), - ) - .unwrap_or_else(|_| { - panic!("failed to decode {original_value:?} with schema: {reader_raw_schema:?}",) - }); - assert_eq!(decoded, promotable_values[j]); - } - } - - Ok(()) -} - -#[test] -fn test_unknown_symbol() -> TestResult { - let writer_schema = - Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}"#)?; - let reader_schema = - Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}"#)?; - let original_value = Value::Enum(0, "FOO".to_string()); - let encoded = 
to_avro_datum(&writer_schema, original_value)?; - let decoded = from_avro_datum( - &writer_schema, - &mut Cursor::new(encoded), - Some(&reader_schema), - ); - assert!(decoded.is_err()); - - Ok(()) -} - -#[test] -fn test_default_value() -> TestResult { - for (field_type, default_json, default_datum) in default_value_examples().iter() { - let reader_schema = Schema::parse_str(&format!( - r#"{{ - "type": "record", - "name": "Test", - "fields": [ - {{"name": "H", "type": {field_type}, "default": {default_json}}} - ] - }}"# - ))?; - let datum_to_read = Value::Record(vec![("H".to_string(), default_datum.clone())]); - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; - - match default_datum { - // For float/double, NaN != NaN, so we check specially here. - Value::Double(f) if f.is_nan() => { - let Value::Record(fields) = datum_read else { - unreachable!("the test always constructs top level as record") - }; - let Value::Double(f) = fields[0].1 else { - panic!("double expected") - }; - assert!( - f.is_nan(), - "{field_type} -> {default_json} is parsed as {f} rather than NaN" - ); - } - Value::Float(f) if f.is_nan() => { - let Value::Record(fields) = datum_read else { - unreachable!("the test always constructs top level as record") - }; - let Value::Float(f) = fields[0].1 else { - panic!("double expected") - }; - assert!( - f.is_nan(), - "{field_type} -> {default_json} is parsed as {f} rather than NaN" - ); - } - _ => { - assert_eq!( - datum_read, datum_to_read, - "{} -> {}", - *field_type, *default_json - ); - } - } - } - - Ok(()) -} - -#[test] -fn test_no_default_value() -> TestResult { - let reader_schema = Schema::parse_str( - r#"{ - "type": "record", - "name": "Test", - "fields": [ - {"name": "H", "type": "int"} - ] - }"#, - )?; - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let 
result = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - ); - assert!(result.is_err()); - - Ok(()) -} - -#[test] -fn test_projection() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "Test", - "fields": [ - {"name": "E", "type": "int"}, - {"name": "F", "type": "int"} - ] - } - "#, - )?; - let datum_to_read = Value::Record(vec![ - ("E".to_string(), Value::Int(5)), - ("F".to_string(), Value::Int(6)), - ]); - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; - assert_eq!(datum_to_read, datum_read); - - Ok(()) -} - -#[test] -fn test_field_order() -> TestResult { - let reader_schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "Test", - "fields": [ - {"name": "F", "type": "int"}, - {"name": "E", "type": "int"} - ] - } - "#, - )?; - let datum_to_read = Value::Record(vec![ - ("F".to_string(), Value::Int(6)), - ("E".to_string(), Value::Int(5)), - ]); - let encoded = to_avro_datum(long_record_schema(), long_record_datum().clone())?; - let datum_read = from_avro_datum( - long_record_schema(), - &mut Cursor::new(encoded), - Some(&reader_schema), - )?; - assert_eq!(datum_to_read, datum_read); - - Ok(()) -} - -#[test] -fn test_type_exception() -> Result<(), String> { - let writer_schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "Test", - "fields": [ - {"name": "F", "type": "int"}, - {"name": "E", "type": "int"} - ] - } - "#, - ) - .unwrap(); - let datum_to_write = Value::Record(vec![ - ("E".to_string(), Value::Int(5)), - ("F".to_string(), Value::String(String::from("Bad"))), - ]); - let encoded = to_avro_datum(&writer_schema, datum_to_write); - match encoded { - Ok(_) => Err(String::from("Expected ValidationError, got Ok")), - Err(Error::Validation) => Ok(()), - Err(ref e) => Err(format!("Expected 
ValidationError, got {e:?}")), - } -} diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs deleted file mode 100644 index 13cf6af266d..00000000000 --- a/lang/rust/avro/tests/schema.rs +++ /dev/null @@ -1,2019 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use std::{ - collections::HashMap, - io::{Cursor, Read}, -}; - -use apache_avro::{ - from_avro_datum, from_value, - schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, - to_avro_datum, to_value, - types::{Record, Value}, - Codec, Error, Reader, Schema, Writer, -}; -use apache_avro_test_helper::{ - data::{examples, valid_examples, DOC_EXAMPLES}, - init, TestResult, -}; - -#[test] -fn test_correct_recursive_extraction() -> TestResult { - init(); - let raw_outer_schema = r#"{ - "type": "record", - "name": "X", - "fields": [ - { - "name": "y", - "type": { - "type": "record", - "name": "Y", - "fields": [ - { - "name": "Z", - "type": "X" - } - ] - } - } - ] - }"#; - let outer_schema = Schema::parse_str(raw_outer_schema)?; - if let Schema::Record(RecordSchema { - fields: outer_fields, - .. 
- }) = outer_schema - { - let inner_schema = &outer_fields[0].schema; - if let Schema::Record(RecordSchema { - fields: inner_fields, - .. - }) = inner_schema - { - if let Schema::Record(RecordSchema { - name: recursive_type, - .. - }) = &inner_fields[0].schema - { - assert_eq!("X", recursive_type.name.as_str()); - } - } else { - panic!("inner schema {inner_schema:?} should have been a record") - } - } else { - panic!("outer schema {outer_schema:?} should have been a record") - } - - Ok(()) -} - -#[test] -fn test_parse() -> TestResult { - init(); - for (raw_schema, valid) in examples().iter() { - let schema = Schema::parse_str(raw_schema); - if *valid { - assert!( - schema.is_ok(), - "schema {raw_schema} was supposed to be valid; error: {schema:?}", - ) - } else { - assert!( - schema.is_err(), - "schema {raw_schema} was supposed to be invalid" - ) - } - } - Ok(()) -} - -#[test] -fn test_3799_parse_reader() -> TestResult { - init(); - for (raw_schema, valid) in examples().iter() { - let schema = Schema::parse_reader(&mut Cursor::new(raw_schema)); - if *valid { - assert!( - schema.is_ok(), - "schema {raw_schema} was supposed to be valid; error: {schema:?}", - ) - } else { - assert!( - schema.is_err(), - "schema {raw_schema} was supposed to be invalid" - ) - } - } - - // Ensure it works for trait objects too. - for (raw_schema, valid) in examples().iter() { - let reader: &mut dyn Read = &mut Cursor::new(raw_schema); - let schema = Schema::parse_reader(reader); - if *valid { - assert!( - schema.is_ok(), - "schema {raw_schema} was supposed to be valid; error: {schema:?}", - ) - } else { - assert!( - schema.is_err(), - "schema {raw_schema} was supposed to be invalid" - ) - } - } - Ok(()) -} - -#[test] -fn test_3799_raise_io_error_from_parse_read() -> Result<(), String> { - // 0xDF is invalid for UTF-8. 
- let mut invalid_data = Cursor::new([0xDF]); - - let error = Schema::parse_reader(&mut invalid_data).unwrap_err(); - - if let Error::ReadSchemaFromReader(e) = error { - assert!( - e.to_string().contains("stream did not contain valid UTF-8"), - "{e}" - ); - Ok(()) - } else { - Err(format!("Expected std::io::Error, got {error:?}")) - } -} - -#[test] -/// Test that the string generated by an Avro Schema object is, in fact, a valid Avro schema. -fn test_valid_cast_to_string_after_parse() -> TestResult { - init(); - for (raw_schema, _) in valid_examples().iter() { - let schema = Schema::parse_str(raw_schema)?; - Schema::parse_str(schema.canonical_form().as_str())?; - } - Ok(()) -} - -#[test] -/// Test that a list of schemas whose definitions do not depend on each other produces the same -/// result as parsing each element of the list individually -fn test_parse_list_without_cross_deps() -> TestResult { - init(); - let schema_str_1 = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - let schema_str_2 = r#"{ - "name": "B", - "type": "fixed", - "size": 16 - }"#; - let schema_strs = [schema_str_1, schema_str_2]; - let schemas = Schema::parse_list(&schema_strs)?; - - for schema_str in &schema_strs { - let parsed = Schema::parse_str(schema_str)?; - assert!(schemas.contains(&parsed)); - } - Ok(()) -} - -#[test] -/// Test that the parsing of a list of schemas, whose definitions do depend on each other, can -/// perform the necessary schema composition. This should work regardless of the order in which -/// the schemas are input. -/// However, the output order is guaranteed to be the same as the input order. 
-fn test_parse_list_with_cross_deps_basic() -> TestResult { - init(); - let schema_a_str = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - let schema_b_str = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": "A"} - ] - }"#; - - let schema_strs_first = [schema_a_str, schema_b_str]; - let schema_strs_second = [schema_b_str, schema_a_str]; - let schemas_first = Schema::parse_list(&schema_strs_first)?; - let schemas_second = Schema::parse_list(&schema_strs_second)?; - - assert_eq!(schemas_first[0], schemas_second[1]); - assert_eq!(schemas_first[1], schemas_second[0]); - Ok(()) -} - -#[test] -fn test_parse_list_recursive_type() -> TestResult { - init(); - let schema_str_1 = r#"{ - "name": "A", - "doc": "A's schema", - "type": "record", - "fields": [ - {"name": "a_field_one", "type": "B"} - ] - }"#; - let schema_str_2 = r#"{ - "name": "B", - "doc": "B's schema", - "type": "record", - "fields": [ - {"name": "b_field_one", "type": "A"} - ] - }"#; - let schema_strs_first = [schema_str_1, schema_str_2]; - let schema_strs_second = [schema_str_2, schema_str_1]; - let _ = Schema::parse_list(&schema_strs_first)?; - let _ = Schema::parse_list(&schema_strs_second)?; - Ok(()) -} - -#[test] -/// Test that schema composition resolves namespaces. 
-fn test_parse_list_with_cross_deps_and_namespaces() -> TestResult { - init(); - let schema_a_str = r#"{ - "name": "A", - "type": "record", - "namespace": "namespace", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - let schema_b_str = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": "namespace.A"} - ] - }"#; - - let schemas_first = Schema::parse_list(&[schema_a_str, schema_b_str])?; - let schemas_second = Schema::parse_list(&[schema_b_str, schema_a_str])?; - - assert_eq!(schemas_first[0], schemas_second[1]); - assert_eq!(schemas_first[1], schemas_second[0]); - - Ok(()) -} - -#[test] -/// Test that schema composition fails on namespace errors. -fn test_parse_list_with_cross_deps_and_namespaces_error() -> TestResult { - init(); - let schema_str_1 = r#"{ - "name": "A", - "type": "record", - "namespace": "namespace", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - let schema_str_2 = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": "A"} - ] - }"#; - - let schema_strs_first = [schema_str_1, schema_str_2]; - let schema_strs_second = [schema_str_2, schema_str_1]; - let _ = Schema::parse_list(&schema_strs_first).expect_err("Test failed"); - let _ = Schema::parse_list(&schema_strs_second).expect_err("Test failed"); - - Ok(()) -} - -#[test] -// -// test that field's RecordSchema could be referenced by a following field by full name -fn test_parse_reused_record_schema_by_fullname() -> TestResult { - init(); - let schema_str = r#" - { - "type" : "record", - "name" : "Weather", - "namespace" : "test", - "doc" : "A weather reading.", - "fields" : [ - { - "name" : "station", - "type" : { - "type" : "string", - "avro.java.string" : "String" - } - }, - { - "name" : "max_temp", - "type" : { - "type" : "record", - "name" : "Temp", - "namespace": "prefix", - "doc" : "A temperature reading.", - "fields" : [ { - "name" : "temp", - "type" : "long" - } ] - } - }, { - "name" : 
"min_temp", - "type" : "prefix.Temp" - } - ] - } - "#; - - let schema = Schema::parse_str(schema_str); - assert!(schema.is_ok()); - match schema? { - Schema::Record(RecordSchema { - ref name, - aliases: _, - doc: _, - ref fields, - lookup: _, - attributes: _, - }) => { - assert_eq!(name.fullname(None), "test.Weather", "Name does not match!"); - - assert_eq!(fields.len(), 3, "The number of the fields is not correct!"); - - let RecordField { - ref name, - doc: _, - default: _, - aliases: _, - ref schema, - order: _, - position: _, - custom_attributes: _, - } = fields.get(2).unwrap(); - - assert_eq!(name, "min_temp"); - - match schema { - Schema::Ref { ref name } => { - assert_eq!(name.fullname(None), "prefix.Temp", "Name does not match!"); - } - unexpected => unreachable!("Unexpected schema type: {:?}", unexpected), - } - } - unexpected => unreachable!("Unexpected schema type: {:?}", unexpected), - } - - Ok(()) -} - -/// Return all permutations of an input slice -fn permutations(list: &[T]) -> Vec> { - let size = list.len(); - let indices = permutation_indices((0..size).collect()); - let mut perms = Vec::new(); - for perm_map in &indices { - let mut perm = Vec::new(); - for ix in perm_map { - perm.push(&list[*ix]); - } - perms.push(perm) - } - perms -} - -/// Return all permutations of the indices of a vector -fn permutation_indices(indices: Vec) -> Vec> { - let size = indices.len(); - let mut perms: Vec> = Vec::new(); - if size == 1 { - perms.push(indices); - return perms; - } - for index in 0..size { - let (head, tail) = indices.split_at(index); - let (first, rest) = tail.split_at(1); - let mut head = head.to_vec(); - head.extend_from_slice(rest); - for mut sub_index in permutation_indices(head) { - sub_index.insert(0, first[0]); - perms.push(sub_index); - } - } - - perms -} - -#[test] -/// Test that a type that depends on more than one other type is parsed correctly when all -/// definitions are passed in as a list. 
This should work regardless of the ordering of the list. -fn test_parse_list_multiple_dependencies() -> TestResult { - init(); - let schema_a_str = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": ["null", "B", "C"]} - ] - }"#; - let schema_b_str = r#"{ - "name": "B", - "type": "fixed", - "size": 16 - }"#; - let schema_c_str = r#"{ - "name": "C", - "type": "record", - "fields": [ - {"name": "field_one", "type": "string"} - ] - }"#; - - let parsed = Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str])?; - let schema_strs = vec![schema_a_str, schema_b_str, schema_c_str]; - for schema_str_perm in permutations(&schema_strs) { - let schema_str_perm: Vec<&str> = schema_str_perm.iter().map(|s| **s).collect(); - let schemas = Schema::parse_list(&schema_str_perm)?; - assert_eq!(schemas.len(), 3); - for parsed_schema in &parsed { - assert!(schemas.contains(parsed_schema)); - } - } - Ok(()) -} - -#[test] -/// Test that a type that is depended on by more than one other type is parsed correctly when all -/// definitions are passed in as a list. This should work regardless of the ordering of the list. 
-fn test_parse_list_shared_dependency() -> TestResult { - init(); - let schema_a_str = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": {"type": "array", "items": "C"}} - ] - }"#; - let schema_b_str = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": {"type": "map", "values": "C"}} - ] - }"#; - let schema_c_str = r#"{ - "name": "C", - "type": "record", - "fields": [ - {"name": "field_one", "type": "string"} - ] - }"#; - - let parsed = Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str])?; - let schema_strs = vec![schema_a_str, schema_b_str, schema_c_str]; - for schema_str_perm in permutations(&schema_strs) { - let schema_str_perm: Vec<&str> = schema_str_perm.iter().map(|s| **s).collect(); - let schemas = Schema::parse_list(&schema_str_perm)?; - assert_eq!(schemas.len(), 3); - for parsed_schema in &parsed { - assert!(schemas.contains(parsed_schema)); - } - } - Ok(()) -} - -#[test] -/// Test that trying to parse two schemas with the same fullname returns an Error -fn test_name_collision_error() -> TestResult { - init(); - let schema_str_1 = r#"{ - "name": "foo.A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "double"} - ] - }"#; - let schema_str_2 = r#"{ - "name": "A", - "type": "record", - "namespace": "foo", - "fields": [ - {"name": "field_two", "type": "string"} - ] - }"#; - - let _ = Schema::parse_list(&[schema_str_1, schema_str_2]).expect_err("Test failed"); - Ok(()) -} - -#[test] -/// Test that having the same name but different fullnames does not return an error -fn test_namespace_prevents_collisions() -> TestResult { - init(); - let schema_str_1 = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "double"} - ] - }"#; - let schema_str_2 = r#"{ - "name": "A", - "type": "record", - "namespace": "foo", - "fields": [ - {"name": "field_two", "type": "string"} - ] - }"#; - - let parsed = Schema::parse_list(&[schema_str_1, 
schema_str_2])?; - let parsed_1 = Schema::parse_str(schema_str_1)?; - let parsed_2 = Schema::parse_str(schema_str_2)?; - assert_eq!(parsed, vec!(parsed_1, parsed_2)); - Ok(()) -} - -// The fullname is determined in one of the following ways: -// * A name and namespace are both specified. For example, -// one might use "name": "X", "namespace": "org.foo" -// to indicate the fullname "org.foo.X". -// * A fullname is specified. If the name specified contains -// a dot, then it is assumed to be a fullname, and any -// namespace also specified is ignored. For example, -// use "name": "org.foo.X" to indicate the -// fullname "org.foo.X". -// * A name only is specified, i.e., a name that contains no -// dots. In this case the namespace is taken from the most -// tightly enclosing schema or protocol. For example, -// if "name": "X" is specified, and this occurs -// within a field of the record definition /// of "org.foo.Y", then the fullname is "org.foo.X". - -// References to previously defined names are as in the latter -// two cases above: if they contain a dot they are a fullname, if -// they do not contain a dot, the namespace is the namespace of -// the enclosing definition. - -// Primitive type names have no namespace and their names may -// not be defined in any namespace. A schema may only contain -// multiple definitions of a fullname if the definitions are -// equivalent. 
- -#[test] -fn test_fullname_name_and_namespace_specified() -> TestResult { - init(); - let name: Name = - serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": null}"#)?; - let fullname = name.fullname(None); - assert_eq!("o.a.h.a", fullname); - Ok(()) -} - -#[test] -fn test_fullname_fullname_and_namespace_specified() -> TestResult { - init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h"}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); - let fullname = name.fullname(None); - assert_eq!("a.b.c.d", fullname); - Ok(()) -} - -#[test] -fn test_fullname_name_and_default_namespace_specified() -> TestResult { - init(); - let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null}"#)?; - assert_eq!(&name.name, "a"); - assert_eq!(name.namespace, None); - let fullname = name.fullname(Some("b.c.d".into())); - assert_eq!("b.c.d.a", fullname); - Ok(()) -} - -#[test] -fn test_fullname_fullname_and_default_namespace_specified() -> TestResult { - init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); - let fullname = name.fullname(Some("o.a.h".into())); - assert_eq!("a.b.c.d", fullname); - Ok(()) -} - -#[test] -fn test_avro_3452_parsing_name_without_namespace() -> TestResult { - init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d"}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); - let fullname = name.fullname(None); - assert_eq!("a.b.c.d", fullname); - Ok(()) -} - -#[test] -fn test_avro_3452_parsing_name_with_leading_dot_without_namespace() -> TestResult { - init(); - let name: Name = serde_json::from_str(r#"{"name": ".a"}"#)?; - assert_eq!(&name.name, "a"); - assert_eq!(name.namespace, None); - assert_eq!("a", name.fullname(None)); - Ok(()) -} - -#[test] -fn 
test_avro_3452_parse_json_without_name_field() -> TestResult { - init(); - let result: serde_json::error::Result = serde_json::from_str(r#"{"unknown": "a"}"#); - assert!(&result.is_err()); - assert_eq!(result.unwrap_err().to_string(), "No `name` field"); - Ok(()) -} - -#[test] -fn test_fullname_fullname_namespace_and_default_namespace_specified() -> TestResult { - init(); - let name: Name = - serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#)?; - assert_eq!(&name.name, "d"); - assert_eq!(name.namespace, Some("a.b.c".to_owned())); - let fullname = name.fullname(Some("o.a.h".into())); - assert_eq!("a.b.c.d", fullname); - Ok(()) -} - -#[test] -fn test_fullname_name_namespace_and_default_namespace_specified() -> TestResult { - init(); - let name: Name = - serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#)?; - assert_eq!(&name.name, "a"); - assert_eq!(name.namespace, Some("o.a.a".to_owned())); - let fullname = name.fullname(Some("o.a.h".into())); - assert_eq!("o.a.a.a", fullname); - Ok(()) -} - -#[test] -fn test_doc_attributes() -> TestResult { - init(); - fn assert_doc(schema: &Schema) { - match schema { - Schema::Enum(EnumSchema { doc, .. }) => assert!(doc.is_some()), - Schema::Record(RecordSchema { doc, .. }) => assert!(doc.is_some()), - Schema::Fixed(FixedSchema { doc, .. }) => assert!(doc.is_some()), - Schema::String => (), - _ => unreachable!("Unexpected schema type: {:?}", schema), - } - } - - for (raw_schema, _) in DOC_EXAMPLES.iter() { - let original_schema = Schema::parse_str(raw_schema)?; - assert_doc(&original_schema); - if let Schema::Record(RecordSchema { fields, .. 
}) = original_schema { - for f in fields { - assert_doc(&f.schema) - } - } - } - Ok(()) -} - -/* -TODO: (#94) add support for user-defined attributes and uncomment (may need some tweaks to compile) -#[test] -fn test_other_attributes() { - fn assert_attribute_type(attribute: (String, serde_json::Value)) { - match attribute.1.as_ref() { - "cp_boolean" => assert!(attribute.2.is_bool()), - "cp_int" => assert!(attribute.2.is_i64()), - "cp_object" => assert!(attribute.2.is_object()), - "cp_float" => assert!(attribute.2.is_f64()), - "cp_array" => assert!(attribute.2.is_array()), - } - } - - for (raw_schema, _) in OTHER_ATTRIBUTES_EXAMPLES.iter() { - let schema = Schema::parse_str(raw_schema)?; - // all inputs have at least some user-defined attributes - assert!(schema.other_attributes.is_some()); - for prop in schema.other_attributes?.iter() { - assert_attribute_type(prop); - } - if let Schema::Record { fields, .. } = schema { - for f in fields { - // all fields in the record have at least some user-defined attributes - assert!(f.schema.other_attributes.is_some()); - for prop in f.schema.other_attributes?.iter() { - assert_attribute_type(prop); - } - } - } - } -} -*/ - -#[test] -fn test_root_error_is_not_swallowed_on_parse_error() -> Result<(), String> { - init(); - let raw_schema = r#"/not/a/real/file"#; - let error = Schema::parse_str(raw_schema).unwrap_err(); - - if let Error::ParseSchemaJson(e) = error { - assert!( - e.to_string().contains("expected value at line 1 column 1"), - "{}", - e - ); - Ok(()) - } else { - Err(format!("Expected serde_json::error::Error, got {error:?}")) - } -} - -// AVRO-3302 -#[test] -fn test_record_schema_with_cyclic_references() -> TestResult { - init(); - let schema = Schema::parse_str( - r#" - { - "type": "record", - "name": "test", - "fields": [{ - "name": "recordField", - "type": { - "type": "record", - "name": "Node", - "fields": [ - {"name": "label", "type": "string"}, - {"name": "children", "type": {"type": "array", "items": 
"Node"}} - ] - } - }] - } - "#, - )?; - - let mut datum = Record::new(&schema).unwrap(); - datum.put( - "recordField", - Value::Record(vec![ - ("label".into(), Value::String("level_1".into())), - ( - "children".into(), - Value::Array(vec![Value::Record(vec![ - ("label".into(), Value::String("level_2".into())), - ( - "children".into(), - Value::Array(vec![Value::Record(vec![ - ("label".into(), Value::String("level_3".into())), - ( - "children".into(), - Value::Array(vec![Value::Record(vec![ - ("label".into(), Value::String("level_4".into())), - ("children".into(), Value::Array(vec![])), - ])]), - ), - ])]), - ), - ])]), - ), - ]), - ); - - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null); - if let Err(err) = writer.append(datum) { - panic!("An error occurred while writing datum: {err:?}") - } - let bytes = writer.into_inner()?; - assert_eq!(316, bytes.len()); - - match Reader::new(&mut bytes.as_slice()) { - Ok(mut reader) => match reader.next() { - Some(value) => log::debug!("{:?}", value?), - None => panic!("No value was read!"), - }, - Err(err) => panic!("An error occurred while reading datum: {err:?}"), - } - Ok(()) -} - -/* -// TODO: (#93) add support for logical type and attributes and uncomment (may need some tweaks to compile) -#[test] -fn test_decimal_valid_type_attributes() { - init(); - let fixed_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[0])?; - assert_eq!(4, fixed_decimal.get_attribute("precision")); - assert_eq!(2, fixed_decimal.get_attribute("scale")); - assert_eq!(2, fixed_decimal.get_attribute("size")); - - let bytes_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[1])?; - assert_eq!(4, bytes_decimal.get_attribute("precision")); - assert_eq!(0, bytes_decimal.get_attribute("scale")); -} -*/ - -// https://github.com/flavray/avro-rs/issues/47 -#[test] -fn avro_old_issue_47() -> TestResult { - init(); - let schema_str = r#" - { - "type": "record", - "name": "my_record", - "fields": [ - {"name": "a", 
"type": "long"}, - {"name": "b", "type": "string"} - ] - }"#; - let schema = Schema::parse_str(schema_str)?; - - use serde::{Deserialize, Serialize}; - - #[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] - pub struct MyRecord { - b: String, - a: i64, - } - - let record = MyRecord { - b: "hello".to_string(), - a: 1, - }; - - let ser_value = to_value(record.clone())?; - let serialized_bytes = to_avro_datum(&schema, ser_value)?; - - let de_value = &from_avro_datum(&schema, &mut &*serialized_bytes, None)?; - let deserialized_record = from_value::(de_value)?; - - assert_eq!(record, deserialized_record); - Ok(()) -} - -#[test] -fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_type() -> TestResult -{ - use apache_avro::{from_avro_datum, to_avro_datum, types::Value}; - use serde::{Deserialize, Serialize}; - - #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] - pub struct BarUseParent { - #[serde(rename = "barUse")] - pub bar_use: Bar, - } - - #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, Deserialize, Serialize)] - pub enum Bar { - #[serde(rename = "bar0")] - Bar0, - #[serde(rename = "bar1")] - Bar1, - #[serde(rename = "bar2")] - Bar2, - } - - #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] - pub struct Foo { - #[serde(rename = "barInit")] - pub bar_init: Bar, - #[serde(rename = "barUseParent")] - pub bar_use_parent: Option, - } - - let writer_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "name.space", - "fields": - [ - { - "name": "barInit", - "type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1", - "bar2" - ] - } - }, - { - "name": "barUseParent", - "type": [ - "null", - { - "type": "record", - "name": "BarUseParent", - "fields": [ - { - "name": "barUse", - "type": "Bar" - } - ] - } - ] - } - ] - }"#; - - let reader_schema = r#"{ - "type": "record", - "name": "Foo", - "namespace": "name.space", - "fields": - [ - { - "name": "barInit", - 
"type": - { - "type": "enum", - "name": "Bar", - "symbols": - [ - "bar0", - "bar1" - ] - } - }, - { - "name": "barUseParent", - "type": [ - "null", - { - "type": "record", - "name": "BarUseParent", - "fields": [ - { - "name": "barUse", - "type": "Bar" - } - ] - } - ] - } - ] - }"#; - - let writer_schema = Schema::parse_str(writer_schema)?; - let foo1 = Foo { - bar_init: Bar::Bar0, - bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), - }; - let avro_value = crate::to_value(foo1)?; - assert!( - avro_value.validate(&writer_schema), - "value is valid for schema", - ); - let datum = to_avro_datum(&writer_schema, avro_value)?; - let mut x = &datum[..]; - let reader_schema = Schema::parse_str(reader_schema)?; - let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; - match deser_value { - Value::Record(fields) => { - assert_eq!(fields.len(), 2); - } - _ => panic!("Expected Value::Record"), - } - - Ok(()) -} - -#[test] -fn test_avro_3847_union_field_with_default_value_of_ref() -> TestResult { - // Test for reference to Record - let writer_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - }, { - "name": "f2", - "type": ["record2", "int"], - "default": { - "f1_1": 100 - } - } - ] - } - "#; - let reader_schema = 
Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ( - "f1".to_string(), - Value::Record(vec![("f1_1".to_string(), 10.into())]), - ), - ( - "f2".to_string(), - Value::Union( - 0, - Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), - ), - ), - ]); - - assert_eq!(expected, result[0]); - - // Test for reference to Enum - let writer_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a", "b"] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Enum(1, "b".to_string())); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a", "b"] - } - }, { - "name": "f2", - "type": ["enum1", "int"], - "default": "a" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Enum(1, "b".to_string())), - ( - "f2".to_string(), - Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), - ), - ]); - - assert_eq!(expected, result[0]); - - // Test for reference to Fixed - let writer_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 3 - } - } - ] - } - 
"#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Fixed(3, vec![0, 1, 2])); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 3 - } - }, { - "name": "f2", - "type": ["fixed1", "int"], - "default": "abc" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), - ( - "f2".to_string(), - Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), - ), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3847_union_field_with_default_value_of_ref_with_namespace() -> TestResult { - // Test for reference to Record - let writer_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": 
"int" - } - ] - } - }, { - "name": "f2", - "type": ["ns.record2", "int"], - "default": { - "f1_1": 100 - } - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ( - "f1".to_string(), - Value::Record(vec![("f1_1".to_string(), 10.into())]), - ), - ( - "f2".to_string(), - Value::Union( - 0, - Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), - ), - ), - ]); - - assert_eq!(expected, result[0]); - - // Test for reference to Enum - let writer_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "namespace": "ns", - "type": "enum", - "symbols": ["a", "b"] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Enum(1, "b".to_string())); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "namespace": "ns", - "type": "enum", - "symbols": ["a", "b"] - } - }, { - "name": "f2", - "type": ["ns.enum1", "int"], - "default": "a" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Enum(1, "b".to_string())), - ( - "f2".to_string(), - Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), - ), - ]); - - assert_eq!(expected, result[0]); - - // Test for reference to 
Fixed - let writer_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "namespace": "ns", - "type": "fixed", - "size": 3 - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Fixed(3, vec![0, 1, 2])); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "namespace": "ns", - "type": "fixed", - "size": 3 - } - }, { - "name": "f2", - "type": ["ns.fixed1", "int"], - "default": "abc" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), - ( - "f2".to_string(), - Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), - ), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3847_union_field_with_default_value_of_ref_with_enclosing_namespace() -> TestResult { - // Test for reference to Record - let writer_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); - 
writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - }, { - "name": "f2", - "type": ["ns.record2", "int"], - "default": { - "f1_1": 100 - } - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ( - "f1".to_string(), - Value::Record(vec![("f1_1".to_string(), 10.into())]), - ), - ( - "f2".to_string(), - Value::Union( - 0, - Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), - ), - ), - ]); - - assert_eq!(expected, result[0]); - - // Test for reference to Enum - let writer_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a", "b"] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Enum(1, "b".to_string())); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "enum1", - "type": "enum", - "symbols": ["a", "b"] - } - }, { - "name": "f2", - "type": ["ns.enum1", "int"], - "default": "a" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - 
let expected = Value::Record(vec![ - ("f1".to_string(), Value::Enum(1, "b".to_string())), - ( - "f2".to_string(), - Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), - ), - ]); - - assert_eq!(expected, result[0]); - - // Test for reference to Fixed - let writer_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 3 - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Fixed(3, vec![0, 1, 2])); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 3 - } - }, { - "name": "f2", - "type": ["ns.fixed1", "int"], - "default": "abc" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), - ( - "f2".to_string(), - Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), - ), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -fn write_schema_for_default_value_test() -> apache_avro::AvroResult> { - let writer_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()) - .ok_or("Expected Some(Record), but got None") 
- .unwrap(); - record.put("f1", 10); - writer.append(record)?; - - writer.into_inner() -} - -#[test] -fn test_avro_3851_read_default_value_for_simple_record_field() -> TestResult { - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - }, { - "name": "f2", - "type": "int", - "default": 20 - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Int(10)), - ("f2".to_string(), Value::Int(20)), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_nested_record_field() -> TestResult { - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - }, { - "name": "f2", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - }, - "default": { - "f1_1": 100 - } - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Int(10)), - ( - "f2".to_string(), - Value::Record(vec![("f1_1".to_string(), 100.into())]), - ), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_enum_record_field() -> TestResult { - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - }, { - "name": "f2", - "type": { 
- "name": "enum1", - "type": "enum", - "symbols": ["a", "b", "c"] - }, - "default": "a" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Int(10)), - ("f2".to_string(), Value::Enum(0, "a".to_string())), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_fixed_record_field() -> TestResult { - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - }, { - "name": "f2", - "type": { - "name": "fixed1", - "type": "fixed", - "size": 3 - }, - "default": "abc" - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Int(10)), - ("f2".to_string(), Value::Fixed(3, vec![b'a', b'b', b'c'])), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_array_record_field() -> TestResult { - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - }, { - "name": "f2", - "type": "array", - "items": "int", - "default": [1, 2, 3] - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - 
("f1".to_string(), Value::Int(10)), - ( - "f2".to_string(), - Value::Array(vec![1.into(), 2.into(), 3.into()]), - ), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_map_record_field() -> TestResult { - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": "int" - }, { - "name": "f2", - "type": "map", - "values": "string", - "default": { "a": "A", "b": "B", "c": "C" } - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = write_schema_for_default_value_test()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let map = HashMap::from_iter([ - ("a".to_string(), "A".into()), - ("b".to_string(), "B".into()), - ("c".to_string(), "C".into()), - ]); - let expected = Value::Record(vec![ - ("f1".to_string(), Value::Int(10)), - ("f2".to_string(), Value::Map(map)), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_ref_record_field() -> TestResult { - let writer_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": "int" - } - ] - } - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; - record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); - writer.append(record)?; - - let reader_schema_str = r#" - { - "name": "record1", - "namespace": "ns", - "type": "record", - "fields": [ - { - "name": "f1", - "type": { - "name": "record2", - "type": "record", - "fields": [ - { - "name": "f1_1", - "type": 
"int" - } - ] - } - }, { - "name": "f2", - "type": "ns.record2", - "default": { "f1_1": 100 } - } - ] - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Record(vec![ - ( - "f1".to_string(), - Value::Record(vec![("f1_1".to_string(), 10.into())]), - ), - ( - "f2".to_string(), - Value::Record(vec![("f1_1".to_string(), 100.into())]), - ), - ]); - - assert_eq!(expected, result[0]); - - Ok(()) -} - -#[test] -fn test_avro_3851_read_default_value_for_enum() -> TestResult { - let writer_schema_str = r#" - { - "name": "enum1", - "namespace": "ns", - "type": "enum", - "symbols": ["a", "b", "c"] - } - "#; - let writer_schema = Schema::parse_str(writer_schema_str)?; - let mut writer = Writer::new(&writer_schema, Vec::new()); - writer.append("c")?; - - let reader_schema_str = r#" - { - "name": "enum1", - "namespace": "ns", - "type": "enum", - "symbols": ["a", "b"], - "default": "a" - } - "#; - let reader_schema = Schema::parse_str(reader_schema_str)?; - let input = writer.into_inner()?; - let reader = Reader::with_schema(&reader_schema, &input[..])?; - let result = reader.collect::, _>>()?; - - assert_eq!(1, result.len()); - - let expected = Value::Enum(0, "a".to_string()); - assert_eq!(expected, result[0]); - - Ok(()) -} diff --git a/lang/rust/avro/tests/shared.rs b/lang/rust/avro/tests/shared.rs deleted file mode 100644 index 3915d5d12aa..00000000000 --- a/lang/rust/avro/tests/shared.rs +++ /dev/null @@ -1,150 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{types::Value, Codec, Reader, Schema, Writer}; -use apache_avro_test_helper::TestResult; -use std::{ - fmt, - fs::{DirEntry, File, ReadDir}, - io::BufReader, - path::Path, - slice::Iter, -}; - -const ROOT_DIRECTORY: &str = "../../../share/test/data/schemas"; - -#[test] -fn test_schema() -> TestResult { - let directory: ReadDir = match std::fs::read_dir(ROOT_DIRECTORY) { - Ok(root_folder) => root_folder, - Err(err) => { - log::warn!("Can't read the root folder: {err}"); - return Ok(()); - } - }; - let mut result: Result<(), ErrorsDesc> = Ok(()); - for f in directory { - let entry: DirEntry = match f { - Ok(entry) => entry, - Err(e) => core::panic!("Can't get file {}", e), - }; - log::debug!("{:?}", entry.file_name()); - if let Ok(ft) = entry.file_type() { - if ft.is_dir() { - let sub_folder = - ROOT_DIRECTORY.to_owned() + "/" + entry.file_name().to_str().unwrap(); - - let dir_result = test_folder(sub_folder.as_str()); - if let Err(ed) = dir_result { - result = match result { - Ok(()) => Err(ed), - Err(e) => Err(e.merge(&ed)), - } - } - } - } - } - result?; - - Ok(()) -} - -#[derive(Debug)] -struct ErrorsDesc { - details: Vec, -} - -impl ErrorsDesc { - fn new(msg: &str) -> ErrorsDesc { - ErrorsDesc { - details: vec![msg.to_string()], - } - } - - fn add(&self, msg: &str) -> Self { - let mut new_vec = self.details.clone(); - new_vec.push(msg.to_string()); - Self { 
details: new_vec } - } - - fn merge(&self, err: &ErrorsDesc) -> Self { - let mut new_vec = self.details.clone(); - err.details - .iter() - .for_each(|d: &String| new_vec.push(d.clone())); - Self { details: new_vec } - } -} - -impl fmt::Display for ErrorsDesc { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.details.join("\n").as_str()) - } -} - -fn test_folder(folder: &str) -> Result<(), ErrorsDesc> { - let file_name = folder.to_owned() + "/schema.json"; - let content = std::fs::read_to_string(file_name).expect("Unable to find schema.json file"); - - let schema: Schema = Schema::parse_str(content.as_str()).expect("Can't read schema"); - - let data_file_name = folder.to_owned() + "/data.avro"; - let data_path: &Path = Path::new(data_file_name.as_str()); - let mut result = Ok(()); - if !data_path.exists() { - log::error!("{}", format!("folder {folder} does not exist")); - return Err(ErrorsDesc::new( - format!("folder {folder} does not exist").as_str(), - )); - } else { - let file: File = File::open(data_path).expect("Can't open data.avro"); - let reader = - Reader::with_schema(&schema, BufReader::new(&file)).expect("Can't read data.avro"); - - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null); - - let mut records: Vec = vec![]; - - for r in reader { - let record: Value = r.expect("Error on reading"); - writer.append(record.clone()).expect("Error on write item"); - records.push(record); - } - - writer.flush().expect("Error on flush"); - let bytes: Vec = writer.into_inner().unwrap(); - let reader_bis = - Reader::with_schema(&schema, &bytes[..]).expect("Can't read flushed vector"); - - let mut records_iter: Iter = records.iter(); - for r2 in reader_bis { - let record: Value = r2.expect("Error on reading"); - let original = records_iter.next().expect("Error, no next"); - if original != &record { - result = match result { - Ok(_) => Result::Err(ErrorsDesc::new( - format!("Records are not equals for folder : 
{folder}").as_str(), - )), - Err(e) => { - Err(e.add(format!("Records are not equals for folder : {folder}").as_str())) - } - } - } - } - } - result -} diff --git a/lang/rust/avro/tests/to_from_avro_datum_schemata.rs b/lang/rust/avro/tests/to_from_avro_datum_schemata.rs deleted file mode 100644 index e27f1e625ac..00000000000 --- a/lang/rust/avro/tests/to_from_avro_datum_schemata.rs +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use apache_avro::{ - from_avro_datum_schemata, to_avro_datum_schemata, types::Value, Codec, Reader, Schema, Writer, -}; -use apache_avro_test_helper::{init, TestResult}; - -static SCHEMA_A_STR: &str = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_a", "type": "float"} - ] - }"#; - -static SCHEMA_B_STR: &str = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_b", "type": "A"} - ] - }"#; - -#[test] -fn test_avro_3683_multiple_schemata_to_from_avro_datum() -> TestResult { - init(); - - let record: Value = Value::Record(vec![( - String::from("field_b"), - Value::Record(vec![(String::from("field_a"), Value::Float(1.0))]), - )]); - - let schemata: Vec = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR])?; - let schemata: Vec<&Schema> = schemata.iter().collect(); - - // this is the Schema we want to use for write/read - let schema_b = schemata[1]; - let expected: Vec = vec![0, 0, 128, 63]; - let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?; - assert_eq!(actual, expected); - - let value = from_avro_datum_schemata(schema_b, schemata, &mut actual.as_slice(), None)?; - assert_eq!(value, record); - - Ok(()) -} - -#[test] -fn test_avro_3683_multiple_schemata_writer_reader() -> TestResult { - init(); - - let record: Value = Value::Record(vec![( - String::from("field_b"), - Value::Record(vec![(String::from("field_a"), Value::Float(1.0))]), - )]); - - let schemata: Vec = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR])?; - let schemata: Vec<&Schema> = schemata.iter().collect(); - - // this is the Schema we want to use for write/read - let schema_b = schemata[1]; - let mut output: Vec = Vec::new(); - - let mut writer = Writer::with_schemata(schema_b, schemata.clone(), &mut output, Codec::Null); - writer.append(record.clone())?; - writer.flush()?; - - let reader = Reader::with_schemata(schema_b, schemata, output.as_slice())?; - let value = reader.into_iter().next().unwrap().unwrap(); - 
assert_eq!(value, record); - - Ok(()) -} diff --git a/lang/rust/avro/tests/union_schema.rs b/lang/rust/avro/tests/union_schema.rs deleted file mode 100644 index 09a1d130534..00000000000 --- a/lang/rust/avro/tests/union_schema.rs +++ /dev/null @@ -1,342 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use apache_avro::{from_value, AvroResult, Codec, Reader, Schema, Writer}; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; - -static SCHEMA_A_STR: &str = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_a", "type": "float"} - ] - }"#; - -static SCHEMA_B_STR: &str = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_b", "type": "long"} - ] - }"#; - -static SCHEMA_C_STR: &str = r#"{ - "name": "C", - "type": "record", - "fields": [ - {"name": "field_union", "type": ["A", "B"]}, - {"name": "field_c", "type": "string"} - ] - }"#; - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct A { - field_a: f32, -} - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct B { - field_b: i64, -} - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -#[serde(untagged)] -enum UnionAB { - A(A), - B(B), -} - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct C { - field_union: UnionAB, - field_c: String, -} - -fn encode_decode(input: &T, schema: &Schema, schemata: &[Schema]) -> AvroResult -where - T: DeserializeOwned + Serialize, -{ - let mut encoded: Vec = Vec::new(); - let mut writer = - Writer::with_schemata(schema, schemata.iter().collect(), &mut encoded, Codec::Null); - writer.append_ser(input)?; - writer.flush()?; - - let mut reader = Reader::with_schemata(schema, schemata.iter().collect(), encoded.as_slice())?; - from_value::(&reader.next().expect("")?) 
-} - -#[test] -fn test_avro_3901_union_schema_round_trip_no_null() -> AvroResult<()> { - let schemata: Vec = - Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_C_STR]).expect("parsing schemata"); - - let input = C { - field_union: (UnionAB::A(A { field_a: 45.5 })), - field_c: "foo".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = C { - field_union: (UnionAB::B(B { field_b: 73 })), - field_c: "bar".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - Ok(()) -} - -static SCHEMA_D_STR: &str = r#"{ - "name": "D", - "type": "record", - "fields": [ - {"name": "field_union", "type": ["null", "A", "B"]}, - {"name": "field_d", "type": "string"} - ] - }"#; - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -#[serde(untagged)] -enum UnionNoneAB { - None, - A(A), - B(B), -} - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct D { - field_union: UnionNoneAB, - field_d: String, -} - -#[test] -fn test_avro_3901_union_schema_round_trip_null_at_start() -> AvroResult<()> { - let schemata: Vec = - Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_D_STR]).expect("parsing schemata"); - - let input = D { - field_union: UnionNoneAB::A(A { field_a: 54.25 }), - field_d: "fooy".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = D { - field_union: UnionNoneAB::None, - field_d: "fooyy".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = D { - field_union: UnionNoneAB::B(B { field_b: 103 }), - field_d: "foov".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - Ok(()) -} - -static SCHEMA_E_STR: &str = r#"{ - "name": "E", - "type": "record", - "fields": [ - {"name": "field_union", "type": ["A", "null", 
"B"]}, - {"name": "field_e", "type": "string"} - ] - }"#; - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -#[serde(untagged)] -enum UnionANoneB { - A(A), - None, - B(B), -} - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct E { - field_union: UnionANoneB, - field_e: String, -} - -#[test] -fn test_avro_3901_union_schema_round_trip_with_out_of_order_null() -> AvroResult<()> { - let schemata: Vec = - Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_E_STR]).expect("parsing schemata"); - - let input = E { - field_union: UnionANoneB::A(A { field_a: 23.75 }), - field_e: "barme".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = E { - field_union: UnionANoneB::None, - field_e: "barme2".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = E { - field_union: UnionANoneB::B(B { field_b: 89 }), - field_e: "barme3".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - Ok(()) -} - -static SCHEMA_F_STR: &str = r#"{ - "name": "F", - "type": "record", - "fields": [ - {"name": "field_union", "type": ["A", "B", "null"]}, - {"name": "field_f", "type": "string"} - ] - }"#; - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -#[serde(untagged)] -enum UnionABNone { - A(A), - B(B), - None, -} - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct F { - field_union: UnionABNone, - field_f: String, -} - -#[test] -fn test_avro_3901_union_schema_round_trip_with_end_null() -> AvroResult<()> { - let schemata: Vec = - Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_F_STR]).expect("parsing schemata"); - - let input = F { - field_union: UnionABNone::A(A { field_a: 23.75 }), - field_f: "aoe".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = F { 
- field_union: UnionABNone::B(B { field_b: 89 }), - field_f: "aoe3".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = F { - field_union: UnionABNone::None, - field_f: "aoee2".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - Ok(()) -} - -/* -One could make an argument that we should be able to represent a union schema of [null,A,B] as Option>. -This is a failing test to show that we *can't*. My (Simon Gittins's) feeling is that this should NOT be implemented -static SCHEMA_G_STR: &str = r#"{ - "name": "G", - "type": "record", - "fields": [ - {"name": "field_union", "type": ["null", "A", "B"]}, - {"name": "field_g", "type": "string"} - ] - }"#; - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct G { - field_union: Option, - field_g: String, -} - -#[test] -fn test_avro_3901_union_schema_as_optional_2() -> AvroResult<()> { - let schemata: Vec = - Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR, SCHEMA_G_STR]).expect("parsing schemata"); - - let input = G { - field_union: Some(UnionAB::A(A { field_a: 32.25 })), - field_g: "aj".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = G { - field_union: None, - field_g: "aja".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - let input = G { - field_union: Some(UnionAB::B(B { field_b: 44 })), - field_g: "aju".to_string(), - }; - let output = encode_decode(&input, &schemata[2], &schemata)?; - assert_eq!(input, output); - - Ok(()) -} -*/ -static SCHEMA_H_STR: &str = r#"{ - "name": "H", - "type": "record", - "fields": [ - {"name": "field_union", "type": ["null", "long"]}, - {"name": "field_h", "type": "string"} - ] - }"#; - -#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)] -struct H { - field_union: Option, - field_h: String, 
-} - -#[test] -fn test_avro_3901_union_schema_as_optional() -> AvroResult<()> { - let schemata: Vec = Schema::parse_list(&[SCHEMA_H_STR]).expect("parsing schemata"); - - let input = H { - field_union: Some(23), - field_h: "aaa".to_string(), - }; - let output = encode_decode(&input, &schemata[0], &schemata)?; - assert_eq!(input, output); - - let input = H { - field_union: None, - field_h: "bbb".to_string(), - }; - let output = encode_decode(&input, &schemata[0], &schemata)?; - assert_eq!(input, output); - - Ok(()) -} diff --git a/lang/rust/avro/tests/uuids.rs b/lang/rust/avro/tests/uuids.rs deleted file mode 100644 index bea71e2e418..00000000000 --- a/lang/rust/avro/tests/uuids.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use apache_avro::Uuid; -use apache_avro_test_helper::TestResult; - -#[test] -fn avro_3948_use_apache_avro_uuid() -> TestResult { - let _ = Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?; - - Ok(()) -} diff --git a/lang/rust/avro/tests/validators.rs b/lang/rust/avro/tests/validators.rs deleted file mode 100644 index fc45353a8d6..00000000000 --- a/lang/rust/avro/tests/validators.rs +++ /dev/null @@ -1,85 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{ - schema::Namespace, - validator::{ - set_enum_symbol_name_validator, set_record_field_name_validator, set_schema_name_validator, - set_schema_namespace_validator, EnumSymbolNameValidator, RecordFieldNameValidator, - SchemaNameValidator, SchemaNamespaceValidator, - }, - AvroResult, -}; -use apache_avro_test_helper::TestResult; - -struct CustomValidator; - -// Setup the custom validators before the schema is parsed -// because the parsing will trigger the validation and will -// setup the default validator (SpecificationValidator)! 
-impl SchemaNameValidator for CustomValidator { - fn validate(&self, schema_name: &str) -> AvroResult<(String, Namespace)> { - Ok((schema_name.to_string(), None)) - } -} - -impl SchemaNamespaceValidator for CustomValidator { - fn validate(&self, _ns: &str) -> AvroResult<()> { - Ok(()) - } -} - -impl EnumSymbolNameValidator for CustomValidator { - fn validate(&self, _ns: &str) -> AvroResult<()> { - Ok(()) - } -} - -impl RecordFieldNameValidator for CustomValidator { - fn validate(&self, _ns: &str) -> AvroResult<()> { - Ok(()) - } -} - -#[test] -fn avro_3900_custom_validator_with_spec_invalid_names() -> TestResult { - assert!(set_schema_name_validator(Box::new(CustomValidator)).is_ok()); - assert!(set_schema_namespace_validator(Box::new(CustomValidator)).is_ok()); - assert!(set_enum_symbol_name_validator(Box::new(CustomValidator)).is_ok()); - assert!(set_record_field_name_validator(Box::new(CustomValidator)).is_ok()); - - let invalid_schema = r#"{ - "name": "invalid-schema-name", - "namespace": "invalid-namespace", - "type": "record", - "fields": [ - { - "name": "invalid-field-name", - "type": "int" - }, - { - "type": "enum", - "name": "Test", - "symbols": ["A-B", "B-A"] - } - ] - }"#; - - apache_avro::Schema::parse_str(invalid_schema)?; - - Ok(()) -} diff --git a/lang/rust/avro_derive/Cargo.toml b/lang/rust/avro_derive/Cargo.toml deleted file mode 100644 index 6ca57bfaf15..00000000000 --- a/lang/rust/avro_derive/Cargo.toml +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "apache-avro-derive" -version.workspace = true -authors.workspace = true -description = "A library for deriving Avro schemata from Rust structs and enums" -license.workspace = true -repository.workspace = true -edition.workspace = true -rust-version.workspace = true -keywords = ["avro", "data", "serialization", "derive"] -categories.workspace = true -documentation = "https://docs.rs/apache-avro-derive" -readme = "README.md" - -[lib] -proc-macro = true - -[dependencies] -darling = { default-features = false, version = "0.20.10" } -proc-macro2 = { default-features = false, version = "1.0.86" } -quote = { default-features = false, version = "1.0.37" } -serde_json = { workspace = true } -syn = { default-features = false, version = "2.0.77", features = ["full", "fold"] } - -[dev-dependencies] -apache-avro = { default-features = false, path = "../avro", features = ["derive"] } -proptest = { default-features = false, version = "1.5.0", features = ["std"] } -serde = { workspace = true } - - -[package.metadata.docs.rs] -rustdoc-args = ["--cfg", "docsrs"] diff --git a/lang/rust/avro_derive/README.md b/lang/rust/avro_derive/README.md deleted file mode 100644 index 0098d38970a..00000000000 --- a/lang/rust/avro_derive/README.md +++ /dev/null @@ -1,69 +0,0 @@ - - - -# avro_derive - -A proc-macro module for automatically deriving the avro schema for structs or enums. The macro produces the logic necessary to implement the `AvroSchema` trait for the type. 
- -```rust -pub trait AvroSchema { - // constructs the schema for the type - fn get_schema() -> Schema; -} -``` -## How-to use -Add the "derive" feature to your apache-avro dependency inside cargo.toml -``` -apache-avro = { version = "X.Y.Z", features = ["derive"] } -``` - -Add to your data model -```rust -#[derive(AvroSchema)] -struct Test { - a: i64, - b: String, -} -``` - - -### Example -```rust -use apache_avro::Writer; - -#[derive(Debug, Serialize, AvroSchema)] -struct Test { - a: i64, - b: String, -} -// derived schema, always valid or code fails to compile with a descriptive message -let schema = Test::get_schema(); - -let mut writer = Writer::new(&schema, Vec::new()); -let test = Test { - a: 27, - b: "foo".to_owned(), -}; -writer.append_ser(test).unwrap(); -let encoded = writer.into_inner(); -``` - -### Compatibility Notes -This module is designed to work in concert with the Serde implementation. If your use case dictates needing to manually convert to a `Value` type in order to encode then the derived schema may not be correct. diff --git a/lang/rust/avro_derive/src/lib.rs b/lang/rust/avro_derive/src/lib.rs deleted file mode 100644 index eeeaa93a48d..00000000000 --- a/lang/rust/avro_derive/src/lib.rs +++ /dev/null @@ -1,650 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use darling::FromAttributes; -use proc_macro2::{Span, TokenStream}; -use quote::quote; - -use syn::{ - parse_macro_input, spanned::Spanned, AttrStyle, Attribute, DeriveInput, Ident, Meta, Type, - TypePath, -}; - -#[derive(darling::FromAttributes)] -#[darling(attributes(avro))] -struct FieldOptions { - #[darling(default)] - doc: Option, - #[darling(default)] - default: Option, - #[darling(multiple)] - alias: Vec, - #[darling(default)] - rename: Option, - #[darling(default)] - skip: Option, -} - -#[derive(darling::FromAttributes)] -#[darling(attributes(avro))] -struct NamedTypeOptions { - #[darling(default)] - namespace: Option, - #[darling(default)] - doc: Option, - #[darling(multiple)] - alias: Vec, -} - -#[proc_macro_derive(AvroSchema, attributes(avro))] -// Templated from Serde -pub fn proc_macro_derive_avro_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let mut input = parse_macro_input!(input as DeriveInput); - derive_avro_schema(&mut input) - .unwrap_or_else(to_compile_errors) - .into() -} - -fn derive_avro_schema(input: &mut DeriveInput) -> Result> { - let named_type_options = - NamedTypeOptions::from_attributes(&input.attrs[..]).map_err(darling_to_syn)?; - let full_schema_name = vec![named_type_options.namespace, Some(input.ident.to_string())] - .into_iter() - .flatten() - .collect::>() - .join("."); - let schema_def = match &input.data { - syn::Data::Struct(s) => get_data_struct_schema_def( - &full_schema_name, - named_type_options - .doc - .or_else(|| extract_outer_doc(&input.attrs)), - named_type_options.alias, - s, - input.ident.span(), - )?, - syn::Data::Enum(e) => get_data_enum_schema_def( - &full_schema_name, - named_type_options - .doc - .or_else(|| extract_outer_doc(&input.attrs)), - named_type_options.alias, - e, - input.ident.span(), - )?, - _ => { - return Err(vec![syn::Error::new( - input.ident.span(), - 
"AvroSchema derive only works for structs and simple enums ", - )]) - } - }; - let ident = &input.ident; - let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl(); - Ok(quote! { - impl #impl_generics apache_avro::schema::derive::AvroSchemaComponent for #ident #ty_generics #where_clause { - fn get_schema_in_ctxt(named_schemas: &mut std::collections::HashMap, enclosing_namespace: &Option) -> apache_avro::schema::Schema { - let name = apache_avro::schema::Name::new(#full_schema_name).expect(&format!("Unable to parse schema name {}", #full_schema_name)[..]).fully_qualified_name(enclosing_namespace); - let enclosing_namespace = &name.namespace; - if named_schemas.contains_key(&name) { - apache_avro::schema::Schema::Ref{name: name.clone()} - } else { - named_schemas.insert(name.clone(), apache_avro::schema::Schema::Ref{name: name.clone()}); - #schema_def - } - } - } - }) -} - -fn get_data_struct_schema_def( - full_schema_name: &str, - record_doc: Option, - aliases: Vec, - s: &syn::DataStruct, - error_span: Span, -) -> Result> { - let mut record_field_exprs = vec![]; - match s.fields { - syn::Fields::Named(ref a) => { - let mut index: usize = 0; - for field in a.named.iter() { - let mut name = field.ident.as_ref().unwrap().to_string(); // we know everything has a name - if let Some(raw_name) = name.strip_prefix("r#") { - name = raw_name.to_string(); - } - let field_attrs = - FieldOptions::from_attributes(&field.attrs[..]).map_err(darling_to_syn)?; - let doc = - preserve_optional(field_attrs.doc.or_else(|| extract_outer_doc(&field.attrs))); - if let Some(rename) = field_attrs.rename { - name = rename - } - if let Some(true) = field_attrs.skip { - continue; - } - let default_value = match field_attrs.default { - Some(default_value) => { - let _: serde_json::Value = serde_json::from_str(&default_value[..]) - .map_err(|e| { - vec![syn::Error::new( - field.ident.span(), - format!("Invalid avro default json: \n{e}"), - )] - })?; - quote! 
{ - Some(serde_json::from_str(#default_value).expect(format!("Invalid JSON: {:?}", #default_value).as_str())) - } - } - None => quote! { None }, - }; - let aliases = preserve_vec(field_attrs.alias); - let schema_expr = type_to_schema_expr(&field.ty)?; - let position = index; - record_field_exprs.push(quote! { - apache_avro::schema::RecordField { - name: #name.to_string(), - doc: #doc, - default: #default_value, - aliases: #aliases, - schema: #schema_expr, - order: apache_avro::schema::RecordFieldOrder::Ascending, - position: #position, - custom_attributes: Default::default(), - } - }); - index += 1; - } - } - syn::Fields::Unnamed(_) => { - return Err(vec![syn::Error::new( - error_span, - "AvroSchema derive does not work for tuple structs", - )]) - } - syn::Fields::Unit => { - return Err(vec![syn::Error::new( - error_span, - "AvroSchema derive does not work for unit structs", - )]) - } - } - let record_doc = preserve_optional(record_doc); - let record_aliases = preserve_vec(aliases); - Ok(quote! 
{ - let schema_fields = vec![#(#record_field_exprs),*]; - let name = apache_avro::schema::Name::new(#full_schema_name).expect(&format!("Unable to parse struct name for schema {}", #full_schema_name)[..]); - let lookup: std::collections::BTreeMap = schema_fields - .iter() - .map(|field| (field.name.to_owned(), field.position)) - .collect(); - apache_avro::schema::Schema::Record(apache_avro::schema::RecordSchema { - name, - aliases: #record_aliases, - doc: #record_doc, - fields: schema_fields, - lookup, - attributes: Default::default(), - }) - }) -} - -fn get_data_enum_schema_def( - full_schema_name: &str, - doc: Option, - aliases: Vec, - e: &syn::DataEnum, - error_span: Span, -) -> Result> { - let doc = preserve_optional(doc); - let enum_aliases = preserve_vec(aliases); - if e.variants.iter().all(|v| syn::Fields::Unit == v.fields) { - let default_value = default_enum_variant(e, error_span)?; - let default = preserve_optional(default_value); - let symbols: Vec = e - .variants - .iter() - .map(|variant| variant.ident.to_string()) - .collect(); - Ok(quote! { - apache_avro::schema::Schema::Enum(apache_avro::schema::EnumSchema { - name: apache_avro::schema::Name::new(#full_schema_name).expect(&format!("Unable to parse enum name for schema {}", #full_schema_name)[..]), - aliases: #enum_aliases, - doc: #doc, - symbols: vec![#(#symbols.to_owned()),*], - default: #default, - attributes: Default::default(), - }) - }) - } else { - Err(vec![syn::Error::new( - error_span, - "AvroSchema derive does not work for enums with non unit structs", - )]) - } -} - -/// Takes in the Tokens of a type and returns the tokens of an expression with return type `Schema` -fn type_to_schema_expr(ty: &Type) -> Result> { - if let Type::Path(p) = ty { - let type_string = p.path.segments.last().unwrap().ident.to_string(); - - let schema = match &type_string[..] { - "bool" => quote! {apache_avro::schema::Schema::Boolean}, - "i8" | "i16" | "i32" | "u8" | "u16" => quote! 
{apache_avro::schema::Schema::Int}, - "u32" | "i64" => quote! {apache_avro::schema::Schema::Long}, - "f32" => quote! {apache_avro::schema::Schema::Float}, - "f64" => quote! {apache_avro::schema::Schema::Double}, - "String" | "str" => quote! {apache_avro::schema::Schema::String}, - "char" => { - return Err(vec![syn::Error::new_spanned( - ty, - "AvroSchema: Cannot guarantee successful deserialization of this type", - )]) - } - "u64" => { - return Err(vec![syn::Error::new_spanned( - ty, - "Cannot guarantee successful serialization of this type due to overflow concerns", - )]) - } // Can't guarantee serialization type - _ => { - // Fails when the type does not implement AvroSchemaComponent directly - // TODO check and error report with something like https://docs.rs/quote/1.0.15/quote/macro.quote_spanned.html#example - type_path_schema_expr(p) - } - }; - Ok(schema) - } else if let Type::Array(ta) = ty { - let inner_schema_expr = type_to_schema_expr(&ta.elem)?; - Ok(quote! {apache_avro::schema::Schema::array(#inner_schema_expr)}) - } else if let Type::Reference(tr) = ty { - type_to_schema_expr(&tr.elem) - } else { - Err(vec![syn::Error::new_spanned( - ty, - format!("Unable to generate schema for type: {ty:?}"), - )]) - } -} - -fn default_enum_variant( - data_enum: &syn::DataEnum, - error_span: Span, -) -> Result, Vec> { - match data_enum - .variants - .iter() - .filter(|v| v.attrs.iter().any(is_default_attr)) - .collect::>() - { - variants if variants.is_empty() => Ok(None), - single if single.len() == 1 => Ok(Some(single[0].ident.to_string())), - multiple => Err(vec![syn::Error::new( - error_span, - format!( - "Multiple defaults defined: {:?}", - multiple - .iter() - .map(|v| v.ident.to_string()) - .collect::>() - ), - )]), - } -} - -fn is_default_attr(attr: &Attribute) -> bool { - matches!(attr, Attribute { meta: Meta::Path(path), .. 
} if path.get_ident().map(Ident::to_string).as_deref() == Some("default")) -} - -/// Generates the schema def expression for fully qualified type paths using the associated function -/// - `A -> ::get_schema_in_ctxt()` -/// - `A -> as apache_avro::schema::derive::AvroSchemaComponent>::get_schema_in_ctxt()` -fn type_path_schema_expr(p: &TypePath) -> TokenStream { - quote! {<#p as apache_avro::schema::derive::AvroSchemaComponent>::get_schema_in_ctxt(named_schemas, enclosing_namespace)} -} - -/// Stolen from serde -fn to_compile_errors(errors: Vec) -> proc_macro2::TokenStream { - let compile_errors = errors.iter().map(syn::Error::to_compile_error); - quote!(#(#compile_errors)*) -} - -fn extract_outer_doc(attributes: &[Attribute]) -> Option { - let doc = attributes - .iter() - .filter(|attr| attr.style == AttrStyle::Outer && attr.path().is_ident("doc")) - .filter_map(|attr| { - let name_value = attr.meta.require_name_value(); - match name_value { - Ok(name_value) => match &name_value.value { - syn::Expr::Lit(expr_lit) => match expr_lit.lit { - syn::Lit::Str(ref lit_str) => Some(lit_str.value().trim().to_string()), - _ => None, - }, - _ => None, - }, - Err(_) => None, - } - }) - .collect::>() - .join("\n"); - if doc.is_empty() { - None - } else { - Some(doc) - } -} - -fn preserve_optional(op: Option) -> TokenStream { - match op { - Some(tt) => quote! {Some(#tt.into())}, - None => quote! {None}, - } -} - -fn preserve_vec(op: Vec) -> TokenStream { - let items: Vec = op.iter().map(|tt| quote! {#tt.into()}).collect(); - if items.is_empty() { - quote! {None} - } else { - quote! {Some(vec![#(#items),*])} - } -} - -fn darling_to_syn(e: darling::Error) -> Vec { - let msg = format!("{e}"); - let token_errors = e.write_errors(); - vec![syn::Error::new(token_errors.span(), msg)] -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn basic_case() { - let test_struct = quote! 
{ - struct A { - a: i32, - b: String - } - }; - - match syn::parse2::(test_struct) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_ok()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn tuple_struct_unsupported() { - let test_tuple_struct = quote! { - struct B (i32, String); - }; - - match syn::parse2::(test_tuple_struct) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_err()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn unit_struct_unsupported() { - let test_tuple_struct = quote! { - struct AbsoluteUnit; - }; - - match syn::parse2::(test_tuple_struct) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_err()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn struct_with_optional() { - let struct_with_optional = quote! { - struct Test4 { - a : Option - } - }; - match syn::parse2::(struct_with_optional) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_ok()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn test_basic_enum() { - let basic_enum = quote! { - enum Basic { - A, - B, - C, - D - } - }; - match syn::parse2::(basic_enum) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_ok()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn avro_3687_basic_enum_with_default() { - let basic_enum = quote! { - enum Basic { - #[default] - A, - B, - C, - D - } - }; - match syn::parse2::(basic_enum) { - Ok(mut input) => { - let derived = derive_avro_schema(&mut input); - assert!(derived.is_ok()); - assert_eq!(derived.unwrap().to_string(), quote! 
{ - impl apache_avro::schema::derive::AvroSchemaComponent for Basic { - fn get_schema_in_ctxt( - named_schemas: &mut std::collections::HashMap< - apache_avro::schema::Name, - apache_avro::schema::Schema - >, - enclosing_namespace: &Option - ) -> apache_avro::schema::Schema { - let name = apache_avro::schema::Name::new("Basic") - .expect(&format!("Unable to parse schema name {}", "Basic")[..]) - .fully_qualified_name(enclosing_namespace); - let enclosing_namespace = &name.namespace; - if named_schemas.contains_key(&name) { - apache_avro::schema::Schema::Ref { name: name.clone() } - } else { - named_schemas.insert( - name.clone(), - apache_avro::schema::Schema::Ref { name: name.clone() } - ); - apache_avro::schema::Schema::Enum(apache_avro::schema::EnumSchema { - name: apache_avro::schema::Name::new("Basic").expect( - &format!("Unable to parse enum name for schema {}", "Basic")[..] - ), - aliases: None, - doc: None, - symbols: vec![ - "A".to_owned(), - "B".to_owned(), - "C".to_owned(), - "D".to_owned() - ], - default: Some("A".into()), - attributes: Default::default(), - }) - } - } - } - }.to_string()); - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn avro_3687_basic_enum_with_default_twice() { - let non_basic_enum = quote! { - enum Basic { - #[default] - A, - B, - #[default] - C, - D - } - }; - match syn::parse2::(non_basic_enum) { - Ok(mut input) => match derive_avro_schema(&mut input) { - Ok(_) => { - panic!("Should not be able to derive schema for enum with multiple defaults") - } - Err(errors) => { - assert_eq!(errors.len(), 1); - assert_eq!( - errors[0].to_string(), - r#"Multiple defaults defined: ["A", "C"]"# - ); - } - }, - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn test_non_basic_enum() { - let non_basic_enum = quote! 
{ - enum Basic { - A(i32), - B, - C, - D - } - }; - match syn::parse2::(non_basic_enum) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_err()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn test_namespace() { - let test_struct = quote! { - #[avro(namespace = "namespace.testing")] - struct A { - a: i32, - b: String - } - }; - - match syn::parse2::(test_struct) { - Ok(mut input) => { - let schema_token_stream = derive_avro_schema(&mut input); - assert!(&schema_token_stream.is_ok()); - assert!(schema_token_stream - .unwrap() - .to_string() - .contains("namespace.testing")) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn test_reference() { - let test_reference_struct = quote! { - struct A<'a> { - a: &'a Vec, - b: &'static str - } - }; - - match syn::parse2::(test_reference_struct) { - Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_ok()) - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } - - #[test] - fn test_trait_cast() { - assert_eq!(type_path_schema_expr(&syn::parse2::(quote!{i32}).unwrap()).to_string(), quote!{::get_schema_in_ctxt(named_schemas, enclosing_namespace)}.to_string()); - assert_eq!(type_path_schema_expr(&syn::parse2::(quote!{Vec}).unwrap()).to_string(), quote!{ as apache_avro::schema::derive::AvroSchemaComponent>::get_schema_in_ctxt(named_schemas, enclosing_namespace)}.to_string()); - assert_eq!(type_path_schema_expr(&syn::parse2::(quote!{AnyType}).unwrap()).to_string(), quote!{::get_schema_in_ctxt(named_schemas, enclosing_namespace)}.to_string()); - } - - #[test] - fn test_avro_3709_record_field_attributes() { - let test_struct = quote! 
{ - struct A { - #[avro(alias = "a1", alias = "a2", doc = "a doc", default = "123", rename = "a3")] - a: i32 - } - }; - - match syn::parse2::(test_struct) { - Ok(mut input) => { - let schema_res = derive_avro_schema(&mut input); - let expected_token_stream = r#"let schema_fields = vec ! [apache_avro :: schema :: RecordField { name : "a3" . to_string () , doc : Some ("a doc" . into ()) , default : Some (serde_json :: from_str ("123") . expect (format ! ("Invalid JSON: {:?}" , "123") . as_str ())) , aliases : Some (vec ! ["a1" . into () , "a2" . into ()]) , schema : apache_avro :: schema :: Schema :: Int , order : apache_avro :: schema :: RecordFieldOrder :: Ascending , position : 0usize , custom_attributes : Default :: default () , }] ;"#; - let schema_token_stream = schema_res.unwrap().to_string(); - assert!(schema_token_stream.contains(expected_token_stream)); - } - Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {error:?}" - ), - }; - } -} diff --git a/lang/rust/avro_derive/tests/derive.rs b/lang/rust/avro_derive/tests/derive.rs deleted file mode 100644 index ec7a96d7560..00000000000 --- a/lang/rust/avro_derive/tests/derive.rs +++ /dev/null @@ -1,1598 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -use apache_avro::{ - from_value, - schema::{derive::AvroSchemaComponent, AvroSchema}, - Reader, Schema, Writer, -}; -use apache_avro_derive::*; -use proptest::prelude::*; -use serde::{de::DeserializeOwned, ser::Serialize}; -use std::collections::HashMap; - -#[macro_use] -extern crate serde; - -#[cfg(test)] -mod test_derive { - use apache_avro::schema::{Alias, EnumSchema, RecordSchema}; - use std::{borrow::Cow, sync::Mutex}; - - use super::*; - - /// Takes in a type that implements the right combination of traits and runs it through a Serde Cycle and asserts the result is the same - fn serde_assert(obj: T) - where - T: std::fmt::Debug + Serialize + DeserializeOwned + AvroSchema + Clone + PartialEq, - { - assert_eq!(obj, serde(obj.clone())); - } - - fn serde(obj: T) -> T - where - T: Serialize + DeserializeOwned + AvroSchema, - { - de(ser(obj)) - } - - fn ser(obj: T) -> Vec - where - T: Serialize + AvroSchema, - { - let schema = T::get_schema(); - let mut writer = Writer::new(&schema, Vec::new()); - if let Err(e) = writer.append_ser(obj) { - panic!("{e:?}"); - } - writer.into_inner().unwrap() - } - - fn de(encoded: Vec) -> T - where - T: DeserializeOwned + AvroSchema, - { - assert!(!encoded.is_empty()); - let schema = T::get_schema(); - let mut reader = Reader::with_schema(&schema, &encoded[..]).unwrap(); - if let Some(res) = reader.next() { - match res { - Ok(value) => { - return from_value::(&value).unwrap(); - } - Err(e) => panic!("{e:?}"), - } - } - unreachable!() - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct TestBasic { - a: i32, - b: String, - } - - proptest! 
{ - #[test] - fn test_smoke_test(a: i32, b: String) { - let schema = r#" - { - "type":"record", - "name":"TestBasic", - "fields":[ - { - "name":"a", - "type":"int" - }, - { - "name":"b", - "type":"string" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestBasic::get_schema()); - let test = TestBasic { - a, - b, - }; - serde_assert(test); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(namespace = "com.testing.namespace")] - struct TestBasicNamespace { - a: i32, - b: String, - } - - #[test] - fn test_basic_namespace() { - let schema = r#" - { - "type":"record", - "name":"com.testing.namespace.TestBasicNamespace", - "fields":[ - { - "name":"a", - "type":"int" - }, - { - "name":"b", - "type":"string" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestBasicNamespace::get_schema()); - if let Schema::Record(RecordSchema { name, .. }) = TestBasicNamespace::get_schema() { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()) - } else { - panic!("TestBasicNamespace schema must be a record schema") - } - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(namespace = "com.testing.complex.namespace")] - struct TestComplexNamespace { - a: TestBasicNamespace, - b: String, - } - - #[test] - fn test_complex_namespace() { - let schema = r#" - { - "type":"record", - "name":"com.testing.complex.namespace.TestComplexNamespace", - "fields":[ - { - "name":"a", - "type":{ - "type":"record", - "name":"com.testing.namespace.TestBasicNamespace", - "fields":[ - { - "name":"a", - "type":"int" - }, - { - "name":"b", - "type":"string" - } - ] - } - }, - { - "name":"b", - "type":"string" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestComplexNamespace::get_schema()); - if let Schema::Record(RecordSchema { name, fields, .. 
}) = - TestComplexNamespace::get_schema() - { - assert_eq!( - "com.testing.complex.namespace".to_owned(), - name.namespace.unwrap() - ); - let inner_schema = fields - .iter() - .filter(|field| field.name == "a") - .map(|field| &field.schema) - .next(); - if let Some(Schema::Record(RecordSchema { name, .. })) = inner_schema { - assert_eq!( - "com.testing.namespace".to_owned(), - name.namespace.clone().unwrap() - ) - } else { - panic!("Field 'a' must have a record schema") - } - } else { - panic!("TestComplexNamespace schema must be a record schema") - } - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestAllSupportedBaseTypes { - //Basics test - a: bool, - b: i8, - c: i16, - d: i32, - e: u8, - f: u16, - g: i64, - h: f32, - i: f64, - j: String, - } - - proptest! { - #[test] - fn test_basic_types(a: bool, b: i8, c: i16, d: i32, e: u8, f: u16, g: i64, h: f32, i: f64, j: String) { - let schema = r#" - { - "type":"record", - "name":"TestAllSupportedBaseTypes", - "fields":[ - { - "name":"a", - "type": "boolean" - }, - { - "name":"b", - "type":"int" - }, - { - "name":"c", - "type":"int" - }, - { - "name":"d", - "type":"int" - }, - { - "name":"e", - "type":"int" - }, - { - "name":"f", - "type":"int" - }, - { - "name":"g", - "type":"long" - }, - { - "name":"h", - "type":"float" - }, - { - "name":"i", - "type":"double" - }, - { - "name":"j", - "type":"string" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestAllSupportedBaseTypes::get_schema()); - let all_basic = TestAllSupportedBaseTypes { - a, - b, - c, - d, - e, - f, - g, - h, - i, - j, - }; - serde_assert(all_basic); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestNested { - a: i32, - b: TestAllSupportedBaseTypes, - } - - proptest! 
{ - #[test] - fn test_inner_struct(a: bool, b: i8, c: i16, d: i32, e: u8, f: u16, g: i64, h: f32, i: f64, j: String, aa: i32) { - let schema = r#" - { - "type":"record", - "name":"TestNested", - "fields":[ - { - "name":"a", - "type":"int" - }, - { - "name":"b", - "type":{ - "type":"record", - "name":"TestAllSupportedBaseTypes", - "fields":[ - { - "name":"a", - "type": "boolean" - }, - { - "name":"b", - "type":"int" - }, - { - "name":"c", - "type":"int" - }, - { - "name":"d", - "type":"int" - }, - { - "name":"e", - "type":"int" - }, - { - "name":"f", - "type":"int" - }, - { - "name":"g", - "type":"long" - }, - { - "name":"h", - "type":"float" - }, - { - "name":"i", - "type":"double" - }, - { - "name":"j", - "type":"string" - } - ] - } - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestNested::get_schema()); - let all_basic = TestAllSupportedBaseTypes { - a, - b, - c, - d, - e, - f, - g, - h, - i, - j, - }; - let inner_struct = TestNested { - a: aa, - b: all_basic, - }; - serde_assert(inner_struct); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct TestOptional { - a: Option, - } - - proptest! { - #[test] - fn test_optional_field_some(a: i32) { - let schema = r#" - { - "type":"record", - "name":"TestOptional", - "fields":[ - { - "name":"a", - "type":["null","int"] - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestOptional::get_schema()); - let optional_field = TestOptional { a: Some(a) }; - serde_assert(optional_field); - }} - - #[test] - fn test_optional_field_none() { - let optional_field = TestOptional { a: None }; - serde_assert(optional_field); - } - - /// Generic Containers - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestGeneric { - a: String, - b: Vec, - c: HashMap, - } - - proptest! 
{ - #[test] - fn test_generic_container_1(a: String, b: Vec, c: HashMap) { - let schema = r#" - { - "type":"record", - "name":"TestGeneric", - "fields":[ - { - "name":"a", - "type":"string" - }, - { - "name":"b", - "type": { - "type":"array", - "items":"int" - } - }, - { - "name":"c", - "type": { - "type":"map", - "values":"int" - } - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestGeneric::::get_schema()); - let test_generic = TestGeneric:: { - a, - b, - c, - }; - serde_assert(test_generic); - }} - - proptest! { - #[test] - fn test_generic_container_2(a: bool, b: i8, c: i16, d: i32, e: u8, f: u16, g: i64, h: f32, i: f64, j: String) { - let schema = r#" - { - "type":"record", - "name":"TestGeneric", - "fields":[ - { - "name":"a", - "type":"string" - }, - { - "name":"b", - "type": { - "type":"array", - "items":{ - "type":"record", - "name":"TestAllSupportedBaseTypes", - "fields":[ - { - "name":"a", - "type": "boolean" - }, - { - "name":"b", - "type":"int" - }, - { - "name":"c", - "type":"int" - }, - { - "name":"d", - "type":"int" - }, - { - "name":"e", - "type":"int" - }, - { - "name":"f", - "type":"int" - }, - { - "name":"g", - "type":"long" - }, - { - "name":"h", - "type":"float" - }, - { - "name":"i", - "type":"double" - }, - { - "name":"j", - "type":"string" - } - ] - } - } - }, - { - "name":"c", - "type": { - "type":"map", - "values":"TestAllSupportedBaseTypes" - } - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!( - schema, - TestGeneric::::get_schema() - ); - let test_generic = TestGeneric:: { - a: "testing".to_owned(), - b: vec![TestAllSupportedBaseTypes { - a, - b, - c, - d, - e, - f, - g, - h, - i, - j: j.clone(), - }], - c: vec![( - "key".to_owned(), - TestAllSupportedBaseTypes { - a, - b, - c, - d, - e, - f, - g, - h, - i, - j, - }, - )] - .into_iter() - .collect(), - }; - serde_assert(test_generic); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, 
Eq)] - enum TestAllowedEnum { - A, - B, - C, - D, - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct TestAllowedEnumNested { - a: TestAllowedEnum, - b: String, - } - - #[test] - fn test_enum() { - let schema = r#" - { - "type":"record", - "name":"TestAllowedEnumNested", - "fields":[ - { - "name":"a", - "type": { - "type":"enum", - "name":"TestAllowedEnum", - "symbols":["A","B","C","D"] - } - }, - { - "name":"b", - "type":"string" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestAllowedEnumNested::get_schema()); - let enum_included = TestAllowedEnumNested { - a: TestAllowedEnum::B, - b: "hey".to_owned(), - }; - serde_assert(enum_included); - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct ConsList { - value: i32, - next: Option>, - } - - #[test] - fn test_cons() { - let schema = r#" - { - "type":"record", - "name":"ConsList", - "fields":[ - { - "name":"value", - "type":"int" - }, - { - "name":"next", - "type":["null","ConsList"] - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, ConsList::get_schema()); - let list = ConsList { - value: 34, - next: Some(Box::new(ConsList { - value: 42, - next: None, - })), - }; - serde_assert(list) - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct ConsListGeneric { - value: T, - next: Option>>, - } - - #[test] - fn test_cons_generic() { - let schema = r#" - { - "type":"record", - "name":"ConsListGeneric", - "fields":[ - { - "name":"value", - "type":{ - "type":"record", - "name":"TestAllowedEnumNested", - "fields":[ - { - "name":"a", - "type": { - "type":"enum", - "name":"TestAllowedEnum", - "symbols":["A","B","C","D"] - } - }, - { - "name":"b", - "type":"string" - } - ] - } - }, - { - "name":"next", - "type":["null","ConsListGeneric"] - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!( - schema, - 
ConsListGeneric::::get_schema() - ); - let list = ConsListGeneric:: { - value: TestAllowedEnumNested { - a: TestAllowedEnum::B, - b: "testing".into(), - }, - next: Some(Box::new(ConsListGeneric:: { - value: TestAllowedEnumNested { - a: TestAllowedEnum::D, - b: "testing2".into(), - }, - next: None, - })), - }; - serde_assert(list) - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct TestSimpleArray { - a: [i32; 4], - } - - proptest! { - #[test] - fn test_simple_array(a: [i32; 4]) { - let schema = r#" - { - "type":"record", - "name":"TestSimpleArray", - "fields":[ - { - "name":"a", - "type": { - "type":"array", - "items":"int" - } - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestSimpleArray::get_schema()); - let test = TestSimpleArray { a }; - serde_assert(test) - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestComplexArray { - a: [T; 2], - } - - #[test] - fn test_complex_array() { - let schema = r#" - { - "type":"record", - "name":"TestComplexArray", - "fields":[ - { - "name":"a", - "type": { - "type":"array", - "items":{ - "type":"record", - "name":"TestBasic", - "fields":[ - { - "name":"a", - "type":"int" - }, - { - "name":"b", - "type":"string" - } - ] - } - } - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestComplexArray::::get_schema()); - let test = TestComplexArray:: { - a: [ - TestBasic { - a: 27, - b: "foo".to_owned(), - }, - TestBasic { - a: 28, - b: "bar".to_owned(), - }, - ], - }; - serde_assert(test) - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct Testu8 { - a: Vec, - b: [u8; 2], - } - - proptest! 
{ - #[test] - fn test_bytes_handled(a: Vec, b: [u8; 2]) { - let test = Testu8 { - a, - b, - }; - serde_assert(test) - // don't check for schema equality to allow for transitioning to bytes or fixed types in the future - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema)] - struct TestSmartPointers<'a> { - a: String, - b: Mutex>, - c: Cow<'a, i32>, - } - - #[test] - fn test_smart_pointers() { - let schema = r#" - { - "type":"record", - "name":"TestSmartPointers", - "fields":[ - { - "name":"a", - "type": "string" - }, - { - "name":"b", - "type":{ - "type":"array", - "items":"long" - } - }, - { - "name":"c", - "type":"int" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestSmartPointers::get_schema()); - let test = TestSmartPointers { - a: "hey".into(), - b: Mutex::new(vec![42]), - c: Cow::Owned(32), - }; - // test serde with manual equality for mutex - let test = serde(test); - assert_eq!("hey", test.a); - assert_eq!(vec![42], *test.b.lock().unwrap()); - assert_eq!(Cow::Owned::(32), test.c); - } - - #[derive(Debug, Serialize, AvroSchema, Clone, PartialEq)] - struct TestReference<'a> { - a: &'a Vec, - b: &'static str, - c: &'a f64, - } - - proptest! 
{ - #[test] - fn test_reference_struct(a: Vec, c: f64) { - let schema = r#" - { - "type":"record", - "name":"TestReference", - "fields":[ - { - "name":"a", - "type": { - "type":"array", - "items":"int" - } - }, - { - "name":"b", - "type":"string" - }, - { - "name":"c", - "type":"double" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - assert_eq!(schema, TestReference::get_schema()); - // let a = vec![34]; - // let c = 4.55555555_f64; - let test = TestReference { - a: &a, - b: "testing_static", - c: &c, - }; - ser(test); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(namespace = "com.testing.namespace", doc = "A Documented Record")] - struct TestBasicWithAttributes { - #[avro(doc = "Milliseconds since Queen released Bohemian Rhapsody")] - a: i32, - #[avro(doc = "Full lyrics of Bohemian Rhapsody")] - b: String, - } - - #[test] - fn test_basic_with_attributes() { - let schema = r#" - { - "type":"record", - "name":"com.testing.namespace.TestBasicWithAttributes", - "doc":"A Documented Record", - "fields":[ - { - "name":"a", - "type":"int", - "doc":"Milliseconds since Queen released Bohemian Rhapsody" - }, - { - "name":"b", - "type": "string", - "doc": "Full lyrics of Bohemian Rhapsody" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record(RecordSchema { name, doc, .. 
}) = - TestBasicWithAttributes::get_schema() - { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); - assert_eq!("A Documented Record", doc.unwrap()) - } else { - panic!("TestBasicWithAttributes schema must be a record schema") - } - assert_eq!(schema, TestBasicWithAttributes::get_schema()); - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(namespace = "com.testing.namespace")] - /// A Documented Record - struct TestBasicWithOuterDocAttributes { - #[avro(doc = "Milliseconds since Queen released Bohemian Rhapsody")] - a: i32, - #[avro(doc = "Full lyrics of Bohemian Rhapsody")] - b: String, - } - - #[test] - fn test_basic_with_out_doc_attributes() { - let schema = r#" - { - "type":"record", - "name":"com.testing.namespace.TestBasicWithOuterDocAttributes", - "doc":"A Documented Record", - "fields":[ - { - "name":"a", - "type":"int", - "doc":"Milliseconds since Queen released Bohemian Rhapsody" - }, - { - "name":"b", - "type": "string", - "doc": "Full lyrics of Bohemian Rhapsody" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - let derived_schema = TestBasicWithOuterDocAttributes::get_schema(); - assert_eq!(&schema, &derived_schema); - if let Schema::Record(RecordSchema { name, doc, .. 
}) = derived_schema { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); - assert_eq!("A Documented Record", doc.unwrap()) - } else { - panic!("TestBasicWithOuterDocAttributes schema must be a record schema") - } - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(namespace = "com.testing.namespace")] - /// A Documented Record - /// that spans - /// multiple lines - struct TestBasicWithLargeDoc { - #[avro(doc = "Milliseconds since Queen released Bohemian Rhapsody")] - a: i32, - #[avro(doc = "Full lyrics of Bohemian Rhapsody")] - b: String, - } - - #[test] - fn test_basic_with_large_doc() { - let schema = r#" - { - "type":"record", - "name":"com.testing.namespace.TestBasicWithLargeDoc", - "doc":"A Documented Record", - "fields":[ - { - "name":"a", - "type":"int", - "doc":"Milliseconds since Queen released Bohemian Rhapsody" - }, - { - "name":"b", - "type": "string", - "doc": "Full lyrics of Bohemian Rhapsody" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record(RecordSchema { name, doc, .. }) = TestBasicWithLargeDoc::get_schema() - { - assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); - assert_eq!( - "A Documented Record\nthat spans\nmultiple lines", - doc.unwrap() - ) - } else { - panic!("TestBasicWithLargeDoc schema must be a record schema") - } - assert_eq!(schema, TestBasicWithLargeDoc::get_schema()); - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct TestBasicWithBool { - a: bool, - b: Option, - } - - proptest! 
{ - #[test] - fn avro_3634_test_basic_with_bool(a in any::(), b in any::>()) { - let schema = r#" - { - "type":"record", - "name":"TestBasicWithBool", - "fields":[ - { - "name":"a", - "type":"boolean" - }, - { - "name":"b", - "type":["null","boolean"] - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - let derived_schema = TestBasicWithBool::get_schema(); - - if let Schema::Record(RecordSchema { name, .. }) = derived_schema { - assert_eq!("TestBasicWithBool", name.fullname(None)) - } else { - panic!("TestBasicWithBool schema must be a record schema") - } - assert_eq!(schema, TestBasicWithBool::get_schema()); - - serde_assert(TestBasicWithBool { a, b }); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - struct TestBasicWithU32 { - a: u32, - } - - proptest! { - #[test] - fn test_basic_with_u32(a in any::()) { - let schema = r#" - { - "type":"record", - "name":"TestBasicWithU32", - "fields":[ - { - "name":"a", - "type":"long" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record(RecordSchema { name, .. }) = TestBasicWithU32::get_schema() { - assert_eq!("TestBasicWithU32", name.fullname(None)) - } else { - panic!("TestBasicWithU32 schema must be a record schema") - } - assert_eq!(schema, TestBasicWithU32::get_schema()); - - serde_assert(TestBasicWithU32 { a }); - }} - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(alias = "a", alias = "b", alias = "c")] - struct TestBasicStructWithAliases { - a: i32, - } - - #[test] - fn test_basic_struct_with_aliases() { - let schema = r#" - { - "type":"record", - "name":"TestBasicStructWithAliases", - "aliases":["a", "b", "c"], - "fields":[ - { - "name":"a", - "type":"int" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record(RecordSchema { name, aliases, .. 
}) = - TestBasicStructWithAliases::get_schema() - { - assert_eq!("TestBasicStructWithAliases", name.fullname(None)); - assert_eq!( - Some(vec![ - Alias::new("a").unwrap(), - Alias::new("b").unwrap(), - Alias::new("c").unwrap() - ]), - aliases - ); - } else { - panic!("TestBasicStructWithAliases schema must be a record schema") - } - assert_eq!(schema, TestBasicStructWithAliases::get_schema()); - - serde_assert(TestBasicStructWithAliases { a: i32::MAX }); - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(alias = "d")] - #[avro(alias = "e")] - #[avro(alias = "f")] - struct TestBasicStructWithAliases2 { - a: i32, - } - - #[test] - fn test_basic_struct_with_aliases2() { - let schema = r#" - { - "type":"record", - "name":"TestBasicStructWithAliases2", - "aliases":["d", "e", "f"], - "fields":[ - { - "name":"a", - "type":"int" - } - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record(RecordSchema { name, aliases, .. }) = - TestBasicStructWithAliases2::get_schema() - { - assert_eq!("TestBasicStructWithAliases2", name.fullname(None)); - assert_eq!( - Some(vec![ - Alias::new("d").unwrap(), - Alias::new("e").unwrap(), - Alias::new("f").unwrap() - ]), - aliases - ); - } else { - panic!("TestBasicStructWithAliases2 schema must be a record schema") - } - assert_eq!(schema, TestBasicStructWithAliases2::get_schema()); - - serde_assert(TestBasicStructWithAliases2 { a: i32::MAX }); - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(alias = "a", alias = "b", alias = "c")] - enum TestBasicEnumWithAliases { - A, - B, - } - - #[test] - fn test_basic_enum_with_aliases() { - let schema = r#" - { - "type":"enum", - "name":"TestBasicEnumWithAliases", - "aliases":["a", "b", "c"], - "symbols":[ - "A", - "B" - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Enum(EnumSchema { name, aliases, .. 
}) = - TestBasicEnumWithAliases::get_schema() - { - assert_eq!("TestBasicEnumWithAliases", name.fullname(None)); - assert_eq!( - Some(vec![ - Alias::new("a").unwrap(), - Alias::new("b").unwrap(), - Alias::new("c").unwrap() - ]), - aliases - ); - } else { - panic!("TestBasicEnumWithAliases schema must be an enum schema") - } - assert_eq!(schema, TestBasicEnumWithAliases::get_schema()); - - serde_assert(TestBasicEnumWithAliases::A); - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] - #[avro(alias = "d")] - #[avro(alias = "e")] - #[avro(alias = "f")] - enum TestBasicEnumWithAliases2 { - A, - B, - } - - #[test] - fn test_basic_enum_with_aliases2() { - let schema = r#" - { - "type":"enum", - "name":"TestBasicEnumWithAliases2", - "aliases":["d", "e", "f"], - "symbols":[ - "A", - "B" - ] - } - "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Enum(EnumSchema { name, aliases, .. }) = - TestBasicEnumWithAliases2::get_schema() - { - assert_eq!("TestBasicEnumWithAliases2", name.fullname(None)); - assert_eq!( - Some(vec![ - Alias::new("d").unwrap(), - Alias::new("e").unwrap(), - Alias::new("f").unwrap() - ]), - aliases - ); - } else { - panic!("TestBasicEnumWithAliases2 schema must be an enum schema") - } - assert_eq!(schema, TestBasicEnumWithAliases2::get_schema()); - - serde_assert(TestBasicEnumWithAliases2::B); - } - - #[test] - fn test_basic_struct_with_defaults() { - #[derive(Debug, Deserialize, Serialize, AvroSchema, Clone, PartialEq, Eq)] - enum MyEnum { - Foo, - Bar, - Baz, - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestBasicStructWithDefaultValues { - #[avro(default = "123")] - a: i32, - #[avro(default = r#""The default value for 'b'""#)] - b: String, - #[avro(default = "true")] - condition: bool, - // no default value for 'c' - c: f64, - #[avro(default = r#"{"a": 1, "b": 2}"#)] - map: HashMap, - - #[avro(default = "[1, 2, 3]")] - array: Vec, - - #[avro(default = 
r#""Foo""#)] - myenum: MyEnum, - } - - let schema = r#" - { - "type":"record", - "name":"TestBasicStructWithDefaultValues", - "fields": [ - { - "name":"a", - "type":"int", - "default":123 - }, - { - "name":"b", - "type":"string", - "default": "The default value for 'b'" - }, - { - "name":"condition", - "type":"boolean", - "default":true - }, - { - "name":"c", - "type":"double" - }, - { - "name":"map", - "type":{ - "type":"map", - "values":"int" - }, - "default": { - "a": 1, - "b": 2 - } - }, - { - "name":"array", - "type":{ - "type":"array", - "items":"int" - }, - "default": [1, 2, 3] - }, - { - "name":"myenum", - "type":{ - "type":"enum", - "name":"MyEnum", - "symbols":["Foo", "Bar", "Baz"] - }, - "default":"Foo" - } - ] - } - "#; - - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record(RecordSchema { name, fields, .. }) = - TestBasicStructWithDefaultValues::get_schema() - { - assert_eq!("TestBasicStructWithDefaultValues", name.fullname(None)); - use serde_json::json; - for field in fields { - match field.name.as_str() { - "a" => assert_eq!(Some(json!(123_i32)), field.default), - "b" => assert_eq!( - Some(json!(r#"The default value for 'b'"#.to_owned())), - field.default - ), - "condition" => assert_eq!(Some(json!(true)), field.default), - "array" => assert_eq!(Some(json!([1, 2, 3])), field.default), - "map" => assert_eq!( - Some(json!({ - "a": 1, - "b": 2 - })), - field.default - ), - "c" => assert_eq!(None, field.default), - "myenum" => assert_eq!(Some(json!("Foo")), field.default), - _ => panic!("Unexpected field name"), - } - } - } else { - panic!("TestBasicStructWithDefaultValues schema must be a record schema") - } - assert_eq!(schema, TestBasicStructWithDefaultValues::get_schema()); - - serde_assert(TestBasicStructWithDefaultValues { - a: 321, - b: "A custom value for 'b'".to_owned(), - condition: false, - c: 987.654, - map: [("a".to_owned(), 1), ("b".to_owned(), 2)] - .iter() - .cloned() - .collect(), - array: vec![4, 5, 6], - myenum: 
MyEnum::Bar, - }); - } - - #[test] - fn avro_3633_test_basic_struct_with_skip_attribute() { - // Note: If using the skip attribute together with serialization, - // the serde's skip attribute needs also to be added - - #[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq, Eq)] - struct TestBasicStructNoSchema { - field: bool, - } - - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestBasicStructWithSkipAttribute { - #[avro(skip)] - #[serde(skip)] - condition: bool, - #[avro(skip = false)] - a: f64, - #[avro(skip)] - #[serde(skip)] - map: HashMap, - array: Vec, - #[avro(skip = true)] - #[serde(skip)] - mystruct: TestBasicStructNoSchema, - b: i32, - } - - let schema = r#" - { - "type":"record", - "name":"TestBasicStructWithSkipAttribute", - "fields": [ - { - "name":"a", - "type":"double" - }, - { - "name":"array", - "type":{ - "type":"array", - "items":"int" - } - }, - { - "name":"b", - "type":"int" - } - ] - } - "#; - - let schema = Schema::parse_str(schema).unwrap(); - let derived_schema = TestBasicStructWithSkipAttribute::get_schema(); - if let Schema::Record(RecordSchema { name, fields, .. }) = &derived_schema { - assert_eq!("TestBasicStructWithSkipAttribute", name.fullname(None)); - for field in fields { - match field.name.as_str() { - "condition" => panic!("Unexpected field 'condition'"), - "mystruct" => panic!("Unexpected field 'mystruct'"), - "map" => panic!("Unexpected field 'map'"), - _ => {} - } - } - } else { - panic!( - "TestBasicStructWithSkipAttribute schema must be a record schema: {derived_schema:?}" - ) - } - assert_eq!(schema, derived_schema); - - // Note: If serde's `skip` attribute is used on a field, the field's type - // needs the trait 'Default' to be implemented, since it is skipping the serialization process. - // Copied or cloned objects within 'serde_assert()' doesn't "copy" (serialize/deserialze) - // these fields, so no values are initialized here for skipped fields. 
- serde_assert(TestBasicStructWithSkipAttribute { - condition: bool::default(), // <- skipped - a: 987.654, - map: HashMap::default(), // <- skipped - array: vec![4, 5, 6], - mystruct: TestBasicStructNoSchema::default(), // <- skipped - b: 321, - }); - } - - #[test] - fn avro_3633_test_basic_struct_with_rename_attribute() { - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestBasicStructWithRenameAttribute { - #[avro(rename = "a1")] - #[serde(rename = "a1")] - a: bool, - b: i32, - #[avro(rename = "c1")] - #[serde(rename = "c1")] - c: f32, - } - - let schema = r#" - { - "type":"record", - "name":"TestBasicStructWithRenameAttribute", - "fields": [ - { - "name":"a1", - "type":"boolean" - }, - { - "name":"b", - "type":"int" - }, - { - "name":"c1", - "type":"float" - } - ] - } - "#; - - let schema = Schema::parse_str(schema).unwrap(); - let derived_schema = TestBasicStructWithRenameAttribute::get_schema(); - if let Schema::Record(RecordSchema { name, fields, .. }) = &derived_schema { - assert_eq!("TestBasicStructWithRenameAttribute", name.fullname(None)); - for field in fields { - match field.name.as_str() { - "a" => panic!("Unexpected field name 'a': must be 'a1'"), - "c" => panic!("Unexpected field name 'c': must be 'c1'"), - _ => {} - } - } - } else { - panic!( - "TestBasicStructWithRenameAttribute schema must be a record schema: {derived_schema:?}" - ) - } - assert_eq!(schema, derived_schema); - - serde_assert(TestBasicStructWithRenameAttribute { - a: true, - b: 321, - c: 987.654, - }); - } - - #[test] - fn test_avro_3663_raw_identifier_field_name() { - #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq)] - struct TestRawIdent { - r#type: bool, - } - - let derived_schema = TestRawIdent::get_schema(); - if let Schema::Record(RecordSchema { fields, .. 
}) = derived_schema { - let field = fields.first().expect("TestRawIdent must contain a field"); - assert_eq!(field.name, "type"); - } else { - panic!("Unexpected schema type for {derived_schema:?}") - } - } - - #[test] - fn avro_3962_fields_documentation() { - /// Foo docs - #[derive(AvroSchema)] - #[allow(dead_code)] - struct Foo { - /// a's Rustdoc - a: i32, - /// b's Rustdoc - #[avro(doc = "attribute doc has priority over Rustdoc")] - b: i32, - } - - if let Schema::Record(RecordSchema { fields, .. }) = Foo::get_schema() { - assert_eq!(fields[0].doc, Some("a's Rustdoc".to_string())); - assert_eq!( - fields[1].doc, - Some("attribute doc has priority over Rustdoc".to_string()) - ); - } else { - panic!("Unexpected schema type for Foo") - } - } -} diff --git a/lang/rust/avro_test_helper/Cargo.toml b/lang/rust/avro_test_helper/Cargo.toml deleted file mode 100644 index 4bb01603ea8..00000000000 --- a/lang/rust/avro_test_helper/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "apache-avro-test-helper" -version.workspace = true -edition.workspace = true -rust-version.workspace = true -description = "Apache Avro tests helper." 
-authors.workspace = true -license.workspace = true -repository.workspace = true -keywords = ["avro", "data", "serialization", "test"] -categories.workspace = true -documentation = "https://docs.rs/apache-avro-test-helper" -readme = "README.md" - - -[dependencies] -anyhow = { default-features = false, version = "1.0.89", features = ["std"] } -better-panic = { default-features = false, version = "0.3.0" } -ctor = { default-features = false, version = "0.2.8" } -env_logger = { default-features = false, version = "0.11.5" } -log = { workspace = true } diff --git a/lang/rust/avro_test_helper/README.md b/lang/rust/avro_test_helper/README.md deleted file mode 100644 index 924516922d4..00000000000 --- a/lang/rust/avro_test_helper/README.md +++ /dev/null @@ -1,51 +0,0 @@ - - - -# Avro Test Helper - -A module that provides several test related goodies to the other Avro crates: - -### Custom Logger - -The logger: - -* collects the logged messages, so that a test could assert what has been logged -* delegates to env_logger so that they printed on the stderr - -### Colorized Backtraces - -Uses `color-backtrace` to make the backtraces easier to read. - -# Setup - -### Unit tests - -The module is automatically setup for all unit tests when this crate is listed as a `[dev-dependency]` in Cargo.toml. - -### Integration tests - -Since integration tests are actually crates without Cargo.toml, the test author needs to call `test_logger::init()` in the beginning of a test. - -# Usage - -To assert that a given message was logged, use the `assert_logged` function. -```rust -apache_avro_test_helper::logger::assert_logged("An expected message"); -``` diff --git a/lang/rust/avro_test_helper/src/data.rs b/lang/rust/avro_test_helper/src/data.rs deleted file mode 100644 index 662df23d3f9..00000000000 --- a/lang/rust/avro_test_helper/src/data.rs +++ /dev/null @@ -1,636 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Provides a set of Avro schema examples that are used in the tests. - -use std::sync::OnceLock; - -pub const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[ - (r#""null""#, true), - (r#"{"type": "null"}"#, true), - (r#""boolean""#, true), - (r#"{"type": "boolean"}"#, true), - (r#""string""#, true), - (r#"{"type": "string"}"#, true), - (r#""bytes""#, true), - (r#"{"type": "bytes"}"#, true), - (r#""int""#, true), - (r#"{"type": "int"}"#, true), - (r#""long""#, true), - (r#"{"type": "long"}"#, true), - (r#""float""#, true), - (r#"{"type": "float"}"#, true), - (r#""double""#, true), - (r#"{"type": "double"}"#, true), - (r#""true""#, false), - (r#"true"#, false), - (r#"{"no_type": "test"}"#, false), - (r#"{"type": "panther"}"#, false), -]; - -pub const FIXED_EXAMPLES: &[(&str, bool)] = &[ - (r#"{"type": "fixed", "name": "Test", "size": 1}"#, true), - ( - r#"{ - "type": "fixed", - "name": "MyFixed", - "namespace": "org.apache.hadoop.avro", - "size": 1 - }"#, - true, - ), - (r#"{"type": "fixed", "name": "MissingSize"}"#, false), - (r#"{"type": "fixed", "size": 314}"#, false), -]; - -pub const ENUM_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"]}"#, - true, - ), - ( - r#"{ - "type": "enum", - "name": "Status", - "symbols": "Normal 
Caution Critical" - }"#, - false, - ), - ( - r#"{ - "type": "enum", - "name": [ 0, 1, 1, 2, 3, 5, 8 ], - "symbols": ["Golden", "Mean"] - }"#, - false, - ), - ( - r#"{ - "type": "enum", - "symbols" : ["I", "will", "fail", "no", "name"] - }"#, - false, - ), - ( - r#"{ - "type": "enum", - "name": "Test" - "symbols" : ["AA", "AA"] - }"#, - false, - ), -]; - -pub const ARRAY_EXAMPLES: &[(&str, bool)] = &[ - (r#"{"type": "array", "items": "long"}"#, true), - ( - r#"{ - "type": "array", - "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]} - }"#, - true, - ), -]; - -pub const MAP_EXAMPLES: &[(&str, bool)] = &[ - (r#"{"type": "map", "values": "long"}"#, true), - ( - r#"{ - "type": "map", - "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]} - }"#, - true, - ), -]; - -pub const UNION_EXAMPLES: &[(&str, bool)] = &[ - (r#"["string", "null", "long"]"#, true), - (r#"["null", "null"]"#, false), - (r#"["long", "long"]"#, false), - ( - r#"[ - {"type": "array", "items": "long"} - {"type": "array", "items": "string"} - ]"#, - false, - ), - // Unions with default values - ( - r#"{"name": "foo", "type": ["string", "long"], "default": "bar"}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["long", "string"], "default": 1}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["null", "string"], "default": null}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["string", "null"], "default": null}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#, - true, - ), - ( - r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#, - true, - ), -]; - -pub const RECORD_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{ - "type": "record", - "name": "Test", - "fields": [{"name": "f", "type": "long"}] - }"#, - true, - ), - ( - r#"{ - "type": "error", - "name": "Test", - "fields": [{"name": "f", "type": "long"}] - }"#, - false, - ), - ( - r#"{ - "type": 
"record", - "name": "Node", - "fields": [ - {"name": "label", "type": "string"}, - {"name": "children", "type": {"type": "array", "items": "Node"}} - ] - }"#, - true, - ), - ( - r#"{ - "type": "record", - "name": "Lisp", - "fields": [ - { - "name": "value", - "type": [ - "null", "string", - { - "type": "record", - "name": "Cons", - "fields": [ - {"name": "car", "type": "Lisp"}, - {"name": "cdr", "type": "Lisp"} - ] - } - ] - } - ] - }"#, - true, - ), - ( - r#"{ - "type": "record", - "name": "HandshakeRequest", - "namespace": "org.apache.avro.ipc", - "fields": [ - {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}}, - {"name": "clientProtocol", "type": ["null", "string"]}, - {"name": "serverHash", "type": "MD5"}, - {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} - ] - }"#, - true, - ), - ( - r#"{ - "type":"record", - "name":"HandshakeResponse", - "namespace":"org.apache.avro.ipc", - "fields":[ - { - "name":"match", - "type":{ - "type":"enum", - "name":"HandshakeMatch", - "symbols":["BOTH", "CLIENT", "NONE"] - } - }, - {"name":"serverProtocol", "type":["null", "string"]}, - { - "name":"serverHash", - "type":["null", {"name":"MD5", "size":16, "type":"fixed"}] - }, - { - "name":"meta", - "type":["null", {"type":"map", "values":"bytes"}] - } - ] - }"#, - true, - ), - ( - r#"{ - "type":"record", - "name":"HandshakeResponse", - "namespace":"org.apache.avro.ipc", - "fields":[ - { - "name":"match", - "type":{ - "type":"enum", - "name":"HandshakeMatch", - "symbols":["BOTH", "CLIENT", "NONE"] - } - }, - {"name":"serverProtocol", "type":["null", "string"]}, - { - "name":"serverHash", - "type":["null", { "name":"MD5", "size":16, "type":"fixed"}] - }, - {"name":"meta", "type":["null", { "type":"map", "values":"bytes"}]} - ] - }"#, - true, - ), - // Unions may not contain more than one schema with the same type, except for the named - // types record, fixed and enum. 
For example, unions containing two array types or two map - // types are not permitted, but two types with different names are permitted. - // (Names permit efficient resolution when reading and writing unions.) - ( - r#"{ - "type": "record", - "name": "ipAddr", - "fields": [ - { - "name": "addr", - "type": [ - {"name": "IPv6", "type": "fixed", "size": 16}, - {"name": "IPv4", "type": "fixed", "size": 4} - ] - } - ] - }"#, - true, - ), - ( - r#"{ - "type": "record", - "name": "Address", - "fields": [ - {"type": "string"}, - {"type": "string", "name": "City"} - ] - }"#, - false, - ), - ( - r#"{ - "type": "record", - "name": "Event", - "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}] - }"#, - false, - ), - ( - r#"{ - "type": "record", - "fields": "His vision, from the constantly passing bars," - "name", - "Rainer" - }"#, - false, - ), - ( - r#"{ - "name": ["Tom", "Jerry"], - "type": "record", - "fields": [{"name": "name", "type": "string"}] - }"#, - false, - ), -]; - -pub const DOC_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{ - "type": "record", - "name": "TestDoc", - "doc": "Doc string", - "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}] - }"#, - true, - ), - ( - r#"{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}"#, - true, - ), - ( - r#"{"type": "fixed", "name": "Test", "size": 1, "doc": "Fixed Doc String"}"#, - true, - ), -]; - -pub const OTHER_ATTRIBUTES_EXAMPLES: &[(&str, bool)] = &[ - ( - r#"{ - "type": "record", - "name": "TestRecord", - "cp_string": "string", - "cp_int": 1, - "cp_array": [ 1, 2, 3, 4], - "fields": [ - {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2}}, - {"name": "f2", "type": "long", "cp_null": null} - ] - }"#, - true, - ), - ( - r#"{"type": "map", "values": "long", "cp_boolean": true}"#, - true, - ), - ( - r#"{ - "type": "enum", - "name": "TestEnum", - "symbols": [ "one", "two", "three" ], - "cp_float" : 1.0 - }"#, - true, - ), - (r#"{"type": "long", "date": 
"true"}"#, true), -]; - -pub const DECIMAL_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{ - "type": { - "type": "fixed", - "name": "TestDecimal", - "size": 10 - }, - "logicalType": "decimal", - "precision": 4, - "scale": 2 - }"#, - true, - ), - ( - r#"{ - "type": { - "type": "fixed", - "name": "ScaleIsImplicitlyZero", - "size": 10 - }, - "logicalType": "decimal", - "precision": 4 - }"#, - true, - ), - ( - r#"{ - "type": { - "type": "fixed", - "name": "PrecisionMustBeGreaterThanZero", - "size": 10 - }, - "logicalType": "decimal", - "precision": 0 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 10, - "scale": 2, - "size": 18 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 2, - "scale": -2 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": -2, - "scale": 2 - }"#, - true, - ), - ( - r#"{ - "type": "bytes", - "logicalType": "decimal", - "precision": 2, - "scale": 3 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": -10, - "scale": 2, - "size": 5 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 2, - "scale": 3, - "size": 2 - }"#, - true, - ), - ( - r#"{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 2, - "scale": 2, - "size": -2 - }"#, - false, - ), -]; - -pub const DATE_LOGICAL_TYPE: &[(&str, bool)] = &[ - (r#"{"type": "int", "logicalType": "date"}"#, true), - // this is valid even though its logical type is "date1", because unknown logical types are - // ignored - (r#"{"type": "int", "logicalType": "date1"}"#, true), - // this is still valid because unknown logicalType should be ignored - (r#"{"type": "long", "logicalType": "date"}"#, 
true), -]; - -pub const TIMEMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ - (r#"{"type": "int", "logicalType": "time-millis"}"#, true), - // this is valid even though its logical type is "time-milis" (missing the second "l"), - // because unknown logical types are ignored - (r#"{"type": "int", "logicalType": "time-milis"}"#, true), - // this is still valid because unknown logicalType should be ignored - (r#"{"type": "long", "logicalType": "time-millis"}"#, true), -]; - -pub const TIMEMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ - (r#"{"type": "long", "logicalType": "time-micros"}"#, true), - // this is valid even though its logical type is "time-micro" (missing the last "s"), because - // unknown logical types are ignored - (r#"{"type": "long", "logicalType": "time-micro"}"#, true), - // this is still valid because unknown logicalType should be ignored - (r#"{"type": "int", "logicalType": "time-micros"}"#, true), -]; - -pub const TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "timestamp-millis"}"#, - true, - ), - // this is valid even though its logical type is "timestamp-milis" (missing the second "l"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "timestamp-milis"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "timestamp-millis"}"#, - true, - ), -]; - -pub const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "timestamp-micros"}"#, - true, - ), - // this is valid even though its logical type is "timestamp-micro" (missing the last "s"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "timestamp-micro"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "timestamp-micros"}"#, - true, - ), -]; - -pub const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, 
bool)] = &[ - ( - r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#, - true, - ), - // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#, - true, - ), -]; - -pub const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ - ( - r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#, - true, - ), - // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because - // unknown logical types are ignored - ( - r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#, - true, - ), - ( - // this is still valid because unknown logicalType should be ignored - r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#, - true, - ), -]; - -pub fn examples() -> &'static Vec<(&'static str, bool)> { - static EXAMPLES_ONCE: OnceLock> = OnceLock::new(); - EXAMPLES_ONCE.get_or_init(|| { - Vec::new() - .iter() - .copied() - .chain(PRIMITIVE_EXAMPLES.iter().copied()) - .chain(FIXED_EXAMPLES.iter().copied()) - .chain(ENUM_EXAMPLES.iter().copied()) - .chain(ARRAY_EXAMPLES.iter().copied()) - .chain(MAP_EXAMPLES.iter().copied()) - .chain(UNION_EXAMPLES.iter().copied()) - .chain(RECORD_EXAMPLES.iter().copied()) - .chain(DOC_EXAMPLES.iter().copied()) - .chain(OTHER_ATTRIBUTES_EXAMPLES.iter().copied()) - .chain(DECIMAL_LOGICAL_TYPE.iter().copied()) - .chain(DATE_LOGICAL_TYPE.iter().copied()) - .chain(TIMEMILLIS_LOGICAL_TYPE.iter().copied()) - .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied()) - .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) - .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) - .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) - 
.chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) - .collect() - }) -} - -pub fn valid_examples() -> &'static Vec<(&'static str, bool)> { - static VALID_EXAMPLES_ONCE: OnceLock> = OnceLock::new(); - VALID_EXAMPLES_ONCE.get_or_init(|| examples().iter().copied().filter(|s| s.1).collect()) -} diff --git a/lang/rust/avro_test_helper/src/lib.rs b/lang/rust/avro_test_helper/src/lib.rs deleted file mode 100644 index f9fd05030b6..00000000000 --- a/lang/rust/avro_test_helper/src/lib.rs +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[cfg(not(target_arch = "wasm32"))] -use ctor::{ctor, dtor}; -use std::cell::RefCell; - -thread_local! 
{ - // The unit tests run in parallel - // We need to keep the log messages in a thread-local variable - // and clear them after assertion - pub(crate) static LOG_MESSAGES: RefCell> = const { RefCell::new(Vec::new()) }; -} - -pub mod data; -pub mod logger; - -#[cfg(not(target_arch = "wasm32"))] -#[ctor] -fn before_all() { - // better stacktraces in tests - better_panic::Settings::new() - .most_recent_first(true) - .lineno_suffix(false) - .backtrace_first(true) - .install(); - - // enable logging in tests - logger::install(); -} - -#[cfg(not(target_arch = "wasm32"))] -#[dtor] -fn after_all() { - logger::clear_log_messages(); -} - -/// A custom error type for tests. -#[derive(Debug)] -pub enum TestError {} - -/// A converter of any error into [TestError]. -/// It is used to print better error messages in the tests. -/// Borrowed from -impl From for TestError { - #[track_caller] - fn from(err: Err) -> Self { - panic!("{}: {}", std::any::type_name::(), err); - } -} - -pub type TestResult = anyhow::Result<(), TestError>; - -/// Does nothing. Just loads the crate. -/// Should be used in the integration tests, because they do not use [dev-dependencies] -/// and do not auto-load this crate. -pub fn init() {} diff --git a/lang/rust/avro_test_helper/src/logger.rs b/lang/rust/avro_test_helper/src/logger.rs deleted file mode 100644 index 8617358959a..00000000000 --- a/lang/rust/avro_test_helper/src/logger.rs +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::LOG_MESSAGES; -use log::{LevelFilter, Log, Metadata}; -use std::sync::OnceLock; - -struct TestLogger { - delegate: env_logger::Logger, -} - -impl Log for TestLogger { - #[inline] - fn enabled(&self, _metadata: &Metadata) -> bool { - true - } - - fn log(&self, record: &log::Record) { - if self.enabled(record.metadata()) { - LOG_MESSAGES.with(|msgs| msgs.borrow_mut().push(format!("{}", record.args()))); - - self.delegate.log(record); - } - } - - fn flush(&self) {} -} - -fn test_logger() -> &'static TestLogger { - // Lazy static because the Logger has to be 'static - static TEST_LOGGER_ONCE: OnceLock = OnceLock::new(); - TEST_LOGGER_ONCE.get_or_init(|| TestLogger { - delegate: env_logger::Builder::from_default_env() - .filter_level(LevelFilter::Off) - .parse_default_env() - .build(), - }) -} - -pub fn clear_log_messages() { - LOG_MESSAGES.with(|msgs| match msgs.try_borrow_mut() { - Ok(mut log_messages) => log_messages.clear(), - Err(err) => panic!("Failed to clear log messages: {err:?}"), - }); -} - -pub fn assert_not_logged(unexpected_message: &str) { - LOG_MESSAGES.with(|msgs| match msgs.borrow().last() { - Some(last_log) if last_log == unexpected_message => { - panic!("The following log message should not have been logged: '{unexpected_message}'") - } - _ => (), - }); -} - -pub fn assert_logged(expected_message: &str) { - let mut deleted = false; - LOG_MESSAGES.with(|msgs| { - msgs.borrow_mut().retain(|msg| { - if msg == expected_message { - deleted = true; - false - } else { - true - } - }) - }); - - if !deleted { - panic!("Expected 
log message has not been logged: '{expected_message}'"); - } -} - -#[cfg(not(target_arch = "wasm32"))] -pub(crate) fn install() { - log::set_logger(test_logger()) - .map(|_| log::set_max_level(LevelFilter::Trace)) - .map_err(|err| { - eprintln!("Failed to set the custom logger: {err:?}"); - }) - .unwrap(); -} diff --git a/lang/rust/build.sh b/lang/rust/build.sh deleted file mode 100755 index 90691bee240..00000000000 --- a/lang/rust/build.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -e # exit on error - -build_dir="../../build/rust" -dist_dir="../../dist/rust" - -function clean { - rm -rf $build_dir - rm -rf $dist_dir -} - - -function prepare_build { - clean - mkdir -p $build_dir -} - -cd $(dirname "$0") - -for target in "$@" -do - case "$target" in - clean) - cargo clean - ;; - lint) - cargo clippy --all-targets --all-features -- -Dclippy::all - ;; - test) - cargo test - ;; - dist) - mkdir -p ${dist_dir} - cargo build --release --lib --all-features --workspace - git archive --output=apache-avro.tgz HEAD - mv apache-avro.tgz ${dist_dir}/ - ;; - interop-data-generate) - prepare_build - RUST_LOG=apache_avro=debug RUST_BACKTRACE=1 cargo run --features snappy,zstandard,bzip,xz --example generate_interop_data - ;; - interop-data-test) - prepare_build - echo "Running interop data tests" - RUST_LOG=apache_avro=debug RUST_BACKTRACE=1 cargo run --features snappy,zstandard,bzip,xz --example test_interop_data - echo -e "\nRunning single object encoding interop data tests" - RUST_LOG=apache_avro=debug RUST_BACKTRACE=1 cargo run --example test_interop_single_object_encoding - ;; - *) - echo "Usage: $0 {lint|test|dist|clean|interop-data-generate|interop-data-test}" >&2 - exit 1 - esac -done diff --git a/lang/rust/deny.toml b/lang/rust/deny.toml deleted file mode 100644 index 95516caaa66..00000000000 --- a/lang/rust/deny.toml +++ /dev/null @@ -1,124 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Note that all fields that take a lint level have these possible values: -# * deny - An error will be produced and the check will fail -# * warn - A warning will be produced, but the check will not fail -# * allow - No warning or error will be produced, though in some cases a note -# will be - -# This section is considered when running `cargo deny check advisories` -# More documentation for the advisories section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html -[advisories] -# The lint level for crates that have been yanked from their source registry -yanked = "warn" -# A list of advisory IDs to ignore. Note that ignored advisories will still -# output a note when they are encountered. -# -# e.g. "RUSTSEC-0000-0000", -ignore = [ -] - -# This section is considered when running `cargo deny check licenses` -# More documentation for the licenses section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html -[licenses] -# List of explicitly allowed licenses -# See https://spdx.org/licenses/ for list of possible licenses -# [possible values: any SPDX 3.11 short identifier (+ optional exception)]. -allow = [ - "MIT", - "Apache-2.0", - "Unicode-DFS-2016", -] -# The confidence threshold for detecting a license from license text. -# The higher the value, the more closely the license text must be to the -# canonical license text of a valid SPDX license file. -# [possible values: any between 0.0 and 1.0]. 
-confidence-threshold = 0.8 -# Allow 1 or more licenses on a per-crate basis, so that particular licenses -# aren't accepted for every possible crate as with the normal allow list -exceptions = [ - # Each entry is the crate and version constraint, and its specific allow - # list - { allow = ["Zlib"], name = "adler32", version = "*" }, -] - -[licenses.private] -# If true, ignores workspace crates that aren't published, or are only -# published to private registries. -# To see how to mark a crate as unpublished (to the official registry), -# visit https://doc.rust-lang.org/cargo/reference/manifest.html#the-publish-field. -ignore = true - -# This section is considered when running `cargo deny check bans`. -# More documentation about the 'bans' section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html -[bans] -# Lint level for when multiple versions of the same crate are detected -multiple-versions = "warn" -# Lint level for when a crate version requirement is `*` -wildcards = "warn" -# The graph highlighting used when creating dotgraphs for crates -# with multiple versions -# * lowest-version - The path to the lowest versioned duplicate is highlighted -# * simplest-path - The path to the version with the fewest edges is highlighted -# * all - Both lowest-version and simplest-path are used -highlight = "all" -# The default lint level for `default` features for crates that are members of -# the workspace that is being checked. This can be overridden by allowing/denying -# `default` on a crate-by-crate basis if desired. -workspace-default-features = "allow" -# The default lint level for `default` features for external crates that are not -# members of the workspace. This can be overridden by allowing/denying `default` -# on a crate-by-crate basis if desired. -external-default-features = "allow" -# List of crates that are allowed. Use with care! 
-allow = [ - #{ name = "ansi_term", version = "=0.11.0" }, -] -# List of crates to deny -deny = [ - # Each entry the name of a crate and a version range. If version is - # not specified, all versions will be matched. - #{ name = "ansi_term", version = "=0.11.0" }, - # - # Wrapper crates can optionally be specified to allow the crate when it - # is a direct dependency of the otherwise banned crate - #{ name = "ansi_term", version = "=0.11.0", wrappers = [] }, -] - -# This section is considered when running `cargo deny check sources`. -# More documentation about the 'sources' section can be found here: -# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html -[sources] -# Lint level for what to happen when a crate from a crate registry that is not -# in the allow list is encountered -unknown-registry = "deny" -# Lint level for what to happen when a crate from a git repository that is not -# in the allow list is encountered -unknown-git = "deny" -# List of URLs for allowed crate registries. Defaults to the crates.io index -# if not specified. If it is specified but empty, no registries are allowed. -allow-registry = ["https://github.com/rust-lang/crates.io-index"] -# List of URLs for allowed Git repositories -allow-git = [] - -[sources.allow-org] -# 1 or more github.com organizations to allow git sources for -github = [] diff --git a/lang/rust/fuzz/.gitignore b/lang/rust/fuzz/.gitignore deleted file mode 100644 index 1a45eee7760..00000000000 --- a/lang/rust/fuzz/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -target -corpus -artifacts -coverage diff --git a/lang/rust/fuzz/Cargo.toml b/lang/rust/fuzz/Cargo.toml deleted file mode 100644 index a80f3b9fa1d..00000000000 --- a/lang/rust/fuzz/Cargo.toml +++ /dev/null @@ -1,49 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "apache-avro-fuzz" -version = "0.0.0" -publish = false -edition = "2021" -rust-version = "1.65.0" - -[package.metadata] -cargo-fuzz = true - -[dependencies] -libfuzzer-sys = "0.4" -serde = { version = "1", features = ["derive"] } - -[dependencies.apache-avro] -path = "../avro" -features = ["derive"] - -[workspace] -members = ["."] - -[features] -debug = [] - -[[bin]] -name = "roundtrip" -path = "fuzz_targets/roundtrip.rs" -test = false -doc = false - -[profile.release] -debug = true diff --git a/lang/rust/fuzz/corpus/roundtrip/infinite_iteration b/lang/rust/fuzz/corpus/roundtrip/infinite_iteration deleted file mode 100644 index d6606806d3381d201ae80fee9a5335ff33aa8772..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 132 zcmeZI%3@?<;^P7W##lZ+MLrZDmRMGluUDL$k(!&x#TCuT70boN$H#5N$HxQ&e6c{x W$8E~T#DEL@|Bos25D933ECv8-02WFB diff --git a/lang/rust/fuzz/corpus/roundtrip/negate_with_overflow b/lang/rust/fuzz/corpus/roundtrip/negate_with_overflow deleted file mode 100644 index 698f2b56bd10966ba46e5f4110a54dcc23b169f7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 17 RcmeZI%3}Nv1O7}5NdROA3daBd diff --git a/lang/rust/fuzz/corpus/roundtrip/unchecked_resize b/lang/rust/fuzz/corpus/roundtrip/unchecked_resize deleted file mode 100644 index 
82a5f947b780aca748b40e482deb34761ddd43fc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 58 zcmeZI%3@>^ODrqO*DFrWNX<>;iss^tRbb!)0tOa;pe&zQqCQNPfgzre0SNy8|DOl| Ds|pWp diff --git a/lang/rust/fuzz/fuzz_targets/roundtrip.rs b/lang/rust/fuzz/fuzz_targets/roundtrip.rs deleted file mode 100644 index 14bc59c8c2e..00000000000 --- a/lang/rust/fuzz/fuzz_targets/roundtrip.rs +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#![no_main] -use libfuzzer_sys::fuzz_target; - -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -enum PlainEnum { - A, - B, - C, - D, -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -enum Enum { - A(u8), - B(()), - C(Vec), - D(i128), -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -enum FloatEnum { - A(Enum), - E(Option), -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -struct Struct { - _a: (), - _b: u8, - _c: Vec, - _d: (u128, i8, (), PlainEnum, String), -} - -#[derive(Debug, Serialize, Deserialize, PartialEq)] -struct FloatStruct { - _a: Struct, - _b: f64, -} - -macro_rules! 
round_trip { - ($ty:ty, $data:ident, $equality:expr) => {{ - #[cfg(feature = "debug")] - println!("roundtripping {}", stringify!($ty)); - - use ::apache_avro::{from_value, Reader}; - - let reader = match Reader::new($data) { - Ok(r) => r, - _ => return, - }; - - for value in reader { - let value = match value { - Ok(v) => v, - _ => continue, - }; - #[cfg(feature = "debug")] - println!("value {:?}", &value); - - let _: Result<$ty, _> = from_value(&value); - } - }}; -} - -macro_rules! from_bytes { - ($ty:ty, $data:ident, $equality:expr) => {{ - round_trip!($ty, $data, $equality); - round_trip!(Vec<$ty>, $data, $equality); - round_trip!(Option<$ty>, $data, $equality); - }}; -} - -fuzz_target!(|data: &[u8]| { - // limit avro memory usage - apache_avro::max_allocation_bytes(2 * 1024 * 1024); // 2 MB - - from_bytes!(bool, data, true); - from_bytes!(i8, data, true); - from_bytes!(i16, data, true); - from_bytes!(i32, data, true); - from_bytes!(i64, data, true); - from_bytes!(i128, data, true); - from_bytes!(u8, data, true); - from_bytes!(u16, data, true); - from_bytes!(u32, data, true); - from_bytes!(u64, data, true); - from_bytes!(u128, data, true); - from_bytes!(f32, data, false); - from_bytes!(f64, data, false); - from_bytes!(char, data, true); - from_bytes!(&str, data, true); - from_bytes!((), data, true); - from_bytes!(PlainEnum, data, true); - from_bytes!(Enum, data, true); - from_bytes!(FloatEnum, data, false); - from_bytes!(Struct, data, true); - from_bytes!(FloatStruct, data, false); -}); diff --git a/lang/rust/migration_guide.md b/lang/rust/migration_guide.md deleted file mode 100644 index 5c30b183fc8..00000000000 --- a/lang/rust/migration_guide.md +++ /dev/null @@ -1,107 +0,0 @@ - - -# Migration Guide -## Unreleased -All changes are backward-compatible so far. - -# 0.13.0 -All changes are backward compatible. - -# 0.12.0 -All changes are backward compatible. 
- -## 0.11.0 -- A custom `Error` enum has been introduced to replace all existing errors and - the `failure` crate has been replaced by `thiserror`. - - This means that all public functions returning `Result` - will now return `Result` and that you can pattern match on - `Error` variants if you want to gather more information about the error. - - For example, code that used to be like this: - ```rust - match decoded { - Ok(msg) => Ok(msg.to_string()), - // assuming you were reading a Duration - Err(ref e) => match e.downcast_ref::() { - Some(_) => Ok("default".to_string()), - None => Err(format!("Unexpected error: {}", e)), - }, - } - ``` - now becomes: - ```rust - match decoded { - Ok(msg) => Ok(msg.to_string()), - Err(Error::ReadDuration(_)) => Ok("default".to_string()), - Err(e) => Err(format!("Unexpected error: {}", e)), - } - ``` - - Please note that all instances of: - - `DecodeError` - - `ValidationError` - - `DeError` - - `SerError` - - `ParseSchemaError` - - `SchemaResolutionError` - - must be replaced by `Error`. - -- The `ToAvro` trait has been deprecated in favor of `From` for `Value` implementations. - - Code like the following: - ```rust - use crate::types::{Record, ToAvro, Value}; - - let expected: Value = record.avro(); - ``` - - should be updated to: - - ```rust - use crate::types::{Record, Value}; - - let expected: Value = record.into(); - ``` - - Using the `ToAvro` trait will result in a deprecation warning. The trait will - be removed in future versions. - -- The `digest` crate has been updated to version `0.9`. If you were using the - `digest::Digest` trait from version `0.8`, you must update to the one defined - in `0.9`. - -## 0.10.0 -- `Writer::into_inner()` now calls `flush()` and returns a `Result`. 
- - This means that code like - ```rust - writer.append_ser(test)?; - writer.flush()?; - let input = writer.into_inner(); - ``` - - can be simplified into - ```rust - writer.append_ser(test)?; - let input = writer.into_inner()?; - ``` - There is no harm in leaving old calls to `flush()` around. diff --git a/lang/rust/rustfmt.toml b/lang/rust/rustfmt.toml deleted file mode 100644 index 90ae52caec8..00000000000 --- a/lang/rust/rustfmt.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -edition = "2018" -imports_granularity="Crate" diff --git a/lang/rust/wasm-demo/Cargo.toml b/lang/rust/wasm-demo/Cargo.toml deleted file mode 100644 index 0abff574d0c..00000000000 --- a/lang/rust/wasm-demo/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -[package] -name = "hello-wasm" -version = "0.1.0" -authors.workspace = true -description = "A demo project for testing apache_avro in WebAssembly" -license.workspace = true -readme = "README.md" -repository.workspace = true -edition.workspace = true -rust-version.workspace = true -keywords = ["avro", "data", "serialization", "wasm", "web assembly"] -categories.workspace = true -documentation.workspace = true -publish = false - - -[lib] -crate-type = ["cdylib", "rlib"] - -[dependencies] -apache-avro = { path = "../avro" } -serde = { workspace = true } -wasm-bindgen = "0.2.93" - -[dev-dependencies] -console_error_panic_hook = { version = "0.1.7" } -wasm-bindgen-test = "0.3.43" diff --git a/lang/rust/wasm-demo/README.md b/lang/rust/wasm-demo/README.md deleted file mode 100644 index 87d63641b13..00000000000 --- a/lang/rust/wasm-demo/README.md +++ /dev/null @@ -1,28 +0,0 @@ - - -# About - -An application that is used to test `apache_avro` crate as a web assembly. - -The project is created with `wasm-pack new wasm-demo` command and simplified to not use unrelated technologies (like Wee and a panic hook). - -# Code - -See [tests](./tests/demos.rs) diff --git a/lang/rust/wasm-demo/src/lib.rs b/lang/rust/wasm-demo/src/lib.rs deleted file mode 100644 index b248758bc12..00000000000 --- a/lang/rust/wasm-demo/src/lib.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. diff --git a/lang/rust/wasm-demo/tests/demos.rs b/lang/rust/wasm-demo/tests/demos.rs deleted file mode 100644 index 89be21f7266..00000000000 --- a/lang/rust/wasm-demo/tests/demos.rs +++ /dev/null @@ -1,86 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#![cfg(target_arch = "wasm32")] - -extern crate wasm_bindgen_test; - -use std::io::BufWriter; -use wasm_bindgen_test::*; - -use apache_avro::{from_value, to_value, types::Record, Codec, Reader, Schema, Writer}; -use serde::{Deserialize, Serialize}; - -wasm_bindgen_test_configure!(run_in_browser); - -#[derive(Deserialize, Serialize, Debug, PartialEq)] -pub struct MyRecord { - b: String, - a: i64, -} - -#[wasm_bindgen_test] -fn serialization_roundtrip() { - console_error_panic_hook::set_once(); - - let record = MyRecord { - b: "hello".to_string(), - a: 1, - }; - - let serialized = to_value(&record).unwrap(); - let deserialized = from_value::(&serialized).unwrap(); - assert_eq!(deserialized, record); -} - -#[wasm_bindgen_test] -fn write_read() { - console_error_panic_hook::set_once(); - - let schema_str = r#" - { - "type": "record", - "name": "my_record", - "fields": [ - {"name": "a", "type": "long"}, - {"name": "b", "type": "string"} - ] - }"#; - let schema = Schema::parse_str(schema_str).unwrap(); - - let mut record = Record::new(&schema).unwrap(); - record.put("a", 12_i32); - record.put("b", "hello".to_owned()); - - let mut writer = Writer::with_codec( - &schema, - BufWriter::new(Vec::with_capacity(200)), - Codec::Null, - ); - writer.append(record).unwrap(); - writer.flush().unwrap(); - let bytes = writer.into_inner().unwrap().into_inner().unwrap(); - - let reader = Reader::new(&bytes[..]).unwrap(); - - for value in reader { - match value { - Ok(record) => println!("Successfully read {:?}", record), - Err(err) => panic!("An error occurred while reading: {:?}", err), - } - } -} diff --git a/pom.xml b/pom.xml index 6a7fdd6d0ea..a4e3064f785 100644 --- a/pom.xml +++ b/pom.xml @@ -452,11 +452,7 @@ lang/ruby/.gem/** lang/ruby/pkg/** lang/ruby/.bundle/** - lang/rust/target/** - lang/rust/precommit_venv/** - lang/rust/Cargo.lock - lang/rust/README.tpl - lang/rust/.requirements-precommit.txt + lang/rust/README.md 
lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithLogicalTypes.java lang/java/avro/src/test/java/org/apache/avro/specific/TestRecordWithoutLogicalTypes.java lang/java/avro/src/test/java/org/apache/avro/specific/int$.java