From e53ab491d93c1f9207944b2df72ba63eda5ec886 Mon Sep 17 00:00:00 2001 From: Sanket Kedia Date: Wed, 1 May 2024 14:42:27 -0700 Subject: [PATCH] [ENH] SIMD vectorization for distance metrics (#2084) ## Description of changes Adds SIMD vectorization for euclidean, cosine and inner product for x86, x86_64 and arm. Instruction sets whose support has been added are SSE, AVX and NEON. ## Test plan - [+] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Documentation Changes No --------- Co-authored-by: Sanket Kedia --- Cargo.lock | 212 +++++++++++- rust/worker/Cargo.toml | 6 + rust/worker/src/benches/distance_metrics.rs | 22 ++ rust/worker/src/distance/distance_avx.rs | 345 ++++++++++++++++++++ rust/worker/src/distance/distance_neon.rs | 296 +++++++++++++++++ rust/worker/src/distance/distance_sse.rs | 342 +++++++++++++++++++ rust/worker/src/distance/mod.rs | 10 +- rust/worker/src/distance/types.rs | 293 ++++++++++++++++- rust/worker/src/lib.rs | 2 +- 9 files changed, 1504 insertions(+), 24 deletions(-) create mode 100644 rust/worker/src/benches/distance_metrics.rs create mode 100644 rust/worker/src/distance/distance_avx.rs create mode 100644 rust/worker/src/distance/distance_neon.rs create mode 100644 rust/worker/src/distance/distance_sse.rs diff --git a/Cargo.lock b/Cargo.lock index fef655061d1..81b2b455355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,7 +105,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half", + "half 2.4.0", "num", ] @@ -120,7 +120,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half", + "half 2.4.0", "hashbrown 0.14.3", "num", ] @@ -132,7 +132,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" dependencies = [ "bytes", - "half", + "half 2.4.0", "num", ] @@ -149,7 +149,7 @@ dependencies = [ "arrow-select", "base64", "chrono", - "half", + "half 2.4.0", "lexical-core", "num", ] @@ 
-181,7 +181,7 @@ checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" dependencies = [ "arrow-buffer", "arrow-schema", - "half", + "half 2.4.0", "num", ] @@ -211,7 +211,7 @@ dependencies = [ "arrow-data", "arrow-schema", "chrono", - "half", + "half 2.4.0", "indexmap 2.2.5", "lexical-core", "num", @@ -230,7 +230,7 @@ dependencies = [ "arrow-data", "arrow-schema", "arrow-select", - "half", + "half 2.4.0", "num", ] @@ -245,7 +245,7 @@ dependencies = [ "arrow-buffer", "arrow-data", "arrow-schema", - "half", + "half 2.4.0", "hashbrown 0.14.3", ] @@ -327,6 +327,17 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -874,6 +885,12 @@ dependencies = [ "either", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.90" @@ -909,6 +926,17 @@ dependencies = [ "windows-targets 0.52.4", ] +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "bitflags 1.3.2", + "textwrap", + "unicode-width", +] + [[package]] name = "const-oid" version = "0.9.6" @@ -978,6 +1006,42 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools 0.10.5", + "lazy_static", + "num-traits", + "oorandom", + 
"plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +dependencies = [ + "cast", + "itertools 0.10.5", +] + [[package]] name = "crossbeam-channel" version = "0.5.12" @@ -1452,6 +1516,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" + [[package]] name = "half" version = "2.4.0" @@ -1485,6 +1555,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.9" @@ -1701,6 +1780,15 @@ dependencies = [ "web-sys", ] +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.11.0" @@ -2226,7 +2314,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.9", "libc", ] @@ -2254,6 +2342,12 @@ dependencies = [ "loom", ] +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "openssl-probe" version = "0.1.5" @@ -2466,6 +2560,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -2557,7 +2679,7 @@ checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" dependencies = [ "bytes", "heck", - "itertools", + "itertools 0.11.0", "log", "multimap", "once_cell", @@ -2578,7 +2700,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" dependencies = [ "anyhow", - "itertools", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.52", @@ -2873,6 +2995,15 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", 
+] + [[package]] name = "schannel" version = "0.1.23" @@ -3000,6 +3131,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half 1.8.3", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.197" @@ -3217,7 +3358,7 @@ dependencies = [ "fastdivide", "fs4", "htmlescape", - "itertools", + "itertools 0.11.0", "levenshtein_automata", "log", "lru", @@ -3267,7 +3408,7 @@ checksum = "8d85f8019af9a78b3118c11298b36ffd21c2314bd76bbcd9d12e00124cbb7e70" dependencies = [ "fastdivide", "fnv", - "itertools", + "itertools 0.11.0", "serde", "tantivy-bitpacker", "tantivy-common", @@ -3350,6 +3491,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thiserror" version = "1.0.58" @@ -3420,6 +3570,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -3728,6 +3888,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-width" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" + [[package]] name = "unsafe-libyaml" version = "0.2.10" @@ -3813,6 +3979,16 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3920,6 +4096,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -4087,6 +4272,7 @@ dependencies = [ "aws-smithy-types", "bytes", "cc", + "criterion", "figment", "futures", "k8s-openapi", diff --git a/rust/worker/Cargo.toml b/rust/worker/Cargo.toml index d4355f078dd..688c6e9bdfa 100644 --- a/rust/worker/Cargo.toml +++ b/rust/worker/Cargo.toml @@ -11,6 +11,11 @@ path = "src/bin/query_service.rs" name = "compaction_service" path = "src/bin/compaction_service.rs" +[[bench]] +name = "distance_metrics" +path = "src/benches/distance_metrics.rs" +harness = false + [dependencies] tonic = "0.10" prost = "0.12" @@ -47,6 +52,7 @@ proptest = "1.4.0" proptest-state-machine = "0.1.0" "rand" = "0.8.5" rayon = "1.8.0" +criterion = "0.3" [build-dependencies] tonic-build = "0.10" diff --git a/rust/worker/src/benches/distance_metrics.rs b/rust/worker/src/benches/distance_metrics.rs new file mode 100644 index 00000000000..99a86be8da2 --- /dev/null +++ b/rust/worker/src/benches/distance_metrics.rs @@ -0,0 +1,22 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use worker::distance::DistanceFunction; + +fn distance_metrics(c: &mut Criterion) { + c.bench_function("distance_metrics", |b| { + let mut x: Vec<f32> = Vec::with_capacity(786); + for _ in 0..x.capacity() { + x.push(rand::random()); + } + let mut y: Vec<f32> = Vec::with_capacity(786); + for _ in 0..y.capacity() { +
y.push(rand::random()); + } + b.iter(|| { + let d = DistanceFunction::Cosine; + std::hint::black_box(DistanceFunction::distance(&d, &x, &y)); + }); + }); +} + +criterion_group!(benches, distance_metrics,); +criterion_main!(benches); diff --git a/rust/worker/src/distance/distance_avx.rs b/rust/worker/src/distance/distance_avx.rs new file mode 100644 index 00000000000..bf2f0e3c079 --- /dev/null +++ b/rust/worker/src/distance/distance_avx.rs @@ -0,0 +1,345 @@ +/* Much of this file is copied from https://github.com/qdrant/qdrant/blob/master/lib/segment/src/spaces/simple_avx.rs +copyright Qdrant, licensed under the Apache 2.0 license. + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +#[cfg(all(target_feature = "avx", target_feature = "fma"))] +pub unsafe fn hsum256_ps_avx(x: __m256) -> f32 { + let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); + let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + _mm_cvtss_f32(x32) +} + +#[cfg(all(target_feature = "avx", target_feature = "fma"))] +pub unsafe fn cosine_distance(a: &[f32], b: &[f32]) -> f32 { + let n = a.len(); + let m = n - (n % 32); + let mut ptr1: *const f32 = a.as_ptr(); + let mut ptr2: *const f32 = b.as_ptr(); + let mut sum256_1: __m256 = _mm256_setzero_ps(); + let mut sum256_2: __m256 = _mm256_setzero_ps(); + let mut sum256_3: __m256 = _mm256_setzero_ps(); + let mut sum256_4: __m256 = _mm256_setzero_ps(); + let mut i: usize = 0; + while i < m { + sum256_1 = _mm256_fmadd_ps(_mm256_loadu_ps(ptr1), _mm256_loadu_ps(ptr2), sum256_1); + sum256_2 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(8)), + _mm256_loadu_ps(ptr2.add(8)), + sum256_2, + ); + sum256_3 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(16)), + _mm256_loadu_ps(ptr2.add(16)), + sum256_3, + ); + sum256_4 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(24)), + _mm256_loadu_ps(ptr2.add(24)), + sum256_4, + ); + + ptr1 = ptr1.add(32); + ptr2 = ptr2.add(32); + i += 32; + } + + let 
mut result = hsum256_ps_avx(sum256_1) + + hsum256_ps_avx(sum256_2) + + hsum256_ps_avx(sum256_3) + + hsum256_ps_avx(sum256_4); + + for i in 0..n - m { + result += (*ptr1.add(i)) * (*ptr2.add(i)); + } + 1.0_f32 - result +} + +#[cfg(all(target_feature = "avx", target_feature = "fma"))] +pub unsafe fn inner_product(a: &[f32], b: &[f32]) -> f32 { + let n = a.len(); + let m = n - (n % 32); + let mut ptr1: *const f32 = a.as_ptr(); + let mut ptr2: *const f32 = b.as_ptr(); + let mut sum256_1: __m256 = _mm256_setzero_ps(); + let mut sum256_2: __m256 = _mm256_setzero_ps(); + let mut sum256_3: __m256 = _mm256_setzero_ps(); + let mut sum256_4: __m256 = _mm256_setzero_ps(); + let mut i: usize = 0; + while i < m { + sum256_1 = _mm256_fmadd_ps(_mm256_loadu_ps(ptr1), _mm256_loadu_ps(ptr2), sum256_1); + sum256_2 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(8)), + _mm256_loadu_ps(ptr2.add(8)), + sum256_2, + ); + sum256_3 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(16)), + _mm256_loadu_ps(ptr2.add(16)), + sum256_3, + ); + sum256_4 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(24)), + _mm256_loadu_ps(ptr2.add(24)), + sum256_4, + ); + + ptr1 = ptr1.add(32); + ptr2 = ptr2.add(32); + i += 32; + } + + let mut result = hsum256_ps_avx(sum256_1) + + hsum256_ps_avx(sum256_2) + + hsum256_ps_avx(sum256_3) + + hsum256_ps_avx(sum256_4); + + for i in 0..n - m { + result += (*ptr1.add(i)) * (*ptr2.add(i)); + } + 1.0_f32 - result +} + +#[cfg(all(target_feature = "avx", target_feature = "fma"))] +pub unsafe fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 { + let n = a.len(); + let m = n - (n % 32); + let mut ptr1: *const f32 = a.as_ptr(); + let mut ptr2: *const f32 = b.as_ptr(); + let mut sum256_1: __m256 = _mm256_setzero_ps(); + let mut sum256_2: __m256 = _mm256_setzero_ps(); + let mut sum256_3: __m256 = _mm256_setzero_ps(); + let mut sum256_4: __m256 = _mm256_setzero_ps(); + let mut i: usize = 0; + while i < m { + let sub256_1: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(0)), 
_mm256_loadu_ps(ptr2.add(0))); + sum256_1 = _mm256_fmadd_ps(sub256_1, sub256_1, sum256_1); + + let sub256_2: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(8)), _mm256_loadu_ps(ptr2.add(8))); + sum256_2 = _mm256_fmadd_ps(sub256_2, sub256_2, sum256_2); + + let sub256_3: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(16)), _mm256_loadu_ps(ptr2.add(16))); + sum256_3 = _mm256_fmadd_ps(sub256_3, sub256_3, sum256_3); + + let sub256_4: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(24)), _mm256_loadu_ps(ptr2.add(24))); + sum256_4 = _mm256_fmadd_ps(sub256_4, sub256_4, sum256_4); + + ptr1 = ptr1.add(32); + ptr2 = ptr2.add(32); + i += 32; + } + + let mut result = hsum256_ps_avx(sum256_1) + + hsum256_ps_avx(sum256_2) + + hsum256_ps_avx(sum256_3) + + hsum256_ps_avx(sum256_4); + for i in 0..n - m { + result += (*ptr1.add(i) - *ptr2.add(i)).powi(2); + } + result +} diff --git a/rust/worker/src/distance/distance_neon.rs b/rust/worker/src/distance/distance_neon.rs new file mode 100644 index 00000000000..4f22f9e4dfe --- /dev/null +++ b/rust/worker/src/distance/distance_neon.rs @@ -0,0 +1,296 @@ +/* Much of this file is copied from https://github.com/qdrant/qdrant/blob/master/lib/segment/src/spaces/simple_neon.rs +copyright Qdrant, licensed under the Apache 2.0 license. + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
// +
// +   Copyright [yyyy] [name of copyright owner]
// +
// +   Licensed under the Apache License, Version 2.0 (the "License");
// +   you may not use this file except in compliance with the License.
// +   You may obtain a copy of the License at
// +
// +       http://www.apache.org/licenses/LICENSE-2.0
// +
// +   Unless required by applicable law or agreed to in writing, software
// +   distributed under the License is distributed on an "AS IS" BASIS,
// +   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// +   See the License for the specific language governing permissions and
// +   limitations under the License.
// +*/

// The NEON intrinsics used below live in `std::arch::aarch64`, so every item is
// gated on the architecture as well as on the target feature.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use std::arch::aarch64::*;

/// Dot product of `a` with the first `a.len()` elements of `b`.
///
/// Full groups of 16 lanes are accumulated in four independent NEON registers
/// with fused multiply-add; the remaining `a.len() % 16` elements are folded in
/// with scalar arithmetic afterwards.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support NEON (this item only exists when the crate is
/// compiled with `target_feature = "neon"`).
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
unsafe fn dot_product_neon(a: &[f32], b: &[f32]) -> f32 {
    let n = a.len();
    // Without this check a shorter `b` would be read out of bounds below.
    assert!(
        n <= b.len(),
        "distance_neon: `b` has {} elements but `a` has {}",
        b.len(),
        n
    );
    let m = n - (n % 16);

    // SAFETY: `chunks_exact(16)` yields slices of exactly 16 in-bounds `f32`s, so
    // the four 4-lane loads at offsets 0, 4, 8 and 12 never leave the chunk.
    let mut result = unsafe {
        let mut acc1 = vdupq_n_f32(0.0);
        let mut acc2 = vdupq_n_f32(0.0);
        let mut acc3 = vdupq_n_f32(0.0);
        let mut acc4 = vdupq_n_f32(0.0);
        for (ca, cb) in a[..m].chunks_exact(16).zip(b[..m].chunks_exact(16)) {
            let (pa, pb) = (ca.as_ptr(), cb.as_ptr());
            acc1 = vfmaq_f32(acc1, vld1q_f32(pa), vld1q_f32(pb));
            acc2 = vfmaq_f32(acc2, vld1q_f32(pa.add(4)), vld1q_f32(pb.add(4)));
            acc3 = vfmaq_f32(acc3, vld1q_f32(pa.add(8)), vld1q_f32(pb.add(8)));
            acc4 = vfmaq_f32(acc4, vld1q_f32(pa.add(12)), vld1q_f32(pb.add(12)));
        }
        vaddvq_f32(acc1) + vaddvq_f32(acc2) + vaddvq_f32(acc3) + vaddvq_f32(acc4)
    };

    // Scalar tail for the elements that do not fill a 16-lane group.
    for (x, y) in a[m..].iter().zip(&b[m..n]) {
        result += x * y;
    }
    result
}

/// Returns `1 - dot(a, b)`.
///
/// No normalisation is performed here; the caller in `types.rs` notes that
/// vectors are assumed to be normalised already for the cosine metric.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support NEON.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
pub unsafe fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    // SAFETY: the caller upholds this function's contract, which is the same as
    // the helper's.
    1.0_f32 - unsafe { dot_product_neon(a, b) }
}

/// Returns `1 - dot(a, b)`, the inner-product distance.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support NEON.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
pub unsafe fn inner_product(a: &[f32], b: &[f32]) -> f32 {
    // SAFETY: the caller upholds this function's contract, which is the same as
    // the helper's.
    1.0_f32 - unsafe { dot_product_neon(a, b) }
}

/// Returns the squared Euclidean distance `sum((a[i] - b[i])^2)` over
/// `0..a.len()`; no square root is taken.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support NEON.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
pub unsafe fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    let n = a.len();
    // Without this check a shorter `b` would be read out of bounds below.
    assert!(
        n <= b.len(),
        "distance_neon: `b` has {} elements but `a` has {}",
        b.len(),
        n
    );
    let m = n - (n % 16);

    // SAFETY: `chunks_exact(16)` yields slices of exactly 16 in-bounds `f32`s, so
    // the four 4-lane loads at offsets 0, 4, 8 and 12 never leave the chunk.
    let mut result = unsafe {
        let mut acc1 = vdupq_n_f32(0.0);
        let mut acc2 = vdupq_n_f32(0.0);
        let mut acc3 = vdupq_n_f32(0.0);
        let mut acc4 = vdupq_n_f32(0.0);
        for (ca, cb) in a[..m].chunks_exact(16).zip(b[..m].chunks_exact(16)) {
            let (pa, pb) = (ca.as_ptr(), cb.as_ptr());

            let d1 = vsubq_f32(vld1q_f32(pa), vld1q_f32(pb));
            acc1 = vfmaq_f32(acc1, d1, d1);

            let d2 = vsubq_f32(vld1q_f32(pa.add(4)), vld1q_f32(pb.add(4)));
            acc2 = vfmaq_f32(acc2, d2, d2);

            let d3 = vsubq_f32(vld1q_f32(pa.add(8)), vld1q_f32(pb.add(8)));
            acc3 = vfmaq_f32(acc3, d3, d3);

            let d4 = vsubq_f32(vld1q_f32(pa.add(12)), vld1q_f32(pb.add(12)));
            acc4 = vfmaq_f32(acc4, d4, d4);
        }
        vaddvq_f32(acc1) + vaddvq_f32(acc2) + vaddvq_f32(acc3) + vaddvq_f32(acc4)
    };

    // Scalar tail for the elements that do not fill a 16-lane group.
    for (x, y) in a[m..].iter().zip(&b[m..n]) {
        result += (x - y).powi(2);
    }
    result
}

// diff --git a/rust/worker/src/distance/distance_sse.rs b/rust/worker/src/distance/distance_sse.rs
// new file mode 100644
// index 00000000000..c02b2fc8619
// --- /dev/null
// +++ b/rust/worker/src/distance/distance_sse.rs
// @@ -0,0 +1,342 @@
// +/* Much of this file is copied from https://github.com/qdrant/qdrant/blob/master/lib/segment/src/spaces/simple_sse.rs
// +copyright Qdrant, licensed under the Apache
2.0 license. + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
// +*/

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Horizontally adds the four `f32` lanes of `x` and returns the scalar sum.
///
/// # Safety
/// The executing CPU must support SSE (this item only exists when the crate is
/// compiled with `target_feature = "sse"`).
#[cfg(target_feature = "sse")]
pub unsafe fn hsum128_ps_sse(x: __m128) -> f32 {
    // SAFETY: register-only SSE operations; availability is the caller's contract.
    unsafe {
        // Lanes 0/1 now hold x0 + x2 and x1 + x3.
        let pairs: __m128 = _mm_add_ps(x, _mm_movehl_ps(x, x));
        // 0x55 broadcasts lane 1, so lane 0 becomes (x0 + x2) + (x1 + x3).
        let total: __m128 = _mm_add_ss(pairs, _mm_shuffle_ps(pairs, pairs, 0x55));
        _mm_cvtss_f32(total)
    }
}

/// Dot product of `a` with the first `a.len()` elements of `b`.
///
/// Full groups of 16 lanes are accumulated in four independent SSE registers;
/// the remaining `a.len() % 16` elements are folded in with scalar arithmetic
/// afterwards.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support SSE.
#[cfg(target_feature = "sse")]
unsafe fn dot_product_sse(a: &[f32], b: &[f32]) -> f32 {
    let n = a.len();
    // Without this check a shorter `b` would be read out of bounds below.
    assert!(
        n <= b.len(),
        "distance_sse: `b` has {} elements but `a` has {}",
        b.len(),
        n
    );
    let m = n - (n % 16);

    // SAFETY: `chunks_exact(16)` yields slices of exactly 16 in-bounds `f32`s, so
    // the four unaligned 4-lane loads at offsets 0, 4, 8 and 12 never leave the
    // chunk.
    let mut result = unsafe {
        let mut acc1: __m128 = _mm_setzero_ps();
        let mut acc2: __m128 = _mm_setzero_ps();
        let mut acc3: __m128 = _mm_setzero_ps();
        let mut acc4: __m128 = _mm_setzero_ps();
        for (ca, cb) in a[..m].chunks_exact(16).zip(b[..m].chunks_exact(16)) {
            let (pa, pb) = (ca.as_ptr(), cb.as_ptr());
            acc1 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(pa), _mm_loadu_ps(pb)), acc1);
            acc2 = _mm_add_ps(
                _mm_mul_ps(_mm_loadu_ps(pa.add(4)), _mm_loadu_ps(pb.add(4))),
                acc2,
            );
            acc3 = _mm_add_ps(
                _mm_mul_ps(_mm_loadu_ps(pa.add(8)), _mm_loadu_ps(pb.add(8))),
                acc3,
            );
            acc4 = _mm_add_ps(
                _mm_mul_ps(_mm_loadu_ps(pa.add(12)), _mm_loadu_ps(pb.add(12))),
                acc4,
            );
        }
        hsum128_ps_sse(acc1) + hsum128_ps_sse(acc2) + hsum128_ps_sse(acc3) + hsum128_ps_sse(acc4)
    };

    // Scalar tail for the elements that do not fill a 16-lane group.
    for (x, y) in a[m..].iter().zip(&b[m..n]) {
        result += x * y;
    }
    result
}

/// Returns `1 - dot(a, b)`.
///
/// No normalisation is performed here; the caller in `types.rs` notes that
/// vectors are assumed to be normalised already for the cosine metric.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support SSE.
#[cfg(target_feature = "sse")]
pub unsafe fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    // SAFETY: the caller upholds this function's contract, which is the same as
    // the helper's.
    1.0_f32 - unsafe { dot_product_sse(a, b) }
}

/// Returns `1 - dot(a, b)`, the inner-product distance.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support SSE.
#[cfg(target_feature = "sse")]
pub unsafe fn inner_product(a: &[f32], b: &[f32]) -> f32 {
    // SAFETY: the caller upholds this function's contract, which is the same as
    // the helper's.
    1.0_f32 - unsafe { dot_product_sse(a, b) }
}

/// Returns the squared Euclidean distance `sum((a[i] - b[i])^2)` over
/// `0..a.len()`; no square root is taken.
///
/// # Panics
/// Panics if `b` is shorter than `a`.
///
/// # Safety
/// The executing CPU must support SSE.
#[cfg(target_feature = "sse")]
pub unsafe fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    let n = a.len();
    // Without this check a shorter `b` would be read out of bounds below.
    assert!(
        n <= b.len(),
        "distance_sse: `b` has {} elements but `a` has {}",
        b.len(),
        n
    );
    let m = n - (n % 16);

    // SAFETY: `chunks_exact(16)` yields slices of exactly 16 in-bounds `f32`s, so
    // the four unaligned 4-lane loads at offsets 0, 4, 8 and 12 never leave the
    // chunk.
    let mut result = unsafe {
        let mut acc1: __m128 = _mm_setzero_ps();
        let mut acc2: __m128 = _mm_setzero_ps();
        let mut acc3: __m128 = _mm_setzero_ps();
        let mut acc4: __m128 = _mm_setzero_ps();
        for (ca, cb) in a[..m].chunks_exact(16).zip(b[..m].chunks_exact(16)) {
            let (pa, pb) = (ca.as_ptr(), cb.as_ptr());

            let d1 = _mm_sub_ps(_mm_loadu_ps(pa), _mm_loadu_ps(pb));
            acc1 = _mm_add_ps(_mm_mul_ps(d1, d1), acc1);

            let d2 = _mm_sub_ps(_mm_loadu_ps(pa.add(4)), _mm_loadu_ps(pb.add(4)));
            acc2 = _mm_add_ps(_mm_mul_ps(d2, d2), acc2);

            let d3 = _mm_sub_ps(_mm_loadu_ps(pa.add(8)), _mm_loadu_ps(pb.add(8)));
            acc3 = _mm_add_ps(_mm_mul_ps(d3, d3), acc3);

            let d4 = _mm_sub_ps(_mm_loadu_ps(pa.add(12)), _mm_loadu_ps(pb.add(12)));
            acc4 = _mm_add_ps(_mm_mul_ps(d4, d4), acc4);
        }
        hsum128_ps_sse(acc1) + hsum128_ps_sse(acc2) + hsum128_ps_sse(acc3) + hsum128_ps_sse(acc4)
    };

    // Scalar tail for the elements that do not fill a 16-lane group.
    for (x, y) in a[m..].iter().zip(&b[m..n]) {
        result += (x - y).powi(2);
    }
    result
}

// diff --git a/rust/worker/src/distance/mod.rs b/rust/worker/src/distance/mod.rs
// index f8baffd363b..a6958b1c0da 100644
// --- a/rust/worker/src/distance/mod.rs
// +++ b/rust/worker/src/distance/mod.rs
// @@ -1,3 +1,9 @@
// -mod types;
// +pub mod distance_avx;
// +pub mod distance_neon;
// +pub mod distance_sse;
// +pub mod types;
//
// -pub(crate) use types::*;
// +pub use distance_avx::*;
// +pub use distance_neon::*;
// +pub use distance_sse::*;
// +pub use types::*;
// diff --git a/rust/worker/src/distance/types.rs b/rust/worker/src/distance/types.rs
// index 27a86954f29..298b48357f9 100644
// --- a/rust/worker/src/distance/types.rs
// +++ b/rust/worker/src/distance/types.rs
// @@ -1,3 +1,205 @@
// +/* Parts of file is copied from https://github.com/qdrant/qdrant/blob/master/lib/segment/src/spaces/simple.rs
// +copyright Qdrant, licensed under the Apache 2.0 license.
// +
// +   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
// +
// +   1. Definitions.
// +
// +      "License" shall mean the terms and conditions for use, reproduction,
// +      and distribution as defined by Sections 1 through 9 of this document.
// +
// +      "Licensor" shall mean the copyright owner or entity authorized by
// +      the copyright owner that is granting the License.
// +
// +      "Legal Entity" shall mean the union of the acting entity and all
// +      other entities that control, are controlled by, or are under common
// +      control with that entity. For the purposes of this definition,
// +      "control" means (i) the power, direct or indirect, to cause the
// +      direction or management of such entity, whether by contract or
// +      otherwise, or (ii) ownership of fifty percent (50%) or more of the
// +      outstanding shares, or (iii) beneficial ownership of such entity.
// +
// +      "You" (or "Your") shall mean an individual or Legal Entity
// +      exercising permissions granted by this License.
// +
// +      "Source" form shall mean the preferred form for making modifications,
// +      including but not limited to software source code, documentation
// +      source, and configuration files.
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + use crate::errors::{ChromaError, ErrorCodes}; use thiserror::Error; @@ -11,7 +213,7 @@ use thiserror::Error; /// # Notes /// See https://docs.trychroma.com/usage-guide#changing-the-distance-function #[derive(Clone, Debug, PartialEq)] -pub(crate) enum DistanceFunction { +pub enum DistanceFunction { Euclidean, Cosine, InnerProduct, @@ -19,14 +221,37 @@ pub(crate) enum DistanceFunction { impl DistanceFunction { // TOOD: Should we error if mismatched dimensions? - pub(crate) fn distance(&self, a: &[f32], b: &[f32]) -> f32 { - // TODO: implement this in SSE/AVX SIMD - // For now we write these as loops since we suspect that will more likely - // lead to the compiler vectorizing the code. (We saw this on - // Apple Silicon Macs who didn't have hand-rolled SIMD instructions in our - // C++ code). + pub fn distance(&self, a: &[f32], b: &[f32]) -> f32 { + // TODO: Figure out why the compiler is not auto vectorizing these + // by default. 
match self { DistanceFunction::Euclidean => { + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { crate::distance::distance_neon::euclidean_distance(a, b) }; + } + } + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse" + ))] + { + if std::arch::is_x86_feature_detected!("sse") { + return unsafe { crate::distance::distance_sse::euclidean_distance(a, b) }; + } + } + #[cfg(all( + target_arch = "x86_64", + all(target_feature = "avx", target_feature = "fma") + ))] + { + if std::arch::is_x86_feature_detected!("avx") + && std::arch::is_x86_feature_detected!("fma") + { + return unsafe { crate::distance::distance_avx::euclidean_distance(a, b) }; + } + } let mut sum = 0.0; for i in 0..a.len() { sum += (a[i] - b[i]).powi(2); @@ -34,6 +259,32 @@ impl DistanceFunction { sum } DistanceFunction::Cosine => { + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { crate::distance::distance_neon::cosine_distance(a, b) }; + } + } + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse" + ))] + { + if std::arch::is_x86_feature_detected!("sse") { + return unsafe { crate::distance::distance_sse::cosine_distance(a, b) }; + } + } + #[cfg(all( + target_arch = "x86_64", + all(target_feature = "avx", target_feature = "fma") + ))] + { + if std::arch::is_x86_feature_detected!("avx") + && std::arch::is_x86_feature_detected!("fma") + { + return unsafe { crate::distance::distance_avx::cosine_distance(a, b) }; + } + } // For cosine we just assume the vectors have been normalized, since that // is what our indices expect. 
let mut sum = 0.0; @@ -43,6 +294,32 @@ impl DistanceFunction { 1.0_f32 - sum } DistanceFunction::InnerProduct => { + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") { + return unsafe { crate::distance::distance_neon::inner_product(a, b) }; + } + } + #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse" + ))] + { + if std::arch::is_x86_feature_detected!("sse") { + return unsafe { crate::distance::distance_sse::inner_product(a, b) }; + } + } + #[cfg(all( + target_arch = "x86_64", + all(target_feature = "avx", target_feature = "fma") + ))] + { + if std::arch::is_x86_feature_detected!("avx") + && std::arch::is_x86_feature_detected!("fma") + { + return unsafe { crate::distance::distance_avx::inner_product(a, b) }; + } + } let mut sum = 0.0; for i in 0..a.len() { sum += a[i] * b[i]; @@ -54,7 +331,7 @@ impl DistanceFunction { } #[derive(Error, Debug)] -pub(crate) enum DistanceFunctionError { +pub enum DistanceFunctionError { #[error("Invalid distance function `{0}`")] InvalidDistanceFunction(String), } diff --git a/rust/worker/src/lib.rs b/rust/worker/src/lib.rs index ee06489601b..eb1f93120ad 100644 --- a/rust/worker/src/lib.rs +++ b/rust/worker/src/lib.rs @@ -2,7 +2,7 @@ mod assignment; mod blockstore; mod compactor; mod config; -mod distance; +pub mod distance; mod errors; mod execution; mod index;