diff --git a/interpreter/binary/decode.ml b/interpreter/binary/decode.ml index ea0baf815..2390fff93 100644 --- a/interpreter/binary/decode.ml +++ b/interpreter/binary/decode.ml @@ -370,6 +370,7 @@ let simd_prefix s = | 0xb7l -> i32x4_min_u | 0xb8l -> i32x4_max_s | 0xb9l -> i32x4_max_u + | 0xbal -> i32x4_dot_i16x8_s | 0xc1l -> i64x2_neg | 0xcbl -> i64x2_shl | 0xccl -> i64x2_shr_s diff --git a/interpreter/binary/encode.ml b/interpreter/binary/encode.ml index 3dccb4243..d686d6a8d 100644 --- a/interpreter/binary/encode.ml +++ b/interpreter/binary/encode.ml @@ -462,6 +462,7 @@ let encode m = | Binary (V128 V128Op.(I32x4 MinU)) -> simd_op 0xb7l | Binary (V128 V128Op.(I32x4 MaxS)) -> simd_op 0xb8l | Binary (V128 V128Op.(I32x4 MaxU)) -> simd_op 0xb9l + | Binary (V128 V128Op.(I32x4 DotI16x8S)) -> simd_op 0xbal | Binary (V128 V128Op.(I32x4 Mul)) -> simd_op 0xb5l | Binary (V128 V128Op.(I32x4 Eq)) -> simd_op 0x37l | Binary (V128 V128Op.(I32x4 Ne)) -> simd_op 0x38l diff --git a/interpreter/exec/eval_simd.ml b/interpreter/exec/eval_simd.ml index 624062919..9c34718af 100644 --- a/interpreter/exec/eval_simd.ml +++ b/interpreter/exec/eval_simd.ml @@ -118,6 +118,7 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct | I32x4 GtU -> SXX.I32x4.gt_u | I32x4 GeS -> SXX.I32x4.ge_s | I32x4 GeU -> SXX.I32x4.ge_u + | I32x4 DotI16x8S -> SXX.I32x4_convert.dot_i16x8_s | I64x2 Add -> SXX.I64x2.add | I64x2 Sub -> SXX.I64x2.sub | I64x2 Mul -> SXX.I64x2.mul diff --git a/interpreter/exec/simd.ml b/interpreter/exec/simd.ml index f2a706944..dfbbdbd40 100644 --- a/interpreter/exec/simd.ml +++ b/interpreter/exec/simd.ml @@ -185,6 +185,7 @@ sig val widen_high_s : t -> t val widen_low_u : t -> t val widen_high_u : t -> t + val dot_i16x8_s : t -> t -> t end module I64x2_convert : sig val widen_low_s : t -> t @@ -429,6 +430,17 @@ struct let widen_high_s = widen Lib.List.drop 0xffffffffl let widen_low_u = widen Lib.List.take 0xffffl let widen_high_u = widen Lib.List.drop 0xffffl + + let dot_i16x8_s x y = + let xs = Rep.to_i16x8 x in + let ys = Rep.to_i16x8 y in + let rec dot xs ys = + match xs, ys with + | x1::x2::xss, y1::y2::yss -> + Int32.(add (mul x1 y1) (mul x2 y2)) :: dot xss yss + | [], [] -> [] + | _, _ -> assert false + in Rep.of_i32x4 (dot xs ys) end module I64x2_convert = struct diff --git a/interpreter/syntax/ast.ml b/interpreter/syntax/ast.ml index 748b852ce..5943ede40 100644 --- a/interpreter/syntax/ast.ml +++ b/interpreter/syntax/ast.ml @@ -54,6 +54,7 @@ struct | Eq | Ne | LtS | LtU | LeS | LeU | GtS | GtU | GeS | GeU | Swizzle | Shuffle of int list | NarrowS | NarrowU | AddSatS | AddSatU | SubSatS | SubSatU + | DotI16x8S type funop = Abs | Neg | Sqrt | Ceil | Floor | Trunc | Nearest | ConvertI32x4S | ConvertI32x4U diff --git a/interpreter/syntax/operators.ml b/interpreter/syntax/operators.ml index 1a5a92b4f..fc1bd258d 100644 --- a/interpreter/syntax/operators.ml +++ b/interpreter/syntax/operators.ml @@ -367,6 +367,7 @@ let i32x4_max_u = Binary (V128 V128Op.(I32x4 MaxU)) let i32x4_mul = Binary (V128 V128Op.(I32x4 Mul)) let i32x4_trunc_sat_f32x4_s = Unary (V128 V128Op.(I32x4 TruncSatF32x4S)) let i32x4_trunc_sat_f32x4_u = Unary (V128 V128Op.(I32x4 TruncSatF32x4U)) +let i32x4_dot_i16x8_s = Binary (V128 V128Op.(I32x4 DotI16x8S)) let i64x2_splat = Convert (V128 V128Op.(I64x2 Splat)) let i64x2_extract_lane imm = SimdExtract (V128Op.I64x2 (ZX, imm)) diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml index 88c443e96..b71c9f890 100644 --- a/interpreter/text/arrange.ml +++ b/interpreter/text/arrange.ml @@ -305,6 +305,7 @@ struct | I32x4 MinU -> "i32x4.min_u" | I32x4 MaxS -> "i32x4.max_s" | I32x4 MaxU -> "i32x4.max_u" + | I32x4 DotI16x8S -> "i32x4.dot_i16x8_s" | I64x2 Add -> "i64x2.add" | I64x2 Sub -> "i64x2.sub" | I64x2 Mul -> "i64x2.mul" diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll index b24310a2a..85d135124 100644 --- a/interpreter/text/lexer.mll +++ b/interpreter/text/lexer.mll @@ -569,6 +569,9 @@ rule token = parse | "i16x8.sub_sat_"(sign as s) { BINARY (ext s i16x8_sub_sat_s i16x8_sub_sat_u) } + | "i32x4.dot_i16x8_s" + { BINARY i32x4_dot_i16x8_s } + | (simd_shape as s) { SIMD_SHAPE (simd_shape s) } | name as s { VAR s } diff --git a/test/core/simd/meta/README.md b/test/core/simd/meta/README.md index d1ade7c72..6ae6747b5 100644 --- a/test/core/simd/meta/README.md +++ b/test/core/simd/meta/README.md @@ -26,6 +26,7 @@ Currently it only support following simd test files generation. - 'simd_f64x2_rounding' - 'simd_f32x4_pmin_pmax' - 'simd_f64x2_pmin_pmax' +- 'simd_i32x4_dot_i16x8' Usage: diff --git a/test/core/simd/meta/gen_tests.py b/test/core/simd/meta/gen_tests.py index 6a7cdde48..7ea8436b3 100644 --- a/test/core/simd/meta/gen_tests.py +++ b/test/core/simd/meta/gen_tests.py @@ -30,6 +30,7 @@ 'simd_f64x2_rounding', 'simd_f32x4_pmin_pmax', 'simd_f64x2_pmin_pmax', + 'simd_i32x4_dot_i16x8', ) diff --git a/test/core/simd/meta/simd_i32x4_dot_i16x8.py b/test/core/simd/meta/simd_i32x4_dot_i16x8.py new file mode 100644 index 000000000..1d62fc102 --- /dev/null +++ b/test/core/simd/meta/simd_i32x4_dot_i16x8.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 + +from simd_arithmetic import SimdArithmeticCase, i16 +from simd_integer_op import ArithmeticOp + + +class SimdI32x4DotI16x8TestCase(SimdArithmeticCase): + LANE_TYPE = 'i32x4' + UNARY_OPS = () + BINARY_OPS = ('dot_i16x8_s',) + + @property + def lane(self): + return i16 + + def binary_op(self, x, y, lane): + # For test data we always splat a single value to the + # entire v128, so '* 2' will work here. + return ArithmeticOp.get_valid_value(x, i16) * ArithmeticOp.get_valid_value(y, i16) * 2 + + @property + def hex_binary_op_test_data(self): + return [] + + @property + def bin_test_data(self): + return [ + (self.normal_binary_op_test_data, ['i16x8', 'i16x8', 'i32x4']), + (self.hex_binary_op_test_data, ['i16x8', 'i16x8', 'i32x4']) + ] + + def get_case_data(self): + case_data = [] + op_name = 'i32x4.dot_i16x8_s' + case_data.append(['#', op_name]) + for data_group, v128_forms in self.bin_test_data: + for data in data_group: + case_data.append([op_name, [str(data[0]), str(data[1])], + str(self.binary_op(data[0], data[1], self.lane)), + v128_forms]) + return case_data + + def get_combine_cases(self): + return '' + + def gen_test_cases(self): + wast_filename = '../simd_i32x4_dot_i16x8.wast' + with open(wast_filename, 'w') as fp: + fp.write(self.get_all_cases()) + +def gen_test_cases(): + simd_i16x8_arith = SimdI32x4DotI16x8TestCase() + simd_i16x8_arith.gen_test_cases() + +if __name__ == '__main__': + gen_test_cases() diff --git a/test/core/simd/simd_i32x4_dot_i16x8.wast b/test/core/simd/simd_i32x4_dot_i16x8.wast new file mode 100644 index 000000000..b41de74d0 --- /dev/null +++ b/test/core/simd/simd_i32x4_dot_i16x8.wast @@ -0,0 +1,110 @@ +;; Tests for i32x4 arithmetic operations on major boundary values and all special values. + + +(module + (func (export "i32x4.dot_i16x8_s") (param v128 v128) (result v128) (i32x4.dot_i16x8_s (local.get 0) (local.get 1))) +) + + +;; i32x4.dot_i16x8_s +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0) + (v128.const i16x8 0 0 0 0 0 0 0 0)) + (v128.const i32x4 0 0 0 0)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0) + (v128.const i16x8 1 1 1 1 1 1 1 1)) + (v128.const i32x4 0 0 0 0)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 1 1 1 1 1 1 1 1) + (v128.const i16x8 1 1 1 1 1 1 1 1)) + (v128.const i32x4 2 2 2 2)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 0 0 0 0)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 1 1 1 1 1 1 1 1) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 -2 -2 -2 -2)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 2 2 2 2)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 16383 16383 16383 16383 16383 16383 16383 16383) + (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384)) + (v128.const i32x4 536838144 536838144 536838144 536838144)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384) + (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384)) + (v128.const i32x4 536870912 536870912 536870912 536870912)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16383 -16383 -16383 -16383 -16383 -16383 -16383 -16383) + (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384)) + (v128.const i32x4 536838144 536838144 536838144 536838144)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384) + (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384)) + (v128.const i32x4 536870912 536870912 536870912 536870912)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16385 -16385 -16385 -16385 -16385 -16385 -16385 -16385) + (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384)) + (v128.const i32x4 536903680 536903680 536903680 536903680)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32765 32765 32765 32765 32765 32765 32765 32765) + (v128.const i16x8 1 1 1 1 1 1 1 1)) + (v128.const i32x4 65530 65530 65530 65530)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32766 32766 32766 32766 32766 32766 32766 32766) + (v128.const i16x8 1 1 1 1 1 1 1 1)) + (v128.const i32x4 65532 65532 65532 65532)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32768 32768 32768 32768 32768 32768 32768 32768) + (v128.const i16x8 1 1 1 1 1 1 1 1)) + (v128.const i32x4 -65536 -65536 -65536 -65536)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32766 -32766 -32766 -32766 -32766 -32766 -32766 -32766) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 65532 65532 65532 65532)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32767 -32767 -32767 -32767 -32767 -32767 -32767 -32767) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 65534 65534 65534 65534)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 65536 65536 65536 65536)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767) + (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767)) + (v128.const i32x4 2147352578 2147352578 2147352578 2147352578)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768) + (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)) + (v128.const i32x4 2147483648 2147483648 2147483648 2147483648)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768) + (v128.const i16x8 -32767 -32767 -32767 -32767 -32767 -32767 -32767 -32767)) + (v128.const i32x4 2147418112 2147418112 2147418112 2147418112)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535) + (v128.const i16x8 0 0 0 0 0 0 0 0)) + (v128.const i32x4 0 0 0 0)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535) + (v128.const i16x8 1 1 1 1 1 1 1 1)) + (v128.const i32x4 -2 -2 -2 -2)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535) + (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)) + (v128.const i32x4 2 2 2 2)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535) + (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767)) + (v128.const i32x4 -65534 -65534 -65534 -65534)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535) + (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)) + (v128.const i32x4 65536 65536 65536 65536)) +(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535) + (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)) + (v128.const i32x4 2 2 2 2)) + +;; type check +(assert_invalid (module (func (result v128) (i32x4.dot_i16x8_s (i32.const 0) (f32.const 0.0)))) "type mismatch") + +;; Test operation with empty argument + +(assert_invalid + (module + (func $i32x4.dot_i16x8_s-1st-arg-empty (result v128) + (i32x4.dot_i16x8_s (v128.const i32x4 0 0 0 0)) + ) + ) + "type mismatch" +) +(assert_invalid + (module + (func $i32x4.dot_i16x8_s-arg-empty (result v128) + (i32x4.dot_i16x8_s) + ) + ) + "type mismatch" +) +