diff --git a/interpreter/binary/decode.ml b/interpreter/binary/decode.ml
index ea0baf815..2390fff93 100644
--- a/interpreter/binary/decode.ml
+++ b/interpreter/binary/decode.ml
@@ -370,6 +370,7 @@ let simd_prefix s =
   | 0xb7l -> i32x4_min_u
   | 0xb8l -> i32x4_max_s
   | 0xb9l -> i32x4_max_u
+  | 0xbal -> i32x4_dot_i16x8_s
   | 0xc1l -> i64x2_neg
   | 0xcbl -> i64x2_shl
   | 0xccl -> i64x2_shr_s
diff --git a/interpreter/binary/encode.ml b/interpreter/binary/encode.ml
index 3dccb4243..d686d6a8d 100644
--- a/interpreter/binary/encode.ml
+++ b/interpreter/binary/encode.ml
@@ -462,6 +462,7 @@ let encode m =
       | Binary (V128 V128Op.(I32x4 MinU)) -> simd_op 0xb7l
       | Binary (V128 V128Op.(I32x4 MaxS)) -> simd_op 0xb8l
       | Binary (V128 V128Op.(I32x4 MaxU)) -> simd_op 0xb9l
+      | Binary (V128 V128Op.(I32x4 DotI16x8S)) -> simd_op 0xbal
       | Binary (V128 V128Op.(I32x4 Mul)) -> simd_op 0xb5l
       | Binary (V128 V128Op.(I32x4 Eq)) -> simd_op 0x37l
       | Binary (V128 V128Op.(I32x4 Ne)) -> simd_op 0x38l
diff --git a/interpreter/exec/eval_simd.ml b/interpreter/exec/eval_simd.ml
index 624062919..9c34718af 100644
--- a/interpreter/exec/eval_simd.ml
+++ b/interpreter/exec/eval_simd.ml
@@ -118,6 +118,7 @@ module SimdOp (SXX : Simd.S) (Value : ValueType with type t = SXX.t) = struct
       | I32x4 GtU -> SXX.I32x4.gt_u
       | I32x4 GeS -> SXX.I32x4.ge_s
       | I32x4 GeU -> SXX.I32x4.ge_u
+      | I32x4 DotI16x8S -> SXX.I32x4_convert.dot_i16x8_s
       | I64x2 Add -> SXX.I64x2.add
       | I64x2 Sub -> SXX.I64x2.sub
       | I64x2 Mul -> SXX.I64x2.mul
diff --git a/interpreter/exec/simd.ml b/interpreter/exec/simd.ml
index f2a706944..dfbbdbd40 100644
--- a/interpreter/exec/simd.ml
+++ b/interpreter/exec/simd.ml
@@ -185,6 +185,7 @@ sig
     val widen_high_s : t -> t
     val widen_low_u : t -> t
     val widen_high_u : t -> t
+    val dot_i16x8_s : t -> t -> t
   end
   module I64x2_convert : sig
     val widen_low_s : t -> t
@@ -429,6 +430,17 @@ struct
     let widen_high_s = widen Lib.List.drop 0xffffffffl
     let widen_low_u = widen Lib.List.take 0xffffl
     let widen_high_u = widen Lib.List.drop 0xffffl
+
+    (* Multiplies corresponding lanes of [x] and [y] (read via Rep.to_i16x8)
+       and adds each adjacent pair of products; result goes to Rep.of_i32x4. *)
+    let dot_i16x8_s x y =
+      let rec go acc = function
+        | a1 :: a2 :: a_rest, b1 :: b2 :: b_rest ->
+          go (Int32.(add (mul a1 b1) (mul a2 b2)) :: acc) (a_rest, b_rest)
+        | [], [] -> List.rev acc
+        | _ -> assert false
+      in
+      Rep.of_i32x4 (go [] (Rep.to_i16x8 x, Rep.to_i16x8 y))
   end
 
   module I64x2_convert = struct
diff --git a/interpreter/syntax/ast.ml b/interpreter/syntax/ast.ml
index 748b852ce..5943ede40 100644
--- a/interpreter/syntax/ast.ml
+++ b/interpreter/syntax/ast.ml
@@ -54,6 +54,7 @@ struct
               | Eq | Ne | LtS | LtU | LeS | LeU | GtS | GtU | GeS | GeU
               | Swizzle | Shuffle of int list | NarrowS | NarrowU
               | AddSatS | AddSatU | SubSatS | SubSatU
+              | DotI16x8S
   type funop = Abs | Neg | Sqrt
              | Ceil | Floor | Trunc | Nearest
              | ConvertI32x4S | ConvertI32x4U
diff --git a/interpreter/syntax/operators.ml b/interpreter/syntax/operators.ml
index 1a5a92b4f..fc1bd258d 100644
--- a/interpreter/syntax/operators.ml
+++ b/interpreter/syntax/operators.ml
@@ -367,6 +367,7 @@ let i32x4_max_u = Binary (V128 V128Op.(I32x4 MaxU))
 let i32x4_mul = Binary (V128 V128Op.(I32x4 Mul))
 let i32x4_trunc_sat_f32x4_s = Unary (V128 V128Op.(I32x4 TruncSatF32x4S))
 let i32x4_trunc_sat_f32x4_u = Unary (V128 V128Op.(I32x4 TruncSatF32x4U))
+let i32x4_dot_i16x8_s = Binary (V128 V128Op.(I32x4 DotI16x8S)) (* text form: i32x4.dot_i16x8_s *)
 
 let i64x2_splat = Convert (V128 V128Op.(I64x2 Splat))
 let i64x2_extract_lane imm = SimdExtract (V128Op.I64x2 (ZX, imm))
diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml
index 88c443e96..b71c9f890 100644
--- a/interpreter/text/arrange.ml
+++ b/interpreter/text/arrange.ml
@@ -305,6 +305,7 @@ struct
     | I32x4 MinU -> "i32x4.min_u"
     | I32x4 MaxS -> "i32x4.max_s"
     | I32x4 MaxU -> "i32x4.max_u"
+    | I32x4 DotI16x8S -> "i32x4.dot_i16x8_s"
     | I64x2 Add -> "i64x2.add"
     | I64x2 Sub -> "i64x2.sub"
     | I64x2 Mul -> "i64x2.mul"
diff --git a/interpreter/text/lexer.mll b/interpreter/text/lexer.mll
index b24310a2a..85d135124 100644
--- a/interpreter/text/lexer.mll
+++ b/interpreter/text/lexer.mll
@@ -569,6 +569,9 @@ rule token = parse
   | "i16x8.sub_sat_"(sign as s)
   { BINARY (ext s i16x8_sub_sat_s i16x8_sub_sat_u) }
 
+  | "i32x4.dot_i16x8_s"
+  { BINARY i32x4_dot_i16x8_s }
+
   | (simd_shape as s) { SIMD_SHAPE (simd_shape s) }
 
   | name as s { VAR s }
diff --git a/test/core/simd/meta/README.md b/test/core/simd/meta/README.md
index d1ade7c72..6ae6747b5 100644
--- a/test/core/simd/meta/README.md
+++ b/test/core/simd/meta/README.md
@@ -26,6 +26,7 @@ Currently it only support following simd test files generation.
 - 'simd_f64x2_rounding'
 - 'simd_f32x4_pmin_pmax'
 - 'simd_f64x2_pmin_pmax'
+- 'simd_i32x4_dot_i16x8'
 
 
 Usage:
diff --git a/test/core/simd/meta/gen_tests.py b/test/core/simd/meta/gen_tests.py
index 6a7cdde48..7ea8436b3 100644
--- a/test/core/simd/meta/gen_tests.py
+++ b/test/core/simd/meta/gen_tests.py
@@ -30,6 +30,7 @@
     'simd_f64x2_rounding',
     'simd_f32x4_pmin_pmax',
     'simd_f64x2_pmin_pmax',
+    'simd_i32x4_dot_i16x8',
 )
 
 
diff --git a/test/core/simd/meta/simd_i32x4_dot_i16x8.py b/test/core/simd/meta/simd_i32x4_dot_i16x8.py
new file mode 100644
index 000000000..1d62fc102
--- /dev/null
+++ b/test/core/simd/meta/simd_i32x4_dot_i16x8.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+
+from simd_arithmetic import SimdArithmeticCase, i16
+from simd_integer_op import ArithmeticOp
+
+
+class SimdI32x4DotI16x8TestCase(SimdArithmeticCase):
+    LANE_TYPE = 'i32x4'
+    UNARY_OPS = ()
+    BINARY_OPS = ('dot_i16x8_s',)
+
+    @property
+    def lane(self):
+        return i16
+
+    def binary_op(self, x, y, lane):
+        # For test data we always splat a single value to the
+        # entire v128, so '* 2' will work here.
+        return ArithmeticOp.get_valid_value(x, i16) * ArithmeticOp.get_valid_value(y, i16) * 2
+
+    @property
+    def hex_binary_op_test_data(self):
+        return []
+
+    @property
+    def bin_test_data(self):
+        return [
+            (self.normal_binary_op_test_data, ['i16x8', 'i16x8', 'i32x4']),
+            (self.hex_binary_op_test_data, ['i16x8', 'i16x8', 'i32x4'])
+        ]
+
+    def get_case_data(self):
+        case_data = []
+        op_name = 'i32x4.dot_i16x8_s'
+        case_data.append(['#', op_name])
+        for data_group, v128_forms in self.bin_test_data:
+            for data in data_group:
+                case_data.append([op_name, [str(data[0]), str(data[1])],
+                    str(self.binary_op(data[0], data[1], self.lane)),
+                    v128_forms])
+        return case_data
+
+    def get_combine_cases(self):
+        return ''
+
+    def gen_test_cases(self):
+        wast_filename = '../simd_i32x4_dot_i16x8.wast'
+        with open(wast_filename, 'w') as fp:
+            fp.write(self.get_all_cases())
+
+def gen_test_cases():
+    simd_i16x8_arith = SimdI32x4DotI16x8TestCase()
+    simd_i16x8_arith.gen_test_cases()
+
+if __name__ == '__main__':
+    gen_test_cases()  # running the script directly regenerates the .wast file
diff --git a/test/core/simd/simd_i32x4_dot_i16x8.wast b/test/core/simd/simd_i32x4_dot_i16x8.wast
new file mode 100644
index 000000000..b41de74d0
--- /dev/null
+++ b/test/core/simd/simd_i32x4_dot_i16x8.wast
@@ -0,0 +1,110 @@
+;; Tests for i32x4 arithmetic operations on major boundary values and all special values.
+
+
+(module
+  (func (export "i32x4.dot_i16x8_s") (param v128 v128) (result v128) (i32x4.dot_i16x8_s (local.get 0) (local.get 1)))
+)
+
+
+;; i32x4.dot_i16x8_s
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0)
+                                           (v128.const i16x8 0 0 0 0 0 0 0 0))
+                                           (v128.const i32x4 0 0 0 0))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0)
+                                           (v128.const i16x8 1 1 1 1 1 1 1 1))
+                                           (v128.const i32x4 0 0 0 0))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 1 1 1 1 1 1 1 1)
+                                           (v128.const i16x8 1 1 1 1 1 1 1 1))
+                                           (v128.const i32x4 2 2 2 2))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 0 0 0 0 0 0 0 0)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 0 0 0 0))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 1 1 1 1 1 1 1 1)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 -2 -2 -2 -2))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 2 2 2 2))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 16383 16383 16383 16383 16383 16383 16383 16383)
+                                           (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384))
+                                           (v128.const i32x4 536838144 536838144 536838144 536838144))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384)
+                                           (v128.const i16x8 16384 16384 16384 16384 16384 16384 16384 16384))
+                                           (v128.const i32x4 536870912 536870912 536870912 536870912))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16383 -16383 -16383 -16383 -16383 -16383 -16383 -16383)
+                                           (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384))
+                                           (v128.const i32x4 536838144 536838144 536838144 536838144))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384)
+                                           (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384))
+                                           (v128.const i32x4 536870912 536870912 536870912 536870912))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -16385 -16385 -16385 -16385 -16385 -16385 -16385 -16385)
+                                           (v128.const i16x8 -16384 -16384 -16384 -16384 -16384 -16384 -16384 -16384))
+                                           (v128.const i32x4 536903680 536903680 536903680 536903680))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32765 32765 32765 32765 32765 32765 32765 32765)
+                                           (v128.const i16x8 1 1 1 1 1 1 1 1))
+                                           (v128.const i32x4 65530 65530 65530 65530))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32766 32766 32766 32766 32766 32766 32766 32766)
+                                           (v128.const i16x8 1 1 1 1 1 1 1 1))
+                                           (v128.const i32x4 65532 65532 65532 65532))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32768 32768 32768 32768 32768 32768 32768 32768)
+                                           (v128.const i16x8 1 1 1 1 1 1 1 1))
+                                           (v128.const i32x4 -65536 -65536 -65536 -65536))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32766 -32766 -32766 -32766 -32766 -32766 -32766 -32766)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 65532 65532 65532 65532))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32767 -32767 -32767 -32767 -32767 -32767 -32767 -32767)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 65534 65534 65534 65534))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 65536 65536 65536 65536))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767)
+                                           (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767))
+                                           (v128.const i32x4 2147352578 2147352578 2147352578 2147352578))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)
+                                           (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768))
+                                           (v128.const i32x4 2147483648 2147483648 2147483648 2147483648))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768)
+                                           (v128.const i16x8 -32767 -32767 -32767 -32767 -32767 -32767 -32767 -32767))
+                                           (v128.const i32x4 2147418112 2147418112 2147418112 2147418112))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
+                                           (v128.const i16x8 0 0 0 0 0 0 0 0))
+                                           (v128.const i32x4 0 0 0 0))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
+                                           (v128.const i16x8 1 1 1 1 1 1 1 1))
+                                           (v128.const i32x4 -2 -2 -2 -2))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
+                                           (v128.const i16x8 -1 -1 -1 -1 -1 -1 -1 -1))
+                                           (v128.const i32x4 2 2 2 2))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
+                                           (v128.const i16x8 32767 32767 32767 32767 32767 32767 32767 32767))
+                                           (v128.const i32x4 -65534 -65534 -65534 -65534))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
+                                           (v128.const i16x8 -32768 -32768 -32768 -32768 -32768 -32768 -32768 -32768))
+                                           (v128.const i32x4 65536 65536 65536 65536))
+(assert_return (invoke "i32x4.dot_i16x8_s" (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535)
+                                           (v128.const i16x8 65535 65535 65535 65535 65535 65535 65535 65535))
+                                           (v128.const i32x4 2 2 2 2))
+
+;; type check
+(assert_invalid (module (func (result v128) (i32x4.dot_i16x8_s (i32.const 0) (f32.const 0.0)))) "type mismatch")
+
+;; Test operation with empty argument
+
+(assert_invalid
+  (module
+    (func $i32x4.dot_i16x8_s-1st-arg-empty (result v128)
+      (i32x4.dot_i16x8_s (v128.const i32x4 0 0 0 0))
+    )
+  )
+  "type mismatch"
+)
+(assert_invalid
+  (module
+    (func $i32x4.dot_i16x8_s-arg-empty (result v128)
+      (i32x4.dot_i16x8_s)
+    )
+  )
+  "type mismatch"
+)
+