Merge pull request #130 from Tencent/master

20210117
Tencent · Jan 17, 2021 · c48fbb0 · c48fbb0
2 parents ee01900 + 0a59ac9
commit c48fbb0
Show file tree

Hide file tree

Showing 24 changed files with 8,937 additions and 759 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -119,7 +119,9 @@ if((IOS AND CMAKE_OSX_ARCHITECTURES MATCHES "arm")
     OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
     OR (CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)"))
     set(NCNN_TARGET_ARCH arm)
-    option(NCNN_ARM82 "optimize aarch64 platform with armv8.2" ON)
+    if(NOT (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0))
+        option(NCNN_ARM82 "optimize aarch64 platform with armv8.2" ON)
+    endif()
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips)")
     set(NCNN_TARGET_ARCH mips)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(riscv)")

diff --git a/docs/developer-guide/how-to-be-a-contributor.zh.md b/docs/developer-guide/how-to-be-a-contributor.zh.md
@@ -10,14 +10,22 @@ clone 项目，fetch 官方 remote
 ```
 $ git clone https://github.com/user/ncnn && cd ncnn
 $ git remote add tencent https://github.com/tencent/ncnn
+```
+对于 `git clone` 下来的项目，remote origin 是指 clone 下来的 `user/ncnn`；remote tencent 即官方 repo。
+可以基于不同的 remote 创建和提交分支。
+
+```
 $ git fetch tencent
 $ git checkout tencent/master
 ```
+
 创建自己的分支，命名尽量言简意赅。一个分支只做一件事，方便 review 和 revert。例如：
 ```
 $ git checkout -b add-conv-int8
 ```
 
+> `git fetch` 是从远程获取最新代码到本地。如果是第二次 pr ncnn，直接从 `git fetch tencent` 开始即可，不需要 `git remote add tencent`，也不需要修改 `github.com/user/ncnn`。
+
 #### 二、代码习惯
 为了增加沟通效率，reviewer 一般要求 contributor 遵从以下规则
 

diff --git a/docs/developer-guide/operation-param-weight-table.md b/docs/developer-guide/operation-param-weight-table.md
@@ -192,7 +192,7 @@
 ||8|behind|0|
 |Permute|0|order_type|0|
 |PixelShuffle|0|upscale_factor|1|
-|Pooling|0|pooling_type|0|
+|Pooling|0|pooling_type(0: max 1: avg)|0|
 ||1|kernel_w|0|
 ||11|kernel_h|kernel_w|
 ||2|stride_w|1|

diff --git a/docs/developer-guide/operators.md b/docs/developer-guide/operators.md
@@ -13,6 +13,7 @@
 * [crop](#crop)
 * [dequantize](#dequantize)
 * [lstm](#lstm)
+* [pooling](#pooling)
 * [sigmoid](#sigmoid)
 * [softmax](#softmax)
 * [tanh](#tanh)
@@ -273,6 +274,27 @@ Apply a single-layer LSTM to a feature sequence of `T` timesteps. The input blob
 |bias_c_data|float|`[w=num_output, h=4, c=num_directions]`||
 |weight_hc_data|float|`[w=num_output, h=num_output * 4, c=num_directions]`||
 
+# pooling
+
+```
+x2 = pad(x, pads)
+x3 = pooling(x2, kernel, stride)
+```
+
+| param id | name           | type | default  | description                                                                                                                         |
+| -------- | -------------- | ---- | -------- | ----------------------------------------------------------------------------------------------------------------------------------- |
+| 0        | pooling_type   | int  | 0        | 0: max 1: avg                                                                                                                       |
+| 1        | kernel_w       | int  | 0        |                                                                                                                                     |
+| 2        | stride_w       | int  | 1        |                                                                                                                                     |
+| 3        | pad_left       | int  | 0        |                                                                                                                                     |
+| 4        | global_pooling | int  | 0        |                                                                                                                                     |
+| 5        | pad_mode       | int  | 0        | 0: full padding <br/> 1: valid padding <br/> 2: tensorflow padding=SAME or onnx padding=SAME_UPPER <br/> 3: onnx padding=SAME_LOWER |
+| 11       | kernel_h       | int  | kernel_w |                                                                                                                                     |
+| 12       | stride_h       | int  | stride_w |                                                                                                                                     |
+| 13       | pad_top        | int  | pad_left |                                                                                                                                     |
+| 14       | pad_right      | int  | pad_left |                                                                                                                                     |
+| 15       | pad_bottom     | int  | pad_top  |                                                                                                                                     |
+
 # sigmoid
 ```
 y = 1 / (1 + exp(-x))

diff --git a/python/src/main.cpp b/python/src/main.cpp
@@ -183,20 +183,21 @@ PYBIND11_MODULE(ncnn, m)
     .def_readwrite("use_sgemm_convolution", &Option::use_sgemm_convolution)
     .def_readwrite("use_int8_inference", &Option::use_int8_inference)
     .def_readwrite("use_vulkan_compute", &Option::use_vulkan_compute)
+    .def_readwrite("use_bf16_storage", &Option::use_bf16_storage)
     .def_readwrite("use_fp16_packed", &Option::use_fp16_packed)
     .def_readwrite("use_fp16_storage", &Option::use_fp16_storage)
     .def_readwrite("use_fp16_arithmetic", &Option::use_fp16_arithmetic)
+    .def_readwrite("use_int8_packed", &Option::use_int8_packed)
     .def_readwrite("use_int8_storage", &Option::use_int8_storage)
     .def_readwrite("use_int8_arithmetic", &Option::use_int8_arithmetic)
     .def_readwrite("use_packing_layout", &Option::use_packing_layout)
     .def_readwrite("use_shader_pack8", &Option::use_shader_pack8)
-    .def_readwrite("use_image_storage", &Option::use_image_storage)
     .def_readwrite("use_subgroup_basic", &Option::use_subgroup_basic)
     .def_readwrite("use_subgroup_vote", &Option::use_subgroup_vote)
     .def_readwrite("use_subgroup_ballot", &Option::use_subgroup_ballot)
     .def_readwrite("use_subgroup_shuffle", &Option::use_subgroup_shuffle)
     .def_readwrite("use_image_storage", &Option::use_image_storage)
-    .def_readwrite("use_bf16_storage", &Option::use_bf16_storage)
+    .def_readwrite("use_tensor_storage", &Option::use_tensor_storage)
     .def_readwrite("use_weight_fp16_storage", &Option::use_weight_fp16_storage);
 
     py::class_<Mat> mat(m, "Mat", py::buffer_protocol());
@@ -882,8 +883,8 @@ PYBIND11_MODULE(ncnn, m)
 
     .def("clear", &Net::clear)
     .def("create_extractor", &Net::create_extractor)
-    .def("blobs", &Net::blobs)
-    .def("layers", &Net::layers);
+    .def("blobs", &Net::blobs, py::return_value_policy::reference_internal)
+    .def("layers", &Net::layers, py::return_value_policy::reference_internal);
 
     py::enum_<ncnn::BorderType>(m, "BorderType")
     .value("BORDER_CONSTANT", ncnn::BorderType::BORDER_CONSTANT)

diff --git a/python/tests/custom_layer.param b/python/tests/custom_layer.param
@@ -0,0 +1,4 @@
+7767517
+2 2
+Input            data                             0 1 data
+CustomLayer      cl_fwd                           1 1 data output
diff --git a/python/tests/test.py b/python/tests/test_allocator.py
@@ -12,22 +12,26 @@
 # CONDITIONS OF ANY KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations under the License.
 
-import time
-import ncnn
-
-dr = ncnn.DataReaderFromEmpty()
+import pytest
 
-net = ncnn.Net()
-net.load_param("test.param")
-net.load_model(dr)
+import ncnn
 
-in_mat = ncnn.Mat((227, 227, 3))
 
-start = time.time()
+def test_pool_allocator():
+    pa = ncnn.PoolAllocator()
+    assert pa is not None
+    pa.set_size_compare_ratio(0.5)
+    buf = pa.fastMalloc(10 * 1024)
+    assert buf is not None
+    pa.fastFree(buf)
+    pa.clear()
 
-ex = net.create_extractor()
-ex.input("data", in_mat)
-ret, out_mat = ex.extract("output")
 
-end = time.time()
-print("timespan = ", end - start)
+def test_unlocked_pool_allocator():
+    upa = ncnn.UnlockedPoolAllocator()
+    assert upa is not None
+    upa.set_size_compare_ratio(0.5)
+    buf = upa.fastMalloc(10 * 1024)
+    assert buf is not None
+    upa.fastFree(buf)
+    upa.clear()
diff --git a/python/tests/test_net.py b/python/tests/test_net.py
@@ -0,0 +1,89 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import numpy as np
+import pytest
+
+import ncnn
+
+
+def test_net():
+    dr = ncnn.DataReaderFromEmpty()
+
+    net = ncnn.Net()
+    ret = net.load_param("tests/test.param")
+    net.load_model(dr)
+    assert ret == 0 and len(net.blobs()) == 3 and len(net.layers()) == 3
+
+    in_mat = ncnn.Mat((227, 227, 3))
+
+    ex = net.create_extractor()
+    ex.input("data", in_mat)
+    ret, out_mat = ex.extract("output")
+
+    assert ret == 0 and out_mat.dims == 1 and out_mat.w == 1
+
+    net.clear()
+    assert len(net.blobs()) == 0 and len(net.layers()) == 0
+
+
+def test_custom_layer():
+    class CustomLayer(ncnn.Layer):
+        customLayers = []
+
+        def __init__(self):
+            ncnn.Layer.__init__(self)
+            self.one_blob_only = True
+
+            self.customLayers.append(self)
+
+        def forward(self, bottom_blob, top_blob, opt):
+            x = np.array(bottom_blob)
+            x += 1
+
+            top_blob.clone_from(ncnn.Mat(x), opt.blob_allocator)
+            if top_blob.empty():
+                return -100
+
+            return 0
+
+    def CustomLayer_layer_creator():
+        return CustomLayer()
+
+    def CustomLayer_layer_destroyer(layer):
+        for i in range(len(CustomLayer.customLayers)):
+            if CustomLayer.customLayers[i] == layer:
+                del CustomLayer.customLayers[i]
+                break
+
+    dr = ncnn.DataReaderFromEmpty()
+
+    net = ncnn.Net()
+    net.register_custom_layer(
+        "CustomLayer", CustomLayer_layer_creator, CustomLayer_layer_destroyer
+    )
+    ret = net.load_param("tests/custom_layer.param")
+    net.load_model(dr)
+    assert ret == 0 and len(net.blobs()) == 2 and len(net.layers()) == 2
+
+    in_mat = ncnn.Mat(1)
+    in_mat.fill(1.0)
+
+    ex = net.create_extractor()
+    ex.input("data", in_mat)
+    ret, out_mat = ex.extract("output")
+    assert ret == 0 and out_mat.dims == 1 and out_mat.w == 1 and out_mat[0] == 2.0
+
+    net.clear()
+    assert len(net.blobs()) == 0 and len(net.layers()) == 0
diff --git a/python/tests/test_option.py b/python/tests/test_option.py
@@ -0,0 +1,144 @@
+# Tencent is pleased to support the open source community by making ncnn available.
+#
+# Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
+#
+# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# https://opensource.org/licenses/BSD-3-Clause
+#
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+import pytest
+
+import ncnn
+
+
+def test_option():
+    allocator = ncnn.PoolAllocator()
+
+    opt = ncnn.Option()
+
+    opt.lightmode = True
+    assert opt.lightmode == True
+    opt.lightmode = False
+    assert opt.lightmode == False
+
+    assert opt.num_threads == ncnn.get_cpu_count()
+    opt.num_threads = 1
+    assert opt.num_threads == 1
+
+    assert opt.blob_allocator is None
+    opt.blob_allocator = allocator
+    assert opt.blob_allocator == allocator
+
+    assert opt.workspace_allocator is None
+    opt.workspace_allocator = allocator
+    assert opt.workspace_allocator == allocator
+
+    assert opt.openmp_blocktime == 20
+    opt.openmp_blocktime = 40
+    assert opt.openmp_blocktime == 40
+
+    opt.use_winograd_convolution = True
+    assert opt.use_winograd_convolution == True
+    opt.use_winograd_convolution = False
+    assert opt.use_winograd_convolution == False
+
+    opt.use_sgemm_convolution = True
+    assert opt.use_sgemm_convolution == True
+    opt.use_sgemm_convolution = False
+    assert opt.use_sgemm_convolution == False
+
+    opt.use_int8_inference = True
+    assert opt.use_int8_inference == True
+    opt.use_int8_inference = False
+    assert opt.use_int8_inference == False
+
+    opt.use_vulkan_compute = True
+    assert opt.use_vulkan_compute == True
+    opt.use_vulkan_compute = False
+    assert opt.use_vulkan_compute == False
+
+    opt.use_bf16_storage = True
+    assert opt.use_bf16_storage == True
+    opt.use_bf16_storage = False
+    assert opt.use_bf16_storage == False
+
+    opt.use_fp16_packed = True
+    assert opt.use_fp16_packed == True
+    opt.use_fp16_packed = False
+    assert opt.use_fp16_packed == False
+
+    opt.use_fp16_storage = True
+    assert opt.use_fp16_storage == True
+    opt.use_fp16_storage = False
+    assert opt.use_fp16_storage == False
+
+    opt.use_fp16_arithmetic = True
+    assert opt.use_fp16_arithmetic == True
+    opt.use_fp16_arithmetic = False
+    assert opt.use_fp16_arithmetic == False
+
+    opt.use_int8_packed = True
+    assert opt.use_int8_packed == True
+    opt.use_int8_packed = False
+    assert opt.use_int8_packed == False
+
+    opt.use_int8_storage = True
+    assert opt.use_int8_storage == True
+    opt.use_int8_storage = False
+    assert opt.use_int8_storage == False
+
+    opt.use_int8_arithmetic = True
+    assert opt.use_int8_arithmetic == True
+    opt.use_int8_arithmetic = False
+    assert opt.use_int8_arithmetic == False
+
+    opt.use_packing_layout = True
+    assert opt.use_packing_layout == True
+    opt.use_packing_layout = False
+    assert opt.use_packing_layout == False
+
+    opt.use_shader_pack8 = True
+    assert opt.use_shader_pack8 == True
+    opt.use_shader_pack8 = False
+    assert opt.use_shader_pack8 == False
+
+    opt.use_subgroup_basic = True
+    assert opt.use_subgroup_basic == True
+    opt.use_subgroup_basic = False
+    assert opt.use_subgroup_basic == False
+
+    opt.use_subgroup_vote = True
+    assert opt.use_subgroup_vote == True
+    opt.use_subgroup_vote = False
+    assert opt.use_subgroup_vote == False
+
+    opt.use_subgroup_ballot = True
+    assert opt.use_subgroup_ballot == True
+    opt.use_subgroup_ballot = False
+    assert opt.use_subgroup_ballot == False
+
+    opt.use_subgroup_shuffle = True
+    assert opt.use_subgroup_shuffle == True
+    opt.use_subgroup_shuffle = False
+    assert opt.use_subgroup_shuffle == False
+
+    opt.use_image_storage = True
+    assert opt.use_image_storage == True
+    opt.use_image_storage = False
+    assert opt.use_image_storage == False
+
+    opt.use_tensor_storage = True
+    assert opt.use_tensor_storage == True
+    opt.use_tensor_storage = False
+    assert opt.use_tensor_storage == False
+
+    opt.use_weight_fp16_storage = True
+    assert opt.use_weight_fp16_storage == True
+    opt.use_weight_fp16_storage = False
+    assert opt.use_weight_fp16_storage == False