From 9d1dd3daa01ebd97a4ac2e8279ddd6e2cb109244 Mon Sep 17 00:00:00 2001 From: Jozef Wludzik Date: Fri, 22 Mar 2024 15:50:05 +0100 Subject: [PATCH] NPU Driver drop 1.2.0 release unified 2024WW8 (#23) * Bumped level-zero to 1.16.1 * Fix building with external level-zero * Now level-zero can work with any Level Zero API * Compiler drop from mtl_npu_ud_2024_08_rc3 * Firmware drop from ci_tag_ud202408_vpu_rc_20240221_2101 * NPU test drop 1.2.0 release unified 2024WW8 Signed-off-by: Jakub Pawlak Signed-off-by: Schulfer, Magdalena Signed-off-by: Wludzik, Jozef Signed-off-by: Krystian Pradzynski Signed-off-by: Mateusz Tabaka --- CMakeLists.txt | 32 +- cmake/packages.cmake | 18 +- compiler/compiler_source.cmake | 6 +- compiler/include/vpux_driver_compiler.h | 2 +- compiler/openvino_modules/gflags-config.cmake | 13 + compiler/openvino_package.cmake | 168 +-- compiler/vpux_compiler.cmake | 6 +- compiler/vpux_compiler_build.cmake | 12 +- firmware/CMakeLists.txt | 2 +- firmware/bin/mtl_vpu_v0.0.bin | 1 - firmware/bin/vpu_37xx_v0.0.bin | 4 +- firmware/include/api/vpu_jsm_api.h | 124 ++- firmware/include/api/vpu_jsm_job_cmd_api.h | 24 +- firmware/include/api/vpu_nnrt_api_37xx.h | 102 +- linux/include/uapi/drm/ivpu_accel.h | 43 +- linux/include/uapi/linux/dma-buf.h | 182 ++++ linux/include/uapi/linux/dma-heap.h | 53 + third_party/level-zero | 2 +- third_party/vpux_elf | 2 +- umd/CMakeLists.txt | 10 +- umd/level_zero_driver/CMakeLists.txt | 23 +- .../api/core/ze_core_loader.cpp | 85 +- umd/level_zero_driver/api/core/ze_memory.hpp | 123 ++- umd/level_zero_driver/api/core/zes_loader.cpp | 34 +- umd/level_zero_driver/api/ext/ze_graph.cpp | 8 + .../api/tools/ze_tools_loader.cpp | 24 +- .../core/source/cmdlist/cmdlist.cpp | 4 +- .../core/source/cmdqueue/cmdqueue.cpp | 40 +- .../core/source/cmdqueue/cmdqueue.hpp | 10 +- .../core/source/context/context.cpp | 79 +- .../core/source/context/context.hpp | 36 +- .../core/source/device/device.cpp | 49 +- .../core/source/device/device.hpp | 14 +- .../core/source/driver/driver.cpp | 9 +- .../core/source/driver/driver.hpp | 4 +- .../core/source/driver/driver_handle.cpp | 43 +- .../core/source/driver/driver_handle.hpp | 11 +- .../core/source/event/event.cpp | 37 +- .../core/source/event/event.hpp | 16 +- .../core/source/event/eventpool.cpp | 7 +- .../core/source/fence/fence.cpp | 10 +- .../core/source/memory/memory.cpp | 80 +- .../ext/source/CMakeLists.txt | 2 + .../ext/source/graph/compiler.cpp | 80 +- .../ext/source/graph/compiler.hpp | 14 +- .../ext/source/graph/compiler_common.cpp | 26 + .../ext/source/graph/compiler_common.hpp | 16 + .../ext/source/graph/elf_parser.cpp | 213 ++-- .../ext/source/graph/elf_parser.hpp | 4 +- .../ext/source/graph/graph.cpp | 62 +- .../ext/source/graph/graph.hpp | 2 + .../ext/source/graph/profiling_data.cpp | 147 ++- .../ext/source/graph/profiling_data.hpp | 40 +- .../ext/source/graph/query_network.cpp | 3 +- .../ext/source/graph/query_network.hpp | 1 + .../ext/source/graph/vcl_symbols.hpp | 3 +- .../tools/source/metrics/metric.cpp | 117 ++- .../tools/source/metrics/metric.hpp | 23 +- .../tools/source/metrics/metric_query.cpp | 212 ++-- .../tools/source/metrics/metric_query.hpp | 50 +- .../tools/source/metrics/metric_streamer.cpp | 119 +-- .../tools/source/metrics/metric_streamer.hpp | 27 +- .../unit_tests/fixtures/device_fixture.hpp | 14 +- umd/level_zero_driver/unit_tests/main.cpp | 82 +- .../unit_tests/mocks/mock_metrics.cpp | 2 +- .../unit_tests/mocks/mock_metrics.hpp | 21 +- umd/level_zero_driver/unit_tests/options.hpp | 18 + 
.../source/core/cmdlist/test_cmdlist_api.cpp | 108 ++ .../source/core/device/test_device.cpp | 4 +- .../source/core/driver/test_driver.cpp | 25 +- .../unit_tests/source/ext/CMakeLists.txt | 7 + .../source/ext/graph/CMakeLists.txt | 10 + .../source/ext/graph/test_graph.cpp | 242 +++++ .../source/ext/graph/test_graph_cid.cpp | 211 ++++ .../source/tools/metrics/test_metrics.cpp | 14 +- umd/level_zero_driver/unit_tests/utils.hpp | 24 + umd/vpu_driver/include/umd_common.hpp | 5 + .../source/command/vpu_command_buffer.cpp | 12 +- .../source/command/vpu_command_buffer.hpp | 10 + .../source/command/vpu_event_command.hpp | 5 +- .../source/command/vpu_graph_init_command.cpp | 8 +- umd/vpu_driver/source/command/vpu_job.hpp | 5 + umd/vpu_driver/source/device/hw_info.hpp | 28 +- .../source/device/vpu_37xx/vpu_hw_37xx.cpp | 15 +- umd/vpu_driver/source/device/vpu_device.cpp | 116 +-- umd/vpu_driver/source/device/vpu_device.hpp | 5 +- .../source/device/vpu_device_context.cpp | 93 +- .../source/device/vpu_device_context.hpp | 20 +- .../source/memory/vpu_buffer_object.cpp | 57 ++ .../source/memory/vpu_buffer_object.hpp | 57 +- .../source/os_interface/os_interface.hpp | 2 - .../source/os_interface/os_interface_imp.cpp | 14 - .../source/os_interface/os_interface_imp.hpp | 2 - .../os_interface/vpu_device_factory.cpp | 24 +- .../os_interface/vpu_device_factory.hpp | 3 +- .../source/os_interface/vpu_driver_api.cpp | 79 +- .../source/os_interface/vpu_driver_api.hpp | 28 +- umd/vpu_driver/source/utilities/log.cpp | 12 +- umd/vpu_driver/source/utilities/log.hpp | 2 +- umd/vpu_driver/source/utilities/timer.cpp | 26 +- umd/vpu_driver/source/utilities/timer.hpp | 24 +- .../unit_tests/job_submission/job_test.cpp | 2 +- .../job_submission/vpu_command_test.cpp | 6 +- umd/vpu_driver/unit_tests/main.cpp | 40 +- .../unit_tests/memory/buffer_object_test.cpp | 12 +- .../mocks/gmock_os_interface_imp.hpp | 2 - .../mocks/mock_os_interface_imp.cpp | 33 +- .../mocks/mock_os_interface_imp.hpp | 6 +- .../unit_tests/mocks/mock_vpu_device.cpp | 2 +- umd/vpu_driver/unit_tests/options.hpp | 49 - .../os_interface/vpu_device_factory_test.cpp | 2 +- .../os_interface/vpu_driver_api_test.cpp | 11 +- .../vpu_device/device_context_test.cpp | 16 +- .../unit_tests/vpu_device/vpu_device_test.cpp | 21 - validation/umd-test/CMakeLists.txt | 32 +- validation/umd-test/configs/README.md | 44 +- validation/umd-test/graph_utilities.hpp | 364 +++++++ validation/umd-test/image.cpp | 152 +++ validation/umd-test/image.hpp | 26 + validation/umd-test/test_commands.cpp | 14 +- validation/umd-test/test_context.cpp | 72 +- validation/umd-test/test_device.cpp | 3 + validation/umd-test/test_driver.cpp | 3 +- validation/umd-test/test_event.cpp | 8 +- validation/umd-test/test_event_sync.cpp | 29 +- validation/umd-test/test_eventpool.cpp | 8 +- validation/umd-test/test_fence.cpp | 6 +- validation/umd-test/test_graph.cpp | 115 ++- validation/umd-test/test_graph_cid.cpp | 956 +++++++----------- validation/umd-test/test_graph_commands.cpp | 663 ++++-------- validation/umd-test/test_graph_inference.cpp | 25 +- .../umd-test/test_inference_performance.cpp | 89 +- validation/umd-test/test_memory.cpp | 2 +- validation/umd-test/test_metric.cpp | 413 +++++--- validation/umd-test/test_metric_streamer.cpp | 518 ++++++++-- validation/umd-test/test_ov_inference.cpp | 204 ++++ validation/umd-test/test_prime_buffers.cpp | 205 ++++ validation/umd-test/test_priority.cpp | 217 ++++ validation/umd-test/testenv.hpp | 41 +- validation/umd-test/umd_prime_buffers.h | 103 ++ 
validation/umd-test/umd_test.cpp | 121 ++- validation/umd-test/umd_test.h | 28 +- validation/umd-test/utilities/CMakeLists.txt | 10 + validation/umd-test/utilities/data_handle.cpp | 122 +++ validation/umd-test/utilities/data_handle.h | 57 ++ validation/umd-test/utilities/graph_to_str.h | 111 ++ validation/umd-test/ze_memory.hpp | 67 ++ validation/umd-test/ze_scope.hpp | 21 + version.h.in | 4 +- 149 files changed, 6043 insertions(+), 3159 deletions(-) create mode 100644 compiler/openvino_modules/gflags-config.cmake delete mode 120000 firmware/bin/mtl_vpu_v0.0.bin create mode 100644 linux/include/uapi/linux/dma-buf.h create mode 100644 linux/include/uapi/linux/dma-heap.h create mode 100644 umd/level_zero_driver/ext/source/graph/compiler_common.cpp create mode 100644 umd/level_zero_driver/ext/source/graph/compiler_common.hpp create mode 100644 umd/level_zero_driver/unit_tests/options.hpp create mode 100644 umd/level_zero_driver/unit_tests/source/ext/CMakeLists.txt create mode 100644 umd/level_zero_driver/unit_tests/source/ext/graph/CMakeLists.txt create mode 100644 umd/level_zero_driver/unit_tests/source/ext/graph/test_graph.cpp create mode 100644 umd/level_zero_driver/unit_tests/source/ext/graph/test_graph_cid.cpp create mode 100644 umd/level_zero_driver/unit_tests/utils.hpp delete mode 100644 umd/vpu_driver/unit_tests/options.hpp create mode 100644 validation/umd-test/graph_utilities.hpp create mode 100644 validation/umd-test/image.cpp create mode 100644 validation/umd-test/image.hpp create mode 100644 validation/umd-test/test_ov_inference.cpp create mode 100644 validation/umd-test/test_prime_buffers.cpp create mode 100644 validation/umd-test/test_priority.cpp create mode 100644 validation/umd-test/umd_prime_buffers.h create mode 100644 validation/umd-test/utilities/CMakeLists.txt create mode 100644 validation/umd-test/utilities/data_handle.cpp create mode 100644 validation/umd-test/utilities/data_handle.h create mode 100644 validation/umd-test/utilities/graph_to_str.h create mode 100644 validation/umd-test/ze_memory.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index af23fff..dbddb17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,29 +1,27 @@ # -# Copyright (C) 2022-2023 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # # SPDX-License-Identifier: MIT # cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -project(vpu-drv VERSION 1.1.0) -if(NOT CMAKE_BUILD_TYPE) - message(STATUS "CMAKE_BUILD_TYPE not specified, using Release") - set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type: [Release, Debug]" FORCE) -endif() +set(STACK_VERSION 1.2.0 CACHE STRING "Main project version") +project(npu-linux-driver VERSION ${STACK_VERSION}) -string(TOLOWER ${CMAKE_BUILD_TYPE} BUILD_TYPE_LOWER) -if (NOT ${BUILD_TYPE_LOWER} STREQUAL "debug" AND - NOT ${BUILD_TYPE_LOWER} STREQUAL "release") - message(FATAL_ERROR "Not supported CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}. 
" - "Supported types: [Release, Debug]") +set(BUILD_NUMBER "dev-0" CACHE STRING "Build number composed of name and unique number used as driver version") +set(BUILD_VERSION ${PROJECT_NAME}-dev-${PROJECT_VERSION}.${BUILD_NUMBER} CACHE STRING "Build version") + +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Supported build types: Release, Debug" FORCE) endif() +string(TOLOWER ${CMAKE_BUILD_TYPE} BUILD_TYPE_LOWER) -message(STATUS "Using CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") +message(STATUS "PROJECT_VERSION = ${PROJECT_VERSION}") +message(STATUS "BUILD_NUMBER = ${BUILD_NUMBER}") +message(STATUS "BUILD_VERSION = ${BUILD_VERSION}") +message(STATUS "CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}") -if (NOT VPU_VERSION) - set(VPU_VERSION "dev-0") -endif() configure_file(version.h.in ${CMAKE_BINARY_DIR}/include/version.h) include_directories(SYSTEM ${CMAKE_BINARY_DIR}/include) @@ -32,13 +30,14 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +include(cmake/detect_linux_system.cmake) + option(ENABLE_OPENVINO_PACKAGE "Enable building the OpenVINO + VPUX Plugin package" OFF) option(ENABLE_VPUX_COMPILER_BUILD "Enable building compiler in driver" OFF) message(STATUS "option ENABLE_OPENVINO_PACKAGE: ${ENABLE_OPENVINO_PACKAGE}") message(STATUS "option ENABLE_VPUX_COMPILER_BUILD: ${ENABLE_VPUX_COMPILER_BUILD}") - include(cmake/sanitizer.cmake) # Add Intel CET flags for Indirect Branch Tracking and Shadow call-stack technologies. @@ -50,7 +49,6 @@ add_subdirectory(third_party) add_subdirectory(umd) add_subdirectory(validation) -include(cmake/detect_linux_system.cmake) include(cmake/packages.cmake) # You may your custom commands in .local.cmake, i.e. driver compilation, running tests. diff --git a/cmake/packages.cmake b/cmake/packages.cmake index 9c7263d..c5e5811 100644 --- a/cmake/packages.cmake +++ b/cmake/packages.cmake @@ -21,14 +21,17 @@ set(CPACK_GENERATOR DEB) set(CPACK_PACKAGE_NAME "intel") set(CPACK_PACKAGE_VENDOR "Intel") set(CPACK_PACKAGE_CONTACT "Intel Corporation") -set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION}.${VPU_VERSION}) +set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION}.${BUILD_NUMBER}) # Create package per component set(CPACK_DEB_COMPONENT_INSTALL ON) # Enable detection of component dependencies set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) -set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS_PRIVATE_DIRS ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) +list(APPEND SHLIBDEPS_PRIVATE_DIRS ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) +list(APPEND SHLIBDEPS_PRIVATE_DIRS ${CMAKE_BINARY_DIR}/_deps/openvino-src/runtime/lib/intel64/) +list(APPEND SHLIBDEPS_PRIVATE_DIRS ${CMAKE_BINARY_DIR}/_deps/openvino-src/opencv/lib/) +set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS_PRIVATE_DIRS ${SHLIBDEPS_PRIVATE_DIRS}) # Component dependencies set(CPACK_DEBIAN_VALIDATION-NPU_PACKAGE_DEPENDS "level-zero (>=1.10.0) | intel-level-zero") @@ -64,4 +67,15 @@ Library implements Level Zero API to interract with NPU hardware.") set(CPACK_COMPONENT_VALIDATION-NPU_DESCRIPTION "Intel(R) Validation applications for NPU\n\ Set of application required for testing of Intel(R) Level Zero Driver for NPU hardware.") +set(CPACK_DEBIAN_DEBUGINFO_PACKAGE OFF) +if(${BUILD_TYPE_LOWER} STREQUAL "release") + set(CPACK_DEBIAN_DEBUGINFO_PACKAGE ON) + + if (NOT CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + FILE(WRITE "${CMAKE_BINARY_DIR}/post_package.cmake" "execute_process(COMMAND sh -c \"cp -r ${CMAKE_BINARY_DIR}/_CPack_Packages/Linux/DEB/intel-${CPACK_PACKAGE_VERSION}-Linux/level-zero-npu-dbgsym/usr/lib 
${CMAKE_INSTALL_PREFIX}/\") \n") + FILE(APPEND "${CMAKE_BINARY_DIR}/post_package.cmake" "execute_process(COMMAND sh -c \"cp -r ${CMAKE_BINARY_DIR}/_CPack_Packages/Linux/DEB/intel-${CPACK_PACKAGE_VERSION}-Linux/level-zero-npu/usr/lib ${CMAKE_INSTALL_PREFIX}/\") \n") + LIST(APPEND CPACK_POST_BUILD_SCRIPTS "${CMAKE_BINARY_DIR}/post_package.cmake") + endif() +endif() + include(CPack) diff --git a/compiler/compiler_source.cmake b/compiler/compiler_source.cmake index 246f7b9..5346c10 100644 --- a/compiler/compiler_source.cmake +++ b/compiler/compiler_source.cmake @@ -17,9 +17,9 @@ endif() include(ExternalProject) # OpenVINO + VPUX Plugin package options -set(OPENVINO_REVISION 2023.0.2) -set(VPUX_PLUGIN_REVISION cb47a12c7119238071e170e60154720657baa487) -set(VPUX_PLUGIN_RELEASE 23ww42_snapshot) +set(OPENVINO_REVISION 7e18bd074a2487a7a98adcd313abd09c58d88072) +set(VPUX_PLUGIN_REVISION 1ba8c49601c1091b15ceb1ac758eeb99e58e7a8a) +set(VPUX_PLUGIN_RELEASE mtl_npu_ud_2024_08_rc3) # Directories set(OPENVINO_PREFIX_DIR "${CMAKE_BINARY_DIR}/third_party/openvino") diff --git a/compiler/include/vpux_driver_compiler.h b/compiler/include/vpux_driver_compiler.h index 9dc5ff5..f0a104f 100644 --- a/compiler/include/vpux_driver_compiler.h +++ b/compiler/include/vpux_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 5 -#define VCL_COMPILER_VERSION_MINOR 0 +#define VCL_COMPILER_VERSION_MINOR 2 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 diff --git a/compiler/openvino_modules/gflags-config.cmake b/compiler/openvino_modules/gflags-config.cmake new file mode 100644 index 0000000..4920e0e --- /dev/null +++ b/compiler/openvino_modules/gflags-config.cmake @@ -0,0 +1,13 @@ +# Copyright 2022-2023 Intel Corporation. +# +# This software and the related documents are Intel copyrighted materials, and +# your use of them is governed by the express license under which they were +# provided to you ("License"). Unless the License provides otherwise, you may +# not use, modify, copy, publish, distribute, disclose or transmit this +# software or the related documents without Intel's prior written permission. +# +# This software and the related documents are provided as is, with no express +# or implied warranties, other than those that are expressly stated in +# the License. 
+ +message(WARNING "Ignoring system gflags") diff --git a/compiler/openvino_package.cmake b/compiler/openvino_package.cmake index 96acfee..1ca07e1 100644 --- a/compiler/openvino_package.cmake +++ b/compiler/openvino_package.cmake @@ -22,118 +22,128 @@ else() set(TARGET_DISTRO ${CMAKE_SYSTEM_NAME}) endif() -set(OPENCV_PREFIX_DIR "${CMAKE_BINARY_DIR}/third_party/opencv") -set(OPENCV_SOURCE_DIR "${OPENCV_PREFIX_DIR}/src/opencv") -file(MAKE_DIRECTORY ${OPENCV_SOURCE_DIR}) - -set(OPENCV_BINARY_DIR "${OPENCV_PREFIX_DIR}/build") -file(MAKE_DIRECTORY ${OPENCV_BINARY_DIR}) +set(OPENVINO_PACKAGE_NAME "openvino-vpu-drv-${TARGET_DISTRO}-${VPUX_PLUGIN_RELEASE}-${BUILD_NUMBER}") +set(OPENVINO_PACKAGE_DIR "${CMAKE_BINARY_DIR}/third_party/openvino_package") +file(MAKE_DIRECTORY ${OPENVINO_PACKAGE_DIR}) -set(OPENVINO_PACKAGE_NAME "openvino-vpu-drv-${TARGET_DISTRO}-${VPUX_PLUGIN_RELEASE}-${VPU_VERSION}") +list(APPEND COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE:FILEPATH=${CMAKE_TOOLCHAIN_FILE}) +list(APPEND COMMON_CMAKE_ARGS -DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}) +### OpenVINO ### set(OPENVINO_BINARY_DIR "${OPENVINO_PREFIX_DIR}/build") file(MAKE_DIRECTORY ${OPENVINO_BINARY_DIR}) -set(OPENVINO_PACKAGE_DIR "${CMAKE_BINARY_DIR}/third_party/openvino_package") -file(MAKE_DIRECTORY ${OPENVINO_PACKAGE_DIR}) - -set(SAMPLES_APPS_PACKAGE_DIR "${OPENVINO_PACKAGE_DIR}/tools/deployment_tools/inference_engine/bin") -file(MAKE_DIRECTORY ${SAMPLES_APPS_PACKAGE_DIR}) +ExternalProject_Add( + openvino_build + DOWNLOAD_COMMAND "" + DEPENDS vpux_plugin_source openvino_source + PREFIX ${OPENVINO_PREFIX_DIR} + SOURCE_DIR ${OPENVINO_SOURCE_DIR} + BINARY_DIR ${OPENVINO_BINARY_DIR} + INSTALL_DIR ${OPENVINO_PACKAGE_DIR} + CMAKE_ARGS + ${COMMON_CMAKE_ARGS} + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${OPENVINO_PACKAGE_DIR}) -set(OPENVINO_BINARY_RELEASE_DIR "${OPENVINO_SOURCE_DIR}/bin/intel64/Release") +### OpenCV ### +set(OPENCV_PREFIX_DIR "${CMAKE_BINARY_DIR}/third_party/opencv") +set(OPENCV_SOURCE_DIR "${OPENCV_PREFIX_DIR}/src/opencv") +file(MAKE_DIRECTORY ${OPENCV_SOURCE_DIR}) -### OpenVINO flags -list(APPEND OPENVINO_CMAKE_FLAGS -DCMAKE_BUILD_TYPE=Release) -list(APPEND OPENVINO_CMAKE_FLAGS -DCMAKE_INSTALL_PREFIX=${OPENVINO_PACKAGE_DIR}) -# To set BUILD_SHARED_LIBS to OFF the cmake 3.18 is required -list(APPEND OPENVINO_CMAKE_FLAGS -DBUILD_SHARED_LIBS=ON) -# OpenVINO Plugins -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_INTEL_CPU=ON) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_INTEL_GPU=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_INTEL_GNA=ON) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_HETERO=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_MULTI=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_AUTO=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_TEMPLATE=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_AUTO_BATCH=OFF) -# OpenVINO Frontends -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_OV_IR_FRONTEND=ON) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_OV_TF_FRONTEND=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_OV_TF_LITE_FRONTEND=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_OV_ONNX_FRONTEND=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_OV_PADDLE_FRONTEND=OFF) -# OpenVINO Other -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_PLUGINS_XML=ON) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_IR_V7_READER=ON) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_SAMPLES=ON) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_PYTHON=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_WHEEL=OFF) -list(APPEND OPENVINO_CMAKE_FLAGS -DTHREADING=TBB) 
-list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_TBBBIND_2_5=OFF) -# OpenCV is required by single-image-test in vpux-plugin -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_OPENCV=ON) - -### VPUX plugin flags -list(APPEND OPENVINO_CMAKE_FLAGS -DCMAKE_BUILD_TYPE=Release) -list(APPEND OPENVINO_CMAKE_FLAGS -DIE_EXTRA_MODULES=${VPUX_PLUGIN_SOURCE_DIR}) -# MLIR can be used in compile_tool -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_MLIR_COMPILER=ON) -# Enable using CiD from VPUX plugin -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_DRIVER_COMPILER_ADAPTER=ON) -# Do not build CiD, the CiD is comes with driver -list(APPEND OPENVINO_CMAKE_FLAGS -DBUILD_COMPILER_FOR_DRIVER=OFF) -# To build CiD, ENABLE_TESTS has to be set to ON -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_TESTS=OFF) -# Below flags add logs that can be enabled using "export IE_VPUX_LOG_LEVEL=LOG_INFO" -list(APPEND OPENVINO_CMAKE_FLAGS -DENABLE_DEVELOPER_BUILD=OFF) - -# TODO: Temporary WA for building LLVM from plugin source -list(APPEND OPENVINO_CMAKE_FLAGS -DLLVM_ENABLE_ZLIB=OFF) -# TODO: Disable AVX512F instruction for MTL to exclude unsupported instructions -list(APPEND OPENVINO_CMAKE_FLAGS -DCMAKE_CXX_FLAGS_INIT="-mno-avx512f") - -list(APPEND OPENVINO_CMAKE_FLAGS -DOpenCV_DIR=${OPENCV_BINARY_DIR}) +set(OPENCV_BINARY_DIR "${OPENCV_PREFIX_DIR}/build") +file(MAKE_DIRECTORY ${OPENCV_BINARY_DIR}) ExternalProject_Add( opencv_build GIT_REPOSITORY https://github.com/opencv/opencv.git - GIT_TAG 4.8.0 + GIT_TAG 8e43c8f200b1b785df7f265dfa79ee97278977f0 + DEPENDS openvino_build PREFIX ${OPENCV_PREFIX_DIR} SOURCE_DIR ${OPENCV_SOURCE_DIR} BINARY_DIR ${OPENCV_BINARY_DIR} INSTALL_DIR ${OPENVINO_PACKAGE_DIR}/opencv - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${OPENVINO_PACKAGE_DIR}/opencv - -DOPENCV_GENERATE_SETUPVARS=ON) + CMAKE_ARGS + ${COMMON_CMAKE_ARGS} + -DCMAKE_INSTALL_PREFIX=${OPENVINO_PACKAGE_DIR}/opencv + -DCMAKE_PREFIX_PATH=${OPENVINO_BINARY_DIR} + -DOPENCV_GENERATE_SETUPVARS=ON + -DWITH_OPENVINO=ON) + +### VPUX plugin ### +set(VPUX_PLUGIN_BINARY_DIR ${VPUX_PLUGIN_PREFIX_DIR}/build) +file(MAKE_DIRECTORY ${VPUX_PLUGIN_BINARY_DIR}) ExternalProject_Add( - openvino_vpux_plugin_build + vpux_plugin_build DOWNLOAD_COMMAND "" - DEPENDS vpux_plugin_source openvino_source opencv_build - PREFIX ${OPENVINO_PREFIX_DIR} - SOURCE_DIR ${OPENVINO_SOURCE_DIR} - BINARY_DIR ${OPENVINO_BINARY_DIR} + DEPENDS openvino_build opencv_build + PREFIX ${VPUX_PLUGIN_PREFIX_DIR} + SOURCE_DIR ${VPUX_PLUGIN_SOURCE_DIR} + BINARY_DIR ${VPUX_PLUGIN_BINARY_DIR} INSTALL_DIR ${OPENVINO_PACKAGE_DIR} - CMAKE_ARGS ${OPENVINO_CMAKE_FLAGS}) + CMAKE_ARGS + ${COMMON_CMAKE_ARGS} + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_INSTALL_PREFIX=${OPENVINO_PACKAGE_DIR} + -DOpenCV_DIR=${OPENCV_BINARY_DIR} + -DInferenceEngineDeveloperPackage_DIR=${OPENVINO_BINARY_DIR}) + +### Sample applications from OpenVINO (benchmark_app ...) 
### +set(SAMPLES_APPS_BUILD_DIR ${OPENVINO_PREFIX_DIR}/build-samples) +file(MAKE_DIRECTORY ${SAMPLES_APPS_BUILD_DIR}) + +set(SAMPLES_APPS_PACKAGE_DIR "${OPENVINO_PACKAGE_DIR}/tools/deployment_tools/inference_engine/bin") +file(MAKE_DIRECTORY ${SAMPLES_APPS_PACKAGE_DIR}) + +ExternalProject_Add( + sample_apps_build + DOWNLOAD_COMMAND "" + DEPENDS openvino_build opencv_build + PREFIX ${OPENVINO_PREFIX_DIR} + SOURCE_DIR ${OPENVINO_SOURCE_DIR}/samples/cpp + BINARY_DIR ${SAMPLES_APPS_BUILD_DIR} + INSTALL_DIR ${SAMPLES_APPS_BUILD_DIR} + CMAKE_ARGS + ${COMMON_CMAKE_ARGS} + -DCMAKE_INSTALL_PREFIX=${SAMPLES_APPS_PACKAGE_DIR} + -DCMAKE_PREFIX_PATH=${OPENVINO_BINARY_DIR} + -DOpenCV_DIR=${OPENCV_BINARY_DIR}) + +### OV+VPUX plugin package ### +set(COMPILE_TOOL_PACKAGE_DIR "${OPENVINO_PACKAGE_DIR}/tools/compile_tool") +file(MAKE_DIRECTORY ${COMPILE_TOOL_PACKAGE_DIR}) + +set(OPENVINO_BINARY_RELEASE_DIR "${OPENVINO_SOURCE_DIR}/bin/intel64/Release") add_custom_target( openvino_package ALL COMMAND cp -d ${OPENCV_BINARY_DIR}/setup_vars.sh ${OPENVINO_PACKAGE_DIR}/opencv/setupvars.sh && - cp -d ${OPENVINO_BINARY_RELEASE_DIR}/benchmark_app ${SAMPLES_APPS_PACKAGE_DIR}/ && - cp -d ${OPENVINO_BINARY_RELEASE_DIR}/classification_sample_async ${SAMPLES_APPS_PACKAGE_DIR}/ && - cp -d ${OPENVINO_BINARY_RELEASE_DIR}/hello_classification ${SAMPLES_APPS_PACKAGE_DIR}/ && - cp -d ${OPENVINO_BINARY_RELEASE_DIR}/hello_query_device ${SAMPLES_APPS_PACKAGE_DIR}/ && + cp -d ${SAMPLES_APPS_BUILD_DIR}/intel64/benchmark_app ${SAMPLES_APPS_PACKAGE_DIR}/ && + cp -d ${SAMPLES_APPS_BUILD_DIR}/intel64/classification_sample_async ${SAMPLES_APPS_PACKAGE_DIR}/ && + cp -d ${SAMPLES_APPS_BUILD_DIR}/intel64/hello_classification ${SAMPLES_APPS_PACKAGE_DIR}/ && + cp -d ${SAMPLES_APPS_BUILD_DIR}/intel64/hello_query_device ${SAMPLES_APPS_PACKAGE_DIR}/ && + cp -d ${OPENVINO_BINARY_RELEASE_DIR}/protopipe ${SAMPLES_APPS_PACKAGE_DIR}/ && cp -d ${OPENVINO_BINARY_RELEASE_DIR}/single-image-test ${SAMPLES_APPS_PACKAGE_DIR}/ && + cp -d ${OPENVINO_BINARY_RELEASE_DIR}/compile_tool ${COMPILE_TOOL_PACKAGE_DIR}/ && + git -C ${OPENCV_SOURCE_DIR} rev-list --max-count=1 HEAD > ${OPENVINO_PACKAGE_DIR}/opencv_sha && git -C ${OPENVINO_SOURCE_DIR} rev-list --max-count=1 HEAD > ${OPENVINO_PACKAGE_DIR}/openvino_sha && git -C ${VPUX_PLUGIN_SOURCE_DIR} rev-list --max-count=1 HEAD > ${OPENVINO_PACKAGE_DIR}/vpux_plugin_sha && echo ${OPENVINO_PACKAGE_NAME} > ${OPENVINO_PACKAGE_DIR}/build_version && echo `git -C ${OPENVINO_SOURCE_DIR} rev-parse HEAD` `git -C ${OPENVINO_SOURCE_DIR} config --local --get remote.origin.url` > ${OPENVINO_PACKAGE_DIR}/manifest.txt && echo `git -C ${VPUX_PLUGIN_SOURCE_DIR} rev-parse HEAD` `git -C ${VPUX_PLUGIN_SOURCE_DIR} config --local --get remote.origin.url` >> ${OPENVINO_PACKAGE_DIR}/manifest.txt && tar -C ${OPENVINO_PACKAGE_DIR} -czf ${CMAKE_BINARY_DIR}/${OPENVINO_PACKAGE_NAME}.tar.gz . - DEPENDS openvino_vpux_plugin_build + DEPENDS openvino_build opencv_build vpux_plugin_build sample_apps_build BYPRODUCTS ${CMAKE_BINARY_DIR}/${OPENVINO_PACKAGE_NAME}.tar.gz) +install( + DIRECTORY ${OPENVINO_PACKAGE_DIR} + DESTINATION validation/openvino + COMPONENT openvino_driver_package) + install( FILES ${CMAKE_BINARY_DIR}/${OPENVINO_PACKAGE_NAME}.tar.gz DESTINATION . 
- COMPONENT openvino_package) + COMPONENT openvino_standalone_package + EXCLUDE_FROM_ALL) diff --git a/compiler/vpux_compiler.cmake b/compiler/vpux_compiler.cmake index d6c7c9c..a83c0c0 100644 --- a/compiler/vpux_compiler.cmake +++ b/compiler/vpux_compiler.cmake @@ -11,19 +11,19 @@ add_library(vpux_compiler INTERFACE) -if(ENABLE_VPUX_COMPILER_CACHE OR ENABLE_VPUX_COMPILER_BUILD) +if(ENABLE_VPUX_COMPILER_BUILD) if(ENABLE_VPUX_COMPILER_BUILD) include(vpux_compiler_build.cmake) endif() add_dependencies(vpux_compiler ${VPUX_COMPILER_DEPENDENCY}) - install(FILES ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libvpux_driver_compiler.so + install(FILES ${VPUX_COMPILER_LIBS} TYPE LIB COMPONENT driver-compiler-npu) elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/include/) set(VPUX_COMPILER_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) else() - message(FATAL_ERROR "Missing compiler headers in path ${CMAKE_CURRENT_SOURCE_DIR}/include/") + message(FATAL_ERROR "Missing VPUXCompilerL0 headers in path ${CMAKE_CURRENT_SOURCE_DIR}/include/") endif() target_include_directories(vpux_compiler INTERFACE ${VPUX_COMPILER_INCLUDE_DIR}) diff --git a/compiler/vpux_compiler_build.cmake b/compiler/vpux_compiler_build.cmake index ccf0a2c..ae08244 100644 --- a/compiler/vpux_compiler_build.cmake +++ b/compiler/vpux_compiler_build.cmake @@ -23,6 +23,8 @@ file(MAKE_DIRECTORY ${VPUX_COMPILER_BINARY_DIR}) set(VPUX_COMPILER_INCLUDE_DIR "${VPUX_PLUGIN_SOURCE_DIR}/src/vpux_driver_compiler/include") file(MAKE_DIRECTORY ${VPUX_COMPILER_INCLUDE_DIR}) +list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DCMAKE_TOOLCHAIN_FILE:FILEPATH=${CMAKE_TOOLCHAIN_FILE}) +list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DBUILD_COMPILER_FOR_DRIVER=ON) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DBUILD_SHARED_LIBS=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DCMAKE_BUILD_TYPE=Release) @@ -32,8 +34,6 @@ list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_HETERO=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_INTEL_CPU=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_INTEL_GNA=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_INTEL_GPU=OFF) -list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_INTEL_MYRIAD=OFF) -list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_INTEL_MYRIAD_COMMON=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_IR_V7_READER=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_MULTI=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_OV_IR_FRONTEND=ON) @@ -45,15 +45,14 @@ list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_PYTHON=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_TEMPLATE=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_TESTS=ON) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_WHEEL=OFF) -list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DTHREADING=TBB) -list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DENABLE_TBBBIND_2_5=OFF) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DIE_EXTRA_MODULES=${VPUX_PLUGIN_SOURCE_DIR}) list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DOUTPUT_ROOT=${VPUX_COMPILER_BINARY_DIR}) # TODO: Temporary WA for building LLVM from plugin source list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DLLVM_ENABLE_ZLIB=OFF) -# TODO: Disable AVX512F instruction for MTL to exclude unsupported instructions -list(APPEND VPUX_COMPILER_CMAKE_FLAGS -DCMAKE_CXX_FLAGS_INIT="-mno-avx512f") + +# TODO: Temporary WA for building VPUX tools that try to find the gflags dependency in the system +list(APPEND VPUX_COMPILER_CMAKE_FLAGS -Dgflags_DIR=${CMAKE_CURRENT_SOURCE_DIR}/openvino_modules) ExternalProject_Add( vpux_compiler_build @@
-75,3 +74,4 @@ ExternalProject_Add( ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libvpux_driver_compiler.so) set(VPUX_COMPILER_DEPENDENCY vpux_compiler_build) +list(APPEND VPUX_COMPILER_LIBS ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libvpux_driver_compiler.so) diff --git a/firmware/CMakeLists.txt b/firmware/CMakeLists.txt index d47151e..8842327 100644 --- a/firmware/CMakeLists.txt +++ b/firmware/CMakeLists.txt @@ -14,7 +14,7 @@ target_include_directories(${PROJECT_NAME} INTERFACE include) file(GLOB FIRMWARE_BINARIES ${CMAKE_CURRENT_SOURCE_DIR}/bin/*.bin) if (FIRMWARE_BINARIES) install(FILES ${FIRMWARE_BINARIES} - DESTINATION ${CMAKE_INSTALL_LIBDIR}/firmware/intel/vpu/ + DESTINATION ${CMAKE_INSTALL_LIBDIR}/firmware/updates/intel/vpu/ PERMISSIONS OWNER_READ COMPONENT fw-npu) endif() diff --git a/firmware/bin/mtl_vpu_v0.0.bin b/firmware/bin/mtl_vpu_v0.0.bin deleted file mode 120000 index 89eed7d..0000000 --- a/firmware/bin/mtl_vpu_v0.0.bin +++ /dev/null @@ -1 +0,0 @@ -vpu_37xx_v0.0.bin \ No newline at end of file diff --git a/firmware/bin/vpu_37xx_v0.0.bin b/firmware/bin/vpu_37xx_v0.0.bin index 55fc87f..00a52fa 100644 --- a/firmware/bin/vpu_37xx_v0.0.bin +++ b/firmware/bin/vpu_37xx_v0.0.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c0b380ccbb834a41f58c466082012978c5975812c79b29207b4a5c46329cac57 -size 1917644 +oid sha256:a8684b2e20673a5985fc1108182ec310f8df81d2c268c7bb99612d33db8ad66a +size 2847008 diff --git a/firmware/include/api/vpu_jsm_api.h b/firmware/include/api/vpu_jsm_api.h index e1e4950..d29f0a8 100644 --- a/firmware/include/api/vpu_jsm_api.h +++ b/firmware/include/api/vpu_jsm_api.h @@ -22,23 +22,18 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 15 +#define VPU_JSM_API_VER_MINOR 16 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_JSM_API_VER_PATCH 2 +#define VPU_JSM_API_VER_PATCH 0 /* * Index in the API version table */ #define VPU_JSM_API_VER_INDEX 4 -#ifndef API_ALIGN -/* Default declaration of API_ALIGN for platforms where it is not needed. */ -#define API_ALIGN -#endif /* API_ALIGN */ - /* * Number of Priority Bands for Hardware Scheduling * Bands: RealTime, Focus, Normal, Idle @@ -48,8 +43,11 @@ /* Max number of impacted contexts that can be dealt with the engine reset command */ #define VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS 3 -/** Pack the API structures for now, once alignment issues are fixed this can be removed */ -#pragma pack(push, 1) +/* + * Pack the API structures to enforce binary compatibility + * Align to 8 bytes for optimal performance + */ +#pragma pack(push, 8) /* * Engine indexes. @@ -129,6 +127,19 @@ */ #define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U +/* + * vpu_jsm_engine_reset_context flag definitions + */ +#define VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK (1 << 0) +#define VPU_ENGINE_RESET_CONTEXT_HANG_PRIMARY_CAUSE 0 +#define VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE 1 + +/* + * Invalid command queue handle identifier. Applies to cmdq_id and cmdq_group + * in this API. + */ +#define VPU_HWS_INVALID_CMDQ_HANDLE 0ULL + /* * Job format. */ @@ -144,7 +155,7 @@ struct vpu_job { volatile uint64_t secondary_preempt_buf_addr; /**< Address of secondary preemption buffer to use for this job */ uint8_t reserved_0[VPU_JOB_RESERVED_BYTES]; }; -typedef struct vpu_job API_ALIGN vpu_job_t; +typedef struct vpu_job vpu_job_t; /* * Job queue control registers. 
@@ -155,7 +166,7 @@ struct vpu_job_queue_header { volatile uint32_t tail; uint8_t reserved_0[VPU_JOB_QUEUE_RESERVED_BYTES]; }; -typedef struct vpu_job_queue_header API_ALIGN vpu_job_queue_header_t; +typedef struct vpu_job_queue_header vpu_job_queue_header_t; /* * Job queue format. @@ -164,7 +175,7 @@ struct vpu_job_queue { struct vpu_job_queue_header header; struct vpu_job job[]; }; -typedef struct vpu_job_queue API_ALIGN vpu_job_queue_t; +typedef struct vpu_job_queue vpu_job_queue_t; /** * Logging entity types. @@ -194,7 +205,6 @@ struct vpu_hws_log_buffer_header { uint64_t num_of_entries; uint64_t reserved[2]; }; -typedef struct vpu_hws_log_buffer_header API_ALIGN vpu_hws_log_buffer_header_t; /* * HWS specific log buffer entry details. @@ -216,7 +226,6 @@ struct vpu_hws_log_buffer_entry { /* Operation data depends on operation type */ uint64_t operation_data[2]; }; -typedef struct vpu_hws_log_buffer_entry API_ALIGN vpu_hws_log_buffer_entry_t; /* * Host <-> VPU IPC messages types. @@ -435,7 +444,7 @@ struct vpu_ipc_msg_payload_engine_reset { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_engine_reset API_ALIGN vpu_ipc_msg_payload_engine_reset_t; +typedef struct vpu_ipc_msg_payload_engine_reset vpu_ipc_msg_payload_engine_reset_t; struct vpu_ipc_msg_payload_engine_preempt { /* Engine to be preempted. */ @@ -443,7 +452,7 @@ struct vpu_ipc_msg_payload_engine_preempt { /* ID of the preemption request. */ uint32_t preempt_id; }; -typedef struct vpu_ipc_msg_payload_engine_preempt API_ALIGN vpu_ipc_msg_payload_engine_preempt_t; +typedef struct vpu_ipc_msg_payload_engine_preempt vpu_ipc_msg_payload_engine_preempt_t; /* * @brief Register doorbell command structure. @@ -462,7 +471,7 @@ struct vpu_ipc_msg_payload_register_db { /* Host sub-stream ID for the context assigned to the doorbell. */ uint32_t host_ssid; }; -typedef struct vpu_ipc_msg_payload_register_db API_ALIGN vpu_ipc_msg_payload_register_db_t; +typedef struct vpu_ipc_msg_payload_register_db vpu_ipc_msg_payload_register_db_t; /** * @brief Unregister doorbell command structure. @@ -475,7 +484,7 @@ struct vpu_ipc_msg_payload_unregister_db { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_unregister_db API_ALIGN vpu_ipc_msg_payload_unregister_db_t; +typedef struct vpu_ipc_msg_payload_unregister_db vpu_ipc_msg_payload_unregister_db_t; struct vpu_ipc_msg_payload_query_engine_hb { /* Engine to return heartbeat value. */ @@ -483,7 +492,7 @@ struct vpu_ipc_msg_payload_query_engine_hb { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_query_engine_hb API_ALIGN vpu_ipc_msg_payload_query_engine_hb_t; +typedef struct vpu_ipc_msg_payload_query_engine_hb vpu_ipc_msg_payload_query_engine_hb_t; struct vpu_ipc_msg_payload_power_level { /** @@ -500,7 +509,7 @@ struct vpu_ipc_msg_payload_power_level { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_set_power_level API_ALIGN vpu_ipc_msg_payload_set_power_level_t; +typedef struct vpu_ipc_msg_payload_set_power_level vpu_ipc_msg_payload_set_power_level_t; struct vpu_ipc_msg_payload_ssid_release { /* Host sub-stream ID for the context to be released. */ @@ -508,7 +517,7 @@ struct vpu_ipc_msg_payload_ssid_release { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_ssid_release API_ALIGN vpu_ipc_msg_payload_ssid_release_t; +typedef struct vpu_ipc_msg_payload_ssid_release vpu_ipc_msg_payload_ssid_release_t; /** * @brief Metric streamer start command structure. 
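/*
 * Illustration, not part of the header above: the switch from #pragma pack(push, 1)
 * to #pragma pack(push, 8) only preserves binary compatibility because every API
 * structure already pads its members to natural alignment. A minimal, self-contained
 * sketch of that invariant (the struct and asserts below are hypothetical, written
 * in the same style as this header):
 */
#include <cstddef>
#include <cstdint>

#pragma pack(push, 8)
struct example_ipc_payload {
    uint32_t engine_idx; /* 4 bytes */
    uint32_t reserved_0; /* explicit padding, so offsets match under pack(1) and pack(8) */
    uint64_t cmdq_id;    /* lands on an 8-byte boundary either way */
};
#pragma pack(pop)

static_assert(sizeof(example_ipc_payload) == 16, "example_ipc_payload size != 16");
static_assert(offsetof(example_ipc_payload, cmdq_id) == 8, "cmdq_id offset != 8");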
@@ -556,7 +565,7 @@ struct vpu_jsm_metric_streamer_start { uint64_t next_buffer_addr; uint64_t next_buffer_size; }; -typedef struct vpu_jsm_metric_streamer_start API_ALIGN vpu_jsm_metric_streamer_start_t; +typedef struct vpu_jsm_metric_streamer_start vpu_jsm_metric_streamer_start_t; /** * @brief Metric streamer stop command structure. @@ -566,7 +575,7 @@ struct vpu_jsm_metric_streamer_stop { /** Bitmask to select the desired metric groups. */ uint64_t metric_group_mask; }; -typedef struct vpu_jsm_metric_streamer_stop API_ALIGN vpu_jsm_metric_streamer_stop_t; +typedef struct vpu_jsm_metric_streamer_stop vpu_jsm_metric_streamer_stop_t; /** * Provide VPU FW with buffers to write metric data. @@ -598,13 +607,13 @@ struct vpu_jsm_metric_streamer_update { uint64_t next_buffer_addr; uint64_t next_buffer_size; }; -typedef struct vpu_jsm_metric_streamer_update API_ALIGN vpu_jsm_metric_streamer_update_t; +typedef struct vpu_jsm_metric_streamer_update vpu_jsm_metric_streamer_update_t; struct vpu_ipc_msg_payload_blob_deinit { /* 64-bit unique ID for the blob to be de-initialized. */ uint64_t blob_id; }; -typedef struct vpu_ipc_msg_payload_blob_deinit API_ALIGN vpu_ipc_msg_payload_blob_deinit_t; +typedef struct vpu_ipc_msg_payload_blob_deinit vpu_ipc_msg_payload_blob_deinit_t; struct vpu_ipc_msg_payload_job_done { /* Engine to which the job was submitted. */ @@ -622,7 +631,7 @@ struct vpu_ipc_msg_payload_job_done { /* Command queue id */ uint64_t cmdq_id; }; -typedef struct vpu_ipc_msg_payload_job_done API_ALIGN vpu_ipc_msg_payload_job_done_t; +typedef struct vpu_ipc_msg_payload_job_done vpu_ipc_msg_payload_job_done_t; struct vpu_jsm_engine_reset_context { /* Host SSID */ @@ -631,10 +640,9 @@ struct vpu_jsm_engine_reset_context { uint32_t reserved_0; /* Command queue id */ uint64_t cmdq_id; - /* Flags: 0: cause of hang; 1: collateral damage of reset */ + /* See VPU_ENGINE_RESET_CONTEXT_* defines */ uint64_t flags; }; -typedef struct vpu_jsm_engine_reset_context API_ALIGN vpu_jsm_engine_reset_context_t; struct vpu_ipc_msg_payload_engine_reset_done { /* Engine ordinal */ @@ -644,7 +652,7 @@ struct vpu_ipc_msg_payload_engine_reset_done { /* Array of impacted command queue ids and their flags */ struct vpu_jsm_engine_reset_context impacted_contexts[VPU_MAX_ENGINE_RESET_IMPACTED_CONTEXTS]; }; -typedef struct vpu_ipc_msg_payload_engine_reset_done API_ALIGN vpu_ipc_msg_payload_engine_reset_done_t; +typedef struct vpu_ipc_msg_payload_engine_reset_done vpu_ipc_msg_payload_engine_reset_done_t; struct vpu_ipc_msg_payload_engine_preempt_done { /* Engine preempted. */ @@ -652,7 +660,7 @@ struct vpu_ipc_msg_payload_engine_preempt_done { /* ID of the preemption request. 
*/ uint32_t preempt_id; }; -typedef struct vpu_ipc_msg_payload_engine_preempt_done API_ALIGN vpu_ipc_msg_payload_engine_preempt_done_t; +typedef struct vpu_ipc_msg_payload_engine_preempt_done vpu_ipc_msg_payload_engine_preempt_done_t; /** * Response structure for register doorbell command for both OS @@ -666,7 +674,7 @@ struct vpu_ipc_msg_payload_register_db_done { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_register_db_done API_ALIGN vpu_ipc_msg_payload_register_db_done_t; +typedef struct vpu_ipc_msg_payload_register_db_done vpu_ipc_msg_payload_register_db_done_t; /** * Response structure for unregister doorbell command for both OS @@ -679,7 +687,7 @@ struct vpu_ipc_msg_payload_unregister_db_done { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_unregister_db_done API_ALIGN vpu_ipc_msg_payload_unregister_db_done_t; +typedef struct vpu_ipc_msg_payload_unregister_db_done vpu_ipc_msg_payload_unregister_db_done_t; struct vpu_ipc_msg_payload_query_engine_hb_done { /* Engine returning heartbeat value. */ @@ -689,7 +697,7 @@ struct vpu_ipc_msg_payload_query_engine_hb_done { /* Heartbeat value. */ uint64_t heartbeat; }; -typedef struct vpu_ipc_msg_payload_query_engine_hb_done API_ALIGN vpu_ipc_msg_payload_query_engine_hb_done_t; +typedef struct vpu_ipc_msg_payload_query_engine_hb_done vpu_ipc_msg_payload_query_engine_hb_done_t; struct vpu_ipc_msg_payload_get_power_level_count_done { /** @@ -706,14 +714,13 @@ struct vpu_ipc_msg_payload_get_power_level_count_done { */ uint8_t power_limit[16]; }; -typedef struct vpu_ipc_msg_payload_get_power_level_count_done API_ALIGN - vpu_ipc_msg_payload_get_power_level_count_done_t; +typedef struct vpu_ipc_msg_payload_get_power_level_count_done vpu_ipc_msg_payload_get_power_level_count_done_t; struct vpu_ipc_msg_payload_blob_deinit_done { /* 64-bit unique ID for the blob de-initialized. */ uint64_t blob_id; }; -typedef struct vpu_ipc_msg_payload_blob_deinit_done API_ALIGN vpu_ipc_msg_payload_blob_deinit_done_t; +typedef struct vpu_ipc_msg_payload_blob_deinit_done vpu_ipc_msg_payload_blob_deinit_done_t; /* HWS priority band setup request / response */ struct vpu_ipc_msg_payload_hws_priority_band_setup { @@ -740,7 +747,6 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_hws_priority_band_setup API_ALIGN vpu_ipc_msg_payload_hws_priority_band_setup_t; /* * @brief HWS create command queue request. @@ -757,11 +763,7 @@ struct vpu_ipc_msg_payload_hws_create_cmdq { uint32_t host_ssid; /* Engine for which queue is being created */ uint32_t engine_idx; - /* - * Cmdq group may be set to 0 or equal to - * cmdq_id while each priority band contains - * only single engine instances. - */ + /* Cmdq group: only used for HWS logging of state changes */ uint64_t cmdq_group; /* Command queue id */ uint64_t cmdq_id; @@ -772,7 +774,6 @@ struct vpu_ipc_msg_payload_hws_create_cmdq { /* Zero padding */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_hws_create_cmdq API_ALIGN vpu_ipc_msg_payload_hws_create_cmdq_t; /* * @brief HWS create command queue response. 
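/*
 * Illustration, not part of the header above: the flags field of
 * struct vpu_jsm_engine_reset_context is now documented through the
 * VPU_ENGINE_RESET_CONTEXT_* defines. A hedged sketch of a host-side decode
 * helper (the function name is hypothetical; the defines are from this API):
 */
static inline bool engine_reset_context_is_collateral(uint64_t flags)
{
    /* Bit 0: 0 = primary cause of the hang, 1 = collateral damage of the reset. */
    return (flags & VPU_ENGINE_RESET_CONTEXT_FLAG_COLLATERAL_DAMAGE_MASK) ==
           VPU_ENGINE_RESET_CONTEXT_COLLATERAL_DAMAGE;
}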
@@ -791,7 +792,6 @@ struct vpu_ipc_msg_payload_hws_create_cmdq_rsp { /* Command queue id */ uint64_t cmdq_id; }; -typedef struct vpu_ipc_msg_payload_hws_create_cmdq_rsp API_ALIGN vpu_ipc_msg_payload_hws_create_cmdq_rsp_t; /* HWS destroy command queue request / response */ struct vpu_ipc_msg_payload_hws_destroy_cmdq { @@ -802,7 +802,6 @@ struct vpu_ipc_msg_payload_hws_destroy_cmdq { /* Command queue id */ uint64_t cmdq_id; }; -typedef struct vpu_ipc_msg_payload_hws_destroy_cmdq API_ALIGN vpu_ipc_msg_payload_hws_destroy_cmdq_t; /* HWS set context scheduling properties request / response */ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { @@ -827,8 +826,6 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { /* Grace period when preempting context of a lower priority within the same process */ uint64_t grace_period_lower_priority; }; -typedef struct vpu_ipc_msg_payload_hws_set_context_sched_properties API_ALIGN - vpu_ipc_msg_payload_hws_set_context_sched_properties_t; /* * @brief Register doorbell command structure. @@ -851,7 +848,6 @@ struct vpu_jsm_hws_register_db { /* Size of the command queue in bytes. */ uint64_t cmdq_size; }; -typedef struct vpu_jsm_hws_register_db API_ALIGN vpu_jsm_hws_register_db_t; /* * @brief Structure to set another buffer to be used for scheduling-related logging. @@ -889,8 +885,15 @@ struct vpu_ipc_msg_payload_hws_set_scheduling_log { * is generated when an event log is written to this index. */ uint64_t notify_index; + /* + * Enable extra events to be output to log for debug of scheduling algorithm. + * Interpreted by VPU as a boolean to enable or disable, expected values are + * 0 and 1. + */ + uint32_t enable_extra_events; + /* Zero Padding */ + uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_hws_set_scheduling_log API_ALIGN vpu_ipc_msg_payload_hws_set_scheduling_log_t; /* * @brief The scheduling log notification is generated by VPU when it writes @@ -906,8 +909,6 @@ struct vpu_ipc_msg_payload_hws_scheduling_log_notification { /* Zero Padding */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_hws_scheduling_log_notification API_ALIGN - vpu_ipc_msg_payload_hws_scheduling_log_notification_t; /* * @brief HWS suspend command queue request and done structure. @@ -941,7 +942,6 @@ struct vpu_ipc_msg_payload_hws_suspend_cmdq { */ uint64_t suspend_fence_value; }; -typedef struct vpu_ipc_msg_payload_hws_suspend_cmdq API_ALIGN vpu_ipc_msg_payload_hws_suspend_cmdq_t; /* * @brief HWS Resume command queue request / response structure. @@ -959,7 +959,6 @@ struct vpu_ipc_msg_payload_hws_resume_cmdq { /* Command queue id */ uint64_t cmdq_id; }; -typedef struct vpu_ipc_msg_payload_hws_resume_cmdq API_ALIGN vpu_ipc_msg_payload_hws_resume_cmdq_t; /* * @brief HWS Resume engine request / response structure. @@ -974,7 +973,6 @@ struct vpu_ipc_msg_payload_hws_resume_engine { /* Reserved */ uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_hws_resume_engine API_ALIGN vpu_ipc_msg_payload_hws_resume_engine_t; /** * Payload for VPU_IPC_MSG_TRACE_SET_CONFIG[_RSP] and @@ -1013,7 +1011,6 @@ struct vpu_ipc_msg_payload_trace_config { uint64_t trace_hw_component_mask; uint64_t reserved_0; /**< Reserved for future extensions. */ }; -typedef struct vpu_ipc_msg_payload_trace_config API_ALIGN vpu_ipc_msg_payload_trace_config_t; /** * Payload for VPU_IPC_MSG_TRACE_GET_CAPABILITY_RSP messages. 
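/*
 * Illustration, not part of the header above: a command queue suspend completes
 * once the firmware publishes suspend_fence_value to the queue's fence location.
 * A minimal host-side wait, assuming a coherent 64-bit fence mapping (the helper
 * and its polling policy are hypothetical):
 */
static inline void wait_for_suspend_fence(const volatile uint64_t *fence,
                                          uint64_t suspend_fence_value)
{
    /* Spin until the firmware-written fence reaches the requested value. */
    while (*fence < suspend_fence_value) {
    }
}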
@@ -1024,7 +1021,6 @@ struct vpu_ipc_msg_payload_trace_capability_rsp { uint64_t trace_hw_component_mask; /**< Bitmask of supported loggable HW components. */ uint64_t reserved_1; /**< Reserved for future extensions. */ }; -typedef struct vpu_ipc_msg_payload_trace_capability_rsp API_ALIGN vpu_ipc_msg_payload_trace_capability_rsp_t; /** * Payload for VPU_IPC_MSG_TRACE_GET_NAME requests. @@ -1042,7 +1038,6 @@ struct vpu_ipc_msg_payload_trace_get_name { */ uint64_t entity_id; }; -typedef struct vpu_ipc_msg_payload_trace_get_name API_ALIGN vpu_ipc_msg_payload_trace_get_name_t; /** * Payload for VPU_IPC_MSG_TRACE_GET_NAME_RSP responses. @@ -1064,7 +1059,6 @@ struct vpu_ipc_msg_payload_trace_get_name_rsp { /** The name of the entity. */ char entity_name[VPU_TRACE_ENTITY_NAME_MAX_LEN]; }; -typedef struct vpu_ipc_msg_payload_trace_get_name_rsp API_ALIGN vpu_ipc_msg_payload_trace_get_name_rsp_t; /** * Data sent from the VPU to the host in all metric streamer response messages @@ -1106,7 +1100,7 @@ struct vpu_jsm_metric_streamer_done { */ uint64_t bytes_written; }; -typedef struct vpu_jsm_metric_streamer_done API_ALIGN vpu_jsm_metric_streamer_done_t; +typedef struct vpu_jsm_metric_streamer_done vpu_jsm_metric_streamer_done_t; /** * Metric group description placed in the metric buffer after successful completion @@ -1153,7 +1147,7 @@ struct vpu_jsm_metric_group_descriptor { * the metric group. */ }; -typedef struct vpu_jsm_metric_group_descriptor API_ALIGN vpu_jsm_metric_group_descriptor_t; +typedef struct vpu_jsm_metric_group_descriptor vpu_jsm_metric_group_descriptor_t; /** * Metric counter description, placed in the buffer after vpu_jsm_metric_group_descriptor. @@ -1197,7 +1191,7 @@ struct vpu_jsm_metric_counter_descriptor { * component and unit strings. */ }; -typedef struct vpu_jsm_metric_counter_descriptor API_ALIGN vpu_jsm_metric_counter_descriptor_t; +typedef struct vpu_jsm_metric_counter_descriptor vpu_jsm_metric_counter_descriptor_t; /** * Payload for VPU_IPC_MSG_DYNDBG_CONTROL requests. @@ -1226,7 +1220,6 @@ struct vpu_ipc_msg_payload_dyndbg_control { */ char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN]; }; -typedef struct vpu_ipc_msg_payload_dyndbg_control API_ALIGN vpu_ipc_msg_payload_dyndbg_control_t; /** * Payload for VPU_IPC_MSG_PWR_D0I3_ENTER @@ -1242,7 +1235,7 @@ struct vpu_ipc_msg_payload_pwr_d0i3_enter { uint32_t send_response; uint32_t reserved_0; }; -typedef struct vpu_ipc_msg_payload_pwr_d0i3_enter API_ALIGN vpu_ipc_msg_payload_pwr_d0i3_enter_t; +typedef struct vpu_ipc_msg_payload_pwr_d0i3_enter vpu_ipc_msg_payload_pwr_d0i3_enter_t; /** * Payload for VPU_IPC_MSG_DCT_ENABLE message. @@ -1257,7 +1250,6 @@ struct vpu_ipc_msg_payload_pwr_dct_control { /** Duty cycle inactive time in microseconds */ uint32_t dct_inactive_us; }; -typedef struct vpu_ipc_msg_payload_pwr_dct_control API_ALIGN vpu_ipc_msg_payload_pwr_dct_control_t; /* * Payloads union, used to define complete message format. @@ -1302,7 +1294,7 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter; struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control; }; -typedef union vpu_ipc_msg_payload API_ALIGN vpu_ipc_msg_payload_t; +typedef union vpu_ipc_msg_payload vpu_ipc_msg_payload_t; /* * Host <-> LRT IPC message base structure. @@ -1328,7 +1320,7 @@ struct vpu_ipc_msg { /* Message payload depending on message type, see vpu_ipc_msg_payload union. 
*/ union vpu_ipc_msg_payload payload; }; -typedef struct vpu_ipc_msg API_ALIGN vpu_ipc_msg_t; +typedef struct vpu_ipc_msg vpu_ipc_msg_t; #pragma pack(pop) diff --git a/firmware/include/api/vpu_jsm_job_cmd_api.h b/firmware/include/api/vpu_jsm_job_cmd_api.h index 6adc311..0de7446 100644 --- a/firmware/include/api/vpu_jsm_job_cmd_api.h +++ b/firmware/include/api/vpu_jsm_job_cmd_api.h @@ -25,7 +25,7 @@ /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_JSM_JOB_CMD_API_VER_PATCH 1 +#define VPU_JSM_JOB_CMD_API_VER_PATCH 4 /* * Index in the API version table @@ -33,8 +33,11 @@ */ #define VPU_JSM_JOB_CMD_API_VER_INDEX 3 -/** Pack the API structures for now, once alignment issues are fixed this can be removed */ -#pragma pack(push, 1) +/* + * Pack the API structures to enforce binary compatibility + * Align to 8 bytes for optimal performance + */ +#pragma pack(push, 8) /** Maximum number of descriptors in a copy command. */ #define VPU_CMD_COPY_DESC_COUNT_MAX 4096 @@ -146,6 +149,19 @@ typedef struct vpu_cmd_resource_descriptor_table { * @brief Copy command descriptor on VPU 37xx * Note VPU 37xx does not have a LOCAL memory * + * NOTE: Due to the presence of optional fields + * unused in copy commands context, this copy + * descriptor is 64B in size but DMA HW will in + * practice read 80B of data each time it fetches + * a given descriptor. The extra 16B are discarded + * as soon as the DMA HW understands the optional + * fields are unused so this does not prevent + * allocating contiguous 64B descriptors. But this + * means that UMD must ensure there is always an + * extra 16B of memory accessible to the DMA HW + * immediately after the memory allocated for any + * descriptor. + * * @see VPU_CMD_COPY_SYSTEM_TO_SYSTEM */ typedef struct vpu_cmd_copy_descriptor_37xx { @@ -345,7 +361,7 @@ typedef struct vpu_cmd_timestamp { uint32_t reserved_0; /** * Timestamp address - * NOTE: Address must be aligned on a 64B boundary to allow proper handling of + * NOTE: (MTL) - Address must be aligned on a 64B boundary to allow proper handling of * VPU cache operations. */ uint64_t timestamp_address; diff --git a/firmware/include/api/vpu_nnrt_api_37xx.h b/firmware/include/api/vpu_nnrt_api_37xx.h index 2447c02..2ae82d4 100644 --- a/firmware/include/api/vpu_nnrt_api_37xx.h +++ b/firmware/include/api/vpu_nnrt_api_37xx.h @@ -26,18 +26,12 @@ * Note: VPU_NNRT_37XX_API_VER_PATCH is not stored in the VpuMappedInference as * compatibility is not affected if this changes. */ -#define VPU_NNRT_37XX_API_VER_MAJOR 6 -#define VPU_NNRT_37XX_API_VER_MINOR 2 -#define VPU_NNRT_37XX_API_VER_PATCH 0 +#define VPU_NNRT_37XX_API_VER_MAJOR 7 +#define VPU_NNRT_37XX_API_VER_MINOR 0 +#define VPU_NNRT_37XX_API_VER_PATCH 1 #define VPU_NNRT_37XX_API_VER ((VPU_NNRT_37XX_API_VER_MAJOR << 16) | VPU_NNRT_37XX_API_VER_MINOR) -// Temporarily define VPU_NNRT_API_VER until all consumers of this header are updated -// to use VPU_NNRT_37XX_API_VER -#define VPU_NNRT_API_VER VPU_NNRT_37XX_API_VER - -/* - * Index in the API version table - */ +/* Index in the API version table, same for all HW generations */ #define VPU_NNRT_37XX_API_VER_INDEX 7 /* @@ -121,6 +115,10 @@ struct VPU_ALIGNED_STRUCT(8) VpuTaskReference { // // Use the methods data(int64_t offset) and at(uint32_t index, int64_t offset) to // apply the aperture offset to convert address to the bridge aperture. 
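/*
 * Illustration, not part of the header above: per the copy-descriptor note in
 * vpu_jsm_job_cmd_api.h, the DMA engine fetches 80B for every 64B descriptor,
 * so a UMD allocating an array of vpu_cmd_copy_descriptor_37xx must leave 16
 * guard bytes after the last one. A hypothetical size computation:
 */
#include <stddef.h>

#define EXAMPLE_COPY_DESC_SIZE 64u      /* contiguous descriptor stride, per the note */
#define EXAMPLE_COPY_DESC_DMA_FETCH 80u /* bytes the DMA HW actually reads per fetch */

static inline size_t copy_desc_alloc_size(size_t desc_count)
{
    /* N descriptors plus the 16B tail the DMA HW may over-read. */
    return desc_count * EXAMPLE_COPY_DESC_SIZE +
           (EXAMPLE_COPY_DESC_DMA_FETCH - EXAMPLE_COPY_DESC_SIZE);
}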
+ uint64_t reserved1; + uint64_t reserved2; + uint64_t reserved3; + uint64_t address; uint64_t count; @@ -142,19 +140,19 @@ struct VPU_ALIGNED_STRUCT(8) VpuTaskReference { address = static_cast(reinterpret_cast(fixedVector.data())) - fixedVector.apertureOffset(); count = static_cast(fixedVector.size()); return *this; - }; + } }; -static_assert(sizeof(VpuTaskReference) == 16, "VpuTaskReference size != 16"); +static_assert(sizeof(VpuTaskReference) == 40, "VpuTaskReference size != 40"); typedef void(actKernelEntryFunction)(void *); -struct VPU_ALIGNED_STRUCT(2) VpuTaskSchedulingBarrierConfig { - uint16_t start_after_; - uint16_t clean_after_; +struct VPU_ALIGNED_STRUCT(4) VpuTaskSchedulingBarrierConfig { + uint32_t start_after_; + uint32_t clean_after_; }; -static_assert(sizeof(VpuTaskSchedulingBarrierConfig) == 4, "VpuTaskSchedulingBarrierConfig size != 4"); +static_assert(sizeof(VpuTaskSchedulingBarrierConfig) == 8, "VpuTaskSchedulingBarrierConfig size != 8"); struct VPU_ALIGNED_STRUCT(8) VpuTaskBarrierDependency { uint64_t wait_mask_; @@ -166,36 +164,33 @@ struct VPU_ALIGNED_STRUCT(8) VpuTaskBarrierDependency { static_assert(sizeof(VpuTaskBarrierDependency) == 24, "VpuTaskBarrierDependency size != 24"); -struct VPU_ALIGNED_STRUCT(2) VpuBarrierCountConfig { - uint16_t next_same_id_; +struct VPU_ALIGNED_STRUCT(4) VpuBarrierCountConfig { + uint32_t next_same_id_; uint16_t producer_count_; uint16_t consumer_count_; uint8_t real_id_; - uint8_t pad_; + uint8_t pad_[3]; }; -static_assert(sizeof(VpuBarrierCountConfig) == 8, "VpuBarrierCountConfig size != 8"); +static_assert(sizeof(VpuBarrierCountConfig) == 12, "VpuBarrierCountConfig size != 12"); struct VPU_ALIGNED_STRUCT(8) VpuDPUInvariant { VpuDPUInvariantRegisters registers_; - // breaking-change: change this - uint32_t hwp_cmx_base_offset_; // int32_t hwp_cmx_base_offset_; + int32_t hwp_cmx_base_offset_; VpuTaskBarrierDependency barriers_; VpuTaskSchedulingBarrierConfig barriers_sched_; uint16_t variant_count_; uint8_t cluster_; uint8_t is_cont_conv_; - // breaking-change: add this - // VpuHWPStatMode dpu_prof_mode; // add this - // uint8_t pad_[7]; // add this + VpuHWPStatMode dpu_prof_mode_; + uint8_t pad_[3]; }; -// breaking-change: change this -// static_assert(sizeof(VpuDPUInvariant) == 304, "DPUInvariant size != 304"); -static_assert(sizeof(VpuDPUInvariant) == 296, "DPUInvariant size != 296"); // == 304 +static_assert(sizeof(VpuDPUInvariant) == 304, "DPUInvariant size != 304"); static_assert(offsetof(VpuDPUInvariant, hwp_cmx_base_offset_) % 4 == 0, "Alignment error"); static_assert(offsetof(VpuDPUInvariant, barriers_) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuDPUInvariant, barriers_sched_) % 4 == 0, "Alignment error"); +static_assert(offsetof(VpuDPUInvariant, variant_count_) % 2 == 0, "Alignment error"); struct VPU_ALIGNED_STRUCT(4) VpuDPUVariant { VpuDPUVariantRegisters registers_; @@ -222,6 +217,8 @@ struct VPU_ALIGNED_STRUCT(4) VpuResourceRequirements { static_assert(sizeof(VpuResourceRequirements) == 12, "VpuResourceRequirements size != 12"); struct VPU_ALIGNED_STRUCT(4) VpuNNShaveRuntimeConfigs { + uint32_t reserved1; + uint32_t reserved2; uint32_t runtime_entry; // when useScheduleEmbeddedRt = true this is a windowed address uint32_t act_rt_window_base; uint32_t stack_frames[VPU_AS_TOTAL]; // UNUSED - to be removed @@ -235,7 +232,7 @@ struct VPU_ALIGNED_STRUCT(4) VpuNNShaveRuntimeConfigs { uint8_t pad_[2]; }; -static_assert(sizeof(VpuNNShaveRuntimeConfigs) == 44, "VpuNNShaveRuntimeConfigs size != 44"); 
+static_assert(sizeof(VpuNNShaveRuntimeConfigs) == 52, "VpuNNShaveRuntimeConfigs size != 52"); // Forcing struct padding so we have same sizeof() of the structure both on x86 compilation and Sparc // compilation. @@ -274,9 +271,10 @@ struct VPU_ALIGNED_STRUCT(8) VpuActKernelInvocation { uint32_t invo_index; uint32_t invo_tile; uint32_t kernel_range_index; + uint8_t pad_[4]; }; -static_assert(sizeof(VpuActKernelInvocation) == 56, "VpuActKernelInvocation size != 56"); +static_assert(sizeof(VpuActKernelInvocation) == 64, "VpuActKernelInvocation size != 64"); static_assert(offsetof(VpuActKernelInvocation, kernel_args) % 4 == 0, "Alignment error"); static_assert(offsetof(VpuActKernelInvocation, data_window_base) % 4 == 0, "Alignment error"); static_assert(offsetof(VpuActKernelInvocation, perf_packet_out) % 4 == 0, "Alignment error"); @@ -297,38 +295,47 @@ struct VPU_ALIGNED_STRUCT(64) VpuDescriptorWrapper { static_assert(sizeof(VpuDescriptorWrapper) == 128, "DMA descriptor wrapper size != 128"); +struct VPU_ALIGNED_STRUCT(4) VpuTaskCounts { + uint32_t reserved1; + uint32_t reserved2; + uint32_t dma_count; + uint32_t dpu_invariant_count; + uint32_t dpu_variant_count; + uint32_t act_range_count; + uint32_t act_invo_count; +}; + +static_assert(sizeof(VpuTaskCounts) == 28, "VpuTaskCounts size != 28"); + struct VPU_ALIGNED_STRUCT(64) VpuMappedInference { uint32_t vpu_nnrt_api_ver; uint8_t pad0_[4]; + uint64_t reserved0_; + VpuTaskCounts task_storage_counts_; + uint32_t task_storage_size_; VpuTaskReference dma_tasks[VPU_MAX_DMA_ENGINES]; VpuTaskReference invariants; VpuTaskReference variants; - VpuTaskReference barrier_configs; VpuTaskReference act_kernel_ranges; VpuTaskReference act_kernel_invocations; - uint8_t pad1_[8]; - VpuDescriptorWrapper feeder_descriptors[VPU_NUM_METADATA_FEEDERS]; - uint32_t leading_dma_tasks[VPU_MAX_DMA_ENGINES]; + VpuTaskReference barrier_configs; VpuNNShaveRuntimeConfigs shv_rt_configs; - uint8_t pad2_[12]; - // for unification might be added field: see updated system/nn/include/nn_public.h - // from version VPU_NN_PUBLIC_VER_MAJOR 6 - // logaddr_dma_hwp_ , padding. 
assert size checking added + uint8_t pad1_[4]; + VpuTaskReference reserved1_; // reserved for workload management + uint8_t pad2_[24]; }; -static_assert(sizeof(VpuMappedInference) == 960, "VpuMappedInference size != 960"); -static_assert(sizeof(VpuMappedInference::feeder_descriptors) == - (VPU_NUM_METADATA_FEEDERS * sizeof(VpuDescriptorWrapper)), - "Sizeof feeder_descriptors != VPU_NUM_METADATA_FEEDERS * VpuDescriptorWrapper"); +static_assert(sizeof(VpuMappedInference) == 448, "VpuMappedInference size != 448"); +static_assert(offsetof(VpuMappedInference, task_storage_counts_) % 4 == 0, "Alignment error"); +static_assert(offsetof(VpuMappedInference, task_storage_size_) % 4 == 0, "Alignment error"); static_assert(offsetof(VpuMappedInference, dma_tasks) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuMappedInference, invariants) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuMappedInference, variants) % 8 == 0, "Alignment error"); -static_assert(offsetof(VpuMappedInference, barrier_configs) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuMappedInference, act_kernel_ranges) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuMappedInference, act_kernel_invocations) % 8 == 0, "Alignment error"); -static_assert(offsetof(VpuMappedInference, feeder_descriptors) % 64 == 0, "Alignment error"); -static_assert(offsetof(VpuMappedInference, leading_dma_tasks) % 4 == 0, "Alignment error"); -static_assert(offsetof(VpuMappedInference, shv_rt_configs) % 8 == 0, "Alignment error"); +static_assert(offsetof(VpuMappedInference, barrier_configs) % 8 == 0, "Alignment error"); +static_assert(offsetof(VpuMappedInference, shv_rt_configs) % 4 == 0, "Alignment error"); +static_assert(offsetof(VpuMappedInference, reserved1_) % 8 == 0, "Alignment error"); struct VPU_ALIGNED_STRUCT(8) VpuPerformanceMetrics { uint32_t freq_base; ///< Base of frequency values used in tables (in MHz). 
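The restructured VpuMappedInference above replaces the old feeder descriptors with explicit task totals. A minimal host-side sketch of how the new block could be filled before submission (the vector names and the surrounding parser are hypothetical, not part of this API):

    // Hypothetical host-side helper: publish per-type task totals.
    VpuTaskCounts counts = {};  // zero-init keeps reserved1/reserved2 clear
    counts.dma_count           = static_cast<uint32_t>(dmaTasks.size());
    counts.dpu_invariant_count = static_cast<uint32_t>(invariants.size());
    counts.dpu_variant_count   = static_cast<uint32_t>(variants.size());
    counts.act_range_count     = static_cast<uint32_t>(actKernelRanges.size());
    counts.act_invo_count      = static_cast<uint32_t>(actKernelInvocations.size());
    mappedInference.task_storage_counts_ = counts;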
@@ -347,13 +354,14 @@ struct VPU_ALIGNED_STRUCT(8) VpuPerformanceMetrics { static_assert(sizeof(VpuPerformanceMetrics) == 320, "VpuPerformanceMetrics size != 320"); struct VPU_ALIGNED_STRUCT(8) VpuHostParsedInference { + uint64_t reserved; VpuResourceRequirements resource_requirements_; uint8_t pad_[4]; VpuPerformanceMetrics performance_metrics_; VpuTaskReference mapped_; }; -static_assert(sizeof(VpuHostParsedInference) == 352, "VpuHostParsedInference size != 352"); +static_assert(sizeof(VpuHostParsedInference) == 384, "VpuHostParsedInference size != 384"); static_assert(offsetof(VpuHostParsedInference, resource_requirements_) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuHostParsedInference, performance_metrics_) % 8 == 0, "Alignment error"); static_assert(offsetof(VpuHostParsedInference, mapped_) % 8 == 0, "Alignment error"); diff --git a/linux/include/uapi/drm/ivpu_accel.h b/linux/include/uapi/drm/ivpu_accel.h index 96b64f2..eeae6c9 100644 --- a/linux/include/uapi/drm/ivpu_accel.h +++ b/linux/include/uapi/drm/ivpu_accel.h @@ -74,7 +74,7 @@ extern "C" { #define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3 #define DRM_IVPU_PARAM_NUM_CONTEXTS 4 #define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 -#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 +#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 /* Deprecated */ #define DRM_IVPU_PARAM_CONTEXT_ID 7 #define DRM_IVPU_PARAM_FW_API_VERSION 8 #define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9 @@ -85,13 +85,32 @@ extern "C" { #define DRM_IVPU_PLATFORM_TYPE_SILICON 0 +/* Deprecated, use DRM_IVPU_JOB_PRIORITY */ #define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0 #define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1 #define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 #define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 -#define DRM_IVPU_CAP_METRIC_STREAMER 1 -#define DRM_IVPU_CAP_DMA_MEMORY_RANGE 2 +#define DRM_IVPU_JOB_PRIORITY_DEFAULT 0 +#define DRM_IVPU_JOB_PRIORITY_IDLE 1 +#define DRM_IVPU_JOB_PRIORITY_NORMAL 2 +#define DRM_IVPU_JOB_PRIORITY_FOCUS 3 +#define DRM_IVPU_JOB_PRIORITY_REALTIME 4 + +/** + * DRM_IVPU_CAP_METRIC_STREAMER + * + * Metric streamer support. Provides sampling of various hardware performance + * metrics like DMA bandwidth and cache miss/hits. Can be used for profiling. + */ +#define DRM_IVPU_CAP_METRIC_STREAMER 1 +/** + * DRM_IVPU_CAP_DMA_MEMORY_RANGE + * + * Driver has capability to allocate separate memory range + * accessible by hardware DMA. + */ +#define DRM_IVPU_CAP_DMA_MEMORY_RANGE 2 /** * struct drm_ivpu_param - Get/Set VPU parameters @@ -121,10 +140,6 @@ struct drm_ivpu_param { * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: * Lowest VPU virtual address available in the current context (read-only) * - * %DRM_IVPU_PARAM_CONTEXT_PRIORITY: - * Value of current context scheduling priority (read-write). - * See DRM_IVPU_CONTEXT_PRIORITY_* for possible values. - * * %DRM_IVPU_PARAM_CONTEXT_ID: * Current context ID, always greater than 0 (read-only) * @@ -144,6 +159,8 @@ struct drm_ivpu_param { * %DRM_IVPU_PARAM_SKU: * VPU SKU ID (read-only) * + * %DRM_IVPU_PARAM_CAPABILITIES: + * Supported capabilities (read-only) */ __u32 param; @@ -293,6 +310,18 @@ struct drm_ivpu_submit { * to be executed. The offset has to be 8-byte aligned. 
*/ __u32 commands_offset; + + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the following: + * %DRM_IVPU_JOB_PRIORITY_DEFAULT + * %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; };
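Taken together with the capability defines above, a hedged userspace sketch (assumes an already-open ivpu render-node `fd` and the DRM_IOCTL_IVPU_GET_PARAM/DRM_IOCTL_IVPU_SUBMIT wrappers defined elsewhere in this header; error handling omitted):

    struct drm_ivpu_param param = {};
    param.param = DRM_IVPU_PARAM_CAPABILITIES;
    param.index = DRM_IVPU_CAP_METRIC_STREAMER;        /* capability to probe */
    int has_ms = ioctl(fd, DRM_IOCTL_IVPU_GET_PARAM, &param) == 0 && param.value;

    struct drm_ivpu_submit submit = {};
    /* buffers_ptr, buffer_count and commands_offset set up as before */
    submit.priority = DRM_IVPU_JOB_PRIORITY_REALTIME;  /* replaces the deprecated context-wide priority */
    ioctl(fd, DRM_IOCTL_IVPU_SUBMIT, &submit);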
/* drm_ivpu_bo_wait job status codes */ diff --git a/linux/include/uapi/linux/dma-buf.h b/linux/include/uapi/linux/dma-buf.h new file mode 100644 index 0000000..5a6fda6 --- /dev/null +++ b/linux/include/uapi/linux/dma-buf.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Framework for buffer objects that can be shared across devices/subsystems. + * + * Copyright(C) 2015 Intel Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _DMA_BUF_UAPI_H_ +#define _DMA_BUF_UAPI_H_ + +#include <linux/types.h> + +/** + * struct dma_buf_sync - Synchronize with CPU access. + * + * When a DMA buffer is accessed from the CPU via mmap, it is not always + * possible to guarantee coherency between the CPU-visible map and underlying + * memory. To manage coherency, DMA_BUF_IOCTL_SYNC must be used to bracket + * any CPU access to give the kernel the chance to shuffle memory around if + * needed. + * + * Prior to accessing the map, the client must call DMA_BUF_IOCTL_SYNC + * with DMA_BUF_SYNC_START and the appropriate read/write flags. Once the + * access is complete, the client should call DMA_BUF_IOCTL_SYNC with + * DMA_BUF_SYNC_END and the same read/write flags. + * + * The synchronization provided via DMA_BUF_IOCTL_SYNC only provides cache + * coherency. It does not prevent other processes or devices from + * accessing the memory at the same time. If synchronization with a GPU or + * other device driver is required, it is the client's responsibility to + * wait for the buffer to be ready for reading or writing before calling this + * ioctl with DMA_BUF_SYNC_START. Likewise, the client must ensure that + * follow-up work is not submitted to GPU or other device driver until + * after this ioctl has been called with DMA_BUF_SYNC_END. + * + * If the driver or API with which the client is interacting uses implicit + * synchronization, waiting for prior work to complete can be done via + * poll() on the DMA buffer file descriptor. If the driver or API requires + * explicit synchronization, the client may have to wait on a sync_file or + * other synchronization primitive outside the scope of the DMA buffer API. + */ +struct dma_buf_sync { + /** + * @flags: Set of access flags + * + * DMA_BUF_SYNC_START: + * Indicates the start of a map access session. + * + * DMA_BUF_SYNC_END: + * Indicates the end of a map access session. + * + * DMA_BUF_SYNC_READ: + * Indicates that the mapped DMA buffer will be read by the + * client via the CPU map. + * + * DMA_BUF_SYNC_WRITE: + * Indicates that the mapped DMA buffer will be written by the + * client via the CPU map. + * + * DMA_BUF_SYNC_RW: + * An alias for DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE. + */ + __u64 flags; +}; + +#define DMA_BUF_SYNC_READ (1 << 0) +#define DMA_BUF_SYNC_WRITE (2 << 0) +#define DMA_BUF_SYNC_RW (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE) +#define DMA_BUF_SYNC_START (0 << 2) +#define DMA_BUF_SYNC_END (1 << 2) +#define DMA_BUF_SYNC_VALID_FLAGS_MASK \ + (DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END) + +#define DMA_BUF_NAME_LEN 32
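A short sketch of the bracketing protocol described above, using the flags just defined and the DMA_BUF_IOCTL_SYNC ioctl from the end of this header (`dmabuf_fd` is an already-mmap-ed dma-buf):

    struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW };
    ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);   /* begin CPU access */
    /* read and write the CPU mapping here */
    sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
    ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);   /* end CPU access */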
+/** + * struct dma_buf_export_sync_file - Get a sync_file from a dma-buf + * + * Userspace can perform a DMA_BUF_IOCTL_EXPORT_SYNC_FILE to retrieve the + * current set of fences on a dma-buf file descriptor as a sync_file. CPU + * waits via poll() or other driver-specific mechanisms typically wait on + * whatever fences are on the dma-buf at the time the wait begins. This + * is similar except that it takes a snapshot of the current fences on the + * dma-buf for waiting later instead of waiting immediately. This is + * useful for modern graphics APIs such as Vulkan which assume an explicit + * synchronization model but still need to inter-operate with dma-buf. + * + * The intended usage pattern is the following: + * + * 1. Export a sync_file with flags corresponding to the expected GPU usage + * via DMA_BUF_IOCTL_EXPORT_SYNC_FILE. + * + * 2. Submit rendering work which uses the dma-buf. The work should wait on + * the exported sync file before rendering and produce another sync_file + * when complete. + * + * 3. Import the rendering-complete sync_file into the dma-buf with flags + * corresponding to the GPU usage via DMA_BUF_IOCTL_IMPORT_SYNC_FILE. + * + * Unlike doing implicit synchronization via a GPU kernel driver's exec ioctl, + * the above is not a single atomic operation. If userspace wants to ensure + * ordering via these fences, it is the responsibility of userspace to use + * locks or other mechanisms to ensure that no other context adds fences or + * submits work between steps 1 and 3 above. + */ +struct dma_buf_export_sync_file { + /** + * @flags: Read/write flags + * + * Must be DMA_BUF_SYNC_READ, DMA_BUF_SYNC_WRITE, or both. + * + * If DMA_BUF_SYNC_READ is set and DMA_BUF_SYNC_WRITE is not set, + * the returned sync file waits on any writers of the dma-buf to + * complete. Waiting on the returned sync file is equivalent to + * poll() with POLLIN. + * + * If DMA_BUF_SYNC_WRITE is set, the returned sync file waits on + * any users of the dma-buf (read or write) to complete. Waiting + * on the returned sync file is equivalent to poll() with POLLOUT. + * If both DMA_BUF_SYNC_WRITE and DMA_BUF_SYNC_READ are set, this + * is equivalent to just DMA_BUF_SYNC_WRITE. + */ + __u32 flags; + /** @fd: Returned sync file descriptor */ + __s32 fd; +}; + +/** + * struct dma_buf_import_sync_file - Insert a sync_file into a dma-buf + * + * Userspace can perform a DMA_BUF_IOCTL_IMPORT_SYNC_FILE to insert a + * sync_file into a dma-buf for the purposes of implicit synchronization + * with other dma-buf consumers. This allows clients using explicitly + * synchronized APIs such as Vulkan to inter-op with dma-buf consumers + * which expect implicit synchronization such as OpenGL or most media + * drivers/video. + */ +struct dma_buf_import_sync_file { + /** + * @flags: Read/write flags + * + * Must be DMA_BUF_SYNC_READ, DMA_BUF_SYNC_WRITE, or both. + * + * If DMA_BUF_SYNC_READ is set and DMA_BUF_SYNC_WRITE is not set, + * this inserts the sync_file as a read-only fence. Any subsequent + * implicitly synchronized writes to this dma-buf will wait on this + * fence but reads will not. + * + * If DMA_BUF_SYNC_WRITE is set, this inserts the sync_file as a + * write fence. All subsequent implicitly synchronized access to + * this dma-buf will wait on this fence. + */ + __u32 flags; + /** @fd: Sync file descriptor */ + __s32 fd; +}; + +#define DMA_BUF_BASE 'b' +#define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) + +/* 32/64bitness of this uapi was botched in android, there's no difference + * between them in actual uapi, they're just different numbers. + */ +#define DMA_BUF_SET_NAME _IOW(DMA_BUF_BASE, 1, const char *) +#define DMA_BUF_SET_NAME_A _IOW(DMA_BUF_BASE, 1, __u32) +#define DMA_BUF_SET_NAME_B _IOW(DMA_BUF_BASE, 1, __u64) +#define DMA_BUF_IOCTL_EXPORT_SYNC_FILE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_export_sync_file) +#define DMA_BUF_IOCTL_IMPORT_SYNC_FILE _IOW(DMA_BUF_BASE, 3, struct dma_buf_import_sync_file) + +#endif diff --git a/linux/include/uapi/linux/dma-heap.h b/linux/include/uapi/linux/dma-heap.h new file mode 100644 index 0000000..6f84fa0 --- /dev/null +++ b/linux/include/uapi/linux/dma-heap.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * DMABUF Heaps Userspace API + * + * Copyright (C) 2011 Google, Inc. + * Copyright (C) 2019 Linaro Ltd. + */ +#ifndef _UAPI_LINUX_DMABUF_POOL_H +#define _UAPI_LINUX_DMABUF_POOL_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +/** + * DOC: DMABUF Heaps Userspace API + */ + +/* Valid FD_FLAGS are O_CLOEXEC, O_RDONLY, O_WRONLY, O_RDWR */ +#define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE) + +/* Currently no heap flags */ +#define DMA_HEAP_VALID_HEAP_FLAGS (0) + +/** + * struct dma_heap_allocation_data - metadata passed from userspace for + * allocations + * @len: size of the allocation + * @fd: will be populated with a fd which provides the + * handle to the allocated dma-buf + * @fd_flags: file descriptor flags used when allocating + * @heap_flags: flags passed to heap + * + * Provided by userspace as an argument to the ioctl + */ +struct dma_heap_allocation_data { + __u64 len; + __u32 fd; + __u32 fd_flags; + __u64 heap_flags; +}; + +#define DMA_HEAP_IOC_MAGIC 'H' + +/** + * DOC: DMA_HEAP_IOCTL_ALLOC - allocate memory from pool + * + * Takes a dma_heap_allocation_data struct and returns it with the fd field + * populated with the dmabuf handle of the allocation. + */ +#define DMA_HEAP_IOCTL_ALLOC _IOWR(DMA_HEAP_IOC_MAGIC, 0x0,\ + struct dma_heap_allocation_data) + +#endif /* _UAPI_LINUX_DMABUF_POOL_H */
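Allocation is a single ioctl against the heap's character device; a hedged sketch (the system heap path is typical but depends on kernel configuration):

    int heap_fd = open("/dev/dma_heap/system", O_RDONLY | O_CLOEXEC);
    struct dma_heap_allocation_data alloc = {
        .len = 4096,
        .fd_flags = O_RDWR | O_CLOEXEC,   /* must satisfy DMA_HEAP_VALID_FD_FLAGS */
    };
    if (ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc) == 0) {
        /* alloc.fd now holds the dma-buf handle */
    }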
diff --git a/third_party/level-zero b/third_party/level-zero index 0d56d8e..ac99dbf 160000 --- a/third_party/level-zero +++ b/third_party/level-zero @@ -1 +1 @@ -Subproject commit 0d56d8e0434e8b756ddc7ffd982e8ac056061fb4 +Subproject commit ac99dbfb937f0715171eb39f83b5fadf20474b68 diff --git a/third_party/vpux_elf b/third_party/vpux_elf index 4ce0e48..03878c1 160000 --- a/third_party/vpux_elf +++ b/third_party/vpux_elf @@ -1 +1 @@ -Subproject commit 4ce0e48a5a85ceac8825a3255e2647979e26e238 +Subproject commit 03878c115d13aa1ce6af5329c5759fc1cc94a3fb diff --git a/umd/CMakeLists.txt b/umd/CMakeLists.txt index 62ce78b..6e566f0 100644 --- a/umd/CMakeLists.txt +++ b/umd/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2023 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -12,6 +12,9 @@ if (NOT UMD_LOGLEVEL) set(UMD_LOGLEVEL ERROR) endif() +set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -g") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUMD_LOGLEVEL=${UMD_LOGLEVEL}") set(CMAKE_CXX_STANDARD 17) @@ -38,11 +41,6 @@ if(${CMAKE_CXX_COMPILER} MATCHES "clang") add_compile_options(-Wno-sign-conversion) endif() -if(NOT ${BUILD_TYPE_LOWER} STREQUAL "debug") - # TODO: Consider to move debug symbols to file and then strip the library - add_link_options(-s) -endif() - add_subdirectory_unique(vpu_driver/source) add_subdirectory_unique(vpu_driver/unit_tests) add_subdirectory_unique(level_zero_driver) diff --git a/umd/level_zero_driver/CMakeLists.txt b/umd/level_zero_driver/CMakeLists.txt index 75b07a4..6cca6af 100644 --- a/umd/level_zero_driver/CMakeLists.txt +++ b/umd/level_zero_driver/CMakeLists.txt @@ -1,28 +1,17 @@ # -# Copyright (C) 2022 Intel Corporation +# Copyright (C) 2022-2024 Intel Corporation # # SPDX-License-Identifier: MIT # set(TARGET_NAME_L0 ze_intel_vpu) -project(level-zero-npu VERSION 1.1.0) -message(STATUS "Level Zero driver version: ${PROJECT_VERSION}") - -# VPU UMD Version -# Note: Max value: 2 bytes / 65535 -if(NOT DEFINED VPU_VERSION_BUILD) - set(VPU_VERSION_BUILD 0) - string(TIMESTAMP VPU_VERSION_BUILD "%y%U%w") -endif() - -add_compile_definitions( - L0_PROJECT_VERSION_MAJOR=${PROJECT_VERSION_MAJOR} - L0_PROJECT_VERSION_MINOR=${PROJECT_VERSION_MINOR} - VPU_VERSION_BUILD=${VPU_VERSION_BUILD}) +string(TIMESTAMP UNIX_SECONDS %s UTC) +set(DRIVER_VERSION ${UNIX_SECONDS} CACHE STRING "Decimal value that sets the Level Zero driverVersion") +add_compile_definitions(DRIVER_VERSION=${DRIVER_VERSION}) +message(STATUS "Level Zero driver version: ${DRIVER_VERSION}") add_library(${TARGET_NAME_L0} SHARED) -add_dependencies(${TARGET_NAME_L0} ze_loader) target_link_libraries(${TARGET_NAME_L0} vpu_driver @@ -44,4 +33,4 @@ add_subdirectory(tools/source) target_include_directories(${TARGET_NAME_L0} PUBLIC ddi) install(TARGETS ${TARGET_NAME_L0} - COMPONENT ${PROJECT_NAME}) + COMPONENT level-zero-npu) diff --git a/umd/level_zero_driver/api/core/ze_core_loader.cpp b/umd/level_zero_driver/api/core/ze_core_loader.cpp index 6a31bcf..42876e3 100644 --- a/umd/level_zero_driver/api/core/ze_core_loader.cpp +++ b/umd/level_zero_driver/api/core/ze_core_loader.cpp @@ -15,8 +15,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetDriverProcAddrTable(ze_api_version_t ve ze_driver_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return
ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnGet = L0::zeDriverGet; pDdiTable->pfnGetApiVersion = L0::zeDriverGetApiVersion; @@ -31,8 +32,9 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetMemProcAddrTable(ze_api_version_t versi ze_mem_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnAllocShared = L0::zeMemAllocShared; pDdiTable->pfnAllocDevice = L0::zeMemAllocDevice; @@ -50,8 +52,9 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetContextProcAddrTable(ze_api_version_t v ze_context_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = L0::zeContextCreate; pDdiTable->pfnDestroy = L0::zeContextDestroy; @@ -68,8 +71,9 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetPhysicalMemProcAddrTable(ze_api_version_t version, ze_physical_mem_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = L0::zePhysicalMemCreate; pDdiTable->pfnDestroy = L0::zePhysicalMemDestroy; @@ -80,8 +84,9 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zeGetVirtualMemProcAddrTable(ze_api_version_t version, ze_virtual_mem_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnReserve = L0::zeVirtualMemReserve; pDdiTable->pfnFree = L0::zeVirtualMemFree; @@ -97,8 +102,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetGlobalProcAddrTable(ze_api_version_t ve ze_global_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnInit = L0::zeInit; return ZE_RESULT_SUCCESS; @@ -108,8 +114,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetDeviceProcAddrTable(ze_api_version_t ve ze_device_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnGet = L0::zeDeviceGet; pDdiTable->pfnGetCommandQueueGroupProperties = L0::zeDeviceGetCommandQueueGroupProperties; @@ -132,8 +139,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetCommandQueueProcAddrTable(ze_api_version_t version, ze_command_queue_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if 
(ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = L0::zeCommandQueueCreate; pDdiTable->pfnDestroy = L0::zeCommandQueueDestroy; @@ -146,8 +154,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetCommandListProcAddrTable(ze_api_version_t version, ze_command_list_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnAppendBarrier = L0::zeCommandListAppendBarrier; pDdiTable->pfnAppendMemoryRangesBarrier = L0::zeCommandListAppendMemoryRangesBarrier; @@ -183,8 +192,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetFenceProcAddrTable(ze_api_version_t ver ze_fence_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = L0::zeFenceCreate; pDdiTable->pfnDestroy = L0::zeFenceDestroy; @@ -198,8 +208,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetEventPoolProcAddrTable(ze_api_version_t version, ze_event_pool_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = L0::zeEventPoolCreate; pDdiTable->pfnDestroy = L0::zeEventPoolDestroy; @@ -213,8 +224,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetEventProcAddrTable(ze_api_version_t ver ze_event_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = L0::zeEventCreate; pDdiTable->pfnDestroy = L0::zeEventDestroy; @@ -230,8 +242,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetImageProcAddrTable(ze_api_version_t ver ze_image_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnGetProperties = nullptr; // zeImageGetProperties pDdiTable->pfnCreate = nullptr; // zeImageCreate @@ -243,8 +256,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetModuleProcAddrTable(ze_api_version_t ve ze_module_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = nullptr; // zeModuleCreate pDdiTable->pfnDestroy = nullptr; // zeModuleDestroy @@ -262,8 +276,9 @@ zeGetModuleBuildLogProcAddrTable(ze_api_version_t version, ze_module_build_log_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - 
if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnDestroy = nullptr; // zeModuleBuildLogDestroy pDdiTable->pfnGetString = nullptr; // zeModuleBuildLogGetString @@ -274,8 +289,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetKernelProcAddrTable(ze_api_version_t ve ze_kernel_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = nullptr; // zeKernelCreate pDdiTable->pfnDestroy = nullptr; // zeKernelDestroy @@ -297,8 +313,9 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeGetSamplerProcAddrTable(ze_api_version_t v ze_sampler_dditable_t *pDdiTable) { if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_ARGUMENT; - if (ZE_API_VERSION_CURRENT < version) - return ZE_RESULT_ERROR_UNKNOWN; + + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) + return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; pDdiTable->pfnCreate = nullptr; // zeSamplerCreate pDdiTable->pfnDestroy = nullptr; // zeSamplerDestroy diff --git a/umd/level_zero_driver/api/core/ze_memory.hpp b/umd/level_zero_driver/api/core/ze_memory.hpp index 9240f29..fa0601a 100644 --- a/umd/level_zero_driver/api/core/ze_memory.hpp +++ b/umd/level_zero_driver/api/core/ze_memory.hpp @@ -8,6 +8,7 @@ #pragma once #include "level_zero_driver/core/source/driver/driver_handle.hpp" +#include "vpu_driver/source/memory/vpu_buffer_object.hpp" #include namespace L0 { @@ -22,12 +23,48 @@ ze_result_t zeMemAllocShared(ze_context_handle_t hContext, return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (deviceDesc == nullptr || hostDesc == nullptr) { + if (deviceDesc == nullptr || hostDesc == nullptr || + (deviceDesc->pNext && !checkPtrAlignment(deviceDesc->pNext))) { return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - return L0::Context::fromHandle(hContext) - ->allocSharedMem(hDevice, 0, hostDesc->flags, size, alignment, pptr); + ze_structure_type_t extendedAllocType = + deviceDesc->pNext ? 
*reinterpret_cast<const ze_structure_type_t *>(deviceDesc->pNext) + : ZE_STRUCTURE_TYPE_FORCE_UINT32; + + /* For allocating an exportable buffer only the single ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF + * flag is supported; flag combinations are not allowed + */ + switch (extendedAllocType) { + case ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC: { + const ze_external_memory_export_desc_t *pExtMemDesc = + reinterpret_cast<const ze_external_memory_export_desc_t *>(deviceDesc->pNext); + + if (pExtMemDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) + return L0::Context::fromHandle(hContext)->allocSharedMem( + hDevice, + 0, + hostDesc->flags, + size, + alignment, + pptr, + VPU::VPUBufferObject::Location::ExternalShared); + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + case ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD: { + const ze_external_memory_import_fd_t *pImportMemDesc = + reinterpret_cast<const ze_external_memory_import_fd_t *>(deviceDesc->pNext); + if (pImportMemDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) + return L0::Context::fromHandle(hContext)->importMemory( + VPU::VPUBufferObject::Location::ExternalShared, + pImportMemDesc->fd, + pptr); + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + default: + return L0::Context::fromHandle(hContext) + ->allocSharedMem(hDevice, 0, hostDesc->flags, size, alignment, pptr); + } } ze_result_t zeMemAllocDevice(ze_context_handle_t hContext, @@ -40,11 +77,46 @@ ze_result_t zeMemAllocDevice(ze_context_handle_t hContext, return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (deviceDesc == nullptr) { + if (deviceDesc == nullptr || + (deviceDesc->pNext && !checkPtrAlignment(deviceDesc->pNext))) { return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - return L0::Context::fromHandle(hContext)->allocDeviceMem(hDevice, 0, size, alignment, pptr); + ze_structure_type_t extendedAllocType = + deviceDesc->pNext ? *reinterpret_cast<const ze_structure_type_t *>(deviceDesc->pNext) + : ZE_STRUCTURE_TYPE_FORCE_UINT32; + + /* For allocating an exportable buffer only the single ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF + * flag is supported; flag combinations are not allowed + */ + switch (extendedAllocType) { + case ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC: { + const ze_external_memory_export_desc_t *pExtMemDesc = + reinterpret_cast<const ze_external_memory_export_desc_t *>(deviceDesc->pNext); + + if (pExtMemDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) + return L0::Context::fromHandle(hContext)->allocDeviceMem( + hDevice, + 0, + size, + alignment, + pptr, + VPU::VPUBufferObject::Location::ExternalDevice); + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + case ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD: { + const ze_external_memory_import_fd_t *pImportMemDesc = + reinterpret_cast<const ze_external_memory_import_fd_t *>(deviceDesc->pNext); + if (pImportMemDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) + return L0::Context::fromHandle(hContext)->importMemory( + VPU::VPUBufferObject::Location::ExternalDevice, + pImportMemDesc->fd, + pptr); + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + default: + return L0::Context::fromHandle(hContext)->allocDeviceMem(hDevice, 0, size, alignment, pptr); + } }
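For reference, the export path these branches implement is driven from the caller through the pNext chain; a hedged sketch of requesting an exportable device allocation (retrieving the dma-buf fd afterwards goes through zeMemGetAllocProperties with a chained ze_external_memory_export_fd_t, not shown here):

    ze_external_memory_export_desc_t exportDesc = {};
    exportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC;
    exportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;  // single flag only

    ze_device_mem_alloc_desc_t deviceDesc = {};
    deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
    deviceDesc.pNext = &exportDesc;

    void *ptr = nullptr;
    zeMemAllocDevice(hContext, &deviceDesc, 4096, 4096, hDevice, &ptr);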
ze_result_t zeMemAllocHost(ze_context_handle_t hContext, @@ -56,11 +128,48 @@ ze_result_t zeMemAllocHost(ze_context_handle_t hContext, return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (hostDesc == nullptr) { + if (hostDesc == nullptr || + (hostDesc->pNext && !checkPtrAlignment(hostDesc->pNext))) { return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - return L0::Context::fromHandle(hContext)->allocHostMem(hostDesc->flags, size, alignment, pptr); + ze_structure_type_t extendedAllocType = + hostDesc->pNext ? *reinterpret_cast<const ze_structure_type_t *>(hostDesc->pNext) + : ZE_STRUCTURE_TYPE_FORCE_UINT32; + + /* For allocating an exportable buffer only the single ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF + * flag is supported; flag combinations are not allowed + */ + switch (extendedAllocType) { + case ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC: { + const ze_external_memory_export_desc_t *pExtMemDesc = + reinterpret_cast<const ze_external_memory_export_desc_t *>(hostDesc->pNext); + + if (pExtMemDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) + return L0::Context::fromHandle(hContext)->allocHostMem( + hostDesc->flags, + size, + alignment, + pptr, + VPU::VPUBufferObject::Location::ExternalHost); + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + case ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD: { + const ze_external_memory_import_fd_t *pImportMemDesc = + reinterpret_cast<const ze_external_memory_import_fd_t *>(hostDesc->pNext); + if (pImportMemDesc->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) + return L0::Context::fromHandle(hContext)->importMemory( + VPU::VPUBufferObject::Location::ExternalHost, + pImportMemDesc->fd, + pptr); + return ZE_RESULT_ERROR_INVALID_ENUMERATION; + } + default: + return L0::Context::fromHandle(hContext)->allocHostMem(hostDesc->flags, + size, + alignment, + pptr); + } } ze_result_t zeMemFree(ze_context_handle_t hContext, void *ptr) { diff --git a/umd/level_zero_driver/api/core/zes_loader.cpp b/umd/level_zero_driver/api/core/zes_loader.cpp index 3d3e1ac..c9ae795 100644 --- a/umd/level_zero_driver/api/core/zes_loader.cpp +++ b/umd/level_zero_driver/api/core/zes_loader.cpp @@ -25,7 +25,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetDeviceProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -96,7 +96,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetDriverProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -123,7 +123,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetDiagnosticsProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -152,7 +152,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetEngineProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -179,7 +179,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFabricPortProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -214,7 +214,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFanProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return
ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -249,7 +249,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFirmwareProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -276,7 +276,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetFrequencyProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -333,7 +333,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetLedProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -364,7 +364,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetMemoryProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -394,7 +394,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetPerformanceFactorProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -423,7 +423,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetPowerProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -458,7 +458,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetPsuProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -485,7 +485,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetRasProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -516,7 +516,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetSchedulerProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -555,7 +555,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetStandbyProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) 
return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -584,7 +584,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zesGetTemperatureProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; diff --git a/umd/level_zero_driver/api/ext/ze_graph.cpp b/umd/level_zero_driver/api/ext/ze_graph.cpp index 6f974a5..dec429a 100644 --- a/umd/level_zero_driver/api/ext/ze_graph.cpp +++ b/umd/level_zero_driver/api/ext/ze_graph.cpp @@ -277,6 +277,10 @@ zeGraphQueryNetworkCreate(ze_context_handle_t hContext, ze_result_t ZE_APICALL zeGraphQueryNetworkDestroy(ze_graph_query_network_handle_t hGraphQueryNetwork) { + if (hGraphQueryNetwork == nullptr) { + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + } + return L0::QueryNetwork::fromHandle(hGraphQueryNetwork)->destroy(); } @@ -284,6 +288,10 @@ ze_result_t ZE_APICALL zeGraphQueryNetworkGetSupportedLayers(ze_graph_query_network_handle_t hGraphQueryNetwork, size_t *pSize, char *pSupportedLayers) { + if (hGraphQueryNetwork == nullptr) { + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + } + return L0::QueryNetwork::fromHandle(hGraphQueryNetwork) ->getSupportedLayers(pSize, pSupportedLayers); } diff --git a/umd/level_zero_driver/api/tools/ze_tools_loader.cpp b/umd/level_zero_driver/api/tools/ze_tools_loader.cpp index afe9f8e..855ea39 100644 --- a/umd/level_zero_driver/api/tools/ze_tools_loader.cpp +++ b/umd/level_zero_driver/api/tools/ze_tools_loader.cpp @@ -26,7 +26,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetDeviceProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -51,7 +51,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetContextProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -76,7 +76,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetCommandListProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -107,7 +107,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetKernelProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -132,7 +132,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetModuleProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -157,7 +157,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetDebugProcAddrTable( if (nullptr == pDdiTable) return 
ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -202,7 +202,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -229,7 +229,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricGroupProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -258,7 +258,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricQueryProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -290,7 +290,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricQueryPoolProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -318,7 +318,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetMetricStreamerProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; @@ -347,7 +347,7 @@ ZE_DLLEXPORT ze_result_t ZE_APICALL zetGetTracerExpProcAddrTable( if (nullptr == pDdiTable) return ZE_RESULT_ERROR_INVALID_NULL_POINTER; - if (ZE_API_VERSION_CURRENT < version) + if (ZE_MAJOR_VERSION(ZE_API_VERSION_CURRENT) != ZE_MAJOR_VERSION(version)) return ZE_RESULT_ERROR_UNSUPPORTED_VERSION; ze_result_t result = ZE_RESULT_SUCCESS; diff --git a/umd/level_zero_driver/core/source/cmdlist/cmdlist.cpp b/umd/level_zero_driver/core/source/cmdlist/cmdlist.cpp index 40f8b1c..73cbc07 100644 --- a/umd/level_zero_driver/core/source/cmdlist/cmdlist.cpp +++ b/umd/level_zero_driver/core/source/cmdlist/cmdlist.cpp @@ -222,7 +222,7 @@ ze_result_t CommandList::appendMemoryFill(void *ptr, } auto patternBo = - ctx->createInternalBufferObject(size + patternSize, VPU::VPUBufferObject::Type::CachedHigh); + ctx->createInternalBufferObject(size + patternSize, VPU::VPUBufferObject::Type::CachedDma); if (patternBo == nullptr) { LOG_E("Failed to allocate memory"); return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; @@ -260,7 +260,7 @@ ze_result_t CommandList::appendWriteGlobalTimestamp(uint64_t *dstptr, } auto allignedBo = - ctx->createInternalBufferObject(sizeof(uint64_t), VPU::VPUBufferObject::Type::CachedLow); + ctx->createInternalBufferObject(sizeof(uint64_t), VPU::VPUBufferObject::Type::CachedFw); if (allignedBo == nullptr) { LOG_E("Failed to allocate memory"); diff --git a/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.cpp b/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.cpp index 
fce8afc..cf00a60 100644 --- a/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.cpp +++ b/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.cpp @@ -12,10 +12,24 @@ #include "vpu_driver/source/utilities/timer.hpp" #include "vpu_driver/source/utilities/log.hpp" +#include <chrono> +#include <mutex> #include namespace L0 { +static VPU::VPUCommandBuffer::Priority toDriverPriority(ze_command_queue_priority_t p) { + switch (p) { + case ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW: + return VPU::VPUCommandBuffer::Priority::IDLE; + case ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH: + return VPU::VPUCommandBuffer::Priority::REALTIME; + case ZE_COMMAND_QUEUE_PRIORITY_NORMAL: + default: + return VPU::VPUCommandBuffer::Priority::NORMAL; + } +} +
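The mapping above is exercised through the standard queue descriptor; a minimal sketch (hContext and hDevice are assumed, pre-existing handles):

    ze_command_queue_desc_t desc = {};
    desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
    desc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH;  // becomes Priority::REALTIME
    ze_command_queue_handle_t queue = nullptr;
    zeCommandQueueCreate(hContext, hDevice, &desc, &queue);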
ze_result_t CommandQueue::create(ze_context_handle_t hContext, ze_device_handle_t hDevice, const ze_command_queue_desc_t *desc, @@ -45,7 +59,8 @@ ze_result_t CommandQueue::create(ze_context_handle_t hContext, Context *pContext = Context::fromHandle(hContext); bool isCopyOnly = flags == ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY; - auto commandQueue = std::make_unique<CommandQueue>(pContext, pDevice, isCopyOnly); + auto commandQueue = + std::make_unique<CommandQueue>(pContext, pDevice, isCopyOnly, desc->priority); *phCommandQueue = commandQueue.get(); pContext->appendObject(std::move(commandQueue)); @@ -140,6 +155,7 @@ ze_result_t CommandQueue::executeCommandLists(uint32_t nCommandLists, return ZE_RESULT_ERROR_UNKNOWN; } + job->setPriority(toDriverPriority(priority)); if (!pContext->getDeviceContext()->submitJob(job.get())) { LOG_E("VPUJob submission failed"); if (errno == -EBADFD) @@ -162,11 +178,24 @@ ze_result_t CommandQueue::executeCommandLists(uint32_t nCommandLists, fence->setTrackedJobs(jobs); } + const std::lock_guard trackedJobsLock(trackedJobsMutex); std::copy(jobs.begin(), jobs.end(), std::back_inserter(trackedJobs)); return ZE_RESULT_SUCCESS; } ze_result_t CommandQueue::synchronize(uint64_t timeout) { + auto timeoutPoint = VPU::getAbsoluteTimePoint(timeout); + /* + * try_lock_until raises SIGABRT when the timeout is >= ~INT64_MAX on Ubuntu 20.04 because + * of signed overflow, detected by the compiler flag '-ftrapv'. try_lock_for is free from this + * limitation and is used as the first waiting function. + */ + if (!trackedJobsMutex.try_lock_for(std::chrono::nanoseconds(timeout))) { LOG_W("Failed to lock mutex on tracked jobs"); return ZE_RESULT_NOT_READY; } + + const std::lock_guard trackedJobsLock(trackedJobsMutex, std::adopt_lock); if (trackedJobs.empty()) { LOG_W("No command execution to observe"); return ZE_RESULT_SUCCESS; @@ -174,10 +203,11 @@ ze_result_t CommandQueue::synchronize(uint64_t timeout) { LOG_V("Synchronize for %lu ns, %zu job count", timeout, trackedJobs.size()); - bool allSignaled = waitForSignal(timeout, trackedJobs); - if (!allSignaled) { - LOG_W("Commands execution is not finished"); - return ZE_RESULT_NOT_READY; + for (auto const &job : trackedJobs) { + if (!job->waitForCompletion(timeoutPoint.time_since_epoch().count())) { + LOG_W("Commands execution is not finished"); + return ZE_RESULT_NOT_READY; + } } ze_result_t result = Device::jobStatusToResult(trackedJobs); diff --git a/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.hpp b/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.hpp index a39c8f2..a100d83 100644 --- a/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.hpp +++ b/umd/level_zero_driver/core/source/cmdqueue/cmdqueue.hpp @@ -17,10 +17,14 @@ struct _ze_command_queue_handle_t {}; namespace L0 { struct CommandQueue : _ze_command_queue_handle_t, IContextObject { - CommandQueue(Context *context, Device *device, bool isCopyOnly) + CommandQueue(Context *context, + Device *device, + bool isCopyOnly, + ze_command_queue_priority_t priority) : pContext(context) , device(device) - , isCopyOnlyCommandQueue(isCopyOnly) {} + , isCopyOnlyCommandQueue(isCopyOnly) + , priority(priority) {} ~CommandQueue() = default; static ze_result_t create(ze_context_handle_t hContext, @@ -56,7 +60,9 @@ struct CommandQueue : _ze_command_queue_handle_t, IContextObject { Context *pContext = nullptr; Device *device = nullptr; bool isCopyOnlyCommandQueue = false; + ze_command_queue_priority_t priority; std::vector<std::shared_ptr<VPU::VPUJob>> trackedJobs; + std::timed_mutex trackedJobsMutex; }; } // namespace L0 diff --git a/umd/level_zero_driver/core/source/context/context.cpp b/umd/level_zero_driver/core/source/context/context.cpp index 5921b60..0e6e132 100644 --- a/umd/level_zero_driver/core/source/context/context.cpp +++ b/umd/level_zero_driver/core/source/context/context.cpp @@ -96,19 +96,16 @@ ze_result_t Context::createMetricQueryPool(zet_device_handle_t hDevice, return ZE_RESULT_NOT_READY; } - auto *metricQueryPool = new MetricQueryPool(ctx.get(), metricGroup, desc->count); - if (metricQueryPool == nullptr) { - LOG_E("Failed to create metric query pool."); - return ZE_RESULT_ERROR_NOT_AVAILABLE; - } + try { + auto metricQueryPool = std::make_unique<MetricQueryPool>(this, metricGroup, desc->count); + *phMetricQueryPool = metricQueryPool.get(); - if (!metricQueryPool->isInitialized()) { - LOG_E("MetricQueryPool not initialized correctly. Deleting instance."); - metricQueryPool->destroy(); - return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } + this->appendObject(std::move(metricQueryPool)); - *phMetricQueryPool = metricQueryPool->toHandle(); + LOG_I("MetricQueryPool created - %p", *phMetricQueryPool); + } catch (const DriverError &err) { + return err.result(); + } return ZE_RESULT_SUCCESS; } @@ -118,13 +115,23 @@ ze_result_t Context::metricStreamerOpen(zet_device_handle_t hDevice, zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotificationEvent, zet_metric_streamer_handle_t *phMetricStreamer) { - if (hDevice == nullptr || hMetricGroup == nullptr) { - LOG_E("Device(%p) / MetricGroup(%p) handle is NULL.", hDevice, hMetricGroup); + if (hDevice == nullptr) { + LOG_E("Invalid hDevice pointer"); return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; } - if (desc == nullptr || phMetricStreamer == nullptr) { - LOG_E("Desc(%p) / MetricStreamer(%p) handle is NULL.", desc, phMetricStreamer); + if (hMetricGroup == nullptr) { + LOG_E("Invalid hMetricGroup pointer"); + return ZE_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (desc == nullptr) { + LOG_E("Invalid desc pointer"); + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (phMetricStreamer == nullptr) { + LOG_E("Invalid phMetricStreamer pointer"); return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -140,46 +147,22 @@ ze_result_t Context::metricStreamerOpen(zet_device_handle_t hDevice, return ZE_RESULT_NOT_READY; } - auto metricContext = device->getMetricContext(); - if (metricContext->getMetricStreamer() != nullptr) { - LOG_E("Device already has a MetricStreamer opened."); - return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; - } - - if (desc->samplingPeriod < L0::MetricContext::MIN_SAMPLING_RATE_NS) { + if (desc->samplingPeriod < MetricContext::MIN_SAMPLING_RATE_NS) { LOG_E("Sampling rate is too low."); return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - auto pMetricStreamer = new MetricStreamer(metricGroup, - desc->notifyEveryNReports, - ctx.get(), - device, - hNotificationEvent); - if (pMetricStreamer == nullptr) { - LOG_E("Failed to create metric streamer."); - return ZE_RESULT_ERROR_NOT_AVAILABLE; - } + try { + auto metricStreamer = + std::make_unique<MetricStreamer>(this, metricGroup, desc, hNotificationEvent); + *phMetricStreamer = metricStreamer.get(); + this->appendObject(std::move(metricStreamer)); - metricContext->setMetricStreamer(pMetricStreamer); - - drm_ivpu_metric_streamer_start startData = {}; - startData.metric_group_mask = 0x1 << metricGroup->getGroupIndex(); - // Sampling rate expressed in nanoseconds - startData.sampling_rate_ns = desc->samplingPeriod; - startData.read_rate = desc->notifyEveryNReports; - - const VPU::VPUDriverApi &drvApi = ctx->getDriverApi(); - if (drvApi.metricStreamerStart(&startData) < 0) { - LOG_E("Failed to start metric streamer."); - pMetricStreamer->close(); - return ZE_RESULT_ERROR_UNKNOWN; + LOG_I("MetricStreamer created - %p", *phMetricStreamer); + } catch (const DriverError &err) { + return err.result(); } - metricContext->sampleSize = startData.sample_size; - - *phMetricStreamer = pMetricStreamer->toHandle(); - return ZE_RESULT_SUCCESS; }
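The reworked validation maps one-to-one onto the public entry point; a hedged usage sketch (the sampling values are illustrative):

    zet_metric_streamer_desc_t desc = {};
    desc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
    desc.samplingPeriod = 10'000'000;        // 10 ms in ns; must be >= MIN_SAMPLING_RATE_NS
    desc.notifyEveryNReports = 16;
    zet_metric_streamer_handle_t streamer = nullptr;
    zetMetricStreamerOpen(hContext, hDevice, hMetricGroup, &desc, nullptr, &streamer);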
diff --git a/umd/level_zero_driver/core/source/context/context.hpp b/umd/level_zero_driver/core/source/context/context.hpp index 0259ffd..b1e391e 100644 --- a/umd/level_zero_driver/core/source/context/context.hpp +++ b/umd/level_zero_driver/core/source/context/context.hpp @@ -10,6 +10,7 @@ #include "level_zero_driver/core/source/driver/driver_handle.hpp" #include "level_zero_driver/include/l0_exception.hpp" #include "level_zero_driver/include/l0_handler.hpp" +#include "vpu_driver/source/memory/vpu_buffer_object.hpp" #include "vpu_driver/source/device/vpu_device_context.hpp" #include @@ -34,18 +35,29 @@ struct Context : _ze_context_handle_t { ze_result_t checkMemInputs(size_t size, size_t alignment, void **ptr); ze_result_t - allocHostMem(ze_host_mem_alloc_flags_t flags, size_t size, size_t alignment, void **ptr); - ze_result_t allocSharedMem(ze_device_handle_t hDevice, - ze_device_mem_alloc_flags_t flagsDev, - ze_host_mem_alloc_flags_t flagsHost, - size_t size, - size_t alignment, - void **ptr); - ze_result_t allocDeviceMem(ze_device_handle_t hDevice, - ze_device_mem_alloc_flags_t flags, - size_t size, - size_t alignment, - void **ptr); + allocHostMem(ze_host_mem_alloc_flags_t flags, + size_t size, + size_t alignment, + void **ptr, + VPU::VPUBufferObject::Location location = VPU::VPUBufferObject::Location::Host); + + ze_result_t allocSharedMem( + ze_device_handle_t hDevice, + ze_device_mem_alloc_flags_t flagsDev, + ze_host_mem_alloc_flags_t flagsHost, + size_t size, + size_t alignment, + void **ptr, + VPU::VPUBufferObject::Location location = VPU::VPUBufferObject::Location::Shared); + + ze_result_t allocDeviceMem( + ze_device_handle_t hDevice, + ze_device_mem_alloc_flags_t flags, + size_t size, + size_t alignment, + void **ptr, + VPU::VPUBufferObject::Location location = VPU::VPUBufferObject::Location::Device); + ze_result_t importMemory(VPU::VPUBufferObject::Location type, int32_t fd, void **ptr); ze_result_t freeMem(void *ptr); ze_result_t getMemAllocProperties(const void *ptr, diff --git a/umd/level_zero_driver/core/source/device/device.cpp b/umd/level_zero_driver/core/source/device/device.cpp index fdbed01..7f759db 100644 --- a/umd/level_zero_driver/core/source/device/device.cpp +++ b/umd/level_zero_driver/core/source/device/device.cpp @@ -23,9 +23,9 @@ namespace L0 { -Device::Device(DriverHandle *driverHandle, VPU::VPUDevice *vpuDevice) +Device::Device(DriverHandle *driverHandle, std::unique_ptr<VPU::VPUDevice> device) : driverHandle(driverHandle) - , vpuDevice(vpuDevice) + , vpuDevice(std::move(device)) , metricContext(std::make_shared<MetricContext>(this)) { if (vpuDevice != nullptr) { Driver *pDriver = Driver::getInstance(); @@ -77,7 +77,7 @@ ze_result_t Device::getProperties(ze_device_properties_t *pDeviceProperties) { pDeviceProperties->type = ZE_DEVICE_TYPE_VPU; pDeviceProperties->vendorId = INTEL_PCI_VENDOR_ID; pDeviceProperties->deviceId = hwInfo.deviceId; - pDeviceProperties->subdeviceId = hwInfo.subdeviceId; + pDeviceProperties->subdeviceId = hwInfo.deviceRevision; pDeviceProperties->coreClockRate = hwInfo.coreClockRate; pDeviceProperties->maxMemAllocSize = hwInfo.maxMemAllocSize; pDeviceProperties->maxHardwareContexts = hwInfo.maxHardwareContexts; @@ -126,23 +126,9 @@ ze_result_t Device::getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdev LOG_E("Invalid pCount pointer."); return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - if (*pCount == 0) { - *pCount = numSubDevices; - return ZE_RESULT_SUCCESS; - } - if (phSubdevices == nullptr) { - LOG_E("Invalid phSubdevices pointer."); - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (*pCount > numSubDevices) { - *pCount = numSubDevices; - } - - for (uint32_t i = 0; i < *pCount; i++) { - phSubdevices[i] = subDevices[i]; - } + if (*pCount > 0) + *pCount = 0; return ZE_RESULT_SUCCESS; } @@ -344,29 +330,8 @@ ze_result_t Device::getStatus() const { return vpuDevice->isConnected() ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_DEVICE_LOST; } -// Create L0 device from VPUDevice.
-Device *Device::create(DriverHandle *driverHandle, VPU::VPUDevice *vpuDevice) { - auto device = new Device(driverHandle, vpuDevice); - if (device == nullptr) { - LOG_E("New Device creation failed!"); - return nullptr; - } - - return device; -} - -Device::~Device() { - if (vpuDevice != nullptr) { - delete vpuDevice; - } - - for (uint32_t i = 0; i < numSubDevices; i++) { - delete subDevices[i]; - } -} - VPU::VPUDevice *Device::getVPUDevice() { - return vpuDevice; + return vpuDevice.get(); } void Device::loadMetricGroupsInfo(std::vector &metricGroupsInfo) { @@ -431,7 +396,7 @@ bool Device::isMetricGroupAvailable(MetricGroup *metricGroup) const { } ze_result_t Device::metricGroupGet(uint32_t *pCount, zet_metric_group_handle_t *phMetricGroups) { - if (getVPUDevice()->getCapMetricStreamer() != 1) { + if (!getVPUDevice()->getCapMetricStreamer()) { LOG_E("Metrics are not supported."); return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/umd/level_zero_driver/core/source/device/device.hpp b/umd/level_zero_driver/core/source/device/device.hpp index 0514863..01d4151 100644 --- a/umd/level_zero_driver/core/source/device/device.hpp +++ b/umd/level_zero_driver/core/source/device/device.hpp @@ -30,12 +30,7 @@ struct MetricContext; struct MetricGroup; struct Device : _ze_device_handle_t { - Device(DriverHandle *driverHandle, VPU::VPUDevice *vpuDevice); - ~Device(); - - // Block copy constructor and assign operator. - Device &operator=(const Device &) = delete; - Device(const Device &rhs) = delete; + Device(DriverHandle *driverHandle, std::unique_ptr device); ze_result_t getP2PProperties(ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties); @@ -70,7 +65,6 @@ struct Device : _ze_device_handle_t { static Device *fromHandle(ze_device_handle_t handle) { return static_cast(handle); } inline ze_device_handle_t toHandle() { return this; } - static Device *create(DriverHandle *driverHandle, VPU::VPUDevice *vpuDevice); static ze_result_t jobStatusToResult(const std::vector> &jobs) { for (const auto &job : jobs) { auto jobStatus = job->getStatus(); @@ -99,15 +93,11 @@ struct Device : _ze_device_handle_t { private: DriverHandle *driverHandle = nullptr; - VPU::VPUDevice *vpuDevice = nullptr; + std::unique_ptr vpuDevice = nullptr; std::shared_ptr metricContext = nullptr; bool metricsLoaded = false; - // According to SAS this could be used for NCE compute tiles in the future - uint32_t numSubDevices = 0; - std::vector subDevices; - const int NS_IN_SEC = 1'000'000'000; }; diff --git a/umd/level_zero_driver/core/source/driver/driver.cpp b/umd/level_zero_driver/core/source/driver/driver.cpp index eaa1313..56165fd 100644 --- a/umd/level_zero_driver/core/source/driver/driver.cpp +++ b/umd/level_zero_driver/core/source/driver/driver.cpp @@ -6,6 +6,7 @@ */ #include "umd_common.hpp" +#include "level_zero_driver/ext/source/graph/compiler_common.hpp" #include "level_zero_driver/core/source/device/device.hpp" #include "level_zero_driver/core/source/driver/driver.hpp" @@ -34,10 +35,10 @@ void Driver::initializeEnvVariables() { envVariables.sharedForceDeviceAlloc = env == nullptr || env[0] == '0' || env[0] == '\0' ? false : true; - env = getenv("VPU_DRV_UMD_LOGLEVEL"); + env = getenv("ZE_INTEL_NPU_LOGLEVEL"); envVariables.umdLogLevel = env == nullptr ? "" : env; - env = getenv("VPU_DRV_CID_LOGLEVEL"); + env = getenv("ZE_INTEL_NPU_COMPILER_LOGLEVEL"); envVariables.cidLogLevel = env == nullptr ? 
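The log-level variables renamed here keep their previous semantics: unset, empty, or unrecognized values fall back to the defaults. A hedged sketch of the consumer side (only the variable names introduced in this hunk are taken from the patch):

#include <cstdlib>
#include <string>

// Reads one of the renamed variables, e.g. ZE_INTEL_NPU_LOGLEVEL or
// ZE_INTEL_NPU_COMPILER_LOGLEVEL; a missing variable yields "".
std::string readLogLevel(const char *name) {
    const char *env = std::getenv(name);
    return env == nullptr ? std::string() : std::string(env);
}
// Example: ZE_INTEL_NPU_COMPILER_LOGLEVEL=ERROR ./my_app
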
"" : env; } @@ -50,7 +51,7 @@ void Driver::driverInit(ze_init_flags_t flags) { std::call_once(this->initDriverOnce, [&]() { initializeEnvVariables(); VPU::setLogLevel(envVariables.umdLogLevel); - Compiler::setCidLogLevel(envVariables.cidLogLevel); + setCidLogLevel(envVariables.cidLogLevel); if (osInfc == nullptr) { LOG_V("OS interface updated."); @@ -62,7 +63,7 @@ void Driver::driverInit(ze_init_flags_t flags) { } } - auto vpuDevices = VPU::DeviceFactory::createDevices(osInfc); + auto vpuDevices = VPU::DeviceFactory::createDevices(osInfc, envVariables.metrics); LOG_W("%zu VPU device(s) found.", vpuDevices.size()); if (!vpuDevices.empty()) { pGlobalDriverHandle = DriverHandle::create(std::move(vpuDevices)); diff --git a/umd/level_zero_driver/core/source/driver/driver.hpp b/umd/level_zero_driver/core/source/driver/driver.hpp index a34eed6..0444c76 100644 --- a/umd/level_zero_driver/core/source/driver/driver.hpp +++ b/umd/level_zero_driver/core/source/driver/driver.hpp @@ -37,7 +37,7 @@ class Driver { virtual void driverInit(ze_init_flags_t flags); virtual ze_result_t getInitStatus(); virtual const L0EnvVariables &getEnvVariables() { return envVariables; } - virtual DriverHandle *getDriverHandle() { return pGlobalDriverHandle; } + virtual DriverHandle *getDriverHandle() { return pGlobalDriverHandle.get(); } protected: static Driver *pDriver; @@ -46,7 +46,7 @@ class Driver { private: const uint32_t driverCount = 1; - DriverHandle *pGlobalDriverHandle = nullptr; + std::unique_ptr pGlobalDriverHandle = nullptr; VPU::OsInterface *osInfc = nullptr; ze_result_t initStatus = ZE_RESULT_ERROR_UNINITIALIZED; std::once_flag initDriverOnce; diff --git a/umd/level_zero_driver/core/source/driver/driver_handle.cpp b/umd/level_zero_driver/core/source/driver/driver_handle.cpp index 8f876c8..7dbd77a 100644 --- a/umd/level_zero_driver/core/source/driver/driver_handle.cpp +++ b/umd/level_zero_driver/core/source/driver/driver_handle.cpp @@ -69,13 +69,8 @@ ze_result_t DriverHandle::getProperties(ze_driver_properties_t *properties) { return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - uint32_t versionMajor = L0_PROJECT_VERSION_MAJOR; - uint32_t versionMinor = L0_PROJECT_VERSION_MINOR; - uint32_t versionBuild = VPU_VERSION_BUILD; - properties->uuid = ze_intel_vpu_driver_uuid; - properties->driverVersion = ((versionMajor << 24) & 0xFF000000) | - ((versionMinor << 16) & 0x00FF0000) | (versionBuild & 0x0000FFFF); + properties->driverVersion = DRIVER_VERSION; LOG_I("Driver properties returned successfully."); return ZE_RESULT_SUCCESS; @@ -137,40 +132,22 @@ DriverHandle::getMemAllocProperties(const void *ptr, return ZE_RESULT_SUCCESS; } -DriverHandle::~DriverHandle() { - for (auto &device : devices) - delete device; -} - -ze_result_t DriverHandle::initialize(std::vector> vpuDevices) { +DriverHandle::DriverHandle(std::vector> vpuDevices) { for (auto &vpuDevice : vpuDevices) - devices.emplace_back(Device::create(this, vpuDevice.release())); - - if (devices.size() == 0) { - LOG_W("No VPU devices found."); - return ZE_RESULT_ERROR_UNINITIALIZED; - } + devices.push_back(std::make_unique(this, std::move(vpuDevice))); numDevices = safe_cast(devices.size()); LOG_I("Update numDevices with '%d'.", numDevices); - - return ZE_RESULT_SUCCESS; } -DriverHandle *DriverHandle::create(std::vector> devices) { - auto *driverHandle = new DriverHandle; - if (driverHandle == nullptr) { - LOG_E("New DriverHandle allocation failed!"); - return nullptr; - } - - ze_result_t res = driverHandle->initialize(std::move(devices)); - if (res != 
ZE_RESULT_SUCCESS) { - delete driverHandle; +std::unique_ptr<DriverHandle> +DriverHandle::create(std::vector<std::unique_ptr<VPU::VPUDevice>> devices) { + if (devices.size() == 0) { + LOG_W("No VPU devices found."); return nullptr; } - return driverHandle; + return std::make_unique<DriverHandle>(std::move(devices)); } ze_result_t DriverHandle::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) { @@ -192,7 +169,7 @@ ze_result_t DriverHandle::getDevice(uint32_t *pCount, ze_device_handle_t *phDevi } for (uint32_t i = 0; i < *pCount; i++) { - phDevices[i] = devices[i]; + phDevices[i] = devices[i].get(); } return ZE_RESULT_SUCCESS; @@ -203,7 +180,7 @@ ze_result_t DriverHandle::getDevice(uint32_t *pCount, ze_device_handle_t *phDevi // shall be utilised as primary device. Device *DriverHandle::getPrimaryDevice() { if (devices[0] != nullptr) - return devices[0]; + return devices[0].get(); return nullptr; } diff --git a/umd/level_zero_driver/core/source/driver/driver_handle.hpp b/umd/level_zero_driver/core/source/driver/driver_handle.hpp index a9036fe..4cbd257 100644 --- a/umd/level_zero_driver/core/source/driver/driver_handle.hpp +++ b/umd/level_zero_driver/core/source/driver/driver_handle.hpp @@ -19,7 +19,8 @@ namespace L0 { struct Device; struct DriverHandle : _ze_driver_handle_t { - ~DriverHandle(); + DriverHandle(std::vector<std::unique_ptr<VPU::VPUDevice>> vpuDevices); + ze_result_t createContext(const ze_context_desc_t *desc, ze_context_handle_t *phContext); ze_result_t getDevice(uint32_t *pCount, ze_device_handle_t *phDevices); ze_result_t getProperties(ze_driver_properties_t *properties); @@ -39,15 +40,13 @@ struct DriverHandle : _ze_driver_handle_t { inline ze_driver_handle_t toHandle() { return this; } - DriverHandle &operator=(const DriverHandle &) = delete; - DriverHandle &operator=(DriverHandle &&) = delete; - - static DriverHandle *create(std::vector<std::unique_ptr<VPU::VPUDevice>> devices); + static std::unique_ptr<DriverHandle> + create(std::vector<std::unique_ptr<VPU::VPUDevice>> devices); ze_result_t initialize(std::vector<std::unique_ptr<VPU::VPUDevice>> vpuDevices); uint32_t numDevices = 0; - std::vector<Device *> devices; + std::vector<std::unique_ptr<Device>> devices; Device *getPrimaryDevice(); }; diff --git a/umd/level_zero_driver/core/source/event/event.cpp b/umd/level_zero_driver/core/source/event/event.cpp index a41b383..a92248f 100644 --- a/umd/level_zero_driver/core/source/event/event.cpp +++ b/umd/level_zero_driver/core/source/event/event.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -13,13 +13,16 @@ #include "vpu_driver/source/utilities/timer.hpp" #include <chrono> +#include <thread> namespace L0 { -Event::Event(VPU::VPUEventCommand::KMDEventDataType *ptr, +Event::Event(VPU::VPUDeviceContext *ctx, + VPU::VPUEventCommand::KMDEventDataType *ptr, uint64_t vpuAddr, std::function<void()> &&destroyCb) - : eventState(ptr) + : pDevCtx(ctx) + , eventState(ptr) , eventVpuAddr(vpuAddr) , destroyCb(std::move(destroyCb)) { setEventState(VPU::VPUEventCommand::STATE_EVENT_INITIAL); @@ -36,6 +39,25 @@ ze_result_t Event::hostSignal() { return ZE_RESULT_SUCCESS; } +void Event::trackMetricData(int64_t timeoutNs) { + auto timeOut = std::chrono::steady_clock::time_point(std::chrono::nanoseconds(timeoutNs)); + + if (!msExpectedDataSize || !msGroupMask) + return; + + do { + size_t dataSize = 0; + if (MetricStreamer::getData(pDevCtx->getDriverApi(), msGroupMask, dataSize, nullptr) == + ZE_RESULT_SUCCESS) { + if (dataSize >= msExpectedDataSize) { + hostSignal(); + break; + } + } + std::this_thread::yield(); + } while (std::chrono::steady_clock::now() < timeOut); +} + ze_result_t Event::hostSynchronize(uint64_t timeout) { auto 
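trackMetricData() above is an instance of a deadline-polling loop over an absolute steady_clock timestamp; the same shape in isolation, with the metric query abstracted behind a predicate (pollUntil is a name invented for this sketch):

#include <chrono>
#include <thread>

// Polls ready() until it returns true or the absolute deadline (in
// nanoseconds on the steady_clock epoch) passes, yielding between polls.
template <typename Predicate>
bool pollUntil(int64_t timeoutNs, Predicate &&ready) {
    auto deadline = std::chrono::steady_clock::time_point(std::chrono::nanoseconds(timeoutNs));
    do {
        if (ready())
            return true;
        std::this_thread::yield();
    } while (std::chrono::steady_clock::now() < deadline);
    return false;
}
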
absoluteTimeout = VPU::getAbsoluteTimeoutNanoseconds(timeout); @@ -46,7 +68,7 @@ ze_result_t Event::hostSynchronize(uint64_t timeout) { associatedJobs.end()); LOG_I("Waiting for fence in VPUAddr: %#lx", eventVpuAddr); - /* Check if all jobs with this event are finished */ + for (auto &jobWeak : associatedJobs) { if (auto job = jobWeak.lock()) { for (const auto &cmdBuffer : job->getCommandBuffers()) { @@ -60,10 +82,13 @@ ze_result_t Event::hostSynchronize(uint64_t timeout) { } } - return queryStatus(); + return queryStatus(absoluteTimeout); } -ze_result_t Event::queryStatus() { +ze_result_t Event::queryStatus(uint64_t timeout) { + if (msExpectedDataSize && *eventState < VPU::VPUEventCommand::STATE_DEVICE_SIGNAL) + trackMetricData(timeout); + switch (*eventState) { case VPU::VPUEventCommand::STATE_EVENT_INITIAL: LOG_V("Sync point %p is still in initial state.", eventState); diff --git a/umd/level_zero_driver/core/source/event/event.hpp b/umd/level_zero_driver/core/source/event/event.hpp index a9d1726..352267b 100644 --- a/umd/level_zero_driver/core/source/event/event.hpp +++ b/umd/level_zero_driver/core/source/event/event.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,6 +8,7 @@ #pragma once #include "level_zero_driver/core/source/device/device.hpp" +#include "level_zero_driver/tools/source/metrics/metric_streamer.hpp" #include #include @@ -19,7 +20,8 @@ namespace L0 { struct Event : _ze_event_handle_t, IContextObject { public: - Event(VPU::VPUEventCommand::KMDEventDataType *ptr, + Event(VPU::VPUDeviceContext *ctx, + VPU::VPUEventCommand::KMDEventDataType *ptr, uint64_t vpuAddr, std::function &&destroyCb); ~Event() = default; @@ -30,20 +32,28 @@ struct Event : _ze_event_handle_t, IContextObject { ze_result_t destroy(); ze_result_t hostSignal(); ze_result_t hostSynchronize(uint64_t timeout); - ze_result_t queryStatus(); + ze_result_t queryStatus(uint64_t timeout = 0ULL); ze_result_t reset(); inline VPU::VPUEventCommand::KMDEventDataType *getSyncPointer() const { return eventState; } void associateJob(std::weak_ptr job) { associatedJobs.push_back(std::move(job)); } + void setMetricTrackData(uint64_t groupMask, size_t dataSize) { + msGroupMask = groupMask; + msExpectedDataSize = dataSize; + } + void trackMetricData(int64_t timeoutNs); private: void setEventState(VPU::VPUEventCommand::KMDEventDataType updateTo); + VPU::VPUDeviceContext *pDevCtx = nullptr; VPU::VPUEventCommand::KMDEventDataType *eventState = nullptr; uint64_t eventVpuAddr = 0; std::function destroyCb; std::vector> associatedJobs; + size_t msExpectedDataSize = 0; + uint64_t msGroupMask = 0ULL; }; } // namespace L0 diff --git a/umd/level_zero_driver/core/source/event/eventpool.cpp b/umd/level_zero_driver/core/source/event/eventpool.cpp index f98ce90..cec4bc5 100644 --- a/umd/level_zero_driver/core/source/event/eventpool.cpp +++ b/umd/level_zero_driver/core/source/event/eventpool.cpp @@ -72,7 +72,7 @@ EventPool::EventPool(Context *pContext, const ze_event_pool_desc_t *desc) , events(desc->count) { pEventPool = ctx->createInternalBufferObject(sizeof(VPU::VPUEventCommand::JsmEventData) * events.size(), - VPU::VPUBufferObject::Type::CachedLow); + VPU::VPUBufferObject::Type::CachedFw); L0_THROW_WHEN(pEventPool == nullptr, "Failed to allocate buffer object for event pool", ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY); @@ -117,8 +117,9 @@ ze_result_t EventPool::createEvent(const ze_event_desc_t *desc, ze_event_handle_ "Failed to get VPU address 
from cpu pointer", ZE_RESULT_ERROR_UNKNOWN); - events[index] = - std::make_unique(eventPtr, vpuAddr, [this, index]() { events[index].reset(); }); + events[index] = std::make_unique(ctx, eventPtr, vpuAddr, [this, index]() { + events[index].reset(); + }); *phEvent = events[index].get(); LOG_I("Event created - %p", *phEvent); diff --git a/umd/level_zero_driver/core/source/fence/fence.cpp b/umd/level_zero_driver/core/source/fence/fence.cpp index e643d49..dd334a2 100644 --- a/umd/level_zero_driver/core/source/fence/fence.cpp +++ b/umd/level_zero_driver/core/source/fence/fence.cpp @@ -35,10 +35,12 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) { LOG_V("Synchronize for %lu ns, %zu jobs count", timeout, trackedJobs.size()); - bool allSignaled = waitForSignal(timeout, trackedJobs); - if (!allSignaled) { - LOG_W("Commands execution is not finished"); - return ZE_RESULT_NOT_READY; + auto absTimeout = VPU::getAbsoluteTimeoutNanoseconds(timeout); + for (auto const &job : trackedJobs) { + if (!job->waitForCompletion(absTimeout)) { + LOG_W("Commands execution is not finished"); + return ZE_RESULT_NOT_READY; + } } ze_result_t result = Device::jobStatusToResult(trackedJobs); diff --git a/umd/level_zero_driver/core/source/memory/memory.cpp b/umd/level_zero_driver/core/source/memory/memory.cpp index 8846dff..d39a264 100644 --- a/umd/level_zero_driver/core/source/memory/memory.cpp +++ b/umd/level_zero_driver/core/source/memory/memory.cpp @@ -15,28 +15,17 @@ namespace L0 { -static VPU::VPUBufferObject::Type hostFlagToVPUBufferObjectType(ze_host_mem_alloc_flags_t flag) { +static VPU::VPUBufferObject::Type flagToVPUBufferObjectType(ze_host_mem_alloc_flags_t flag) { + // TODO: Fallback to shave range to fix incorrect address in Dma tasks for kernels (EISW-108894) switch (flag) { case ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED: - return VPU::VPUBufferObject::Type::CachedHigh; + return VPU::VPUBufferObject::Type::CachedShave; case ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED: - return VPU::VPUBufferObject::Type::UncachedHigh; + return VPU::VPUBufferObject::Type::UncachedShave; case ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED: - return VPU::VPUBufferObject::Type::WriteCombineHigh; + return VPU::VPUBufferObject::Type::WriteCombineShave; }; - return VPU::VPUBufferObject::Type::CachedHigh; -} - -static VPU::VPUBufferObject::Type sharedFlagToVPUBufferObjectType(ze_host_mem_alloc_flags_t flag) { - switch (flag) { - case ZE_HOST_MEM_ALLOC_FLAG_BIAS_CACHED: - return VPU::VPUBufferObject::Type::CachedLow; - case ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED: - return VPU::VPUBufferObject::Type::UncachedLow; - case ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED: - return VPU::VPUBufferObject::Type::WriteCombineLow; - }; - return VPU::VPUBufferObject::Type::CachedLow; + return VPU::VPUBufferObject::Type::CachedShave; } ze_result_t Context::checkMemInputs(size_t size, size_t alignment, void **ptr) { @@ -61,16 +50,21 @@ ze_result_t Context::checkMemInputs(size_t size, size_t alignment, void **ptr) { return ZE_RESULT_SUCCESS; } -ze_result_t -Context::allocHostMem(ze_host_mem_alloc_flags_t flags, size_t size, size_t alignment, void **ptr) { +ze_result_t Context::allocHostMem(ze_host_mem_alloc_flags_t flags, + size_t size, + size_t alignment, + void **ptr, + VPU::VPUBufferObject::Location location) { ze_result_t ret = checkMemInputs(size, alignment, ptr); if (ret != ZE_RESULT_SUCCESS) return ret; - if (0x7 < flags) + if (0x7 < flags || (location != VPU::VPUBufferObject::Location::Host && + location != VPU::VPUBufferObject::Location::ExternalHost)) 
return ZE_RESULT_ERROR_INVALID_ENUMERATION; - *ptr = ctx->createHostMemAlloc(size, hostFlagToVPUBufferObjectType(flags)); + *ptr = ctx->createMemAlloc(size, flagToVPUBufferObjectType(flags), location); + if (*ptr == nullptr) { LOG_E("Failed to allocate host memory"); return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -84,15 +78,18 @@ ze_result_t Context::allocSharedMem(ze_device_handle_t hDevice, ze_host_mem_alloc_flags_t flagsHost, size_t size, size_t alignment, - void **ptr) { + void **ptr, + VPU::VPUBufferObject::Location location) { ze_result_t ret = checkMemInputs(size, alignment, ptr); if (ret != ZE_RESULT_SUCCESS) return ret; - if (0x7 < flagsDev || 0xf < flagsHost) + if (0x7 < flagsDev || 0xf < flagsHost || + (location != VPU::VPUBufferObject::Location::Shared && + location != VPU::VPUBufferObject::Location::ExternalShared)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; - *ptr = ctx->createSharedMemAlloc(size, sharedFlagToVPUBufferObjectType(flagsHost)); + *ptr = ctx->createMemAlloc(size, flagToVPUBufferObjectType(flagsHost), location); if (*ptr == nullptr) { LOG_E("Failed to allocate shared memory"); return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; @@ -105,15 +102,19 @@ ze_result_t Context::allocDeviceMem(ze_device_handle_t hDevice, ze_device_mem_alloc_flags_t flags, size_t size, size_t alignment, - void **ptr) { + void **ptr, + VPU::VPUBufferObject::Location location) { ze_result_t ret = checkMemInputs(size, alignment, ptr); if (ret != ZE_RESULT_SUCCESS) return ret; - if (0x3 < flags) + if (0x3 < flags || (location != VPU::VPUBufferObject::Location::Device && + location != VPU::VPUBufferObject::Location::ExternalDevice)) return ZE_RESULT_ERROR_INVALID_ENUMERATION; - *ptr = ctx->createDeviceMemAlloc(size); + // TODO: Fallback to shave range to fix incorrect address in Dma tasks for kernels (EISW-108894) + *ptr = ctx->createMemAlloc(size, VPU::VPUBufferObject::Type::WriteCombineShave, location); + if (*ptr == nullptr) { LOG_E("Failed to allocate device memory"); return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; @@ -122,6 +123,16 @@ ze_result_t Context::allocDeviceMem(ze_device_handle_t hDevice, return ZE_RESULT_SUCCESS; } +ze_result_t Context::importMemory(VPU::VPUBufferObject::Location type, int32_t fd, void **ptr) { + VPU::VPUBufferObject *bo = ctx->importBufferObject(type, fd); + if (bo == nullptr) { + LOG_E("Failed to import buffer"); + return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; + } + *ptr = bo->getBasePointer(); + return ZE_RESULT_SUCCESS; +} + ze_result_t Context::freeMem(void *ptr) { if (!ctx->freeMemAlloc(ptr)) return ZE_RESULT_ERROR_INVALID_ARGUMENT; @@ -147,12 +158,15 @@ ze_result_t Context::getMemAllocProperties(const void *ptr, switch (bo->getLocation()) { case VPU::VPUBufferObject::Location::Device: + case VPU::VPUBufferObject::Location::ExternalDevice: pMemAllocProperties->type = ZE_MEMORY_TYPE_DEVICE; break; case VPU::VPUBufferObject::Location::Host: + case VPU::VPUBufferObject::Location::ExternalHost: pMemAllocProperties->type = ZE_MEMORY_TYPE_HOST; break; case VPU::VPUBufferObject::Location::Shared: + case VPU::VPUBufferObject::Location::ExternalShared: pMemAllocProperties->type = ZE_MEMORY_TYPE_SHARED; break; default: @@ -162,6 +176,18 @@ ze_result_t Context::getMemAllocProperties(const void *ptr, pMemAllocProperties->id = 0u; // No specific ID for allocated memory, set as 0 pMemAllocProperties->pageSize = bo->getAllocSize(); + if (pMemAllocProperties->pNext && + checkPtrAlignment(pMemAllocProperties->pNext)) { + ze_external_memory_export_fd_t *pExtAllocProps = + 
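The pNext handling added in this hunk enables dma-buf export through the standard external-memory structs; caller-side, the flow looks like this sketch:

#include <level_zero/ze_api.h>

// Chains ze_external_memory_export_fd_t into zeMemGetAllocProperties to
// request a dma-buf descriptor for ptr, matching the pNext handling in
// this hunk. Returns -1 on failure.
int exportDmaBuf(ze_context_handle_t hContext, void *ptr) {
    ze_external_memory_export_fd_t exportFd = {};
    exportFd.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD;
    exportFd.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;
    ze_memory_allocation_properties_t props = {};
    props.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
    props.pNext = &exportFd;
    ze_device_handle_t hDevice = nullptr;
    if (zeMemGetAllocProperties(hContext, ptr, &props, &hDevice) != ZE_RESULT_SUCCESS)
        return -1;
    return exportFd.fd;
}
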
reinterpret_cast(pMemAllocProperties->pNext); + + if (pExtAllocProps->stype == ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD && + pExtAllocProps->flags == ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) { + if (!bo->exportToFd(pExtAllocProps->fd)) { + return ZE_RESULT_ERROR_NOT_AVAILABLE; + } + } + } return ZE_RESULT_SUCCESS; } diff --git a/umd/level_zero_driver/ext/source/CMakeLists.txt b/umd/level_zero_driver/ext/source/CMakeLists.txt index 96d98b9..7d9b63b 100644 --- a/umd/level_zero_driver/ext/source/CMakeLists.txt +++ b/umd/level_zero_driver/ext/source/CMakeLists.txt @@ -7,6 +7,8 @@ target_sources(${TARGET_NAME_L0} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/graph/compiler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/graph/compiler.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/graph/compiler_common.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/graph/compiler_common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/graph/elf_parser.hpp ${CMAKE_CURRENT_SOURCE_DIR}/graph/elf_parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/graph/graph.cpp diff --git a/umd/level_zero_driver/ext/source/graph/compiler.cpp b/umd/level_zero_driver/ext/source/graph/compiler.cpp index 0bf798a..c20698c 100644 --- a/umd/level_zero_driver/ext/source/graph/compiler.cpp +++ b/umd/level_zero_driver/ext/source/graph/compiler.cpp @@ -8,6 +8,7 @@ #include "level_zero/ze_api.h" #include "level_zero_driver/ext/source/graph/compiler.hpp" #include "umd_common.hpp" +#include "compiler_common.hpp" #include "vcl_symbols.hpp" #include "vpu_driver/source/utilities/log.hpp" @@ -15,10 +16,8 @@ namespace L0 { -static vcl_log_level_t cidLogLevel = VCL_LOG_NONE; static int compilerPlatform; static vcl_compiler_properties_t compilerProperties; -static thread_local std::vector lastCompilerLog = {}; bool Compiler::compilerInit(int compilerPlatformType) { if (!Vcl::sym().ok()) @@ -29,7 +28,7 @@ bool Compiler::compilerInit(int compilerPlatformType) { vcl_log_handle_t logHandle = nullptr; compilerDesc.platform = static_cast(compilerPlatformType); - compilerDesc.debug_level = VCL_LOG_NONE; + compilerDesc.debug_level = cidLogLevel; auto ret = Vcl::sym().compilerCreate(compilerDesc, &compiler, &logHandle); if (ret) { @@ -49,23 +48,7 @@ bool Compiler::compilerInit(int compilerPlatformType) { return true; } -void Compiler::setCidLogLevel(std::string_view &str) { - if (str == "TRACE") { - cidLogLevel = VCL_LOG_TRACE; - } else if (str == "DEBUG") { - cidLogLevel = VCL_LOG_DEBUG; - } else if (str == "INFO") { - cidLogLevel = VCL_LOG_INFO; - } else if (str == "WARNING") { - cidLogLevel = VCL_LOG_WARNING; - } else if (str == "ERROR") { - cidLogLevel = VCL_LOG_ERROR; - } else { - cidLogLevel = VCL_LOG_NONE; - } -} - -static void getInternalCompilerLastError(vcl_log_handle_t logHandle) { +static void copyCompilerLog(vcl_log_handle_t logHandle, std::string &buffer) { if (!Vcl::sym().ok()) return; @@ -76,37 +59,42 @@ static void getInternalCompilerLastError(vcl_log_handle_t logHandle) { size_t compilerLogSize = 0; vcl_result_t logRet = Vcl::sym().logHandleGetString(logHandle, &compilerLogSize, NULL); if (logRet != VCL_RESULT_SUCCESS) { + buffer.clear(); LOG_E("Failed to get size of error message."); return; } if (compilerLogSize == 0) { - // No logs + buffer.clear(); return; } - lastCompilerLog.resize(compilerLogSize); - logRet = Vcl::sym().logHandleGetString(logHandle, &compilerLogSize, lastCompilerLog.data()); + buffer.resize(compilerLogSize); + logRet = Vcl::sym().logHandleGetString(logHandle, &compilerLogSize, buffer.data()); if (logRet != VCL_RESULT_SUCCESS) { LOG_E("Failed to get content of error message."); return; } + + 
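copyCompilerLog() above follows the VCL size-query-then-fetch contract; reduced to its essentials (vcl_log_handle_t and the Vcl::sym() dlopen wrapper come from this file's headers, error handling elided):

#include <string>

// Two-call contract: the first logHandleGetString() returns the size,
// the second fills the caller's buffer. Returns "" when there is no log.
std::string fetchCompilerLog(vcl_log_handle_t logHandle) {
    size_t size = 0;
    if (Vcl::sym().logHandleGetString(logHandle, &size, nullptr) != VCL_RESULT_SUCCESS || size == 0)
        return {};
    std::string log(size, '\0');
    Vcl::sym().logHandleGetString(logHandle, &size, log.data());
    return log;
}
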
LOG_I("Saved compiler message to log buffer, message: %s", buffer.c_str()); } static bool getCompilerExecutable(vcl_compiler_handle_t &comp, vcl_executable_handle_t *exec, ze_graph_desc_2_t &desc, - vcl_log_handle_t *logHandle) { + vcl_log_handle_t *logHandle, + std::string &logBuffer) { if (!Vcl::sym().ok()) return false; std::string options = ""; - if ((desc.pBuildFlags != nullptr) && (desc.pBuildFlags[0] != '\0')) { + if (desc.pBuildFlags != nullptr && desc.pBuildFlags[0] != '\0') { options = std::string(desc.pBuildFlags); LOG_V("Compiler options: %s", options.c_str()); } else { - LOG_E("Invalid Build Flags!"); + logBuffer = "Invalid pBuildFlags pointer!"; + LOG_E("Invalid pBuildFlags pointer"); return false; } @@ -120,7 +108,7 @@ static bool getCompilerExecutable(vcl_compiler_handle_t &comp, vcl_executable_desc_t exeDesc = {desc.pInput, desc.inputSize, options.c_str(), options.size()}; vcl_result_t ret = Vcl::sym().executableCreate(comp, exeDesc, exec); if (ret != VCL_RESULT_SUCCESS) { - getInternalCompilerLastError(*logHandle); + copyCompilerLog(*logHandle, logBuffer); LOG_E("Failed to create compiler executable! Result:%x", ret); return false; } @@ -130,7 +118,8 @@ static bool getCompilerExecutable(vcl_compiler_handle_t &comp, bool Compiler::getCompiledBlob(size_t &graphSize, std::vector &graphBlob, - ze_graph_desc_2_t &desc) { + ze_graph_desc_2_t &desc, + std::string &logBuffer) { if (!Vcl::sym().ok()) return false; @@ -149,26 +138,27 @@ bool Compiler::getCompiledBlob(size_t &graphSize, VCL_COMPILER_VERSION_MINOR, getCompilerVersionMajor(), getCompilerVersionMinor()); + logBuffer = "Compiler version mismatch"; return false; } ret = Vcl::sym().compilerCreate(compilerDesc, &compiler, &logHandle); if (ret != VCL_RESULT_SUCCESS) { - getInternalCompilerLastError(logHandle); + copyCompilerLog(logHandle, logBuffer); LOG_E("Failed to create compiler! Result:%x", ret); return false; } vcl_executable_handle_t executable; - if (!getCompilerExecutable(compiler, &executable, desc, &logHandle)) { - LOG_E("Failed to get compiler executable!"); + if (!getCompilerExecutable(compiler, &executable, desc, &logHandle, logBuffer)) { + LOG_E("Failed to get compiler executable"); Vcl::sym().compilerDestroy(compiler); return false; } ret = Vcl::sym().executableGetSerializableBlob(executable, NULL, &graphSize); if (ret != VCL_RESULT_SUCCESS || graphSize == 0) { - getInternalCompilerLastError(logHandle); + copyCompilerLog(logHandle, logBuffer); LOG_E("Failed to get blob size! Result:%x", ret); Vcl::sym().executableDestroy(executable); Vcl::sym().compilerDestroy(compiler); @@ -178,7 +168,7 @@ bool Compiler::getCompiledBlob(size_t &graphSize, graphBlob.resize(graphSize); ret = Vcl::sym().executableGetSerializableBlob(executable, graphBlob.data(), &graphSize); if (ret != VCL_RESULT_SUCCESS) { - getInternalCompilerLastError(logHandle); + copyCompilerLog(logHandle, logBuffer); LOG_E("Failed to get blob! 
Result:%x", ret); Vcl::sym().executableDestroy(executable); Vcl::sym().compilerDestroy(compiler); @@ -225,7 +215,8 @@ ze_result_t Compiler::getDecodedProfilingBuffer(ze_graph_profiling_type_t profil const uint8_t *profData, uint64_t profSize, uint32_t *pSize, - void *pData) { + void *pData, + std::string &logBuffer) { if (!Vcl::sym().ok()) return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; @@ -238,7 +229,7 @@ ze_result_t Compiler::getDecodedProfilingBuffer(ze_graph_profiling_type_t profil auto ret = Vcl::sym().profilingCreate(&profilingApiInput, &profHandle, &logHandle); if (ret != VCL_RESULT_SUCCESS) { - getInternalCompilerLastError(logHandle); + copyCompilerLog(logHandle, logBuffer); LOG_E("Failed to create profiling in compiler."); return ZE_RESULT_ERROR_UNKNOWN; } @@ -250,7 +241,7 @@ ze_result_t Compiler::getDecodedProfilingBuffer(ze_graph_profiling_type_t profil ret = Vcl::sym().getDecodedProfilingBuffer(profHandle, profType, &profOutput); if (ret != VCL_RESULT_SUCCESS) { - getInternalCompilerLastError(logHandle); + copyCompilerLog(logHandle, logBuffer); LOG_E("Failed to get decoded profiling data in compiler."); Vcl::sym().profilingDestroy(profHandle); return ZE_RESULT_ERROR_UNKNOWN; @@ -266,21 +257,4 @@ ze_result_t Compiler::getDecodedProfilingBuffer(ze_graph_profiling_type_t profil return ZE_RESULT_SUCCESS; } -bool Compiler::logGetString(uint32_t *pSize, char *pLog) { - if (*pSize == 0) { - *pSize = static_cast(lastCompilerLog.size()); - return true; - } - - if (*pSize > lastCompilerLog.size()) { - *pSize = static_cast(lastCompilerLog.size()); - } - - if (pLog != nullptr) { - memcpy(pLog, lastCompilerLog.data(), *pSize); - } - - return true; -} - } // namespace L0 diff --git a/umd/level_zero_driver/ext/source/graph/compiler.hpp b/umd/level_zero_driver/ext/source/graph/compiler.hpp index 3b484eb..5a8cf93 100644 --- a/umd/level_zero_driver/ext/source/graph/compiler.hpp +++ b/umd/level_zero_driver/ext/source/graph/compiler.hpp @@ -8,16 +8,19 @@ #pragma once #include -#include +#include #include namespace L0 { + class Compiler { public: static bool compilerInit(int compilerPlatformType); - static bool - getCompiledBlob(size_t &graphSize, std::vector &graphBlob, ze_graph_desc_2_t &desc); + static bool getCompiledBlob(size_t &graphSize, + std::vector &graphBlob, + ze_graph_desc_2_t &desc, + std::string &logBuffer); static bool getCompilerProperties(ze_device_graph_properties_t *pDeviceGraphProperties); static uint16_t getCompilerVersionMajor(); static uint16_t getCompilerVersionMinor(); @@ -27,9 +30,8 @@ class Compiler { const uint8_t *profData, uint64_t profSize, uint32_t *size, - void *data); - static void setCidLogLevel(std::string_view &str); - static bool logGetString(uint32_t *pSize, char *pLog); + void *data, + std::string &logBuffer); }; } // namespace L0 diff --git a/umd/level_zero_driver/ext/source/graph/compiler_common.cpp b/umd/level_zero_driver/ext/source/graph/compiler_common.cpp new file mode 100644 index 0000000..c23b625 --- /dev/null +++ b/umd/level_zero_driver/ext/source/graph/compiler_common.cpp @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "compiler_common.hpp" + +vcl_log_level_t cidLogLevel = VCL_LOG_NONE; + +void setCidLogLevel(std::string_view &str) { + if (str == "TRACE") { + cidLogLevel = VCL_LOG_TRACE; + } else if (str == "DEBUG") { + cidLogLevel = VCL_LOG_DEBUG; + } else if (str == "INFO") { + cidLogLevel = VCL_LOG_INFO; + } else if (str == "WARNING") { + cidLogLevel = VCL_LOG_WARNING; + } else 
if (str == "ERROR") { + cidLogLevel = VCL_LOG_ERROR; + } else { + cidLogLevel = VCL_LOG_NONE; + } +} diff --git a/umd/level_zero_driver/ext/source/graph/compiler_common.hpp b/umd/level_zero_driver/ext/source/graph/compiler_common.hpp new file mode 100644 index 0000000..3eba292 --- /dev/null +++ b/umd/level_zero_driver/ext/source/graph/compiler_common.hpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "vpux_driver_compiler.h" + +#include + +extern vcl_log_level_t cidLogLevel; + +void setCidLogLevel(std::string_view &str); diff --git a/umd/level_zero_driver/ext/source/graph/elf_parser.cpp b/umd/level_zero_driver/ext/source/graph/elf_parser.cpp index e7ad3d0..a1c8e14 100644 --- a/umd/level_zero_driver/ext/source/graph/elf_parser.cpp +++ b/umd/level_zero_driver/ext/source/graph/elf_parser.cpp @@ -10,7 +10,9 @@ #include "level_zero/ze_api.h" #include "level_zero/ze_graph_ext.h" #include "umd_common.hpp" +#include "vpux_driver_compiler.h" +#include #include #include #include @@ -30,9 +32,12 @@ class DriverBufferManager : public elf::BufferManager { VPU::VPUBufferObject::Type getBufferType(elf::Elf_Xword flag) { if (flag & elf::SHF_EXECINSTR) - return VPU::VPUBufferObject::Type::WriteCombineLow; + return VPU::VPUBufferObject::Type::WriteCombineFw; - return VPU::VPUBufferObject::Type::WriteCombineHigh; + if (flag & elf::VPU_SHF_PROC_SHAVE) + return VPU::VPUBufferObject::Type::WriteCombineShave; + + return VPU::VPUBufferObject::Type::WriteCombineDma; } elf::DeviceBuffer allocate(const elf::BufferSpecs &buffSpecs) override { @@ -119,25 +124,54 @@ bool ElfParser::checkMagic(uint8_t *ptr, size_t size) { return elf::utils::checkELFMagic(ptr); } +static inline elf::platform::ArchKind toArchKind(int platform) { + switch (platform) { + case VCL_PLATFORM_VPU3720: + return elf::platform::ArchKind::VPUX37XX; + default: + return elf::platform::ArchKind::UNKNOWN; + } +} + +static inline elf::Version toElfVersion(uint64_t ver) { + return elf::Version((ver >> 16) & UINT16_MAX, ver & UINT16_MAX, 0); +} + static std::shared_ptr -createHostParsedInference(elf::BufferManager *buffer, elf::AccessManager *access) { +createHostParsedInference(elf::BufferManager *buffer, + elf::AccessManager *access, + VPU::VPUDeviceContext *ctx, + std::string &errorMsg) { + elf::HPIConfigs config = {}; + config.nnVersion = toElfVersion(ctx->getFwMappedInferenceVersion()); + config.archKind = toArchKind(ctx->getCompilerPlatform()); + try { - return std::make_shared(buffer, access); + auto hpi = std::make_shared(buffer, access, config); + hpi->load(); + return hpi; } catch (const elf::AllocError &err) { LOG_E("Failed to create elf::HostParsedInference, type: elf::AllocError, reason: %s", err.what()); + errorMsg = err.what(); throw DriverError(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY); + } catch (elf::VersioningError &err) { + LOG_E("Failed to create elf::HostParsedInference, type: elf::VersioningError, reason: %s", + err.what()); + errorMsg = err.what(); + throw DriverError(ZE_RESULT_ERROR_UNSUPPORTED_VERSION); } catch (const elf::RuntimeError &err) { LOG_E("Failed to create elf::HostParsedInference, type: elf::RuntimeError, reason: %s", err.what()); + errorMsg = err.what(); } catch (const elf::LogicError &err) { LOG_E("Failed to create elf::HostParsedInference, type: elf::LogicError, reason: %s", err.what()); + errorMsg = err.what(); } catch (const std::exception &err) { LOG_E("Failed to create elf::HostParsedInference, type: std::exception, reason: %s", 
err.what()); - } catch (...) { - LOG_E("Failed to create elf::HostParsedInference, unknown exception type"); + errorMsg = err.what(); } return nullptr; } @@ -150,6 +184,10 @@ copyHostParsedInference(std::shared_ptr &hpi) { LOG_E("Failed to copy elf::HostParsedInference, type: elf::AllocError, reason: %s", err.what()); throw DriverError(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY); + } catch (elf::VersioningError &err) { + LOG_E("Failed to copy elf::HostParsedInference, type: elf::VersioningError, reason: %s", + err.what()); + throw DriverError(ZE_RESULT_ERROR_UNSUPPORTED_VERSION); } catch (const elf::RuntimeError &err) { LOG_E("Failed to copy elf::HostParsedInference, type: elf::RuntimeError, reason: %s", err.what()); @@ -157,19 +195,19 @@ copyHostParsedInference(std::shared_ptr &hpi) { LOG_E("Failed to copy elf::HostParsedInference, type: elf::LogicError, reason: %s", err.what()); } catch (const std::exception &err) { - LOG_E("Failed to copy elf::HostParsedInference, type: std::exception, reason: %s", + LOG_E("Failed to create elf::HostParsedInference, type: std::exception, reason: %s", err.what()); - } catch (...) { - LOG_E("Failed to copy elf::HostParsedInference, unknown exception type"); } return nullptr; } -std::unique_ptr -ElfParser::getElfParser(VPU::VPUDeviceContext *ctx, uint8_t *ptr, size_t size) { +std::unique_ptr ElfParser::getElfParser(VPU::VPUDeviceContext *ctx, + uint8_t *ptr, + size_t size, + std::string &logBuffer) { auto bufferManager = std::make_unique(ctx); auto accessManager = std::make_unique(ptr, size); - auto hpi = createHostParsedInference(bufferManager.get(), accessManager.get()); + auto hpi = createHostParsedInference(bufferManager.get(), accessManager.get(), ctx, logBuffer); if (hpi != nullptr) return std::make_unique(ctx, std::move(bufferManager), @@ -179,7 +217,7 @@ ElfParser::getElfParser(VPU::VPUDeviceContext *ctx, uint8_t *ptr, size_t size) { return nullptr; } -ze_graph_argument_precision_t ElfParser::getTensorPrecision(elf::DType type) { +static ze_graph_argument_precision_t getTensorPrecision(elf::DType type) { switch (type) { case elf::DType::DType_NOT_SET: return ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN; @@ -228,9 +266,9 @@ static constexpr std::array, 8> or {0x12345, ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW}, {0x13452, ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC}}}; -constexpr float DIM_N = 0.f, DIM_C = 1.f, DIM_H = 2.f, DIM_W = 3.f, DIM_D = 4.f; +constexpr uint64_t DIM_N = 0, DIM_C = 1, DIM_H = 2, DIM_W = 3, DIM_D = 4; -constexpr std::array, ze_graph_argument_layout_t>, 7> +constexpr std::array, ze_graph_argument_layout_t>, 7> stridesToLayout = {{{{DIM_C}, ZE_GRAPH_ARGUMENT_LAYOUT_C}, {{DIM_N, DIM_C}, ZE_GRAPH_ARGUMENT_LAYOUT_NC}, {{DIM_C, DIM_H, DIM_W}, ZE_GRAPH_ARGUMENT_LAYOUT_CHW}, @@ -240,7 +278,7 @@ constexpr std::array, ze_graph_argument_layout_t> {{DIM_N, DIM_D, DIM_H, DIM_W, DIM_C}, ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC}}}; /* Compute layout from strides. Copied from blobParser and kmb-plugin. 
*/ -static ze_graph_argument_layout_t computeLayoutFromStride(const float *strides, +static ze_graph_argument_layout_t computeLayoutFromStride(const uint64_t *strides, size_t stride_size) { const size_t TENSOR_5D_STRIDE_MAX = 6; const size_t TENSOR_4D_MAX = 5; @@ -323,76 +361,86 @@ static ze_graph_argument_layout_t getDeviceLayout(const elf::TensorRef &tensor) return computeLayoutFromStride(tensor.strides, tensor.strides_size); } -bool ElfParser::getArgumentProperties(std::vector &props) const { - auto metadata = hpi->getMetadata(); +static void fillDeviceProperties(const elf::TensorRef &devTensor, + ze_graph_argument_properties_3_t &prop) { + for (size_t j = 0; j < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; j++) { + if (j < devTensor.dimensions_size) { + prop.dims[j] = devTensor.dimensions[j]; + } else { + prop.dims[j] = 1; + } + } + prop.dims_count = devTensor.dimensions_size; + prop.devicePrecision = getTensorPrecision(devTensor.data_type); + prop.deviceLayout = getDeviceLayout(devTensor); +} - props.reserve(metadata.in_tenosr_count + metadata.out_tensor_count); +static void fillNetworkProperties(const elf::TensorRef &netTensor, + ze_graph_argument_properties_3_t &prop) { + memcpy(prop.name, netTensor.name, sizeof(prop.name) - 1); + prop.networkPrecision = getTensorPrecision(netTensor.data_type); + prop.networkLayout = getNetworkLayout(netTensor); +} - auto convert = [](const elf::TensorRef &devTensor, - const elf::TensorRef &netTensor, - const elf::OVNode &node, - ze_graph_argument_type_t argType, - ze_graph_argument_properties_3_t &prop) { - prop.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; +static void fillOVNodeProperties(const elf::OVNode &node, ze_graph_argument_properties_3_t &prop) { + memcpy(prop.debug_friendly_name, node.friendly_name, sizeof(prop.debug_friendly_name) - 1); + if (node.tensor_names_count > ZE_MAX_GRAPH_TENSOR_NAMES_SIZE) { + LOG_E("Tensor names count exceeds the Graph Extension limits (%u > %u)", + node.tensor_names_count, + ZE_MAX_GRAPH_TENSOR_NAMES_SIZE); + return; + } - strncpy(prop.name, netTensor.name, sizeof(prop.name) - 1); + for (unsigned i = 0; i < node.tensor_names_count; i++) { + memcpy(prop.associated_tensor_names[i], + node.tensor_names[i], + sizeof(prop.associated_tensor_names[i]) - 1); + } + prop.associated_tensor_names_count = node.tensor_names_count; +} - prop.type = argType; - for (size_t j = 0; j < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; j++) { - if (j < devTensor.dimensions_size) { - prop.dims[j] = devTensor.dimensions[j]; - } else { - prop.dims[j] = 1; - } - } - prop.dims_count = devTensor.dimensions_size; - prop.networkPrecision = getTensorPrecision(netTensor.data_type); - prop.networkLayout = getNetworkLayout(netTensor); - prop.devicePrecision = getTensorPrecision(devTensor.data_type); - prop.deviceLayout = getDeviceLayout(devTensor); +bool ElfParser::getArgumentProperties(std::vector &props) const { + auto metadata = hpi->getMetadata(); + props.reserve(metadata->mInTensorDescriptors.size() + metadata->mOutTensorDescriptors.size()); + for (size_t i = 0; i < metadata->mInTensorDescriptors.size(); i++) { + ze_graph_argument_properties_3_t prop = {}; + prop.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; + prop.type = ZE_GRAPH_ARGUMENT_TYPE_INPUT; + + // TODO: Add support for quantization parameters (EISW-72376) prop.quantReverseScale = 1.f; prop.quantZeroPoint = 0; - strncpy(prop.debug_friendly_name, node.friendly_name, sizeof(prop.debug_friendly_name) - 1); + if (metadata->mInTensorDescriptors.size() > i) + 
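fillDeviceProperties() above pads unused dimensions with 1 so consumers can treat every tensor as ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE-dimensional; the rule in isolation (padDims is a name invented for this sketch):

#include <cstddef>
#include <cstdint>

// Copies n source dimensions and fills the tail with 1, e.g. {8, 16}
// becomes {8, 16, 1, 1, ...} up to maxDims entries.
void padDims(uint32_t *dims, size_t maxDims, const uint32_t *src, size_t n) {
    for (size_t j = 0; j < maxDims; j++)
        dims[j] = j < n ? src[j] : 1u;
}
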
fillDeviceProperties(metadata->mInTensorDescriptors[i], prop); - if (node.tensor_names_count > ZE_MAX_GRAPH_TENSOR_NAMES_SIZE) { - LOG_E("Tensor names count exceeds the Graph Extension limits (%u > %u)", - node.tensor_names_count, - ZE_MAX_GRAPH_TENSOR_NAMES_SIZE); - return false; - } - - for (unsigned i = 0; i < node.tensor_names_count; i++) { - strncpy(prop.associated_tensor_names[i], - node.tensor_names[i], - sizeof(prop.associated_tensor_names[i]) - 1); - } - prop.associated_tensor_names_count = node.tensor_names_count; + if (metadata->mNetInputs.size() > i) + fillNetworkProperties(metadata->mNetInputs[i], prop); - return true; - }; - - for (size_t i = 0; i < metadata.in_tenosr_count; i++) { - ze_graph_argument_properties_3_t prop = {}; - if (!convert(metadata.in_tensor_desc[i], - metadata.net_input[i], - metadata.ov_parameters[i], - ZE_GRAPH_ARGUMENT_TYPE_INPUT, - prop)) - return false; + if (metadata->mOVParameters.size() > i) + fillOVNodeProperties(metadata->mOVParameters[i], prop); props.push_back(prop); } - for (size_t i = 0; i < metadata.out_tensor_count; i++) { + for (size_t i = 0; i < metadata->mOutTensorDescriptors.size(); i++) { ze_graph_argument_properties_3_t prop = {}; - if (!convert(metadata.out_tensor_desc[i], - metadata.net_output[i], - metadata.ov_results[i], - ZE_GRAPH_ARGUMENT_TYPE_OUTPUT, - prop)) - return false; + prop.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; + prop.type = ZE_GRAPH_ARGUMENT_TYPE_OUTPUT; + + // TODO: Add support for quantization parameters (EISW-72376) + prop.quantReverseScale = 0.f; + prop.quantZeroPoint = 0; + + if (metadata->mOutTensorDescriptors.size() > i) + fillDeviceProperties(metadata->mOutTensorDescriptors[i], prop); + + if (metadata->mNetOutputs.size() > i) + fillNetworkProperties(metadata->mNetOutputs[i], prop); + + if (metadata->mOVResults.size() > i) + fillOVNodeProperties(metadata->mOVResults[i], prop); props.push_back(prop); } @@ -423,7 +471,7 @@ constexpr std::array, 18> toM bool ElfParser::getArgumentMetadata(std::vector &args) const { auto metadata = hpi->getMetadata(); - args.reserve(metadata.net_input_count + metadata.net_output_count); + args.reserve(metadata->mNetInputs.size() + metadata->mNetOutputs.size()); auto convert = [](const elf::OVNode &node, ze_graph_argument_type_t type, @@ -431,7 +479,7 @@ bool ElfParser::getArgumentMetadata(std::vector &a arg.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_METADATA; arg.type = type; - strncpy(arg.friendly_name, node.friendly_name, sizeof(arg.friendly_name) - 1); + memcpy(arg.friendly_name, node.friendly_name, sizeof(arg.friendly_name) - 1); for (const auto &itr : toMetadataType) { if (itr.first == node.type) { @@ -450,35 +498,36 @@ bool ElfParser::getArgumentMetadata(std::vector &a return false; } - for (unsigned i = 0; i < node.tensor_names_count; i++) - strncpy(arg.tensor_names[i], node.tensor_names[i], sizeof(arg.tensor_names[i]) - 1); + for (unsigned i = 0; i < node.tensor_names_count; i++) { + memcpy(arg.tensor_names[i], node.tensor_names[i], sizeof(arg.tensor_names[i]) - 1); + } arg.tensor_names_count = node.tensor_names_count; - strncpy(arg.input_name, node.input_name, sizeof(arg.input_name) - 1); + memcpy(arg.input_name, node.input_name, sizeof(arg.input_name) - 1); return true; }; - for (size_t i = 0; i < metadata.net_input_count; i++) { - if (i >= metadata.ov_parameters_count) { + for (size_t i = 0; i < metadata->mNetInputs.size(); i++) { + if (i >= metadata->mOVParameters.size()) { args.emplace_back(); continue; } ze_graph_argument_metadata_t arg = {}; - if 
(!convert(metadata.ov_parameters[i], ZE_GRAPH_ARGUMENT_TYPE_INPUT, arg)) + if (!convert(metadata->mOVParameters[i], ZE_GRAPH_ARGUMENT_TYPE_INPUT, arg)) return false; args.push_back(arg); } - for (size_t i = 0; i < metadata.net_output_count; i++) { - if (i >= metadata.ov_results_count) { + for (size_t i = 0; i < metadata->mNetOutputs.size(); i++) { + if (i >= metadata->mOVResults.size()) { args.emplace_back(); continue; } ze_graph_argument_metadata_t arg = {}; - if (!convert(metadata.ov_results[i], ZE_GRAPH_ARGUMENT_TYPE_OUTPUT, arg)) + if (!convert(metadata->mOVResults[i], ZE_GRAPH_ARGUMENT_TYPE_OUTPUT, arg)) return false; args.push_back(arg); diff --git a/umd/level_zero_driver/ext/source/graph/elf_parser.hpp b/umd/level_zero_driver/ext/source/graph/elf_parser.hpp index d169e39..eb8fc1c 100644 --- a/umd/level_zero_driver/ext/source/graph/elf_parser.hpp +++ b/umd/level_zero_driver/ext/source/graph/elf_parser.hpp @@ -31,7 +31,7 @@ class ElfParser : public IParser { static bool checkMagic(uint8_t *ptr, size_t size); static std::unique_ptr - getElfParser(VPU::VPUDeviceContext *ctx, uint8_t *ptr, size_t size); + getElfParser(VPU::VPUDeviceContext *ctx, uint8_t *ptr, size_t size, std::string &logBuffer); bool getArgumentProperties(std::vector &props) const; bool getArgumentMetadata(std::vector &args) const; @@ -59,8 +59,6 @@ class ElfParser : public IParser { std::shared_ptr &execHpi) override; private: - static ze_graph_argument_precision_t getTensorPrecision(elf::DType type); - bool applyInputOutputs(elf::HostParsedInference &hpi, const std::vector> &inputs, const std::vector> &outputs, diff --git a/umd/level_zero_driver/ext/source/graph/graph.cpp b/umd/level_zero_driver/ext/source/graph/graph.cpp index 063bd26..1f75c1a 100644 --- a/umd/level_zero_driver/ext/source/graph/graph.cpp +++ b/umd/level_zero_driver/ext/source/graph/graph.cpp @@ -16,9 +16,10 @@ #include "vpu_driver/source/device/vpu_device.hpp" #include "vpu_driver/source/utilities/log.hpp" -#include - namespace L0 { + +static thread_local std::string lastErrorMsg = {}; + Graph::Graph(Context *pCtx, const ze_graph_desc_2_t *pDesc) : pContext(pCtx) , ctx(pCtx->getDeviceContext()) @@ -60,9 +61,8 @@ ze_result_t Graph::create(const ze_context_handle_t hContext, } ze_result_t Graph::destroy() { - LOG_V("Destroying graph."); - pContext->removeObject(this); + LOG_I("Graph destroyed - %p", this); return ZE_RESULT_SUCCESS; } @@ -209,24 +209,24 @@ ze_result_t Graph::createProfilingPool(uint32_t count, return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - size_t profilingPoolSize = count * getFwDataCacheAlign(profilingOutputSize); - auto *profilingPoolBuffer = - ctx->createInternalBufferObject(profilingPoolSize, VPU::VPUBufferObject::Type::CachedHigh); - if (profilingPoolBuffer == nullptr) { - LOG_E("Failed to allocate buffer object for profiling pool"); - return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; - } - - auto *profilingPool = - new GraphProfilingPool(ctx, profilingOutputSize, count, profilingPoolBuffer, &graphBlobRaw); - if (profilingPool == nullptr) { - LOG_E("Failed to create profiling pool"); - if (!ctx->freeMemAlloc(profilingPoolBuffer)) - LOG_W("Failed to free profiling pool"); - return ZE_RESULT_ERROR_UNKNOWN; + try { + auto profilingPool = + std::make_unique(ctx, + profilingOutputSize, + count, + &graphBlobRaw, + [this](auto *x) { profilingPools.erase(x); }); + auto [it, success] = profilingPools.emplace(profilingPool.get(), std::move(profilingPool)); + L0_THROW_WHEN(!success, + "Failed to place new GraphProfilingPool in profiling pools 
map", + ZE_RESULT_ERROR_UNKNOWN); + + *phProfilingPool = it->second.get(); + LOG_I("GraphProfilingPool created - %p", *phProfilingPool); + } catch (const DriverError &err) { + return err.result(); } - *phProfilingPool = profilingPool->toHandle(); return ZE_RESULT_SUCCESS; }; @@ -298,7 +298,7 @@ void Graph::initialize() { size_t graphSize = desc.inputSize; if (desc.format == ZE_GRAPH_FORMAT_NGRAPH_LITE) { - if (!Compiler::getCompiledBlob(graphSize, graphBlobRaw, desc)) { + if (!Compiler::getCompiledBlob(graphSize, graphBlobRaw, desc, lastErrorMsg)) { LOG_E("Failed to get compiled blob!"); throw DriverError(ZE_RESULT_ERROR_UNKNOWN); } @@ -309,9 +309,11 @@ void Graph::initialize() { if (ElfParser::checkMagic(graphBlobRaw.data(), graphBlobRaw.size())) { LOG_I("Detected Elf format"); - parser = ElfParser::getElfParser(ctx, graphBlobRaw.data(), graphBlobRaw.size()); + parser = + ElfParser::getElfParser(ctx, graphBlobRaw.data(), graphBlobRaw.size(), lastErrorMsg); } else { LOG_E("Failed to recognize blob format"); + lastErrorMsg = "Failed to recognize native binary format"; throw DriverError(ZE_RESULT_ERROR_INVALID_ARGUMENT); } @@ -330,8 +332,6 @@ void Graph::initialize() { outputArgs.emplace_back(nullptr, size); } } - - LOG_V("Graph initialized."); } std::shared_ptr Graph::allocateGraphInitCommand(VPU::VPUDeviceContext *ctx) { @@ -354,11 +354,19 @@ ze_result_t Graph::getLogString(uint32_t *pSize, char *pBuildLog) { return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - if (!Compiler::logGetString(pSize, pBuildLog)) { - LOG_E("Failed to get error message!"); - return ZE_RESULT_ERROR_UNKNOWN; + if (*pSize == 0) { + *pSize = static_cast(lastErrorMsg.size()); + return ZE_RESULT_SUCCESS; } + if (pBuildLog == nullptr) { + LOG_E("Invalid pBuildLog pointer"); + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; + } + + *pSize = std::min(*pSize, static_cast(lastErrorMsg.size())); + memcpy(pBuildLog, lastErrorMsg.data(), *pSize); + return ZE_RESULT_SUCCESS; } diff --git a/umd/level_zero_driver/ext/source/graph/graph.hpp b/umd/level_zero_driver/ext/source/graph/graph.hpp index 0b44bd0..6feb4bf 100644 --- a/umd/level_zero_driver/ext/source/graph/graph.hpp +++ b/umd/level_zero_driver/ext/source/graph/graph.hpp @@ -21,6 +21,7 @@ #include #include +#include struct _ze_graph_handle_t {}; @@ -104,6 +105,7 @@ struct Graph : _ze_graph_handle_t, IContextObject { uint32_t profilingOutputSize = 0u; std::shared_ptr parser = nullptr; + std::unordered_map> profilingPools; }; } // namespace L0 diff --git a/umd/level_zero_driver/ext/source/graph/profiling_data.cpp b/umd/level_zero_driver/ext/source/graph/profiling_data.cpp index 188ab86..6aef19c 100644 --- a/umd/level_zero_driver/ext/source/graph/profiling_data.cpp +++ b/umd/level_zero_driver/ext/source/graph/profiling_data.cpp @@ -8,100 +8,98 @@ #include "level_zero/ze_api.h" #include "level_zero_driver/ext/source/graph/graph.hpp" +#include "level_zero_driver/ext/source/graph/profiling_data.hpp" +#include "level_zero_driver/include/l0_exception.hpp" +#include "vpu_driver/source/device/vpu_device_context.hpp" #include "vpu_driver/source/utilities/log.hpp" #include namespace L0 { +static thread_local std::string lastErrorMsg = {}; + GraphProfilingPool::GraphProfilingPool(VPU::VPUDeviceContext *ctx, const uint32_t size, const uint32_t count, - VPU::VPUBufferObject *profilingPoolBuffer, - std::vector *graphBlobRaw) + std::vector *graphBlobRaw, + std::function destroyCb) : ctx(ctx) , querySize(size) - , profilingPool(profilingPoolBuffer) - , graphBlobRaw(graphBlobRaw) { - 
queryAllocation.resize(count, nullptr); -}; - -GraphProfilingQuery::GraphProfilingQuery(VPU::VPUDeviceContext *ctx, - GraphProfilingPool *poolInput, - const uint32_t index, + , graphBlobRaw(graphBlobRaw) + , queries(count) + , destroyCb(std::move(destroyCb)) { + size_t poolSize = queries.size() * getFwDataCacheAlign(querySize); + poolBuffer = ctx->createInternalBufferObject(poolSize, VPU::VPUBufferObject::Type::CachedDma); + L0_THROW_WHEN(poolBuffer == nullptr, + "Failed to allocate buffer object for profiling pool", + ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY); +} + +GraphProfilingPool::~GraphProfilingPool() { + if (poolBuffer != nullptr && !ctx->freeMemAlloc(poolBuffer)) { + LOG_W("GraphProfilingPool memory failed to be free'd"); + } +} + +GraphProfilingQuery::GraphProfilingQuery(std::vector *graphBlobRaw, const uint32_t size, - void *queryPtrInput) - : pool(poolInput) - , index(index) - , size(size) - , queryPtr(queryPtrInput){}; + void *pData, + std::function &&destroyCb) + : size(size) + , data(pData) + , graphBlobRaw(graphBlobRaw) + , destroyCb(std::move(destroyCb)) {} ze_result_t GraphProfilingPool::createProfilingQuery(const uint32_t index, ze_graph_profiling_query_handle_t *phProfilingQuery) { - if (index >= queryAllocation.size()) { - LOG_E("Index (%u) passed in is incorrect. Pool size (%lu).", index, queryAllocation.size()); + if (index >= queries.size()) { + LOG_E("Index %u is greater than pool size %lu", index, queries.size()); return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - if (!phProfilingQuery) { - LOG_E("ProfilingQuery handle is NULL."); + if (phProfilingQuery == nullptr) { + LOG_E("Invalid phProfilingQuery pointer"); return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - if (queryAllocation[index] != nullptr) { - LOG_E("Pool at index (%u) has already been allocated to Query (%p)", - index, - queryAllocation[index]); + if (queries[index] != nullptr) { + LOG_E("Index %u is occupied by GraphProfilingQuery (%p)", index, queries[index].get()); return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } - auto *queryPtr = reinterpret_cast(profilingPool->getBasePointer() + - (index * getFwDataCacheAlign(querySize))); - - auto profilingQuery = new GraphProfilingQuery(ctx, this, index, querySize, queryPtr); - if (!profilingQuery) { - LOG_E("ProfilingQuery creation has failed."); - return ZE_RESULT_ERROR_UNKNOWN; - } - - queryAllocation[index] = profilingQuery; - - *phProfilingQuery = profilingQuery->toHandle(); + auto *dataPtr = poolBuffer->getBasePointer() + (index * getFwDataCacheAlign(querySize)); + queries[index] = + std::make_unique(graphBlobRaw, querySize, dataPtr, [this, index]() { + queries[index].reset(); + }); + *phProfilingQuery = queries[index].get(); + LOG_I("GraphProfilingQuery created - %p", *phProfilingQuery); return ZE_RESULT_SUCCESS; } -void GraphProfilingPool::removeQuery(GraphProfilingQuery *profilingQuery) { - uint32_t index = profilingQuery->getIndex(); - - if (queryAllocation[index] != nullptr) { - queryAllocation[index] = nullptr; - LOG_I("QueryPool allocation at index %u has been reset.", index); - } else { - LOG_W("Query (%p) not found in QueryPool (%p)", profilingQuery, this); - } -} - ze_result_t GraphProfilingQuery::getData(ze_graph_profiling_type_t profilingType, uint32_t *pSize, uint8_t *pData) { if (pSize == nullptr) { - LOG_E("Invalid size pointer."); + LOG_E("Invalid pSize pointer"); return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } if (profilingType == ZE_GRAPH_PROFILING_LAYER_LEVEL || profilingType == ZE_GRAPH_PROFILING_TASK_LEVEL) { return 
Compiler::getDecodedProfilingBuffer(profilingType, - pool->getGraphBlobRaw(), - static_cast<uint8_t *>(queryPtr), + graphBlobRaw, + static_cast<uint8_t *>(data), size, pSize, - pData); + pData, + lastErrorMsg); } if (profilingType != ZE_GRAPH_PROFILING_RAW) { - LOG_E("Invalid profiling type."); + LOG_E("Invalid profiling type"); return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -109,55 +107,50 @@ ze_result_t GraphProfilingQuery::getData(ze_graph_profiling_type_t profilingType *pSize = size; if (pData != nullptr) - memcpy(pData, queryPtr, *pSize); + memcpy(pData, data, *pSize); return ZE_RESULT_SUCCESS; } ze_result_t GraphProfilingQuery::destroy() { - pool->removeQuery(this); - delete this; + destroyCb(); + LOG_I("GraphProfilingQuery destroyed - %p", this); return ZE_RESULT_SUCCESS; } ze_result_t GraphProfilingQuery::getLogString(uint32_t *pSize, char *pProfilingLog) { if (pSize == nullptr) { - LOG_E("Invalid size pointer."); + LOG_E("Invalid pSize pointer"); return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - if (!Compiler::logGetString(pSize, pProfilingLog)) { - LOG_E("Failed to get error message!"); - return ZE_RESULT_ERROR_UNKNOWN; + if (*pSize == 0) { + *pSize = static_cast<uint32_t>(lastErrorMsg.size()); + return ZE_RESULT_SUCCESS; + } + + if (pProfilingLog == nullptr) { + LOG_E("Invalid pProfilingLog pointer"); + return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } + *pSize = std::min(*pSize, static_cast<uint32_t>(lastErrorMsg.size())); + memcpy(pProfilingLog, lastErrorMsg.data(), *pSize); return ZE_RESULT_SUCCESS; } ze_result_t GraphProfilingPool::destroy() { - size_t index = 0; - for (auto &query : queryAllocation) { - if (query != nullptr) { - LOG_E("Query object (%p) of index (%lu) has not been deleted.", query, index++); + for (size_t i = 0; i < queries.size(); i++) { + if (queries[i] != nullptr) { + LOG_E("GraphProfilingQuery object (%p) at index (%lu) has not been destroyed", + queries[i].get(), + i); return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } } - queryAllocation.clear(); - - if (!ctx) { - LOG_E("Failed to get device context."); - return ZE_RESULT_ERROR_DEVICE_LOST; - } - - if (profilingPool != nullptr && !ctx->freeMemAlloc(profilingPool)) { - LOG_W("GraphProfilingPool memory failed to be free'd."); - } else { - LOG_I("GraphProfilingPool memory free'd."); - } - - delete this; - + destroyCb(this); + LOG_I("GraphProfilingPool destroyed - %p", this); return ZE_RESULT_SUCCESS; } diff --git a/umd/level_zero_driver/ext/source/graph/profiling_data.hpp b/umd/level_zero_driver/ext/source/graph/profiling_data.hpp index fea1e1b..478a139 100644 --- a/umd/level_zero_driver/ext/source/graph/profiling_data.hpp +++ b/umd/level_zero_driver/ext/source/graph/profiling_data.hpp @@ -7,22 +7,23 @@ #pragma once +#include "level_zero_driver/core/source/context/context.hpp" +#include <functional> + struct _ze_graph_profiling_query_handle_t {}; struct _ze_graph_profiling_pool_handle_t {}; -#include - namespace L0 { + struct Graph; struct GraphProfilingPool; struct GraphProfilingQuery : _ze_graph_profiling_query_handle_t { public: - GraphProfilingQuery(VPU::VPUDeviceContext *ctx, - GraphProfilingPool *poolInput, - const uint32_t index, + GraphProfilingQuery(std::vector<uint8_t> *graphBlobRaw, const uint32_t size, - void *queryPtrInput); + void *queryPtrInput, + std::function<void()> &&destroyCb); ze_result_t destroy(); ze_result_t getData(ze_graph_profiling_type_t profilingType, uint32_t *pSize, uint8_t *pData); @@ -33,15 +34,13 @@ struct GraphProfilingQuery : _ze_graph_profiling_query_handle_t { return static_cast<GraphProfilingQuery *>(handle); } - inline uint32_t getIndex() const { return index; } - 
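The profiling rework in these hunks replaces manual delete with owning containers plus erase callbacks; the pattern in miniature (names are invented for this sketch; once destroy() runs, the object must not be touched again):

#include <functional>
#include <memory>
#include <vector>

// The pool owns children through unique_ptr; each child gets a callback
// that clears its slot, so no destroy path calls delete directly.
struct Child {
    explicit Child(std::function<void()> &&cb) : destroyCb(std::move(cb)) {}
    void destroy() {
        auto cb = std::move(destroyCb); // move out first: the callback destroys *this
        cb();
    }
    std::function<void()> destroyCb;
};

struct Pool {
    Pool() : slots(8) {}
    Child *create(size_t index) {
        slots[index] = std::make_unique<Child>([this, index]() { slots[index].reset(); });
        return slots[index].get();
    }
    std::vector<std::unique_ptr<Child>> slots;
};
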
inline uint32_t getSize() const { return size; } - inline void *getQueryPtr() const { return queryPtr; } + inline void *getQueryPtr() const { return data; } private: - GraphProfilingPool *pool = nullptr; - uint32_t index = 0u; uint32_t size = 0u; - void *queryPtr = nullptr; + void *data = nullptr; + std::vector *graphBlobRaw; + std::function destroyCb; }; struct GraphProfilingPool : _ze_graph_profiling_pool_handle_t { @@ -49,15 +48,13 @@ struct GraphProfilingPool : _ze_graph_profiling_pool_handle_t { GraphProfilingPool(VPU::VPUDeviceContext *ctx, const uint32_t size, const uint32_t count, - VPU::VPUBufferObject *profilingPoolBuffer, - std::vector *graphBlobRaw); - ze_result_t destroy(); + std::vector *graphBlobRaw, + std::function destroyCb); + ~GraphProfilingPool(); + ze_result_t destroy(); ze_result_t createProfilingQuery(const uint32_t index, ze_graph_profiling_query_handle_t *phProfilingQuery); - void removeQuery(GraphProfilingQuery *profilingQuery); - - std::vector *getGraphBlobRaw() { return graphBlobRaw; }; inline ze_graph_profiling_pool_handle_t toHandle() { return this; } static GraphProfilingPool *fromHandle(ze_graph_profiling_pool_handle_t handle) { @@ -65,12 +62,13 @@ struct GraphProfilingPool : _ze_graph_profiling_pool_handle_t { } private: - VPU::VPUDeviceContext *ctx; + VPU::VPUDeviceContext *ctx = nullptr; uint32_t querySize = 0u; - VPU::VPUBufferObject *profilingPool = nullptr; + VPU::VPUBufferObject *poolBuffer = nullptr; std::vector *graphBlobRaw; - std::vector queryAllocation; + std::vector> queries; + std::function destroyCb; }; }; // namespace L0 diff --git a/umd/level_zero_driver/ext/source/graph/query_network.cpp b/umd/level_zero_driver/ext/source/graph/query_network.cpp index c6f3e09..62fd926 100644 --- a/umd/level_zero_driver/ext/source/graph/query_network.cpp +++ b/umd/level_zero_driver/ext/source/graph/query_network.cpp @@ -10,6 +10,7 @@ #include "vpu_driver/source/utilities/log.hpp" #include "compiler.hpp" +#include "compiler_common.hpp" #include "query_network.hpp" #include "level_zero/ze_api.h" @@ -49,7 +50,7 @@ ze_result_t QueryNetwork::create(ze_context_handle_t hContext, vcl_compiler_desc_t compilerDesc = {}; compilerDesc.platform = static_cast(pCtx->getCompilerPlatform()); - compilerDesc.debug_level = VCL_LOG_NONE; + compilerDesc.debug_level = cidLogLevel; vcl_compiler_handle_t compiler = NULL; vcl_log_handle_t logHandle = NULL; diff --git a/umd/level_zero_driver/ext/source/graph/query_network.hpp b/umd/level_zero_driver/ext/source/graph/query_network.hpp index f0cc324..06e944f 100644 --- a/umd/level_zero_driver/ext/source/graph/query_network.hpp +++ b/umd/level_zero_driver/ext/source/graph/query_network.hpp @@ -26,6 +26,7 @@ struct QueryNetwork : _ze_graph_query_network_handle_t { static QueryNetwork *fromHandle(ze_graph_query_network_handle_t handle) { return static_cast(handle); } + inline ze_graph_query_network_handle_t toHandle() { return this; } private: diff --git a/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp b/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp index 172f14e..f206ebe 100644 --- a/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp +++ b/umd/level_zero_driver/ext/source/graph/vcl_symbols.hpp @@ -40,8 +40,7 @@ class Vcl { Vcl() { std::string errorMsg; for (auto name : compilerNames) { - // TODO: Used RTLD_NODELETE as WA for segfault on dlclose()) - handle = VclHandle(dlopen(name, RTLD_LAZY | RTLD_LOCAL | RTLD_NODELETE), &closeHandle); + handle = VclHandle(dlopen(name, RTLD_LAZY | RTLD_LOCAL), &closeHandle); if 
(handle) break; diff --git a/umd/level_zero_driver/tools/source/metrics/metric.cpp b/umd/level_zero_driver/tools/source/metrics/metric.cpp index 082e235..b67750f 100644 --- a/umd/level_zero_driver/tools/source/metrics/metric.cpp +++ b/umd/level_zero_driver/tools/source/metrics/metric.cpp @@ -131,6 +131,8 @@ ze_result_t MetricGroup::calculateMetricValues(zet_metric_group_calculation_type const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues) { + ze_result_t result = ZE_RESULT_SUCCESS; + if (pRawData == nullptr) { LOG_E("Invalid pRawData pointer."); return ZE_RESULT_ERROR_INVALID_NULL_POINTER; @@ -146,38 +148,43 @@ ze_result_t MetricGroup::calculateMetricValues(zet_metric_group_calculation_type return ZE_RESULT_ERROR_INVALID_ENUMERATION; } - uint32_t metricSize = safe_cast(metrics.size()); + switch (type) { + case ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES: + result = calculateMetricValues(rawDataSize, pRawData, pMetricValueCount, pMetricValues); + break; + case ZET_METRIC_GROUP_CALCULATION_TYPE_MAX_METRIC_VALUES: + result = calculateMaxMetricValues(rawDataSize, pRawData, pMetricValueCount, pMetricValues); + break; + default: + result = ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + + return result; +} + +ze_result_t MetricGroup::calculateMetricValues(size_t rawDataSize, + const uint8_t *pRawData, + uint32_t *pMetricValueCount, + zet_typed_value_t *pMetricValues) { + size_t sampleCount = rawDataSize / allocationSize; + + uint32_t metricCount = safe_cast(metrics.size()); + + size_t metricValueCount = sampleCount * metricCount; + if (*pMetricValueCount == 0) { - *pMetricValueCount = metricSize; + *pMetricValueCount = static_cast(metricValueCount); return ZE_RESULT_SUCCESS; - } else if (*pMetricValueCount > metricSize) { - *pMetricValueCount = metricSize; } - if (pMetricValues != nullptr) { - if (type == ZET_METRIC_GROUP_CALCULATION_TYPE_MAX_METRIC_VALUES) { - if (*pMetricValueCount != metrics.size()) { - LOG_E("Invalid pMetricValueCount."); - return ZE_RESULT_ERROR_INVALID_SIZE; - } - calculateMaxMetricValues(pRawData, pMetricValueCount, pMetricValues); - } else { - calculateMetricValues(pRawData, pMetricValueCount, pMetricValues); - } - } else { - LOG_I("Input pMetricValues pointer is NULL."); - } + *pMetricValueCount = std::min(*pMetricValueCount, static_cast(metricValueCount)); - return ZE_RESULT_SUCCESS; -} - -void MetricGroup::calculateMetricValues(const uint8_t *pRawData, - uint32_t *pMetricValueCount, - zet_typed_value_t *pMetricValues) { for (uint32_t i = 0; i < *pMetricValueCount; i++) { - zet_metric_properties_t properties; - metrics[i]->getProperties(&properties); + zet_metric_properties_t properties = {}; + metrics[i % metricCount]->getProperties(&properties); + pMetricValues[i].type = properties.resultType; + auto rawData = const_cast(pRawData); switch (properties.resultType) { @@ -202,44 +209,66 @@ void MetricGroup::calculateMetricValues(const uint8_t *pRawData, pRawData += allocationSize / metrics.size(); } + + return ZE_RESULT_SUCCESS; } -void MetricGroup::calculateMaxMetricValues(const uint8_t *pRawData, - uint32_t *pMetricValueCount, - zet_typed_value_t *pMetricValues) { - std::vector metricValues(*pMetricValueCount); - calculateMetricValues(pRawData, pMetricValueCount, metricValues.data()); +ze_result_t MetricGroup::calculateMaxMetricValues(size_t rawDataSize, + const uint8_t *pRawData, + uint32_t *pMetricValueCount, + zet_typed_value_t *pMetricValues) { + uint32_t metricCount = safe_cast(metrics.size()); - for (uint8_t i = 0; i < 
*pMetricValueCount; i++) { - zet_metric_properties_t properties; - metrics[i]->getProperties(&properties); - pMetricValues[i].type = properties.resultType; + if (*pMetricValueCount == 0) { + *pMetricValueCount = metricCount; + return ZE_RESULT_SUCCESS; + } + + if (*pMetricValueCount != metricCount) { + LOG_E( + "*pMetricValueCount must be equal to the number of metrics in the given group."); + return ZE_RESULT_ERROR_INVALID_SIZE; + } + + uint32_t count = 0; + calculateMetricValues(rawDataSize, pRawData, &count, nullptr); + + std::vector metricValues(count); + calculateMetricValues(rawDataSize, pRawData, &count, metricValues.data()); + + for (uint32_t i = 0; i < count; i++) { + zet_metric_properties_t properties = {}; + metrics[i % metricCount]->getProperties(&properties); + + pMetricValues[i % metricCount].type = properties.resultType; switch (properties.resultType) { case ZET_VALUE_TYPE_UINT32: - pMetricValues[i].value.ui32 = - std::max(pMetricValues[i].value.ui32, metricValues[i].value.ui32); + pMetricValues[i % metricCount].value.ui32 = + std::max(pMetricValues[i % metricCount].value.ui32, metricValues[i].value.ui32); break; case ZET_VALUE_TYPE_UINT64: - pMetricValues[i].value.ui64 = - std::max(pMetricValues[i].value.ui64, metricValues[i].value.ui64); + pMetricValues[i % metricCount].value.ui64 = + std::max(pMetricValues[i % metricCount].value.ui64, metricValues[i].value.ui64); break; case ZET_VALUE_TYPE_FLOAT32: - pMetricValues[i].value.fp32 = - std::max(pMetricValues[i].value.fp32, metricValues[i].value.fp32); + pMetricValues[i % metricCount].value.fp32 = + std::max(pMetricValues[i % metricCount].value.fp32, metricValues[i].value.fp32); break; case ZET_VALUE_TYPE_FLOAT64: - pMetricValues[i].value.fp64 = - std::max(pMetricValues[i].value.fp64, metricValues[i].value.fp64); + pMetricValues[i % metricCount].value.fp64 = + std::max(pMetricValues[i % metricCount].value.fp64, metricValues[i].value.fp64); break; case ZET_VALUE_TYPE_BOOL8: - pMetricValues[i].value.b8 = - std::max(pMetricValues[i].value.b8, metricValues[i].value.b8); + pMetricValues[i % metricCount].value.b8 = + std::max(pMetricValues[i % metricCount].value.b8, metricValues[i].value.b8); break; default: break; } } + + return ZE_RESULT_SUCCESS; } void MetricContext::deactivateMetricGroups(const int vpuFd) { diff --git a/umd/level_zero_driver/tools/source/metrics/metric.hpp b/umd/level_zero_driver/tools/source/metrics/metric.hpp index 501acda..8c75b3d 100644 --- a/umd/level_zero_driver/tools/source/metrics/metric.hpp +++ b/umd/level_zero_driver/tools/source/metrics/metric.hpp @@ -7,7 +7,6 @@ #pragma once -#include "level_zero_driver/tools/source/metrics/metric_streamer.hpp" #include "level_zero_driver/core/source/device/device.hpp" #include @@ -31,7 +30,6 @@ struct Metric : _zet_metric_handle_t { ze_result_t getProperties(zet_metric_properties_t *pProperties); static zet_metric_type_t getMetricType(VPU::CounterInfo::MetricType metricTypeInput); static zet_value_type_t getValueType(VPU::CounterInfo::ValueType valueTypeInput); - static size_t getMetricValueSize(VPU::CounterInfo::ValueType valueTypeInput); private: @@ -58,12 +56,14 @@ struct MetricGroup : _zet_metric_group_handle_t { const uint8_t *pRawData, uint32_t *pMetricValueCount, zet_typed_value_t *pMetricValues); - void calculateMetricValues(const uint8_t *pRawData, - uint32_t *pMetricValueCount, - zet_typed_value_t *pMetricValues); - void calculateMaxMetricValues(const uint8_t *pRawData, - uint32_t *pMetricValueCount, - zet_typed_value_t *pMetricValues); + ze_result_t
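// --- Editor's illustrative sketch (not part of this patch) ---
// calculateMaxMetricValues() above folds S samples of M metrics into M
// values; the `i % metricCount` indexing reduces sample-major data, i.e.
// values[s * M + m] contributes to out[m]. The same reduction in isolation,
// assuming uint64 counters and a zero-initialized output:
//
//     std::vector<uint64_t> out(M, 0);
//     for (size_t i = 0; i < values.size(); ++i)  // values.size() == S * M
//         out[i % M] = std::max(out[i % M], values[i]);
//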
calculateMetricValues(size_t rawDataSize, + const uint8_t *pRawData, + uint32_t *pMetricValueCount, + zet_typed_value_t *pMetricValues); + ze_result_t calculateMaxMetricValues(size_t rawDataSize, + const uint8_t *pRawData, + uint32_t *pMetricValueCount, + zet_typed_value_t *pMetricValues); void setActivationStatus(bool activationStatus) { activated = activationStatus; } bool isActivated() const { return activated; } @@ -87,15 +87,9 @@ struct MetricContext { ~MetricContext() = default; ze_result_t activateMetricGroups(int vpuFd, uint32_t count, zet_metric_group_handle_t *phMetricGroups); - MetricStreamer *getMetricStreamer() const { return pMetricStreamer; } - void setMetricStreamer(MetricStreamer *metricStreamerInput) { - pMetricStreamer = metricStreamerInput; - } // Value from FW - 10 [ms] constexpr static uint32_t MIN_SAMPLING_RATE_NS = 10'000'000; - uint32_t sampleSize = 0u; - uint64_t actualBufferSize = 0u; protected: /** @@ -107,7 +101,6 @@ struct MetricContext { private: Device *device = nullptr; - MetricStreamer *pMetricStreamer = nullptr; /** Deactivate all metric groups in activatedMetricGroups map. diff --git a/umd/level_zero_driver/tools/source/metrics/metric_query.cpp b/umd/level_zero_driver/tools/source/metrics/metric_query.cpp index 1a9ff01..06f07cb 100644 --- a/umd/level_zero_driver/tools/source/metrics/metric_query.cpp +++ b/umd/level_zero_driver/tools/source/metrics/metric_query.cpp @@ -10,90 +10,70 @@ #include "vpu_driver/source/utilities/log.hpp" +#include #include namespace L0 { -MetricQueryPool::MetricQueryPool(VPU::VPUDeviceContext *ctx, - MetricGroup *metricGroupInput, - const size_t poolSize) - : ctx(ctx) - , metricGroup(metricGroupInput) { - if (ctx == nullptr) { - LOG_E("Invalid VPU Device Context."); - return; - } - - if (poolSize == 0) { - LOG_E("Invalid metric query pool size."); - return; - } - - if (pQueryPool != nullptr) { - LOG_E("MetricQuery pool has already been allocated. Possible memory leak."); - return; - } - - if (metricGroup == nullptr) { - LOG_E("MetricGroup passed in is NULL."); - return; - } +/* + * Firmware requires to pass an array of addresses to where the data from metric (counter) should + * be copied. 
To fulfill this requirement, the driver uses the following structure for MetricQuery commands: + * + * struct { + * uint64_t addressTable[metricCount]; // points to data fields + * uint64_t data[metricCount]; + * } + */ +static size_t getMetricQueryAddrTableOffset(const size_t index, MetricGroup &group) { + uint64_t metricAddressTableSize = + getFwDataCacheAlign(sizeof(uint64_t) * group.getNumberOfMetricGroups()); + return index * getFwDataCacheAlign(metricAddressTableSize + group.getAllocationSize()); +} - size_t metricBufferSize = metricGroup->getAllocationSize(); - LOG_I("Query Data buffer size: %lu", metricBufferSize); +static size_t getMetricQueryDataOffset(const size_t index, MetricGroup &group) { + uint64_t metricAddressTableSize = + getFwDataCacheAlign(sizeof(uint64_t) * group.getNumberOfMetricGroups()); + return getMetricQueryAddrTableOffset(index, group) + metricAddressTableSize; +} - size_t numberOfGroups = metricGroup->getNumberOfMetricGroups(); - addressTableSize = getFwDataCacheAlign(sizeof(uint64_t) * numberOfGroups); +MetricQueryPool::MetricQueryPool(Context *pContext, + MetricGroup *metricGroupInput, + const size_t poolSize) + : pContext(pContext) + , ctx(pContext->getDeviceContext()) + , metricGroup(metricGroupInput) + , metricQueries(poolSize) { + size_t bufferSize = getMetricQueryAddrTableOffset(poolSize, *metricGroup); + pQueryPoolBuffer = + ctx->createInternalBufferObject(bufferSize, VPU::VPUBufferObject::Type::CachedFw); + L0_THROW_WHEN(pQueryPoolBuffer == nullptr, + "Failed to allocate buffer object for metric query pool", + ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY); +} - size_t bufferSize = poolSize * getFwDataCacheAlign(addressTableSize + metricBufferSize); - auto queryPoolBO = - ctx->createInternalBufferObject(bufferSize, VPU::VPUBufferObject::Type::CachedLow); - if (queryPoolBO == nullptr) { - LOG_E("Failed to allocate buffer object for query pool"); - return; +MetricQueryPool::~MetricQueryPool() { + if (pQueryPoolBuffer != nullptr && !ctx->freeMemAlloc(pQueryPoolBuffer)) { + LOG_W("MetricQueryPool memory failed to be free'd"); } - - pQueryPool = queryPoolBO->getBasePointer(); - LOG_I("MetricQueryPool pointer: %p", pQueryPool); - - queryAllocation.resize(poolSize, nullptr); - - // Mark successfully initialized. - initialized = true; } MetricQuery::MetricQuery(MetricGroup &metricGroupInput, - VPU::VPUDeviceContext *ctx, - MetricQueryPool *poolInput, - uint32_t indexInput, - uint64_t *queryPtrInput) + uint64_t *addressTablePtr, + uint64_t *dataPtr, + std::function &&destroyCb) : metricGroup(metricGroupInput) - , pool(poolInput) - , index(indexInput) { - uint32_t groupBit = metricGroup.getGroupIndex(); - metricGroupMask = 0x1 << groupBit; - LOG_I("Metric Group mask for MetricQuery: %x", metricGroupMask); - - if (pool == nullptr || queryPtrInput == nullptr) { - LOG_E("QueryPool/QueryArrayPtr passed in is NULL!"); - return; - } - - metricQueryPtr = reinterpret_cast(queryPtrInput); - - dataAddress = reinterpret_cast(metricQueryPtr) + pool->getAddressTableSize(); - - metricQueryPtr[groupBit] = ctx->getBufferVPUAddress(reinterpret_cast(dataAddress)); - - LOG_I("Data pointer %p, CPU address %p to VPU address table for metric groups, metric data VPU " - "address %lu stored at position %u", - reinterpret_cast(dataAddress), - metricQueryPtr, - metricQueryPtr[groupBit], - groupBit); - - // Mark successfully initialized.
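// --- Editor's worked example (not part of this patch; the numbers are
// hypothetical) --- how the two offset helpers above carve the pool buffer,
// assuming getFwDataCacheAlign() rounds up to a 64-byte firmware cache line,
// group.getNumberOfMetricGroups() == 3 and group.getAllocationSize() == 200:
//
//     address table size: align64(3 * sizeof(uint64_t)) = align64(24)  = 64
//     per-query slot:     align64(64 + 200)             = align64(264) = 320
//
//     query 0: addressTable @ offset   0, data @ offset  64
//     query 1: addressTable @ offset 320, data @ offset 384
//     query 2: addressTable @ offset 640, data @ offset 704
//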
- initialized = true; + , addrTablePtr(addressTablePtr) + , dataPtr(dataPtr) + , destroyCb(std::move(destroyCb)) { + metricGroupMask = 0x1 << metricGroup.getGroupIndex(); + LOG_I( + "MetricQuery -> group mask: %#x, cpu address table: %p, group index: %u, cpu data address: " + "%p, vpu data address: %#lx", + metricGroupMask, + addrTablePtr, + metricGroup.getGroupIndex(), + dataPtr, + addrTablePtr[metricGroup.getGroupIndex()]); } ze_result_t MetricQueryPool::createMetricQuery(uint32_t index, @@ -104,61 +84,40 @@ ze_result_t MetricQueryPool::createMetricQuery(uint32_t index, } if (!metricGroup->isActivated()) { - LOG_E("MetricGroup (%p) is not activated! Please activate metric group before Query " - "creation.", - metricGroup); + LOG_E("MetricGroup (%p) is not activated! Please activate metric group first", metricGroup); return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } - if (index >= queryAllocation.size()) { - LOG_E("Index (%u) passed in is incorrect. Pool size (%lu).", index, queryAllocation.size()); + if (index >= metricQueries.size()) { + LOG_E("Index (%u) passed in is incorrect. Pool size (%lu)", index, metricQueries.size()); return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - if (queryAllocation[index] != nullptr) { - LOG_E("Pool at index (%u) has already been allocated to Query (%p)", - index, - queryAllocation[index]); + if (metricQueries[index] != nullptr) { + LOG_E("Index (%u) is occupied by MetricQuery (%p)", index, metricQueries[index].get()); return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } - uint64_t *queryPtr = reinterpret_cast( - reinterpret_cast(pQueryPool) + - (index * getFwDataCacheAlign(addressTableSize + metricGroup->getAllocationSize()))); - - MetricQuery *metricQuery = new MetricQuery(*metricGroup, ctx, this, index, queryPtr); - if (metricQuery == nullptr) { - LOG_E("MetricQuery creation has failed."); - return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } else if (!metricQuery->isInitialized()) { - LOG_E("MetricQuery has not been initialized correctly."); - metricQuery->destroy(); - return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - queryAllocation[index] = metricQuery; + uint64_t *addressTablePtr = reinterpret_cast( + pQueryPoolBuffer->getBasePointer() + getMetricQueryAddrTableOffset(index, *metricGroup)); + uint64_t *dataPtr = reinterpret_cast(pQueryPoolBuffer->getBasePointer() + + getMetricQueryDataOffset(index, *metricGroup)); + addressTablePtr[metricGroup->getGroupIndex()] = ctx->getBufferVPUAddress(dataPtr); - *phMetricQuery = metricQuery->toHandle(); + metricQueries[index] = + std::make_unique(*metricGroup, addressTablePtr, dataPtr, [this, index]() { + metricQueries[index].reset(); + }); + *phMetricQuery = metricQueries[index].get(); + LOG_I("MetricQuery created - %p", *phMetricQuery); return ZE_RESULT_SUCCESS; } -void MetricQueryPool::removeQuery(MetricQuery *metricQuery) { - uint32_t index = metricQuery->getIndex(); - - if (queryAllocation[index] != nullptr) { - queryAllocation[index] = nullptr; - LOG_I("QueryPool allocation at index %u has been reset.", index); - } else { - LOG_W("Query (%p) not found in QueryPool (%p)", metricQuery, this); - } -} - ze_result_t MetricQuery::destroy() { - pool->removeQuery(this); - this->reset(); - delete this; + destroyCb(); + LOG_I("MetricQuery destroyed - %p", this); return ZE_RESULT_SUCCESS; } @@ -182,7 +141,7 @@ ze_result_t MetricQuery::getData(size_t *pRawDataSize, uint8_t *pRawData) { LOG_E("Failed to copy data. 
dataSize exceeds *pRawDataSize"); return ZE_RESULT_ERROR_UNKNOWN; } - memcpy(pRawData, reinterpret_cast(dataAddress), *pRawDataSize); + memcpy(pRawData, dataPtr, *pRawDataSize); } else { LOG_W("Input raw data pointer is NULL."); } @@ -191,37 +150,26 @@ ze_result_t MetricQuery::getData(size_t *pRawDataSize, uint8_t *pRawData) { } ze_result_t MetricQuery::reset() { - if (metricQueryPtr == nullptr) { - LOG_E("Invalid pointer. metricQueryPtr: %p.", metricQueryPtr); - return ZE_RESULT_ERROR_UNINITIALIZED; - } - size_t dataSize = metricGroup.getAllocationSize(); - memset(reinterpret_cast(dataAddress), 0, dataSize); + memset(dataPtr, 0, dataSize); - LOG_I("MetricQuery has been reset successfully."); + LOG_I("MetricQuery has been reset successfully"); return ZE_RESULT_SUCCESS; } ze_result_t MetricQueryPool::destroy() { - size_t index = 0; - for (auto &query : queryAllocation) { - if (query != nullptr) { - LOG_E("Query object (%p) of index (%lu) has not been deleted.", query, index++); + for (size_t i = 0; i < metricQueries.size(); i++) { + if (metricQueries[i] != nullptr) { + LOG_E("MetricQuery object (%p) at index (%lu) has not been destroyed", + metricQueries[i].get(), + i); return ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE; } } - queryAllocation.clear(); - if (pQueryPool != nullptr && !ctx->freeMemAlloc(pQueryPool)) { - LOG_W("MetricQueryPool memory failed to be free'd."); - } else { - LOG_I("MetricQueryPool memory free'd."); - } - - delete this; - + pContext->removeObject(this); + LOG_I("MetricQueryPool destroyed - %p", this); return ZE_RESULT_SUCCESS; } diff --git a/umd/level_zero_driver/tools/source/metrics/metric_query.hpp b/umd/level_zero_driver/tools/source/metrics/metric_query.hpp index 6abc556..bc340b6 100644 --- a/umd/level_zero_driver/tools/source/metrics/metric_query.hpp +++ b/umd/level_zero_driver/tools/source/metrics/metric_query.hpp @@ -8,6 +8,7 @@ #pragma once #include "umd_common.hpp" +#include "level_zero_driver/core/source/context/context.hpp" #include "level_zero_driver/tools/source/metrics/metric.hpp" #include @@ -20,10 +21,9 @@ struct MetricQueryPool; struct MetricQuery : _zet_metric_query_handle_t { MetricQuery(MetricGroup &metricGroupInput, - VPU::VPUDeviceContext *ctx, - MetricQueryPool *poolInput, - uint32_t indexInput, - uint64_t *queryPtrInput); + uint64_t *addressTablePtr, + uint64_t *dataPtr, + std::function &&destroyCb); ~MetricQuery() = default; inline zet_metric_query_handle_t toHandle() { return this; } @@ -32,35 +32,27 @@ struct MetricQuery : _zet_metric_query_handle_t { } ze_result_t destroy(); + ze_result_t getData(size_t *pRawDataSize, uint8_t *pRawData); ze_result_t reset(); - uint32_t getIndex() const { return index; } uint32_t getMetricGroupMask() const { return metricGroupMask; } - - // metricQueryPtr is a CPU address to table with VPU addresses for metric query command - uint64_t *getMetricAddrPtr() { return metricQueryPtr; } - + uint64_t *getMetricAddrPtr() { return addrTablePtr; } bool isGroupActivated() const { return metricGroup.isActivated(); } - bool isInitialized() const { return initialized; } protected: - uint64_t dataAddress = 0u; - uint64_t *metricQueryPtr = nullptr; MetricGroup &metricGroup; + uint64_t *addrTablePtr = nullptr; + uint64_t *dataPtr = 0u; private: - bool initialized = false; - MetricQueryPool *pool = nullptr; - uint32_t index = 0u; uint32_t metricGroupMask = 0u; + std::function destroyCb; }; -struct MetricQueryPool : _zet_metric_query_pool_handle_t { - MetricQueryPool(VPU::VPUDeviceContext *ctx, - MetricGroup *metricGroupInput, - 
const size_t poolSizeInput); - ~MetricQueryPool() = default; +struct MetricQueryPool : _zet_metric_query_pool_handle_t, IContextObject { + MetricQueryPool(Context *pContext, MetricGroup *metricGroupInput, const size_t poolSizeInput); + ~MetricQueryPool(); inline zet_metric_query_pool_handle_t toHandle() { return this; } static MetricQueryPool *fromHandle(zet_metric_query_pool_handle_t handle) { @@ -68,25 +60,15 @@ struct MetricQueryPool : _zet_metric_query_pool_handle_t { } ze_result_t destroy(); - bool isInitialized() const { return initialized; } - size_t getAddressTableSize() const { return addressTableSize; } - void removeQuery(MetricQuery *metricQuery); ze_result_t createMetricQuery(uint32_t index, zet_metric_query_handle_t *phMetricQuery); private: - bool initialized = false; - - VPU::VPUDeviceContext *ctx; + Context *pContext = nullptr; + VPU::VPUDeviceContext *ctx = nullptr; MetricGroup *metricGroup = nullptr; - size_t addressTableSize = 0u; - - /** - * Query allocation map - */ - std::vector queryAllocation; - - void *pQueryPool = nullptr; + std::vector> metricQueries; + VPU::VPUBufferObject *pQueryPoolBuffer = nullptr; }; } // namespace L0 diff --git a/umd/level_zero_driver/tools/source/metrics/metric_streamer.cpp b/umd/level_zero_driver/tools/source/metrics/metric_streamer.cpp index 1b311ad..37bb6ee 100644 --- a/umd/level_zero_driver/tools/source/metrics/metric_streamer.cpp +++ b/umd/level_zero_driver/tools/source/metrics/metric_streamer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,44 +7,72 @@ #include "level_zero/ze_api.h" #include "level_zero_driver/core/source/device/device.hpp" +#include "level_zero_driver/core/source/event/event.hpp" #include "level_zero_driver/tools/source/metrics/metric_streamer.hpp" #include "level_zero_driver/tools/source/metrics/metric.hpp" #include "vpu_driver/source/utilities/log.hpp" +#include "vpu_driver/source/utilities/timer.hpp" namespace L0 { -MetricStreamer::MetricStreamer(MetricGroup *metricGroupInput, - uint32_t notifyNReport, - VPU::VPUDeviceContext *ctxInput, - Device *deviceInput, - ze_event_handle_t hNotifyEvent) - : metricGroup(metricGroupInput) - , nReports(notifyNReport) - , ctx(ctxInput) - , device(deviceInput) - , eventHandle(hNotifyEvent) { - if (eventHandle) - LOG_W("No support for event handle in MetricStreamer"); +MetricStreamer::MetricStreamer(Context *pContext, + MetricGroup *metricGroupInput, + zet_metric_streamer_desc_t *desc, + ze_event_handle_t notifyHandle) + : pContext(pContext) + , metricGroup(metricGroupInput) + , ctx(pContext->getDeviceContext()) { + drm_ivpu_metric_streamer_start startData = {}; + startData.metric_group_mask = 0x1 << metricGroup->getGroupIndex(); + // Sampling rate expressed in nanoseconds + startData.sampling_rate_ns = desc->samplingPeriod; + startData.read_rate = desc->notifyEveryNReports; + + L0_THROW_WHEN(ctx->getDriverApi().metricStreamerStart(&startData) < 0, + "Failed to start metric streamer", + ZE_RESULT_ERROR_UNKNOWN); + sampleSize = startData.sample_size; + + if (notifyHandle && desc->notifyEveryNReports) { + auto notifyEvent = L0::Event::fromHandle(notifyHandle); + + notifyEvent->setMetricTrackData(0x1ULL << metricGroup->getGroupIndex(), + sampleSize * desc->notifyEveryNReports); + } } -ze_result_t MetricStreamer::close() { - const VPU::VPUDriverApi &drvApi = ctx->getDriverApi(); - +MetricStreamer::~MetricStreamer() { drm_ivpu_metric_streamer_stop stopData = {}; - 
stopData.metric_group_mask = 0x1 << metricGroup->getGroupIndex(); + stopData.metric_group_mask = 0x1ULL << metricGroup->getGroupIndex(); + if (ctx->getDriverApi().metricStreamerStop(&stopData) < 0) { + LOG_W("Failed to stop metric streamer."); + } +} - if (drvApi.metricStreamerStop(&stopData) < 0) { - LOG_E("Failed to stop metric streamer."); +ze_result_t MetricStreamer::close() { + pContext->removeObject(this); + LOG_I("MetricStreamer destroyed - %p", this); + return ZE_RESULT_SUCCESS; +} + +ze_result_t MetricStreamer::getData(const VPU::VPUDriverApi &drvApi, + const uint64_t groupMask, + size_t &rawDataSize, + uint8_t *pRawData) { + drm_ivpu_metric_streamer_get_data data = {}; + data.metric_group_mask = groupMask; + data.size = rawDataSize; + data.buffer_ptr = reinterpret_cast(pRawData); + + if (drvApi.metricStreamerGetData(&data) < 0) { + LOG_E("Failed to get metric streamer data."); return ZE_RESULT_ERROR_UNKNOWN; } - device->getMetricContext()->setMetricStreamer(nullptr); - - delete this; + rawDataSize = data.size; return ZE_RESULT_SUCCESS; } - ze_result_t MetricStreamer::readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData) { if (pRawDataSize == nullptr) { @@ -52,54 +80,29 @@ MetricStreamer::readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t return ZE_RESULT_ERROR_INVALID_NULL_POINTER; } - if (maxReportCount > nReports) - maxReportCount = nReports; - - auto metricContext = device->getMetricContext(); - const VPU::VPUDriverApi &drvApi = ctx->getDriverApi(); - if (*pRawDataSize == 0) { - drm_ivpu_metric_streamer_get_data data = {}; - data.metric_group_mask = 0x1 << metricGroup->getGroupIndex(); - data.size = 0; - data.buffer_ptr = 0; - - if (drvApi.metricStreamerGetData(&data) < 0) { - LOG_E("Failed to get metric streamer data."); + size_t dataSize = 0; + if (getData(drvApi, 0x1ULL << metricGroup->getGroupIndex(), dataSize, nullptr) != + ZE_RESULT_SUCCESS) return ZE_RESULT_ERROR_UNKNOWN; - } - // Actual size of the buffer - *pRawDataSize = data.size; - metricContext->actualBufferSize = data.size; + actualBufferSize = dataSize; + *pRawDataSize = actualBufferSize; return ZE_RESULT_SUCCESS; } - if (*pRawDataSize > metricContext->actualBufferSize) { + if (*pRawDataSize > actualBufferSize) { LOG_W("Size requested (%lu) is larger than actual data buffer size: %lu", *pRawDataSize, - metricContext->actualBufferSize); - *pRawDataSize = metricContext->actualBufferSize; - } - - if (maxReportCount * metricContext->sampleSize < metricContext->actualBufferSize) { - *pRawDataSize = maxReportCount * metricContext->sampleSize; + actualBufferSize); + *pRawDataSize = actualBufferSize; } + *pRawDataSize = std::min(*pRawDataSize, maxReportCount * sampleSize); if (pRawData != nullptr) { - drm_ivpu_metric_streamer_get_data data = {}; - data.metric_group_mask = 0x1 << metricGroup->getGroupIndex(); - data.size = *pRawDataSize; - data.buffer_ptr = reinterpret_cast(pRawData); - - if (drvApi.metricStreamerGetData(&data) < 0) { - LOG_E("Failed to get metric streamer data."); - return ZE_RESULT_ERROR_UNKNOWN; - } - - *pRawDataSize = data.size; + return getData(drvApi, 0x1ULL << metricGroup->getGroupIndex(), *pRawDataSize, pRawData); } else { LOG_W("Input raw data pointer is NULL."); } diff --git a/umd/level_zero_driver/tools/source/metrics/metric_streamer.hpp b/umd/level_zero_driver/tools/source/metrics/metric_streamer.hpp index 695bf8d..a2bed60 100644 --- a/umd/level_zero_driver/tools/source/metrics/metric_streamer.hpp +++ 
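// --- Editor's illustrative sketch (not part of this patch) ---
// readData() above keeps the standard Level Zero two-call idiom, so a
// hypothetical caller sizes the buffer first and then fetches the reports
// through the public API:
//
//     size_t rawSize = 0;
//     zetMetricStreamerReadData(hStreamer, UINT32_MAX, &rawSize, nullptr);
//     std::vector<uint8_t> raw(rawSize);
//     zetMetricStreamerReadData(hStreamer, UINT32_MAX, &rawSize, raw.data());
//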
b/umd/level_zero_driver/tools/source/metrics/metric_streamer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,9 +7,7 @@ #pragma once -#include "level_zero/ze_api.h" -#include "umd_common.hpp" -#include "vpu_driver/source/device/vpu_device_context.hpp" +#include "level_zero_driver/core/source/context/context.hpp" #include @@ -20,12 +18,12 @@ namespace L0 { struct Device; struct MetricGroup; -struct MetricStreamer : _zet_metric_streamer_handle_t { - MetricStreamer(MetricGroup *metricGroupInput, - uint32_t notifyNReport, - VPU::VPUDeviceContext *ctxInput, - Device *deviceInput, +struct MetricStreamer : _zet_metric_streamer_handle_t, IContextObject { + MetricStreamer(Context *pContext, + MetricGroup *metricGroupInput, + zet_metric_streamer_desc_t *desc, ze_event_handle_t hNotifyEvent); + ~MetricStreamer(); inline zet_metric_streamer_handle_t toHandle() { return this; } static MetricStreamer *fromHandle(zet_metric_streamer_handle_t handle) { @@ -35,12 +33,17 @@ struct MetricStreamer : _zet_metric_streamer_handle_t { ze_result_t close(); ze_result_t readData(uint32_t maxReportCount, size_t *pRawDataSize, uint8_t *pRawData); + static ze_result_t getData(const VPU::VPUDriverApi &drvApi, + const uint64_t groupMask, + size_t &rawDataSize, + uint8_t *pRawData); + private: + Context *pContext = nullptr; MetricGroup *metricGroup = nullptr; - uint32_t nReports = 0u; VPU::VPUDeviceContext *ctx = nullptr; - Device *device = nullptr; - ze_event_handle_t eventHandle; + uint64_t sampleSize = 0u; + uint64_t actualBufferSize = 0u; }; } // namespace L0 diff --git a/umd/level_zero_driver/unit_tests/fixtures/device_fixture.hpp b/umd/level_zero_driver/unit_tests/fixtures/device_fixture.hpp index 28a1e59..e465091 100644 --- a/umd/level_zero_driver/unit_tests/fixtures/device_fixture.hpp +++ b/umd/level_zero_driver/unit_tests/fixtures/device_fixture.hpp @@ -30,15 +30,14 @@ struct DeviceFixture { std::vector> devices; devices.push_back(std::move(vpuDevice)); - driverHandle->initialize(std::move(devices)); - device = driverHandle->devices[0]; + driverHandle = std::make_unique(std::move(devices)); + device = driverHandle->devices[0].get(); } virtual void TearDown() {} Mock driver; - std::unique_ptr> driverHandle = - std::make_unique>(); + std::unique_ptr driverHandle; L0::Device *device = nullptr; VPU::MockVPUDevice *mockVpuDevice = nullptr; @@ -55,15 +54,13 @@ struct MultiDeviceFixture { devices.push_back(std::move(mockDevice)); } - driverHandle = std::make_unique>(); - ze_result_t res = driverHandle->initialize(std::move(devices)); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); + driverHandle = std::make_unique(std::move(devices)); } virtual void TearDown() {} Mock driver; - std::unique_ptr> driverHandle; + std::unique_ptr driverHandle; const uint32_t numDevices = 4u; VPU::MockOsInterfaceImp osInfc; }; @@ -93,7 +90,6 @@ struct ContextFixture : DeviceFixture { void TearDown() override { EXPECT_EQ(ctx->getBuffersCount(), 0u); - if (context) context->destroy(); DeviceFixture::TearDown(); diff --git a/umd/level_zero_driver/unit_tests/main.cpp b/umd/level_zero_driver/unit_tests/main.cpp index b7c4968..d076c3d 100644 --- a/umd/level_zero_driver/unit_tests/main.cpp +++ b/umd/level_zero_driver/unit_tests/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,10 +7,86 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include 
"vpu_driver/unit_tests/options.hpp" +#include "level_zero_driver/unit_tests/options.hpp" +#include "vpu_driver/source/utilities/log.hpp" + +#include + +namespace TestOptions { + +std::filesystem::path blobPath; +std::filesystem::path modelPath; +std::string modelFlags; + +} // namespace TestOptions + +static void printHelpMessage() { + constexpr auto helpMessage = + "\n" + "Extra test options:\n" + " -v/--verbose Set log level to verbose\n" + " -l/--loglevel Set log level - error, warning, info, verbose\n" + " -b/--blob Path to blob\n" + " -m/--model Path to model\n" + " -c/--model_flags Model build flags\n" + "\n"; + + printf("%s\n", helpMessage); +} + +static bool parseOptions(int argc, char **argv) { + static struct option longOptions[] = {{"verbose", no_argument, 0, 'v'}, + {"loglevel", required_argument, 0, 'l'}, + {"blob", required_argument, 0, 'b'}, + {"model", required_argument, 0, 'm'}, + {"model_flags", required_argument, 0, 'c'}, + {}}; + + int opt; + while ((opt = getopt_long(argc, argv, "vl:b:m:c:", longOptions, nullptr)) != -1) { + switch (opt) { + case 'v': + VPU::setLogLevel(VERBOSE); + break; + case 'l': + VPU::setLogLevel(optarg); + break; + case 'b': + TestOptions::blobPath = optarg; + break; + case 'm': + TestOptions::modelPath = optarg; + break; + case 'c': + TestOptions::modelFlags = optarg; + break; + default: + printHelpMessage(); + return false; + } + } + return true; +} + +static void setDefaultOptions() { + const std::filesystem::path defaultBlob = "validation/blobs/mtl/mobilenet-v2/vpuip.blob"; + if (TestOptions::blobPath.empty() && std::filesystem::exists(defaultBlob)) { + TestOptions::blobPath = defaultBlob; + } + + const std::filesystem::path defaultModel = "validation/models/mtl/add_abc/add_abc.xml"; + if (TestOptions::modelPath.empty() && std::filesystem::exists(defaultModel)) { + TestOptions::modelPath = defaultModel; + TestOptions::modelFlags = + "--inputs_precisions=\"A:fp16 B:fp16 C:fp16\" --inputs_layouts=\"A:C B:C C:C\" " + "--outputs_precisions=\"Y:fp16\" --outputs_layouts=\"Y:C\""; + } +} int main(int argc, char **argv) { ::testing::InitGoogleMock(&argc, argv); - UnitTestOptions::parseLogOptions(argc, argv); + if (!parseOptions(argc, argv)) + return 1; + setDefaultOptions(); return RUN_ALL_TESTS(); } diff --git a/umd/level_zero_driver/unit_tests/mocks/mock_metrics.cpp b/umd/level_zero_driver/unit_tests/mocks/mock_metrics.cpp index 748900d..aeada2c 100644 --- a/umd/level_zero_driver/unit_tests/mocks/mock_metrics.cpp +++ b/umd/level_zero_driver/unit_tests/mocks/mock_metrics.cpp @@ -13,7 +13,7 @@ namespace ult { void MockMetricQuery::injectDummyData() { size_t dataCount = metricGroup.getAllocationSize() / sizeof(uint64_t); - uint64_t *metricData64 = reinterpret_cast(dataAddress); + uint64_t *metricData64 = reinterpret_cast(dataPtr); for (uint64_t i = 0; i < dataCount; i++) { metricData64[i] = 10 * (i + 1); } diff --git a/umd/level_zero_driver/unit_tests/mocks/mock_metrics.hpp b/umd/level_zero_driver/unit_tests/mocks/mock_metrics.hpp index c905ec1..e74d1db 100644 --- a/umd/level_zero_driver/unit_tests/mocks/mock_metrics.hpp +++ b/umd/level_zero_driver/unit_tests/mocks/mock_metrics.hpp @@ -18,8 +18,8 @@ namespace ult { class MockMetricDevice : public L0::Device { public: - MockMetricDevice(DriverHandle *driverHandle, VPU::VPUDevice *vpuDevice) - : L0::Device(driverHandle, vpuDevice) {} + MockMetricDevice(DriverHandle *driverHandle, std::unique_ptr vpuDevice) + : L0::Device(driverHandle, std::move(vpuDevice)) {} }; class MockMetricContext : public 
L0::MetricContext { @@ -31,25 +31,8 @@ class MockMetricContext : public L0::MetricContext { class MockMetricQuery : public L0::MetricQuery { public: - MockMetricQuery(MetricGroup &metricGroupInput, - VPU::VPUDeviceContext *ctx, - MetricQueryPool *poolInput, - uint32_t indexInput, - uint64_t *queryPtrInput) - : L0::MetricQuery(metricGroupInput, ctx, poolInput, indexInput, queryPtrInput) {} void injectDummyData(); }; -class MockMetricStreamer : public L0::MetricStreamer { - public: - MockMetricStreamer(MetricGroup *metricGroupInput, - uint32_t notifyNReport, - VPU::VPUDeviceContext *ctxInput, - Device *deviceInput, - ze_event_handle_t hNotifyEvent) - : L0::MetricStreamer(metricGroupInput, notifyNReport, ctxInput, deviceInput, hNotifyEvent) { - } -}; - } // namespace ult } // namespace L0 diff --git a/umd/level_zero_driver/unit_tests/options.hpp b/umd/level_zero_driver/unit_tests/options.hpp new file mode 100644 index 0000000..784f773 --- /dev/null +++ b/umd/level_zero_driver/unit_tests/options.hpp @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2022-2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include + +namespace TestOptions { + +extern std::filesystem::path blobPath; +extern std::filesystem::path modelPath; +extern std::string modelFlags; + +}; // namespace TestOptions diff --git a/umd/level_zero_driver/unit_tests/source/core/cmdlist/test_cmdlist_api.cpp b/umd/level_zero_driver/unit_tests/source/core/cmdlist/test_cmdlist_api.cpp index 37c9809..3a3b235 100644 --- a/umd/level_zero_driver/unit_tests/source/core/cmdlist/test_cmdlist_api.cpp +++ b/umd/level_zero_driver/unit_tests/source/core/cmdlist/test_cmdlist_api.cpp @@ -13,6 +13,8 @@ #include "level_zero_driver/core/source/cmdlist/cmdlist.hpp" #include "level_zero_driver/tools/source/metrics/metric_query.hpp" #include "level_zero_driver/unit_tests/fixtures/device_fixture.hpp" +#include "level_zero_driver/unit_tests/options.hpp" +#include "level_zero_driver/unit_tests/utils.hpp" #include #include @@ -102,6 +104,42 @@ struct CommandListFixture : CommandQueueFixture { uint64_t *ptrAlloc2 = nullptr; }; +struct CommandListGraphFixture : CommandListFixture { + void SetUp() override { + CommandListFixture::SetUp(); + + ASSERT_FALSE(TestOptions::blobPath.empty()) << "Blob path has not been provided"; + + loadBlobFromFile(TestOptions::blobPath, blob); + ASSERT_NE(0u, blob.size()); + + const ze_graph_desc_2_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = nullptr, + .format = ZE_GRAPH_FORMAT_NATIVE, + .inputSize = blob.size(), + .pInput = blob.data(), + .pBuildFlags = nullptr, + .flags = 0}; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_SUCCESS, res); + ASSERT_NE(nullptr, hGraph); + pGraph = L0::Graph::fromHandle(hGraph); + } + + void TearDown() override { + if (pGraph != nullptr) { + pGraph->destroy(); + } + + CommandListFixture::TearDown(); + } + + L0::Graph *pGraph = nullptr; + std::vector blob; + ze_graph_handle_t hGraph = nullptr; +}; + struct CommandListMetricFixture : CommandListFixture { void SetUp() override { CommandListFixture::SetUp(); @@ -164,6 +202,7 @@ struct CommandListMetricFixture : CommandListFixture { }; using CommandListApiTest = Test; +using CommandListGraphApiTest = Test; using CommandListMetricsApiTest = Test; TEST_F(CommandListApiTest, whenCalledCloseSuccessIsReturned) { @@ -351,6 +390,75 @@ TEST_F(CommandListApiTest, whenCalledAppendGraphInitializeWithoutInitGraphFailur EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, 
result); } +TEST_F(CommandListGraphApiTest, + givenCallAppendGraphInitializeAndExecuteWithEventGetExpectedResults) { + auto result = commandList->appendGraphInitialize(hGraph, nullptr, 0u, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandList->appendGraphInitialize(hGraph, nullptr, 1u, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, result); + result = commandList->appendGraphInitialize(hGraph, nullptr, 0u, &event0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandList->appendGraphInitialize(hGraph, nullptr, 1u, &event0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + result = commandList->appendGraphInitialize(hGraph, event1, 1u, &event0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(ZE_RESULT_SUCCESS, pGraph->setArgumentValue(0, reinterpret_cast(ptrAlloc))); + EXPECT_EQ(ZE_RESULT_SUCCESS, pGraph->setArgumentValue(1, reinterpret_cast(ptrAlloc2))); + + result = commandList->appendGraphExecute(hGraph, nullptr, nullptr, 0u, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandList->appendGraphExecute(hGraph, nullptr, nullptr, 1u, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, result); + result = commandList->appendGraphExecute(hGraph, nullptr, nullptr, 0u, &event0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandList->appendGraphExecute(hGraph, nullptr, nullptr, 1u, &event0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + result = commandList->appendGraphExecute(hGraph, nullptr, event1, 1u, &event0); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + +TEST_F(CommandListGraphApiTest, + resetCommandListAfterGraphInitThenAppendingGraphExecAndExecuteReturnsSuccess) { + ze_command_queue_handle_t hCommandQueue = createCommandQueue(0); + ASSERT_NE(hCommandQueue, nullptr); + + auto commandQueue = L0::CommandQueue::fromHandle(hCommandQueue); + ASSERT_TRUE(commandQueue); + + auto result = commandList->appendGraphInitialize(hGraph, nullptr, 0u, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + commandList->close(); + + auto cmdListHandle = commandList->toHandle(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr); + EXPECT_EQ(result, ZE_RESULT_SUCCESS); + + result = commandList->reset(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + pGraph->setArgumentValue(0, reinterpret_cast(ptrAlloc)); + pGraph->setArgumentValue(1, reinterpret_cast(ptrAlloc2)); + + result = commandList->appendGraphExecute(hGraph, nullptr, nullptr, 0u, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + result = commandList->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, commandList->getCommands().size()); + EXPECT_EQ(VPU_CMD_INFERENCE_EXECUTE, commandList->getCommands()[0]->getCommandType()); + + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr); + EXPECT_EQ(result, ZE_RESULT_SUCCESS); + + result = commandQueue->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + struct CommandListEventApiTest : Test { void SetUp() override { CommandListFixture::SetUp(); diff --git a/umd/level_zero_driver/unit_tests/source/core/device/test_device.cpp b/umd/level_zero_driver/unit_tests/source/core/device/test_device.cpp index 7fe42e3..6d04a22 100644 --- a/umd/level_zero_driver/unit_tests/source/core/device/test_device.cpp +++ b/umd/level_zero_driver/unit_tests/source/core/device/test_device.cpp @@ -28,7 +28,7 @@ namespace ult { TEST(DeviceCap, capabilitiesShouldBeInitializedWithDeviceInitialization) { VPU::MockOsInterfaceImp mockOSInfc; VPU::MockVPUDevice vpuDevice(FAKE_TEST_DEV_NODE, mockOSInfc); - 
EXPECT_TRUE(vpuDevice.init()); + EXPECT_TRUE(vpuDevice.init(true)); } using SingleDeviceTest = Test; @@ -99,7 +99,7 @@ TEST_F(SingleDeviceTest, givenCallToDevicePropertiesThenBasicPropertiesCorrectly EXPECT_EQ(l0DevProps.deviceId, hwInfo.deviceId); // Sub device ID. - EXPECT_EQ(l0DevProps.subdeviceId, hwInfo.subdeviceId); + EXPECT_EQ(l0DevProps.subdeviceId, hwInfo.deviceRevision); // Core clock rate. EXPECT_EQ(l0DevProps.coreClockRate, hwInfo.coreClockRate); diff --git a/umd/level_zero_driver/unit_tests/source/core/driver/test_driver.cpp b/umd/level_zero_driver/unit_tests/source/core/driver/test_driver.cpp index b9eb20e..64450d9 100644 --- a/umd/level_zero_driver/unit_tests/source/core/driver/test_driver.cpp +++ b/umd/level_zero_driver/unit_tests/source/core/driver/test_driver.cpp @@ -72,20 +72,11 @@ TEST(zeDriverGet, returnsCorrectNumberOfDriverInstances) { using DriverVersionTest = Test; TEST_F(DriverVersionTest, returnsExpectedDriverVersion) { - ze_result_t res = driverHandle->getProperties(nullptr); - EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, res); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, driverHandle->getProperties(nullptr)); ze_driver_properties_t properties; - res = driverHandle->getProperties(&properties); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); - - uint32_t versionMajor = (properties.driverVersion & 0xFF000000) >> 24; - uint32_t versionMinor = (properties.driverVersion & 0x00FF0000) >> 16; - uint32_t versionBuild = properties.driverVersion & 0x0000FFFF; - - EXPECT_EQ(L0_PROJECT_VERSION_MAJOR, versionMajor); - EXPECT_EQ(L0_PROJECT_VERSION_MINOR, versionMinor); - EXPECT_EQ(VPU_VERSION_BUILD, versionBuild); + EXPECT_EQ(ZE_RESULT_SUCCESS, driverHandle->getProperties(&properties)); + EXPECT_EQ(DRIVER_VERSION, properties.driverVersion); ze_driver_uuid_t uuid = ze_intel_vpu_driver_uuid; EXPECT_EQ(memcmp(&properties.uuid, &uuid, sizeof(properties.uuid)), 0); @@ -263,13 +254,13 @@ TEST_F(DriverVersionTest, checkEnvironmentVariableInitialization) { char *enableMetricsDefault = getenv("ZET_ENABLE_METRICS"); char *enablePciIdOrderDefault = getenv("ZE_ENABLE_PCI_ID_DEVICE_ORDER"); char *sharedForceDeviceAllocDefault = getenv("ZE_SHARED_FORCE_DEVICE_ALLOC"); - char *umdLogLevel = getenv("VPU_DRV_UMD_LOGLEVEL"); + char *umdLogLevel = getenv("ZE_INTEL_NPU_LOGLEVEL"); unsetenv("ZE_AFFINITY_MASK"); unsetenv("ZET_ENABLE_METRICS"); unsetenv("ZE_ENABLE_PCI_ID_DEVICE_ORDER"); unsetenv("ZE_SHARED_FORCE_DEVICE_ALLOC"); - unsetenv("VPU_DRV_UMD_LOGLEVEL"); + unsetenv("ZE_INTEL_NPU_LOGLEVEL"); driver.initializeEnvVariables(); EXPECT_EQ(driver.getEnvVariables().affinityMask, ""); @@ -282,7 +273,7 @@ TEST_F(DriverVersionTest, checkEnvironmentVariableInitialization) { setenv("ZET_ENABLE_METRICS", "1", 1); setenv("ZE_ENABLE_PCI_ID_DEVICE_ORDER", "1", 1); setenv("ZE_SHARED_FORCE_DEVICE_ALLOC", "1", 1); - setenv("VPU_DRV_UMD_LOGLEVEL", "VERBOSE", 1); + setenv("ZE_INTEL_NPU_LOGLEVEL", "VERBOSE", 1); driver.initializeEnvVariables(); EXPECT_EQ(driver.getEnvVariables().affinityMask, "0,1"); @@ -301,8 +292,8 @@ TEST_F(DriverVersionTest, checkEnvironmentVariableInitialization) { sharedForceDeviceAllocDefault == nullptr ? unsetenv("ZE_SHARED_FORCE_DEVICE_ALLOC") : setenv("ZE_SHARED_FORCE_DEVICE_ALLOC", sharedForceDeviceAllocDefault, 1); - umdLogLevel == nullptr ? unsetenv("VPU_DRV_UMD_LOGLEVEL") - : setenv("VPU_DRV_UMD_LOGLEVEL", umdLogLevel, 1); + umdLogLevel == nullptr ? 
unsetenv("ZE_INTEL_NPU_LOGLEVEL") + : setenv("ZE_INTEL_NPU_LOGLEVEL", umdLogLevel, 1); } } // namespace ult diff --git a/umd/level_zero_driver/unit_tests/source/ext/CMakeLists.txt b/umd/level_zero_driver/unit_tests/source/ext/CMakeLists.txt new file mode 100644 index 0000000..4b7d7f5 --- /dev/null +++ b/umd/level_zero_driver/unit_tests/source/ext/CMakeLists.txt @@ -0,0 +1,7 @@ +# +# Copyright (C) 2022 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +add_subdirectories() diff --git a/umd/level_zero_driver/unit_tests/source/ext/graph/CMakeLists.txt b/umd/level_zero_driver/unit_tests/source/ext/graph/CMakeLists.txt new file mode 100644 index 0000000..279df0f --- /dev/null +++ b/umd/level_zero_driver/unit_tests/source/ext/graph/CMakeLists.txt @@ -0,0 +1,10 @@ +# +# Copyright (C) 2022 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +target_sources(${TARGET_NAME} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/test_graph.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_graph_cid.cpp +) diff --git a/umd/level_zero_driver/unit_tests/source/ext/graph/test_graph.cpp b/umd/level_zero_driver/unit_tests/source/ext/graph/test_graph.cpp new file mode 100644 index 0000000..3285a99 --- /dev/null +++ b/umd/level_zero_driver/unit_tests/source/ext/graph/test_graph.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/ze_graph_ext.h" +#include "level_zero_driver/ext/source/graph/graph.hpp" +#include "level_zero_driver/unit_tests/fixtures/device_fixture.hpp" +#include "level_zero_driver/unit_tests/options.hpp" +#include "level_zero_driver/unit_tests/utils.hpp" +#include "vpu_driver/unit_tests/test_macros/test.hpp" + +namespace L0 { +namespace ult { + +struct GraphNativeFixture : ContextFixture { + void SetUp() override { + ContextFixture::SetUp(); + + ASSERT_FALSE(TestOptions::blobPath.empty()) << "Blob path has not been provided"; + + loadBlobFromFile(TestOptions::blobPath, blob); + ASSERT_NE(0u, blob.size()); + + graphDesc.pInput = blob.data(); + graphDesc.inputSize = blob.size(); + + ze_graph_handle_t hGraph = nullptr; + ASSERT_EQ(L0::Graph::create(context, device, &graphDesc, &hGraph), ZE_RESULT_SUCCESS); + ASSERT_NE(hGraph, nullptr); + graph = L0::Graph::fromHandle(hGraph); + } + + void TearDown() override { + if (graph != nullptr) { + graph->destroy(); + } + + ContextFixture::TearDown(); + } + + L0::Graph *graph = nullptr; + std::vector blob; + ze_graph_desc_2_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = nullptr, + .format = ZE_GRAPH_FORMAT_NATIVE, + .inputSize = 0, + .pInput = nullptr, + .pBuildFlags = nullptr, + .flags = 0}; +}; + +using GraphTest = Test; +using GraphNativeTest = Test; + +TEST_F(GraphTest, givenCallToGetDeviceGraphPropertiesExpectedValuesReturned) { + ze_device_graph_properties_t prop = {}; + + EXPECT_EQ(L0::Graph::getDeviceGraphProperties(device, nullptr), + ZE_RESULT_ERROR_INVALID_NULL_POINTER); + + EXPECT_EQ(L0::Graph::getDeviceGraphProperties(device, &prop), ZE_RESULT_SUCCESS); + EXPECT_EQ(prop.pNext, nullptr); + EXPECT_EQ(prop.graphExtensionVersion, ZE_GRAPH_EXT_VERSION_CURRENT); + EXPECT_TRUE(prop.graphFormatsSupported == ZE_GRAPH_FORMAT_NGRAPH_LITE || + prop.graphFormatsSupported == ZE_GRAPH_FORMAT_NATIVE); +} + +TEST_F(GraphTest, givenCallToGetProfilingDataPropertiesExpectedValuesReturned) { + ze_device_profiling_data_properties_t pDeviceProfilingDataProperties; + + EXPECT_EQ(L0::Graph::getProfilingDataProperties(nullptr), ZE_RESULT_ERROR_INVALID_NULL_POINTER); + 
EXPECT_EQ(L0::Graph::getProfilingDataProperties(&pDeviceProfilingDataProperties), + ZE_RESULT_SUCCESS); + EXPECT_EQ(pDeviceProfilingDataProperties.extensionVersion, + ZE_PROFILING_DATA_EXT_VERSION_CURRENT); +} + +TEST_F(GraphNativeTest, whenCreatingGraphFromMalformedBufferInvalidArgumentIsReturned) { + size_t memSize = 4096u; + void *data = ctx->createHostMemAlloc(memSize); + ASSERT_TRUE(data); + memset(data, 0xfe, memSize); + + graphDesc.inputSize = memSize; + graphDesc.pInput = (const uint8_t *)data; + ze_graph_handle_t hGraphNew; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraphNew); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); + + EXPECT_TRUE(ctx->freeMemAlloc(data)); +} + +TEST_F(GraphNativeTest, whenCallgetNativeBinaryWithoutSizePointerExpectInvalidNullPointerError) { + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, graph->getNativeBinary(nullptr, nullptr)); +} + +TEST_F(GraphNativeTest, + whenCallgetNativeBinaryWithAndWithoutGraphNativeBinaryPointerExpectSuccess) { + size_t size = 0; + std::vector graphNativeBinary; + + EXPECT_EQ(ZE_RESULT_SUCCESS, graph->getNativeBinary(&size, nullptr)); + EXPECT_EQ(size, blob.size()); + graphNativeBinary.resize(size, 0xAA); + + EXPECT_EQ(ZE_RESULT_SUCCESS, graph->getNativeBinary(&size, graphNativeBinary.data())); + EXPECT_EQ(graphNativeBinary, blob); +} + +TEST_F(GraphNativeTest, whenCallsetArgumentValueWithInvalidArgumentErrorIsReturned) { + uint8_t argValue[1] = { + 0, + }; + + // Invalid data pointer. + auto res = graph->setArgumentValue(0, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, res); + + // Invalid argument input index. + res = graph->setArgumentValue(1, nullptr); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, res); + + res = graph->setArgumentValue(2, argValue); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); +} + +TEST_F(GraphNativeTest, givenCallgetPropertiesSuccessfullyParsesGraphInformation) { + ze_graph_properties_t graphProp; + + auto res = graph->getProperties(&graphProp); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(2u, graphProp.numGraphArgs); +} + +TEST_F(GraphNativeTest, whenCallgetArgumentPropertiesSuccessIsReturning) { + ze_graph_properties_t graphProp; + EXPECT_EQ(graph->getProperties(&graphProp), ZE_RESULT_SUCCESS); + + ze_graph_argument_properties_t prop = {}; + for (uint32_t index = 0; index < graphProp.numGraphArgs; index++) { + EXPECT_EQ(ZE_RESULT_SUCCESS, graph->getArgumentProperties(index, &prop)); + // Name + EXPECT_GT(strnlen(prop.name, 50), 0); + // Type + if (index == 0) { + EXPECT_EQ(ZE_GRAPH_ARGUMENT_TYPE_INPUT, prop.type); + } else if (index == graphProp.numGraphArgs - 1) { + EXPECT_EQ(ZE_GRAPH_ARGUMENT_TYPE_OUTPUT, prop.type); + } else if (prop.type != ZE_GRAPH_ARGUMENT_TYPE_INPUT && + prop.type != ZE_GRAPH_ARGUMENT_TYPE_OUTPUT) { + FAIL() << "Invalid graph argument type"; + } + // Dimensions + for (int i = 0; i < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; i++) + EXPECT_GT(prop.dims[i], 0u); + // Network precision + EXPECT_NE(prop.networkPrecision, ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN); + EXPECT_NE(prop.networkPrecision, ZE_GRAPH_ARGUMENT_PRECISION_BIN); + // Network layout + EXPECT_GE(prop.networkLayout, ZE_GRAPH_ARGUMENT_LAYOUT_ANY); + // Device precision + EXPECT_NE(prop.devicePrecision, ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN); + EXPECT_NE(prop.devicePrecision, ZE_GRAPH_ARGUMENT_PRECISION_BIN); + // Device layout + EXPECT_GE(prop.deviceLayout, ZE_GRAPH_ARGUMENT_LAYOUT_ANY); + } +} + +TEST_F(GraphNativeTest, wrongUserInputShouldBeHandled) { + 
ze_graph_properties_t props; + EXPECT_EQ(graph->getProperties(&props), ZE_RESULT_SUCCESS); + + { + ze_graph_argument_properties_t prop = {}; + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, + graph->getArgumentProperties(props.numGraphArgs, &prop)); + + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, graph->getArgumentProperties(0, nullptr)); + } + + { + ze_graph_argument_properties_2_t prop = {}; + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, + graph->getArgumentProperties2(props.numGraphArgs, &prop)); + + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, graph->getArgumentProperties2(0, nullptr)); + } + + { + ze_graph_argument_properties_3_t prop = {}; + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, + graph->getArgumentProperties3(props.numGraphArgs, &prop)); + + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, graph->getArgumentProperties3(0, nullptr)); + } +} + +TEST_F(GraphNativeTest, whenCallgetArgumentProperties2ExpectSuccess) { + ze_graph_properties_t props = {}; + graph->getProperties(&props); + + ze_graph_argument_properties_2_t prop = {}; + for (uint32_t index = 0; index < props.numGraphArgs; index++) { + EXPECT_EQ(ZE_RESULT_SUCCESS, graph->getArgumentProperties2(index, &prop)); + + if (prop.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { + EXPECT_EQ(prop.quantReverseScale, 1.0f); + EXPECT_EQ(prop.quantZeroPoint, 0); + } else { + EXPECT_EQ(prop.quantReverseScale, 0.f); + EXPECT_EQ(prop.quantZeroPoint, 0); + } + } +} + +TEST_F(GraphNativeTest, whenCallgetArgumentProperties3ExpectSuccess) { + ze_graph_properties_t props; + graph->getProperties(&props); + + ze_graph_argument_properties_3_t prop = {}; + for (uint32_t index = 0; index < props.numGraphArgs; index++) { + EXPECT_EQ(ZE_RESULT_SUCCESS, graph->getArgumentProperties3(index, &prop)); + + if (prop.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { + EXPECT_EQ(prop.dims_count, 4); + } else { + EXPECT_EQ(prop.dims_count, 2); + } + } +} + +// TODO: Elf create internal buffer object that are detected as memory in ContextFixture::TearDown() +TEST_F(GraphNativeTest, DISABLED_expectThatContextDestroyDestructGraphObject) { + graph = nullptr; +} + +} // namespace ult +} // namespace L0 diff --git a/umd/level_zero_driver/unit_tests/source/ext/graph/test_graph_cid.cpp b/umd/level_zero_driver/unit_tests/source/ext/graph/test_graph_cid.cpp new file mode 100644 index 0000000..e2408e1 --- /dev/null +++ b/umd/level_zero_driver/unit_tests/source/ext/graph/test_graph_cid.cpp @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "level_zero/ze_graph_ext.h" +#include "level_zero_driver/ext/source/graph/graph.hpp" +#include "level_zero_driver/unit_tests/fixtures/device_fixture.hpp" +#include "level_zero_driver/unit_tests/options.hpp" +#include "vpu_driver/unit_tests/test_macros/test.hpp" + +#include "gtest/gtest.h" +#include "vpux_driver_compiler.h" + +#include + +namespace L0 { +namespace ult { + +struct CompilerInDriverFixture : public ContextFixture { + void SetUp() override { + ContextFixture::SetUp(); + + ASSERT_FALSE(TestOptions::modelPath.empty()) << "Model path has not been provided"; + ASSERT_FALSE(TestOptions::modelFlags.empty()) << "Model config has not been provided"; + + ze_device_graph_properties_t pDeviceGraphProperties = {}; + EXPECT_EQ(L0::Graph::getDeviceGraphProperties(device, &pDeviceGraphProperties), + ZE_RESULT_SUCCESS); + + if (!(pDeviceGraphProperties.graphFormatsSupported & ZE_GRAPH_FORMAT_NGRAPH_LITE)) + GTEST_SKIP_("Compiler in driver is not loaded!"); + + createModelData(); + } + 
+ void TearDown() override { + if (hGraph != nullptr) { + L0::Graph::fromHandle(hGraph)->destroy(); + } + + ContextFixture::TearDown(); + } + + void appendFileToModel(const std::string &path, std::vector &data) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + ASSERT_TRUE(file.is_open()) << "Failed to open file: " << path; + uint64_t fileSize = static_cast(file.tellg()); + file.seekg(0, std::ios::beg); + + size_t dataSize = data.size(); + data.resize(dataSize + sizeof(uint64_t)); + memcpy(&data[dataSize], &fileSize, sizeof(uint64_t)); + + dataSize = data.size(); + data.resize(dataSize + static_cast(fileSize)); + file.read(reinterpret_cast(&data[dataSize]), static_cast(fileSize)); + } + + void createModelData() { + ze_device_graph_properties_t pDeviceGraphProperties = {}; + EXPECT_EQ(L0::Graph::getDeviceGraphProperties(device, &pDeviceGraphProperties), + ZE_RESULT_SUCCESS); + + ze_graph_compiler_version_info_t version = { + .major = pDeviceGraphProperties.compilerVersion.major, + .minor = pDeviceGraphProperties.compilerVersion.minor}; + + /* + * NGraph Lite format used in driver: + * + * struct ModelIR { + * ze_graph_compiler_version_info_t version; + * uint32_t numInputs; + * struct Buffer { + * uint64_t bufferSize; + * char buffer[]; + * } buffers[]; + * }; + */ + + uint32_t numInputs = 2; + modelIR.resize(sizeof(version) + sizeof(numInputs)); + + memcpy(&modelIR[0], &version, sizeof(version)); + memcpy(&modelIR[sizeof(version)], &numInputs, sizeof(numInputs)); + + appendFileToModel(TestOptions::modelPath, modelIR); + + auto binaryPath = std::filesystem::path(TestOptions::modelPath).replace_extension(".bin"); + appendFileToModel(binaryPath, modelIR); + + buildFlags = TestOptions::modelFlags; + graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = nullptr, + .format = ZE_GRAPH_FORMAT_NGRAPH_LITE, + .inputSize = modelIR.size(), + .pInput = modelIR.data(), + .pBuildFlags = buildFlags.c_str(), + .flags = 0}; + } + + ze_graph_handle_t hGraph = nullptr; + ze_graph_desc_2_t graphDesc = {}; + std::vector modelIR; + std::string buildFlags; +}; + +using CompilerInDriver = Test; + +TEST_F(CompilerInDriver, versionCheck) { + ASSERT_GT(Compiler::getCompilerVersionMajor(), 0); + ASSERT_EQ(Compiler::getCompilerVersionMajor(), VCL_COMPILER_VERSION_MAJOR); + ASSERT_EQ(Compiler::getCompilerVersionMinor(), VCL_COMPILER_VERSION_MINOR); + ASSERT_TRUE(Compiler::checkVersion(VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR)); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithNullInputReturnsFailure) { + graphDesc.pInput = nullptr; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, res); + ASSERT_EQ(nullptr, hGraph); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithInputSizeZeroReturnsFailure) { + graphDesc.inputSize = 0u; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_ERROR_INVALID_SIZE, res); + ASSERT_EQ(nullptr, hGraph); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithNullBuildOptionsReturnsError) { + graphDesc.pBuildFlags = nullptr; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_NE(ZE_RESULT_SUCCESS, res); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithNoBuildOptionsReturnsError) { + graphDesc.pBuildFlags = ""; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_NE(ZE_RESULT_SUCCESS, res); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithBuildOptionsReturnsSuccess) { 
+ auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_SUCCESS, res); + ASSERT_NE(nullptr, hGraph); + + EXPECT_EQ(L0::Graph::fromHandle(hGraph)->getProfilingOutputSize(), 0); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithPerfCountConfigReturnsSuccess) { + if (buildFlags.find("--config") == std::string::npos) + buildFlags += " --config"; + buildFlags += " PERF_COUNT=\"YES\""; + graphDesc.pBuildFlags = buildFlags.c_str(); + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_SUCCESS, res); + ASSERT_NE(nullptr, hGraph); + + EXPECT_GT(L0::Graph::fromHandle(hGraph)->getProfilingOutputSize(), 0); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithEnableProfilingFlagReturnsSuccess) { + graphDesc.flags = ZE_GRAPH_FLAG_ENABLE_PROFILING; + + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_SUCCESS, res); + ASSERT_NE(nullptr, hGraph); + + EXPECT_GT(L0::Graph::fromHandle(hGraph)->getProfilingOutputSize(), 0); +} + +TEST_F(CompilerInDriver, creatingNgraphLiteWithEnableProfilingFlagAndLogLevelConfigReturnsSuccess) { + if (buildFlags.find("--config") == std::string::npos) + buildFlags += " --config"; + buildFlags += " LOG_LEVEL=\"LOG_ERROR\""; + graphDesc.pBuildFlags = buildFlags.c_str(); + + graphDesc.flags = ZE_GRAPH_FLAG_ENABLE_PROFILING; + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_SUCCESS, res); + ASSERT_NE(nullptr, hGraph); + + EXPECT_GT(L0::Graph::fromHandle(hGraph)->getProfilingOutputSize(), 0); +} + +TEST_F(CompilerInDriver, + creatingNgraphLiteWithEnableProfilingFlagAndPerfCountConfigReturnsSuccess) { + if (buildFlags.find("--config") == std::string::npos) + buildFlags += " --config"; + buildFlags += " PERF_COUNT=\"YES\""; + graphDesc.pBuildFlags = buildFlags.c_str(); + + graphDesc.flags = ZE_GRAPH_FLAG_ENABLE_PROFILING; + auto res = L0::Graph::create(context, device, &graphDesc, &hGraph); + ASSERT_EQ(ZE_RESULT_SUCCESS, res); + ASSERT_NE(nullptr, hGraph); + + EXPECT_GT(L0::Graph::fromHandle(hGraph)->getProfilingOutputSize(), 0); +} + +} // namespace ult +} // namespace L0 diff --git a/umd/level_zero_driver/unit_tests/source/tools/metrics/test_metrics.cpp b/umd/level_zero_driver/unit_tests/source/tools/metrics/test_metrics.cpp index ac4a8c4..c24ffe6 100644 --- a/umd/level_zero_driver/unit_tests/source/tools/metrics/test_metrics.cpp +++ b/umd/level_zero_driver/unit_tests/source/tools/metrics/test_metrics.cpp @@ -603,7 +603,6 @@ struct MetricGroupCalculateTest : public Test { auto metricQuery = MetricQuery::fromHandle(hMetricQuery); // Retrieve size and values of query data - size_t rawDataSize = 0u; EXPECT_EQ(metricQuery->getData(&rawDataSize, nullptr), ZE_RESULT_SUCCESS); ASSERT_GT(rawDataSize, 0u); @@ -627,6 +626,7 @@ struct MetricGroupCalculateTest : public Test { zet_metric_query_pool_handle_t hMetricQueryPool = nullptr; zet_metric_query_handle_t hMetricQuery = nullptr; std::vector rawData = {}; + size_t rawDataSize = 0u; }; TEST_F(MetricGroupCalculateTest, calculateMetricValuesReturnsFailureWithIncorrectInput) { @@ -634,7 +634,7 @@ TEST_F(MetricGroupCalculateTest, calculateMetricValuesReturnsFailureWithIncorrec // Expect error when rawData is nullptr EXPECT_EQ(MetricGroup::fromHandle(metricGroups[0]) ->calculateMetricValues(ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - rawData.size(), + rawDataSize, nullptr, &metricValueCount, nullptr), @@ -643,7 +643,7 @@ TEST_F(MetricGroupCalculateTest, 
calculateMetricValuesReturnsFailureWithIncorrec // Expect error when pMetricValueCount is nullptr EXPECT_EQ(MetricGroup::fromHandle(metricGroups[0]) ->calculateMetricValues(ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - rawData.size(), + rawDataSize, reinterpret_cast(rawData.data()), nullptr, nullptr), @@ -652,7 +652,7 @@ TEST_F(MetricGroupCalculateTest, calculateMetricValuesReturnsFailureWithIncorrec // Expect error when metric group calculation type exceeding max EXPECT_EQ(MetricGroup::fromHandle(metricGroups[0]) ->calculateMetricValues(ZET_METRIC_GROUP_CALCULATION_TYPE_FORCE_UINT32, - rawData.size(), + rawDataSize, reinterpret_cast(rawData.data()), &metricValueCount, nullptr), @@ -664,7 +664,7 @@ TEST_F(MetricGroupCalculateTest, calculateMetricValuesReturnsFailureWithIncorrec std::vector metricValues(metricValueCount); EXPECT_EQ(MetricGroup::fromHandle(metricGroups[0]) ->calculateMetricValues(ZET_METRIC_GROUP_CALCULATION_TYPE_MAX_METRIC_VALUES, - rawData.size(), + rawDataSize, reinterpret_cast(rawData.data()), &metricValueCount, metricValues.data()), @@ -675,7 +675,7 @@ TEST_F(MetricGroupCalculateTest, calculateMetricValuesReturnsExpectedResults) { uint32_t metricValueCount = 0; EXPECT_EQ(MetricGroup::fromHandle(metricGroups[0]) ->calculateMetricValues(ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - rawData.size(), + rawDataSize, reinterpret_cast(rawData.data()), &metricValueCount, nullptr), @@ -685,7 +685,7 @@ TEST_F(MetricGroupCalculateTest, calculateMetricValuesReturnsExpectedResults) { std::vector metricValues(metricValueCount); EXPECT_EQ(MetricGroup::fromHandle(metricGroups[0]) ->calculateMetricValues(ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - rawData.size(), + rawDataSize, reinterpret_cast(rawData.data()), &metricValueCount, metricValues.data()), diff --git a/umd/level_zero_driver/unit_tests/utils.hpp b/umd/level_zero_driver/unit_tests/utils.hpp new file mode 100644 index 0000000..b3aac04 --- /dev/null +++ b/umd/level_zero_driver/unit_tests/utils.hpp @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2022-2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "gtest/gtest.h" + +#include +#include +#include + +inline void loadBlobFromFile(const std::string &path, std::vector &blob) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + ASSERT_TRUE(file.is_open()) << "Failed to open file: " << path; + std::streamsize fileSize = file.tellg(); + file.seekg(0, std::ios::beg); + + blob.resize(static_cast(fileSize)); + file.read(reinterpret_cast(blob.data()), fileSize); +} diff --git a/umd/vpu_driver/include/umd_common.hpp b/umd/vpu_driver/include/umd_common.hpp index 2816625..0c874a3 100644 --- a/umd/vpu_driver/include/umd_common.hpp +++ b/umd/vpu_driver/include/umd_common.hpp @@ -13,6 +13,11 @@ #include #include +template +bool checkPtrAlignment(const void *p) noexcept { + return !(reinterpret_cast(p) % alignof(T)); +}; + template To safe_cast(From x) { static_assert(!std::is_floating_point_v, diff --git a/umd/vpu_driver/source/command/vpu_command_buffer.cpp b/umd/vpu_driver/source/command/vpu_command_buffer.cpp index da2d78c..c4ad052 100644 --- a/umd/vpu_driver/source/command/vpu_command_buffer.cpp +++ b/umd/vpu_driver/source/command/vpu_command_buffer.cpp @@ -18,7 +18,8 @@ VPUCommandBuffer::VPUCommandBuffer(VPUDeviceContext *ctx, VPUBufferObject *buffe : ctx(ctx) , buffer(buffer) , targetEngine(target) - , jobStatus(std::numeric_limits::max()) { + , jobStatus(std::numeric_limits::max()) + , priority(Priority::NORMAL) { 
bufferHandles.emplace_back(buffer->getHandle()); } @@ -54,9 +55,11 @@ VPUCommandBuffer::allocateCommandBuffer(VPUDeviceContext *ctx, cmdSize += sizeof(vpu_cmd_fence_t); } - VPUBufferObject *buffer = ctx->createInternalBufferObject( - sizeof(CommandHeader) + getFwDataCacheAlign(cmdSize) + descriptorSize, - VPUBufferObject::Type::CachedLow); + size_t cmdBufferSize = sizeof(CommandHeader) + getFwDataCacheAlign(cmdSize) + descriptorSize + + ctx->getExtraDmaDescriptorSize(); + + VPUBufferObject *buffer = + ctx->createInternalBufferObject(cmdBufferSize, VPUBufferObject::Type::CachedFw); if (buffer == nullptr) { LOG_E("Failed to allocate buffer object for command buffer for %s engine", targetEngineToStr(engineType)); @@ -201,6 +204,7 @@ bool VPUCommandBuffer::addCommand(VPUCommand *cmd, uint64_t &cmdOffset, uint64_t cmdOffset += cmd->getCommitSize(); descOffset += getFwDataCacheAlign(cmd->getDescriptorSize()); + return true; } diff --git a/umd/vpu_driver/source/command/vpu_command_buffer.hpp b/umd/vpu_driver/source/command/vpu_command_buffer.hpp index 4986d2c..26dcd61 100644 --- a/umd/vpu_driver/source/command/vpu_command_buffer.hpp +++ b/umd/vpu_driver/source/command/vpu_command_buffer.hpp @@ -21,6 +21,12 @@ namespace VPU { class VPUCommandBuffer { public: enum class Target { COMPUTE = DRM_IVPU_ENGINE_COMPUTE, COPY = DRM_IVPU_ENGINE_COPY }; + enum class Priority : uint32_t { + IDLE = DRM_IVPU_JOB_PRIORITY_IDLE, + NORMAL = DRM_IVPU_JOB_PRIORITY_NORMAL, + FOCUS = DRM_IVPU_JOB_PRIORITY_FOCUS, + REALTIME = DRM_IVPU_JOB_PRIORITY_REALTIME, + }; static const char *targetEngineToStr(Target type) { switch (type) { @@ -90,6 +96,9 @@ class VPUCommandBuffer { */ uint64_t getFenceAddr() const { return syncFenceVpuAddr; } + void setPriority(Priority p) { priority = p; } + Priority getPriority() const { return priority; } + private: /** * Initialize command buffer header @@ -140,6 +149,7 @@ class VPUCommandBuffer { VPUBufferObject *buffer; Target targetEngine; uint32_t jobStatus; + Priority priority; uint64_t syncFenceVpuAddr = 0; std::vector bufferHandles; diff --git a/umd/vpu_driver/source/command/vpu_event_command.hpp b/umd/vpu_driver/source/command/vpu_event_command.hpp index 1aaf78c..7d3c56e 100644 --- a/umd/vpu_driver/source/command/vpu_event_command.hpp +++ b/umd/vpu_driver/source/command/vpu_event_command.hpp @@ -18,9 +18,10 @@ class VPUEventCommand : public VPUCommand { public: typedef uint64_t KMDEventDataType; enum State : KMDEventDataType { - STATE_EVENT_INITIAL, + STATE_EVENT_INITIAL = 0, STATE_DEVICE_RESET, STATE_HOST_RESET, + STATE_WAIT, STATE_DEVICE_SIGNAL, STATE_HOST_SIGNAL, }; @@ -84,7 +85,7 @@ class VPUEventWaitCommand : public VPUEventCommand { EngineSupport::Forward, VPU_CMD_FENCE_WAIT, eventHeapPtr, - VPUEventCommand::STATE_DEVICE_SIGNAL); + VPUEventCommand::STATE_WAIT); } }; diff --git a/umd/vpu_driver/source/command/vpu_graph_init_command.cpp b/umd/vpu_driver/source/command/vpu_graph_init_command.cpp index 6e51e17..7e1067a 100644 --- a/umd/vpu_driver/source/command/vpu_graph_init_command.cpp +++ b/umd/vpu_driver/source/command/vpu_graph_init_command.cpp @@ -62,7 +62,7 @@ std::shared_ptr VPUGraphInitCommand::create(VPUDeviceContex } auto kernelBuffer = ctx->createInternalBufferObject(ctx->getPageAlignedSize(blobSize), - VPUBufferObject::Type::WriteCombineLow); + VPUBufferObject::Type::WriteCombineFw); if (kernelBuffer == nullptr) { LOG_E("Failed to allocate kernel heap for graph data."); return nullptr; @@ -76,7 +76,7 @@ std::shared_ptr VPUGraphInitCommand::create(VPUDeviceContex const 
size_t bufferCount = getBufferCount(metadataSize); auto scratchBuffer = ctx->createInternalBufferObject(ctx->getPageAlignedSize(scratchSize) * bufferCount, - VPUBufferObject::Type::WriteCombineHigh); + VPUBufferObject::Type::WriteCombineShave); if (scratchBuffer == nullptr) { LOG_E("Failed to allocate memory for scratch pointer!"); return nullptr; @@ -84,7 +84,7 @@ std::shared_ptr VPUGraphInitCommand::create(VPUDeviceContex auto metadataBuffer = ctx->createInternalBufferObject(ctx->getPageAlignedSize(metadataSize) * bufferCount, - VPUBufferObject::Type::WriteCombineLow); + VPUBufferObject::Type::WriteCombineFw); if (metadataBuffer == nullptr) { LOG_E("Failed to allocate memory for metadata pointer!"); return nullptr; @@ -93,7 +93,7 @@ std::shared_ptr VPUGraphInitCommand::create(VPUDeviceContex VPUBufferObject *actKernelBuffer = nullptr; if (kernelData != nullptr && kernelDataSize != 0) { actKernelBuffer = ctx->createInternalBufferObject(ctx->getPageAlignedSize(kernelDataSize), - VPUBufferObject::Type::WriteCombineHigh); + VPUBufferObject::Type::WriteCombineShave); if (actKernelBuffer == nullptr) { LOG_E("Failed to allocate kernel data pointer!"); return nullptr; diff --git a/umd/vpu_driver/source/command/vpu_job.hpp b/umd/vpu_driver/source/command/vpu_job.hpp index cdb71cb..59eb552 100644 --- a/umd/vpu_driver/source/command/vpu_job.hpp +++ b/umd/vpu_driver/source/command/vpu_job.hpp @@ -66,6 +66,11 @@ class VPUJob { /* Job is closed, no more append commands is allowed. Job is ready for submission */ bool isClosed() const { return closed; } + void setPriority(VPUCommandBuffer::Priority p) { + for (auto &c : cmdBuffers) + c->setPriority(p); + } + private: /** * @brief Segregate commands into command buffers based on following rules: diff --git a/umd/vpu_driver/source/device/hw_info.hpp b/umd/vpu_driver/source/device/hw_info.hpp index 0fd3d2a..457e310 100644 --- a/umd/vpu_driver/source/device/hw_info.hpp +++ b/umd/vpu_driver/source/device/hw_info.hpp @@ -24,8 +24,6 @@ using PrintCopyDescriptor = void(void *, vpu_cmd_header_t *); struct VPUHwInfo { uint32_t deviceId = 0u; - uint32_t supportedDeviceIds[2] = {0, 0}; - uint32_t numSupportedDevices = 0; int compilerPlatform = -1; uint32_t deviceRevision = 0u; uint32_t subdeviceId = 0u; @@ -44,22 +42,28 @@ struct VPUHwInfo { char name[256] = "Intel(R) AI Boost"; - uint64_t baseLowAddres = 0; + uint64_t baseLowAddress = 0; + + uint32_t extraDmaDescriptorSize = 0; + uint32_t fwMappedInferenceIndex = 0; + uint64_t fwMappedInferenceVersion = 0; + + bool metricStreamerCapability = false; + bool dmaMemoryRangeCapability = false; GetCopyCommand *getCopyCommand = nullptr; PrintCopyDescriptor *printCopyDescriptor = nullptr; - - bool IsDeviceId(uint32_t deviceId) const { - for (uint32_t i = 0; i < numSupportedDevices; i++) { - if (deviceId == supportedDeviceIds[i]) - return true; - } - return false; - } }; extern VPUHwInfo vpuHwInfo37xx; -const VPUHwInfo VPUHwInfos[] = {vpuHwInfo37xx}; +inline VPUHwInfo getHwInfoByDeviceId(uint32_t deviceId) { + switch (deviceId) { + case 0x7d1d: + case 0xad1d: + return vpuHwInfo37xx; + } + throw std::runtime_error("Unrecognized PCI device ID"); +} } // namespace VPU diff --git a/umd/vpu_driver/source/device/vpu_37xx/vpu_hw_37xx.cpp b/umd/vpu_driver/source/device/vpu_37xx/vpu_hw_37xx.cpp index dcf0ede..cdcf052 100644 --- a/umd/vpu_driver/source/device/vpu_37xx/vpu_hw_37xx.cpp +++ b/umd/vpu_driver/source/device/vpu_37xx/vpu_hw_37xx.cpp @@ -9,6 +9,7 @@ #include "vpu_driver/source/command/vpu_copy_command.hpp" #include 
"vpu_driver/source/device/hw_info.hpp" +#include "api/vpu_nnrt_api_37xx.h" #include "vpux_driver_compiler.h" #include @@ -21,19 +22,23 @@ static bool getCopyCommandDescriptor37xx(VPUDeviceContext *ctx, void *dst, size_t size, VPUDescriptor &desc) { - return VPUCopyCommand::fillDescriptor(ctx, src, dst, size, desc); + return VPUCopyCommand::fillDescriptor(ctx, + src, + dst, + size, + desc); } static void printCopyDescriptor37xx(void *desc, vpu_cmd_header_t *cmd) { - VPUCopyCommand::printCopyDesc(desc, cmd); + VPUCopyCommand::printCopyDesc(desc, cmd); } -struct VPUHwInfo vpuHwInfo37xx = {.supportedDeviceIds = {0x7D1D, 0xAD1D}, - .numSupportedDevices = 2, - .compilerPlatform = VCL_PLATFORM_VPU3720, +struct VPUHwInfo vpuHwInfo37xx = {.compilerPlatform = VCL_PLATFORM_VPU3720, .nExecUnits = 4096, .numSubslicesPerSlice = 2, .tileFuseMask = 0x3, + .extraDmaDescriptorSize = 16, + .fwMappedInferenceIndex = VPU_NNRT_37XX_API_VER_INDEX, .getCopyCommand = &getCopyCommandDescriptor37xx, .printCopyDescriptor = &printCopyDescriptor37xx}; } // namespace VPU diff --git a/umd/vpu_driver/source/device/vpu_device.cpp b/umd/vpu_driver/source/device/vpu_device.cpp index c2fde1a..df99f67 100644 --- a/umd/vpu_driver/source/device/vpu_device.cpp +++ b/umd/vpu_driver/source/device/vpu_device.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -28,92 +29,37 @@ VPUDevice::VPUDevice(std::string devnode, OsInterface &osInfc) , osInfc(osInfc) {} bool VPUDevice::initializeCaps(VPUDriverApi *drvApi) { - struct drm_ivpu_param arg = {}; - uint32_t deviceId; - - arg.param = DRM_IVPU_PARAM_DEVICE_ID; - if (drvApi->getDeviceParam(&arg)) { - LOG_E("Failed to call device config ioctl. -errno: %d", errno); - return false; - } - - deviceId = safe_cast(arg.value); - - LOG_I("Pci device ID: %#llx", arg.value); - for (auto &info : VPUHwInfos) { - if (info.IsDeviceId(deviceId)) { - hwInfo = info; - hwInfo.deviceId = deviceId; - break; - } - } - - if (hwInfo.deviceId == 0) { - LOG_E("Failed to find a device with PCI ID: %#llx", arg.value); + try { + uint32_t deviceId = drvApi->getDeviceParam(DRM_IVPU_PARAM_DEVICE_ID); + LOG_I("PCI device ID: %#x", deviceId); + + hwInfo = getHwInfoByDeviceId(deviceId); + hwInfo.deviceId = deviceId; + hwInfo.deviceRevision = drvApi->getDeviceParam(DRM_IVPU_PARAM_DEVICE_REVISION); + hwInfo.maxHardwareContexts = drvApi->getDeviceParam(DRM_IVPU_PARAM_NUM_CONTEXTS); + hwInfo.coreClockRate = drvApi->getDeviceParam(DRM_IVPU_PARAM_CORE_CLOCK_RATE); + hwInfo.platformType = drvApi->getDeviceParam(DRM_IVPU_PARAM_PLATFORM_TYPE); + hwInfo.baseLowAddress = drvApi->getDeviceParam(DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS); + hwInfo.fwMappedInferenceVersion = + drvApi->getDeviceParam(DRM_IVPU_PARAM_FW_API_VERSION, hwInfo.fwMappedInferenceIndex); + LOG_I("Base address of device is %#lx", hwInfo.baseLowAddress); + + uint32_t tileConfigParam = drvApi->getDeviceParam(DRM_IVPU_PARAM_TILE_CONFIG); + hwInfo.tileConfig = ~tileConfigParam & hwInfo.tileFuseMask; + } catch (const std::exception &err) { + LOG_E("Failed to initialize hardware info, error: %s", err.what()); return false; } - arg.param = DRM_IVPU_PARAM_DEVICE_REVISION; - if (drvApi->getDeviceParam(&arg)) { - LOG_E("Failed to get device revision using ioctl. -errno: %d", errno); - return false; - } - hwInfo.deviceRevision = safe_cast(arg.value); - - arg.param = DRM_IVPU_PARAM_NUM_CONTEXTS; - if (drvApi->getDeviceParam(&arg)) { - LOG_E("Failed to get number of contexts using ioctl. 
-errno: %d", errno); - return false; - } - hwInfo.maxHardwareContexts = safe_cast(arg.value); - - arg.param = DRM_IVPU_PARAM_CORE_CLOCK_RATE; - if (drvApi->getDeviceParam(&arg)) { - LOG_E("Failed to get core clock rate using ioctl. -errno: %d", errno); - return false; - } - hwInfo.coreClockRate = safe_cast(arg.value); - - arg.param = DRM_IVPU_PARAM_PLATFORM_TYPE; - if (drvApi->getDeviceParam(&arg)) { - LOG_E("Failed to get platform type using ioctl. -errno: %d", errno); - return false; - } - hwInfo.platformType = safe_cast(arg.value); - - arg.param = DRM_IVPU_PARAM_TILE_CONFIG; - if (drvApi->getDeviceParam(&arg)) { - LOG_E("Failed to get tile config using ioctl. -errno: %d", errno); - return false; - } - hwInfo.tileConfig = (~safe_cast(arg.value)) & hwInfo.tileFuseMask; - - arg.param = DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS; - if (drvApi->getDeviceParam(&arg) != 0) { - LOG_E("Failed to get context base address using ioctl. -errno: %d", errno); - return false; - } - - hwInfo.baseLowAddres = arg.value; - LOG_I("Base address of device is %#lx", hwInfo.baseLowAddres); - - arg.param = DRM_IVPU_PARAM_CAPABILITIES; - arg.index = DRM_IVPU_CAP_METRIC_STREAMER; - if (drvApi->getDeviceParam(&arg)) { - LOG_W("Failed to get metric streamer capabilities using ioctl. -errno: %d", errno); - } else { - capMetricStreamer = safe_cast(arg.value); - } + if (drvApi->checkDeviceCapability(DRM_IVPU_CAP_METRIC_STREAMER)) + hwInfo.metricStreamerCapability = true; + if (drvApi->checkDeviceCapability(DRM_IVPU_CAP_DMA_MEMORY_RANGE)) + hwInfo.dmaMemoryRangeCapability = true; return true; } bool VPUDevice::initializeMetricGroups(VPUDriverApi *drvApi) { - if (capMetricStreamer != 1) { - LOG_W("Metrics are not supported."); - return true; - } - uint64_t metricGroupMask = -1llu; drm_ivpu_metric_streamer_get_data get_info_params = {}; @@ -272,7 +218,7 @@ bool VPUDevice::initializeMetricGroups(VPUDriverApi *drvApi) { return true; } -bool VPUDevice::init() { +bool VPUDevice::init(bool enableMetrics) { LOG_V("Initializing VPU device."); if (devnode.empty()) { @@ -289,9 +235,11 @@ bool VPUDevice::init() { return false; } - if (!initializeMetricGroups(drvApi.get())) { - LOG_W("Failed to initialize metric groups."); - return false; + if (enableMetrics && getCapMetricStreamer()) { + if (!initializeMetricGroups(drvApi.get())) { + LOG_W("Failed to initialize metric groups."); + return false; + } } LOG_V("VPU device initialized successfully."); @@ -306,8 +254,8 @@ const std::vector VPUDevice::getMetricGroupsInfo() const { return groupsInfo; } -uint32_t VPUDevice::getCapMetricStreamer() const { - return capMetricStreamer; +bool VPUDevice::getCapMetricStreamer() const { + return hwInfo.metricStreamerCapability; } bool VPUDevice::isConnected() { diff --git a/umd/vpu_driver/source/device/vpu_device.hpp b/umd/vpu_driver/source/device/vpu_device.hpp index 7c44e81..21f92da 100644 --- a/umd/vpu_driver/source/device/vpu_device.hpp +++ b/umd/vpu_driver/source/device/vpu_device.hpp @@ -21,14 +21,14 @@ enum class EngineType { COMPUTE = 0, COPY, INVALID, ENGINE_MAX = INVALID }; class VPUDevice { public: - bool init(); + bool init(bool enableMetrics); VPUDevice(std::string devnode, OsInterface &osInfc); virtual ~VPUDevice() = default; const VPUHwInfo &getHwInfo() const; const std::vector getMetricGroupsInfo() const; - uint32_t getCapMetricStreamer() const; + bool getCapMetricStreamer() const; virtual std::unique_ptr createDeviceContext(); size_t getNumberOfEngineGroups(void) const; @@ -52,7 +52,6 @@ class VPUDevice { protected: VPUHwInfo hwInfo = {}; 
std::vector groupsInfo = {}; - uint32_t capMetricStreamer = 0u; private: std::string devnode; diff --git a/umd/vpu_driver/source/device/vpu_device_context.cpp b/umd/vpu_driver/source/device/vpu_device_context.cpp index ac8c70f..659eee4 100644 --- a/umd/vpu_driver/source/device/vpu_device_context.cpp +++ b/umd/vpu_driver/source/device/vpu_device_context.cpp @@ -14,11 +14,11 @@ #include "vpu_driver/source/memory/vpu_buffer_object.hpp" #include "vpu_driver/source/utilities/log.hpp" -#include +#include #include -#include -#include #include +#include +#include namespace VPU { @@ -28,9 +28,44 @@ VPUDeviceContext::VPUDeviceContext(std::unique_ptr drvApi, VPUHwIn LOG_I("VPUDeviceContext is created"); } -VPUBufferObject *VPUDeviceContext::createBufferObject(const size_t size, - const VPUBufferObject::Type type, - const VPUBufferObject::Location loc) { +VPUBufferObject::Type convertDmaToShaveRange(VPUBufferObject::Type type) { + switch (type) { + case VPUBufferObject::Type::WriteCombineDma: + return VPUBufferObject::Type::WriteCombineShave; + case VPUBufferObject::Type::UncachedDma: + return VPUBufferObject::Type::UncachedShave; + case VPUBufferObject::Type::CachedDma: + return VPUBufferObject::Type::CachedShave; + default: + break; + } + return type; +} + +VPUBufferObject *VPUDeviceContext::importBufferObject(VPUBufferObject::Location type, int32_t fd) { + std::unique_ptr bo = VPUBufferObject::importFromFd(*drvApi, type, fd); + if (bo == nullptr) { + LOG_E("Failed to import VPUBufferObject from file descriptor"); + return nullptr; + } + void *ptr = bo->getBasePointer(); + + const std::lock_guard lock(mtx); + auto [it, success] = trackedBuffers.try_emplace(ptr, std::move(bo)); + if (!success) { + LOG_E("Failed to add buffer object to trackedBuffers"); + return nullptr; + } + LOG_I("Buffer object %p successfully imported and added to trackedBuffers", &it->second); + return it->second.get(); +} + +VPUBufferObject *VPUDeviceContext::createBufferObject(size_t size, + VPUBufferObject::Type type, + VPUBufferObject::Location loc) { + if (!hwInfo->dmaMemoryRangeCapability && (static_cast(type) & DRM_IVPU_BO_DMA_MEM)) + type = convertDmaToShaveRange(type); + std::unique_ptr bo = VPUBufferObject::create(*drvApi, loc, type, size); if (bo == nullptr) { LOG_E("Failed to create VPUBufferObject"); @@ -66,6 +101,8 @@ bool VPUDeviceContext::freeMemAlloc(void *ptr) { return false; } + bo->allowDeleteExternalHandle(); + return freeMemAlloc(bo); } @@ -125,8 +162,8 @@ VPUBufferObject *VPUDeviceContext::createInternalBufferObject(size_t size, return nullptr; } - if (!(range == VPUBufferObject::Type::UncachedHigh || - range == VPUBufferObject::Type::UncachedLow)) { + if (!(range == VPUBufferObject::Type::UncachedShave || + range == VPUBufferObject::Type::UncachedFw)) { memset(bo->getBasePointer(), 0, bo->getAllocSize()); } @@ -156,18 +193,36 @@ bool VPUDeviceContext::submitCommandBuffer(const VPUCommandBuffer *cmdBuffer) { execParam.buffers_ptr = reinterpret_cast(cmdBuffer->getBufferHandles().data()); execParam.buffer_count = safe_cast(cmdBuffer->getBufferHandles().size()); execParam.engine = cmdBuffer->getEngine(); + execParam.priority = static_cast(cmdBuffer->getPriority()); - LOG_I("Buffer type: %s.", cmdBuffer->getName()); - LOG_I("Exec engine: %u, flags: %u, commands_offset: %u, buffer_count: %u, buffers_ptr: %#llx", + LOG_I("Submit buffer type: %s.", cmdBuffer->getName()); + LOG_I("Submit params -> engine: %u, flags: %u, offset: %u, count: %u, ptr: %#llx, prior: %u", execParam.engine, execParam.flags, 
          execParam.commands_offset,
          execParam.buffer_count,
-         execParam.buffers_ptr);
+         execParam.buffers_ptr,
+         execParam.priority);
+
+    constexpr auto pollTime = std::chrono::seconds(2);
+    const auto timeoutPoint = std::chrono::steady_clock::now() + pollTime;
+    while (drvApi->submitCommandBuffer(&execParam) < 0) {
+        /*
+         * The SUBMIT ioctl returns EBUSY when the command queue is full. The driver has to
+         * wait until the firmware completes a job and makes space in the queue for a new one.
+         * The polling time is set to 2 seconds to match the TDR timeout.
+         */
+        if (errno != EBUSY) {
+            LOG_E("Failed to submit %s command buffer: %p", cmdBuffer->getName(), cmdBuffer);
+            return false;
+        }
 
-    if (drvApi->submitCommandBuffer(&execParam) < 0) {
-        LOG_E("Failed to submit %s command buffer: %p", cmdBuffer->getName(), cmdBuffer);
-        return false;
+        if (std::chrono::steady_clock::now() > timeoutPoint) {
+            LOG_E("Timed out waiting for driver to submit a job");
+            return false;
+        }
+
+        std::this_thread::sleep_for(std::chrono::microseconds(100));
     }
 
     return true;
 }
@@ -216,14 +271,12 @@ void VPUDeviceContext::printCopyDescriptor(void *desc, vpu_cmd_header_t *cmd) {
 }
 
 bool VPUDeviceContext::getUniqueInferenceId(uint64_t &inferenceId) {
-    struct drm_ivpu_param deviceParameter = {};
-    deviceParameter.param = DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID;
-    if (drvApi->getDeviceParam(&deviceParameter)) {
-        LOG_E("Failed to get inference ID");
+    try {
+        inferenceId = drvApi->getDeviceParam(DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID);
+    } catch (const std::exception &err) {
+        LOG_E("Failed to get unique inference id, error: %s", err.what());
         return false;
     }
-
-    inferenceId = deviceParameter.value;
     return true;
 }
 
diff --git a/umd/vpu_driver/source/device/vpu_device_context.hpp b/umd/vpu_driver/source/device/vpu_device_context.hpp
index 9876bbf..7ef64db 100644
--- a/umd/vpu_driver/source/device/vpu_device_context.hpp
+++ b/umd/vpu_driver/source/device/vpu_device_context.hpp
@@ -36,19 +36,19 @@ class VPUDeviceContext {
     inline void *
     createHostMemAlloc(size_t size,
-                       VPUBufferObject::Type type = VPUBufferObject::Type::CachedHigh) {
+                       VPUBufferObject::Type type = VPUBufferObject::Type::CachedShave) {
         return createMemAlloc(size, type, VPUBufferObject::Location::Host);
     };
 
     inline void *
     createDeviceMemAlloc(size_t size,
-                         VPUBufferObject::Type type = VPUBufferObject::Type::WriteCombineLow) {
+                         VPUBufferObject::Type type = VPUBufferObject::Type::WriteCombineFw) {
         return createMemAlloc(size, type, VPUBufferObject::Location::Device);
     };
 
     inline void *
     createSharedMemAlloc(size_t size,
-                         VPUBufferObject::Type type = VPUBufferObject::Type::CachedLow) {
+                         VPUBufferObject::Type type = VPUBufferObject::Type::CachedFw) {
         return createMemAlloc(size, type, VPUBufferObject::Location::Shared);
     };
 
@@ -96,7 +96,7 @@ class VPUDeviceContext {
       @return pointer to buffer object
     */
     VPUBufferObject *createInternalBufferObject(size_t size, VPUBufferObject::Type type);
-
+    VPUBufferObject *importBufferObject(VPUBufferObject::Location type, int32_t fd);
     int getFd() const { return drvApi->getFd(); }
 
     /**
@@ -117,7 +117,11 @@ class VPUDeviceContext {
     /**
     * Return the lowest VPU address from VPU low range that is accessible by firmware device
     */
-    uint64_t getVPULowBaseAddress() const { return hwInfo->baseLowAddres; }
+    uint64_t getVPULowBaseAddress() const { return hwInfo->baseLowAddress; }
+
+    uint32_t getExtraDmaDescriptorSize() const { return hwInfo->extraDmaDescriptorSize; }
+
+    uint64_t getFwMappedInferenceVersion() const { return hwInfo->fwMappedInferenceVersion; }
 
     /**
     * Return number of currently tracking buffer objects in the structure
@@ -153,9 +157,9 @@ class VPUDeviceContext {
      @param location memory type being identified
      @return pointer to VPUBufferObject, on failure return nullptr
     */
-    VPUBufferObject *createBufferObject(const size_t size,
-                                        const VPUBufferObject::Type range,
-                                        const VPUBufferObject::Location location);
+    VPUBufferObject *createBufferObject(size_t size,
+                                        VPUBufferObject::Type range,
+                                        VPUBufferObject::Location location);
 
     bool submitCommandBuffer(const VPUCommandBuffer *cmdBuffer);
 
diff --git a/umd/vpu_driver/source/memory/vpu_buffer_object.cpp b/umd/vpu_driver/source/memory/vpu_buffer_object.cpp
index 1bfb345..2cb7d9b 100644
--- a/umd/vpu_driver/source/memory/vpu_buffer_object.cpp
+++ b/umd/vpu_driver/source/memory/vpu_buffer_object.cpp
@@ -35,6 +35,12 @@ VPUBufferObject::~VPUBufferObject() {
         LOG_E("Failed to unmap handle %d", handle);
     }
 
+    /* Exportable buffers are managed by user space; the driver never closes the
+     * handle without a direct API call from user space.
+     */
+    if (static_cast<uint32_t>(location) & externalMemMask)
+        return;
+
    if (drvApi.closeBuffer(handle) != 0) {
        LOG_E("Failed to close handle %d", handle);
    }
@@ -67,6 +73,42 @@ VPUBufferObject::create(const VPUDriverApi &drvApi, Location type, Type range, s
     return std::make_unique<VPUBufferObject>(drvApi, type, range, ptr, size, handle, vpuAddr);
 }
 
+std::unique_ptr<VPUBufferObject>
+VPUBufferObject::importFromFd(const VPUDriverApi &drvApi, Location type, int32_t fd) {
+    uint32_t handle = 0;
+
+    int32_t err = drvApi.importBuffer(fd, 0, handle);
+    if (err != 0) {
+        LOG_E("Buffer import failed, system error code = %d", err);
+        return nullptr;
+    }
+    /* The kernel does not track how many imports were made from a single descriptor. If
+     * user space imports the same memory object twice on the same DRM file description,
+     * both imports return the same GEM handle, and user space has to ensure that
+     * &DRM_IOCTL_GEM_CLOSE is performed only once. This is why no close operation is
+     * invoked on failure below: this instance cannot know how many users of the handle
+     * are left.
+     */
+    void *ptr = nullptr;
+    uint64_t offset = 0, size = 0, vpuAddr = 0;
+    uint32_t flags = 0;
+
+    if (drvApi.getExtBufferInfo(handle, flags, vpuAddr, size, offset)) {
+        LOG_E("Failed to get info about buffer");
+        return nullptr;
+    }
+    /* Flags are ignored; the range is always set to Type::ImportedMemory */
+    Type range = Type::ImportedMemory;
+
+    ptr = drvApi.mmap(size, safe_cast<off_t>(offset));
+    if (ptr == nullptr) {
+        LOG_E("Failed to mmap the imported buffer");
+        return nullptr;
+    }
+
+    return std::make_unique<VPUBufferObject>(drvApi, type, range, ptr, size, handle, vpuAddr);
+}
+
 bool VPUBufferObject::copyToBuffer(const void *data, size_t size, uint64_t offset) {
     if (offset > allocSize) {
         LOG_E("Invalid offset value");
@@ -120,4 +162,19 @@ bool VPUBufferObject::fillBuffer(const void *pattern, size_t patternSize) {
     return true;
 }
 
+bool VPUBufferObject::exportToFd(int32_t &fd) {
+    if ((static_cast<uint32_t>(getLocation()) & VPUBufferObject::externalMemMask) !=
+        VPUBufferObject::externalMemMask) {
+        LOG_E("Buffer should be created as exportable");
+        return false;
+    }
+
+    int32_t err = drvApi.exportBuffer(handle, O_RDWR, fd);
+    if (err != 0) {
+        LOG_E("Buffer export failed, system error code = %d", err);
+        return false;
+    }
+    return true;
+}
+
 } // namespace VPU
diff --git a/umd/vpu_driver/source/memory/vpu_buffer_object.hpp b/umd/vpu_driver/source/memory/vpu_buffer_object.hpp
index 57e362c..f994312 100644
--- a/umd/vpu_driver/source/memory/vpu_buffer_object.hpp
+++ b/umd/vpu_driver/source/memory/vpu_buffer_object.hpp
@@ -24,19 +24,38 @@ namespace VPU {
 class VPUBufferObject {
   public:
     enum class Type {
-        CachedLow = DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE,
-        CachedHigh = DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_HIGH_MEM,
-        UncachedLow = DRM_IVPU_BO_UNCACHED,
-        UncachedHigh = DRM_IVPU_BO_UNCACHED | DRM_IVPU_BO_HIGH_MEM,
-        WriteCombineLow = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE,
-        WriteCombineHigh = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_HIGH_MEM,
+        CachedFw = DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE,
+        CachedShave = DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_HIGH_MEM,
+        CachedDma = DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_DMA_MEM,
+        UncachedFw = DRM_IVPU_BO_UNCACHED,
+        UncachedShave = DRM_IVPU_BO_UNCACHED | DRM_IVPU_BO_HIGH_MEM,
+        UncachedDma = DRM_IVPU_BO_UNCACHED | DRM_IVPU_BO_DMA_MEM,
+        WriteCombineFw = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE,
+        WriteCombineShave = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_HIGH_MEM,
+        WriteCombineDma = DRM_IVPU_BO_WC | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_DMA_MEM,
+        ImportedMemory = 0,
+    };
+    const uint32_t externalMemMask = 0x8000;
+    enum class Location {
+        Internal = 0x1,
+        Host = 0x2,
+        Device = 0x4,
+        Shared = 0x8,
+        ExternalHost = 0x8002,
+        ExternalDevice = 0x8004,
+        ExternalShared = 0x8008,
+    };
-
-    enum class Location { Internal, Host, Device, Shared };
 
     static std::unique_ptr<VPUBufferObject>
     create(const VPUDriverApi &drvApi, Location type, Type range, size_t size);
 
+    /**
+     * @brief Import a buffer from a file descriptor
+     *
+     */
+    static std::unique_ptr<VPUBufferObject>
+    importFromFd(const VPUDriverApi &drvApi, Location type, int32_t fd);
+
     VPUBufferObject(const VPUDriverApi &drvApi,
                     Location memoryType,
                     Type range,
@@ -61,6 +80,22 @@ class VPUBufferObject {
      */
     Type getType() const { return type; }
 
+    void allowDeleteExternalHandle() {
+        switch (location) {
+        case Location::ExternalHost:
+            location = Location::Host;
+            break;
+        case Location::ExternalDevice:
+            location = Location::Device;
+            break;
+        case Location::ExternalShared:
+            location =
Location::Shared; + break; + default: + break; + } + } + /** Returns memory size of the buffer object. */ @@ -109,6 +144,12 @@ class VPUBufferObject { */ bool fillBuffer(const void *pattern, size_t patternSize); + /** + * @brief Export Buffer to file descriptor + * + */ + bool exportToFd(int32_t &fd); + private: const VPUDriverApi &drvApi; Location location; diff --git a/umd/vpu_driver/source/os_interface/os_interface.hpp b/umd/vpu_driver/source/os_interface/os_interface.hpp index 13ea0e8..7a97077 100644 --- a/umd/vpu_driver/source/os_interface/os_interface.hpp +++ b/umd/vpu_driver/source/os_interface/os_interface.hpp @@ -30,8 +30,6 @@ class OsInterface { virtual int osiFcntl(int fd, int cmd) = 0; virtual int osiIoctl(int fd, unsigned long request, void *arg) = 0; - virtual void *osiAlloc(size_t size) = 0; - virtual int osiFree(void *ptr) = 0; virtual size_t osiGetSystemPageSize() = 0; virtual void *osiMmap(void *addr, size_t size, int prot, int flags, int fd, off_t offset) = 0; diff --git a/umd/vpu_driver/source/os_interface/os_interface_imp.cpp b/umd/vpu_driver/source/os_interface/os_interface_imp.cpp index cfb18fd..8e353d7 100644 --- a/umd/vpu_driver/source/os_interface/os_interface_imp.cpp +++ b/umd/vpu_driver/source/os_interface/os_interface_imp.cpp @@ -64,20 +64,6 @@ int OsInterfaceImp::osiIoctl(int fd, unsigned long request, void *args) { return ioctl(fd, request, args); } -void *OsInterfaceImp::osiAlloc(size_t size) { - void *ptr; - - if (posix_memalign(&ptr, osiGetSystemPageSize(), size)) - return nullptr; - - return ptr; -} - -int OsInterfaceImp::osiFree(void *ptr) { - free(ptr); - return 0; -} - size_t OsInterfaceImp::osiGetSystemPageSize() { return safe_cast(sysconf(_SC_PAGESIZE)); } diff --git a/umd/vpu_driver/source/os_interface/os_interface_imp.hpp b/umd/vpu_driver/source/os_interface/os_interface_imp.hpp index 3d1728a..cb0b935 100644 --- a/umd/vpu_driver/source/os_interface/os_interface_imp.hpp +++ b/umd/vpu_driver/source/os_interface/os_interface_imp.hpp @@ -23,8 +23,6 @@ class OsInterfaceImp : public OsInterface { int osiFcntl(int fd, int cmd) override; int osiIoctl(int fd, unsigned long request, void *arg) override; - void *osiAlloc(size_t size) override; - int osiFree(void *ptr) override; size_t osiGetSystemPageSize() override; void *osiMmap(void *addr, size_t size, int prot, int flags, int fd, off_t offset) override; diff --git a/umd/vpu_driver/source/os_interface/vpu_device_factory.cpp b/umd/vpu_driver/source/os_interface/vpu_device_factory.cpp index 7a20730..5905719 100644 --- a/umd/vpu_driver/source/os_interface/vpu_device_factory.cpp +++ b/umd/vpu_driver/source/os_interface/vpu_device_factory.cpp @@ -16,27 +16,19 @@ namespace VPU { -std::vector> DeviceFactory::createDevices(OsInterface *osi) { +std::vector> DeviceFactory::createDevices(OsInterface *osi, + bool enableMetrics) { std::vector> devices; - std::string devPrefix; std::string devPath; - std::error_code ec; - int maxMinor; - int minMinor; - - if (std::filesystem::exists("/sys/class/accel", ec)) { - devPrefix = "/dev/accel/accel"; - minMinor = 0; - } else { - devPrefix = "/dev/dri/renderD"; - minMinor = 128; - } - maxMinor = minMinor + 63; + + constexpr std::string_view devPrefix = "/dev/accel/accel"; + int minMinor = 0; + int maxMinor = minMinor + 63; for (int minor = minMinor; minor <= maxMinor; minor++) { - devPath = devPrefix + std::to_string(minor); + devPath = std::string(devPrefix) + std::to_string(minor); auto device = std::make_unique(devPath, *osi); - if (!device->init()) { + if 
(!device->init(enableMetrics)) { continue; } devices.push_back(std::move(device)); diff --git a/umd/vpu_driver/source/os_interface/vpu_device_factory.hpp b/umd/vpu_driver/source/os_interface/vpu_device_factory.hpp index 4736319..53fecf4 100644 --- a/umd/vpu_driver/source/os_interface/vpu_device_factory.hpp +++ b/umd/vpu_driver/source/os_interface/vpu_device_factory.hpp @@ -16,7 +16,8 @@ namespace VPU { class DeviceFactory { public: - static std::vector> createDevices(OsInterface *osi); + static std::vector> createDevices(OsInterface *osi, + bool enableMetrics); }; } // namespace VPU diff --git a/umd/vpu_driver/source/os_interface/vpu_driver_api.cpp b/umd/vpu_driver/source/os_interface/vpu_driver_api.cpp index e41b2d0..84a6354 100644 --- a/umd/vpu_driver/source/os_interface/vpu_driver_api.cpp +++ b/umd/vpu_driver/source/os_interface/vpu_driver_api.cpp @@ -5,17 +5,14 @@ * */ -#include "vpu_driver/source/utilities/log.hpp" #include "vpu_driver/source/os_interface/vpu_driver_api.hpp" #include -#include +#include #include #include #include -#include #include -#include namespace VPU { @@ -127,8 +124,22 @@ int VPUDriverApi::submitCommandBuffer(drm_ivpu_submit *arg) const { return doIoctl(DRM_IOCTL_IVPU_SUBMIT, arg); } -int VPUDriverApi::getDeviceParam(drm_ivpu_param *arg) const { - return doIoctl(DRM_IOCTL_IVPU_GET_PARAM, arg); +bool VPUDriverApi::checkDeviceCapability(uint32_t index) const { + struct drm_ivpu_param arg = {}; + arg.param = DRM_IVPU_PARAM_CAPABILITIES; + arg.index = index; + if (doIoctl(DRM_IOCTL_IVPU_GET_PARAM, &arg)) { + LOG_W("Capability does not exist, index: %#x, errno: %d", index, errno); + return false; + } + + if (arg.value == 0) { + LOG_W("Capability from index: %#x is not set", index); + return false; + } + + LOG_I("Capability from index: %#x is set", index); + return true; } bool VPUDriverApi::checkDeviceStatus() const { @@ -148,14 +159,6 @@ int VPUDriverApi::closeBuffer(uint32_t handle) const { return doIoctl(DRM_IOCTL_GEM_CLOSE, &args); } -void *VPUDriverApi::alloc(size_t size) const { - return osInfc.osiAlloc(size); -} - -int VPUDriverApi::free(void *ptr) const { - return osInfc.osiFree(ptr); -} - int VPUDriverApi::createBuffer(size_t size, uint32_t flags, uint32_t &handle, @@ -186,7 +189,6 @@ int VPUDriverApi::getBufferInfo(uint32_t handle, uint64_t &mmap_offset) const { int ret = doIoctl(DRM_IOCTL_IVPU_BO_INFO, &args); if (ret) { LOG_E("Failed to call DRM_IOCTL_IVPU_BO_INFO"); - closeBuffer(handle); return ret; } @@ -194,6 +196,53 @@ int VPUDriverApi::getBufferInfo(uint32_t handle, uint64_t &mmap_offset) const { return ret; } +int VPUDriverApi::getExtBufferInfo(uint32_t handle, + uint32_t &flags, + uint64_t &vpu_address, + uint64_t &size, + uint64_t &mmap_offset) const { + drm_ivpu_bo_info args = {}; + args.handle = handle; + + int ret = doIoctl(DRM_IOCTL_IVPU_BO_INFO, &args); + if (ret) { + LOG_E("Failed to call DRM_IOCTL_IVPU_BO_INFO"); + return ret; + } + + flags = args.flags; + vpu_address = args.vpu_addr; + size = args.size; + mmap_offset = args.mmap_offset; + return ret; +} + +int VPUDriverApi::exportBuffer(uint32_t handle, uint32_t flags, int32_t &fd) const { + drm_prime_handle args = {.handle = handle, .flags = flags, .fd = -1}; + + int ret = doIoctl(DRM_IOCTL_PRIME_HANDLE_TO_FD, &args); + if (ret) { + LOG_E("Failed to call DRM_IOCTL_PRIME_HANDLE_TO_FD"); + return ret; + } + + fd = args.fd; + return ret; +} + +int VPUDriverApi::importBuffer(int32_t fd, uint32_t flags, uint32_t &handle) const { + drm_prime_handle args = {.handle = 0, .flags = flags, .fd = 
fd}; + + int ret = doIoctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &args); + if (ret) { + LOG_E("Failed to call DRM_IOCTL_PRIME_FD_TO_HANDLE"); + return ret; + } + + handle = args.handle; + return ret; +} + void *VPUDriverApi::mmap(size_t size, off_t offset) const { void *ptr = osInfc.osiMmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, vpuFd, offset); if (ptr == MAP_FAILED) { diff --git a/umd/vpu_driver/source/os_interface/vpu_driver_api.hpp b/umd/vpu_driver/source/os_interface/vpu_driver_api.hpp index 435fb4e..c72d003 100644 --- a/umd/vpu_driver/source/os_interface/vpu_driver_api.hpp +++ b/umd/vpu_driver/source/os_interface/vpu_driver_api.hpp @@ -7,6 +7,8 @@ #pragma once +#include "umd_common.hpp" +#include "vpu_driver/source/utilities/log.hpp" #include "vpu_driver/source/os_interface/os_interface_imp.hpp" #include @@ -39,18 +41,22 @@ class VPUDriverApi final { int getFd() const { return vpuFd; } bool isVpuDevice() const; int submitCommandBuffer(drm_ivpu_submit *arg) const; - int getDeviceParam(drm_ivpu_param *arg) const; bool checkDeviceStatus() const; + bool checkDeviceCapability(uint32_t index) const; size_t getPageSize() const; - void *alloc(size_t size) const; - int free(void *ptr) const; - int wait(void *args) const; int closeBuffer(uint32_t handle) const; int createBuffer(size_t size, uint32_t flags, uint32_t &handle, uint64_t &vpuAddr) const; int getBufferInfo(uint32_t handle, uint64_t &mmap_offset) const; + int getExtBufferInfo(uint32_t handle, + uint32_t &flags, + uint64_t &vpu_address, + uint64_t &size, + uint64_t &mmap_offset) const; + int exportBuffer(uint32_t handle, uint32_t flags, int32_t &fd) const; + int importBuffer(int32_t fd, uint32_t flags, uint32_t &handle) const; void *mmap(size_t size, off_t offset) const; int unmap(void *ptr, size_t size) const; @@ -59,6 +65,20 @@ class VPUDriverApi final { int metricStreamerGetData(drm_ivpu_metric_streamer_get_data *data) const; int metricStreamerGetInfo(drm_ivpu_metric_streamer_get_data *data) const; + template + T getDeviceParam(uint32_t param, uint32_t index = 0) const { + struct drm_ivpu_param arg = {}; + arg.param = param; + arg.index = index; + if (doIoctl(DRM_IOCTL_IVPU_GET_PARAM, &arg)) { + LOG_E("Failed to read device param, param: %#x, errno: %d", param, errno); + throw std::runtime_error("Failed to get device param"); + } + if constexpr (std::is_same_v) + return arg.value; + return safe_cast(arg.value); + } + private: bool openDevice(); bool closeDevice(); diff --git a/umd/vpu_driver/source/utilities/log.cpp b/umd/vpu_driver/source/utilities/log.cpp index 88bd77e..732e2c6 100644 --- a/umd/vpu_driver/source/utilities/log.cpp +++ b/umd/vpu_driver/source/utilities/log.cpp @@ -63,16 +63,16 @@ void setLogLevel(LogLevel level) { LOG_W("Invalid log level(%d) keeping current level(%d)\n", level, curLogLevel); } -void setLogLevel(std::string_view &str) { - if (str == "QUIET") { +void setLogLevel(std::string_view str) { + if (str == "QUIET" || str == "quiet") { setLogLevel(QUIET); - } else if (str == "ERROR") { + } else if (str == "ERROR" || str == "error") { setLogLevel(ERROR); - } else if (str == "WARNING") { + } else if (str == "WARNING" || str == "warning") { setLogLevel(WARNING); - } else if (str == "INFO") { + } else if (str == "INFO" || str == "info") { setLogLevel(INFO); - } else if (str == "VERBOSE") { + } else if (str == "VERBOSE" || str == "verbose") { setLogLevel(VERBOSE); } } diff --git a/umd/vpu_driver/source/utilities/log.hpp b/umd/vpu_driver/source/utilities/log.hpp index 75548e7..56d6e92 100644 --- 
a/umd/vpu_driver/source/utilities/log.hpp +++ b/umd/vpu_driver/source/utilities/log.hpp @@ -27,7 +27,7 @@ void printLog(LogLevel debugLevel, ...) __attribute__((format(printf, 5, 6))); void setLogLevel(LogLevel level); -void setLogLevel(std::string_view &str); +void setLogLevel(std::string_view str); LogLevel getLogLevel(); diff --git a/umd/vpu_driver/source/utilities/timer.cpp b/umd/vpu_driver/source/utilities/timer.cpp index a557178..54e96d2 100644 --- a/umd/vpu_driver/source/utilities/timer.cpp +++ b/umd/vpu_driver/source/utilities/timer.cpp @@ -5,24 +5,16 @@ * */ -#include "umd_common.hpp" - -#include "vpu_driver/source/device/hw_info.hpp" #include "vpu_driver/source/utilities/log.hpp" #include "vpu_driver/source/utilities/timer.hpp" -#include "vpu_driver/source/command/vpu_job.hpp" -#include "vpu_driver/source/command/vpu_event_command.hpp" - -#include -#include -#include -#include -#include -#include -#include namespace VPU { +std::chrono::steady_clock::time_point getAbsoluteTimePoint(uint64_t userTimeout) { + int64_t absTimeout = getAbsoluteTimeoutNanoseconds(userTimeout); + return std::chrono::steady_clock::time_point(std::chrono::nanoseconds(absTimeout)); +} + int64_t getAbsoluteTimeoutNanoseconds(uint64_t userTimeout) { int64_t timeout = userTimeout > INT64_MAX ? INT64_MAX : static_cast(userTimeout); @@ -43,12 +35,4 @@ int64_t getAbsoluteTimeoutNanoseconds(uint64_t userTimeout) { return timeout_abs_ns; } -bool waitForSignal(uint64_t userTimeout, const std::vector> &jobs) { - int64_t absoluteTimeout = getAbsoluteTimeoutNanoseconds(userTimeout); - for (auto const &job : jobs) - if (!job->waitForCompletion(absoluteTimeout)) - return false; - return true; -} - } // namespace VPU diff --git a/umd/vpu_driver/source/utilities/timer.hpp b/umd/vpu_driver/source/utilities/timer.hpp index e407039..a0608dc 100644 --- a/umd/vpu_driver/source/utilities/timer.hpp +++ b/umd/vpu_driver/source/utilities/timer.hpp @@ -7,29 +7,13 @@ #pragma once -#include -#include -#include -#include - -#include "vpu_driver/source/command/vpu_job.hpp" -#include "vpu_driver/source/command/vpu_event_command.hpp" -#include "vpu_driver/source/device/vpu_device.hpp" +#include +#include namespace VPU { -/** - * @brief Wait for given timeout until the jobs are all completed by KMD. - * - * @param timeout [in] A time out value in nano sec. Give max value for not time bound wait. - * @param jobs [in] Vector for submitted command buffers. - * @return true All jobs are completed within given time. - * @return false Otherwise. - */ -bool waitForSignal(uint64_t timeout, const std::vector> &jobs); +std::chrono::steady_clock::time_point getAbsoluteTimePoint(uint64_t userTimeout); -/** - * @brief Return the absolute timeout to user timeout that is passed in nanoseconds. 
- */ int64_t getAbsoluteTimeoutNanoseconds(uint64_t userTimeout); + } // namespace VPU diff --git a/umd/vpu_driver/unit_tests/job_submission/job_test.cpp b/umd/vpu_driver/unit_tests/job_submission/job_test.cpp index dad338b..28bad64 100644 --- a/umd/vpu_driver/unit_tests/job_submission/job_test.cpp +++ b/umd/vpu_driver/unit_tests/job_submission/job_test.cpp @@ -195,7 +195,7 @@ TEST_F(VPUJobTest, createJobWithCopyCommandsforCopyEngine) { TEST_F(VPUJobTest, createJobWithDifferentTypesOfCommandExpectSuccess) { VPUBufferObject *event = ctx->createInternalBufferObject(sizeof(VPUEventCommand::KMDEventDataType), - VPU::VPUBufferObject::Type::CachedLow); + VPU::VPUBufferObject::Type::CachedFw); ASSERT_TRUE(event); uint64_t *tsHeap = reinterpret_cast(ctx->createSharedMemAlloc(sizeof(uint64_t))); ASSERT_NE(tsHeap, nullptr); diff --git a/umd/vpu_driver/unit_tests/job_submission/vpu_command_test.cpp b/umd/vpu_driver/unit_tests/job_submission/vpu_command_test.cpp index 589c8f7..6d2c186 100644 --- a/umd/vpu_driver/unit_tests/job_submission/vpu_command_test.cpp +++ b/umd/vpu_driver/unit_tests/job_submission/vpu_command_test.cpp @@ -285,7 +285,7 @@ struct VPUEventCommandTest : public VPUCommandTest { void SetUp() { VPUCommandTest::SetUp(); - eventBuffer = ctx->createInternalBufferObject(4096, VPUBufferObject::Type::CachedLow); + eventBuffer = ctx->createInternalBufferObject(4096, VPUBufferObject::Type::CachedFw); ASSERT_TRUE(eventBuffer); cmdBufferHeader.fence_heap_base_address = ctx->getVPULowBaseAddress(); @@ -320,7 +320,7 @@ TEST_F(VPUEventCommandTest, eventWaitCommandsShouldReturnExpectedProperties) { EXPECT_EQ(VPU_CMD_FENCE_WAIT, actual->header.type); EXPECT_EQ(sizeof(vpu_cmd_fence_t), actual->header.size); EXPECT_EQ(0u, actual->offset); - EXPECT_EQ(VPUEventCommand::STATE_DEVICE_SIGNAL, actual->value); + EXPECT_EQ(VPUEventCommand::STATE_WAIT, actual->value); // 64bits offsetted event wait command. 
VPUEventCommand::KMDEventDataType *offsetEventHeapPtr = eventHeapPtr + 1; @@ -332,7 +332,7 @@ TEST_F(VPUEventCommandTest, eventWaitCommandsShouldReturnExpectedProperties) { EXPECT_EQ(VPU_CMD_FENCE_WAIT, actual->header.type); EXPECT_EQ(sizeof(vpu_cmd_fence_t), actual->header.size); EXPECT_EQ(8u, actual->offset); - EXPECT_EQ(VPUEventCommand::STATE_DEVICE_SIGNAL, actual->value); + EXPECT_EQ(VPUEventCommand::STATE_WAIT, actual->value); } TEST_F(VPUEventCommandTest, eventSignalCommandsShouldReturnExpectedProperties) { diff --git a/umd/vpu_driver/unit_tests/main.cpp b/umd/vpu_driver/unit_tests/main.cpp index b7c4968..9255597 100644 --- a/umd/vpu_driver/unit_tests/main.cpp +++ b/umd/vpu_driver/unit_tests/main.cpp @@ -7,10 +7,46 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "vpu_driver/unit_tests/options.hpp" +#include "vpu_driver/source/utilities/log.hpp" + +#include + +static void printHelpMessage() { + constexpr auto helpMessage = + "\n" + "Extra test options:\n" + " -v/--verbose Set log level to verbose\n" + " -l/--loglevel Set log level - error, warning, info, verbose\n" + "\n"; + + printf("%s\n", helpMessage); +} + +static bool parseOptions(int argc, char **argv) { + static struct option longOptions[] = {{"verbose", no_argument, 0, 'v'}, + {"loglevel", required_argument, 0, 'l'}, + {}}; + + int opt; + while ((opt = getopt_long(argc, argv, "vl:b:m:", longOptions, nullptr)) != -1) { + switch (opt) { + case 'v': + VPU::setLogLevel(VERBOSE); + break; + case 'l': + VPU::setLogLevel(optarg); + break; + default: + printHelpMessage(); + return false; + } + } + return true; +} int main(int argc, char **argv) { ::testing::InitGoogleMock(&argc, argv); - UnitTestOptions::parseLogOptions(argc, argv); + if (!parseOptions(argc, argv)) + return 1; return RUN_ALL_TESTS(); } diff --git a/umd/vpu_driver/unit_tests/memory/buffer_object_test.cpp b/umd/vpu_driver/unit_tests/memory/buffer_object_test.cpp index 4f890e4..f74ad36 100644 --- a/umd/vpu_driver/unit_tests/memory/buffer_object_test.cpp +++ b/umd/vpu_driver/unit_tests/memory/buffer_object_test.cpp @@ -28,14 +28,14 @@ struct VPUBufferObjectTest : public ::testing::Test { TEST_F(VPUBufferObjectTest, createBufferObject) { EXPECT_TRUE(VPUBufferObject::create(ctx->getDriverApi(), VPUBufferObject::Location::Host, - VPUBufferObject::Type::CachedLow, + VPUBufferObject::Type::CachedFw, 4096) != nullptr); } TEST_F(VPUBufferObjectTest, supportRangeTestForGivenArgument) { auto bo = VPUBufferObject::create(ctx->getDriverApi(), VPUBufferObject::Location::Host, - VPUBufferObject::Type::CachedLow, + VPUBufferObject::Type::CachedFw, 4096); uint8_t *ptr = bo->getBasePointer(); EXPECT_TRUE(bo->isInRange(ptr)); @@ -53,7 +53,7 @@ TEST_F(VPUBufferObjectTest, copyToBufferMethodExpectSuccess) { auto bo = VPUBufferObject::create(ctx->getDriverApi(), VPUBufferObject::Location::Host, - VPUBufferObject::Type::CachedLow, + VPUBufferObject::Type::CachedFw, data.size()); EXPECT_TRUE(bo->copyToBuffer(data.data(), data.size(), 0)); @@ -66,7 +66,7 @@ TEST_F(VPUBufferObjectTest, copyToBufferOutsideRangeExpectFailure) { auto bo = VPUBufferObject::create(ctx->getDriverApi(), VPUBufferObject::Location::Host, - VPUBufferObject::Type::CachedLow, + VPUBufferObject::Type::CachedFw, size); EXPECT_FALSE(bo->copyToBuffer(data.data(), data.size(), size / 2)); } @@ -77,7 +77,7 @@ TEST_F(VPUBufferObjectTest, copyToBufferGreaterThenBufferSizeExpectFailure) { auto bo = VPUBufferObject::create(ctx->getDriverApi(), VPUBufferObject::Location::Host, - VPUBufferObject::Type::CachedLow, + 
VPUBufferObject::Type::CachedFw, size); EXPECT_FALSE(bo->copyToBuffer(data.data(), data.size(), 0)); } @@ -88,7 +88,7 @@ TEST_F(VPUBufferObjectTest, copyToBufferNullptrDataExpectFailure) { auto bo = VPUBufferObject::create(ctx->getDriverApi(), VPUBufferObject::Location::Host, - VPUBufferObject::Type::CachedLow, + VPUBufferObject::Type::CachedFw, size); EXPECT_FALSE(bo->copyToBuffer(nullptr, size, 0)); } diff --git a/umd/vpu_driver/unit_tests/mocks/gmock_os_interface_imp.hpp b/umd/vpu_driver/unit_tests/mocks/gmock_os_interface_imp.hpp index 00bcb75..87aee33 100644 --- a/umd/vpu_driver/unit_tests/mocks/gmock_os_interface_imp.hpp +++ b/umd/vpu_driver/unit_tests/mocks/gmock_os_interface_imp.hpp @@ -20,8 +20,6 @@ class GMockOsInterfaceImp : public OsInterface { MOCK_METHOD(int, osiClose, (int), (override)); MOCK_METHOD(int, osiFcntl, (int, int), (override)); MOCK_METHOD(int, osiIoctl, (int, unsigned long, void *), (override)); - MOCK_METHOD(void *, osiAlloc, (size_t), (override)); - MOCK_METHOD(int, osiFree, (void *), (override)); MOCK_METHOD(size_t, osiGetSystemPageSize, (), (override)); MOCK_METHOD(void *, osiMmap, diff --git a/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.cpp b/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.cpp index 3ccdaed..2814c81 100644 --- a/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.cpp +++ b/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.cpp @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include +#include "api/vpu_nnrt_api_37xx.h" #include "vpu_driver/source/utilities/log.hpp" #include "vpu_driver/unit_tests/mocks/mock_os_interface_imp.hpp" @@ -86,13 +88,18 @@ int MockOsInterfaceImp::osiIoctl(int fd, unsigned long request, void *data) { break; case DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID: args->value = unique_id++; + break; + case DRM_IVPU_PARAM_FW_API_VERSION: + if (args->index == VPU_NNRT_37XX_API_VER_INDEX) + args->value = VPU_NNRT_37XX_API_VER; + break; default: break; } } else if (request == DRM_IOCTL_IVPU_BO_CREATE) { if (failNextAlloc) { failNextAlloc = false; - errno = -ENOMEM; + errno = ENOMEM; return -1; } @@ -106,7 +113,7 @@ int MockOsInterfaceImp::osiIoctl(int fd, unsigned long request, void *data) { bool timeout = waitFailed.test(0); waitFailed >>= 1; if (timeout) { - errno = -ETIMEDOUT; + errno = ETIMEDOUT; return -1; } @@ -193,7 +200,11 @@ int MockOsInterfaceImp::osiIoctl(int fd, unsigned long request, void *data) { return -1; } -void *MockOsInterfaceImp::osiAlloc(size_t size) { +void * +MockOsInterfaceImp::osiMmap(void *addr, size_t size, int prot, int flags, int fd, off_t offset) { + if (offset == 0) + return nullptr; + if (failNextAlloc) { failNextAlloc = false; return nullptr; @@ -207,24 +218,12 @@ void *MockOsInterfaceImp::osiAlloc(size_t size) { return ptr; } -int MockOsInterfaceImp::osiFree(void *ptr) { +int MockOsInterfaceImp::osiMunmap(void *addr, size_t size) { callCntFree++; - free(ptr); + free(addr); return 0; } -void * -MockOsInterfaceImp::osiMmap(void *addr, size_t size, int prot, int flags, int fd, off_t offset) { - if (offset == 0) - return nullptr; - - return osiAlloc(size); -} - -int MockOsInterfaceImp::osiMunmap(void *addr, size_t size) { - return osiFree(addr); -} - size_t MockOsInterfaceImp::osiGetSystemPageSize() { return 4u * 1024u; } diff --git a/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.hpp b/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.hpp index bc8bb3d..434a207 100644 --- a/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.hpp 
+++ b/umd/vpu_driver/unit_tests/mocks/mock_os_interface_imp.hpp
@@ -46,7 +46,7 @@ class MockOsInterfaceImp : public OsInterface {
     int kmdIoctlRetCode = 0;
-    MockOsInterfaceImp(uint32_t pciDevId = vpuHwInfo37xx.supportedDeviceIds[0]);
+    MockOsInterfaceImp(uint32_t pciDevId = 0x7d1d);
     MockOsInterfaceImp(const MockOsInterfaceImp &) = delete;
     MockOsInterfaceImp &operator=(const MockOsInterfaceImp &) = delete;
     MockOsInterfaceImp(MockOsInterfaceImp &&) = delete;
@@ -58,14 +58,12 @@ class MockOsInterfaceImp : public OsInterface {
     int osiFcntl(int fd, int cmd) override;
     int osiIoctl(int fd, unsigned long request, void *args) override;
-    void *osiAlloc(size_t size) override;
-    int osiFree(void *ptr) override;
     size_t osiGetSystemPageSize() override;
     void *osiMmap(void *addr, size_t size, int prot, int flags, int fd, off_t offset) override;
     int osiMunmap(void *addr, size_t size) override;
-    void mockFailNextAlloc(); // Fails next call to osiAlloc
+    void mockFailNextAlloc(); // Fails next call to osiMmap
     void mockFailNextJobWait();
     void mockSuccessNextJobWait();
     void mockFailNextJobStatus();
diff --git a/umd/vpu_driver/unit_tests/mocks/mock_vpu_device.cpp b/umd/vpu_driver/unit_tests/mocks/mock_vpu_device.cpp
index 2735324..d24e734 100644
--- a/umd/vpu_driver/unit_tests/mocks/mock_vpu_device.cpp
+++ b/umd/vpu_driver/unit_tests/mocks/mock_vpu_device.cpp
@@ -22,7 +22,7 @@ MockVPUDevice::MockVPUDevice(std::string devnode, MockOsInterfaceImp &mockOSInf)
 std::unique_ptr<MockVPUDevice> MockVPUDevice::createWithDefaultHardwareInfo(MockOsInterfaceImp &mockOSInf) {
     auto device = std::make_unique<MockVPUDevice>(FAKE_TEST_DEV_NODE, mockOSInf);
-    if (!device->init())
+    if (!device->init(true))
         throw std::runtime_error("Failed to initialize MockVPUDevice");
     return device;
 };
diff --git a/umd/vpu_driver/unit_tests/options.hpp b/umd/vpu_driver/unit_tests/options.hpp
deleted file mode 100644
index 2b4e958..0000000
--- a/umd/vpu_driver/unit_tests/options.hpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2022 Intel Corporation
- *
- * SPDX-License-Identifier: MIT
- *
- */
-
-#pragma once
-
-#include "vpu_driver/source/utilities/log.hpp"
-
-#include <array>
-#include <cstring>
-#include <getopt.h>
-
-namespace UnitTestOptions {
-
-inline LogLevel getLogLevelFromStr(const char *str) {
-    static constexpr std::array<std::pair<const char *, LogLevel>, 5> logLevel = {
-        {{"verbose", VERBOSE},
-         {"info", INFO},
-         {"warning", WARNING},
-         {"error", ERROR},
-         {"quiet", QUIET}}};
-    for (const auto &l : logLevel)
-        if (strncmp(l.first, str, strlen(l.first)) == 0)
-            return l.second;
-    LOG_E("Failed to set log level. Log level %s does not exist", str);
-    return ERROR;
-}
-
-inline void parseLogOptions(int argc, char **argv) {
-    static struct option longOptions[] = {{"verbose", no_argument, 0, 'v'},
-                                          {"loglevel", required_argument, 0, 'l'},
-                                          {0, 0, 0, 0}};
-
-    int opt;
-    while ((opt = getopt_long(argc, argv, "vl:", longOptions, nullptr)) != -1) {
-        switch (opt) {
-        case 'v':
-            VPU::setLogLevel(VERBOSE);
-            break;
-        case 'l':
-            VPU::setLogLevel(getLogLevelFromStr(optarg));
-            break;
-        }
-    }
-}
-} // namespace UnitTestOptions
diff --git a/umd/vpu_driver/unit_tests/os_interface/vpu_device_factory_test.cpp b/umd/vpu_driver/unit_tests/os_interface/vpu_device_factory_test.cpp
index 7c06038..6c79c45 100644
--- a/umd/vpu_driver/unit_tests/os_interface/vpu_device_factory_test.cpp
+++ b/umd/vpu_driver/unit_tests/os_interface/vpu_device_factory_test.cpp
@@ -23,6 +23,6 @@ TEST(DeviceFactoryTest, devicesDiscoveredWithUDevFuncs) {
     EXPECT_CALL(gmockInfc, osiIoctl).Times(64);
     // Devices vector will be returned upon number of discovered devices.
-    auto devVector = DeviceFactory::createDevices(&gmockInfc);
+    auto devVector = DeviceFactory::createDevices(&gmockInfc, true);
     EXPECT_EQ(0u, devVector.size());
 }
diff --git a/umd/vpu_driver/unit_tests/os_interface/vpu_driver_api_test.cpp b/umd/vpu_driver/unit_tests/os_interface/vpu_driver_api_test.cpp
index 838b887..5b70789 100644
--- a/umd/vpu_driver/unit_tests/os_interface/vpu_driver_api_test.cpp
+++ b/umd/vpu_driver/unit_tests/os_interface/vpu_driver_api_test.cpp
@@ -10,6 +10,7 @@
 #include
 #include
+#include <stdexcept>
 #include
 #define FAKE_TEST_DEV_NODE "dev/node/fake"
@@ -58,9 +59,9 @@ };
 TEST_F(VPUDriverApiIoctlTest, getDeviceParamIoctl) {
-    struct drm_ivpu_param param = {};
-    param.param = DRM_IVPU_PARAM_DEVICE_ID;
-    EXPECT_EQ(0, driverApi->getDeviceParam(&param));
+    uint64_t deviceId = 0;
+    EXPECT_NO_THROW(deviceId = driverApi->getDeviceParam(DRM_IVPU_PARAM_DEVICE_ID));
+    EXPECT_EQ(deviceId, 0x7d1d);
     EXPECT_EQ(1u, mockOsInfc.callCntIoctl);
     EXPECT_EQ(DRM_IOCTL_IVPU_GET_PARAM, mockOsInfc.ioctlLastCommand);
@@ -80,9 +81,7 @@ TEST_F(VPUDriverApiIoctlTest, ioctlError) {
     struct drm_ivpu_submit exec = {};
     EXPECT_EQ(-1, driverApi->submitCommandBuffer(&exec));
-    struct drm_ivpu_param param = {};
-    param.param = DRM_IVPU_PARAM_DEVICE_ID;
-    EXPECT_EQ(-1, driverApi->getDeviceParam(&param));
+    EXPECT_THROW(driverApi->getDeviceParam(DRM_IVPU_PARAM_DEVICE_ID), std::runtime_error);
     struct drm_ivpu_bo_wait args = {};
     EXPECT_EQ(-1, driverApi->wait(&args));
diff --git a/umd/vpu_driver/unit_tests/vpu_device/device_context_test.cpp b/umd/vpu_driver/unit_tests/vpu_device/device_context_test.cpp
index d3f5112..a4bb036 100644
--- a/umd/vpu_driver/unit_tests/vpu_device/device_context_test.cpp
+++ b/umd/vpu_driver/unit_tests/vpu_device/device_context_test.cpp
@@ -138,14 +138,14 @@ TEST_F(DeviceContextTest, freeDeviceMemoryUsingNonBasePointerExpectFail) {
 }

 TEST_F(DeviceContextTest, createAndFreeDeviceMemoryInHighRangeExpectSuccess) {
-    auto ptr = ctx->createSharedMemAlloc(allocSize, VPUBufferObject::Type::CachedHigh);
+    auto ptr = ctx->createSharedMemAlloc(allocSize, VPUBufferObject::Type::CachedShave);
     EXPECT_NE(nullptr, ptr);
     EXPECT_EQ(1u, ctx->getBuffersCount());
     EXPECT_TRUE(ctx->freeMemAlloc(ptr));
 }

 TEST_F(DeviceContextTest, createAndFreeHostMemoryInHighRangeExpectSuccess) {
-    auto ptr = ctx->createHostMemAlloc(allocSize, VPUBufferObject::Type::CachedHigh);
+    auto ptr = ctx->createHostMemAlloc(allocSize, VPUBufferObject::Type::CachedShave);
EXPECT_NE(nullptr, ptr); EXPECT_EQ(1u, ctx->getBuffersCount()); EXPECT_TRUE(ctx->freeMemAlloc(ptr)); @@ -289,7 +289,7 @@ TEST_F(DeviceContextTest, ASSERT_NE(commands.back(), nullptr); EXPECT_EQ(commands.size(), 4u); - auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedLow); + auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedFw); ASSERT_NE(descBuffer, nullptr); checkOffsets(commands, descBuffer); @@ -328,7 +328,7 @@ TEST_F(DeviceContextTest, createTimestampAndCopyCommandListToCheckCommandsOffset EXPECT_EQ(commands.size(), 3u); // replicating functionality from commandqueueExecuteCommandLists - auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedLow); + auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedFw); ASSERT_NE(descBuffer, nullptr); checkOffsets(commands, descBuffer); @@ -368,7 +368,7 @@ TEST_F(DeviceContextTest, EXPECT_EQ(commands.size(), 2u); // replicating functionality from commandqueueExecuteCommandLists - auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedLow); + auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedFw); ASSERT_NE(descBuffer, nullptr); checkOffsets(commands, descBuffer); @@ -410,7 +410,7 @@ TEST_F(DeviceContextTest, createMemAndAppendCommandListNotInOrderOffsetReturnsCo EXPECT_EQ(commands.size(), 3u); // replicating functionality from commandqueueExecuteCommandLists - auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedLow); + auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedFw); ASSERT_NE(descBuffer, nullptr); checkOffsets(commands, descBuffer); @@ -465,7 +465,7 @@ TEST_F(DeviceContextTest, createMemAndAppendLargeCommandListOffsetReturnsCorrect EXPECT_EQ(commands.size(), 7u); // replicating functionality from commandqueueExecuteCommandLists - auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedLow); + auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedFw); ASSERT_NE(descBuffer, nullptr); checkOffsets(commands, descBuffer); @@ -570,7 +570,7 @@ TEST_F(DeviceContextTest, createGraphCommandOffsetsReturnsCorrectly) { EXPECT_EQ(commands.size(), 8u); - auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedLow); + auto descBuffer = ctx->createInternalBufferObject(allocSize, VPUBufferObject::Type::CachedFw); ASSERT_NE(descBuffer, nullptr); checkOffsets(commands, descBuffer); diff --git a/umd/vpu_driver/unit_tests/vpu_device/vpu_device_test.cpp b/umd/vpu_driver/unit_tests/vpu_device/vpu_device_test.cpp index 2db7e15..6e8e203 100644 --- a/umd/vpu_driver/unit_tests/vpu_device/vpu_device_test.cpp +++ b/umd/vpu_driver/unit_tests/vpu_device/vpu_device_test.cpp @@ -52,27 +52,6 @@ TEST_F(VPUDeviceTest, jobSubmissionTriggersIoctls) { EXPECT_TRUE(ctx->freeMemAlloc(static_cast(tsDest))); } -TEST_F(VPUDeviceTest, allocateMemory) { - void *memPtr1 = nullptr; - void *memPtr2 = nullptr; - size_t size = 10; - const auto &drvApi = ctx->getDriverApi(); - - // Alloc failed case. - osInfc.mockFailNextAlloc(); - memPtr1 = drvApi.alloc(size); - EXPECT_EQ(nullptr, memPtr1); - // Attempting to free invalid memory space. - EXPECT_FALSE(ctx->freeMemAlloc(memPtr1)); - - // Alloc successful. 
-    memPtr2 = ctx->createHostMemAlloc(size, VPUBufferObject::Type::CachedLow);
-    EXPECT_NE(nullptr, memPtr2);
-
-    bool unmapRes = ctx->freeMemAlloc(memPtr2);
-    EXPECT_TRUE(unmapRes);
-}
-
 TEST_F(VPUDeviceTest, givenCallIsConnectedReportsDeviceConnectionStatus) {
     // Device disconnected.
     osInfc.deviceConnected = false;
diff --git a/validation/umd-test/CMakeLists.txt b/validation/umd-test/CMakeLists.txt
index f761d62..a9074c4 100644
--- a/validation/umd-test/CMakeLists.txt
+++ b/validation/umd-test/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2022 Intel Corporation
+# Copyright (C) 2022-2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -11,17 +11,14 @@ project(vpu-umd-test)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
-# umd-test will be linked against ze_loader by default but you can
-# use USE_ZE_INTEL_VPU to link against the ze_intel_vpu instead
-if(DEFINED USE_ZE_INTEL_VPU)
-    set(LEVEL_ZERO_LIB_NAME ze_intel_vpu)
-else()
-    set(LEVEL_ZERO_LIB_NAME ze_loader)
-endif()
-message(STATUS "LEVEL_ZERO_LIB_NAME = ${LEVEL_ZERO_LIB_NAME}" )
+add_subdirectory(utilities)
+
+find_package(OpenVINO QUIET COMPONENTS Runtime)
+find_package(OpenCV QUIET)
 add_executable(${PROJECT_NAME}
     blob_params.cpp
+    image.cpp
     model_params.cpp
     main.cpp
     umd_test.cpp
@@ -42,9 +39,24 @@ add_executable(${PROJECT_NAME}
     test_eventpool.cpp
     test_metric.cpp
     test_metric_streamer.cpp
+    test_priority.cpp
+    test_prime_buffers.cpp
 )
+target_include_directories(${PROJECT_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/linux/include/uapi")
+
+if (OpenVINO_FOUND)
+    target_sources(${PROJECT_NAME} PRIVATE test_ov_inference.cpp)
+    target_link_libraries(${PROJECT_NAME} openvino::runtime)
+    target_compile_definitions(${PROJECT_NAME} PUBLIC UMD_TESTS_USE_OPENVINO=1)
+endif()
+
+if (OpenCV_FOUND)
+    target_link_libraries(${PROJECT_NAME} opencv_core opencv_imgcodecs)
+    target_compile_definitions(${PROJECT_NAME} PRIVATE UMD_TESTS_USE_OPENCV)
+endif()
+
 target_compile_options(${PROJECT_NAME} PRIVATE -DVPU_GTEST_APP -Wall -Wextra -Werror)
-target_link_libraries(${PROJECT_NAME} ${LEVEL_ZERO_LIB_NAME} test_app_lib yaml-cpp)
+target_link_libraries(${PROJECT_NAME} ze_loader test_app_lib yaml-cpp data_handle)
 install(TARGETS ${PROJECT_NAME} COMPONENT validation-npu)
diff --git a/validation/umd-test/configs/README.md b/validation/umd-test/configs/README.md
index 0450721..836cfd1 100644
--- a/validation/umd-test/configs/README.md
+++ b/validation/umd-test/configs/README.md
@@ -31,7 +31,7 @@ Empty configuration causes that Umd.ConfigurationCheck test fails.
 ## Global variables
 Defines directories where models, blobs and pictures are stored that are used by tests
-In this section is also defined logging level, accepted values are: QUIET ERROR, WARNING, INFO, VERBOSE
+This section also defines the logging level; accepted values are: QUIET, ERROR, WARNING, INFO, VERBOSE

 Example:
 ```
@@ -44,7 +44,7 @@ image_dir: /opt/user/sample-images/
 ## Section "graph\_execution"
 Defines list of compiled blobs used for graph execution tests from groups:
-"CommandGraph\*.\* , GraphInference.\* , GraphNative.\* , InferencePerformance.\*"
+"CommandGraph\*.\*, GraphInference.\*, GraphNative\*.\*, InferencePerformance.\*"
 Order of defining blobs is significant, simple tests takes only first blob from this section
 most complex executes all defined.
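A minimal sketch of an entry for this section (the blob path below is hypothetical; the path, in and out keys are the ones the graph tests read):

```
graph_execution:
  - path: mobilenet-v2/vpuip.blob
    in: [ input-0.bin ]
    out: [ exp-output-0.bin ]
```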
@@ -84,17 +84,18 @@ graph_metrics:
       name: mobilenet-v2
       in: [ input-0.bin ]
       out: [ exp-output-0.bin ]
-      act_shave_tasks: false
+      metric_groups: [ NOC ]
 ```

 ## Section "compiler\_in\_driver"
 Defines list of models used to test compiler in driver.
 It is used by tests:
-"CompilerInDriver.\*, CompilerInDriverLong.\*"
+"CompilerInDriver.\*, CompilerInDriverLayers.\*, CompilerInDriverLong.\*, CompilerInDriverWithProfiling.\*"
 There must be specified:
 - **path:** path to model to compile, the generated test name will be the name of model
 - **flags:** compilation flags passed directly to compiler
+- **graph_profiling:** if set to "false", the graph profiling tests are disabled

 Example:
@@ -104,6 +105,7 @@ compiler_in_driver:
     flags: --inputs_precisions="A:fp16 B:fp16 C:fp16" --inputs_layouts="A:C B:C C:C" --outputs_precisions="Y:fp16" --outputs_layouts="Y:C"
   - path: mobilenet-v2/onnx/FP16-INT8/mobilenet-v2.xml
     flags: --inputs_precisions="result.1:u8" --inputs_layouts="result.1:NHWC" --outputs_precisions="473:fp32" --outputs_layouts="473:NC"
+    graph_profiling: false
 ```

 ## Section "image\_classification\_imagenet"
@@ -113,7 +115,7 @@ For each model must be specified:
 - **path:** path to model to compile, the generated test name will be the name of model
 - **flags:** compilation flags passed directly to compiler
 - **input:** images list used as an input for network, "image\_dir" prefix will added to this by default
-- **output:** expected output class for each image
+- **class_index:** expected class index for each image
 - **iterations:** number of iterations for each network

 Example:
 ```
 image_classification_imagenet:
   - path: resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml
     flags: --inputs_precisions="result.1:u8" --inputs_layouts="result.1:NHWC" --outputs_precisions="495:fp32" --outputs_layouts="495:NC"
     input: [ cat3.bmp, watch.bmp ]
-    output: [ 283, 531 ]
+    class_index: [ 283, 531 ]
     iterations: 100
 ```

 ## Section "multi\_inference"
-This configuration is used by single CompilerInDriverMultiinference.ImageClassification test
+This configuration is used by the single CompilerInDriverMultiInference.Pipeline test
 All defined models are compiled and then executed simultanously in separate threads with target fps rate.
-The input and output are optional, when input is not defined the random data is passed to network
+The input and class_index are optional; when input is not defined, random data is passed to the network

 For each model can be specified:
 - **path:** path to model to compile, the generated test name will be the name of model
 - **flags:** compilation flags passed directly to compiler
 - **input:** optional, images list used as an input for network, "image\_dir" prefix will added to this by default
-- **output:** optional, expected output class for each image
+- **class_index:** optional, expected class index for each image
 - **target\_fps:** target fps rate
 - **exec\_time\_in\_secs:** execution time in seconds
+- **priority:** sets the command queue priority; available priority levels: high, low, normal
+- **delay_in_us:** wait for a specified time before starting the inference

 Example:
 ```
 multi_inference:
-  - path: resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml
-    flags: --inputs_precisions="result.1:u8" --inputs_layouts="result.1:NHWC" --outputs_precisions="495:fp32" --outputs_layouts="495:NC"
-    input: [ watch.bmp ]
-    output: [ 531 ]
-    target_fps: 30
-    exec_time_in_secs: 10
-  - path: mobilenet-v2/onnx/FP16-INT8/mobilenet-v2.xml
-    flags: --inputs_precisions="result.1:u8" --inputs_layouts="result.1:NHWC" --outputs_precisions="473:fp32" --outputs_layouts="473:NC"
-    target_fps: 30
-    exec_time_in_secs: 10
+  - name: "ImageClassificationNetworks"
+    pipeline:
+    - path: resnet-50-pytorch/onnx/FP16-INT8/resnet-50-pytorch.xml
+      flags: --inputs_precisions="result.1:u8" --inputs_layouts="result.1:NHWC" --outputs_precisions="495:fp32" --outputs_layouts="495:NC"
+      input: [ watch.bmp ]
+      class_index: [ 531 ]
+      target_fps: 30
+      exec_time_in_secs: 10
+    - path: mobilenet-v2/onnx/FP16-INT8/mobilenet-v2.xml
+      flags: --inputs_precisions="result.1:u8" --inputs_layouts="result.1:NHWC" --outputs_precisions="473:fp32" --outputs_layouts="473:NC"
+      target_fps: 30
+      exec_time_in_secs: 10
 ```

 ---
diff --git a/validation/umd-test/graph_utilities.hpp b/validation/umd-test/graph_utilities.hpp
new file mode 100644
index 0000000..46e63d8
--- /dev/null
+++ b/validation/umd-test/graph_utilities.hpp
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2022-2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include "umd_extensions.h"
+#include "blob_params.hpp"
+#include "model_params.hpp"
+#include "umd_test.h"
+#include "utilities/data_handle.h"
+#include "utilities/graph_to_str.h"
+#include "image.hpp"
+
+#include <filesystem>
+#include <random>
+
+enum MemType {
+    DEVICE_MEMORY,
+    HOST_MEMORY,
+    SHARED_MEMORY,
+};
+
+class Graph {
+  public:
+    Graph(ze_context_handle_t hContext, ze_device_handle_t hDevice, graph_dditable_ext_t *graphDDI)
+        : hContext(hContext)
+        , hDevice(hDevice)
+        , graphDDI(graphDDI) {}
+
+    static std::shared_ptr<Graph> create(ze_context_handle_t hContext,
+                                         ze_device_handle_t hDevice,
+                                         graph_dditable_ext_t *graphDDI,
+                                         std::filesystem::path path,
+                                         const YAML::Node &node,
+                                         uint32_t graphFlags = ZE_GRAPH_FLAG_NONE) {
+        auto graph = std::make_shared<Graph>(hContext, hDevice, graphDDI);
+
+        if (path.extension() == ".xml") {
+            graph->createFromModel(std::move(path), node, graphFlags);
+        } else {
+            graph->createFromBlob(std::move(path), node, graph->vpuBlob);
+        }
+
+        graph->queryArguments();
+
+        return graph;
+    }
+
+    static size_t graphPrecisionToByteSize(ze_graph_argument_precision_t arg) {
+        switch (arg) {
+        case ZE_GRAPH_ARGUMENT_PRECISION_FP64:
+        case ZE_GRAPH_ARGUMENT_PRECISION_INT64:
+        case ZE_GRAPH_ARGUMENT_PRECISION_UINT64:
+            return sizeof(uint64_t);
+
+        case ZE_GRAPH_ARGUMENT_PRECISION_FP32:
+        case ZE_GRAPH_ARGUMENT_PRECISION_INT32:
+        case ZE_GRAPH_ARGUMENT_PRECISION_UINT32:
+            return sizeof(uint32_t);
+
+        case ZE_GRAPH_ARGUMENT_PRECISION_BF16:
+        case ZE_GRAPH_ARGUMENT_PRECISION_FP16:
+        case ZE_GRAPH_ARGUMENT_PRECISION_INT16:
+        case ZE_GRAPH_ARGUMENT_PRECISION_UINT16:
+            return sizeof(uint16_t);
+
+        case ZE_GRAPH_ARGUMENT_PRECISION_INT8:
+        case ZE_GRAPH_ARGUMENT_PRECISION_UINT8:
+            return sizeof(uint8_t);
+
+        case ZE_GRAPH_ARGUMENT_PRECISION_INT4:
+        case ZE_GRAPH_ARGUMENT_PRECISION_UINT4:
+        case ZE_GRAPH_ARGUMENT_PRECISION_BIN:
+        case ZE_GRAPH_ARGUMENT_PRECISION_BOOLEAN:
+            return 0;
+
+        case ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN:
+        case ZE_GRAPH_ARGUMENT_PRECISION_DYNAMIC:
+        default:
+            ADD_FAILURE() << "Unsupported graph argument precision";
+            return 0;
+        }
+    }
+
+    static void generateRandomData(std::vector<uint8_t> &data, size_t size) {
+        std::random_device rd;
+        std::uniform_int_distribution<uint8_t> dist;
+
+        data.reserve(size);
+        for (size_t i = 0; i < size; i++) {
+            data.push_back(dist(rd));
+        }
+    }
+
+    void allocateArguments(MemType memType) {
+        allocateInputArguments(memType);
+        allocateOutputArguments(memType);
+    }
+
+    void allocateInputArguments(MemType memType) {
+        inArgs.clear();
+
+        uint32_t argIndex = 0;
+        for (auto size : inputSize) {
+            auto ptr = allocMemory(size, memType);
+            inArgs.push_back(ptr);
+            ASSERT_EQ(setArgumentValue(argIndex++, ptr), ZE_RESULT_SUCCESS);
+        }
+    }
+
+    void allocateOutputArguments(MemType memType) {
+        outArgs.clear();
+
+        uint32_t argIndex = inputSize.size();
+        for (auto size : outputSize) {
+            auto ptr = allocMemory(size, memType);
+            outArgs.push_back(ptr);
+            ASSERT_EQ(setArgumentValue(argIndex++, ptr), ZE_RESULT_SUCCESS);
+        }
+    }
+
+    void setRandomInput() {
+        std::vector<std::vector<uint8_t>> inputData;
+        inputData.resize(inputSize.size());
+        for (size_t i = 0; i < inputSize.size(); ++i) {
+            generateRandomData(inputData[i], inputSize[i]);
+            memcpy(inArgs[i], inputData[i].data(), inputData[i].size());
+        }
+    }
+
+    void copyInputData() {
+        for (size_t i = 0; i < inArgs.size(); i++) {
+            memcpy(inArgs[i], inputBin[i].data(), inputBin[i].size());
+        }
+    }
+
+    void loadInputData(std::filesystem::path path) {
+        ASSERT_EQ(path.extension(), ".bmp");
+
+        Image image(path);
+        ASSERT_EQ(inputSize[0], image.getSizeInBytes());
+        memcpy(inArgs[0], image.getPtr(), inputSize[0]);
+    }
+
+    ze_result_t setArgumentValue(uint32_t argIndex, const void *pArgValue) {
+        return graphDDI->pfnSetArgumentValue(handle, argIndex, pArgValue);
+    }
+
+    void checkResults() {
+        for (size_t i = 0; i < outArgs.size(); i++) {
+            ASSERT_EQ(memcmp(outArgs[i], outputBin[i].data(), outputBin[i].size()), 0);
+        }
+    }
+
+    void checkResults(uint16_t imageClassIndex) {
+        ze_graph_argument_precision_t outputType = outputPrecision.at(0);
+        size_t elementSize = graphPrecisionToByteSize(outputType);
+        if (elementSize == 0)
+            FAIL() << "element size must be greater than 0";
+        std::vector<float> outputData(outputSize.at(0) / elementSize);
+
+        switch (outputType) {
+        case ZE_GRAPH_ARGUMENT_PRECISION_FP32: {
+            memcpy(outputData.data(), outArgs.at(0), outputSize.at(0));
+            break;
+        }
+        case ZE_GRAPH_ARGUMENT_PRECISION_FP16: {
+            auto outputPtr = reinterpret_cast<uint16_t *>(outArgs.at(0));
+            for (size_t i = 0; i < outputData.size(); i++) {
+                outputData[i] = DataHandle::rawFp16ToFp32(outputPtr[i]);
+            }
+            break;
+        }
+        default:
+            FAIL() << "Unsupported output precision " << zeGraphArgumentPrecisionToStr(outputType);
+        }
+
+        auto it = std::max_element(outputData.begin(), outputData.end());
+        size_t index = std::distance(outputData.begin(), it);
+
+        TRACE("Class = %li, Accuracy = %f\n", index, *it);
+
+        ASSERT_EQ(index, imageClassIndex) << "Class index does not match!" << std::endl
+                                          << "Class: " << index << "!=" << imageClassIndex;
+    }
+
+    void clearOutput() {
+        for (size_t i = 0; i < outArgs.size(); i++) {
+            memset(outArgs[i], 0, outputSize[i]);
+        }
+    }
+
+  private:
+    void queryArguments() {
+        getArgumentsProperties();
+        ASSERT_NE(inputSize.size(), 0);
+        ASSERT_NE(outputSize.size(), 0);
+        ASSERT_NE(outputPrecision.size(), 0);
+    }
+
+    void getArgumentsProperties() {
+        ze_graph_properties_t properties{};
+        ASSERT_EQ(graphDDI->pfnGetProperties(handle, &properties), ZE_RESULT_SUCCESS);
+
+        for (uint32_t i = 0; i < properties.numGraphArgs; i++) {
+            ze_graph_argument_properties_t argProperties{};
+
+            ASSERT_EQ(graphDDI->pfnGetArgumentProperties(handle, i, &argProperties),
+                      ZE_RESULT_SUCCESS);
+
+            size_t size = 1u;
+            for (int i = 0; i < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; i++)
+                size *= argProperties.dims[i];
+            size *= graphPrecisionToByteSize(argProperties.devicePrecision);
+
+            ASSERT_GT(size, 0u);
+            if (argProperties.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) {
+                inputSize.push_back(size);
+            } else {
+                outputSize.push_back(size);
+                outputPrecision.push_back(argProperties.devicePrecision);
+            }
+        }
+    }
+
+    void *allocMemory(size_t size, MemType memType) {
+        if (memType == DEVICE_MEMORY) {
+            mem.push_back(zeMemory::allocDevice(hContext, hDevice, size));
+        } else if (memType == HOST_MEMORY) {
+            mem.push_back(zeMemory::allocHost(hContext, size));
+        } else {
+            mem.push_back(zeMemory::allocShared(hContext, hDevice, size));
+        }
+        return mem.back().get();
+    }
+
+    std::vector<char> getFlagsFromString(std::string flags) {
+        std::vector<char> buildFlags;
+
+        for (auto c : flags)
+            buildFlags.push_back(c);
+        buildFlags.push_back('\0');
+        return buildFlags;
+    }
+
+    void createGraphDescriptorForModel(const std::string &modelPath, uint32_t graphFlags) {
+        std::vector<uint8_t> modelXml, modelBin;
+        ze_device_graph_properties_t graphProperties;
+
+        ASSERT_TRUE(getModelFromPath(modelPath, modelXml, modelBin));
+
+        ASSERT_EQ(graphDDI->pfnDeviceGetGraphProperties(hDevice, &graphProperties),
+                  ZE_RESULT_SUCCESS);
+
+        ze_graph_compiler_version_info_t version = {.major = graphProperties.compilerVersion.major,
+                                                    .minor = graphProperties.compilerVersion.minor};
+
+        uint64_t xml_len = modelXml.size();
+        uint64_t bin_len = modelBin.size();
+        uint32_t numInputs = 2;
+        uint64_t modelSize = sizeof(version) + sizeof(numInputs) + sizeof(xml_len) + xml_len +
+                             sizeof(bin_len) + bin_len;
+
+        modelIR.resize(modelSize);
+
+        uint64_t offset = 0;
+        memcpy(&modelIR[0], &version, sizeof(version));
+        offset += sizeof(version);
+
+        memcpy(&modelIR[offset], &numInputs, sizeof(numInputs));
+        offset += sizeof(numInputs);
+
+        memcpy(&modelIR[offset], &xml_len, sizeof(xml_len));
+        offset += sizeof(xml_len);
+
+        memcpy(&modelIR[offset], modelXml.data(), xml_len);
+        offset += xml_len;
+
+        memcpy(&modelIR[offset], &bin_len, sizeof(bin_len));
+        offset += sizeof(bin_len);
+
+        memcpy(&modelIR[offset], modelBin.data(), bin_len);
+
+        desc.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES;
+        desc.pNext = nullptr;
+        desc.format = ZE_GRAPH_FORMAT_NGRAPH_LITE;
+        desc.inputSize = modelIR.size();
+        desc.pInput = modelIR.data();
+        desc.pBuildFlags = buildFlags.data();
+        desc.flags = graphFlags;
+    }
+
+    void createFromModel(std::string &&path, const YAML::Node &node, uint32_t graphFlags) {
ASSERT_GT(node["flags"].as().size(), 0); + + ze_result_t ret = ZE_RESULT_SUCCESS; + + buildFlags = getFlagsFromString(node["flags"].as()); + + createGraphDescriptorForModel(path, graphFlags); + + scopedGraphHandle = zeScope::graphCreate2(graphDDI, hContext, hDevice, desc, ret); + EXPECT_EQ(ret, ZE_RESULT_SUCCESS); + + handle = scopedGraphHandle.get(); + } + + void createFromBlob(std::string &&path, const YAML::Node &node, std::vector &vpuBlob) { + ASSERT_GT(node["in"].as>().size(), 0); + ASSERT_GT(node["out"].as>().size(), 0); + + ze_result_t ret = ZE_RESULT_SUCCESS; + + ASSERT_TRUE(getBlobFromPath(std::move(path), + node["in"].as>(), + node["out"].as>(), + vpuBlob, + inputBin, + outputBin, + vpuBin)); + + desc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = nullptr, + .format = ZE_GRAPH_FORMAT_NATIVE, + .inputSize = vpuBlob.size(), + .pInput = reinterpret_cast(vpuBlob.data()), + .pBuildFlags = nullptr, + .flags = ZE_GRAPH_FLAG_NONE}; + + scopedGraphHandle = zeScope::graphCreate2(graphDDI, hContext, hDevice, desc, ret); + EXPECT_EQ(ret, ZE_RESULT_SUCCESS); + + handle = scopedGraphHandle.get(); + } + + public: + ze_context_handle_t hContext = nullptr; + ze_device_handle_t hDevice = nullptr; + graph_dditable_ext_t *graphDDI = nullptr; + + std::vector> inputBin, outputBin; + std::vector vpuBlob, vpuBin; + + ze_graph_handle_t handle = nullptr; + + std::vector inputSize; + std::vector outputSize; + std::vector outputPrecision; + + std::vector inArgs, outArgs; + + private: + ze_graph_desc_2_t desc = {}; + std::vector modelIR = {}; + std::vector buildFlags = {}; + + std::vector> mem; + + zeScope::SharedPtr scopedGraphHandle = nullptr; +}; diff --git a/validation/umd-test/image.cpp b/validation/umd-test/image.cpp new file mode 100644 index 0000000..39a2e4a --- /dev/null +++ b/validation/umd-test/image.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "image.hpp" + +#ifdef UMD_TESTS_USE_OPENCV +#include + +struct Image::Impl { + Impl(const std::string &path) { + data = cv::imread(path); + if (data.empty()) { + throw "Failed to read image " + path; + } + } + + void *getPtr() { return data.data; } + size_t getHeight() const { return data.size().height; } + size_t getWidth() const { return data.size().width; } + size_t getChannels() const { return data.channels(); } + size_t getElementSize() const { return data.elemSize1(); } + size_t getSizeInBytes() const { return data.total() * data.elemSize(); } + + private: + cv::Mat data; +}; + +#else +#include "umd_test.h" +#include + +struct __attribute__((packed)) BmpFileHeader { + uint16_t header; + uint32_t size; + uint32_t reserve; + uint32_t offset; +}; + +struct __attribute__((packed)) BmpInfoHeader { + int32_t size; + int32_t width; + int32_t height; + int16_t planes; + int16_t bitCount; + int32_t compression; + int32_t imageSize; + int32_t XPixelsPerMeter; + int32_t YPixelsPerMeter; + int32_t colorsUsed; + int32_t colorsImportant; +}; + +struct Image::Impl { + Impl(const std::string &path) { + if (!UmdTest::loadFile(path, data)) { + throw "Failed to read image " + path; + } + + auto fileContentPtr = data.data(); + + auto *bmpFileHeader = reinterpret_cast(fileContentPtr); + auto *bmpInfoHeader = + reinterpret_cast(fileContentPtr + sizeof(BmpFileHeader)); + + if (bmpInfoHeader->compression != 0) { + throw "Reading compressed BMP image is not supported"; + } + + height = std::abs(bmpInfoHeader->height); + width = bmpInfoHeader->width; + channels = 
bmpInfoHeader->bitCount / 8; + offset = bmpFileHeader->offset; + + if (channels != 3) { + throw "Only 24 bpp is supported. This image (" + path + ") has " + + std::to_string(bmpInfoHeader->bitCount) + " bpp"; + } + + size_t stride = width * channels; + if (stride % 4 != 0) { + throw "Reading image with padding is not supported"; + } + + sizeInBytes = height * stride; + + if (bmpInfoHeader->height > 0) { + // If height > 0, pixels are laid out 'upside down'. + // The last line is at the beginning of pixel data and the first line is at the end. + // So we need to reverse the data in memory. + auto dstPtr = fileContentPtr + offset; + auto srcPtr = dstPtr + (height - 1) * stride; + for (size_t i = 0; i < height / 2; i++) { + for (size_t j = 0; j < stride; j++) { + std::swap(srcPtr[j], dstPtr[j]); + } + dstPtr += stride; + srcPtr -= stride; + } + } + } + + void *getPtr() { return &data[0] + offset; } + size_t getHeight() const { return height; } + size_t getWidth() const { return width; } + size_t getChannels() const { return channels; } + size_t getElementSize() const { return 1; } + size_t getSizeInBytes() const { return sizeInBytes; } + + private: + std::vector data; + size_t height; + size_t width; + size_t channels; + size_t sizeInBytes; + size_t offset; +}; + +#endif + +Image::Image(const std::string &path) { + impl = std::make_unique(path); +} + +Image::~Image() = default; + +void *Image::getPtr() { + return impl->getPtr(); +} + +size_t Image::getHeight() const { + return impl->getHeight(); +} + +size_t Image::getWidth() const { + return impl->getWidth(); +} + +size_t Image::getChannels() const { + return impl->getChannels(); +} + +size_t Image::getElementSize() const { + return impl->getElementSize(); +} + +size_t Image::getSizeInBytes() const { + return impl->getSizeInBytes(); +} diff --git a/validation/umd-test/image.hpp b/validation/umd-test/image.hpp new file mode 100644 index 0000000..b1ced0a --- /dev/null +++ b/validation/umd-test/image.hpp @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include +#include +#include + +struct Image { + Image(const std::string &path); + ~Image(); + + void *getPtr(); + size_t getHeight() const; + size_t getWidth() const; + size_t getChannels() const; + size_t getElementSize() const; + size_t getSizeInBytes() const; + + private: + struct Impl; + std::unique_ptr impl; +}; diff --git a/validation/umd-test/test_commands.cpp b/validation/umd-test/test_commands.cpp index 045e09f..85981ed 100644 --- a/validation/umd-test/test_commands.cpp +++ b/validation/umd-test/test_commands.cpp @@ -66,7 +66,8 @@ class Command : public UmdTest { ze_result_t ret; }; -TEST_F(Command, CommandListDestroyErrorHandle) { +// TODO: Validation layer is disabled. 
OpenVino issue: EISW-113275 +TEST_F(Command, DISABLED_CommandListDestroyErrorHandle) { EXPECT_EQ(zeCommandListDestroy(nullptr), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); } @@ -79,7 +80,12 @@ TEST_F(Command, CreateCloseResetAndDestroyList) { ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); } +// TODO: Validation layer is disabled when OpenVino is used, test will fail, issue: EISW-101738 +#ifdef UMD_TESTS_USE_OPENVINO +TEST_F(Command, DISABLED_CreateAndDestroyQueueErrorHandle) { +#else TEST_F(Command, CreateAndDestroyQueueErrorHandle) { +#endif EXPECT_EQ(zeCommandQueueCreate(nullptr, nullptr, &cmdQueueDesc, &queue), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); EXPECT_EQ(zeCommandQueueCreate(zeContext, nullptr, &cmdQueueDesc, &queue), @@ -93,7 +99,8 @@ TEST_F(Command, CreateSynchronizeAndDestroyQueue) { ASSERT_EQ(zeCommandQueueSynchronize(queue, 0), ZE_RESULT_SUCCESS); } -TEST_F(Command, CreateExecuteSynchronizeAndDestroyQueueErrorHandle) { +// TODO: Validation layer is disabled. OpenVino issue: EISW-113275 +TEST_F(Command, DISABLED_CreateExecuteSynchronizeAndDestroyQueueErrorHandle) { EXPECT_EQ(zeCommandQueueExecuteCommandLists(nullptr, 1, &list, nullptr), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); EXPECT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, nullptr, nullptr), @@ -1087,9 +1094,8 @@ TEST_F(CommandStress, MultipleVPUCommandBuffers) { TEST_F(CommandStress, MultipleVPUJobs) { /* * VPUJob is created for every zeCommandList. The test is similar to MultipleVPUCommandBuffers. - * The limit of job submission is 63 in KMD before VPU consumes them. */ - const size_t listCount = 8; + const size_t listCount = 16; const size_t copyCount = 7; const uint64_t referenceValue = 0x1234567890abcd00; const uint64_t copySize = 4 * KB; diff --git a/validation/umd-test/test_context.cpp b/validation/umd-test/test_context.cpp index 095f9c3..e4d3fd3 100644 --- a/validation/umd-test/test_context.cpp +++ b/validation/umd-test/test_context.cpp @@ -30,16 +30,6 @@ class Context : public UmdTest { .pNext = nullptr, .commandQueueGroupOrdinal = 0, .flags = 0}; - - ze_host_mem_alloc_desc_t hostMemAllocDesc = {.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, - .pNext = nullptr, - .flags = 0}; - - ze_device_mem_alloc_desc_t deviceMemAllocDesc = {.stype = - ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC, - .pNext = nullptr, - .flags = 0, - .ordinal = 0}; }; TEST_F(Context, CreateContextRepeat) { @@ -98,7 +88,7 @@ TEST_P(MultiContext, AllocatedMemoryCannotBeUsedInDifferentContext) { ASSERT_EQ(ret, ZE_RESULT_SUCCESS); auto list = scopedList.get(); - auto mem = zeScope::memAllocHost(ctx2, hostMemAllocDesc, size, 0, ret); + auto mem = zeMemory::allocHost(ctx2, size); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); uint64_t *ts = static_cast(mem.get()); @@ -108,22 +98,10 @@ TEST_P(MultiContext, AllocatedMemoryCannotBeUsedInDifferentContext) { } void MultiContext::AllocateAndFreeMemory(ze_context_handle_t ctx, size_t size, uint32_t iteration) { - ze_result_t ret; for (uint32_t i = 0; i < iteration; i++) { - auto memHost = zeScope::memAllocHost(ctx, hostMemAllocDesc, size, 0, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - - auto memShared = zeScope::memAllocShared(ctx, - deviceMemAllocDesc, - hostMemAllocDesc, - size, - 0, - zeDevice, - ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - - auto memDevice = zeScope::memAllocDevice(ctx, deviceMemAllocDesc, size, 0, zeDevice, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto memHost = zeMemory::allocHost(ctx, size); + auto memShared = zeMemory::allocShared(ctx, zeDevice, size); + auto memDevice = 
zeMemory::allocDevice(ctx, zeDevice, size); } } @@ -163,9 +141,7 @@ void MultiContext::RunAppendGlobalTimestampAndSynchronize(ze_context_handle_t ct ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ze_command_list_handle_t list = scopedList.get(); - auto sharedMem = - zeScope::memAllocShared(ctx, deviceMemAllocDesc, hostMemAllocDesc, size, 0, zeDevice, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto sharedMem = zeMemory::allocShared(ctx, zeDevice, size); uint64_t *ts = static_cast(sharedMem.get()); ASSERT_TRUE(ts) << "Failed to allocate memory"; @@ -202,10 +178,6 @@ class MultiContextGraph : public Context, void SetUp() override { Context::SetUp(); - YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as(); - auto [numOfContext, node] = GetParam(); ze_result_t ret; @@ -232,7 +204,6 @@ class MultiContextGraph : public Context, std::vector> inputBin, outputBin; std::vector vpuBlob, vpuBin; - std::string blobDir = ""; }; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MultiContextGraph); @@ -263,27 +234,21 @@ void MultiContextGraph::RunInference(ze_context_handle_t ctx) { ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ze_command_list_handle_t list = scopedList.get(); - ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast(vpuBlob.data()), - .pBuildFlags = nullptr}; + ze_graph_desc_2_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = nullptr, + .format = ZE_GRAPH_FORMAT_NATIVE, + .inputSize = vpuBlob.size(), + .pInput = reinterpret_cast(vpuBlob.data()), + .pBuildFlags = nullptr, + .flags = ZE_GRAPH_FLAG_NONE}; - auto scopedGraph = zeScope::graphCreate(zeGraphDDITableExt, ctx, zeDevice, graphDesc, ret); + auto scopedGraph = zeScope::graphCreate2(zeGraphDDITableExt, ctx, zeDevice, graphDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ze_graph_handle_t graph = scopedGraph.get(); uint32_t index = 0; for (auto &input : inputBin) { - auto memInput = zeScope::memAllocShared(ctx, - deviceMemAllocDesc, - hostMemAllocDesc, - input.size(), - 0, - zeDevice, - ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto memInput = zeMemory::allocShared(ctx, zeDevice, input.size()); mem.push_back(memInput); memcpy(memInput.get(), input.data(), input.size()); @@ -293,14 +258,7 @@ void MultiContextGraph::RunInference(ze_context_handle_t ctx) { } for (auto &output : outputBin) { - auto memOutput = zeScope::memAllocShared(ctx, - deviceMemAllocDesc, - hostMemAllocDesc, - output.size(), - 0, - zeDevice, - ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto memOutput = zeMemory::allocShared(ctx, zeDevice, output.size()); mem.push_back(memOutput); graphOutput.push_back(memOutput.get()); diff --git a/validation/umd-test/test_device.cpp b/validation/umd-test/test_device.cpp index 99ced85..4f9d9e4 100644 --- a/validation/umd-test/test_device.cpp +++ b/validation/umd-test/test_device.cpp @@ -18,4 +18,7 @@ TEST_F(Device, GetProperties) { EXPECT_EQ(devProp.vendorId, 0x8086u); EXPECT_TRUE(test_app::is_vpu(devProp.deviceId)) << "Invalid PCI Device ID" << devProp.deviceId; EXPECT_STREQ(devProp.name, "Intel(R) AI Boost"); + + TRACE("PCI Device ID: %#x\n", devProp.deviceId); + TRACE("Tile count: %u\n", devProp.numSlices); } diff --git a/validation/umd-test/test_driver.cpp b/validation/umd-test/test_driver.cpp index 58b595c..783f4f7 100644 --- a/validation/umd-test/test_driver.cpp +++ 
b/validation/umd-test/test_driver.cpp @@ -9,7 +9,8 @@ class Driver : public UmdTest {}; -TEST(Init, HandleWrongInitFlagError) { +/* TODO: Test disabled: EISW-107116 */ +TEST(Init, DISABLED_HandleWrongInitFlagError) { EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ENUMERATION, zeInit(ZE_INIT_FLAG_GPU_ONLY)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ENUMERATION, zeInit(0x04)); } diff --git a/validation/umd-test/test_event.cpp b/validation/umd-test/test_event.cpp index c1c0180..b8d868f 100644 --- a/validation/umd-test/test_event.cpp +++ b/validation/umd-test/test_event.cpp @@ -31,7 +31,12 @@ struct Event : public UmdTest { zeScope::SharedPtr scopedEventPool = nullptr; }; +// TODO: Validation layer is disabled when OpenVino is used, test will fail, issue: EISW-101738 +#ifdef UMD_TESTS_USE_OPENVINO +TEST_F(Event, DISABLED_CreateEventWithInvalidParameters) { +#else TEST_F(Event, CreateEventWithInvalidParameters) { +#endif ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, 0, @@ -68,7 +73,8 @@ TEST_F(Event, AllocateEventTwiceExpectError) { EXPECT_EQ(expectNullptr, nullptr); } -TEST_F(Event, DestroyEventWithInvalidParameter) { +// TODO: Validation layer is disabled. OpenVino issue: EISW-113275 +TEST_F(Event, DISABLED_DestroyEventWithInvalidParameter) { EXPECT_EQ(zeEventDestroy(nullptr), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); } diff --git a/validation/umd-test/test_event_sync.cpp b/validation/umd-test/test_event_sync.cpp index 2fd601f..fd5b3b2 100644 --- a/validation/umd-test/test_event_sync.cpp +++ b/validation/umd-test/test_event_sync.cpp @@ -154,12 +154,6 @@ void EventSync::WaitHostSignal(bool computeEngineWait) { queue = cpQue; } - // Copy command list. - // for copy queue. - // | Wait on event (0x101) | Timestamp (0x100) | - // - // for NN queue. - // | Wait on event (0x101) | Timestamp (0x100) | ASSERT_EQ(zeCommandListAppendWaitOnEvents(cmdlist, 1, &event), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendWriteGlobalTimestamp(cmdlist, ts, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); @@ -168,7 +162,7 @@ void EventSync::WaitHostSignal(bool computeEngineWait) { // Execute command lists. ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &cmdlist, nullptr), ZE_RESULT_SUCCESS); - std::thread hostSignalThread(&EventSync::HostSignalTask, this, event, 1000); + std::thread hostSignalThread(&EventSync::HostSignalTask, this, event, 100); // Command queue host sync. EXPECT_EQ(zeCommandQueueSynchronize(queue, syncTimeout), ZE_RESULT_SUCCESS); @@ -225,11 +219,6 @@ void EventSync::WaitAndSignalBetweenEngines(bool computeEngineWait, bool testEve signalCmdlist = nnCmdlist; } - // Waiting engine. - // If compute engine. - // | Wait on event(0x101) | Timestamp(0x100) | (Reset event:0x102) | - // If copy engine. - // | Wait on event(0x101) | Timestamp(0x100) | (Reset event:0x102) | ASSERT_EQ(zeCommandListAppendWaitOnEvents(waitCmdlist, 1, &event), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendWriteGlobalTimestamp(waitCmdlist, ts, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); @@ -245,12 +234,6 @@ void EventSync::WaitAndSignalBetweenEngines(bool computeEngineWait, bool testEve ASSERT_EQ(zeCommandQueueExecuteCommandLists(waitQue, 1, &waitCmdlist, nullptr), ZE_RESULT_SUCCESS); - // Signal engine. - // If compute engine. - // | Timestamp(0x100) | Signal event(0x102) | - // - // If copy engine. 
- // | Timestamp(0x100) | Signal event(0x102) | ASSERT_EQ(zeCommandListAppendWriteGlobalTimestamp(signalCmdlist, ts1, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendSignalEvent(signalCmdlist, event), ZE_RESULT_SUCCESS); @@ -346,10 +329,6 @@ void EventSync::MultiCommandListSyncOnSingleEngine(bool runOnComputeEngine) { auto cmdlist1 = scopedCmdList1.get(); auto cmdlist2 = scopedCmdList2.get(); - // Multi cmdlist sync tests - // Command list 0. - // NN command list0 : | Wait on event0 | L2L copy | Signal event1 | - // CP command list0 : | Wait on event0 | L2S copy | Signal event1 | ASSERT_EQ(zeCommandListAppendWaitOnEvents(cmdlist0, 1, &event0), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendMemoryCopy(cmdlist0, destMem0Ptr, @@ -362,9 +341,6 @@ void EventSync::MultiCommandListSyncOnSingleEngine(bool runOnComputeEngine) { ASSERT_EQ(zeCommandListAppendSignalEvent(cmdlist0, event1), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(cmdlist0), ZE_RESULT_SUCCESS); - // Command list 1. - // NN command list1 : | Wait on event1 | L2L copy | Signal event2 | - // CP command list1 : | Wait on event1 | L2S copy | Signal event2 | ASSERT_EQ(zeCommandListAppendWaitOnEvents(cmdlist1, 1, &event1), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendMemoryCopy(cmdlist1, destMem1Ptr, @@ -377,9 +353,6 @@ void EventSync::MultiCommandListSyncOnSingleEngine(bool runOnComputeEngine) { ASSERT_EQ(zeCommandListAppendSignalEvent(cmdlist1, event2), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(cmdlist1), ZE_RESULT_SUCCESS); - // COmmand list 2. - // NN command list2 : | Wait on event2 | L2L copy | - // CP command list2 : | Wait on event2 | L2S copy | ASSERT_EQ(zeCommandListAppendWaitOnEvents(cmdlist2, 1, &event2), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendMemoryCopy(cmdlist2, destMem2Ptr, diff --git a/validation/umd-test/test_eventpool.cpp b/validation/umd-test/test_eventpool.cpp index 49044f7..2a747fc 100644 --- a/validation/umd-test/test_eventpool.cpp +++ b/validation/umd-test/test_eventpool.cpp @@ -9,7 +9,8 @@ struct EventPool : public UmdTest {}; -TEST_F(EventPool, CreateEventPoolWithInvalidParameters) { +// TODO: Validation layer is disabled. OpenVino issue: EISW-113275 +TEST_F(EventPool, DISABLED_CreateEventPoolWithInvalidParameters) { ze_event_pool_handle_t eventPool = nullptr; ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, @@ -46,6 +47,7 @@ TEST_F(EventPool, CreateEventPoolShouldBeSuccessful) { EXPECT_EQ(zeEventPoolDestroy(eventPool), ZE_RESULT_SUCCESS); } -TEST_F(EventPool, DestroyEventPoolWithInvalidParameter) { +// TODO: Validation layer is disabled. OpenVino issue: EISW-113275 +TEST_F(EventPool, DISABLED_DestroyEventPoolWithInvalidParameter) { EXPECT_EQ(zeEventPoolDestroy(nullptr), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); -} \ No newline at end of file +} diff --git a/validation/umd-test/test_fence.cpp b/validation/umd-test/test_fence.cpp index cfc0207..f3046d6 100644 --- a/validation/umd-test/test_fence.cpp +++ b/validation/umd-test/test_fence.cpp @@ -45,11 +45,13 @@ class Fence : public UmdTest { ze_result_t ret; }; -TEST_F(Fence, CreateFenceWithQueueEqualToNull) { +// TODO: Validation layer is disabled. OpenVino issue: EISW-113275 +TEST_F(Fence, DISABLED_CreateFenceWithQueueEqualToNull) { ASSERT_EQ(zeFenceCreate(nullptr, &fenceDesc, &fence), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); } -TEST_F(Fence, DestroyUsingNullptr) { +// TODO: Validation layer is disabled. 
OpenVino issue: EISW-113275 +TEST_F(Fence, DISABLED_DestroyUsingNullptr) { ASSERT_EQ(zeFenceDestroy(nullptr), ZE_RESULT_ERROR_INVALID_NULL_HANDLE); } diff --git a/validation/umd-test/test_graph.cpp b/validation/umd-test/test_graph.cpp index 2cb3e0c..6164398 100644 --- a/validation/umd-test/test_graph.cpp +++ b/validation/umd-test/test_graph.cpp @@ -5,12 +5,11 @@ * */ -#include "model_params.hpp" -#include "umd_test.h" +#include "graph_utilities.hpp" #include -class GraphNative : public UmdTest { +class GraphNativeBase : public UmdTest { public: void SetUp() override { UmdTest::SetUp(); @@ -18,10 +17,6 @@ class GraphNative : public UmdTest { if (!Environment::getConfiguration("graph_execution").size()) GTEST_SKIP() << "Do not find blobs to execute test"; - std::string blobDir = ""; - YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as(); /* Tests from this group will be run on first blob taken from configuration */ const YAML::Node node = Environment::getConfiguration("graph_execution")[0]; @@ -39,78 +34,88 @@ class GraphNative : public UmdTest { vpuBin)); } - ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = 0, - .pInput = nullptr, - .pBuildFlags = nullptr}; + void TearDown() override { UmdTest::TearDown(); } + + ze_graph_desc_2_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = nullptr, + .format = ZE_GRAPH_FORMAT_NATIVE, + .inputSize = 0, + .pInput = nullptr, + .pBuildFlags = nullptr, + .flags = ZE_GRAPH_FLAG_NONE}; ze_graph_handle_t graphHandle = nullptr; - ze_result_t ret; + ze_result_t ret = ZE_RESULT_SUCCESS; size_t size = 0; std::vector> inputBin, outputBin; std::vector vpuBlob, vpuBin; }; -TEST_F(GraphNative, CreatingGraphWithNullptrInputGraph) { +TEST_F(GraphNativeBase, CreatingGraphWithNullptrInputGraph) { graphDesc.inputSize = vpuBlob.size(); graphDesc.pInput = nullptr; - EXPECT_EQ(zeGraphDDITableExt->pfnCreate(zeContext, zeDevice, &graphDesc, &graphHandle), + EXPECT_EQ(zeGraphDDITableExt->pfnCreate2(zeContext, zeDevice, &graphDesc, &graphHandle), ZE_RESULT_ERROR_INVALID_NULL_POINTER); } -TEST_F(GraphNative, CreatingGraphWithNullPtrDesc) { - const ze_graph_desc_t *graphDesc = nullptr; - EXPECT_EQ(zeGraphDDITableExt->pfnCreate(zeContext, zeDevice, graphDesc, &graphHandle), +TEST_F(GraphNativeBase, CreatingGraphWithNullPtrDesc) { + const ze_graph_desc_2_t *graphDesc = nullptr; + EXPECT_EQ(zeGraphDDITableExt->pfnCreate2(zeContext, zeDevice, graphDesc, &graphHandle), ZE_RESULT_ERROR_INVALID_NULL_POINTER); } -TEST_F(GraphNative, CreatingGraphCorrectBlobFileAndDesc) { - graphDesc.inputSize = vpuBlob.size(); - graphDesc.pInput = reinterpret_cast(vpuBlob.data()); - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - EXPECT_EQ(ret, ZE_RESULT_SUCCESS); +TEST_F(GraphNativeBase, GetProfilingDataPropertiesExpectSuccess) { + ze_device_profiling_data_properties_t pDeviceProfilingDataProperties; + EXPECT_EQ(zeGraphProfilingDDITableExt->pfnDeviceGetProfilingDataProperties( + zeDevice, + &pDeviceProfilingDataProperties), + ZE_RESULT_SUCCESS); } -TEST_F(GraphNative, GetGraphNativeBinaryWithoutGraphNativeBinaryPointerExpectSuccess) { - graphDesc.inputSize = vpuBlob.size(); - graphDesc.pInput = reinterpret_cast(vpuBlob.data()); - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, 
ret); - EXPECT_EQ(ret, ZE_RESULT_SUCCESS); - graphHandle = scopedGraphHandle.get(); - EXPECT_EQ(zeGraphDDITableExt->pfnGetNativeBinary(graphHandle, &size, nullptr), +class GraphNativeBinary : public UmdTest { + public: + void SetUp() override { + UmdTest::SetUp(); + + if (!Environment::getConfiguration("graph_execution").size()) + GTEST_SKIP() << "Do not find blobs to execute test"; + + /* CommandGraph test will be run on first blob taken from configuration */ + const YAML::Node node = Environment::getConfiguration("graph_execution")[0]; + + /* Validate configuration */ + ASSERT_GT(node["path"].as().size(), 0); + + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + blobDir + node["path"].as(), + node); + } + + void TearDown() override { UmdTest::TearDown(); } + + size_t size = 0; + std::shared_ptr graph; +}; + +TEST_F(GraphNativeBinary, GetGraphNativeBinaryWithoutGraphNativeBinaryPointerExpectSuccess) { + EXPECT_EQ(zeGraphDDITableExt->pfnGetNativeBinary(graph->handle, &size, nullptr), ZE_RESULT_SUCCESS); - EXPECT_EQ(size, vpuBlob.size()); + EXPECT_EQ(size, graph->vpuBlob.size()); } -TEST_F(GraphNative, GetGraphNativeBinaryWithAndWithoutGraphNativeBinaryPointerExpectSuccess) { +TEST_F(GraphNativeBinary, GetGraphNativeBinaryWithAndWithoutGraphNativeBinaryPointerExpectSuccess) { std::vector graphNativeBinary; - graphDesc.inputSize = vpuBlob.size(); - graphDesc.pInput = reinterpret_cast(vpuBlob.data()); - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - EXPECT_EQ(ret, ZE_RESULT_SUCCESS); - graphHandle = scopedGraphHandle.get(); - - EXPECT_EQ(zeGraphDDITableExt->pfnGetNativeBinary(graphHandle, &size, nullptr), + EXPECT_EQ(zeGraphDDITableExt->pfnGetNativeBinary(graph->handle, &size, nullptr), ZE_RESULT_SUCCESS); - EXPECT_EQ(size, vpuBlob.size()); + EXPECT_EQ(size, graph->vpuBlob.size()); graphNativeBinary.resize(size, 0xAA); - EXPECT_EQ(zeGraphDDITableExt->pfnGetNativeBinary(graphHandle, &size, graphNativeBinary.data()), - ZE_RESULT_SUCCESS); - EXPECT_EQ(size, vpuBlob.size()); - EXPECT_EQ(memcmp(graphNativeBinary.data(), vpuBlob.data(), vpuBlob.size()), 0); -} + EXPECT_EQ( + zeGraphDDITableExt->pfnGetNativeBinary(graph->handle, &size, graphNativeBinary.data()), + ZE_RESULT_SUCCESS); -TEST_F(GraphNative, GetProfilingDataPropertiesExpectSuccess) { - ze_device_profiling_data_properties_t pDeviceProfilingDataProperties; - EXPECT_EQ(zeGraphProfilingDDITableExt->pfnDeviceGetProfilingDataProperties( - zeDevice, - &pDeviceProfilingDataProperties), - ZE_RESULT_SUCCESS); + EXPECT_EQ(memcmp(graphNativeBinary.data(), graph->vpuBlob.data(), graph->vpuBlob.size()), 0); } diff --git a/validation/umd-test/test_graph_cid.cpp b/validation/umd-test/test_graph_cid.cpp index 3472b3c..97dc846 100644 --- a/validation/umd-test/test_graph_cid.cpp +++ b/validation/umd-test/test_graph_cid.cpp @@ -1,101 +1,26 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "model_params.hpp" -#include "umd_test.h" +#include "graph_utilities.hpp" +#include "umd_prime_buffers.h" +#include #include -#include - -struct __attribute__((packed)) BmpFormat { - uint16_t header; - uint32_t size; - uint32_t reserve; - uint32_t offset; -}; +#include +#include class CompilerInDriverBase : public UmdTest { protected: - void SetUp() override { - UmdTest::SetUp(); - - /*Get base configuration from config file*/ - YAML::Node &configuration = Environment::getConfiguration(); - if 
(configuration["model_dir"].IsDefined()) - modelDir = configuration["model_dir"].as(); - - if (configuration["image_dir"].IsDefined()) - imageDir = configuration["image_dir"].as(); - } - - void createModelData(std::vector &model, - std::vector &xml, - std::vector &bin, - std::vector &flags, - ze_graph_desc_t &desc) { - ze_device_graph_properties_t pDeviceGraphProperties; - EXPECT_EQ( - zeGraphDDITableExt->pfnDeviceGetGraphProperties(zeDevice, &pDeviceGraphProperties), - ZE_RESULT_SUCCESS); - - ze_graph_compiler_version_info_t version = { - .major = pDeviceGraphProperties.compilerVersion.major, - .minor = pDeviceGraphProperties.compilerVersion.minor}; - - uint64_t xml_len = xml.size(); - uint64_t bin_len = bin.size(); - uint32_t numInputs = 2; - uint64_t modelSize = sizeof(version) + sizeof(numInputs) + sizeof(xml_len) + xml_len + - sizeof(bin_len) + bin_len; - - model.resize(modelSize); + void SetUp() override { UmdTest::SetUp(); } - uint64_t offset = 0; - memcpy(&model[0], &version, sizeof(version)); - offset += sizeof(version); - - memcpy(&model[offset], &numInputs, sizeof(numInputs)); - offset += sizeof(numInputs); - - memcpy(&model[offset], &xml_len, sizeof(xml_len)); - offset += sizeof(xml_len); - - memcpy(&model[offset], xml.data(), xml_len); - offset += xml_len; - - memcpy(&model[offset], &bin_len, sizeof(bin_len)); - offset += sizeof(bin_len); - - memcpy(&model[offset], bin.data(), bin_len); - - desc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = nullptr, - .format = ZE_GRAPH_FORMAT_NGRAPH_LITE, - .inputSize = model.size(), - .pInput = model.data(), - .pBuildFlags = flags.data()}; - } - - std::vector getFlagsFromString(std::string flags) { - std::vector buildFlags; - - for (auto c : flags) - buildFlags.push_back(c); - buildFlags.push_back('\0'); - return buildFlags; - } - - ze_graph_handle_t graphHandle = nullptr; - ze_graph_desc_t graphDesc = {}; + zeScope::SharedPtr scopedGraphHandle = nullptr; + ze_graph_desc_2_t graphDesc = {}; std::vector modelIR = {}; - std::vector modelXml, modelBin, buildFlags; - - std::string modelDir = ""; - std::string imageDir = ""; + std::vector buildFlags = {}; }; class CompilerInDriver : public CompilerInDriverBase { @@ -114,46 +39,78 @@ class CompilerInDriver : public CompilerInDriverBase { ASSERT_GT(node["flags"].as().size(), 0); /* Setup */ - ASSERT_TRUE( - getModelFromPath(modelDir + node["path"].as(), modelXml, modelBin)); - buildFlags = getFlagsFromString(node["flags"].as()); - - createModelData(modelIR, modelXml, modelBin, buildFlags, graphDesc); + createGraphDescriptorForModel(modelDir + node["path"].as(), + buildFlags, + modelIR, + graphDesc); } }; TEST_F(CompilerInDriver, CreatingGraphWithNullptrInputGraphExpectFailure) { + ze_graph_handle_t handle = nullptr; graphDesc.pInput = nullptr; - EXPECT_EQ(zeGraphDDITableExt->pfnCreate(zeContext, zeDevice, &graphDesc, &graphHandle), + EXPECT_EQ(zeGraphDDITableExt->pfnCreate2(zeContext, zeDevice, &graphDesc, &handle), ZE_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_F(CompilerInDriver, CreatingGraphWithZeroGraphSizeExpectFailure) { + ze_graph_handle_t handle = nullptr; graphDesc.inputSize = 0u; - EXPECT_EQ(zeGraphDDITableExt->pfnCreate(zeContext, zeDevice, &graphDesc, &graphHandle), + EXPECT_EQ(zeGraphDDITableExt->pfnCreate2(zeContext, zeDevice, &graphDesc, &handle), ZE_RESULT_ERROR_INVALID_SIZE); } TEST_F(CompilerInDriver, CreatingGraphCorrectBlobFileAndDescExpectSuccess) { - ze_result_t ret; - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, 
graphDesc, ret); + ze_result_t ret = ZE_RESULT_SUCCESS; + scopedGraphHandle = + zeScope::graphCreate2(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); EXPECT_EQ(ret, ZE_RESULT_SUCCESS); } -class CompilerInDriverLongT : public CompilerInDriverBase { +class CompilerInDriverLayers : public CompilerInDriver, + public ::testing::WithParamInterface { protected: - enum MemType : uint8_t { - DEVICE_MEMORY, - HOST_MEMORY, - SHARED_MEMORY, - }; + void SetUp() override { CompilerInDriver::SetUp(); } +}; + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverLayers); + +INSTANTIATE_TEST_SUITE_P(, + CompilerInDriverLayers, + ::testing::ValuesIn(Environment::getConfiguration("compiler_in_driver")), + [](const testing::TestParamInfo &p) { + return generateTestNameFromNode(p.param); + }); + +TEST_P(CompilerInDriverLayers, QueryNetworkLayers) { + ze_graph_query_network_handle_t hQuery = nullptr; + + ASSERT_EQ(zeGraphDDITableExt->pfnQueryNetworkCreate2(zeContext, zeDevice, &graphDesc, &hQuery), + ZE_RESULT_SUCCESS); + + size_t size = 0; + EXPECT_EQ(zeGraphDDITableExt->pfnQueryNetworkGetSupportedLayers(hQuery, &size, nullptr), + ZE_RESULT_SUCCESS); + EXPECT_GT(size, 0); + std::vector layers(size, '\0'); + EXPECT_EQ(zeGraphDDITableExt->pfnQueryNetworkGetSupportedLayers(hQuery, &size, layers.data()), + ZE_RESULT_SUCCESS); + + EXPECT_GT(layers.size(), 0); + + TRACE("Supported layers: %s\n", layers.data()); + + ASSERT_EQ(zeGraphDDITableExt->pfnQueryNetworkDestroy(hQuery), ZE_RESULT_SUCCESS); +} + +class CompilerInDriverLongT : public CompilerInDriverBase { + protected: void SetUp() override { CompilerInDriverBase::SetUp(); - ze_result_t ret; + ze_result_t ret = ZE_RESULT_SUCCESS; scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); queue = scopedQueue.get(); @@ -163,121 +120,99 @@ class CompilerInDriverLongT : public CompilerInDriverBase { list = scopedList.get(); } - void getGraphArgumentSize(ze_graph_handle_t hGraph, - std::vector &inputSize, - std::vector &outputSize) { - ze_graph_properties_t graphProps{}; - ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(hGraph, &graphProps), ZE_RESULT_SUCCESS); + ze_command_queue_desc_t cmdQueueDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + .pNext = nullptr, + .ordinal = 0, + .index = 0, + .flags = 0, + .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT, + .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; - for (uint32_t i = 0; i < graphProps.numGraphArgs; i++) { - ze_graph_argument_properties_t graphArgProps{}; + ze_command_list_desc_t cmdListDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, + .pNext = nullptr, + .commandQueueGroupOrdinal = 0, + .flags = 0}; - ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(hGraph, i, &graphArgProps), - ZE_RESULT_SUCCESS); + ze_command_queue_handle_t queue = nullptr; + ze_command_list_handle_t list = nullptr; - size_t size = 1u; - for (int i = 0; i < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; i++) - size *= graphArgProps.dims[i]; - - switch (graphArgProps.devicePrecision) { - case ZE_GRAPH_ARGUMENT_PRECISION_FP32: - case ZE_GRAPH_ARGUMENT_PRECISION_INT32: - case ZE_GRAPH_ARGUMENT_PRECISION_UINT32: - size *= sizeof(uint32_t); - break; - case ZE_GRAPH_ARGUMENT_PRECISION_BF16: - case ZE_GRAPH_ARGUMENT_PRECISION_FP16: - case ZE_GRAPH_ARGUMENT_PRECISION_INT16: - case ZE_GRAPH_ARGUMENT_PRECISION_UINT16: - size *= sizeof(uint16_t); - break; - case ZE_GRAPH_ARGUMENT_PRECISION_INT8: - case ZE_GRAPH_ARGUMENT_PRECISION_UINT8: - size *= sizeof(uint8_t); - break; - 
case ZE_GRAPH_ARGUMENT_PRECISION_INT4: - case ZE_GRAPH_ARGUMENT_PRECISION_UINT4: - size /= 2; - break; - default: - ASSERT_TRUE(false) << "Invalid Graph Argument Precision"; - } + private: + zeScope::SharedPtr scopedQueue = nullptr; + zeScope::SharedPtr scopedList = nullptr; +}; - ASSERT_NE(size, 0u); - if (graphArgProps.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { - inputSize.push_back(size); - } else { - outputSize.push_back(size); - } +class CompilerInDriverLong : public CompilerInDriverLongT, + public ::testing::WithParamInterface { + protected: + void SetUp() override { + CompilerInDriverLongT::SetUp(); - EXPECT_NE(graphArgProps.name, ""); - } - } + const YAML::Node node = GetParam(); - bool loadImageData(void *graphInput, size_t graphInputSize, const std::string &imagePath) { - std::vector bmp; + ASSERT_GT(node["path"].as().size(), 0); - if (!UmdTest::loadFile(imagePath, bmp)) { - PRINTF("Image file %s not found.\n", imagePath.c_str()); - return false; - } + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + modelDir + node["path"].as(), + node); - auto *bmpHeader = reinterpret_cast(bmp.data()); + graph->allocateArguments(MemType::SHARED_MEMORY); + } - size_t imgSize = bmpHeader->size - bmpHeader->offset; - if (imgSize > graphInputSize) { - PRINTF("Image file %s too large.\n", imagePath.c_str()); - return false; - } + std::shared_ptr graph; +}; - memcpy(graphInput, (char *)bmpHeader + bmpHeader->offset, imgSize); - return true; - } +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverLong); - bool checkOutputDataset(void *graphOutput, - size_t graphOutputSize, - uint16_t imageClass, - bool verbose = false) { - std::vector output_data(graphOutputSize / sizeof(float)); - memcpy(output_data.data(), graphOutput, graphOutputSize); +INSTANTIATE_TEST_SUITE_P(, + CompilerInDriverLong, + ::testing::ValuesIn(Environment::getConfiguration("compiler_in_driver")), + [](const testing::TestParamInfo &p) { + return generateTestNameFromNode(p.param); + }); - auto it = std::max_element(output_data.begin(), output_data.end()); - size_t index = std::distance(output_data.begin(), it); +TEST_P(CompilerInDriverLong, CompileModelWithGraphInitAndExecute) { + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - if (verbose) - PRINTF("Class = %li, Accuracy = %f\n", index, *it); + ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - EXPECT_EQ(index, imageClass) << "Class index does not match!" 
<< std::endl - << "Class: " << index << "!=" << imageClass; - return index == imageClass; - } + graph->setRandomInput(); - void allocListOfMemory(std::vector &listOfMemSize, - std::vector &listOfMem, - MemType memType) { - for (const auto &m : listOfMemSize) { - { - std::lock_guard lock(memMutex); - if (memType == DEVICE_MEMORY) { - mem.push_back(AllocDeviceMemory(m)); - } else if (memType == HOST_MEMORY) { - mem.push_back(AllocHostMemory(m)); - } else { - mem.push_back(AllocSharedMemory(m)); - } - listOfMem.push_back(mem.back().get()); - } - } - } + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); +} - inline void generateRandomData(std::vector &data, size_t size) { - std::random_device rd; - std::uniform_int_distribution dist; +class CompilerInDriverWithProfiling : public CompilerInDriverLongT, + public ::testing::WithParamInterface { + protected: + void SetUp() override { + CompilerInDriverLongT::SetUp(); - data.reserve(size); - for (size_t i = 0; i < size; i++) { - data.push_back(dist(rd)); + const YAML::Node node = GetParam(); + if (node["graph_profiling"].IsDefined() && node["graph_profiling"].as() == false) { + SKIP_("The profiling graph test has been disabled."); } + + ASSERT_GT(node["path"].as().size(), 0); + + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + modelDir + node["path"].as(), + node, + ZE_GRAPH_FLAG_ENABLE_PROFILING); + + graph->allocateArguments(MemType::SHARED_MEMORY); } template @@ -330,196 +265,48 @@ class CompilerInDriverLongT : public CompilerInDriverBase { } } - inline void addGraphProfilingOption(std::vector &flags) { - while (!flags.empty() && flags.back() == '\0') - flags.pop_back(); - - for (char c : " --config PERF_COUNT=\"YES\"\0") - flags.push_back(c); - /* Reassign flags pointer, vector was extended data buffer could be changed */ - graphDesc.pBuildFlags = flags.data(); - } - - ze_command_queue_handle_t queue = nullptr; - ze_command_list_handle_t list = nullptr; - std::vector graphInput, graphOutput; - - ze_command_queue_desc_t cmdQueueDesc{.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, - .pNext = nullptr, - .ordinal = 0, - .index = 0, - .flags = 0, - .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT, - .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; - - ze_command_list_desc_t cmdListDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, - .pNext = nullptr, - .commandQueueGroupOrdinal = 0, - .flags = 0}; - - private: - zeScope::SharedPtr scopedQueue = nullptr; - zeScope::SharedPtr scopedList = nullptr; - std::vector> mem; - std::mutex memMutex; -}; - -class CompilerInDriverLong : public CompilerInDriverLongT, - public ::testing::WithParamInterface { - protected: - void SetUp() override { - CompilerInDriverLongT::SetUp(); - - const YAML::Node node = GetParam(); - - /* Validate configuration */ - ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["flags"].as().size(), 0); - - /* Setup */ - ASSERT_TRUE( - getModelFromPath(modelDir + node["path"].as(), modelXml, modelBin)); - - buildFlags = getFlagsFromString(node["flags"].as()); - - createModelData(modelIR, modelXml, modelBin, buildFlags, graphDesc); - } + std::shared_ptr graph; }; -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverLong); 
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverWithProfiling); INSTANTIATE_TEST_SUITE_P(, - CompilerInDriverLong, + CompilerInDriverWithProfiling, ::testing::ValuesIn(Environment::getConfiguration("compiler_in_driver")), [](const testing::TestParamInfo &p) { return generateTestNameFromNode(p.param); }); -TEST_P(CompilerInDriverLong, QueryNetwork) { - ze_graph_query_network_handle_t hQuery = nullptr; - ASSERT_EQ(zeGraphDDITableExt->pfnQueryNetworkCreate(zeContext, zeDevice, &graphDesc, &hQuery), - ZE_RESULT_SUCCESS); - - size_t size = 0; - EXPECT_EQ(zeGraphDDITableExt->pfnQueryNetworkGetSupportedLayers(hQuery, &size, nullptr), - ZE_RESULT_SUCCESS); - EXPECT_GT(size, 0); - - std::vector layers(size, '\0'); - EXPECT_EQ(zeGraphDDITableExt->pfnQueryNetworkGetSupportedLayers(hQuery, &size, layers.data()), - ZE_RESULT_SUCCESS); - - EXPECT_GT(size, 0); - - ASSERT_EQ(zeGraphDDITableExt->pfnQueryNetworkDestroy(hQuery), ZE_RESULT_SUCCESS); -} - -TEST_P(CompilerInDriverLong, CompileModelWithGraphInitAndExecute) { - ze_result_t ret; - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - EXPECT_EQ(ret, ZE_RESULT_SUCCESS); - graphHandle = scopedGraphHandle.get(); - - std::vector graphInputSize; - std::vector graphOutputSize; - getGraphArgumentSize(graphHandle, graphInputSize, graphOutputSize); - ASSERT_NE(graphInputSize.size(), 0); - ASSERT_NE(graphOutputSize.size(), 0); - - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - - ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - - allocListOfMemory(graphInputSize, graphInput, SHARED_MEMORY); - allocListOfMemory(graphOutputSize, graphOutput, SHARED_MEMORY); - - for (size_t i = 0; i < graphInputSize.size(); ++i) { - std::vector inputData; - generateRandomData(inputData, graphInputSize[i]); - memcpy(graphInput[i], inputData.data(), graphInputSize[i]); - } - - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } +TEST_P(CompilerInDriverWithProfiling, CompileModelWithGraphProfilingFlag) { + ze_result_t ret = ZE_RESULT_SUCCESS; ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); -} - -TEST_P(CompilerInDriverLong, CompileModelWithGraphProfilingAndRunInference) { - addGraphProfilingOption(buildFlags); - ze_result_t ret; - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - graphHandle = scopedGraphHandle.get(); - - std::vector graphInputSize; - std::vector graphOutputSize; - getGraphArgumentSize(graphHandle, graphInputSize, graphOutputSize); - 
ASSERT_NE(graphInputSize.size(), 0); - ASSERT_NE(graphOutputSize.size(), 0); - - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - allocListOfMemory(graphInputSize, graphInput, SHARED_MEMORY); - allocListOfMemory(graphOutputSize, graphOutput, SHARED_MEMORY); - - for (size_t i = 0; i < graphInputSize.size(); ++i) { - std::vector inputData; - generateRandomData(inputData, graphInputSize[i]); - memcpy(graphInput[i], inputData.data(), graphInputSize[i]); - } - - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } + graph->setRandomInput(); uint32_t poolSize = 1; auto scopedProfilingPool = - zeScope::profilingPoolCreate(zeGraphProfilingDDITableExt, graphHandle, poolSize, ret); - + zeScope::profilingPoolCreate(zeGraphProfilingDDITableExt, graph->handle, poolSize, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ze_graph_profiling_pool_handle_t hProfilingPool = scopedProfilingPool.get(); uint32_t index = 0; auto scopedProfilingQuery = zeScope::profilingQueryCreate(zeGraphProfilingDDITableExt, hProfilingPool, index, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); ze_graph_profiling_query_handle_t hProfilingQuery = scopedProfilingQuery.get(); - ASSERT_EQ(zeGraphDDITableExt - ->pfnAppendGraphExecute(list, graphHandle, hProfilingQuery, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, hProfilingQuery, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); @@ -575,31 +362,31 @@ class CompilerInDriverLongBmp : public CompilerInDriverLongT, const YAML::Node node = GetParam(); - /* Validate configuration */ ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["flags"].as().size(), 0); - ASSERT_EQ(node["output"].as>().size(), + ASSERT_EQ(node["class_index"].as>().size(), node["input"].as>().size()); - /* Setup */ - ASSERT_TRUE( - getModelFromPath(modelDir + node["path"].as(), modelXml, modelBin)); - - buildFlags = getFlagsFromString(node["flags"].as()); + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + modelDir + node["path"].as(), + node); - createModelData(modelIR, modelXml, modelBin, buildFlags, graphDesc); + graph->allocateArguments(MemType::SHARED_MEMORY); - /*Set expected output*/ - imageClass = node["output"].as>(); + /* Set expected output */ + imageClassIndexes = node["class_index"].as>(); - /*Create list of images to load*/ + /* Create list of images to load */ for (auto &image : node["input"].as>()) { testImages.push_back(imageDir + image); } } - std::vector testImages; // paths to test images - std::vector imageClass; // expected result + std::vector testImages; // paths to test images + std::vector imageClassIndexes; // expected result + + std::shared_ptr graph; }; 
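/*
 * The removed checkOutputDataset() helper lives on behind
 * Graph::checkResults(classIndex): the output tensor is read as float scores
 * and the argmax index is compared with the expected ImageNet class. A minimal
 * standalone sketch of that check, assuming a float output buffer:
 */
#include <algorithm>
#include <cstring>
#include <vector>

static bool outputMatchesClass(const void *out, size_t outSizeBytes, uint16_t expectedClass) {
    std::vector<float> scores(outSizeBytes / sizeof(float));
    std::memcpy(scores.data(), out, scores.size() * sizeof(float));
    auto it = std::max_element(scores.begin(), scores.end());
    size_t index = static_cast<size_t>(std::distance(scores.begin(), it));
    return index == expectedClass; /* top-1 classification check */
}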
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverLongBmp);

@@ -611,53 +398,135 @@ INSTANTIATE_TEST_SUITE_P(
     [](const testing::TestParamInfo<YAML::Node> &p) { return generateTestNameFromNode(p.param); });

 TEST_P(CompilerInDriverLongBmp, CompileModelWithGraphInitAndExecuteThenCheckAccuracy) {
-    ze_result_t ret;
-    auto scopedGraphHandle =
-        zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret);
-    EXPECT_EQ(ret, ZE_RESULT_SUCCESS);
-    graphHandle = scopedGraphHandle.get();
-
-    std::vector<size_t> graphInputSize;
-    std::vector<size_t> graphOutputSize;
-    getGraphArgumentSize(graphHandle, graphInputSize, graphOutputSize);
-    ASSERT_NE(graphInputSize.size(), 0);
-    ASSERT_NE(graphOutputSize.size(), 0);
-
-    ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr),
-              ZE_RESULT_SUCCESS);
+    ASSERT_EQ(
+        zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr),
+        ZE_RESULT_SUCCESS);
     ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS);
     ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS);
     ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS);

     ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS);

-    allocListOfMemory(graphInputSize, graphInput, SHARED_MEMORY);
-    allocListOfMemory(graphOutputSize, graphOutput, SHARED_MEMORY);
+    for (size_t i = 0; i < imageClassIndexes.size(); i++) {
+        graph->loadInputData(testImages[i]);
+
+        ASSERT_EQ(zeGraphDDITableExt
+                      ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr),
+                  ZE_RESULT_SUCCESS);
+        ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS);
+        ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS);
+        ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS);
+
+        graph->checkResults(imageClassIndexes[i]);
+        graph->clearOutput();
+
+        ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS);
+    }
+}
+
+class CompilerInDriverBmpWithPrimeBuffers : public CompilerInDriverLongBmp {
+  public:
+    void SetUp() override {
+        if (!primeHelper.hasDMABufferSupport())
+            GTEST_SKIP() << "Missing support or insufficient permissions for"
+                         << " DMA buffer allocation in the system. Skipping test";
+        CompilerInDriverLongT::SetUp();
+
+        const YAML::Node node = GetParam();
+
+        ASSERT_GT(node["path"].as<std::string>().size(), 0);
+        ASSERT_EQ(node["class_index"].as<std::vector<uint16_t>>().size(),
+                  node["input"].as<std::vector<std::string>>().size());
+
+        graph = Graph::create(zeContext,
+                              zeDevice,
+                              zeGraphDDITableExt,
+                              modelDir + node["path"].as<std::string>(),
+                              node);
+
+        /* Create a list of DMA memory buffers outside the driver and use them as
+           network inputs */

-    for (uint32_t imageIndex = 0; imageIndex < imageClass.size(); imageIndex++) {
-        ASSERT_TRUE(loadImageData(graphInput.at(0), graphInputSize.at(0), testImages[imageIndex]));
+        ze_device_mem_alloc_desc_t pDeviceMemAllocDesc = {
+            .stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
+            .pNext = &primeHelper.externalImportFromFdDesc,
+            .flags = 0,
+            .ordinal = 0};
+        dmaBuffers.resize(graph->inputSize.size(), nullptr);

         uint32_t argIndex = 0;
-        for (const auto &ptr : graphInput) {
-            ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr),
-                      ZE_RESULT_SUCCESS);
+        uint32_t bufferIndex = 0;
+        for (auto &dmaBuffer : dmaBuffers) {
+            size_t size = graph->inputSize[bufferIndex++];
+            ze_result_t ret;
+            int32_t dmaBufferFd = -1;
+            ASSERT_TRUE(primeHelper.createDMABuffer(size, dmaBufferFd));
+            ASSERT_GE(dmaBufferFd, 0);
+            dmaBuffer = primeHelper.mmapDmaBuffer(dmaBufferFd);
+            /* Import the buffer as device memory */
+            primeHelper.externalImportFromFdDesc.fd = dmaBufferFd;
+            auto scopedImportedMemory =
+                zeScope::memAllocDevice(zeContext, pDeviceMemAllocDesc, size, 0, zeDevice, ret);
+            ASSERT_EQ(ret, ZE_RESULT_SUCCESS);
+            ASSERT_NE(scopedImportedMemory.get(), nullptr);
+            /* Set the imported buffer, allocated by device alloc from the DMA file
+               descriptor, as input; it will hold the image copied in by the test
+             */
+            graph->setArgumentValue(argIndex++, scopedImportedMemory.get());
+            importedGraphInput.push_back(scopedImportedMemory);
         }
-        for (const auto &ptr : graphOutput) {
-            ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr),
-                      ZE_RESULT_SUCCESS);
+
+        graph->allocateOutputArguments(MemType::SHARED_MEMORY);
+
+        imageClassIndexes = node["class_index"].as<std::vector<uint16_t>>();
+
+        for (auto &image : node["input"].as<std::vector<std::string>>()) {
+            testImages.push_back(imageDir + image);
         }
+    }
+
+    void TearDown() override {
+        importedGraphInput.clear();
+        dmaBuffers.clear();
+        UmdTest::TearDown();
+    }

+    std::vector<void *> dmaBuffers;
+    std::vector<std::shared_ptr<void>> importedGraphInput;
+    PrimeBufferHelper primeHelper;
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverBmpWithPrimeBuffers);
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    CompilerInDriverBmpWithPrimeBuffers,
+    ::testing::ValuesIn(Environment::getConfiguration("image_classification_imagenet")),
+    [](const testing::TestParamInfo<YAML::Node> &p) { return generateTestNameFromNode(p.param); });
+
+TEST_P(CompilerInDriverBmpWithPrimeBuffers, CompileInitExecuteUsingPrimeBufferInput) {
+    ASSERT_EQ(
+        zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr),
+        ZE_RESULT_SUCCESS);
+    ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS);
+    ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS);
+    ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS);
+
+    ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS);
+
+    for (uint32_t i = 0; i < imageClassIndexes.size(); i++) {
+        /* Load the image into the DMA buffer that was mmap-ed from the DMA file
+           descriptor */
+        Image image(testImages[i]);
+        ASSERT_EQ(graph->inputSize.at(0), image.getSizeInBytes());
+        memcpy(dmaBuffers[0], image.getPtr(), image.getSizeInBytes());
ASSERT_EQ(zeGraphDDITableExt - ->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - ASSERT_TRUE(checkOutputDataset(graphOutput.at(0), - graphOutputSize.at(0), - imageClass[imageIndex], - true)); - memset(graphOutput.at(0), 0, graphOutputSize.at(0)); + graph->checkResults(imageClassIndexes[i]); + graph->clearOutput(); + ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); } } @@ -670,30 +539,18 @@ class CompilerInDriverThreaded : public CompilerInDriverLongT, const YAML::Node node = GetParam(); - /* Validate configuration */ ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["flags"].as().size(), 0); - ASSERT_EQ(node["output"].as>().size(), + ASSERT_EQ(node["class_index"].as>().size(), node["input"].as>().size()); - /* Setup */ threads = node["input"].as>().size(); if (node["iterations"].IsDefined()) iterations = node["iterations"].as(); else iterations = 1; - ASSERT_TRUE( - getModelFromPath(modelDir + node["path"].as(), modelXml, modelBin)); - - buildFlags = getFlagsFromString(node["flags"].as()); - - createModelData(modelIR, modelXml, modelBin, buildFlags, graphDesc); - - /*Set expected output*/ - imageClass = node["output"].as>(); + imageClassIndexes = node["class_index"].as>(); - /*Create list of images to load*/ for (auto &image : node["input"].as>()) { testImages.push_back(imageDir + image); } @@ -701,8 +558,8 @@ class CompilerInDriverThreaded : public CompilerInDriverLongT, uint32_t threads; uint32_t iterations; - std::vector testImages; // paths to test images - std::vector imageClass; // expected result + std::vector testImages; + std::vector imageClassIndexes; }; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverThreaded); @@ -714,75 +571,52 @@ INSTANTIATE_TEST_SUITE_P( [](const testing::TestParamInfo &p) { return generateTestNameFromNode(p.param); }); TEST_P(CompilerInDriverThreaded, ImageClassificationUsingImagenet) { - auto runInference = [&](uint16_t imageClass, std::string testImagePath) -> void { - ze_result_t ret; + auto runInference = [&](uint16_t imageClassIndex, std::string testImagePath) -> void { + const YAML::Node node = GetParam(); + + ze_result_t ret = ZE_RESULT_SUCCESS; auto scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto queue = scopedQueue.get(); auto scopedList = zeScope::commandListCreate(zeContext, zeDevice, cmdListDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); auto list = scopedList.get(); - zeScope::SharedPtr scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - std::vector graphInputSize, graphOutputSize; - getGraphArgumentSize(scopedGraphHandle.get(), graphInputSize, graphOutputSize); - ASSERT_NE(graphInputSize.size(), 0); - ASSERT_NE(graphOutputSize.size(), 0); + std::shared_ptr graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + modelDir + node["path"].as(), + node); - std::vector in, out; - allocListOfMemory(graphInputSize, in, SHARED_MEMORY); - allocListOfMemory(graphOutputSize, out, SHARED_MEMORY); + graph->allocateArguments(MemType::SHARED_MEMORY); - ASSERT_TRUE(loadImageData(in.at(0), graphInputSize.at(0), 
testImagePath)); + graph->loadInputData(testImagePath); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, - scopedGraphHandle.get(), - nullptr, - 0, - nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueExecuteCommandLists(scopedQueue.get(), 1, &list, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueSynchronize(scopedQueue.get(), graphSyncTimeout), - ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - uint32_t argIndex = 0; - for (const auto &ptr : in) { - ASSERT_EQ( - zeGraphDDITableExt->pfnSetArgumentValue(scopedGraphHandle.get(), argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : out) { - ASSERT_EQ( - zeGraphDDITableExt->pfnSetArgumentValue(scopedGraphHandle.get(), argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphExecute(list, - scopedGraphHandle.get(), - nullptr, - nullptr, - 0, - nullptr), + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); for (uint32_t iteration = 0; iteration < iterations; iteration++) { - memset(out.at(0), 0, graphOutputSize.at(0)); + memset(graph->outArgs.at(0), 0, graph->outputSize.at(0)); - ASSERT_EQ(zeCommandQueueExecuteCommandLists(scopedQueue.get(), 1, &list, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueSynchronize(scopedQueue.get(), graphSyncTimeout), + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - ASSERT_TRUE(checkOutputDataset(out.at(0), graphOutputSize.at(0), imageClass)) - << "Failed on iteration: " << iteration; + graph->checkResults(imageClassIndex); + graph->clearOutput(); } ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); }; // end of runInference @@ -790,38 +624,39 @@ TEST_P(CompilerInDriverThreaded, ImageClassificationUsingImagenet) { std::vector> results; for (uint32_t i = 0; i < threads; i++) results.push_back( - std::async(std::launch::async, runInference, imageClass[i], testImages[i])); + std::async(std::launch::async, runInference, imageClassIndexes[i], testImages[i])); for (auto &r : results) { r.wait(); } } -class CompilerInDriverMultiinference : public CompilerInDriverThreaded { +class CompilerInDriverMultiInference : public CompilerInDriverThreaded { public: struct localInference { std::string modelName; - ze_graph_desc_t graphDesc = {}; - std::shared_ptr<_ze_graph_handle_t> scopedGraphHandle; - std::vector modelIR = {}; + std::shared_ptr graph; + uint32_t time; uint32_t targetFps; - std::vector testImages; // paths to test images - std::vector imageClass; // expected result + std::vector testImages; // paths to test images + std::vector imageClassIndexes; // expected result + ze_command_queue_priority_t priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + size_t delayInUs = 0; }; + std::vector testInferences = {}; void SetUp() override { CompilerInDriverLongT::SetUp(); - const YAML::Node modelsSet = GetParam(); + const YAML::Node modelsSet = GetParam()["pipeline"]; if (modelsSet.size() == 0) 
SKIP_("Missing models for testing"); for (auto &model : modelsSet) { ASSERT_GT(model["path"].as().size(), 0); - ASSERT_GT(model["flags"].as().size(), 0); localInference inference; @@ -834,99 +669,101 @@ class CompilerInDriverMultiinference : public CompilerInDriverThreaded { inference.time = model["exec_time_in_secs"].as(); else inference.time = 3; - - ASSERT_TRUE( - getModelFromPath(modelDir + model["path"].as(), modelXml, modelBin)); - buildFlags = getFlagsFromString(model["flags"].as()); - - createModelData(inference.modelIR, modelXml, modelBin, buildFlags, inference.graphDesc); - if (model["input"].IsDefined() && model["input"].as>().size()) { for (auto &image : model["input"].as>()) inference.testImages.push_back(imageDir + image); } - if (model["output"].IsDefined() && model["output"].as>().size()) - inference.imageClass = model["output"].as>(); + if (model["class_index"].IsDefined() && + model["class_index"].as>().size()) + inference.imageClassIndexes = model["class_index"].as>(); + + if (model["priority"].IsDefined() && model["priority"].as().size()) + inference.priority = toZePriority(model["priority"].as()); + + if (model["delay_in_us"].IsDefined()) + inference.delayInUs = model["delay_in_us"].as(); + + std::shared_ptr graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + modelDir + model["path"].as(), + model); + + inference.graph = std::move(graph); - ze_result_t ret; - inference.scopedGraphHandle = zeScope::graphCreate(zeGraphDDITableExt, - zeContext, - zeDevice, - inference.graphDesc, - ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); inference.modelName = model["path"].as(); testInferences.push_back(inference); } } + + ze_command_queue_priority_t toZePriority(const std::string &p) { + if (p == "high") { + return ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH; + } else if (p == "low") { + return ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; + } else if (p == "normal") { + return ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + } + throw std::runtime_error("Invalid priority, should be: high, low or normal"); + } }; -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverMultiinference); +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(CompilerInDriverMultiInference); INSTANTIATE_TEST_SUITE_P(, - CompilerInDriverMultiinference, - ::testing::Values(Environment::getConfiguration("multi_inference")), + CompilerInDriverMultiInference, + ::testing::ValuesIn(Environment::getConfiguration("multi_inference")), [](const testing::TestParamInfo &p) { - return std::to_string(p.param.size()) + std::string("_Inferences"); + return p.param["name"].as(); }); -#define BREAK_ON_FAIL(operResult, retStats) \ - { \ - if (operResult != ZE_RESULT_SUCCESS) { \ - retStats.status = operResult; \ - return retStats; \ - } \ +#define BREAK_ON_FAIL(ret, stats) \ + if (ret != ZE_RESULT_SUCCESS) { \ + EXPECT_EQ(ret, ZE_RESULT_SUCCESS); \ + stats.status = ret; \ + return stats; \ } -TEST_P(CompilerInDriverMultiinference, ImageClassification) { +TEST_P(CompilerInDriverMultiInference, Pipeline) { struct inferenceStats { - std::string model; ze_result_t status; - uint32_t totalFrames = 0; - uint32_t droppedFrames = 0; + int totalFrames = 0; + int droppedFrames = 0; double realFPS = 0; double maxExecTimePerFrame; double minExecTimePerFrame; }; auto runInference = - [&](const CompilerInDriverMultiinference::localInference &inference) -> inferenceStats { - ze_result_t ret; + [&](const CompilerInDriverMultiInference::localInference &inference) -> inferenceStats { + ze_result_t ret = ZE_RESULT_SUCCESS; inferenceStats stats = {}; 
stats.status = ZE_RESULT_SUCCESS; stats.minExecTimePerFrame = DBL_MAX; - stats.model = inference.modelName; - auto scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); + auto cmdQueueDescInference = cmdQueueDesc; + cmdQueueDescInference.priority = inference.priority; + auto scopedQueue = + zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDescInference, ret); BREAK_ON_FAIL(ret, stats); + auto queue = scopedQueue.get(); auto scopedList = zeScope::commandListCreate(zeContext, zeDevice, cmdListDesc, ret); BREAK_ON_FAIL(ret, stats); auto list = scopedList.get(); - BREAK_ON_FAIL(ret, stats); - std::vector inSize, outSize; - getGraphArgumentSize(inference.scopedGraphHandle.get(), inSize, outSize); - EXPECT_NE(inSize.size(), 0); - EXPECT_NE(outSize.size(), 0); - - std::vector in, out; - allocListOfMemory(inSize, in, SHARED_MEMORY); - allocListOfMemory(outSize, out, SHARED_MEMORY); + inference.graph->allocateArguments(MemType::SHARED_MEMORY); if (inference.testImages.size()) { - EXPECT_TRUE(loadImageData(in.at(0), inSize.at(0), inference.testImages[0])); + inference.graph->loadInputData(inference.testImages[0]); } else { - for (size_t i = 0; i < inSize.size(); ++i) { - std::vector inputData; - generateRandomData(inputData, inSize[i]); - memcpy(in[i], inputData.data(), inSize[i]); - } + inference.graph->setRandomInput(); } + ret = zeGraphDDITableExt->pfnAppendGraphInitialize(list, - inference.scopedGraphHandle.get(), + inference.graph->handle, nullptr, 0, nullptr); @@ -934,31 +771,18 @@ TEST_P(CompilerInDriverMultiinference, ImageClassification) { ret = zeCommandListClose(list); BREAK_ON_FAIL(ret, stats); - ret = zeCommandQueueExecuteCommandLists(scopedQueue.get(), 1, &list, nullptr); + + ret = zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr); BREAK_ON_FAIL(ret, stats); - ret = zeCommandQueueSynchronize(scopedQueue.get(), graphSyncTimeout); + ret = zeCommandQueueSynchronize(queue, graphSyncTimeout); BREAK_ON_FAIL(ret, stats); ret = zeCommandListReset(list); BREAK_ON_FAIL(ret, stats); - uint32_t argIndex = 0; - for (const auto &ptr : in) { - ret = zeGraphDDITableExt->pfnSetArgumentValue(inference.scopedGraphHandle.get(), - argIndex++, - ptr); - BREAK_ON_FAIL(ret, stats); - } - for (const auto &ptr : out) { - ret = zeGraphDDITableExt->pfnSetArgumentValue(inference.scopedGraphHandle.get(), - argIndex++, - ptr); - BREAK_ON_FAIL(ret, stats); - } - ret = zeGraphDDITableExt->pfnAppendGraphExecute(list, - inference.scopedGraphHandle.get(), + inference.graph->handle, nullptr, nullptr, 0, @@ -967,6 +791,9 @@ TEST_P(CompilerInDriverMultiinference, ImageClassification) { ret = zeCommandListClose(list); BREAK_ON_FAIL(ret, stats); + + std::this_thread::sleep_for(std::chrono::microseconds(inference.delayInUs)); + auto endInferenceTime = std::chrono::steady_clock::now() + std::chrono::seconds(inference.time); double summaryInferenceTimeMs = 0; @@ -975,16 +802,16 @@ TEST_P(CompilerInDriverMultiinference, ImageClassification) { while (std::chrono::steady_clock::now() < endInferenceTime) { auto frameBeginIncludingWait = std::chrono::steady_clock::now(); - if (std::chrono::steady_clock::now() > nextFrameStartPoint) - stats.droppedFrames++; - while (std::chrono::steady_clock::now() < nextFrameStartPoint) std::this_thread::yield(); auto frameBegin = std::chrono::steady_clock::now(); - ret = zeCommandQueueExecuteCommandLists(scopedQueue.get(), 1, &list, nullptr); + ret = zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr); BREAK_ON_FAIL(ret, stats); - ret = 
zeCommandQueueSynchronize(scopedQueue.get(), graphSyncTimeout); + + ret = zeCommandQueueSynchronize( + queue, + std::chrono::nanoseconds(std::chrono::seconds(inference.time)).count()); BREAK_ON_FAIL(ret, stats); nextFrameStartPoint = frameBegin + frameTargetTimeUs; @@ -993,7 +820,7 @@ TEST_P(CompilerInDriverMultiinference, ImageClassification) { std::chrono::steady_clock::now() - frameBeginIncludingWait; summaryInferenceTimeMs += durationMs.count(); - /* calculate min max frame time*/ + /* Calculate min max frame time */ durationMs = std::chrono::steady_clock::now() - frameBegin; stats.maxExecTimePerFrame = std::max(stats.maxExecTimePerFrame, durationMs.count()); @@ -1001,34 +828,35 @@ TEST_P(CompilerInDriverMultiinference, ImageClassification) { stats.totalFrames++; - if (inference.imageClass.size()) { - EXPECT_TRUE(checkOutputDataset(out.at(0), outSize.at(0), inference.imageClass[0])) - << "Failed "; - memset(out.at(0), 0, outSize.at(0)); + if (inference.imageClassIndexes.size()) { + inference.graph->checkResults(inference.imageClassIndexes[0]); + inference.graph->clearOutput(); } } stats.realFPS = 1000 * stats.totalFrames / summaryInferenceTimeMs; + int targetFrames = inference.targetFps * inference.time; + stats.droppedFrames = std::max(targetFrames - stats.totalFrames, 0); return stats; }; // end of runInference std::vector> results; - for (uint32_t i = 0; i < testInferences.size(); i++) + for (size_t i = 0; i < testInferences.size(); i++) results.push_back(std::async(std::launch::async, runInference, testInferences[i])); - for (auto &r : results) { - r.wait(); - inferenceStats s = r.get(); + for (size_t i = 0; i < results.size(); i++) { + inferenceStats stats = results[i].get(); + PRINTF("----------------------------------------------------\n"); - PRINTF("Model: %s \n", s.model.c_str()); - if (s.status == ZE_RESULT_SUCCESS) + PRINTF("Model: %s \n", testInferences[i].modelName.c_str()); + if (stats.status == ZE_RESULT_SUCCESS) PRINTF("Status: SUCCESS \n"); else - PRINTF("Status: FAIL %d \n", s.status); - PRINTF("FramesExecuted: %d \n", s.totalFrames); - PRINTF("FramesDropped: %d \n", s.droppedFrames); - PRINTF("CalculatedFPS: %f \n", s.realFPS); - PRINTF("MaxFrameExecTime[ms]: %f \n", s.maxExecTimePerFrame); - PRINTF("MinFrameExecTime[ms]: %f \n", s.minExecTimePerFrame); + PRINTF("Status: FAIL (%#x) \n", stats.status); + PRINTF("FramesExecuted: %d \n", stats.totalFrames); + PRINTF("FramesDropped: %d \n", stats.droppedFrames); + PRINTF("CalculatedFPS: %f \n", stats.realFPS); + PRINTF("MaxFrameExecTime[ms]: %f \n", stats.maxExecTimePerFrame); + PRINTF("MinFrameExecTime[ms]: %f \n", stats.minExecTimePerFrame); } } diff --git a/validation/umd-test/test_graph_commands.cpp b/validation/umd-test/test_graph_commands.cpp index abd4985..9b6d95a 100644 --- a/validation/umd-test/test_graph_commands.cpp +++ b/validation/umd-test/test_graph_commands.cpp @@ -5,8 +5,7 @@ * */ -#include "blob_params.hpp" -#include "umd_test.h" +#include "graph_utilities.hpp" #include #include @@ -18,11 +17,6 @@ class CommandGraphBase : public UmdTest { void SetUp() override { UmdTest::SetUp(); - /*Get base configuration from config file*/ - YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as(); - scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); queue = scopedQueue.get(); @@ -36,49 +30,6 @@ class CommandGraphBase : public UmdTest { void 
threadedCommandQueueSyncWrapper(std::promise<_ze_result_t> &&promise); - ze_graph_handle_t createGraph(std::vector &vpuBlob, std::vector &vpuBin) { - ze_activation_kernel_desc_t actKernelDesc = {}; - if (!vpuBin.empty()) { - actKernelDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_ACTIVATION_KERNEL, - .pNext = nullptr, - .kernelDataSize = vpuBin.size(), - .pKernelData = reinterpret_cast(vpuBin.data())}; - } - - const ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = !vpuBin.empty() ? &actKernelDesc : nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast(vpuBlob.data()), - .pBuildFlags = nullptr}; - - scopedGraphHandle.push_back( - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret)); - EXPECT_EQ(ret, ZE_RESULT_SUCCESS); - return scopedGraphHandle.back().get(); - } - - enum MemType : uint8_t { - DEVICE_MEMORY, - HOST_MEMORY, - SHARED_MEMORY, - }; - - void allocListOfMemory(const std::vector> &data, - std::vector &listOfMem, - MemType memType) { - for (const auto &d : data) { - if (memType == DEVICE_MEMORY) { - mem.push_back(AllocDeviceMemory(d.size())); - } else if (memType == HOST_MEMORY) { - mem.push_back(AllocHostMemory(d.size())); - } else { - mem.push_back(AllocSharedMemory(d.size())); - } - listOfMem.push_back(mem.back().get()); - } - } - ze_command_queue_desc_t cmdQueueDesc{.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, .pNext = nullptr, .ordinal = 0, @@ -94,21 +45,14 @@ class CommandGraphBase : public UmdTest { ze_command_queue_handle_t queue = nullptr; ze_command_list_handle_t list = nullptr; - ze_graph_handle_t graphHandle = nullptr; - ze_result_t ret = ZE_RESULT_SUCCESS; - - std::vector> inputBin, outputBin; - std::vector vpuBlob, vpuBin; - std::vector graphInput, graphOutput; - std::vector> mem; + ze_result_t ret = ZE_RESULT_SUCCESS; - std::string blobDir = ""; + std::shared_ptr graph; private: zeScope::SharedPtr scopedQueue = nullptr; zeScope::SharedPtr scopedList = nullptr; - std::vector> scopedGraphHandle; }; // Functor for threaded use case of zeCommandQueueSync with a promise as parameter @@ -136,19 +80,14 @@ class CommandGraph : public CommandGraphBase { /* CommandGraph test will be run on first blob taken from configuration */ const YAML::Node node = Environment::getConfiguration("graph_execution")[0]; - /*Validate configuration*/ + /* Validate configuration */ ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["in"].as>().size(), 0); - ASSERT_GT(node["out"].as>().size(), 0); - - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); - graphHandle = createGraph(vpuBlob, vpuBin); + + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + blobDir + node["path"].as(), + node); } }; @@ -158,52 +97,50 @@ TEST_F(CommandGraph, AppendGraphInitWithNullGraphHandle) { } TEST_F(CommandGraph, AppendGraphInitExecuteWithoutSettingArgumentForInputOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_ERROR_UNINITIALIZED); + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_ERROR_UNINITIALIZED); } 
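/*
 * The ZE_RESULT_ERROR_UNINITIALIZED above is the driver refusing to execute a
 * graph with unbound arguments. The happy path binds every input and output
 * first; a minimal sketch using the DDI table directly, mirroring the loop
 * the pre-refactor tests used (the DDI table type name is an assumption):
 */
static void bindAllGraphArguments(ze_graph_dditable_ext_t *ddi,
                                  const std::shared_ptr<Graph> &graph) {
    uint32_t argIndex = 0;
    for (void *ptr : graph->inArgs)
        ASSERT_EQ(ddi->pfnSetArgumentValue(graph->handle, argIndex++, ptr), ZE_RESULT_SUCCESS);
    for (void *ptr : graph->outArgs)
        ASSERT_EQ(ddi->pfnSetArgumentValue(graph->handle, argIndex++, ptr), ZE_RESULT_SUCCESS);
    /* only after binding is pfnAppendGraphExecute() expected to succeed */
}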
TEST_F(CommandGraph, SettingNullArgumentForInputOutput) { ze_graph_properties_t graphProps = {}; graphProps.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(graphHandle, &graphProps), ZE_RESULT_SUCCESS) + ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(graph->handle, &graphProps), ZE_RESULT_SUCCESS) << "Failed to get Graph properties"; for (size_t index = 0; index < graphProps.numGraphArgs; index++) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, index, nullptr), - ZE_RESULT_ERROR_INVALID_NULL_POINTER); + ASSERT_EQ(graph->setArgumentValue(index, nullptr), ZE_RESULT_ERROR_INVALID_NULL_POINTER); } } TEST_F(CommandGraph, SetArgumentIndexGreaterThanExpectedArgumentIndexLimit) { ze_graph_properties_t graphProps = {}; graphProps.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(graphHandle, &graphProps), ZE_RESULT_SUCCESS) + ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(graph->handle, &graphProps), ZE_RESULT_SUCCESS) << "Failed to get Graph properties"; - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, - graphProps.numGraphArgs, - inputBin.at(0).data()), + ASSERT_EQ(graph->setArgumentValue(graphProps.numGraphArgs, graph->inputBin.at(0).data()), ZE_RESULT_ERROR_INVALID_ARGUMENT); } TEST_F(CommandGraph, GetArgumentPropertiesReturnsExpectedProperties) { - ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(graphHandle, 0, nullptr), + ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(graph->handle, 0, nullptr), ZE_RESULT_ERROR_INVALID_NULL_POINTER); ze_graph_properties_t graphProps = {}; graphProps.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(graphHandle, &graphProps), ZE_RESULT_SUCCESS) + ASSERT_EQ(zeGraphDDITableExt->pfnGetProperties(graph->handle, &graphProps), ZE_RESULT_SUCCESS) << "Failed to get Graph properties"; ze_graph_argument_properties_t pGraphArgumentProperties = {}; graphProps.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; for (uint32_t index = 0; index < graphProps.numGraphArgs; index++) { - ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(graphHandle, + ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(graph->handle, index, &pGraphArgumentProperties), ZE_RESULT_SUCCESS); @@ -226,23 +163,22 @@ TEST_F(CommandGraph, GetArgumentPropertiesReturnsExpectedProperties) { ASSERT_NE(pGraphArgumentProperties.devicePrecision, ZE_GRAPH_ARGUMENT_PRECISION_BIN); } - ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(graphHandle, + ASSERT_EQ(zeGraphDDITableExt->pfnGetArgumentProperties(graph->handle, graphProps.numGraphArgs, &pGraphArgumentProperties), ZE_RESULT_ERROR_INVALID_ARGUMENT); } TEST_F(CommandGraph, AppendGraphInitExecuteWithoutAllocatingInputOutputMemory) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, 0, inputBin.at(0).data()), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, 1, nullptr), - ZE_RESULT_ERROR_INVALID_NULL_POINTER); + ASSERT_EQ(graph->setArgumentValue(0, graph->inputBin.at(0).data()), ZE_RESULT_SUCCESS); + ASSERT_EQ(graph->setArgumentValue(1, nullptr), ZE_RESULT_ERROR_INVALID_NULL_POINTER); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_ERROR_UNINITIALIZED); + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, 
nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_ERROR_UNINITIALIZED); } class CommandGraphLong : public CommandGraphBase, public ::testing::WithParamInterface { @@ -253,21 +189,14 @@ class CommandGraphLong : public CommandGraphBase, public ::testing::WithParamInt /* Validate configuration */ ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["in"].as>().size(), 0); - ASSERT_GT(node["out"].as>().size(), 0); - - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); - graphHandle = createGraph(vpuBlob, vpuBin); + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + blobDir + node["path"].as(), + node); - allocListOfMemory(inputBin, graphInput, SHARED_MEMORY); - allocListOfMemory(outputBin, graphOutput, SHARED_MEMORY); + graph->allocateArguments(MemType::SHARED_MEMORY); } }; @@ -281,9 +210,7 @@ INSTANTIATE_TEST_SUITE_P(, }); TEST_P(CommandGraphLong, AppendGraphInitExecuteAndSynchronize) { - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } + graph->copyInputData(); std::chrono::steady_clock::time_point graphInitializeStart, graphInitializeStop, executeInferenceStart, executeInferenceStop; @@ -292,8 +219,9 @@ TEST_P(CommandGraphLong, AppendGraphInitExecuteAndSynchronize) { graphInitializeStart = std::chrono::steady_clock::now(); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); @@ -308,21 +236,11 @@ TEST_P(CommandGraphLong, AppendGraphInitExecuteAndSynchronize) { ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - executeInferenceStart = std::chrono::steady_clock::now(); - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); @@ -338,17 +256,15 @@ TEST_P(CommandGraphLong, AppendGraphInitExecuteAndSynchronize) { printf("\nFirst inference latency: %f [s]\n\n", (durationGraphInitialize + durationExecuteInference).count()); - for (size_t i = 0; i < graphOutput.size(); i++) - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); } TEST_P(CommandGraphLong, AppendGraphInitExecuteAndThreadedSynchronize) { - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } + graph->copyInputData(); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + 
ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); @@ -356,19 +272,9 @@ TEST_P(CommandGraphLong, AppendGraphInitExecuteAndThreadedSynchronize) { ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); @@ -404,13 +310,13 @@ TEST_P(CommandGraphLong, AppendGraphInitExecuteAndThreadedSynchronize) { ASSERT_EQ(result, ZE_RESULT_SUCCESS) << "TIMEOUT from threadedCommandQueueSyncWrapper"; ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < graphOutput.size(); i++) - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); } TEST_P(CommandGraphLong, RunGraphInitOnly) { - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); @@ -418,128 +324,99 @@ TEST_P(CommandGraphLong, RunGraphInitOnly) { } TEST_P(CommandGraphLong, AppendGraphInitTwiceAndExecute) { - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } - - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } + graph->copyInputData(); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); 
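/*
 * AppendGraphInitExecuteAndThreadedSynchronize (above) waits for the queue
 * from a worker thread via threadedCommandQueueSyncWrapper(). A minimal sketch
 * of that pattern, assuming the wrapper simply forwards the result of
 * zeCommandQueueSynchronize() through the promise (std::promise/std::future
 * are from <future>, already used by this file):
 */
std::promise<ze_result_t> promise;
std::future<ze_result_t> future = promise.get_future();

std::thread worker(
    [&](std::promise<ze_result_t> &&p) {
        p.set_value(zeCommandQueueSynchronize(queue, graphSyncTimeout));
    },
    std::move(promise));

/* guard against a worker that never returns; allow twice the device timeout */
ASSERT_EQ(future.wait_for(std::chrono::nanoseconds(2 * graphSyncTimeout)),
          std::future_status::ready);
EXPECT_EQ(future.get(), ZE_RESULT_SUCCESS);
worker.join();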
ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < graphOutput.size(); i++) - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); } -TEST_P(CommandGraphLong, MultiGraphExecutionTest) { - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } +TEST_P(CommandGraphLong, RunGraphExecuteThreeTimes) { + graph->copyInputData(); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (uint32_t i = 0; i < 3; i++) { ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeGraphDDITableExt - ->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t j = 0; j < graphOutput.size(); j++) { - EXPECT_EQ(memcmp(graphOutput[j], outputBin[j].data(), outputBin[j].size()), 0); - memset(graphOutput[j], 0, outputBin[j].size()); - } + graph->checkResults(); + graph->clearOutput(); } } TEST_P(CommandGraphLong, SingleListGraphExecutionWithBarrierTest) { - // Memory Allocation and Argument Setting - std::vector inputMemHost, outputMemHost, inputMemDev, outputMemDev; - allocListOfMemory(inputBin, inputMemHost, HOST_MEMORY); - allocListOfMemory(outputBin, outputMemHost, HOST_MEMORY); - allocListOfMemory(inputBin, inputMemDev, DEVICE_MEMORY); - allocListOfMemory(outputBin, outputMemDev, DEVICE_MEMORY); - - for (size_t i = 0; i < inputBin.size(); i++) { - memcpy(inputMemHost[i], inputBin[i].data(), inputBin[i].size()); - } + std::vector> mem; + std::vector inputMemHost, outputMemHost; - uint32_t argIndex = 0; - for (const auto &ptr : inputMemDev) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); + for (auto &input : graph->inputBin) { + auto memInput = AllocHostMemory(input.size()); + + mem.push_back(memInput); + inputMemHost.push_back(memInput.get()); + memcpy(memInput.get(), input.data(), input.size()); } - for (const auto &ptr : outputMemDev) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); + + for (auto &output : graph->outputBin) { + auto memOutput = AllocHostMemory(output.size()); + + mem.push_back(memOutput); + outputMemHost.push_back(memOutput.get()); } - // Building command list - for (size_t i = 0; i < inputBin.size(); i++) { + for (size_t i = 0; i < graph->inputBin.size(); i++) { ASSERT_EQ(zeCommandListAppendMemoryCopy(list, - 
inputMemDev[i], + graph->inArgs[i], inputMemHost[i], - inputBin[i].size(), + graph->inputBin[i].size(), nullptr, 0, nullptr), ZE_RESULT_SUCCESS); } - // Appending barrier to commandlist to ensure copy completion ASSERT_EQ(zeCommandListAppendBarrier(list, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); - // Appending barrier to commandlist to ensure execution completion + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandListAppendBarrier(list, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < outputBin.size(); i++) { + for (size_t i = 0; i < graph->outputBin.size(); i++) { ASSERT_EQ(zeCommandListAppendMemoryCopy(list, outputMemHost[i], - outputMemDev[i], - outputBin[i].size(), + graph->outArgs[i], + graph->outputBin[i].size(), nullptr, 0, nullptr), @@ -548,57 +425,40 @@ TEST_P(CommandGraphLong, SingleListGraphExecutionWithBarrierTest) { ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - // Execution and sync ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - // Readback - for (size_t i = 0; i < outputBin.size(); i++) - ASSERT_EQ(memcmp(outputMemHost[i], outputBin[i].data(), outputBin[i].size()), 0); + for (size_t i = 0; i < graph->outArgs.size(); i++) + ASSERT_EQ(memcmp(outputMemHost[i], graph->outputBin[i].data(), graph->outputBin[i].size()), + 0); } TEST_P(CommandGraphLong, LoadGraphOnceAndRunExecutionTwice) { - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } + graph->copyInputData(); - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < outputBin.size(); i++) - ASSERT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); } TEST_P(CommandGraphLong, 
RunGraphExecuteInTwoSeparateCommandLists) { - std::vector graphOutput2; - allocListOfMemory(outputBin, graphOutput2, SHARED_MEMORY); - - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } + graph->copyInputData(); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); @@ -606,47 +466,28 @@ TEST_P(CommandGraphLong, RunGraphExecuteInTwoSeparateCommandLists) { ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - ze_result_t ret; + ze_result_t ret = ZE_RESULT_SUCCESS; auto scopedList1 = zeScope::commandListCreate(zeContext, zeDevice, cmdListDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); auto list1 = scopedList1.get(); - argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput2) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } + const std::vector &graphOutput1 = graph->outArgs; - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list1, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + graph->allocateArguments(MemType::SHARED_MEMORY); + + graph->copyInputData(); + + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list1, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list1), ZE_RESULT_SUCCESS); - // Create fences - ze_fence_desc_t fenceDesc = { - ZE_STRUCTURE_TYPE_FENCE_DESC, - nullptr, - 0 // flags - }; + ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC, nullptr, 0}; auto scopedFence0 = zeScope::fenceCreate(queue, fenceDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); @@ -661,96 +502,82 @@ TEST_P(CommandGraphLong, RunGraphExecuteInTwoSeparateCommandLists) { ASSERT_EQ(zeFenceHostSynchronize(fence0, graphSyncTimeout), ZE_RESULT_SUCCESS); ASSERT_EQ(zeFenceHostSynchronize(fence1, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < outputBin.size(); i++) { - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); - EXPECT_EQ(memcmp(graphOutput2[i], outputBin[i].data(), outputBin[i].size()), 0); + for (size_t i = 0; i < graph->outArgs.size(); i++) { + EXPECT_EQ(memcmp(graphOutput1[i], graph->outputBin[i].data(), graph->outputBin[i].size()), + 0); + EXPECT_EQ(memcmp(graph->outArgs[i], graph->outputBin[i].data(), graph->outputBin[i].size()), + 0); } } TEST_P(CommandGraphLong, AppendGraphInitAndExecuteWithSingleMemoryAllocation) { auto offset = 0x100; 
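+ // The non-zero base offset (0x100) presumably verifies that graph arguments need not start at the allocation base; inputs and outputs below are packed back-to-back from this offset within a single host allocation.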
size_t totalArgSize = 0; - for (const auto &arg : inputBin) { + for (const auto &arg : graph->inputBin) { totalArgSize += arg.size(); } - for (const auto &arg : outputBin) { + for (const auto &arg : graph->outputBin) { totalArgSize += arg.size(); } - mem.push_back(AllocHostMemory(offset + totalArgSize)); - uint8_t *address = static_cast(mem.back().get()) + offset; + auto mem = AllocHostMemory(offset + totalArgSize); + uint8_t *address = static_cast(mem.get()) + offset; - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(address, inputBin[i].data(), inputBin[i].size()); - address += inputBin[i].size(); + for (size_t i = 0; i < graph->inArgs.size(); i++) { + memcpy(address, graph->inputBin[i].data(), graph->inputBin[i].size()); + address += graph->inputBin[i].size(); } uint32_t argIndex = 0; - address = static_cast(mem.back().get()) + offset; - for (const auto &arg : inputBin) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, address), - ZE_RESULT_SUCCESS); + address = static_cast(mem.get()) + offset; + for (const auto &arg : graph->inputBin) { + ASSERT_EQ(graph->setArgumentValue(argIndex++, address), ZE_RESULT_SUCCESS); address += arg.size(); } - graphOutput.clear(); - for (const auto &arg : outputBin) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, address), - ZE_RESULT_SUCCESS); - graphOutput.push_back(address); + graph->outArgs.clear(); + for (const auto &arg : graph->outputBin) { + ASSERT_EQ(graph->setArgumentValue(argIndex++, address), ZE_RESULT_SUCCESS); + graph->outArgs.push_back(address); address += arg.size(); } - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < graphOutput.size(); i++) - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); } TEST_P(CommandGraphLong, GraphInitAndExecWith200msDelay) { - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } + graph->copyInputData(); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - std::this_thread::sleep_for(std::chrono::milliseconds(200)); 
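+ // The 200 ms pause presumably lets the NPU drop into its idle/suspended state, so the execute below also covers resume-from-idle (compare the D3 note in test_metric_streamer.cpp).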
ASSERT_EQ(zeCommandListReset(list), ZE_RESULT_SUCCESS); - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < graphOutput.size(); i++) { - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); - memset(graphOutput[i], 0, outputBin[i].size()); - } + graph->checkResults(); } class CommandGraphLongThreaded @@ -774,92 +601,48 @@ TEST_P(CommandGraphLongThreaded, AppendGraphInitExecuteAndSynchronize) { auto param = GetParam(); const YAML::Node node(std::get<0>(param)); uint32_t threadParam = std::get<1>(param); - std::vector> inputBin, outputBin; - std::vector vpuBlob, vpuBin; /* Validate configuration */ ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["in"].as>().size(), 0); - ASSERT_GT(node["out"].as>().size(), 0); - - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); - - std::mutex mutex; + std::vector> tasks; for (size_t i = 0; i < threadParam; i++) { - tasks.push_back( - std::make_unique([this, vpuBlob, vpuBin, inputBin, outputBin, &mutex]() { - ze_activation_kernel_desc_t actKernelDesc = {}; - if (!vpuBin.empty()) { - actKernelDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_ACTIVATION_KERNEL, - .pNext = nullptr, - .kernelDataSize = vpuBin.size(), - .pKernelData = - reinterpret_cast(vpuBin.data())}; - } - - const ze_graph_desc_t graphDesc = { - .stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = !vpuBin.empty() ? 
&actKernelDesc : nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast(vpuBlob.data()), - .pBuildFlags = nullptr}; - ze_result_t ret; - std::vector graphInput, graphOutput; - auto scopedQueue = - zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - auto scopedList = zeScope::commandListCreate(zeContext, zeDevice, cmdListDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - auto hGraph = scopedGraphHandle.get(); - auto queue = scopedQueue.get(); - auto list = scopedList.get(); - - { - std::lock_guard lock(mutex); - allocListOfMemory(inputBin, graphInput, SHARED_MEMORY); - allocListOfMemory(outputBin, graphOutput, SHARED_MEMORY); - } - - for (uint32_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } - - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(hGraph, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(hGraph, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphInitialize(list, hGraph, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeGraphDDITableExt - ->pfnAppendGraphExecute(list, hGraph, nullptr, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - - ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), - ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - - for (size_t i = 0; i < outputBin.size(); i++) - ASSERT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); - })); + tasks.push_back(std::make_unique([this, node]() { + std::shared_ptr graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + blobDir + node["path"].as(), + node); + + ze_result_t ret = ZE_RESULT_SUCCESS; + + auto scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); + ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto queue = scopedQueue.get(); + + auto scopedList = zeScope::commandListCreate(zeContext, zeDevice, cmdListDesc, ret); + ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + auto list = scopedList.get(); + + graph->allocateArguments(MemType::SHARED_MEMORY); + + graph->copyInputData(); + + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); + + graph->checkResults(); + })); } for (const auto &t : tasks) { t.get()->join(); @@ -867,7 +650,6 @@ TEST_P(CommandGraphLongThreaded, AppendGraphInitExecuteAndSynchronize) { } TEST_F(CommandGraphLongThreaded, RunAllBlobsInSingleContextSimultaneously) { - std::mutex mutex; std::vector> tasks; if (!Environment::getConfiguration("graph_execution").size()) @@ -875,43 +657,20 @@ TEST_F(CommandGraphLongThreaded, RunAllBlobsInSingleContextSimultaneously) { for (const auto &node : 
Environment::getConfiguration("graph_execution")) { ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["in"].as>().size(), 0); - ASSERT_GT(node["out"].as>().size(), 0); - - tasks.push_back(std::make_unique([this, node, &mutex]() { - std::vector> inputBin, outputBin; - std::vector vpuBlob, vpuBin; - - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); - - ze_graph_handle_t hGraph = nullptr; - std::vector graphInput, graphOutput; + + tasks.push_back(std::make_unique([this, node]() { + std::shared_ptr graph; { - std::lock_guard lock(mutex); - hGraph = createGraph(vpuBlob, vpuBin); - allocListOfMemory(inputBin, graphInput, SHARED_MEMORY); - allocListOfMemory(outputBin, graphOutput, SHARED_MEMORY); - } + graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + blobDir + node["path"].as(), + node); - for (uint32_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); + graph->allocateArguments(MemType::SHARED_MEMORY); } - uint32_t argIndex = 0; - for (const auto &ptr : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(hGraph, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } - for (const auto &ptr : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(hGraph, argIndex++, ptr), - ZE_RESULT_SUCCESS); - } + graph->copyInputData(); auto scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); @@ -921,20 +680,20 @@ TEST_F(CommandGraphLongThreaded, RunAllBlobsInSingleContextSimultaneously) { ASSERT_EQ(ret, ZE_RESULT_SUCCESS); auto list = scopedList.get(); - ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphInitialize(list, hGraph, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ(zeGraphDDITableExt - ->pfnAppendGraphExecute(list, hGraph, nullptr, nullptr, 0, nullptr), + ->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ( + zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); - for (size_t i = 0; i < graphOutput.size(); i++) - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); })); } for (const auto &t : tasks) { diff --git a/validation/umd-test/test_graph_inference.cpp b/validation/umd-test/test_graph_inference.cpp index 2bf96c8..2662f91 100644 --- a/validation/umd-test/test_graph_inference.cpp +++ b/validation/umd-test/test_graph_inference.cpp @@ -33,11 +33,12 @@ class GraphObject : public UmdTest { fence = scopedFence.get(); } - void GraphInitialize(const ze_graph_desc_t graphDesc, + void GraphInitialize(const ze_graph_desc_2_t graphDesc, std::vector &sizeGraphInput, std::vector &sizeGraphOutput) { ze_result_t ret; - scopedGraph = zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); + scopedGraph = + zeScope::graphCreate2(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS) << "Failed to create Graph Object"; graph = scopedGraph.get(); @@ -275,11 +276,6 @@ class GraphInferenceT : public GraphPipeline { void SetUp() override { GraphPipeline::SetUp(); - /*Get base configuration from config file*/ - 
YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as(); - for (uint32_t index = 0; index < stage::COUNT; ++index) { ASSERT_NE(queueVec[index], nullptr); ASSERT_NE(listVec[index], nullptr); @@ -324,8 +320,6 @@ class GraphInferenceT : public GraphPipeline { EXPECT_EQ(zeFenceReset(fence), ZE_RESULT_SUCCESS) << "Failed to reset Fence"; } } - - std::string blobDir = ""; }; class GraphInference : public GraphInferenceT, public ::testing::WithParamInterface { @@ -368,12 +362,13 @@ TEST_P(GraphInference, InferenceTest) { .kernelDataSize = vpuBin.size(), .pKernelData = reinterpret_cast(vpuBin.data())}; - const ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = &actShaveKernel, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast(vpuBlob.data()), - .pBuildFlags = nullptr}; + const ze_graph_desc_2_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + .pNext = &actShaveKernel, + .format = ZE_GRAPH_FORMAT_NATIVE, + .inputSize = vpuBlob.size(), + .pInput = reinterpret_cast(vpuBlob.data()), + .pBuildFlags = nullptr, + .flags = ZE_GRAPH_FLAG_NONE}; GraphInitialize(graphDesc, graphInputSize, graphOutputSize); diff --git a/validation/umd-test/test_inference_performance.cpp b/validation/umd-test/test_inference_performance.cpp index efa2647..e51133f 100644 --- a/validation/umd-test/test_inference_performance.cpp +++ b/validation/umd-test/test_inference_performance.cpp @@ -5,8 +5,7 @@ * */ -#include "umd_test.h" -#include "blob_params.hpp" +#include "graph_utilities.hpp" #include @@ -36,10 +35,7 @@ class InferencePerformance : public UmdTest, public ::testing::WithParamInterfac void SetUp() override { UmdTest::SetUp(); - /*Get base configuration from config file*/ - YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as(); + SKIP_PRESILICON("The test does not apply to the Simics platform"); scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); @@ -58,7 +54,6 @@ class InferencePerformance : public UmdTest, public ::testing::WithParamInterfac } void TearDown() override { UmdTest::TearDown(); } - std::string blobDir = ""; private: zeScope::SharedPtr scopedQueue = nullptr; @@ -75,79 +70,29 @@ INSTANTIATE_TEST_SUITE_P(, }); TEST_P(InferencePerformance, MeasureTimeBetweenTwoInferencesAfterPutVPUInIdleState) { - std::vector> inputBin, outputBin; - std::vector vpuBlob, vpuBin; - const YAML::Node node = GetParam(); - /* Validate configuration */ ASSERT_GT(node["path"].as().size(), 0); - ASSERT_GT(node["in"].as>().size(), 0); - ASSERT_GT(node["out"].as>().size(), 0); - - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); - - std::chrono::steady_clock::time_point start; - size_t execIter = 3; - - ze_activation_kernel_desc_t actKernelDesc = {}; - if (!vpuBin.empty()) { - actKernelDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_ACTIVATION_KERNEL, - .pNext = nullptr, - .kernelDataSize = vpuBin.size(), - .pKernelData = reinterpret_cast(vpuBin.data())}; - } - const ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = !vpuBin.empty() ? 
&actKernelDesc : nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast(vpuBlob.data()), - .pBuildFlags = nullptr}; - - ze_result_t ret; - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - auto graphHandle = scopedGraphHandle.get(); - - std::vector> mem; - std::vector graphInput, graphOutput; - for (const auto &s : inputBin) { - mem.push_back(AllocHostMemory(s.size())); - graphInput.push_back(mem.back().get()); - } + std::shared_ptr graph = Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + blobDir + node["path"].as(), + node); - for (const auto &s : outputBin) { - mem.push_back(AllocHostMemory(s.size())); - graphOutput.push_back(mem.back().get()); - } + graph->allocateArguments(MemType::HOST_MEMORY); - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); - } + graph->copyInputData(); - uint32_t argIndex = 0; - for (const auto &s : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, s), - ZE_RESULT_SUCCESS); - } - for (const auto &s : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, s), - ZE_RESULT_SUCCESS); - } + std::chrono::steady_clock::time_point start; + size_t execIter = 3; - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); @@ -158,8 +103,8 @@ TEST_P(InferencePerformance, MeasureTimeBetweenTwoInferencesAfterPutVPUInIdleSta ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); inferenceDuration.push_back(sectionDuration(start)); - for (size_t i = 0; i < graphOutput.size(); i++) - EXPECT_EQ(memcmp(graphOutput[i], outputBin[i].data(), outputBin[i].size()), 0); + graph->checkResults(); + graph->clearOutput(); PRINTF("Inference #%zu took: %f ms\n", i, inferenceDuration[i].count()); } diff --git a/validation/umd-test/test_memory.cpp b/validation/umd-test/test_memory.cpp index e11ea5e..3fb3578 100644 --- a/validation/umd-test/test_memory.cpp +++ b/validation/umd-test/test_memory.cpp @@ -239,7 +239,7 @@ TEST_P(MemoryExecution, ExecuteCopyCommandInMemoryHighRange) { EXPECT_EQ(*copySrc, *copyDst) << "Value after copy should be equal"; } -// TODO: Allow copy from user pointer +// TODO: Allow copy from user pointer, EISW-19284 TEST_F(MemoryExecution, DISABLED_CopyingFromUnpinnedHostMemoryShouldBeAllowed) { size_t size = 4 * 1024; diff --git a/validation/umd-test/test_metric.cpp b/validation/umd-test/test_metric.cpp index 63bce17..075e2a8 100644 --- a/validation/umd-test/test_metric.cpp +++ b/validation/umd-test/test_metric.cpp @@ -5,10 +5,15 @@ * */ -#include "umd_test.h" +#include "graph_utilities.hpp" #include +/*test case definition: + * std::tuple + */ +using metricTestCase_t = std::tuple; + class MetricGroup : public UmdTest { public: void SetUp() override { @@ -39,7 +44,6 @@ TEST_F(MetricGroup, RetrieveMetricGroupProperties) { for (auto &v : 
properties) v.stype = ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES; - // Retrieve all metric group properties and compare for (uint8_t i = 0; i < metricGroupsCount; i++) { EXPECT_EQ(zetMetricGroupGetProperties(metricGroups[i], &properties[i]), ZE_RESULT_SUCCESS); @@ -89,7 +93,6 @@ class Metric : public MetricGroup { }; TEST_F(Metric, ValidatePropertiesForMetric) { - // Group index 0, first metric EXPECT_GT(metricsPropertiesAll[0][0].metricType, 0); EXPECT_GT(metricsPropertiesAll[0][0].resultType, 0); EXPECT_GE(metricsPropertiesAll[0][0].tierNumber, 0); @@ -143,21 +146,18 @@ TEST_F(MetricQueryPool, ActivateAndCreateMetricQuery) { ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 0u, nullptr), ZE_RESULT_SUCCESS); } -struct MetricQueryExecParam { - std::string groupName; - uint8_t groupIndex; - uint8_t queryIndex; -}; - -class MetricQuery - : public Metric, - public ::testing::WithParamInterface<std::tuple<YAML::Node, MetricQueryExecParam>> { - void SetUp() override { - Metric::SetUp(); +class MetricQuery : public Metric, public ::testing::WithParamInterface<metricTestCase_t> { + public: + void SetUp() override { Metric::SetUp(); } - YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as<std::string>(); + uint32_t findMetricGroupIndex(std::string groupName) { + uint32_t index; + for (index = 0; index < groupProperties.size(); index++) { + if (groupName == groupProperties[index].name) + break; + } + EXPECT_LT(index, groupProperties.size()); + return index; } void TearDown() override { @@ -175,30 +175,32 @@ class MetricQuery Metric::TearDown(); } - protected: - zet_metric_query_pool_desc_t desc = {.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC, - .pNext = nullptr, - .type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE, - .count = 2u}; - zet_metric_query_pool_handle_t pool = nullptr; - zet_metric_query_handle_t query = nullptr; - - ze_command_queue_desc_t cmdQueueDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, - .pNext = nullptr, - .ordinal = 0, - .index = 0, - .flags = 0, - .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT, - .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; - ze_command_list_desc_t cmdListDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, - .pNext = nullptr, - .commandQueueGroupOrdinal = 0, - .flags = 0}; - ze_command_queue_handle_t queue = nullptr; - ze_command_list_handle_t list = nullptr; - std::string blobDir = ""; + /* Function returns combinations of each network with the metric + * groups and query indexes defined for that network.
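+ * An assumed graph_metrics config entry (authoritative schema in validation/umd-test/configs/README.md; the blob name here is hypothetical): + * - path: add_abc.blob + * metric_groups: [ ShaveDL1Cache, NOC ]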
+ */ + static std::vector<metricTestCase_t> + createCasesForMetricsTest(std::vector<uint32_t> &queryIndexes) { + std::vector<metricTestCase_t> combinations; + try { + std::vector<YAML::Node> networkList = Environment::getConfiguration("graph_metrics"); + + if (networkList.empty()) + return combinations; + + for (auto &network : networkList) + for (auto &groupName : network["metric_groups"].as<std::vector<std::string>>()) + for (auto index : queryIndexes) + combinations.push_back({network, groupName, index}); + + return combinations; + } catch (YAML::Exception &e) { + PRINTF("Bad node: Reason: %s\n", e.what()); + combinations.clear(); + return combinations; + } + } - void MetricInitialize(uint8_t groupIndex, uint8_t queryIndex) { + void MetricInitialize(uint8_t groupIndex, uint8_t queryIndex, uint32_t ordinal) { ASSERT_EQ( zetContextActivateMetricGroups(zeContext, zeDevice, 1u, &metricGroups[groupIndex]), ZE_RESULT_SUCCESS); @@ -211,7 +213,21 @@ class MetricQuery ASSERT_EQ(zetMetricQueryCreate(pool, queryIndex, &query), ZE_RESULT_SUCCESS); ASSERT_NE(query, nullptr); - ze_result_t ret; + ze_command_queue_desc_t cmdQueueDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + .pNext = nullptr, + .ordinal = 0, + .index = 0, + .flags = 0, + .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT, + .priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; + ze_command_list_desc_t cmdListDesc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, + .pNext = nullptr, + .commandQueueGroupOrdinal = 0, + .flags = 0}; + + cmdQueueDesc.ordinal = ordinal; + cmdListDesc.commandQueueGroupOrdinal = ordinal; + scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); queue = scopedQueue.get(); @@ -221,28 +237,22 @@ class MetricQuery list = scopedList.get(); } - private: + zet_metric_query_pool_desc_t desc = {.stype = ZET_STRUCTURE_TYPE_METRIC_QUERY_POOL_DESC, + .pNext = nullptr, + .type = ZET_METRIC_QUERY_POOL_TYPE_PERFORMANCE, + .count = 2u}; + zet_metric_query_pool_handle_t pool = nullptr; + zet_metric_query_handle_t query = nullptr; + uint8_t testedMetricIndex = 0xff; + zeScope::SharedPtr<ze_command_queue_handle_t> scopedQueue = nullptr; zeScope::SharedPtr<ze_command_list_handle_t> scopedList = nullptr; + ze_command_queue_handle_t queue = nullptr; + ze_command_list_handle_t list = nullptr; }; TEST_F(MetricQuery, RunMetricQueryOnEmptyCommandList) { - MetricInitialize(0u, 0u); - - EXPECT_EQ(zetCommandListAppendMetricQueryBegin(list, query), ZE_RESULT_SUCCESS); - - EXPECT_EQ(zetCommandListAppendMetricQueryEnd(list, query, nullptr, 0u, nullptr), - ZE_RESULT_SUCCESS); - EXPECT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); - - ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeCommandQueueSynchronize(queue, syncTimeout), ZE_RESULT_SUCCESS); -} - -TEST_F(MetricQuery, RunMetricQueryOnEmptyCommandList_CopyEngine) { - cmdQueueDesc.ordinal = 1; - cmdListDesc.commandQueueGroupOrdinal = 1; - MetricInitialize(0u, 0u); + MetricInitialize(0u, 0u, computeGrpOrdinal); EXPECT_EQ(zetCommandListAppendMetricQueryBegin(list, query), ZE_RESULT_SUCCESS); @@ -256,7 +266,7 @@ TEST_F(MetricQuery, RunMetricQueryOnEmptyCommandList_CopyEngine) { TEST_F(MetricQuery, MetricGroupCalculateEmptyMetricQuery) { size_t groupIndex = 1; - MetricInitialize(groupIndex, 0); + MetricInitialize(groupIndex, 0, computeGrpOrdinal); size_t queryDataSize = 0u; EXPECT_EQ(zetMetricQueryGetData(query, &queryDataSize, nullptr), ZE_RESULT_SUCCESS); @@ -295,106 +305,56 @@ TEST_F(MetricQuery, MetricGroupCalculateEmptyMetricQuery) { EXPECT_EQ(metricValues[i].type, metricsPropertiesAll[groupIndex][i].resultType);
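+ // An empty query (no workload between begin and end) is still expected to decode cleanly: each value carries its declared result type but remains zero.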
EXPECT_EQ(metricValues[i].value.ui64, 0llu); } + + TRACE_BUF(metricValues.data(), metricValues.size() * sizeof(zet_typed_value_t)); } + +std::vector queryIndexesComputeEngine = {0, 1}; + GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MetricQuery); INSTANTIATE_TEST_SUITE_P( , MetricQuery, - ::testing::Combine(::testing::ValuesIn(Environment::getConfiguration("graph_metrics")), - ::testing::Values(MetricQueryExecParam{"ShaveIL1Cache", 0u, 0u}, - MetricQueryExecParam{"ShaveIL1Cache", 0u, 1u}, - MetricQueryExecParam{"ShaveDL1Cache", 1u, 0u}, - MetricQueryExecParam{"ShaveDL1Cache", 1u, 1u}, - MetricQueryExecParam{"ShaveL2Cache", 2u, 0u}, - MetricQueryExecParam{"ShaveL2Cache", 2u, 1u}, - MetricQueryExecParam{"NOC", 3u, 0u}, - MetricQueryExecParam{"NOC", 3u, 1u})), - [](const testing::TestParamInfo> &info) { - auto node = std::get<0>(info.param); - auto metricParam = std::get<1>(info.param); - std::string postfix = metricParam.groupName; - postfix += (metricParam.queryIndex != 0u) - ? "OnIndex" + std::to_string(metricParam.queryIndex) - : ""; - return generateTestNameFromNode(node) + postfix; + ::testing::ValuesIn(MetricQuery::createCasesForMetricsTest(queryIndexesComputeEngine)), + [](const testing::TestParamInfo &p) { + auto node = std::get<0>(p.param); + auto metricGroupName = std::get<1>(p.param); + auto queryIndex = std::get<2>(p.param); + + return generateTestNameFromNode(node) + "_" + metricGroupName + "_OnIndex" + + std::to_string(queryIndex); }); TEST_P(MetricQuery, GetDataValueCheck) { - auto &[node, metricParam] = GetParam(); - - if (!isSilicon() && metricParam.groupName == "NOC") - SKIP_("Feature not supported"); - - std::vector> inputBin, outputBin; - std::vector vpuBlob, vpuBin; - - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); - bool checkShaveCounters = false; - if (node["act_shave_tasks"].IsDefined()) - checkShaveCounters = node["act_shave_tasks"].as(); - - MetricInitialize(metricParam.groupIndex, metricParam.queryIndex); - - ze_activation_kernel_desc_t actKernelDesc = {}; - if (!vpuBin.empty()) { - actKernelDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_ACTIVATION_KERNEL, - .pNext = nullptr, - .kernelDataSize = vpuBin.size(), - .pKernelData = reinterpret_cast(vpuBin.data())}; - } + auto &[node, metricGroupName, queryIndex] = GetParam(); + std::filesystem::path path(node["path"].as()); - const ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = !vpuBin.empty() ? &actKernelDesc : nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast(vpuBlob.data()), - .pBuildFlags = nullptr}; - - ze_result_t ret; - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - auto graphHandle = scopedGraphHandle.get(); - - std::vector> mem; - std::vector graphInput, graphOutput; - for (const auto &s : inputBin) { - mem.push_back(AllocHostMemory(s.size())); - graphInput.push_back(mem.back().get()); - } + std::shared_ptr graph = + Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + path.extension() == ".xml" ? 
modelDir + node["path"].as<std::string>() + : blobDir + node["path"].as<std::string>(), + node); - for (const auto &s : outputBin) { - mem.push_back(AllocHostMemory(s.size())); - graphOutput.push_back(mem.back().get()); - } + graph->allocateArguments(MemType::SHARED_MEMORY); - for (size_t i = 0; i < graphInput.size(); i++) { - memcpy(graphInput[i], inputBin[i].data(), inputBin[i].size()); + if (path.extension() == ".xml") { + graph->setRandomInput(); } - uint32_t argIndex = 0; - for (const auto &s : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, s), - ZE_RESULT_SUCCESS); - } - for (const auto &s : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, s), - ZE_RESULT_SUCCESS); - } + uint32_t groupIndex = findMetricGroupIndex(metricGroupName); + + MetricInitialize(groupIndex, queryIndex, computeGrpOrdinal); ASSERT_EQ(zetCommandListAppendMetricQueryBegin(list, query), ZE_RESULT_SUCCESS); - ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0, nullptr), - ZE_RESULT_SUCCESS); ASSERT_EQ( - zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0, nullptr), + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0, nullptr), + ZE_RESULT_SUCCESS); ASSERT_EQ(zeCommandListAppendBarrier(list, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); ASSERT_EQ(zetCommandListAppendMetricQueryEnd(list, query, nullptr, 0u, nullptr), @@ -415,48 +375,167 @@ TEST_P(MetricQuery, GetDataValueCheck) { ZE_RESULT_SUCCESS); TRACE_BUF(queryRawData.data(), queryDataSize); - if (metricParam.groupName == "NOC" || checkShaveCounters) { - // TODO: Temporary WA to 0 value in hit counter - if (metricParam.groupName == "ShaveDL1Cache") { - ASSERT_GT(queryRawData.size(), 1); - EXPECT_TRUE(queryRawData[0] > 0 || queryRawData[1] > 0); - } else { - EXPECT_GT(queryRawData[0], 0u); - } - } else { - EXPECT_EQ(queryRawData[0], 0u); - } + + EXPECT_GT(queryRawData[0], 0u); uint32_t metricValueCount = 0; - EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[metricParam.groupIndex], + EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[groupIndex], ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - queryRawData.size(), + queryDataSize, reinterpret_cast<uint8_t *>(queryRawData.data()), &metricValueCount, nullptr), ZE_RESULT_SUCCESS); - EXPECT_EQ(metricValueCount, groupProperties[metricParam.groupIndex].metricCount); + EXPECT_EQ(metricValueCount, groupProperties[groupIndex].metricCount); std::vector<zet_typed_value_t> metricValues(metricValueCount); - EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[metricParam.groupIndex], + EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[groupIndex], ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, - queryRawData.size(), + queryDataSize, reinterpret_cast<uint8_t *>(queryRawData.data()), &metricValueCount, metricValues.data()), ZE_RESULT_SUCCESS); - EXPECT_EQ(metricValues[0].type, metricsPropertiesAll[metricParam.groupIndex][0].resultType); - if (metricParam.groupName == "NOC" || checkShaveCounters) { - // TODO: Temporary WA to 0 value in hit counter - if (metricParam.groupName == "ShaveDL1Cache") { - ASSERT_GT(metricValues.size(), 1u); - EXPECT_TRUE(metricValues[0].value.ui64 > 0llu || metricValues[1].value.ui64 > 0llu); - } else { - EXPECT_GT(metricValues[0].value.ui64, 0llu); + EXPECT_EQ(metricValues[0].type, metricsPropertiesAll[groupIndex][0].resultType); + +
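+ // In contrast to the empty-query case above, a query wrapped around a real inference is expected to report a non-zero first counter.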
EXPECT_GT(metricValues[0].value.ui64, 0llu); + + TRACE_BUF(metricValues.data(), metricValues.size() * sizeof(zet_typed_value_t)); + + for (uint32_t i = 0; i < metricValueCount; i++) { + TRACE("Metric %lu -> type: %#x, value: %lu\n", + static_cast<uint64_t>(i), + metricValues[i].type, + metricValues[i].value.ui64); + } +} + +class MetricQueryCopyEngine : public MetricQuery { + public: + void SetUp() override { Metric::SetUp(); } + + void TearDown() override { MetricQuery::TearDown(); } + + /* Function returns combinations of the first network with the "NOC" metric + * group and each query index; the copy-engine cases do not depend on the network. + */ + static std::vector<metricTestCase_t> + createCasesForMetricsTest(std::vector<uint32_t> &queryIndexes) { + std::vector<metricTestCase_t> combinations; + try { + std::vector<YAML::Node> network = Environment::getConfiguration("graph_metrics"); + + if (network.empty()) + return combinations; + + for (auto index : queryIndexes) + combinations.push_back({network[0], std::string("NOC"), index}); + + return combinations; + } catch (YAML::Exception &e) { + PRINTF("Bad node: Reason: %s\n", e.what()); + combinations.clear(); + return combinations; + } } +}; + +TEST_F(MetricQueryCopyEngine, RunMetricQueryOnEmptyCommandList) { + MetricInitialize(0u, 0u, copyGrpOrdinal); + + EXPECT_EQ(zetCommandListAppendMetricQueryBegin(list, query), ZE_RESULT_SUCCESS); + + EXPECT_EQ(zetCommandListAppendMetricQueryEnd(list, query, nullptr, 0u, nullptr), + ZE_RESULT_SUCCESS); + EXPECT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, syncTimeout), ZE_RESULT_SUCCESS); +} + +std::vector<uint32_t> queryIndexesCopyEngine = {0}; + +GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MetricQueryCopyEngine); + +INSTANTIATE_TEST_SUITE_P( + , + MetricQueryCopyEngine, + ::testing::ValuesIn(MetricQueryCopyEngine::createCasesForMetricsTest(queryIndexesCopyEngine)), + [](const testing::TestParamInfo<metricTestCase_t> &p) { + auto metricGroupName = std::get<1>(p.param); + auto queryIndex = std::get<2>(p.param); + return metricGroupName + "_OnIndex" + std::to_string(queryIndex); + }); + +TEST_P(MetricQueryCopyEngine, GetDataValue) { + auto &[node, metricGroupName, queryIndex] = GetParam(); + const size_t allocSize = 2048 * 1024; + + std::shared_ptr<void> srcMem, dstMem; + srcMem = AllocSharedMemory(allocSize); + dstMem = AllocSharedMemory(allocSize); + + uint32_t groupIndex = findMetricGroupIndex(metricGroupName); + MetricInitialize(groupIndex, queryIndex, copyGrpOrdinal); + + ASSERT_EQ(zetCommandListAppendMetricQueryBegin(list, query), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandListAppendMemoryCopy(list, + dstMem.get(), + srcMem.get(), + allocSize, + nullptr, + 0, + nullptr), + ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandListAppendBarrier(list, nullptr, 0, nullptr), ZE_RESULT_SUCCESS); + + ASSERT_EQ(zetCommandListAppendMetricQueryEnd(list, query, nullptr, 0u, nullptr), + ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); + + size_t queryDataSize = 0u; + EXPECT_EQ(zetMetricQueryGetData(query, &queryDataSize, nullptr), ZE_RESULT_SUCCESS); + EXPECT_GT(queryDataSize, 0u); + + std::vector<uint64_t> queryRawData(queryDataSize / sizeof(uint64_t), 0u); + EXPECT_EQ(zetMetricQueryGetData(query, + &queryDataSize, + reinterpret_cast<uint8_t *>(queryRawData.data())),
ZE_RESULT_SUCCESS); + + TRACE_BUF(queryRawData.data(), queryDataSize); + + EXPECT_GT(queryRawData[0], 0u); + + uint32_t metricValueCount = 0; + EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[groupIndex], + ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, + queryDataSize, + reinterpret_cast(queryRawData.data()), + &metricValueCount, + nullptr), + ZE_RESULT_SUCCESS); + + EXPECT_EQ(metricValueCount, groupProperties[groupIndex].metricCount); + + std::vector metricValues(metricValueCount); + EXPECT_EQ(zetMetricGroupCalculateMetricValues(metricGroups[groupIndex], + ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, + queryDataSize, + reinterpret_cast(queryRawData.data()), + &metricValueCount, + metricValues.data()), + ZE_RESULT_SUCCESS); + + TRACE_BUF(metricValues.data(), metricValues.size() * sizeof(zet_typed_value_t)); + + EXPECT_EQ(metricValues[0].type, metricsPropertiesAll[groupIndex][0].resultType); + + EXPECT_GT(metricValues[0].value.ui64, 0llu); } diff --git a/validation/umd-test/test_metric_streamer.cpp b/validation/umd-test/test_metric_streamer.cpp index e265bd5..21b03cd 100644 --- a/validation/umd-test/test_metric_streamer.cpp +++ b/validation/umd-test/test_metric_streamer.cpp @@ -5,12 +5,17 @@ * */ -#include "umd_test.h" +#include "graph_utilities.hpp" +#include #include -class MetricStreamer : public UmdTest, - public ::testing::WithParamInterface> { +/*test case definition: + * std::tuple + */ +using metricTestCase_t = std::tuple; + +class MetricStreamer : public UmdTest, public ::testing::WithParamInterface { public: ze_command_queue_desc_t cmdQueueDesc{.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, .pNext = nullptr, @@ -25,17 +30,19 @@ class MetricStreamer : public UmdTest, .commandQueueGroupOrdinal = 0, .flags = 0}; + zeScope::SharedPtr scopedQueue; + zeScope::SharedPtr scopedList; ze_command_queue_handle_t queue = nullptr; ze_command_list_handle_t list = nullptr; + bool useCopyOrdinal = false; void SetUp() override { UmdTest::SetUp(); - ze_result_t ret; - - YAML::Node &configuration = Environment::getConfiguration(); - if (configuration["blob_dir"].IsDefined()) - blobDir = configuration["blob_dir"].as(); + if (useCopyOrdinal) { + cmdQueueDesc.ordinal = UmdTest::copyGrpOrdinal; + cmdListDesc.commandQueueGroupOrdinal = UmdTest::copyGrpOrdinal; + } scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, cmdQueueDesc, ret); ASSERT_EQ(ret, ZE_RESULT_SUCCESS); @@ -45,6 +52,9 @@ class MetricStreamer : public UmdTest, ASSERT_EQ(ret, ZE_RESULT_SUCCESS); list = scopedList.get(); + std::vector metricGroups; + uint32_t metricGroupsCount = 0; + ret = zetMetricGroupGet(zeDevice, &metricGroupsCount, nullptr); if (ret == ZE_RESULT_ERROR_UNSUPPORTED_FEATURE) { SKIP_("Metrics are not supported"); @@ -56,164 +66,458 @@ class MetricStreamer : public UmdTest, ASSERT_EQ(zetMetricGroupGet(zeDevice, &metricGroupsCount, metricGroups.data()), ZE_RESULT_SUCCESS); EXPECT_NE(*metricGroups.data(), nullptr); + + auto [node, metricGroupName, execTime] = GetParam(); + + for (uint32_t i = 0; i < metricGroupsCount; i++) { + metricGroupProperties = {}; + metricGroupProperties.stype = ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES; + + ASSERT_EQ(zetMetricGroupGetProperties(metricGroups[i], &metricGroupProperties), + ZE_RESULT_SUCCESS); + if (metricGroupName == metricGroupProperties.name) { + testedMetricGroup = metricGroups[i]; + break; + } + } + + ASSERT_NE(testedMetricGroup, nullptr); + + testedMetricCount = metricGroupProperties.metricCount; + ASSERT_GT(testedMetricCount, 0); + + std::vector 
<zet_metric_handle_t> metrics(testedMetricCount); + + ASSERT_EQ(zetMetricGet(testedMetricGroup, &testedMetricCount, metrics.data()), + ZE_RESULT_SUCCESS); + + for (uint8_t i = 0; i < testedMetricCount; i++) { + zet_metric_properties_t properties = {}; + properties.stype = ZET_STRUCTURE_TYPE_METRIC_PROPERTIES; + + EXPECT_EQ(zetMetricGetProperties(metrics[i], &properties), ZE_RESULT_SUCCESS); + metricsProperties.push_back(properties); + } + } + + void TearDown() override { + if (hMetricStreamer) { + ASSERT_EQ(zetMetricStreamerClose(hMetricStreamer), ZE_RESULT_SUCCESS); + ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 0u, nullptr), + ZE_RESULT_SUCCESS); + } + UmdTest::TearDown(); + } + + /* Function returns combinations of each network with the metric groups + * defined for it and the inference execution time required to gather metrics. + */ + static std::vector<metricTestCase_t> + createCasesForMetricsTest(std::vector<uint32_t> &executionTime) { + std::vector<metricTestCase_t> combinations; + try { + std::vector<YAML::Node> networkList = Environment::getConfiguration("graph_metrics"); + + if (networkList.empty()) + return combinations; + + for (auto &network : networkList) + for (auto &groupName : network["metric_groups"].as<std::vector<std::string>>()) + for (auto execTime : executionTime) + combinations.push_back({network, groupName, execTime}); + + return combinations; + } catch (YAML::Exception &e) { + PRINTF("Bad node: Reason: %s\n", e.what()); + combinations.clear(); + return combinations; + } } - void TearDown() override { UmdTest::TearDown(); } + void getMetrics(uint32_t &numReports, std::vector<zet_typed_value_t> &metricValues) { + ASSERT_NE(hMetricStreamer, nullptr); + + size_t rawDataSize = 0; + EXPECT_EQ(zetMetricStreamerReadData(hMetricStreamer, UINT32_MAX, &rawDataSize, nullptr), + ZE_RESULT_SUCCESS); + + ASSERT_GT(rawDataSize, 0); + + std::vector<uint64_t> rawData(rawDataSize / sizeof(uint64_t), 0u); + EXPECT_EQ(zetMetricStreamerReadData(hMetricStreamer, + UINT32_MAX, + &rawDataSize, + reinterpret_cast<uint8_t *>(rawData.data())), + ZE_RESULT_SUCCESS); + + TRACE_BUF(rawData.data(), rawDataSize); + + EXPECT_GT(rawData[0], 0u); + + uint32_t metricValueCount = 0; + EXPECT_EQ( + zetMetricGroupCalculateMetricValues(testedMetricGroup, + ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, + rawDataSize, + reinterpret_cast<uint8_t *>(rawData.data()), + &metricValueCount, + nullptr), + ZE_RESULT_SUCCESS); + + metricValues.resize(metricValueCount); + EXPECT_EQ( + zetMetricGroupCalculateMetricValues(testedMetricGroup, + ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES, + rawDataSize, + reinterpret_cast<uint8_t *>(rawData.data()), + &metricValueCount, + metricValues.data()), + ZE_RESULT_SUCCESS); + + TRACE_BUF(metricValues.data(), metricValues.size() * sizeof(zet_typed_value_t)); + numReports = testedMetricCount ?
(metricValueCount / testedMetricCount) : 0; + } protected: + zet_metric_streamer_handle_t hMetricStreamer = nullptr; ze_result_t ret = ZE_RESULT_SUCCESS; - uint32_t metricGroupsCount = 0; - std::vector metricGroups; - - zeScope::SharedPtr scopedQueue; - zeScope::SharedPtr scopedList; - std::string blobDir = ""; + zet_metric_group_handle_t testedMetricGroup = nullptr; + zet_metric_group_properties_t metricGroupProperties; + uint32_t testedMetricCount = 0; + std::vector metricsProperties; }; -std::vector metricGroupNames = {"ShaveIL1Cache", - "ShaveDL1Cache", - "ShaveL2Cache", - "NOC"}; +std::vector execTimeComputeEngineMs = {200}; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MetricStreamer); INSTANTIATE_TEST_SUITE_P( , MetricStreamer, - ::testing::Combine(::testing::ValuesIn(metricGroupNames), - ::testing::ValuesIn(Environment::getConfiguration("graph_metrics"))), - [](const testing::TestParamInfo> &p) { - const YAML::Node node(std::get<1>(p.param)); - return std::get<0>(p.param) + "_" + generateTestNameFromNode(node); + ::testing::ValuesIn(MetricStreamer::createCasesForMetricsTest(execTimeComputeEngineMs)), + [](const testing::TestParamInfo &p) { + const YAML::Node node(std::get<0>(p.param)); + return generateTestNameFromNode(node) + "_" + std::get<1>(p.param); }); TEST_P(MetricStreamer, RunInferenceWithTimeBasedCollection) { - auto [metric, node] = GetParam(); - - std::vector> inputBin, outputBin; - std::vector vpuBlob, vpuBin; + auto [node, metricGroupName, execTime] = GetParam(); + std::filesystem::path path(node["path"].as()); - ASSERT_TRUE(getBlobFromPath(blobDir + node["path"].as(), - node["in"].as>(), - node["out"].as>(), - vpuBlob, - inputBin, - outputBin, - vpuBin)); + std::shared_ptr graph = + Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + path.extension() == ".xml" ? 
modelDir + node["path"].as() + : blobDir + node["path"].as(), + node); - zet_metric_group_handle_t hMetricGroup = nullptr; + graph->allocateArguments(MemType::SHARED_MEMORY); - const char *metricName = metric.c_str(); - for (uint32_t i = 0; i < metricGroupsCount; i++) { - zet_metric_group_properties_t metricGroupProperties = {}; - metricGroupProperties.stype = ZET_STRUCTURE_TYPE_METRIC_GROUP_PROPERTIES; - - ASSERT_EQ(zetMetricGroupGetProperties(metricGroups[i], &metricGroupProperties), - ZE_RESULT_SUCCESS); - if (strcmp(metricName, metricGroupProperties.name) == 0) { - hMetricGroup = metricGroups[i]; - break; - } + if (path.extension() == ".xml") { + graph->setRandomInput(); } - ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 1, &hMetricGroup), + ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 1, &testedMetricGroup), ZE_RESULT_SUCCESS); - zet_metric_streamer_handle_t hMetricStreamer = nullptr; zet_metric_streamer_desc_t metricStreamerDesc = {}; metricStreamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; metricStreamerDesc.samplingPeriod = 10'000'000u; // 10 [ms] metricStreamerDesc.notifyEveryNReports = 20; - ze_activation_kernel_desc_t actKernelDesc = {}; - if (!vpuBin.empty()) { - actKernelDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_ACTIVATION_KERNEL, - .pNext = nullptr, - .kernelDataSize = vpuBin.size(), - .pKernelData = reinterpret_cast(vpuBin.data())}; + ASSERT_EQ(zetMetricStreamerOpen(zeContext, + zeDevice, + testedMetricGroup, + &metricStreamerDesc, + nullptr, + &hMetricStreamer), + ZE_RESULT_SUCCESS); + + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0u, nullptr), + ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle, nullptr, nullptr, 0u, nullptr), + ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + + std::chrono::steady_clock::time_point timeOut = + std::chrono::steady_clock::now() + std::chrono::milliseconds(execTime); + + while (std::chrono::steady_clock::now() < timeOut) { + ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); } - const ze_graph_desc_t graphDesc = {.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - .pNext = !vpuBin.empty() ? 
&actKernelDesc : nullptr, - .format = ZE_GRAPH_FORMAT_NATIVE, - .inputSize = vpuBlob.size(), - .pInput = reinterpret_cast<const uint8_t *>(vpuBlob.data()), - .pBuildFlags = nullptr}; + uint32_t numReports = 0; + std::vector<zet_typed_value_t> metricValues; - auto scopedGraphHandle = - zeScope::graphCreate(zeGraphDDITableExt, zeContext, zeDevice, graphDesc, ret); - ASSERT_EQ(ret, ZE_RESULT_SUCCESS); - auto graphHandle = scopedGraphHandle.get(); + getMetrics(numReports, metricValues); + ASSERT_GT(numReports, 0); + + std::vector<uint64_t> sumOfMetricValues(testedMetricCount, 0); + + int report = 1; + for (size_t i = 0; i < metricValues.size(); i++) { + sumOfMetricValues[i % testedMetricCount] += metricValues[i].value.ui64; + + if (i % testedMetricCount == 0) { + TRACE("Report #%d\n", report); + report++; + } + + TRACE("Metric %lu -> type: %#x, value: %lu\n", + i % (testedMetricCount), + metricValues[i].type, + metricValues[i].value.ui64); + } + + TRACE("Summed values from individual metrics:\n"); + for (uint32_t i = 0; i < sumOfMetricValues.size(); i++) { + EXPECT_GT(sumOfMetricValues[i], 0llu) + << "Sum of values for the metric " << i << " is equal to 0"; + TRACE("Sum for Metric #%lu: %llu\n", static_cast<uint64_t>(i), sumOfMetricValues[i]); + } +} + +TEST_P(MetricStreamer, RunInferenceUseEventToCollectMetrics) { + const uint32_t samplingPeriodMs = 10; + const uint32_t nReportsNotification = 3; + + auto [node, metricGroupName, execTime] = GetParam(); + std::filesystem::path path(node["path"].as<std::string>()); + + ASSERT_GT(execTime, samplingPeriodMs * nReportsNotification) << "Execution time set is too short"; + + std::shared_ptr<Graph> graph = + Graph::create(zeContext, + zeDevice, + zeGraphDDITableExt, + path.extension() == ".xml" ? modelDir + node["path"].as<std::string>() + : blobDir + node["path"].as<std::string>(), + node); - std::vector<std::shared_ptr<void>> mem; - std::vector<void *> graphInput, graphOutput; + graph->allocateArguments(MemType::SHARED_MEMORY); - for (const auto &s : inputBin) { - mem.push_back(AllocHostMemory(s.size())); - graphInput.push_back(mem.back().get()); + if (path.extension() == ".xml") { + graph->setRandomInput(); } + ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 1, &testedMetricGroup), + ZE_RESULT_SUCCESS); + + zet_metric_streamer_desc_t metricStreamerDesc = {}; + metricStreamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC; + metricStreamerDesc.samplingPeriod = static_cast<uint64_t>(samplingPeriodMs) * 1'000'000u; + metricStreamerDesc.notifyEveryNReports = nReportsNotification; + + const ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, + nullptr, + ZE_EVENT_POOL_FLAG_HOST_VISIBLE, + 1}; + const ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC, + nullptr, + 0, + ZE_EVENT_SCOPE_FLAG_HOST, + ZE_EVENT_SCOPE_FLAG_HOST}; + auto scopedEventPool = zeScope::eventPoolCreate(zeContext, eventPoolDesc, 1, zeDevice, ret); + ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + ASSERT_NE(scopedEventPool.get(), nullptr); + auto metricEvent = zeScope::eventCreate(scopedEventPool.get(), eventDesc, ret); + ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + ASSERT_NE(metricEvent.get(), nullptr); + + ASSERT_EQ(zetMetricStreamerOpen(zeContext, + zeDevice, + testedMetricGroup, + &metricStreamerDesc, + metricEvent.get(), + &hMetricStreamer), + ZE_RESULT_SUCCESS); - for (const auto &s : outputBin) { - mem.push_back(AllocHostMemory(s.size())); - graphOutput.push_back(mem.back().get()); + ASSERT_EQ( + zeGraphDDITableExt->pfnAppendGraphInitialize(list, graph->handle, nullptr, 0u, nullptr), + ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeGraphDDITableExt + ->pfnAppendGraphExecute(list, graph->handle,
nullptr, nullptr, 0u, nullptr), + ZE_RESULT_SUCCESS); + + ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS); + + /* Warm up: memory allocation and HW wake-up */ + EXPECT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + EXPECT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); + + uint32_t inferenceTime = (samplingPeriodMs * nReportsNotification) + (2 * samplingPeriodMs); + std::chrono::steady_clock::time_point testTimeOut = + std::chrono::steady_clock::now() + std::chrono::milliseconds(execTime); + + std::chrono::steady_clock::time_point inferenceTimeOut = + std::chrono::steady_clock::now() + std::chrono::milliseconds(inferenceTime); + + /* Check that the event is signalled after the expected time */ + while (std::chrono::steady_clock::now() < inferenceTimeOut) { + EXPECT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS); + EXPECT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); } + ASSERT_EQ(zeEventQueryStatus(metricEvent.get()), ZE_RESULT_SUCCESS); + ASSERT_EQ(zeEventHostReset(metricEvent.get()), ZE_RESULT_SUCCESS); + + /* Metric data is still stored in the buffer; check that the event is signalled again */ + ASSERT_EQ(zeEventQueryStatus(metricEvent.get()), ZE_RESULT_SUCCESS); + + /* Clear the buffer and reset the event before continuing the test */ + { + uint32_t numReports = 0; + std::vector<zet_typed_value_t> metricValues; + getMetrics(numReports, metricValues); + ASSERT_EQ(zeEventHostReset(metricEvent.get()), ZE_RESULT_SUCCESS); + } + + /* Check zeEventHostSynchronize on the metric event */ + while (std::chrono::steady_clock::now() < testTimeOut) { + inferenceTimeOut = + std::chrono::steady_clock::now() + std::chrono::milliseconds(inferenceTime); + + EXPECT_NE(zeEventQueryStatus(metricEvent.get()), ZE_RESULT_SUCCESS); + + std::future<_ze_result_t> eventSynchronize = std::async(std::launch::async, [&] { + return zeEventHostSynchronize(metricEvent.get(), inferenceTime * 1'000'000); + }); + + while (std::chrono::steady_clock::now() < inferenceTimeOut) { + EXPECT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), + ZE_RESULT_SUCCESS); + EXPECT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS); } + EXPECT_EQ(eventSynchronize.get(), ZE_RESULT_SUCCESS); + EXPECT_EQ(zeEventQueryStatus(metricEvent.get()), ZE_RESULT_SUCCESS); + + uint32_t numReports = 0; + std::vector<zet_typed_value_t> metricValues; + + getMetrics(numReports, metricValues); + /* At least the notified number of reports is expected */ + EXPECT_GE(numReports, nReportsNotification); + EXPECT_EQ(zeEventHostReset(metricEvent.get()), ZE_RESULT_SUCCESS); + } +} - uint32_t argIndex = 0; - for (const auto &input : graphInput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, input), - ZE_RESULT_SUCCESS); +class MetricStreamerCopyEngine : public MetricStreamer { + public: + void SetUp() override { + useCopyOrdinal = true; + MetricStreamer::SetUp(); } - for (const auto &output : graphOutput) { - ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, output), - ZE_RESULT_SUCCESS); + void TearDown() override { MetricStreamer::TearDown(); } + + /* Generates test cases for the copy engine; it ignores the network and + * returns combinations of metric groups with the copy-job execution time + * required to gather metrics.
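+ * Only the "NOC" group is generated here; the assumption is that network-on-chip + * traffic counters are the group expected to tick for pure copy-engine (DMA) jobs.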
+class MetricStreamerCopyEngine : public MetricStreamer {
+  public:
+    void SetUp() override {
+        useCopyOrdinal = true;
+        MetricStreamer::SetUp();
     }

-    for (const auto &output : graphOutput) {
-        ASSERT_EQ(zeGraphDDITableExt->pfnSetArgumentValue(graphHandle, argIndex++, output),
-                  ZE_RESULT_SUCCESS);
+    void TearDown() override { MetricStreamer::TearDown(); }
+
+    /* Generates test cases for the copy engine. It ignores the network and returns
+     * combinations of metric groups and the copy-job execution time required
+     * to gather metrics.
+     */
+    static std::vector<std::tuple<YAML::Node, std::string, uint32_t>>
+    createCasesForMetricsTest(std::vector<uint32_t> &executionTime) {
+        std::vector<std::tuple<YAML::Node, std::string, uint32_t>> combinations;
+        try {
+            std::vector<YAML::Node> network = Environment::getConfiguration("graph_metrics");
+
+            if (network.empty())
+                return combinations;
+            /* The network node is pushed only to satisfy the tuple type, it is not used */
+            for (auto &execTime : executionTime)
+                combinations.push_back({network[0], std::string("NOC"), execTime});
+
+            return combinations;
+        } catch (YAML::Exception &e) {
+            PRINTF("Bad node: Reason: %s\n", e.what());
+            combinations.clear();
+            return combinations;
+        }
+    }
+};
+
+/* Note: When the execution time is shorter than the sampling time, the device can enter
+ * the D3 state and stop the sampling timer. In this case the host gets no samples for
+ * the first sampling period and the buffer is not returned.
+ * The current firmware implementation creates the buffer once the first sampling period
+ * has passed. To overcome this problem the job execution time must be longer than the
+ * sampling period. (details: EISW-96802)
+ */
+std::vector<uint32_t> execTimeCopyEngineMs = {20, 100};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(MetricStreamerCopyEngine);
+
+INSTANTIATE_TEST_SUITE_P(
+    ,
+    MetricStreamerCopyEngine,
+    ::testing::ValuesIn(MetricStreamerCopyEngine::createCasesForMetricsTest(execTimeCopyEngineMs)),
+    [](const testing::TestParamInfo<MetricStreamerCopyEngine::ParamType> &p) {
+        std::string groupName = std::get<1>(p.param);
+        uint32_t execTime = std::get<2>(p.param);
+        return groupName + "_" + std::to_string(execTime) + "ms";
+    });
+
+TEST_P(MetricStreamerCopyEngine, RunCopyWithTimeBasedCollection) {
+    auto [node, metricGroupName, execTime] = GetParam();
+
+    ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 1, &testedMetricGroup),
+              ZE_RESULT_SUCCESS);
+
+    zet_metric_streamer_desc_t metricStreamerDesc = {};
+    metricStreamerDesc.stype = ZET_STRUCTURE_TYPE_METRIC_STREAMER_DESC;
+    metricStreamerDesc.samplingPeriod = 10'000'000u; // 10 [ms]
+    metricStreamerDesc.notifyEveryNReports = 20;

-    // Open metric streamer
     ASSERT_EQ(zetMetricStreamerOpen(zeContext,
                                     zeDevice,
-                                    hMetricGroup,
+                                    testedMetricGroup,
                                     &metricStreamerDesc,
                                     nullptr,
                                     &hMetricStreamer),
               ZE_RESULT_SUCCESS);

-    // Workload
-    ASSERT_EQ(zeGraphDDITableExt->pfnAppendGraphInitialize(list, graphHandle, nullptr, 0u, nullptr),
+    const size_t allocSize = 64 * KB;
+    std::shared_ptr<void> srcMem, dstMem;
+    srcMem = AllocSharedMemory(allocSize);
+    dstMem = AllocSharedMemory(allocSize);
+    ASSERT_EQ(zeCommandListAppendMemoryCopy(list,
+                                            dstMem.get(),
+                                            srcMem.get(),
+                                            allocSize,
+                                            nullptr,
+                                            0,
+                                            nullptr),
               ZE_RESULT_SUCCESS);
-    ASSERT_EQ(
-        zeGraphDDITableExt->pfnAppendGraphExecute(list, graphHandle, nullptr, nullptr, 0u, nullptr),
-        ZE_RESULT_SUCCESS);
-
     ASSERT_EQ(zeCommandListClose(list), ZE_RESULT_SUCCESS);

+    /* Warm up NPU to set metric streamer in busy state */
     ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS);
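+    /* Keep the copy engine busy for execTime ms so the streamer can collect samples */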
-    ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS);

-    // Read raw data
-    size_t rawDataSize = 0;
-    // maxReportCount – the maximum number of reports the application wants to receive.
-    // If UINT32_MAX, then function will retrieve all reports available
-    ASSERT_EQ(zetMetricStreamerReadData(hMetricStreamer, UINT32_MAX, &rawDataSize, nullptr),
-              ZE_RESULT_SUCCESS);
+    std::chrono::steady_clock::time_point timeOut =
+        std::chrono::steady_clock::now() + std::chrono::milliseconds(execTime);

-    std::vector<uint64_t> rawData(rawDataSize / sizeof(uint64_t), 0u);
-    ASSERT_EQ(zetMetricStreamerReadData(hMetricStreamer,
-                                        UINT32_MAX,
-                                        &rawDataSize,
-                                        reinterpret_cast<uint8_t *>(rawData.data())),
-              ZE_RESULT_SUCCESS);
+    while (std::chrono::steady_clock::now() < timeOut) {
+        ASSERT_EQ(zeCommandQueueExecuteCommandLists(queue, 1, &list, nullptr), ZE_RESULT_SUCCESS);
+        ASSERT_EQ(zeCommandQueueSynchronize(queue, graphSyncTimeout), ZE_RESULT_SUCCESS);
+    }
+
+    uint32_t numReports = 0;
+    std::vector<zet_typed_value_t> metricValues;

-    TRACE_BUF(rawData.data(), rawDataSize);
+    getMetrics(numReports, metricValues);
+    EXPECT_GT(numReports, 0);
+    ASSERT_GT(metricValues.size(), 0);
+    EXPECT_EQ(metricValues[0].type, metricsProperties[0].resultType);

-    // Close metric streamer
-    ASSERT_EQ(zetMetricStreamerClose(hMetricStreamer), ZE_RESULT_SUCCESS);
+    EXPECT_GT(metricValues[0].value.ui64, 0llu);

-    ASSERT_EQ(zetContextActivateMetricGroups(zeContext, zeDevice, 0u, nullptr), ZE_RESULT_SUCCESS);
+    TRACE_BUF(metricValues.data(), metricValues.size() * sizeof(zet_typed_value_t));
 }
diff --git a/validation/umd-test/test_ov_inference.cpp b/validation/umd-test/test_ov_inference.cpp
new file mode 100644
index 0000000..926906c
--- /dev/null
+++ b/validation/umd-test/test_ov_inference.cpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2023-2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+#include "umd_test.h"
+#include "openvino/openvino.hpp"
+#include "image.hpp"
+
+class OpenVinoBasic : public UmdTest, public ::testing::WithParamInterface<YAML::Node> {
+  protected:
+    void SetUp() override {
+        UmdTest::SetUp();
+
+        YAML::Node &configuration = Environment::getConfiguration();
+        if (configuration["ov_log_level"].IsDefined()) {
+            std::string lvl = configuration["ov_log_level"].as<std::string>();
+            ov::log::Level logLevel;
+            if (lvl == "ERR")
+                logLevel = ov::log::Level::ERR;
+            else if (lvl == "WARNING")
+                logLevel = ov::log::Level::WARNING;
+            else if (lvl == "INFO")
+                logLevel = ov::log::Level::INFO;
+            else if (lvl == "DEBUG")
+                logLevel = ov::log::Level::DEBUG;
+            else if (lvl == "TRACE")
+                logLevel = ov::log::Level::TRACE;
+            else
+                logLevel = ov::log::Level::NO;
+
+            core.set_property(ov::log::level(logLevel));
+        }
+    }
+
+    template <ov::element::Type_t ET>
+    void processResults(const ov::Tensor &inferenceOutput, uint32_t classExpected) {
+        using T = ov::fundamental_type_for<ET>;
+        ov::Shape outputShape = inferenceOutput.get_shape();
+        ASSERT_GT(outputShape.size(), 0);
+
+        /* Only batch size 1 is processed */
+        ASSERT_EQ(outputShape[0], 1);
+
+        std::vector<T> results(inferenceOutput.get_size());
+        const T *outputData = inferenceOutput.data<T>();
+        std::for_each(results.begin(), results.end(), [&](T &n) { n = *outputData++; });
+        auto it = std::max_element(results.begin(), results.end());
+        size_t index = std::distance(results.begin(), it);
+        PRINTF("Class = %li, Accuracy = %f\n", index, static_cast<float>(*it));
+        ASSERT_EQ(index, static_cast<size_t>(classExpected));
+    }
+
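+    /* Dispatch on the tensor's runtime element type to the typed implementation above */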
+    void processResults(const ov::Tensor &results, uint32_t exp) {
+        using T = ov::element::Type_t;
+        switch (results.get_element_type()) {
+        case T::f32:
+            processResults<T::f32>(results, exp);
+            break;
+        case T::f64:
+            processResults<T::f64>(results, exp);
+            break;
+        case T::f16:
+            processResults<T::f16>(results, exp);
+            break;
+        case T::i16:
+            processResults<T::i16>(results, exp);
+            break;
+        case T::u8:
+            processResults<T::u8>(results, exp);
+            break;
+        case T::i8:
+            processResults<T::i8>(results, exp);
+            break;
+        case T::u16:
+            processResults<T::u16>(results, exp);
+            break;
+        case T::i32:
+            processResults<T::i32>(results, exp);
+            break;
+        case T::u32:
+            processResults<T::u32>(results, exp);
+            break;
+        case T::i64:
+            processResults<T::i64>(results, exp);
+            break;
+        case T::u64:
+            processResults<T::u64>(results, exp);
+            break;
+        default:
+            GTEST_FAIL() << "Unrecognized result type" << std::endl;
+            break;
+        }
+    }
+
+    ov::Core core;
+};
+
+TEST_F(OpenVinoBasic, CheckDevice) {
+    try {
+        ov::Version version = ov::get_openvino_version();
+        TRACE("OpenVINO name: %s\n", version.description);
+        TRACE("OpenVINO build: %s\n", version.buildNumber);
+
+        const std::vector<std::string> devices = core.get_available_devices();
+        for (auto &&device : devices) {
+            if (device == "NPU") {
+                auto deviceVersion = core.get_versions(device);
+                for (auto &v : deviceVersion) {
+                    TRACE("Plugin name: %s\n", v.second.description);
+                    TRACE("Plugin build: %s\n", v.second.buildNumber);
+                }
+                return;
+            }
+        }
+        GTEST_FAIL() << "NPU device not available" << std::endl;
+    } catch (const std::exception &e) {
+        GTEST_FAIL() << "Exception: " << e.what() << std::endl;
+    }
+}
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(OpenVinoBasic);
+
+INSTANTIATE_TEST_SUITE_P(,
+                         OpenVinoBasic,
+                         ::testing::ValuesIn(Environment::getConfiguration("openvino")),
+                         [](const testing::TestParamInfo<YAML::Node> &p) {
+                             return generateTestNameFromNode(p.param);
+                         });
+
+TEST_P(OpenVinoBasic, CompileModelWithGraphInitAndExecute) {
+    const YAML::Node node = GetParam();
+
+    std::string modelPath(modelDir + node["path"].as<std::string>());
+    auto mainModel = core.read_model(modelPath.c_str());
+
+    ASSERT_EQ(mainModel->inputs().size(), 1);
+
+    std::vector<std::string> testImages;
+    for (auto &image : node["input"].as<std::vector<std::string>>()) {
+        testImages.push_back(imageDir + image);
+    }
+
+    if (testImages.empty())
+        GTEST_FAIL() << "Test image not found" << std::endl;
+
+    std::vector<uint32_t> expectedImageClassIndexes;
+
+    if (node["class_index"].IsDefined()) {
+        expectedImageClassIndexes = node["class_index"].as<std::vector<uint32_t>>();
+        ASSERT_EQ(expectedImageClassIndexes.size(), testImages.size());
+    }
+
+    for (size_t i = 0; i < testImages.size(); i++) {
+        /* Use default BGR order for color channels */
+        Image img(testImages[i]);
+
+        /* This test is limited to images with one byte per channel -> 3 bytes per pixel */
+        /* TODO: Add support for other pixel data formats */
+        ASSERT_EQ(img.getElementSize(), sizeof(ov::fundamental_type_for<ov::element::Type_t::u8>));
+
+        ov::element::Type input_type = ov::element::u8;
+        const ov::Layout input_layout{"NHWC"};
+        ov::Shape input_shape = {1,
+                                 static_cast<size_t>(img.getHeight()),
+                                 static_cast<size_t>(img.getWidth()),
+                                 static_cast<size_t>(img.getChannels())};
+        ov::Tensor input_tensor = ov::Tensor(input_type, input_shape, img.getPtr());
+
+        auto networkOutputs = mainModel->outputs();
+        ov::preprocess::PrePostProcessor ppp(mainModel);
+
+        ppp.input()
+            .tensor()
+            .set_shape(input_shape)
+            .set_element_type(input_type)
+            .set_layout(input_layout);
+        ppp.input().preprocess().resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR);
+        for (auto &o : networkOutputs)
+            ppp.output(o.get_any_name()).tensor().set_element_type(ov::element::f32);
+        auto model = ppp.build();
+
+        ov::CompiledModel compiled_model = core.compile_model(model, "NPU");
+        ov::InferRequest infer_request = compiled_model.create_infer_request();
+
+        infer_request.set_input_tensor(input_tensor);
+        infer_request.infer();
+
+        /* Test can parse and compare results for imagenet classes where
+         * the output format has shape: [1, 1000]
+         */
+        if (networkOutputs.size() == 1 &&
+            networkOutputs[0].get_shape() ==
+                ov::Shape(std::initializer_list<size_t>({1, 1000})) &&
+            !expectedImageClassIndexes.empty()) {
+            const ov::Tensor &output = infer_request.get_output_tensor();
+            processResults(output, expectedImageClassIndexes[i]);
+        } else {
+            for (size_t index = 0; index < networkOutputs.size(); index++) {
+                const ov::Tensor &output = infer_request.get_output_tensor(index);
+                ASSERT_GT(output.get_size(), 0);
+            }
+        }
+    }
+}
diff --git a/validation/umd-test/test_prime_buffers.cpp b/validation/umd-test/test_prime_buffers.cpp
new file mode 100644
index 0000000..9ef3bdf
--- /dev/null
+++ b/validation/umd-test/test_prime_buffers.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2023 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "umd_test.h"
+#include "umd_prime_buffers.h"
+
+#include <sys/mman.h>
+#include <unistd.h>
+#include <cerrno>
+
+class PrimeBuffers : public UmdTest, public ::testing::WithParamInterface<uint64_t> {
+  public:
+    void TearDown() override { UmdTest::TearDown(); }
+
+    /* Get allocation properties descriptor */
+    ze_memory_allocation_properties_t pMemAllocProperties = {
+        .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES,
+        .pNext = nullptr,
+        .type = ZE_MEMORY_TYPE_UNKNOWN,
+        .id = 0,
+        .pageSize = 0};
+
+    /* Alloc host memory descriptor */
+    ze_host_mem_alloc_desc_t pHostMemAllocDesc = {.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
+                                                  .pNext = nullptr,
+                                                  .flags = 0};
+    /* Alloc device memory descriptor */
+    ze_device_mem_alloc_desc_t pDeviceMemAllocDesc = {.stype =
+                                                          ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
+                                                      .pNext = nullptr,
+                                                      .flags = 0,
+                                                      .ordinal = 0};
+
+    PrimeBufferHelper primeHelper;
+};
+
+INSTANTIATE_TEST_SUITE_P(Sizes,
+                         PrimeBuffers,
+                         ::testing::Values(2 * KB, 16 * MB, 255 * MB),
+                         [](const testing::TestParamInfo<uint64_t> &cmd) {
+                             return memSizeToStr(cmd.param);
+                         });
+
+TEST_P(PrimeBuffers, exportDeviceMemory) {
+    ze_result_t ret;
+    uint64_t size = GetParam();
+
+    /* Prepare request for external allocation */
+    pDeviceMemAllocDesc.pNext = &primeHelper.externalExportDesc;
+
+    auto scopedMem =
+        zeScope::memAllocDevice(zeContext, pDeviceMemAllocDesc, size, 0, zeDevice, ret);
+    ASSERT_EQ(ret, ZE_RESULT_SUCCESS);
+    ASSERT_NE(scopedMem.get(), nullptr);
+    const int32_t pattern = 0xDEADAABB;
+    memcpy(scopedMem.get(), &pattern, sizeof(pattern));
+
+    /* Prepare request to map the allocation to a fd */
+    pMemAllocProperties.pNext = &primeHelper.externalExportFdDesc;
+    ASSERT_EQ(ZE_RESULT_SUCCESS,
+              zeMemGetAllocProperties(zeContext, scopedMem.get(), &pMemAllocProperties, nullptr));
+    ASSERT_GE(primeHelper.externalExportFdDesc.fd, 0);
+    EXPECT_EQ(pMemAllocProperties.type, ZE_MEMORY_TYPE_DEVICE);
+    EXPECT_EQ(pMemAllocProperties.id, 0u);
+    EXPECT_EQ(pMemAllocProperties.pageSize, size);
+    EXPECT_EQ(lseek(primeHelper.externalExportFdDesc.fd, 0, SEEK_END), ALLIGN_TO_PAGE(size));
+    lseek(primeHelper.externalExportFdDesc.fd, 0, SEEK_CUR);
+
+    /* mmap memory and check pattern */
+    void *ptr = mmap(NULL,
+                     size,
+                     PROT_READ | PROT_WRITE,
+                     MAP_SHARED,
+                     primeHelper.externalExportFdDesc.fd,
+                     0);
+    ASSERT_NE(ptr, MAP_FAILED) << "error " << errno;
+    EXPECT_EQ(*static_cast<int32_t *>(ptr), pattern);
+    EXPECT_EQ(munmap(ptr, size), 0);
+    EXPECT_EQ(close(primeHelper.externalExportFdDesc.fd), 0);
+}
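+
+/* Export path: the fd obtained through ze_external_memory_export_fd_t is a dma-buf
+ * that the host can inspect with lseek() and map with mmap(). */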
+
+TEST_P(PrimeBuffers, exportHostMemory) {
+    ze_result_t ret;
+    uint64_t size = GetParam();
+
+    /* Prepare request for external allocation */
+    pHostMemAllocDesc.pNext = &primeHelper.externalExportDesc;
+
+    auto scopedMem = zeScope::memAllocHost(zeContext, pHostMemAllocDesc, size, 0, ret);
+    ASSERT_EQ(ret, ZE_RESULT_SUCCESS);
+    ASSERT_NE(scopedMem.get(), nullptr);
+    const int32_t pattern = 0xDEADAABB;
+    memcpy(scopedMem.get(), &pattern, sizeof(pattern));
+
+    /* Prepare request to map the allocation to a fd */
+    pMemAllocProperties.pNext = &primeHelper.externalExportFdDesc;
+    ASSERT_EQ(ZE_RESULT_SUCCESS,
+              zeMemGetAllocProperties(zeContext, scopedMem.get(), &pMemAllocProperties, nullptr));
+    ASSERT_GE(primeHelper.externalExportFdDesc.fd, 0);
+    EXPECT_EQ(pMemAllocProperties.type, ZE_MEMORY_TYPE_HOST);
+    EXPECT_EQ(pMemAllocProperties.id, 0u);
+    EXPECT_EQ(pMemAllocProperties.pageSize, size);
+    EXPECT_EQ(lseek(primeHelper.externalExportFdDesc.fd, 0, SEEK_END), ALLIGN_TO_PAGE(size));
+    lseek(primeHelper.externalExportFdDesc.fd, 0, SEEK_CUR);
+
+    /* mmap memory and check pattern */
+    void *ptr = mmap(NULL,
+                     size,
+                     PROT_READ | PROT_WRITE,
+                     MAP_SHARED,
+                     primeHelper.externalExportFdDesc.fd,
+                     0);
+    ASSERT_NE(ptr, MAP_FAILED) << "error " << errno;
+    EXPECT_EQ(*static_cast<int32_t *>(ptr), pattern);
+    EXPECT_EQ(munmap(ptr, size), 0);
+    EXPECT_EQ(close(primeHelper.externalExportFdDesc.fd), 0);
+}
+
+TEST_P(PrimeBuffers, exportSharedMemory) {
+    ze_result_t ret;
+    uint64_t size = GetParam();
+
+    /* Prepare request for external allocation */
+    pDeviceMemAllocDesc.pNext = &primeHelper.externalExportDesc;
+
+    auto scopedMem = zeScope::memAllocShared(zeContext,
+                                             pDeviceMemAllocDesc,
+                                             pHostMemAllocDesc,
+                                             size,
+                                             0,
+                                             zeDevice,
+                                             ret);
+    ASSERT_EQ(ret, ZE_RESULT_SUCCESS);
+    ASSERT_NE(scopedMem.get(), nullptr);
+    const int32_t pattern = 0xDEADAABB;
+    memcpy(scopedMem.get(), &pattern, sizeof(pattern));
+
+    /* Prepare request to map the allocation to a fd */
+    pMemAllocProperties.pNext = &primeHelper.externalExportFdDesc;
+    ASSERT_EQ(ZE_RESULT_SUCCESS,
+              zeMemGetAllocProperties(zeContext, scopedMem.get(), &pMemAllocProperties, nullptr));
+    ASSERT_GE(primeHelper.externalExportFdDesc.fd, 0);
+    EXPECT_EQ(pMemAllocProperties.type, ZE_MEMORY_TYPE_SHARED);
+    EXPECT_EQ(pMemAllocProperties.id, 0u);
+    EXPECT_EQ(pMemAllocProperties.pageSize, size);
+    EXPECT_EQ(lseek(primeHelper.externalExportFdDesc.fd, 0, SEEK_END), ALLIGN_TO_PAGE(size));
+    lseek(primeHelper.externalExportFdDesc.fd, 0, SEEK_CUR);
+
+    /* mmap memory and check pattern */
+    void *ptr = mmap(NULL,
+                     size,
+                     PROT_READ | PROT_WRITE,
+                     MAP_SHARED,
+                     primeHelper.externalExportFdDesc.fd,
+                     0);
+    ASSERT_NE(ptr, MAP_FAILED) << "error " << errno;
+    EXPECT_EQ(*static_cast<int32_t *>(ptr), pattern);
+    EXPECT_EQ(munmap(ptr, size), 0);
+    EXPECT_EQ(close(primeHelper.externalExportFdDesc.fd), 0);
+}
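+
+/* Import path: allocate a dma-buf from the system heap and hand its fd to the driver
+ * through ze_external_memory_import_fd_t. */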
Skip test"; + + ASSERT_TRUE(primeHelper.createDMABuffer(dmaBufferSize, dmaBufferFd)); + + /* Prepare request to import dma buffer as device memory */ + primeHelper.externalImportFromFdDesc.fd = dmaBufferFd; + pDeviceMemAllocDesc.pNext = &primeHelper.externalImportFromFdDesc; + + auto scopedImportedMemory = + zeScope::memAllocDevice(zeContext, pDeviceMemAllocDesc, dmaBufferSize, 0, zeDevice, ret); + ASSERT_EQ(ret, ZE_RESULT_SUCCESS); + ASSERT_NE(scopedImportedMemory.get(), nullptr); + + /* Check allocation properties */ + pMemAllocProperties.pNext = nullptr; + ASSERT_EQ(ZE_RESULT_SUCCESS, + zeMemGetAllocProperties(zeContext, + scopedImportedMemory.get(), + &pMemAllocProperties, + nullptr)); + ASSERT_EQ(pMemAllocProperties.type, ZE_MEMORY_TYPE_DEVICE); + ASSERT_EQ(pMemAllocProperties.id, 0u); + ASSERT_EQ(pMemAllocProperties.pageSize, ALLIGN_TO_PAGE(dmaBufferSize)); + + /* mmap original dma buffer and write pattern to it */ + void *dmaBufferPtr = primeHelper.mmapDmaBuffer(dmaBufferFd); + ASSERT_NE(dmaBufferPtr, MAP_FAILED) << "error " << errno; + const int32_t pattern = 0xDEADAABB; + memcpy(dmaBufferPtr, &pattern, sizeof(pattern)); + + /*check if pattern match on imported buffer*/ + int32_t *importedPtr = static_cast(scopedImportedMemory.get()); + EXPECT_EQ(*importedPtr, pattern); +} diff --git a/validation/umd-test/test_priority.cpp b/validation/umd-test/test_priority.cpp new file mode 100644 index 0000000..d97a14f --- /dev/null +++ b/validation/umd-test/test_priority.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "umd_test.h" +#include "ze_scope.hpp" + +#include +#include + +class CommandQueuePriority : public UmdTest { + public: + ze_result_t createCommandQueue(ze_command_queue_priority_t priority, + ze_command_queue_handle_t *handle) { + ze_result_t ret = ZE_RESULT_SUCCESS; + ze_command_queue_desc_t desc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + .pNext = nullptr, + .ordinal = 0, + .index = 0, + .flags = 0, + .mode = ZE_COMMAND_QUEUE_MODE_DEFAULT, + .priority = priority}; + auto scopedQueue = zeScope::commandQueueCreate(zeContext, zeDevice, desc, ret); + std::lock_guard lock(mutex); + scopedQueueVec.push_back(std::move(scopedQueue)); + *handle = scopedQueueVec.back().get(); + return ret; + } + + ze_result_t createCommandList(ze_command_list_handle_t *handle) { + ze_result_t ret = ZE_RESULT_SUCCESS; + ze_command_list_desc_t desc = {.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, + .pNext = nullptr, + .commandQueueGroupOrdinal = 0, + .flags = 0}; + auto scopedList = zeScope::commandListCreate(zeContext, zeDevice, desc, ret); + std::lock_guard lock(mutex); + scopedListVec.push_back(std::move(scopedList)); + *handle = scopedListVec.back().get(); + return ret; + } + + ze_result_t createEvent(ze_event_handle_t *handle) { + ze_result_t ret = ZE_RESULT_SUCCESS; + const ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, + nullptr, + ZE_EVENT_POOL_FLAG_HOST_VISIBLE, + 1}; + + auto scopedEventPool = zeScope::eventPoolCreate(zeContext, eventPoolDesc, 1, zeDevice, ret); + if (ret != ZE_RESULT_SUCCESS) + return ret; + + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC, + nullptr, + 0, + ZE_EVENT_SCOPE_FLAG_HOST, + ZE_EVENT_SCOPE_FLAG_HOST}; + auto scopedEvent = zeScope::eventCreate(scopedEventPool.get(), eventDesc, ret); + + std::lock_guard lock(mutex); + scopedEventPoolVec.push_back(std::move(scopedEventPool)); + scopedEventVec.push_back(std::move(scopedEvent)); + *handle = 
+        return ret;
+    }
+
+    void
+    executeCopyCommand(ze_command_queue_priority_t priority, size_t memSize, size_t timeExecMs) {
+        ze_command_queue_handle_t hQueue = nullptr;
+        EXPECT_EQ(createCommandQueue(priority, &hQueue), ZE_RESULT_SUCCESS);
+
+        ze_command_list_handle_t hList = nullptr;
+        EXPECT_EQ(createCommandList(&hList), ZE_RESULT_SUCCESS);
+
+        auto srcMem = AllocDeviceMemory(memSize);
+        memset(srcMem.get(), 'c', memSize);
+        auto dstMem = AllocDeviceMemory(memSize);
+
+        EXPECT_EQ(zeCommandListAppendMemoryCopy(hList,
+                                                dstMem.get(),
+                                                srcMem.get(),
+                                                memSize,
+                                                nullptr,
+                                                0,
+                                                nullptr),
+                  ZE_RESULT_SUCCESS);
+        EXPECT_EQ(zeCommandListClose(hList), ZE_RESULT_SUCCESS);
+
+        auto timeout = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeExecMs);
+        do {
+            EXPECT_EQ(zeCommandQueueExecuteCommandLists(hQueue, 1, &hList, nullptr),
+                      ZE_RESULT_SUCCESS);
+            EXPECT_EQ(zeCommandQueueSynchronize(hQueue, syncTimeout), ZE_RESULT_SUCCESS);
+            EXPECT_EQ(memcmp(dstMem.get(), srcMem.get(), memSize), 0);
+        } while (std::chrono::steady_clock::now() < timeout);
+    }
+
+    std::mutex mutex;
+    std::vector<zeScope::SharedPtr<ze_command_queue_handle_t>> scopedQueueVec;
+    std::vector<zeScope::SharedPtr<ze_command_list_handle_t>> scopedListVec;
+    std::vector<zeScope::SharedPtr<ze_event_pool_handle_t>> scopedEventPoolVec;
+    std::vector<zeScope::SharedPtr<ze_event_handle_t>> scopedEventVec;
+
+    std::vector<ze_command_queue_priority_t> priorities = {
+        ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
+        ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW,
+        ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH,
+    };
+};
+
+TEST_F(CommandQueuePriority, createQueueWithDifferentPriority) {
+    ze_command_queue_handle_t handle = nullptr;
+    for (const auto &priority : priorities) {
+        EXPECT_EQ(createCommandQueue(priority, &handle), ZE_RESULT_SUCCESS);
+    }
+}
+
+TEST_F(CommandQueuePriority, executeCopyUsingDifferentPrioritySequentially) {
+    for (const auto &priority : priorities) {
+        executeCopyCommand(priority, 16 * KB, 20);
+    }
+}
+
+TEST_F(CommandQueuePriority, executeCopyUsingDifferentPrioritySimultaneously) {
+    auto job = [this](ze_command_queue_priority_t priority) {
+        executeCopyCommand(priority, 16 * KB, 20);
+    };
+    std::vector<std::future<void>> tasks;
+    for (const auto &priority : priorities) {
+        tasks.push_back(std::async(std::launch::async, job, priority));
+    }
+    for (const auto &task : tasks) {
+        task.wait();
+    }
+}
+
+// TODO: Test fails sporadically in HWS mode (EISW-105158)
+TEST_F(CommandQueuePriority,
+       DISABLED_executeManyLowPriorityJobsExpectHighPriorityJobCompletesFirst) {
+    SKIP_NO_HWS("Test can be run only in HWS mode");
+
+    size_t lowJobCount = 4;
+    size_t tsOffset = 16;
+    auto tsMem = AllocDeviceMemory((lowJobCount + 1) * tsOffset * sizeof(uint64_t));
+    uint64_t *ts = static_cast<uint64_t *>(tsMem.get());
+
+    ze_event_handle_t hDeviceEvent = nullptr;
+    EXPECT_EQ(createEvent(&hDeviceEvent), ZE_RESULT_SUCCESS);
+
+    ze_event_handle_t hHostEvent = nullptr;
+    EXPECT_EQ(createEvent(&hHostEvent), ZE_RESULT_SUCCESS);
+
+    std::vector<ze_command_queue_handle_t> hQueueLows(lowJobCount, nullptr);
+    for (auto &hQueueLow : hQueueLows) {
+        EXPECT_EQ(createCommandQueue(ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW, &hQueueLow),
+                  ZE_RESULT_SUCCESS);
+    }
+
+    ze_command_queue_handle_t hQueueHigh = nullptr;
+    EXPECT_EQ(createCommandQueue(ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH, &hQueueHigh),
+              ZE_RESULT_SUCCESS);
+
+    std::vector<ze_command_list_handle_t> hListLows(lowJobCount, nullptr);
+    for (auto &hListLow : hListLows) {
+        EXPECT_EQ(createCommandList(&hListLow), ZE_RESULT_SUCCESS);
+    }
+
+    ze_command_list_handle_t hListHigh = nullptr;
+    EXPECT_EQ(createCommandList(&hListHigh), ZE_RESULT_SUCCESS);
+
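+    /* Every command list below waits on hHostEvent before writing a global timestamp.
+     * The high-priority list signals hDeviceEvent first so the host can queue all jobs
+     * and release them at once; with HWS the high-priority timestamp should come first. */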
+    for (size_t i = 0; i < lowJobCount; i++) {
+        EXPECT_EQ(zeCommandListAppendWaitOnEvents(hListLows[i], 1, &hHostEvent), ZE_RESULT_SUCCESS);
+        EXPECT_EQ(zeCommandListAppendWriteGlobalTimestamp(hListLows[i],
+                                                          ts + tsOffset * i,
+                                                          nullptr,
+                                                          0,
+                                                          nullptr),
+                  ZE_RESULT_SUCCESS);
+        EXPECT_EQ(zeCommandListClose(hListLows[i]), ZE_RESULT_SUCCESS);
+    }
+
+    EXPECT_EQ(zeCommandListAppendSignalEvent(hListHigh, hDeviceEvent), ZE_RESULT_SUCCESS);
+    EXPECT_EQ(zeCommandListAppendWaitOnEvents(hListHigh, 1, &hHostEvent), ZE_RESULT_SUCCESS);
+    EXPECT_EQ(zeCommandListAppendWriteGlobalTimestamp(hListHigh,
+                                                      ts + tsOffset * lowJobCount,
+                                                      nullptr,
+                                                      0,
+                                                      nullptr),
+              ZE_RESULT_SUCCESS);
+    EXPECT_EQ(zeCommandListClose(hListHigh), ZE_RESULT_SUCCESS);
+
+    for (size_t i = 0; i < lowJobCount; i++) {
+        EXPECT_EQ(zeCommandQueueExecuteCommandLists(hQueueLows[i], 1, &hListLows[i], nullptr),
+                  ZE_RESULT_SUCCESS);
+    }
+    EXPECT_EQ(zeCommandQueueExecuteCommandLists(hQueueHigh, 1, &hListHigh, nullptr),
+              ZE_RESULT_SUCCESS);
+
+    EXPECT_EQ(zeEventHostSynchronize(hDeviceEvent, syncTimeout), ZE_RESULT_SUCCESS);
+    EXPECT_EQ(zeEventHostSignal(hHostEvent), ZE_RESULT_SUCCESS);
+
+    for (auto &hQueueLow : hQueueLows) {
+        EXPECT_EQ(zeCommandQueueSynchronize(hQueueLow, syncTimeout), ZE_RESULT_SUCCESS);
+    }
+    EXPECT_EQ(zeCommandQueueSynchronize(hQueueHigh, syncTimeout), ZE_RESULT_SUCCESS);
+
+    uint64_t tsHigh = *(ts + tsOffset * lowJobCount);
+    for (size_t i = 0; i < lowJobCount; i++) {
+        uint64_t tsLow = *(ts + tsOffset * i);
+        EXPECT_LT(tsHigh, tsLow) << "Low priority job[" << i
+                                 << "] set timestamp earlier than the job with high priority";
+    }
+}
diff --git a/validation/umd-test/testenv.hpp b/validation/umd-test/testenv.hpp
index f63ab87..5ce711b 100644
--- a/validation/umd-test/testenv.hpp
+++ b/validation/umd-test/testenv.hpp
@@ -6,9 +6,14 @@
  */
 
 #pragma once
-#include <gtest/gtest.h>
-#include "umd_test.h"
+#include "test_app.h"
+#include "umd_extensions.h"
+
+#include <yaml-cpp/yaml.h>
+#include <algorithm>
+#include <string>
+#include <vector>
 
 class Environment : public ::testing::Environment {
   public:
@@ -19,9 +24,13 @@ class Environment : public ::testing::Environment {
         uint32_t drvCount = 0u;
         uint32_t devCount = 0u;
 
-        EXPECT_EQ(setenv("ZET_ENABLE_METRICS", "1", 1), 0);
+        EXPECT_EQ(setenv("ZET_ENABLE_METRICS", "1", 0), 0);
+
+// TODO: Validation layer should be disabled when OpenVino is used, issue: EISW-101738
+#ifndef UMD_TESTS_USE_OPENVINO
         EXPECT_EQ(setenv("ZE_ENABLE_VALIDATION_LAYER", "1", 0), 0);
         EXPECT_EQ(setenv("ZE_ENABLE_PARAMETER_VALIDATION", "1", 0), 0);
+#endif
 
         ASSERT_EQ(zeInit(ZE_INIT_FLAG_VPU_ONLY), ZE_RESULT_SUCCESS);
         ASSERT_EQ(zeDriverGet(&drvCount, nullptr), ZE_RESULT_SUCCESS);
@@ -83,15 +92,27 @@ class Environment : public ::testing::Environment {
                << "Failed to find graph profiling DDI table";
     }
 
-    ze_driver_handle_t getDriver() { return zeDriver; }
-    ze_device_handle_t getDevice() { return zeDevice; }
-    graph_dditable_ext_t *getGraphDDITable() { return zeGraphDDITableExt; }
+    ze_driver_handle_t getDriver() {
+        return zeDriver;
+    }
+    ze_device_handle_t getDevice() {
+        return zeDevice;
+    }
+    graph_dditable_ext_t *getGraphDDITable() {
+        return zeGraphDDITableExt;
+    }
     ze_graph_profiling_dditable_ext_t *getGraphProfilingDDITable() {
         return zeGraphProfilingDDITableExt;
     }
-    uint64_t getMaxMemAllocSize() { return maxMemAllocSize; }
-    uint16_t getPciDevId() { return pciDevId; }
-    uint16_t getPlatformType() { return platformType; }
+    uint64_t getMaxMemAllocSize() {
+        return maxMemAllocSize;
+    }
+    uint16_t getPciDevId() {
+        return pciDevId;
+    }
+    uint16_t getPlatformType() {
+        return platformType;
+    }
 
     static Environment *getInstance()
    {
        static Environment *testEnv = nullptr;
@@ -169,7 +190,7 @@ class Environment : public ::testing::Environment {
         std::string level = config["log_level"].as<std::string>();
 
         if (std::find(validLevels.begin(), validLevels.end(), level) != validLevels.end()) {
-            if (setenv("VPU_DRV_UMD_LOGLEVEL", level.c_str(), 1) != 0) {
+            if (setenv("ZE_INTEL_NPU_LOGLEVEL", level.c_str(), 0) != 0) {
                 PRINTF("Set log level to requested %s failed.\n", level.c_str());
                 return false;
             }
diff --git a/validation/umd-test/umd_prime_buffers.h b/validation/umd-test/umd_prime_buffers.h
new file mode 100644
index 0000000..6f7c7de
--- /dev/null
+++ b/validation/umd-test/umd_prime_buffers.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+#include <fcntl.h>
+#include <linux/dma-heap.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <filesystem>
+#include <map>
+
+#define ALLIGN_TO_PAGE(x) __ALIGN_KERNEL((x), (UmdTest::PAGE_SIZE))
+
+class PrimeBufferHelper {
+  public:
+    PrimeBufferHelper() {
+        if (!std::filesystem::exists("/dev/dma_heap/system"))
+            return;
+
+        fd = open("/dev/dma_heap/system", O_RDWR);
+    }
+
+    PrimeBufferHelper(const PrimeBufferHelper &obj) = delete;
+    PrimeBufferHelper(PrimeBufferHelper &&obj) = delete;
+    PrimeBufferHelper &operator=(const PrimeBufferHelper &obj) = delete;
+    PrimeBufferHelper &operator=(PrimeBufferHelper &&obj) = delete;
+
+    ~PrimeBufferHelper() {
+        for (const auto &[bufferFd, value] : buffers) {
+            if (value.second != nullptr && value.second != MAP_FAILED)
+                munmap(value.second, value.first);
+            close(bufferFd);
+        }
+        if (fd >= 0)
+            close(fd);
+    }
+
+    bool hasDMABufferSupport() {
+        if (fd < 0)
+            return false;
+        return true;
+    }
+
+    bool createDMABuffer(size_t size, int32_t &bufferFd) {
+        if (!hasDMABufferSupport() || !size)
+            return false;
+
+        struct dma_heap_allocation_data heapAlloc = {
+            .len = ALLIGN_TO_PAGE(size),
+            .fd = 0,
+            .fd_flags = O_RDWR | O_CLOEXEC,
+            .heap_flags = 0,
+        };
+
+        int ret = ioctl(fd, DMA_HEAP_IOCTL_ALLOC, &heapAlloc);
+        if (ret != 0)
+            return false;
+
+        bufferFd = heapAlloc.fd;
+        buffers.insert({heapAlloc.fd, {size, nullptr}});
+        return true;
+    }
+
+    void *mmapDmaBuffer(int32_t &bufferFd) {
+        if (auto it = buffers.find(bufferFd); it != buffers.end()) {
+            if (it->second.second == nullptr)
+                it->second.second =
+                    mmap(NULL, it->second.first, PROT_WRITE | PROT_READ, MAP_SHARED, bufferFd, 0);
+            return it->second.second;
+        }
+        return MAP_FAILED;
+    }
+
+    /* Descriptor to create exportable memory */
+    ze_external_memory_export_desc_t externalExportDesc = {
+        .stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC,
+        .pNext = nullptr,
+        .flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF};
+
+    /* Descriptor to retrieve the fd through get properties */
+    ze_external_memory_export_fd_t externalExportFdDesc = {
+        .stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_FD,
+        .pNext = nullptr,
+        .flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF,
+        .fd = -1};
+
+    /* Descriptor to import an allocation from a fd */
+    ze_external_memory_import_fd_t externalImportFromFdDesc = {
+        .stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_FD,
+        .pNext = nullptr,
+        .flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF,
+        .fd = -1};
+
+  protected:
+    using BufferFd = int32_t;
+    using BufferSize = size_t;
+    using BufferPtr = void *;
+    std::map<BufferFd, std::pair<BufferSize, BufferPtr>> buffers;
+    int32_t fd = -1;
+};
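+
+/* Usage sketch (assumption - mirrors test_prime_buffers.cpp):
+ *   PrimeBufferHelper helper;
+ *   int32_t fd = -1;
+ *   if (helper.hasDMABufferSupport() && helper.createDMABuffer(4096, fd)) {
+ *       // hand fd to the driver via externalImportFromFdDesc.fd,
+ *       // or map it on the host with mmapDmaBuffer(fd)
+ *   }
+ */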
#include "umd_test.h" #include "testenv.hpp" @@ -78,6 +78,17 @@ void UmdTest::SetUp() { syncTimeout = 30'000'000'000; // 30 seconds graphSyncTimeout = 600'000'000'000; // 10 minutes } + + /*Get base configuration from config file*/ + YAML::Node &configuration = Environment::getConfiguration(); + if (configuration["blob_dir"].IsDefined()) + blobDir = configuration["blob_dir"].as(); + + if (configuration["image_dir"].IsDefined()) + imageDir = configuration["image_dir"].as(); + + if (configuration["model_dir"].IsDefined()) + modelDir = configuration["model_dir"].as(); } void UmdTest::TearDown() { @@ -117,58 +128,82 @@ bool UmdTest::isSilicon() { } std::shared_ptr UmdTest::AllocSharedMemory(size_t size, ze_host_mem_alloc_flags_t flagsHost) { - ze_device_mem_alloc_desc_t deviceMemAllocDesc = {.stype = - ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC, - .pNext = nullptr, - .flags = 0, - .ordinal = 0}; + return zeMemory::allocShared(zeContext, zeDevice, size, flagsHost); +} - ze_host_mem_alloc_desc_t hostMemAllocDesc = {.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, - .pNext = nullptr, - .flags = flagsHost}; +std::shared_ptr UmdTest::AllocDeviceMemory(size_t size) { + return zeMemory::allocDevice(zeContext, zeDevice, size); +} - ze_result_t ret; - auto scopedMem = zeScope::memAllocShared(zeContext, - deviceMemAllocDesc, - hostMemAllocDesc, - size, - 0, - zeDevice, - ret); - if (ret != ZE_RESULT_SUCCESS) - throw std::runtime_error("Failed to allocate shared memory"); +std::shared_ptr UmdTest::AllocHostMemory(size_t size, ze_host_mem_alloc_flags_t flagsHost) { + return zeMemory::allocHost(zeContext, size, flagsHost); +} + +std::vector UmdTest::getFlagsFromString(std::string flags) { + std::vector buildFlags; - memset(scopedMem.get(), 0, size); - return scopedMem; + for (auto c : flags) + buildFlags.push_back(c); + buildFlags.push_back('\0'); + return buildFlags; } -std::shared_ptr UmdTest::AllocDeviceMemory(size_t size) { - ze_device_mem_alloc_desc_t deviceMemAllocDesc = {.stype = - ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC, - .pNext = nullptr, - .flags = 0, - .ordinal = 0}; +void UmdTest::createGraphDescriptorForModel(const std::string &modelPath, + const std::vector &modelBuildFlags, + std::vector &testModelIR, + ze_graph_desc_2_t &graphDesc) { + std::vector testModelXml, testModelBin; + ze_device_graph_properties_t pDeviceGraphProperties; - ze_result_t ret; - auto scopedMem = zeScope::memAllocDevice(zeContext, deviceMemAllocDesc, size, 0, zeDevice, ret); - if (ret != ZE_RESULT_SUCCESS) - throw std::runtime_error("Failed to allocate device memory"); + ASSERT_TRUE(getModelFromPath(modelPath, testModelXml, testModelBin)); - return scopedMem; -} + ASSERT_EQ(zeGraphDDITableExt->pfnDeviceGetGraphProperties(zeDevice, &pDeviceGraphProperties), + ZE_RESULT_SUCCESS); -std::shared_ptr UmdTest::AllocHostMemory(size_t size, ze_host_mem_alloc_flags_t flagsHost) { - ze_host_mem_alloc_desc_t hostMemAllocDesc = {.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, - .pNext = nullptr, - .flags = flagsHost}; + ze_graph_compiler_version_info_t version = { + .major = pDeviceGraphProperties.compilerVersion.major, + .minor = pDeviceGraphProperties.compilerVersion.minor}; - ze_result_t ret; - auto scopedMem = zeScope::memAllocHost(zeContext, hostMemAllocDesc, size, 0, ret); - if (ret != ZE_RESULT_SUCCESS) - throw std::runtime_error("Failed to allocate host memory"); + uint64_t xml_len = testModelXml.size(); + uint64_t bin_len = testModelBin.size(); + uint32_t numInputs = 2; + uint64_t modelSize = + sizeof(version) + 
+    uint64_t modelSize = sizeof(version) + sizeof(numInputs) + sizeof(xml_len) + xml_len +
+                         sizeof(bin_len) + bin_len;
+
+    testModelIR.resize(modelSize);
+
+    uint64_t offset = 0;
+    memcpy(&testModelIR[0], &version, sizeof(version));
+    offset += sizeof(version);
+
+    memcpy(&testModelIR[offset], &numInputs, sizeof(numInputs));
+    offset += sizeof(numInputs);
+
+    memcpy(&testModelIR[offset], &xml_len, sizeof(xml_len));
+    offset += sizeof(xml_len);
+
+    memcpy(&testModelIR[offset], testModelXml.data(), xml_len);
+    offset += xml_len;
+
+    memcpy(&testModelIR[offset], &bin_len, sizeof(bin_len));
+    offset += sizeof(bin_len);
+
+    memcpy(&testModelIR[offset], testModelBin.data(), bin_len);
+
+    graphDesc.stype = ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES;
+    graphDesc.pNext = nullptr;
+    graphDesc.format = ZE_GRAPH_FORMAT_NGRAPH_LITE;
+    graphDesc.inputSize = testModelIR.size();
+    graphDesc.pInput = testModelIR.data();
+    graphDesc.pBuildFlags = modelBuildFlags.data();
+    graphDesc.flags = ZE_GRAPH_FLAG_NONE;
+}
 
-    memset(scopedMem.get(), 0, size);
-    return scopedMem;
+bool UmdTest::isHwsModeEnabled() {
+    std::vector<char> out;
+    if (!loadFile("/sys/module/intel_vpu/parameters/sched_mode", out))
+        return false;
+    return out.size() > 0 && out[0] == 'Y';
 }
 
 TEST(Umd, ZeDevTypeStr) {
diff --git a/validation/umd-test/umd_test.h b/validation/umd-test/umd_test.h
index 01be067..91586e0 100644
--- a/validation/umd-test/umd_test.h
+++ b/validation/umd-test/umd_test.h
@@ -10,9 +10,10 @@
 #include "blob_params.hpp"
 #include "model_params.hpp"
 #include "test_app.h"
+#include "testenv.hpp"
 #include "umd_extensions.h"
+#include "ze_memory.hpp"
 #include "ze_scope.hpp"
-#include "testenv.hpp"
 
 #include
 #include
@@ -28,6 +29,16 @@ void PrintTo(const ze_result_t &ze_result, std::ostream *os);
         GTEST_SKIP_(msg); \
     }
 
+#define SKIP_PRESILICON(msg) \
+    if (!isSilicon()) { \
+        SKIP_(msg); \
+    }
+
+#define SKIP_NO_HWS(msg) \
+    if (!isHwsModeEnabled()) { \
+        SKIP_(msg); \
+    }
+
 #define KB (1024llu)
 #define MB (1024llu * 1024)
 
@@ -92,6 +103,9 @@ class UmdTest : public ::testing::Test {
 
     static constexpr int PAGE_SIZE = 4096;
 
+    uint32_t computeGrpOrdinal = std::numeric_limits<uint32_t>::max();
+    uint32_t copyGrpOrdinal = std::numeric_limits<uint32_t>::max();
+
   protected:
     void SetUp() override;
     void TearDown() override;
@@ -99,8 +113,14 @@ class UmdTest : public ::testing::Test {
     std::shared_ptr<void> AllocSharedMemory(size_t size, ze_host_mem_alloc_flags_t flagsHost = 0);
     std::shared_ptr<void> AllocDeviceMemory(size_t size);
     std::shared_ptr<void> AllocHostMemory(size_t size, ze_host_mem_alloc_flags_t flagsHost = 0);
+    std::vector<char> getFlagsFromString(std::string flags);
+    void createGraphDescriptorForModel(const std::string &modelPath,
+                                       const std::vector<char> &modelBuildFlags,
+                                       std::vector<uint8_t> &testModelIR,
+                                       ze_graph_desc_2_t &graphDesc);
 
     bool isSilicon();
+    bool isHwsModeEnabled();
 
     /** @brief Handle to the Level Zero API driver object */
     ze_driver_handle_t zeDriver = nullptr;
@@ -116,8 +136,6 @@ class UmdTest : public ::testing::Test {
     /** @brief Retrieve command group ordinals for compute and copy engine usage */
     void CommandQueueGroupSetUp();
 
-    uint32_t computeGrpOrdinal = std::numeric_limits<uint32_t>::max();
-    uint32_t copyGrpOrdinal = std::numeric_limits<uint32_t>::max();
     uint16_t pciDevId = 0u;
     uint32_t platformType = 0u;
 
@@ -125,6 +143,10 @@
     uint64_t syncTimeout = 2'000'000'000;    // 2 seconds
     uint64_t graphSyncTimeout = syncTimeout; // 2 seconds
 
+    std::string blobDir = "";
+    std::string imageDir = "";
+    std::string modelDir = "";
+
   private:
     zeScope::SharedPtr<ze_context_handle_t> scopedContext;
 };
diff --git a/validation/umd-test/utilities/CMakeLists.txt b/validation/umd-test/utilities/CMakeLists.txt
new file mode 100644
index 0000000..65cd1fd
--- /dev/null
+++ b/validation/umd-test/utilities/CMakeLists.txt
@@ -0,0 +1,10 @@
+#
+# Copyright (C) 2022 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+#
+
+add_library(data_handle OBJECT
+    ${CMAKE_CURRENT_SOURCE_DIR}/data_handle.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/data_handle.cpp
+)
diff --git a/validation/umd-test/utilities/data_handle.cpp b/validation/umd-test/utilities/data_handle.cpp
new file mode 100644
index 0000000..3df947d
--- /dev/null
+++ b/validation/umd-test/utilities/data_handle.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include "data_handle.h"
+
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <random>
+
+namespace DataHandle {
+
+void generateRandomData(std::vector<uint8_t> &data, size_t size) {
+    std::random_device rd;
+    std::uniform_int_distribution<uint8_t> dist;
+
+    data.reserve(size);
+
+    for (size_t i = 0; i < size; i++) {
+        data.push_back(dist(rd));
+    }
+}
+
+int loadFile(const std::string &filePath, std::vector<char> &dataOut) {
+    // Load binary file
+    std::ifstream fileInputStream(filePath, std::ios::binary | std::ios::ate);
+    if (fileInputStream.is_open()) {
+        std::streamsize size = fileInputStream.tellg();
+        fileInputStream.seekg(0, std::ios::beg);
+
+        dataOut.resize(size);
+        fileInputStream.read((char *)dataOut.data(), size);
+        fileInputStream.close();
+
+        return 0;
+    }
+    return -1;
+}
+
+struct __attribute__((packed)) BmpFormat {
+    uint16_t header;
+    uint32_t size;
+    uint32_t reserve;
+    uint32_t offset;
+};
+
+int loadImageFile(const std::string &filePath, std::vector<char> &dataOut) {
+    if (std::filesystem::exists(filePath) == false)
+        return -1;
+
+    std::vector<char> file;
+    auto ret = loadFile(filePath, file);
+    if (ret != 0)
+        return ret;
+
+    auto bmpHeader = reinterpret_cast<BmpFormat *>(file.data());
+    dataOut.resize(bmpHeader->size - bmpHeader->offset);
+    memcpy(dataOut.data(), &file.at(bmpHeader->offset), dataOut.size());
+    return 0;
+}
+
+int writeFile(const std::string &filePath, void *dataIn, size_t inputSize) {
+    // Write to binary file
+    std::ofstream fileOutputStream(filePath, std::ios::out | std::ios::binary);
+    if (fileOutputStream.is_open()) {
+        fileOutputStream.write((const char *)dataIn, inputSize);
+
+        return 0;
+    }
+    return -1;
+}
+
+/*
+This function translates fp16 into fp32 (float) format.
+
+In half-precision (fp16), single-precision (fp32) and double-precision (fp64),
+the value consists of a sign bit, an exponent (exp) and a significand (mantis).
+fp16: [BIT15] [BIT14 - BIT10] [BIT9 - BIT0]
+fp32: [BIT31] [BIT30 - BIT23] [BIT22 - BIT0]
+
+The precision is interpreted differently when the exponent is 0 and when it is not 0.
+That is why there is an "if (exp != 0)" below to split the interpretation.
+
+It is more straightforward when exp is not 0:
+only 0x70 is added to exp, accounting for the exponent bias difference between fp16 and fp32.
+
+When exp is 0, 0x70 cannot be added directly to the fp32 exponent;
+instead "(v - 37) << 23" obtains the required exp value for fp32,
+then "mantis << (150 - v)" gives the required bit shift of the mantis,
+and finally 0x7FE000 is applied to trim it.
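+
+Example: fp16 1.0 is 0x3C00 (exp = 0x0F, mantis = 0); the branch for exp != 0 yields
+(0x0F + 0x70) << 23 = 0x3F800000, which is 1.0f in fp32.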
+*/
+float rawFp16ToFp32(const uint16_t &data) {
+    uint32_t exp = (data & 0x7C00) >> 10;
+    uint32_t mantis = (data & 0x03FF) << 13;
+    uint32_t output = (data & 0x8000) << 16;
+
+    if (exp != 0)
+        output |= (exp + 0x70) << 23 | mantis;
+    else if (mantis != 0) {
+        auto tmp = static_cast<float>(mantis);
+        uint32_t v = *reinterpret_cast<uint32_t *>(&tmp) >> 23;
+        output |= (v - 37) << 23 | ((mantis << (150 - v)) & 0x7FE000);
+    }
+
+    return *reinterpret_cast<float *>(&output);
+}
+
+/* This function translates bfp16 into fp32(float) format.
+ * Conversion to FP32 is done by moving the exponent part unchanged
+ * (both formats use 8 bits + sign) and extending the mantis with zeroes.
+ */
+float rawBfp16ToFp32(const uint16_t &data) {
+    uint32_t output = (data << 16);
+
+    return *reinterpret_cast<float *>(&output);
+}
+
+} // namespace DataHandle
diff --git a/validation/umd-test/utilities/data_handle.h b/validation/umd-test/utilities/data_handle.h
new file mode 100644
index 0000000..d6e0ec0
--- /dev/null
+++ b/validation/umd-test/utilities/data_handle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace DataHandle {
+
+/**
+  Fill data with random values
+  @param size[in]: Size of data
+  @param data[out]: Reference to vector to store data
+ */
+void generateRandomData(std::vector<uint8_t> &data, size_t size);
+
+/**
+  Load a binary file into the program
+  @param filePath[in]: String containing the path of the file to be loaded.
+  @param dataOut[out]: Reference to vector to store raw data.
+  @return: 0 indicating success, -1 otherwise
+ */
+int loadFile(const std::string &filePath, std::vector<char> &dataOut);
+
+/**
+  Load an image file into the program
+  @param filePath[in]: String containing the path of the file to be loaded.
+  @param dataOut[out]: Reference to vector to store raw data.
+  @return: 0 indicating success, -1 otherwise
+ */
+int loadImageFile(const std::string &filePath, std::vector<char> &dataOut);
+
+/**
+  Write data out to a file
+  @param filePath[in]: String containing the path of the file where results are stored.
+  @param dataIn[in]: Pointer to the data to be written.
+  @param inputSize[in]: Size of the data to be written.
+  @return: 0 indicating success, -1 otherwise
+ */
+int writeFile(const std::string &filePath, void *dataIn, size_t inputSize);
+
+/**
+  Take a reference to raw FP16 data and convert it into float (FP32)
+  @param data[in]: Reference to raw FP16 to convert into float (FP32).
+ */
+float rawFp16ToFp32(const uint16_t &data);
+
+/**
+  Take a reference to raw BFP16 data and convert it into float (FP32)
+  @param data[in]: Reference to raw BFP16 to convert into float (FP32).
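+  Example: bf16 1.0 is 0x3F80; 0x3F80 << 16 == 0x3F800000, which is 1.0f.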
+ */
+float rawBfp16ToFp32(const uint16_t &data);
+} // namespace DataHandle
diff --git a/validation/umd-test/utilities/graph_to_str.h b/validation/umd-test/utilities/graph_to_str.h
new file mode 100644
index 0000000..dedac74
--- /dev/null
+++ b/validation/umd-test/utilities/graph_to_str.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include
+
+inline const char *zeGraphArgumentToStr(ze_graph_argument_type_t a) {
+    switch (a) {
+    case ZE_GRAPH_ARGUMENT_TYPE_INPUT:
+        return "ZE_GRAPH_ARGUMENT_TYPE_INPUT";
+    case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT:
+        return "ZE_GRAPH_ARGUMENT_TYPE_OUTPUT";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+inline const char *zeGraphArgumentPrecisionToStr(ze_graph_argument_precision_t a) {
+    switch (a) {
+    case ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN";
+    case ZE_GRAPH_ARGUMENT_PRECISION_FP32:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_FP32";
+    case ZE_GRAPH_ARGUMENT_PRECISION_FP16:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_FP16";
+    case ZE_GRAPH_ARGUMENT_PRECISION_UINT16:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_UINT16";
+    case ZE_GRAPH_ARGUMENT_PRECISION_UINT8:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_UINT8";
+    case ZE_GRAPH_ARGUMENT_PRECISION_UINT4:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_UINT4";
+    case ZE_GRAPH_ARGUMENT_PRECISION_INT32:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_INT32";
+    case ZE_GRAPH_ARGUMENT_PRECISION_INT16:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_INT16";
+    case ZE_GRAPH_ARGUMENT_PRECISION_INT8:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_INT8";
+    case ZE_GRAPH_ARGUMENT_PRECISION_INT4:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_INT4";
+    case ZE_GRAPH_ARGUMENT_PRECISION_BIN:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_BIN";
+    case ZE_GRAPH_ARGUMENT_PRECISION_BF16:
+        return "ZE_GRAPH_ARGUMENT_PRECISION_BF16";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+inline const char *zeGraphArgumentLayoutToStr(ze_graph_argument_layout_t a) {
+    switch (a) {
+    case ZE_GRAPH_ARGUMENT_LAYOUT_ANY:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_ANY";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_NCHW:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_NCHW";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_NHWC";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_OIHW:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_OIHW";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_C:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_C";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_CHW:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_CHW";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_HW:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_HW";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_NC:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_NC";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_CN:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_CN";
+    case ZE_GRAPH_ARGUMENT_LAYOUT_BLOCKED:
+        return "ZE_GRAPH_ARGUMENT_LAYOUT_BLOCKED";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+inline const char *getExecTypeStr(int execType) {
+    switch (execType) {
+    case ZE_TASK_EXECUTE_NONE:
+        return "ZE_TASK_EXECUTE_NONE";
+    case ZE_TASK_EXECUTE_DPU:
+        return "ZE_TASK_EXECUTE_DPU";
+    case ZE_TASK_EXECUTE_SW:
+        return "ZE_TASK_EXECUTE_SW";
+    case ZE_TASK_EXECUTE_DMA:
+        return "ZE_TASK_EXECUTE_DMA";
+    default:
+        return "";
+    }
+}
+
+inline const char *getStatusStr(int status) {
+    switch (status) {
+    case ZE_LAYER_STATUS_NOT_RUN:
+        return "ZE_LAYER_STATUS_NOT_RUN";
+    case ZE_LAYER_STATUS_OPTIMIZED_OUT:
+        return "ZE_LAYER_STATUS_OPTIMIZED_OUT";
+    case ZE_LAYER_STATUS_EXECUTED:
+        return "ZE_LAYER_STATUS_EXECUTED";
+    default:
+        return "";
+    }
+}
diff --git a/validation/umd-test/ze_memory.hpp b/validation/umd-test/ze_memory.hpp
new file mode 100644
index 0000000..539f676
--- /dev/null
+++ b/validation/umd-test/ze_memory.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2022-2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include "ze_scope.hpp"
+#include <cstring>
+
+namespace zeMemory {
+
+inline void allocError(const char *msg) {
+#ifdef GTEST_TEST
+    ASSERT_FALSE(true) << msg;
+#else
+    throw std::runtime_error(msg);
+#endif
+}
+
+inline std::shared_ptr<void> allocShared(ze_context_handle_t hContext,
+                                         ze_device_handle_t hDevice,
+                                         size_t size,
+                                         ze_host_mem_alloc_flags_t flags = 0) {
+    ze_device_mem_alloc_desc_t devDesc = {.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
+                                          .pNext = nullptr,
+                                          .flags = 0,
+                                          .ordinal = 0};
+    ze_host_mem_alloc_desc_t hostDesc = {.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
+                                         .pNext = nullptr,
+                                         .flags = flags};
+    ze_result_t ret;
+    auto scopedMem = zeScope::memAllocShared(hContext, devDesc, hostDesc, size, 0, hDevice, ret);
+    if (ret != ZE_RESULT_SUCCESS)
+        allocError("Failed to allocate shared memory");
+    memset(scopedMem.get(), 0, size);
+    return scopedMem;
+}
+
+inline std::shared_ptr<void>
+allocDevice(ze_context_handle_t hContext, ze_device_handle_t hDevice, size_t size) {
+    ze_device_mem_alloc_desc_t desc = {.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
+                                       .pNext = nullptr,
+                                       .flags = 0,
+                                       .ordinal = 0};
+    ze_result_t ret;
+    auto scopedMem = zeScope::memAllocDevice(hContext, desc, size, 0, hDevice, ret);
+    if (ret != ZE_RESULT_SUCCESS)
+        allocError("Failed to allocate device memory");
+    return scopedMem;
+}
+
+inline std::shared_ptr<void>
+allocHost(ze_context_handle_t hContext, size_t size, ze_host_mem_alloc_flags_t flags = 0) {
+    ze_host_mem_alloc_desc_t desc = {.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
+                                     .pNext = nullptr,
+                                     .flags = flags};
+    ze_result_t ret;
+    auto scopedMem = zeScope::memAllocHost(hContext, desc, size, 0, ret);
+    if (ret != ZE_RESULT_SUCCESS)
+        allocError("Failed to allocate host memory");
+    memset(scopedMem.get(), 0, size);
+    return scopedMem;
+}
+}; // namespace zeMemory
diff --git a/validation/umd-test/ze_scope.hpp b/validation/umd-test/ze_scope.hpp
index ec6ab9e..930c0bc 100644
--- a/validation/umd-test/ze_scope.hpp
+++ b/validation/umd-test/ze_scope.hpp
@@ -136,6 +136,27 @@ inline SharedPtr<ze_graph_handle_t> graphCreate(graph_dditable_ext_t *ddi,
     });
 }
 
+inline SharedPtr<ze_graph_handle_t> graphCreate2(graph_dditable_ext_t *ddi,
+                                                 ze_context_handle_t ctx,
+                                                 ze_device_handle_t dev,
+                                                 const ze_graph_desc_2_t &desc,
+                                                 ze_result_t &ret) {
+    ze_graph_handle_t handle = nullptr;
+
+    if (!ddi) {
+        ret = ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+        return nullptr;
+    }
+
+    ret = ddi->pfnCreate2(ctx, dev, &desc, &handle);
+    if (ret != ZE_RESULT_SUCCESS)
+        return nullptr;
+
+    return SharedPtr<ze_graph_handle_t>(std::move(handle), [ddi](auto x) {
+        EXPECT_EQ(ddi->pfnDestroy(x), ZE_RESULT_SUCCESS);
+    });
+}
+
 inline SharedPtr<ze_fence_handle_t> fenceCreate(ze_command_queue_handle_t queue,
                                                 const ze_fence_desc_t &desc,
                                                 ze_result_t &ret) {
     ze_fence_handle_t handle = nullptr;
diff --git a/version.h.in b/version.h.in
index 8b96c5d..1e418b0 100644
--- a/version.h.in
+++ b/version.h.in
@@ -7,6 +7,6 @@
 
 #pragma once
 
-#cmakedefine VPU_VERSION "${VPU_VERSION}"
+#cmakedefine BUILD_VERSION "${BUILD_VERSION}"
 
-const char vpu_drv_version_str[] = "VPU version: " VPU_VERSION;
+const char vpu_drv_version_str[] = "NPU version: " BUILD_VERSION;