diff --git a/cmake/FindCUDACompilerNVCC.cmake b/cmake/FindCUDACompilerNVCC.cmake index 030dc0f312..d9600ecf6d 100644 --- a/cmake/FindCUDACompilerNVCC.cmake +++ b/cmake/FindCUDACompilerNVCC.cmake @@ -50,6 +50,8 @@ target_compile_options( $<$:-Xptxas=-O3 -Xcompiler=-Og,-g,--coverage,-fprofile-abs-path> $<$:-Xptxas=-O3 -Xcompiler=-O3,-g> $<$:-Xcompiler=-isysroot;-Xcompiler=${CMAKE_OSX_SYSROOT}> + # workaround for https://github.com/espressomd/espresso/issues/4943 + $<$:$<$:--coverage -fprofile-abs-path>> ) function(espresso_add_gpu_library) diff --git a/cmake/unit_test.cmake b/cmake/espresso_unit_test.cmake similarity index 91% rename from cmake/unit_test.cmake rename to cmake/espresso_unit_test.cmake index 8b87986176..c857d9bed1 100644 --- a/cmake/unit_test.cmake +++ b/cmake/espresso_unit_test.cmake @@ -1,5 +1,5 @@ # -# Copyright (C) 2016-2022 The ESPResSo project +# Copyright (C) 2016-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,9 +17,12 @@ # along with this program. If not, see . # -# unit_test function -function(UNIT_TEST) - cmake_parse_arguments(TEST "" "NAME;NUM_PROC" "SRC;DEPENDS" ${ARGN}) +function(ESPRESSO_UNIT_TEST) + cmake_parse_arguments(TEST "" "SRC;NAME;NUM_PROC" "DEPENDS" ${ARGN}) + if(NOT DEFINED TEST_NAME) + cmake_path(GET TEST_SRC STEM TEST_NAME) + set(TEST_NAME ${TEST_NAME} PARENT_SCOPE) + endif() if(${TEST_SRC} MATCHES ".*\.cu$") espresso_add_gpu_executable(${TEST_NAME} ${TEST_SRC}) else() @@ -72,4 +75,4 @@ function(UNIT_TEST) ${TEST_NAME} PROPERTIES ENVIRONMENT "${TEST_ENV_VARIABLES}") add_dependencies(check_unit_tests ${TEST_NAME}) -endfunction(UNIT_TEST) +endfunction() diff --git a/maintainer/CI/build_cmake.sh b/maintainer/CI/build_cmake.sh index 5cd18df07c..6d043c8b59 100755 --- a/maintainer/CI/build_cmake.sh +++ b/maintainer/CI/build_cmake.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (C) 2016-2022 The ESPResSo project +# Copyright (C) 2016-2024 The ESPResSo project # Copyright (C) 2014 Olaf Lenz # # Copying and distribution of this file, with or without modification, @@ -151,50 +151,18 @@ cmake_params="-D CMAKE_BUILD_TYPE=${build_type} -D ESPRESSO_WARNINGS_ARE_ERRORS= cmake_params="${cmake_params} -D CMAKE_INSTALL_PREFIX=/tmp/espresso-unit-tests -D ESPRESSO_INSIDE_DOCKER=ON" cmake_params="${cmake_params} -D ESPRESSO_CTEST_ARGS:STRING=-j${check_procs} -D ESPRESSO_TEST_TIMEOUT=${test_timeout}" -if [ "${make_check_benchmarks}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_BENCHMARKS=ON" -fi - -if [ "${with_ccache}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CCACHE=ON" -fi - -if [ "${with_caliper}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CALIPER=ON" -fi - -if [ "${with_hdf5}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_HDF5=ON" -else - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_HDF5=OFF" -fi - -if [ "${with_fftw}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_FFTW=ON" -else - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_FFTW=OFF" -fi - -if [ "${with_gsl}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_GSL=ON" -else - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_GSL=OFF" -fi - -if [ "${with_scafacos}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_SCAFACOS=ON" -else - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_SCAFACOS=OFF" -fi - -if [ "${with_stokesian_dynamics}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=ON" -else - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=OFF" -fi +cmake_params="${cmake_params} -D ESPRESSO_BUILD_BENCHMARKS=${make_check_benchmarks}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CCACHE=${with_ccache}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CALIPER=${with_caliper}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_HDF5=${with_hdf5}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_FFTW=${with_fftw}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_GSL=${with_gsl}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_SCAFACOS=${with_scafacos}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_STOKESIAN_DYNAMICS=${with_stokesian_dynamics}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA=${with_walberla}" if [ "${with_walberla}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA=ON -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON" + cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA_FFT=ON" if [ "${with_walberla_avx}" = true ]; then cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_WALBERLA_AVX=ON" fi @@ -203,39 +171,18 @@ if [ "${with_walberla}" = true ]; then mpiexec_preflags="${mpiexec_preflags:+$mpiexec_preflags;}--bind-to;none" fi -if [ "${with_coverage}" = true ]; then - cmake_params="-D ESPRESSO_BUILD_WITH_COVERAGE=ON ${cmake_params}" -fi - -if [ "${with_coverage_python}" = true ]; then - cmake_params="-D ESPRESSO_BUILD_WITH_COVERAGE_PYTHON=ON ${cmake_params}" -fi - -if [ "${with_asan}" = true ]; then - cmake_params="-D ESPRESSO_BUILD_WITH_ASAN=ON ${cmake_params}" -fi - -if [ "${with_ubsan}" = true ]; then - cmake_params="-D ESPRESSO_BUILD_WITH_UBSAN=ON ${cmake_params}" -fi - -if [ "${with_static_analysis}" = true ]; then - cmake_params="-D ESPRESSO_BUILD_WITH_CLANG_TIDY=ON ${cmake_params}" -fi - -if [ "${run_checks}" = true ]; then - cmake_params="${cmake_params} -D ESPRESSO_BUILD_TESTS=ON" -else - cmake_params="${cmake_params} -D ESPRESSO_BUILD_TESTS=OFF" -fi +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_COVERAGE=${with_coverage}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_COVERAGE_PYTHON=${with_coverage_python}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_ASAN=${with_asan}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_UBSAN=${with_ubsan}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CLANG_TIDY=${with_static_analysis}" +cmake_params="${cmake_params} -D ESPRESSO_BUILD_WITH_CUDA=${with_cuda}" if [ "${with_cuda}" = true ]; then - cmake_params="-D ESPRESSO_BUILD_WITH_CUDA=ON -D CUDAToolkit_ROOT=/usr/lib/cuda ${cmake_params}" + cmake_params="${cmake_params} -D CUDAToolkit_ROOT=/usr/lib/cuda" if [ "${CUDACXX}" = "" ] && [ "${CXX}" != "" ]; then - cmake_params="-D CMAKE_CUDA_FLAGS='--compiler-bindir=$(which "${CXX}")' ${cmake_params}" + cmake_params="${cmake_params} -D CMAKE_CUDA_FLAGS='--compiler-bindir=$(which "${CXX}")'" fi -else - cmake_params="-D ESPRESSO_BUILD_WITH_CUDA=OFF ${cmake_params}" fi command -v nvidia-smi && nvidia-smi || true @@ -281,11 +228,10 @@ fi # CONFIGURE start "CONFIGURE" -MYCONFIG_DIR="${srcdir}/maintainer/configs" if [ "${myconfig}" = "default" ]; then echo "Using default myconfig." else - myconfig_file="${MYCONFIG_DIR}/${myconfig}.hpp" + myconfig_file="${srcdir}/maintainer/configs/${myconfig}.hpp" if [ ! -e "${myconfig_file}" ]; then echo "${myconfig_file} does not exist!" exit 1 @@ -315,9 +261,8 @@ end "BUILD" # library. See details in https://github.com/espressomd/espresso/issues/2249 # Can't do this check on CUDA though because nvcc creates a host function # that just calls exit() for each device function, and can't do this with -# coverage because gcov 9.0 adds code that calls exit(), and can't do this # with walberla because the library calls exit() in assertions. -if [[ "${with_coverage}" == false && ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) && "${with_walberla}" != "true" ]]; then +if [[ ( "${with_cuda}" == false || "${with_cuda_compiler}" != "nvcc" ) && "${with_walberla}" != "true" ]]; then if nm -o -C $(find . -name '*.so') | grep '[^a-z]exit@@GLIBC'; then echo "Found calls to exit() function in shared libraries." exit 1 @@ -409,20 +354,7 @@ if [ "${with_coverage}" = true ] || [ "${with_coverage_python}" = true ]; then if [ "${with_coverage}" = true ]; then echo "Running lcov and gcov..." codecov_opts="${codecov_opts} --gcov" - lcov --gcov-tool "${GCOV:-gcov}" \ - --quiet \ - --ignore-errors graph,mismatch,mismatch,gcov,unused \ - --directory . \ - --filter brace,blank,range,region \ - --capture \ - --rc lcov_json_module="JSON::XS" \ - --exclude "/usr/*" \ - --exclude "$(realpath .)/_deps/*" \ - --exclude "$(realpath .)/src/python/espressomd/*" \ - --exclude "$(realpath "${srcdir}")/src/walberla_bridge/src/*/generated_kernels/*" \ - --exclude "$(realpath "${srcdir}")/libs/*" \ - --exclude "*/tmpxft_*cudafe1.stub.*" \ - --output-file coverage.info + ./run_lcov.sh coverage.info fi if [ "${with_coverage_python}" = true ]; then echo "Running python3-coverage..." diff --git a/maintainer/CI/run_lcov.sh b/maintainer/CI/run_lcov.sh new file mode 100755 index 0000000000..22052acf45 --- /dev/null +++ b/maintainer/CI/run_lcov.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env sh +# +# Copyright (C) 2017-2024 The ESPResSo project +# +# This file is part of ESPResSo. +# +# ESPResSo is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# ESPResSo is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +set -e + +output="${1:-coverage.info}" +bindir="$(realpath .)" +srcdir="$(sed -nr "s/^ESPResSo_SOURCE_DIR:STATIC=(.+)/\1/p" "${bindir}/CMakeCache.txt")" + +if [ "${srcdir}" = "" ]; then + echo "Cannot extract ESPResSo_SOURCE_DIR variable from the CMake cache" >&2 + exit 2 +fi + +lcov --gcov-tool "${GCOV:-gcov}" \ + --quiet \ + --ignore-errors graph,mismatch,mismatch,gcov,unused \ + --directory . \ + --filter brace,blank,range,region \ + --capture \ + --rc lcov_json_module="JSON::XS" \ + --exclude "/usr/*" \ + --exclude "*/tmpxft_*cudafe1.stub.*" \ + --exclude "${bindir}/_deps/*" \ + --exclude "${bindir}/src/python/espressomd/*" \ + --exclude "${srcdir}/src/walberla_bridge/src/*/generated_kernels/*" \ + --exclude "${srcdir}/libs/*" \ + --output-file "${output}" diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu index 7cca223490..c9f8ae4dc9 100644 --- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cu @@ -46,13 +46,8 @@ #if defined(__NVCC__) #define RESTRICT __restrict__ -#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) #pragma nv_diagnostic push #pragma nv_diag_suppress 177 // unused variable -#else -#pragma push -#pragma diag_suppress 177 // unused variable -#endif #elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) @@ -60,20 +55,17 @@ #define RESTRICT __restrict__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #else // clang compiling CUDA code in host mode #define RESTRICT __restrict__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #endif #endif #elif defined(__GNUC__) or defined(__GNUG__) #define RESTRICT __restrict__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" #elif defined(_MSC_VER) #define RESTRICT __restrict #else @@ -101,62 +93,77 @@ namespace accessor { namespace Population { - __global__ void kernel_get_interval( +// LCOV_EXCL_START + __global__ void kernel_get( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} * RESTRICT const pop ) + {{dtype}} * RESTRICT pop ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u); pdf.set( blockIdx, threadIdx ); + pop += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u); {% for i in range(Q) -%} - pop[offset + {{i}}u] = pdf.get({{i}}u); + pop[{{i}}u] = pdf.get({{i}}u); {% endfor -%} } } - __global__ void kernel_get( + __global__ void kernel_set( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} * RESTRICT const pop ) + {{dtype}} const * RESTRICT pop ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u); pdf.set( blockIdx, threadIdx ); + pop += offset; if (pdf.isValidPosition()) { {% for i in range(Q) -%} - pop[{{i}}u] = pdf.get({{i}}u); + pdf.get({{i}}u) = pop[{{i}}u]; {% endfor -%} } } - __global__ void kernel_set_interval( + __global__ void kernel_broadcast( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} const * RESTRICT const pop ) + {{dtype}} const * RESTRICT pop ) { pdf.set( blockIdx, threadIdx ); if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u); {% for i in range(Q) -%} - pdf.get({{i}}u) = pop[offset + {{i}}u]; + pdf.get({{i}}u) = pop[{{i}}u]; {% endfor -%} } } - __global__ void kernel_set( + __global__ void kernel_set_vel( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} const * RESTRICT const pop ) + gpu::FieldAccessor< {{dtype}} > velocity, + gpu::FieldAccessor< {{dtype}} > force, + {{dtype}} const * RESTRICT pop ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{Q}}u); pdf.set( blockIdx, threadIdx ); + velocity.set( blockIdx, threadIdx ); + force.set( blockIdx, threadIdx ); + pop += offset; if (pdf.isValidPosition()) { {% for i in range(Q) -%} - pdf.get({{i}}u) = pop[{{i}}u]; + const {{dtype}} f_{{i}} = pdf.get({{i}}u) = pop[{{i}}u]; {% endfor -%} + {{momentum_density_getter | substitute_force_getter_cu | indent(8) }} + const {{dtype}} rho_inv = {{dtype}} {1} / rho; + {% for i in range(D) -%} + velocity.get({{i}}u) = md_{{i}} * rho_inv; + {% endfor %} } } +// LCOV_EXCL_STOP std::array<{{dtype}}, {{Q}}u> get( gpu::GPUField< {{dtype}} > const * pdf_field, Cell const & cell ) { CellInterval ci ( cell, cell ); - thrust::device_vector< {{dtype}} > dev_data({{Q}}u, {{dtype}} {0}); + thrust::device_vector< {{dtype}} > dev_data({{Q}}u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); auto kernel = gpu::make_kernel( kernel_get ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); @@ -172,7 +179,7 @@ namespace Population std::array< {{dtype}}, {{Q}}u > const & pop, Cell const & cell ) { - thrust::device_vector< {{dtype}} > dev_data(pop.data(), pop.data() + {{Q}}u); + thrust::device_vector< {{dtype}} > dev_data(pop.begin(), pop.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); CellInterval ci ( cell, cell ); auto kernel = gpu::make_kernel( kernel_set ); @@ -181,14 +188,32 @@ namespace Population kernel(); } + void set( + gpu::GPUField< {{dtype}} > * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, + std::array< {{dtype}}, {{Q}}u > const & pop, + Cell const & cell ) + { + thrust::device_vector< {{dtype}} > dev_data(pop.begin(), pop.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + CellInterval ci ( cell, cell ); + auto kernel = gpu::make_kernel( kernel_set_vel ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( const_cast(dev_data_ptr) ); + kernel(); + } + void initialize( gpu::GPUField< {{dtype}} > * pdf_field, std::array< {{dtype}}, {{Q}}u > const & pop ) { CellInterval ci = pdf_field->xyzSizeWithGhostLayer(); - thrust::device_vector< {{dtype}} > dev_data(pop.data(), pop.data() + {{Q}}u); + thrust::device_vector< {{dtype}} > dev_data(pop.begin(), pop.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_set ); + auto kernel = gpu::make_kernel( kernel_broadcast ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); kernel.addParam( const_cast(dev_data_ptr) ); kernel(); @@ -200,7 +225,7 @@ namespace Population { thrust::device_vector< {{dtype}} > dev_data(ci.numCells() * {{Q}}u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_get_interval ); + auto kernel = gpu::make_kernel( kernel_get ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); kernel.addParam( dev_data_ptr ); kernel(); @@ -216,89 +241,99 @@ namespace Population { thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_set_interval ); + auto kernel = gpu::make_kernel( kernel_set ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); kernel.addParam( const_cast(dev_data_ptr) ); kernel(); } -} // namespace Population -namespace Vector -{ - __global__ void kernel_get_interval( - gpu::FieldAccessor< {{dtype}} > vec, - {{dtype}} * const out ) + void set( + gpu::GPUField< {{dtype}} > * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) { - vec.set( blockIdx, threadIdx ); - if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); - {% for i in range(D) -%} - out[offset + {{i}}u] = vec.get({{i}}u); - {% endfor %} - } + thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_set_vel ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( const_cast(dev_data_ptr) ); + kernel(); } +} // namespace Population +namespace Vector +{ +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor< {{dtype}} > vec, - {{dtype}} * const out ) + {{dtype}} * u_out ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); vec.set( blockIdx, threadIdx ); + u_out += offset; if (vec.isValidPosition()) { {% for i in range(D) -%} - out[{{i}}u] = vec.get({{i}}u); + u_out[{{i}}u] = vec.get({{i}}u); {% endfor %} } } - __global__ void kernel_set_interval( + __global__ void kernel_set( gpu::FieldAccessor< {{dtype}} > vec, - {{dtype}} const * RESTRICT const u ) + {{dtype}} const * RESTRICT u_in ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); vec.set( blockIdx, threadIdx ); + u_in += offset; if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); {% for i in range(D) -%} - vec.get({{i}}u) = u[offset + {{i}}u]; + vec.get({{i}}u) = u_in[{{i}}u]; {% endfor %} } } - __global__ void kernel_set( + __global__ void kernel_broadcast( gpu::FieldAccessor< {{dtype}} > vec, - const {{dtype}} * RESTRICT const u ) + {{dtype}} const * RESTRICT u_in ) { vec.set( blockIdx, threadIdx ); if (vec.isValidPosition()) { {% for i in range(D) -%} - vec.get({{i}}u) = u[{{i}}u]; + vec.get({{i}}u) = u_in[{{i}}u]; {% endfor %} } } - __global__ void kernel_add_interval( + __global__ void kernel_add( gpu::FieldAccessor< {{dtype}} > vec, - {{dtype}} const * RESTRICT const u ) + {{dtype}} const * RESTRICT u_in ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); vec.set( blockIdx, threadIdx ); + u_in += offset; if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); {% for i in range(D) -%} - vec.get({{i}}u) += u[offset + {{i}}u]; + vec.get({{i}}u) += u_in[{{i}}u]; {% endfor %} } } - __global__ void kernel_add( + __global__ void kernel_broadcast_add( gpu::FieldAccessor< {{dtype}} > vec, - {{dtype}} const * RESTRICT const u ) + {{dtype}} const * RESTRICT u_in ) { vec.set( blockIdx, threadIdx ); if (vec.isValidPosition()) { {% for i in range(D) -%} - vec.get({{i}}u) += u[{{i}}u]; + vec.get({{i}}u) += u_in[{{i}}u]; {% endfor %} } } +// LCOV_EXCL_STOP Vector{{D}}< {{dtype}} > get( gpu::GPUField< {{dtype}} > const * vec_field, @@ -351,7 +386,7 @@ namespace Vector CellInterval ci = vec_field->xyzSizeWithGhostLayer(); thrust::device_vector< {{dtype}} > dev_data(vec.data(), vec.data() + {{D}}u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_set ); + auto kernel = gpu::make_kernel( kernel_broadcast ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) ); kernel.addParam( const_cast(dev_data_ptr) ); kernel(); @@ -364,7 +399,7 @@ namespace Vector CellInterval ci = vec_field->xyzSizeWithGhostLayer(); thrust::device_vector< {{dtype}} > dev_data(vec.data(), vec.data() + {{D}}u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_add ); + auto kernel = gpu::make_kernel( kernel_broadcast_add ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) ); kernel.addParam( const_cast(dev_data_ptr) ); kernel(); @@ -376,7 +411,7 @@ namespace Vector { thrust::device_vector< {{dtype}} > dev_data(ci.numCells() * {{D}}u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_get_interval ); + auto kernel = gpu::make_kernel( kernel_get ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) ); kernel.addParam( dev_data_ptr ); kernel(); @@ -392,7 +427,7 @@ namespace Vector { thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_set_interval ); + auto kernel = gpu::make_kernel( kernel_set ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *vec_field, ci ) ); kernel.addParam( const_cast(dev_data_ptr) ); kernel(); @@ -401,6 +436,7 @@ namespace Vector namespace Interpolation { +// LCOV_EXCL_START /** @brief Calculate interpolation weights. */ static __forceinline__ __device__ void calculate_weights( {{dtype}} const *RESTRICT const pos, @@ -495,6 +531,7 @@ namespace Interpolation } } } +// LCOV_EXCL_STOP static dim3 calculate_dim_grid(uint const threads_x, uint const blocks_per_grid_y, @@ -552,6 +589,7 @@ namespace Interpolation namespace Equilibrium { +// LCOV_EXCL_START __device__ void kernel_set_device( gpu::FieldAccessor< {{dtype}} > pdf, {{dtype}} const * RESTRICT const u, @@ -565,44 +603,49 @@ namespace Equilibrium pdf.get({{loop.index0 }}u) = {{eqTerm}}; {% endfor -%} } +// LCOV_EXCL_STOP } // namespace Equilibrium namespace Density { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} * RESTRICT const out ) + {{dtype}} * RESTRICT rho_out ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u); pdf.set( blockIdx, threadIdx ); + rho_out += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u)); {% for i in range(Q) -%} {{dtype}} const f_{{i}} = pdf.get({{i}}u); {% endfor -%} {{density_getters | indent(12)}} - out[offset] = rho; + rho_out[0u] = rho; } } __global__ void kernel_set( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} const * RESTRICT const rho_in ) + {{dtype}} const * RESTRICT rho_in ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u); pdf.set( blockIdx, threadIdx ); + rho_in += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u)); {% for i in range(Q) -%} {{dtype}} const f_{{i}} = pdf.get({{i}}u); {% endfor -%} {{unshifted_momentum_density_getter | indent(12)}} // calculate current velocity (before density change) - {{dtype}} const conversion = {{dtype}}(1) / rho; - {{dtype}} const u_old[{{D}}] = { {% for i in range(D) %}momdensity_{{i}} * conversion{% if not loop.last %}, {% endif %}{% endfor %} }; + {{dtype}} const rho_inv = {{dtype}} {1} / rho; + {{dtype}} const u_old[{{D}}] = { {% for i in range(D) %}momdensity_{{i}} * rho_inv{% if not loop.last %}, {% endif %}{% endfor %} }; - Equilibrium::kernel_set_device(pdf, u_old, rho_in[offset] {%if not compressible %} + {{dtype}}(1) {%endif%}); + Equilibrium::kernel_set_device(pdf, u_old, rho_in[0u] {%if not compressible %} + {{dtype}} {1} {%endif%}); } } +// LCOV_EXCL_STOP {{dtype}} get( gpu::GPUField< {{dtype}} > const * pdf_field, @@ -619,20 +662,6 @@ namespace Density return rho; } - void set( - gpu::GPUField< {{dtype}} > * pdf_field, - const {{dtype}} rho, - Cell const & cell ) - { - CellInterval ci ( cell, cell ); - thrust::device_vector< {{dtype}} > dev_data(1u, rho); - auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel( kernel_set ); - kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); - kernel.addParam( const_cast(dev_data_ptr) ); - kernel(); - } - std::vector< {{dtype}} > get( gpu::GPUField< {{dtype}} > const * pdf_field, CellInterval const & ci ) @@ -643,11 +672,25 @@ namespace Density kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); kernel.addParam( dev_data_ptr ); kernel(); - std::vector< {{dtype}} > out(ci.numCells()); + std::vector< {{dtype}} > out(dev_data.size()); thrust::copy(dev_data.begin(), dev_data.end(), out.begin()); return out; } + void set( + gpu::GPUField< {{dtype}} > * pdf_field, + const {{dtype}} rho, + Cell const & cell ) + { + CellInterval ci ( cell, cell ); + thrust::device_vector< {{dtype}} > dev_data(1u, rho); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_set ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addParam( const_cast(dev_data_ptr) ); + kernel(); + } + void set( gpu::GPUField< {{dtype}} > * pdf_field, std::vector< {{dtype}} > const & values, @@ -664,31 +707,95 @@ namespace Density namespace Velocity { +// LCOV_EXCL_START + __global__ void kernel_get( + gpu::FieldAccessor< {{dtype}} > pdf, + gpu::FieldAccessor< {{dtype}} > force, + {{dtype}} * RESTRICT u_out ) + { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); + pdf.set( blockIdx, threadIdx ); + force.set( blockIdx, threadIdx ); + u_out += offset; + if (pdf.isValidPosition()) { + {% for i in range(Q) -%} + {{dtype}} const f_{{i}} = pdf.get({{i}}u); + {% endfor -%} + {{momentum_density_getter | substitute_force_getter_cu | indent(8) }} + auto const rho_inv = {{dtype}} {1} / rho; + {% for i in range(D) -%} + u_out[{{i}}u] = md_{{i}} * rho_inv; + {% endfor %} + } + } + __global__ void kernel_set( gpu::FieldAccessor< {{dtype}} > pdf, + gpu::FieldAccessor< {{dtype}} > velocity, gpu::FieldAccessor< {{dtype}} > force, - {{dtype}} const * RESTRICT const u_in ) + {{dtype}} const * RESTRICT u_in ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); pdf.set( blockIdx, threadIdx ); + velocity.set( blockIdx, threadIdx ); force.set( blockIdx, threadIdx ); + u_in += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint({{D}}u)); - uint const bufsize = {{D}}u; - {{dtype}} const * RESTRICT const u = u_in + bufsize * offset; {% for i in range(Q) -%} {{dtype}} const f_{{i}} = pdf.get({{i}}u); {% endfor -%} + {{dtype}} const * RESTRICT const u = u_in; {{density_getters | indent(8)}} {{density_velocity_setter_macroscopic_values | substitute_force_getter_cu | indent(8)}} + {% for i in range(D) -%} + velocity.get({{i}}u) = u_in[{{i}}u]; + {% endfor %} {{dtype}} u_new[{{D}}] = { {% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %} }; Equilibrium::kernel_set_device(pdf, u_new, rho {%if not compressible %} + {{dtype}}(1) {%endif%}); } } +// LCOV_EXCL_STOP + + Vector{{D}}< {{dtype}} > get( + gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > const * force_field, + Cell const & cell ) + { + CellInterval ci ( cell, cell ); + thrust::device_vector< {{dtype}} > dev_data({{D}}u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_get ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( dev_data_ptr ); + kernel(); + Vector{{D}}< {{dtype}} > vec; + thrust::copy(dev_data.begin(), dev_data.end(), vec.data()); + return vec; + } + + std::vector< {{dtype}} > get( + gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > const * force_field, + CellInterval const & ci ) + { + thrust::device_vector< {{dtype}} > dev_data({{D}}u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_get ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( dev_data_ptr ); + kernel(); + std::vector< {{dtype}} > out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + return out; + } void set( gpu::GPUField< {{dtype}} > * pdf_field, - gpu::GPUField< {{dtype}} > * force_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, Vector{{D}}< {{dtype}} > const & u, Cell const & cell ) { @@ -697,33 +804,121 @@ namespace Velocity auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); auto kernel = gpu::make_kernel( kernel_set ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( const_cast(dev_data_ptr) ); + kernel(); + } + + void set( + gpu::GPUField< {{dtype}} > * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_set ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) ); kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); kernel.addParam( const_cast(dev_data_ptr) ); kernel(); } } // namespace Velocity +namespace Force { +// LCOV_EXCL_START + __global__ void kernel_set( + gpu::FieldAccessor< {{dtype}} > pdf, + gpu::FieldAccessor< {{dtype}} > velocity, + gpu::FieldAccessor< {{dtype}} > force, + {{dtype}} const * RESTRICT f_in ) + { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); + pdf.set( blockIdx, threadIdx ); + velocity.set( blockIdx, threadIdx ); + force.set( blockIdx, threadIdx ); + f_in += offset; + if (pdf.isValidPosition()) { + {% for i in range(Q) -%} + {{dtype}} const f_{{i}} = pdf.get({{i}}u); + {% endfor -%} + + {{momentum_density_getter | substitute_force_getter_pattern("force->get\(x, ?y, ?z, ?([0-9])u?\)", "f_in[\g<1>u]") | indent(8) }} + auto const rho_inv = {{dtype}} {1} / rho; + + {% for i in range(D) -%} + force.get({{i}}u) = f_in[{{i}}u]; + {% endfor %} + + {% for i in range(D) -%} + velocity.get({{i}}u) = md_{{i}} * rho_inv; + {% endfor %} + } + } +// LCOV_EXCL_STOP + + void + set( gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > * force_field, + Vector{{D}}< {{dtype}} > const & u, + Cell const & cell ) + { + CellInterval ci ( cell, cell ); + thrust::device_vector< {{dtype}} > dev_data(u.data(), u.data() + {{D}}u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_set ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( const_cast(dev_data_ptr) ); + kernel(); + } + + void + set( gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + thrust::device_vector< {{dtype}} > dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_set ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *velocity_field, ci ) ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *force_field, ci ) ); + kernel.addParam( const_cast(dev_data_ptr) ); + kernel(); + } +} // namespace Force + namespace MomentumDensity { +// LCOV_EXCL_START __global__ void kernel_sum( gpu::FieldAccessor< {{dtype}} > pdf, gpu::FieldAccessor< {{dtype}} > force, - {{dtype}} * RESTRICT const out ) + {{dtype}} * RESTRICT out ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D}}u); pdf.set( blockIdx, threadIdx ); force.set( blockIdx, threadIdx ); + out += offset; if (pdf.isValidPosition()) { - uint const bufsize = {{D}}u; - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize); {% for i in range(Q) -%} {{dtype}} const f_{{i}} = pdf.get({{i}}u); {% endfor -%} {{momentum_density_getter | substitute_force_getter_cu | indent(8) }} {% for i in range(D) -%} - out[bufsize * offset + {{i}}u] += md_{{i}}; + out[{{i}}u] += md_{{i}}; {% endfor %} } } +// LCOV_EXCL_STOP Vector{{D}}< {{dtype}} > reduce( gpu::GPUField< {{dtype}} > const * pdf_field, @@ -748,25 +943,25 @@ namespace MomentumDensity namespace PressureTensor { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor< {{dtype}} > pdf, - {{dtype}} * RESTRICT const out ) + {{dtype}} * RESTRICT p_out ) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, {{D**2}}u); pdf.set( blockIdx, threadIdx ); + p_out += offset; if (pdf.isValidPosition()) { - uint const bufsize = {{D**2}}u; - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize); {% for i in range(Q) -%} {{dtype}} const f_{{i}} = pdf.get({{i}}u); {% endfor -%} {{second_momentum_getter | indent(12) }} - {% for i in range(D) -%} - {% for j in range(D) -%} - out[bufsize * offset + {{i*D+j}}u] = p_{{i*D+j}}; - {% endfor %} + {% for i in range(D**2) -%} + p_out[{{i}}u] = p_{{i}}; {% endfor %} } } +// LCOV_EXCL_STOP Matrix{{D}}< {{dtype}} > get( gpu::GPUField< {{dtype}} > const * pdf_field, @@ -780,7 +975,22 @@ namespace PressureTensor kernel.addParam( dev_data_ptr ); kernel(); Matrix{{D}}< {{dtype}} > out; - thrust::copy(dev_data.begin(), dev_data.begin() + {{D**2}}u, out.data()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + return out; + } + + std::vector< {{dtype}} > get( + gpu::GPUField< {{dtype}} > const * pdf_field, + CellInterval const & ci ) + { + thrust::device_vector< {{dtype}} > dev_data({{D**2}}u * ci.numCells()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel( kernel_get ); + kernel.addFieldIndexingParam( gpu::FieldIndexing< {{dtype}} >::interval( *pdf_field, ci ) ); + kernel.addParam( dev_data_ptr ); + kernel(); + std::vector< {{dtype}} > out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; } } // namespace PressureTensor diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh index e75586057b..65f776abee 100644 --- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.cuh @@ -39,18 +39,6 @@ #include #include -#ifdef WALBERLA_CXX_COMPILER_IS_GNU -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#endif - -#ifdef WALBERLA_CXX_COMPILER_IS_CLANG -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" -#endif - namespace walberla { namespace {{namespace}} { namespace accessor { @@ -65,6 +53,13 @@ namespace Population { set( gpu::GPUField< {{dtype}} > * pdf_field, std::array< {{dtype}}, {{Q}}u > const & pop, Cell const & cell ); + /** @brief Set populations and recalculate velocities on a single cell. */ + void + set( gpu::GPUField< {{dtype}} > * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, + std::array< {{dtype}}, {{Q}}u > const & pop, + Cell const & cell ); /** @brief Initialize all cells with the same value. */ void initialize( gpu::GPUField< {{dtype}} > * pdf_field, @@ -78,6 +73,13 @@ namespace Population { set( gpu::GPUField< {{dtype}} > * pdf_field, std::vector< {{dtype}} > const & values, CellInterval const & ci ); + /** @brief Set populations and recalculate velocities on a cell interval. */ + void + set( gpu::GPUField< {{dtype}} > * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ); } // namespace Population namespace Vector { @@ -141,13 +143,43 @@ namespace Density { } // namespace Density namespace Velocity { + Vector{{D}}< {{dtype}} > + get( gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > const * force_field, + Cell const & cell ); + std::vector< {{dtype}} > + get( gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > const * force_field, + CellInterval const & ci ); void set( gpu::GPUField< {{dtype}} > * pdf_field, - gpu::GPUField< {{dtype}} > * force_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, Vector{{D}}< {{dtype}} > const & u, Cell const & cell ); + void + set( gpu::GPUField< {{dtype}} > * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > const * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ); } // namespace Velocity +namespace Force { + void + set( gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > * force_field, + Vector{{D}}< {{dtype}} > const & u, + Cell const & cell ); + void + set( gpu::GPUField< {{dtype}} > const * pdf_field, + gpu::GPUField< {{dtype}} > * velocity_field, + gpu::GPUField< {{dtype}} > * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ); +} // namespace Force + namespace DensityAndVelocity { std::tuple< {{dtype}} , Vector{{D}}< {{dtype}} > > get( gpu::GPUField< {{dtype}} > const * pdf_field, @@ -178,16 +210,11 @@ namespace PressureTensor { Matrix{{D}}< {{dtype}} > get( gpu::GPUField< {{dtype}} > const * pdf_field, Cell const & cell ); + std::vector< {{dtype}} > + get( gpu::GPUField< {{dtype}} > const * pdf_field, + CellInterval const & ci ); } // namespace PressureTensor } // namespace accessor } // namespace {{namespace}} } // namespace walberla - -#ifdef WALBERLA_CXX_COMPILER_IS_GNU -#pragma GCC diagnostic pop -#endif - -#ifdef WALBERLA_CXX_COMPILER_IS_CLANG -#pragma clang diagnostic pop -#endif diff --git a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h index 95f8992555..d443243bba 100644 --- a/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h +++ b/maintainer/walberla_kernels/templates/FieldAccessors.tmpl.h @@ -37,19 +37,18 @@ #include #include +#include #include #include #ifdef WALBERLA_CXX_COMPILER_IS_GNU #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #ifdef WALBERLA_CXX_COMPILER_IS_CLANG #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #endif namespace walberla { @@ -58,7 +57,7 @@ namespace accessor { namespace Population { - inline std::array<{{dtype}}, {{Q}}u> + inline auto get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, Cell const & cell ) { @@ -81,6 +80,28 @@ namespace Population {% endfor -%} } + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, + std::array<{{dtype}}, {{Q}}u> const & pop, + Cell const & cell ) + { + auto & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u]; + {% endfor -%} + + {% for c in "xyz" -%} + const auto {{c}} = cell.{{c}}(); + {% endfor -%} + {{momentum_density_getter | substitute_force_getter_cpp | indent(8) }} + const auto rho_inv = {{dtype}} {1} / rho; + {% for i in range(D) -%} + velocity_field->get(cell, uint_t{ {{i}}u }) = md_{{i}} * rho_inv; + {% endfor -%} + } + inline void initialize( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, std::array<{{dtype}}, {{Q}}u> const & pop) @@ -93,7 +114,7 @@ namespace Population }); } - inline std::vector< {{dtype}} > + inline auto get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, CellInterval const & ci ) { @@ -118,15 +139,42 @@ namespace Population CellInterval const & ci ) { assert(uint_c(values.size()) == ci.numCells() * uint_t({{Q}}u)); - auto values_ptr = values.data(); + auto pop = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u]; + {% endfor -%} + std::advance(pop, {{Q}}); + } + } + } + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + assert(uint_c(values.size()) == ci.numCells() * uint_t({{Q}}u)); + auto pop = values.data(); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); {% for i in range(Q) -%} - pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u]; + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }) = pop[{{i}}u]; + {% endfor -%} + {{momentum_density_getter | substitute_force_getter_cpp | indent(12) }} + const auto rho_inv = {{dtype}} {1} / rho; + {% for i in range(D) -%} + velocity_field->get(x, y, z, uint_t{ {{i}}u }) = md_{{i}} * rho_inv; {% endfor -%} - values_ptr += {{Q}}u; + std::advance(pop, {{Q}}); } } } @@ -135,7 +183,7 @@ namespace Population namespace Vector { - inline Vector{{D}}< {{dtype}} > + inline auto get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field, Cell const & cell ) { @@ -193,7 +241,7 @@ namespace Vector }); } - inline std::vector< {{dtype}} > + inline auto get( GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * vec_field, CellInterval const & ci ) { @@ -226,7 +274,7 @@ namespace Vector {% for i in range(D) -%} vec_field->getF( &xyz0, uint_t{ {{i}}u }) = values_ptr[{{i}}u]; {% endfor -%} - values_ptr += {{D}}u; + std::advance(values_ptr, {{D}}); } } } @@ -237,11 +285,11 @@ namespace EquilibriumDistribution { inline {{dtype}} get( stencil::Direction const direction, - Vector{{D}}< {{dtype}} > const & u = Vector{{D}}< {{dtype}} >( {{dtype}}(0.0) ), - {{dtype}} rho = {{dtype}}(1.0) ) + Vector{{D}}< {{dtype}} > const & u = Vector{{D}}< {{dtype}} >( {{dtype}} {0} ), + {{dtype}} rho = {{dtype}} {1} ) { {% if not compressible %} - rho -= {{dtype}}(1.0); + rho -= {{dtype}} {1}; {% endif %} {{equilibrium_from_direction}} } @@ -256,7 +304,7 @@ namespace Equilibrium Cell const & cell ) { {%if not compressible %} - rho -= {{dtype}}(1.0); + rho -= {{dtype}} {1}; {%endif %} {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); @@ -293,13 +341,13 @@ namespace Density {{unshifted_momentum_density_getter | indent(8)}} // calculate current velocity (before density change) - const {{dtype}} conversion = {{dtype}}(1) / rho; + const {{dtype}} conversion = {{dtype}} {1} / rho; Vector{{D}}< {{dtype}} > velocity; {% for i in range(D) -%} velocity[{{i}}u] = momdensity_{{i}} * conversion; {% endfor %} - Equilibrium::set(pdf_field, velocity, rho_in {%if not compressible %} + {{dtype}}(1) {%endif%}, cell); + Equilibrium::set(pdf_field, velocity, rho_in {%if not compressible %} + {{dtype}} {1} {%endif%}, cell); } inline std::vector< {{dtype}} > @@ -341,13 +389,13 @@ namespace Density {{unshifted_momentum_density_getter | indent(12)}} // calculate current velocity (before density change) - const {{dtype}} conversion = {{dtype}}(1) / rho; + const {{dtype}} conversion = {{dtype}} {1} / rho; Vector{{D}}< {{dtype}} > velocity; {% for i in range(D) -%} velocity[{{i}}u] = momdensity_{{i}} * conversion; {% endfor %} - Equilibrium::set(pdf_field, velocity, *values_it {%if not compressible %} + {{dtype}}(1) {%endif%}, Cell{x, y, z}); + Equilibrium::set(pdf_field, velocity, *values_it {%if not compressible %} + {{dtype}} {1} {%endif%}, Cell{x, y, z}); ++values_it; } } @@ -357,8 +405,53 @@ namespace Density namespace Velocity { + inline auto + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, + Cell const & cell ) + { + const {{dtype}} & xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {% for c in "xyz" -%} + const auto {{c}} = cell.{{c}}(); + {% endfor -%} + {{momentum_density_getter | substitute_force_getter_cpp | indent(8) }} + const {{dtype}} rho_inv = {{dtype}} {1} / rho; + + return Vector3<{{dtype}}>(md_0 * rho_inv, md_1 * rho_inv, md_2 * rho_inv); + } + + inline auto + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, + CellInterval const & ci ) + { + std::vector< {{dtype}} > out; + out.reserve(ci.numCells() * uint_t({{D}}u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + {{momentum_density_getter | substitute_force_getter_cpp | indent(12) }} + const {{dtype}} rho_inv = {{dtype}} {1} / rho; + {% for i in range(D) -%} + out.emplace_back(md_{{i}} * rho_inv); + {% endfor -%} + } + } + } + return out; + } + inline void set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field, GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, Vector{{D}}< {{dtype}} > const & u, Cell const & cell ) @@ -373,14 +466,113 @@ namespace Velocity const auto {{c}} = cell.{{c}}(); {% endfor -%} {{density_velocity_setter_macroscopic_values | substitute_force_getter_cpp | indent(8)}} + {% for i in range(D) -%} + velocity_field->get(x, y, z, uint_t{ {{i}}u }) = u[{{i}}u]; + {% endfor %} - Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}}(1) {%endif%}, cell); + Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}} {1} {%endif%}, cell); + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + assert(uint_c(values.size()) == ci.numCells() * uint_t({{D}}u)); + auto u = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + {{dtype}} & pdf_xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {{dtype}} & vel_xyz0 = velocity_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &pdf_xyz0, uint_t{ {{i}}u }); + {% endfor -%} + {{density_getters | indent(8)}} + + {{density_velocity_setter_macroscopic_values | substitute_force_getter_cpp | indent(8)}} + {% for i in range(D) -%} + velocity_field->getF( &vel_xyz0, uint_t{ {{i}}u }) = u[{{i}}u]; + {% endfor %} + std::advance(u, {{D}}); + + Equilibrium::set(pdf_field, Vector{{D}}<{{dtype}}>({% for i in range(D) %}u_{{i}}{% if not loop.last %}, {% endif %}{% endfor %}), rho {%if not compressible %} + {{dtype}} {1} {%endif%}, Cell{x, y, z}); + } + } + } } } // namespace Velocity +namespace Force +{ + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * force_field, + Vector{{D}}< {{dtype}} > const & force, + Cell const & cell ) + { + {{dtype}} const & pdf_xyz0 = pdf_field->get(cell, uint_t{ 0u }); + {{dtype}} & vel_xyz0 = velocity_field->get(cell, uint_t{ 0u }); + {{dtype}} & laf_xyz0 = force_field->get(cell, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &pdf_xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{momentum_density_getter | substitute_force_getter_pattern("force->get\(x, ?y, ?z, ?([0-9])u?\)", "force[\g<1>u]") | indent(8) }} + auto const rho_inv = {{dtype}} {1} / rho; + + {% for i in range(D) -%} + force_field->getF( &laf_xyz0, uint_t{ {{i}}u }) = force[{{i}}u]; + {% endfor %} + + {% for i in range(D) -%} + velocity_field->getF( &vel_xyz0, uint_t{ {{i}}u }) = md_{{i}} * rho_inv; + {% endfor %} + } + + inline void + set( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * velocity_field, + GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > * force_field, + std::vector< {{dtype}} > const & values, + CellInterval const & ci ) + { + assert(uint_c(values.size()) == ci.numCells() * uint_t({{D}}u)); + auto force = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + {{dtype}} const & pdf_xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {{dtype}} & vel_xyz0 = velocity_field->get(x, y, z, uint_t{ 0u }); + {{dtype}} & laf_xyz0 = force_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &pdf_xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{momentum_density_getter | substitute_force_getter_pattern("force->get\(x, ?y, ?z, ?([0-9])u?\)", "force[\g<1>u]") | indent(12) }} + auto const rho_inv = {{dtype}} {1} / rho; + + {% for i in range(D) -%} + force_field->getF( &laf_xyz0, uint_t{ {{i}}u }) = force[{{i}}u]; + {% endfor %} + + {% for i in range(D) -%} + velocity_field->getF( &vel_xyz0, uint_t{ {{i}}u }) = md_{{i}} * rho_inv; + {% endfor %} + + std::advance(force, {{D}}); + } + } + } + } +} // namespace Force + namespace MomentumDensity { - inline Vector{{D}}< {{dtype}} > + inline auto reduce( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, GhostLayerField< {{dtype}}, uint_t{ {{D}}u } > const * force_field ) { @@ -403,7 +595,7 @@ namespace MomentumDensity namespace PressureTensor { - inline Matrix{{D}}< {{dtype}} > + inline auto get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, Cell const & cell ) { @@ -422,6 +614,33 @@ namespace PressureTensor {% endfor %} return pressureTensor; } + + inline auto + get( GhostLayerField< {{dtype}}, uint_t{ {{Q}}u } > const * pdf_field, + CellInterval const & ci ) + { + std::vector< {{dtype}} > out; + out.reserve(ci.numCells() * uint_t({{D**2}}u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const {{dtype}} & xyz0 = pdf_field->get(x, y, z, uint_t{ 0u }); + {% for i in range(Q) -%} + const {{dtype}} f_{{i}} = pdf_field->getF( &xyz0, uint_t{ {{i}}u }); + {% endfor -%} + + {{second_momentum_getter | indent(12) }} + + {% for i in range(D) -%} + {% for j in range(D) -%} + out.emplace_back(p_{{i*D+j}}); + {% endfor %} + {% endfor %} + } + } + } + return out; + } } // namespace PressureTensor } // namespace accessor diff --git a/maintainer/walberla_kernels/walberla_lbm_generation.py b/maintainer/walberla_kernels/walberla_lbm_generation.py index f746fe2f8b..6aec095662 100644 --- a/maintainer/walberla_kernels/walberla_lbm_generation.py +++ b/maintainer/walberla_kernels/walberla_lbm_generation.py @@ -19,6 +19,7 @@ # import os +import re import sympy as sp import pystencils as ps import lbmpy_walberla @@ -104,6 +105,12 @@ def equations_to_code(equations, variable_prefix="", return "\n".join(result) +def substitute_force_getter_pattern(code, pattern, subst): + re_pat = re.compile(pattern) + assert re_pat.search(code) is not None, f"pattern '{pattern} not found in '''\n{code}\n'''" # nopep8 + return re_pat.sub(subst, code) + + def substitute_force_getter_cpp(code): field_getter = "force->" assert field_getter in code is not None, f"pattern '{field_getter} not found in '''\n{code}\n'''" # nopep8 @@ -120,6 +127,7 @@ def substitute_force_getter_cu(code): def add_espresso_filters_to_jinja_env(jinja_env): jinja_env.filters["substitute_force_getter_cpp"] = substitute_force_getter_cpp jinja_env.filters["substitute_force_getter_cu"] = substitute_force_getter_cu + jinja_env.filters["substitute_force_getter_pattern"] = substitute_force_getter_pattern def generate_macroscopic_values_accessors(ctx, config, lb_method, templates): diff --git a/src/core/reaction_methods/tests/CMakeLists.txt b/src/core/reaction_methods/tests/CMakeLists.txt index 7dd67a9e63..e63fa9630a 100644 --- a/src/core/reaction_methods/tests/CMakeLists.txt +++ b/src/core/reaction_methods/tests/CMakeLists.txt @@ -1,4 +1,5 @@ -# Copyright (C) 2021-2022 The ESPResSo project +# +# Copyright (C) 2021-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -14,14 +15,13 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . +# -include(unit_test) +include(espresso_unit_test) -unit_test(NAME SingleReaction_test SRC SingleReaction_test.cpp DEPENDS - espresso::core) -unit_test(NAME ReactionAlgorithm_test SRC ReactionAlgorithm_test.cpp DEPENDS - espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) -unit_test(NAME particle_tracking_test SRC particle_tracking_test.cpp DEPENDS - espresso::core Boost::mpi MPI::MPI_CXX) -unit_test(NAME reaction_methods_utils_test SRC reaction_methods_utils_test.cpp - DEPENDS espresso::core) +espresso_unit_test(SRC SingleReaction_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC ReactionAlgorithm_test.cpp DEPENDS espresso::core + Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC particle_tracking_test.cpp DEPENDS espresso::core + Boost::mpi MPI::MPI_CXX) +espresso_unit_test(SRC reaction_methods_utils_test.cpp DEPENDS espresso::core) diff --git a/src/core/unit_tests/CMakeLists.txt b/src/core/unit_tests/CMakeLists.txt index 60fcadae76..92d56587c4 100644 --- a/src/core/unit_tests/CMakeLists.txt +++ b/src/core/unit_tests/CMakeLists.txt @@ -19,68 +19,59 @@ # along with this program. If not, see . # -include(unit_test) +include(espresso_unit_test) -# Add tests here -unit_test(NAME RuntimeError_test SRC RuntimeError_test.cpp DEPENDS - Boost::serialization) -unit_test(NAME RuntimeErrorCollector_test SRC RuntimeErrorCollector_test.cpp - DEPENDS espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) -unit_test(NAME EspressoSystemStandAlone_parallel_test SRC - EspressoSystemStandAlone_test.cpp DEPENDS espresso::core Boost::mpi - MPI::MPI_CXX NUM_PROC 4) -unit_test(NAME EspressoSystemStandAlone_serial_test SRC - EspressoSystemStandAlone_test.cpp DEPENDS espresso::core Boost::mpi - MPI::MPI_CXX NUM_PROC 1) -unit_test(NAME EspressoSystem_test SRC EspressoSystem_test.cpp DEPENDS - espresso::core Boost::mpi NUM_PROC 2) -unit_test(NAME ResourceCleanup_test SRC ResourceCleanup_test.cpp DEPENDS - espresso::core Boost::mpi NUM_PROC 2) -unit_test(NAME MpiCallbacks_test SRC MpiCallbacks_test.cpp DEPENDS - espresso::utils Boost::mpi MPI::MPI_CXX NUM_PROC 2) -unit_test(NAME ParticleIterator_test SRC ParticleIterator_test.cpp DEPENDS - espresso::utils) -unit_test(NAME link_cell_test SRC link_cell_test.cpp DEPENDS espresso::utils) -unit_test(NAME Particle_test SRC Particle_test.cpp DEPENDS espresso::utils - Boost::serialization) -unit_test(NAME Particle_serialization_test SRC Particle_serialization_test.cpp - DEPENDS espresso::utils Boost::serialization) -unit_test(NAME rotation_test SRC rotation_test.cpp DEPENDS espresso::utils - espresso::core) -unit_test(NAME field_coupling_couplings SRC field_coupling_couplings_test.cpp - DEPENDS espresso::utils) -unit_test(NAME field_coupling_fields SRC field_coupling_fields_test.cpp DEPENDS - espresso::utils) -unit_test(NAME field_coupling_force_field SRC - field_coupling_force_field_test.cpp DEPENDS espresso::utils) -unit_test(NAME periodic_fold_test SRC periodic_fold_test.cpp) -unit_test(NAME grid_test SRC grid_test.cpp DEPENDS espresso::core) -unit_test(NAME lees_edwards_test SRC lees_edwards_test.cpp DEPENDS - espresso::core) -unit_test(NAME BoxGeometry_test SRC BoxGeometry_test.cpp DEPENDS espresso::core) -unit_test(NAME LocalBox_test SRC LocalBox_test.cpp DEPENDS espresso::core) -unit_test(NAME Verlet_list_test SRC Verlet_list_test.cpp DEPENDS espresso::core - NUM_PROC 4) -unit_test(NAME VerletCriterion_test SRC VerletCriterion_test.cpp DEPENDS - espresso::core) -unit_test(NAME thermostats_test SRC thermostats_test.cpp DEPENDS espresso::core) -unit_test(NAME random_test SRC random_test.cpp DEPENDS espresso::utils - Random123) -unit_test(NAME BondList_test SRC BondList_test.cpp DEPENDS espresso::core) -unit_test(NAME energy_test SRC energy_test.cpp DEPENDS espresso::core) -unit_test(NAME bonded_interactions_map_test SRC - bonded_interactions_map_test.cpp DEPENDS espresso::core) -unit_test(NAME bond_breakage_test SRC bond_breakage_test.cpp DEPENDS - espresso::core) +espresso_unit_test(SRC RuntimeError_test.cpp DEPENDS Boost::serialization) +espresso_unit_test(SRC RuntimeErrorCollector_test.cpp DEPENDS espresso::core + Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test( + NAME EspressoSystemStandAlone_parallel_test SRC + EspressoSystemStandAlone_test.cpp DEPENDS espresso::core Boost::mpi + MPI::MPI_CXX NUM_PROC 4) +espresso_unit_test( + NAME EspressoSystemStandAlone_serial_test SRC + EspressoSystemStandAlone_test.cpp DEPENDS espresso::core Boost::mpi + MPI::MPI_CXX NUM_PROC 1) +espresso_unit_test(SRC EspressoSystem_test.cpp DEPENDS espresso::core + Boost::mpi NUM_PROC 2) +espresso_unit_test(SRC ResourceCleanup_test.cpp DEPENDS espresso::core + Boost::mpi NUM_PROC 2) +espresso_unit_test(SRC MpiCallbacks_test.cpp DEPENDS espresso::utils Boost::mpi + MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC ParticleIterator_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC link_cell_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC Particle_test.cpp DEPENDS espresso::utils + Boost::serialization) +espresso_unit_test(SRC Particle_serialization_test.cpp DEPENDS espresso::utils + Boost::serialization) +espresso_unit_test(SRC rotation_test.cpp DEPENDS espresso::utils espresso::core) +espresso_unit_test(SRC field_coupling_couplings_test.cpp DEPENDS + espresso::utils) +espresso_unit_test(SRC field_coupling_fields_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC field_coupling_force_field_test.cpp DEPENDS + espresso::utils) +espresso_unit_test(SRC periodic_fold_test.cpp) +espresso_unit_test(SRC grid_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC lees_edwards_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC BoxGeometry_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC LocalBox_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC Verlet_list_test.cpp DEPENDS espresso::core NUM_PROC 4) +espresso_unit_test(SRC VerletCriterion_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC thermostats_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC random_test.cpp DEPENDS espresso::utils Random123) +espresso_unit_test(SRC BondList_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC energy_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC bonded_interactions_map_test.cpp DEPENDS espresso::core) +espresso_unit_test(SRC bond_breakage_test.cpp DEPENDS espresso::core) if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") # AppleClang doesn't implement C++17's mathematical special functions - unit_test(NAME specfunc_test SRC specfunc_test.cpp DEPENDS espresso::utils - espresso::core) + espresso_unit_test(SRC specfunc_test.cpp DEPENDS espresso::utils + espresso::core) endif() -unit_test(NAME lb_particle_coupling_test SRC lb_particle_coupling_test.cpp - DEPENDS espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) -unit_test(NAME ek_interface_test SRC ek_interface_test.cpp DEPENDS - espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC lb_particle_coupling_test.cpp DEPENDS espresso::core + Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC ek_interface_test.cpp DEPENDS espresso::core Boost::mpi + MPI::MPI_CXX NUM_PROC 2) if(ESPRESSO_BUILD_WITH_WALBERLA) target_link_libraries( lb_particle_coupling_test PRIVATE espresso::walberla @@ -90,15 +81,12 @@ if(ESPRESSO_BUILD_WITH_WALBERLA) endif() if(ESPRESSO_BUILD_WITH_FFTW) - unit_test(NAME p3m_test SRC p3m_test.cpp DEPENDS espresso::utils - espresso::core) - unit_test(NAME fft_test SRC fft_test.cpp DEPENDS espresso::utils - espresso::core) - unit_test(NAME math_test SRC math_test.cpp DEPENDS espresso::utils - espresso::core) + espresso_unit_test(SRC p3m_test.cpp DEPENDS espresso::utils espresso::core) + espresso_unit_test(SRC fft_test.cpp DEPENDS espresso::utils espresso::core) + espresso_unit_test(SRC math_test.cpp DEPENDS espresso::utils espresso::core) endif() if(ESPRESSO_BUILD_WITH_CUDA) - unit_test(NAME cuda_test SRC cuda_test.cu DEPENDS espresso::utils - espresso::core espresso::cuda) + espresso_unit_test(SRC cuda_test.cu DEPENDS espresso::utils espresso::core + espresso::cuda) endif() diff --git a/src/particle_observables/tests/CMakeLists.txt b/src/particle_observables/tests/CMakeLists.txt index 244c47bfab..241c6d12a6 100644 --- a/src/particle_observables/tests/CMakeLists.txt +++ b/src/particle_observables/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2022 The ESPResSo project +# Copyright (C) 2020-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,11 +17,11 @@ # along with this program. If not, see . # -include(unit_test) +include(espresso_unit_test) -unit_test(NAME properties_test SRC properties.cpp DEPENDS - espresso::particle_observables) -unit_test(NAME algorithms_test SRC algorithms.cpp DEPENDS - espresso::particle_observables) -unit_test(NAME observables_test SRC observables.cpp DEPENDS - espresso::particle_observables) +espresso_unit_test(SRC properties_test.cpp DEPENDS + espresso::particle_observables) +espresso_unit_test(SRC algorithms_test.cpp DEPENDS + espresso::particle_observables) +espresso_unit_test(SRC observables_test.cpp DEPENDS + espresso::particle_observables) diff --git a/src/particle_observables/tests/algorithms.cpp b/src/particle_observables/tests/algorithms_test.cpp similarity index 100% rename from src/particle_observables/tests/algorithms.cpp rename to src/particle_observables/tests/algorithms_test.cpp diff --git a/src/particle_observables/tests/observables.cpp b/src/particle_observables/tests/observables_test.cpp similarity index 100% rename from src/particle_observables/tests/observables.cpp rename to src/particle_observables/tests/observables_test.cpp diff --git a/src/particle_observables/tests/properties.cpp b/src/particle_observables/tests/properties_test.cpp similarity index 100% rename from src/particle_observables/tests/properties.cpp rename to src/particle_observables/tests/properties_test.cpp diff --git a/src/script_interface/tests/CMakeLists.txt b/src/script_interface/tests/CMakeLists.txt index 399e44255a..dc34f91e1b 100644 --- a/src/script_interface/tests/CMakeLists.txt +++ b/src/script_interface/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2022 The ESPResSo project +# Copyright (C) 2020-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,46 +17,42 @@ # along with this program. If not, see . # -include(unit_test) +include(espresso_unit_test) -unit_test(NAME ObjectHandle_test SRC ObjectHandle_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME AutoParameters_test SRC AutoParameters_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME AutoParameter_test SRC AutoParameter_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME Variant_test SRC Variant_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME get_value_test SRC get_value_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME None_test SRC None_test.cpp DEPENDS espresso::script_interface) -unit_test(NAME reduction_test SRC reduction_test.cpp DEPENDS - espresso::script_interface Boost::mpi MPI::MPI_CXX NUM_PROC 4) -unit_test(NAME LocalContext_test SRC LocalContext_test.cpp DEPENDS - espresso::script_interface Boost::mpi MPI::MPI_CXX NUM_PROC 1) -unit_test(NAME GlobalContext_test SRC GlobalContext_test.cpp DEPENDS - espresso::script_interface Boost::mpi MPI::MPI_CXX NUM_PROC 2) -unit_test(NAME Exception_test SRC Exception_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME ParallelExceptionHandler_test SRC - ParallelExceptionHandler_test.cpp DEPENDS espresso::script_interface - espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) -unit_test(NAME packed_variant_test SRC packed_variant_test.cpp DEPENDS - espresso::script_interface) -unit_test(NAME ObjectList_test SRC ObjectList_test.cpp DEPENDS - espresso::script_interface espresso::core Boost::mpi) -unit_test(NAME ObjectMap_test SRC ObjectMap_test.cpp DEPENDS - espresso::script_interface espresso::core Boost::mpi) -unit_test(NAME Accumulators_test SRC Accumulators_test.cpp DEPENDS - espresso::script_interface espresso::core Boost::mpi MPI::MPI_CXX - NUM_PROC 2) -unit_test(NAME Constraints_test SRC Constraints_test.cpp DEPENDS - espresso::script_interface espresso::core) -unit_test(NAME Actors_test SRC Actors_test.cpp DEPENDS - espresso::script_interface espresso::core) -unit_test(NAME ConstantpHEnsemble_test SRC ConstantpHEnsemble_test.cpp DEPENDS - espresso::core espresso::script_interface Boost::mpi MPI::MPI_CXX - NUM_PROC 2) -unit_test(NAME ReactionEnsemble_test SRC ReactionEnsemble_test.cpp DEPENDS - espresso::core espresso::script_interface Boost::mpi MPI::MPI_CXX - NUM_PROC 2) +espresso_unit_test(SRC ObjectHandle_test.cpp DEPENDS espresso::script_interface) +espresso_unit_test(SRC AutoParameters_test.cpp DEPENDS + espresso::script_interface) +espresso_unit_test(SRC AutoParameter_test.cpp DEPENDS + espresso::script_interface) +espresso_unit_test(SRC Variant_test.cpp DEPENDS espresso::script_interface) +espresso_unit_test(SRC get_value_test.cpp DEPENDS espresso::script_interface) +espresso_unit_test(SRC None_test.cpp DEPENDS espresso::script_interface) +espresso_unit_test(SRC reduction_test.cpp DEPENDS espresso::script_interface + Boost::mpi MPI::MPI_CXX NUM_PROC 4) +espresso_unit_test(SRC LocalContext_test.cpp DEPENDS espresso::script_interface + Boost::mpi MPI::MPI_CXX NUM_PROC 1) +espresso_unit_test( + SRC GlobalContext_test.cpp DEPENDS espresso::script_interface Boost::mpi + MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC Exception_test.cpp DEPENDS espresso::script_interface) +espresso_unit_test( + SRC ParallelExceptionHandler_test.cpp DEPENDS espresso::script_interface + espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC packed_variant_test.cpp DEPENDS + espresso::script_interface) +espresso_unit_test(SRC ObjectList_test.cpp DEPENDS espresso::script_interface + espresso::core Boost::mpi) +espresso_unit_test(SRC ObjectMap_test.cpp DEPENDS espresso::script_interface + espresso::core Boost::mpi) +espresso_unit_test(SRC Accumulators_test.cpp DEPENDS espresso::script_interface + espresso::core Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test(SRC Constraints_test.cpp DEPENDS espresso::script_interface + espresso::core) +espresso_unit_test(SRC Actors_test.cpp DEPENDS espresso::script_interface + espresso::core) +espresso_unit_test( + SRC ConstantpHEnsemble_test.cpp DEPENDS espresso::core + espresso::script_interface Boost::mpi MPI::MPI_CXX NUM_PROC 2) +espresso_unit_test( + SRC ReactionEnsemble_test.cpp DEPENDS espresso::core + espresso::script_interface Boost::mpi MPI::MPI_CXX NUM_PROC 2) diff --git a/src/script_interface/thermostat/thermostat.hpp b/src/script_interface/thermostat/thermostat.hpp index 1f5c223f01..57318778f1 100644 --- a/src/script_interface/thermostat/thermostat.hpp +++ b/src/script_interface/thermostat/thermostat.hpp @@ -612,6 +612,12 @@ class Thermostat : public AutoParameters { Variant do_call_method(std::string const &name, VariantMap const ¶ms) override { + if (params.contains("act_on_virtual")) { + context()->parallel_try_catch([&]() { + throw std::runtime_error( + name + "() got an unexpected keyword argument 'act_on_virtual'"); + }); + } if (name == "set_langevin") { setup_thermostat(langevin, params); return {}; diff --git a/src/shapes/unit_tests/CMakeLists.txt b/src/shapes/unit_tests/CMakeLists.txt index 61480e1a23..5123a75d2c 100644 --- a/src/shapes/unit_tests/CMakeLists.txt +++ b/src/shapes/unit_tests/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2016-2022 The ESPResSo project +# Copyright (C) 2016-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,16 +17,14 @@ # along with this program. If not, see . # -include(unit_test) -unit_test(NAME Wall_test SRC Wall_test.cpp DEPENDS espresso::shapes - espresso::utils) -unit_test(NAME HollowConicalFrustum_test SRC HollowConicalFrustum_test.cpp - DEPENDS espresso::shapes espresso::utils) -unit_test(NAME Union_test SRC Union_test.cpp DEPENDS espresso::shapes - espresso::utils) -unit_test(NAME Ellipsoid_test SRC Ellipsoid_test.cpp DEPENDS espresso::shapes - espresso::utils) -unit_test(NAME Sphere_test SRC Sphere_test.cpp DEPENDS espresso::shapes - espresso::utils) -unit_test(NAME NoWhere_test SRC NoWhere_test.cpp DEPENDS espresso::shapes - espresso::utils) +include(espresso_unit_test) + +espresso_unit_test(SRC Wall_test.cpp DEPENDS espresso::shapes espresso::utils) +espresso_unit_test(SRC HollowConicalFrustum_test.cpp DEPENDS espresso::shapes + espresso::utils) +espresso_unit_test(SRC Union_test.cpp DEPENDS espresso::shapes espresso::utils) +espresso_unit_test(SRC Ellipsoid_test.cpp DEPENDS espresso::shapes + espresso::utils) +espresso_unit_test(SRC Sphere_test.cpp DEPENDS espresso::shapes espresso::utils) +espresso_unit_test(SRC NoWhere_test.cpp DEPENDS espresso::shapes + espresso::utils) diff --git a/src/utils/tests/CMakeLists.txt b/src/utils/tests/CMakeLists.txt index 89755749fa..c7f954e78b 100644 --- a/src/utils/tests/CMakeLists.txt +++ b/src/utils/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2010-2022 The ESPResSo project +# Copyright (C) 2010-2024 The ESPResSo project # Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 # Max-Planck-Institute for Polymer Research, Theory Group # @@ -19,92 +19,76 @@ # along with this program. If not, see . # -include(unit_test) +include(espresso_unit_test) -unit_test(NAME Vector_test SRC Vector_test.cpp DEPENDS espresso::utils) -unit_test(NAME Factory_test SRC Factory_test.cpp DEPENDS espresso::utils) -unit_test(NAME NumeratedContainer_test SRC NumeratedContainer_test.cpp DEPENDS - espresso::utils) -unit_test(NAME keys_test SRC keys_test.cpp DEPENDS espresso::utils) -unit_test(NAME Cache_test SRC Cache_test.cpp DEPENDS espresso::utils) -unit_test(NAME histogram SRC histogram.cpp DEPENDS espresso::utils) -unit_test(NAME accumulator SRC accumulator.cpp DEPENDS espresso::utils - Boost::serialization) -unit_test(NAME int_pow SRC int_pow_test.cpp DEPENDS espresso::utils) -unit_test(NAME sgn SRC sgn_test.cpp DEPENDS espresso::utils) -unit_test(NAME AS_erfc_part SRC AS_erfc_part_test.cpp DEPENDS espresso::utils) -unit_test(NAME permute_ifield_test SRC permute_ifield_test.cpp DEPENDS - espresso::utils) -unit_test(NAME vec_rotate SRC vec_rotate_test.cpp DEPENDS espresso::utils) -unit_test(NAME tensor_product SRC tensor_product_test.cpp DEPENDS - espresso::utils) -unit_test(NAME linear_interpolation SRC linear_interpolation_test.cpp DEPENDS - espresso::utils) -unit_test(NAME interpolation_gradient SRC interpolation_gradient_test.cpp - DEPENDS espresso::utils) -unit_test(NAME interpolation SRC interpolation_test.cpp DEPENDS espresso::utils) -unit_test(NAME bspline_test SRC bspline_test.cpp DEPENDS espresso::utils) -unit_test(NAME matrix_vector_product SRC matrix_vector_product.cpp DEPENDS - espresso::utils) -unit_test(NAME index_test SRC index_test.cpp DEPENDS espresso::utils) -unit_test(NAME tuple_test SRC tuple_test.cpp DEPENDS espresso::utils) -unit_test(NAME Array_test SRC Array_test.cpp DEPENDS Boost::serialization - espresso::utils) -unit_test(NAME contains_test SRC contains_test.cpp DEPENDS espresso::utils) -unit_test(NAME Counter_test SRC Counter_test.cpp DEPENDS espresso::utils - Boost::serialization) -unit_test(NAME RunningAverage_test SRC RunningAverage_test.cpp DEPENDS - espresso::utils) -unit_test(NAME for_each_pair_test SRC for_each_pair_test.cpp DEPENDS - espresso::utils) -unit_test(NAME raster_test SRC raster_test.cpp DEPENDS espresso::utils) -unit_test(NAME make_lin_space_test SRC make_lin_space_test.cpp DEPENDS - espresso::utils) -unit_test(NAME sampling_test SRC sampling_test.cpp DEPENDS espresso::utils) -unit_test(NAME coordinate_transformation_test SRC coordinate_transformation.cpp - DEPENDS espresso::utils) -unit_test(NAME cylindrical_transformation_test SRC - cylindrical_transformation.cpp DEPENDS espresso::utils) -unit_test(NAME rotation_matrix_test SRC rotation_matrix_test.cpp DEPENDS - espresso::utils) -unit_test(NAME quaternion_test SRC quaternion_test.cpp DEPENDS espresso::utils) -unit_test(NAME mask_test SRC mask_test.cpp DEPENDS espresso::utils) -unit_test(NAME type_traits_test SRC type_traits_test.cpp DEPENDS - espresso::utils) -unit_test(NAME uniform_test SRC uniform_test.cpp DEPENDS espresso::utils) -unit_test(NAME memcpy_archive_test SRC memcpy_archive_test.cpp DEPENDS - espresso::utils) -unit_test(NAME triangle_functions_test SRC triangle_functions_test.cpp DEPENDS - espresso::utils) -unit_test(NAME Bag_test SRC Bag_test.cpp DEPENDS espresso::utils - Boost::serialization) -unit_test(NAME integral_parameter_test SRC integral_parameter_test.cpp DEPENDS - espresso::utils) -unit_test(NAME flatten_test SRC flatten_test.cpp DEPENDS espresso::utils) -unit_test(NAME pack_test SRC pack_test.cpp DEPENDS Boost::serialization - espresso::utils) -unit_test(NAME unordered_map_test SRC unordered_map_test.cpp DEPENDS - Boost::serialization espresso::utils) -unit_test(NAME u32_to_u64_test SRC u32_to_u64_test.cpp DEPENDS espresso::utils - NUM_PROC 1) -unit_test(NAME gather_buffer_test SRC gather_buffer_test.cpp DEPENDS - espresso::utils::mpi Boost::mpi MPI::MPI_CXX NUM_PROC 4) -unit_test(NAME scatter_buffer_test SRC scatter_buffer_test.cpp DEPENDS - espresso::utils::mpi Boost::mpi MPI::MPI_CXX NUM_PROC 4) -unit_test(NAME all_compare_test SRC all_compare_test.cpp DEPENDS - espresso::utils::mpi Boost::mpi MPI::MPI_CXX NUM_PROC 3) -unit_test(NAME gatherv_test SRC gatherv_test.cpp DEPENDS espresso::utils::mpi - Boost::mpi MPI::MPI_CXX NUM_PROC 3) -unit_test(NAME iall_gatherv_test SRC iall_gatherv_test.cpp DEPENDS - espresso::utils::mpi Boost::mpi MPI::MPI_CXX NUM_PROC 3) -unit_test(NAME sendrecv_test SRC sendrecv_test.cpp DEPENDS espresso::utils::mpi - Boost::mpi MPI::MPI_CXX espresso::utils NUM_PROC 3) -unit_test(NAME serialization_test SRC serialization_test.cpp DEPENDS - espresso::utils Boost::serialization Boost::mpi MPI::MPI_CXX NUM_PROC - 1) -unit_test(NAME matrix_test SRC matrix_test.cpp DEPENDS espresso::utils - Boost::serialization NUM_PROC 1) -unit_test(NAME orthonormal_vec_test SRC orthonormal_vec_test.cpp DEPENDS - espresso::utils Boost::serialization NUM_PROC 1) -unit_test(NAME reduce_optional_test SRC reduce_optional_test.cpp DEPENDS - espresso::utils::mpi Boost::mpi MPI::MPI_CXX NUM_PROC 4) +espresso_unit_test(SRC Vector_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC Factory_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC NumeratedContainer_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC keys_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC Cache_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC histogram_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC accumulator_test.cpp DEPENDS espresso::utils + Boost::serialization) +espresso_unit_test(SRC int_pow_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC sgn_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC AS_erfc_part_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC permute_ifield_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC vec_rotate_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC tensor_product_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC linear_interpolation_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC interpolation_gradient_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC interpolation_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC bspline_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC matrix_vector_product_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC index_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC tuple_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC Array_test.cpp DEPENDS Boost::serialization + espresso::utils) +espresso_unit_test(SRC contains_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC Counter_test.cpp DEPENDS espresso::utils + Boost::serialization) +espresso_unit_test(SRC RunningAverage_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC for_each_pair_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC raster_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC make_lin_space_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC sampling_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC coordinate_transformation_test.cpp DEPENDS + espresso::utils) +espresso_unit_test(SRC cylindrical_transformation_test.cpp DEPENDS + espresso::utils) +espresso_unit_test(SRC rotation_matrix_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC quaternion_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC mask_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC type_traits_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC uniform_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC memcpy_archive_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC triangle_functions_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC Bag_test.cpp DEPENDS espresso::utils + Boost::serialization) +espresso_unit_test(SRC integral_parameter_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC flatten_test.cpp DEPENDS espresso::utils) +espresso_unit_test(SRC pack_test.cpp DEPENDS Boost::serialization + espresso::utils) +espresso_unit_test(SRC unordered_map_test.cpp DEPENDS Boost::serialization + espresso::utils) +espresso_unit_test(SRC u32_to_u64_test.cpp DEPENDS espresso::utils NUM_PROC 1) +espresso_unit_test(SRC gather_buffer_test.cpp DEPENDS espresso::utils::mpi + Boost::mpi MPI::MPI_CXX NUM_PROC 4) +espresso_unit_test(SRC scatter_buffer_test.cpp DEPENDS espresso::utils::mpi + Boost::mpi MPI::MPI_CXX NUM_PROC 4) +espresso_unit_test(SRC all_compare_test.cpp DEPENDS espresso::utils::mpi + Boost::mpi MPI::MPI_CXX NUM_PROC 3) +espresso_unit_test(SRC gatherv_test.cpp DEPENDS espresso::utils::mpi Boost::mpi + MPI::MPI_CXX NUM_PROC 3) +espresso_unit_test(SRC iall_gatherv_test.cpp DEPENDS espresso::utils::mpi + Boost::mpi MPI::MPI_CXX NUM_PROC 3) +espresso_unit_test(SRC sendrecv_test.cpp DEPENDS espresso::utils::mpi + Boost::mpi MPI::MPI_CXX espresso::utils NUM_PROC 3) +espresso_unit_test(SRC serialization_test.cpp DEPENDS espresso::utils + Boost::serialization Boost::mpi MPI::MPI_CXX NUM_PROC 1) +espresso_unit_test(SRC matrix_test.cpp DEPENDS espresso::utils + Boost::serialization NUM_PROC 1) +espresso_unit_test(SRC orthonormal_vec_test.cpp DEPENDS espresso::utils + Boost::serialization NUM_PROC 1) +espresso_unit_test(SRC reduce_optional_test.cpp DEPENDS espresso::utils::mpi + Boost::mpi MPI::MPI_CXX NUM_PROC 4) diff --git a/src/utils/tests/accumulator.cpp b/src/utils/tests/accumulator_test.cpp similarity index 100% rename from src/utils/tests/accumulator.cpp rename to src/utils/tests/accumulator_test.cpp diff --git a/src/utils/tests/coordinate_transformation.cpp b/src/utils/tests/coordinate_transformation_test.cpp similarity index 100% rename from src/utils/tests/coordinate_transformation.cpp rename to src/utils/tests/coordinate_transformation_test.cpp diff --git a/src/utils/tests/cylindrical_transformation.cpp b/src/utils/tests/cylindrical_transformation_test.cpp similarity index 100% rename from src/utils/tests/cylindrical_transformation.cpp rename to src/utils/tests/cylindrical_transformation_test.cpp diff --git a/src/utils/tests/histogram.cpp b/src/utils/tests/histogram_test.cpp similarity index 100% rename from src/utils/tests/histogram.cpp rename to src/utils/tests/histogram_test.cpp diff --git a/src/utils/tests/matrix_vector_product.cpp b/src/utils/tests/matrix_vector_product_test.cpp similarity index 100% rename from src/utils/tests/matrix_vector_product.cpp rename to src/utils/tests/matrix_vector_product_test.cpp diff --git a/src/walberla_bridge/CMakeLists.txt b/src/walberla_bridge/CMakeLists.txt index 090f0c7337..6b2da504a0 100644 --- a/src/walberla_bridge/CMakeLists.txt +++ b/src/walberla_bridge/CMakeLists.txt @@ -23,6 +23,12 @@ target_link_libraries( espresso_walberla_cpp_flags INTERFACE espresso::cpp_flags $<$:espresso::avx_flags>) +add_library(espresso_walberla_cuda_flags INTERFACE) +add_library(espresso::walberla::cuda_flags ALIAS espresso_walberla_cuda_flags) +target_link_libraries( + espresso_walberla_cuda_flags + INTERFACE espresso::cuda_flags + $<$:espresso::avx_flags>) function(espresso_configure_walberla_target) set(TARGET_NAME ${ARGV0}) diff --git a/src/walberla_bridge/src/BoundaryHandling.hpp b/src/walberla_bridge/src/BoundaryHandling.hpp index bf82044cbd..d8b2082460 100644 --- a/src/walberla_bridge/src/BoundaryHandling.hpp +++ b/src/walberla_bridge/src/BoundaryHandling.hpp @@ -83,7 +83,7 @@ template class BoundaryHandling { m_value_boundary->erase(global); } - [[nodiscard]] auto + [[nodiscard]] auto & get_node_boundary_value(Utils::Vector3i const &node) const { auto const global = Cell(node[0], node[1], node[2]); return get_value(global); @@ -98,7 +98,7 @@ template class BoundaryHandling { std::shared_ptr> m_value_boundary; static constexpr T default_value{}; - [[nodiscard]] T get_value(Cell const &cell) const { + [[nodiscard]] T const &get_value(Cell const &cell) const { if (m_value_boundary->count(cell) == 0) { return default_value; } @@ -122,8 +122,8 @@ template class BoundaryHandling { : m_blocks(std::move(blocks)), m_flag_field_id(flag_field_id), m_callback(DynamicValueCallback()), m_pending_changes(false) { // reinitialize the flag field - for (auto b = m_blocks->begin(); b != m_blocks->end(); ++b) { - flag_reset_kernel(&*b); + for (auto block = m_blocks->begin(); block != m_blocks->end(); ++block) { + flag_reset_kernel(block->template getData(m_flag_field_id)); } // instantiate the boundary sweep std::function callback = m_callback; @@ -137,7 +137,7 @@ template class BoundaryHandling { return m_callback.node_is_boundary(node); } - [[nodiscard]] auto + [[nodiscard]] auto & get_node_value_at_boundary(Utils::Vector3i const &node) const { return m_callback.get_node_boundary_value(node); } @@ -185,8 +185,7 @@ template class BoundaryHandling { bool m_pending_changes; /** Register flags and reset all cells. */ - void flag_reset_kernel(IBlock *const block) { - auto flag_field = block->template getData(m_flag_field_id); + void flag_reset_kernel(FlagField *flag_field) { // register flags if (!flag_field->flagExists(Domain_flag)) flag_field->registerFlag(Domain_flag); diff --git a/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt index 3a2c214c47..891cc7aa2b 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt +++ b/src/walberla_bridge/src/lattice_boltzmann/CMakeLists.txt @@ -19,10 +19,8 @@ add_subdirectory(generated_kernels) -target_sources(espresso_walberla - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_walberla_init.cpp) +target_sources(espresso_walberla PRIVATE lb_walberla_init.cpp) if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA) - target_sources(espresso_walberla_cuda - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/lb_walberla_init.cu) + target_sources(espresso_walberla_cuda PRIVATE lb_walberla_init.cu) endif() diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp index 5c1e496269..98b0679f4b 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp +++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2023 The ESPResSo project + * Copyright (C) 2019-2024 The ESPResSo project * * This file is part of ESPResSo. * @@ -226,6 +226,13 @@ class LBWalberlaImpl : public LBWalberlaBase { } } + void pressure_tensor_correction(std::span tensor) const { + auto const revert_factor = pressure_tensor_correction_factor(); + for (auto const i : {1u, 2u, 3u, 5u, 6u, 7u}) { + tensor[i] *= revert_factor; + } + } + class interpolation_illegal_access : public std::runtime_error { public: explicit interpolation_illegal_access(std::string const &field, @@ -349,11 +356,11 @@ class LBWalberlaImpl : public LBWalberlaBase { if constexpr (Architecture == lbmpy::Arch::CPU) { #ifdef ESPRESSO_BUILD_WITH_AVX_KERNELS #if defined(__AVX512F__) - constexpr uint_t alignment = 64; + constexpr uint_t alignment = 64u; #elif defined(__AVX__) - constexpr uint_t alignment = 32; + constexpr uint_t alignment = 32u; #elif defined(__SSE__) - constexpr uint_t alignment = 16; + constexpr uint_t alignment = 16u; #else #error "Unsupported arch, check walberla src/field/allocation/FieldAllocator.h" #endif @@ -690,8 +697,8 @@ class LBWalberlaImpl : public LBWalberlaBase { auto force_field = bc->block->template getData(m_last_applied_force_field_id); auto const vel = to_vector3(v); - lbm::accessor::Velocity::set(pdf_field, force_field, vel, bc->cell); - lbm::accessor::Vector::set(vel_field, vel, bc->cell); + lbm::accessor::Velocity::set(pdf_field, vel_field, force_field, vel, + bc->cell); return true; } @@ -705,25 +712,31 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const &block = *(lattice.get_blocks()->begin()); auto const field = block.template getData(m_velocity_field_id); + auto const values = lbm::accessor::Vector::get(field, *ci); + assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); + assert(values.size() == 3u * ci->numCells()); + if constexpr (std::is_same_v) { + out = std::move(values); + } else { + out = std::vector(values.begin(), values.end()); + } auto const local_offset = std::get<0>(lattice.get_local_grid_range()); auto const lower_cell = ci->min(); auto const upper_cell = ci->max(); - out.reserve(ci->numCells()); - assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); + auto it = out.begin(); for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { auto const node = local_offset + Utils::Vector3i{{x, y, z}}; if (m_boundary->node_is_boundary(node)) { - auto const vec = m_boundary->get_node_value_at_boundary(node); + auto const &vec = m_boundary->get_node_value_at_boundary(node); for (uint_t f = 0u; f < 3u; ++f) { - out.emplace_back(double_c(vec[f])); + (*it) = double_c(vec[f]); + std::advance(it, 1l); } } else { - auto const vec = lbm::accessor::Vector::get(field, Cell{x, y, z}); - for (uint_t f = 0u; f < 3u; ++f) { - out.emplace_back(double_c(vec[f])); - } + std::advance(it, 3l); } } } @@ -738,30 +751,15 @@ class LBWalberlaImpl : public LBWalberlaBase { if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); auto &block = *(lattice.get_blocks()->begin()); - // We have to set both, the pdf and the stored velocity field auto pdf_field = block.template getData(m_pdf_field_id); - auto vel_field = block.template getData(m_velocity_field_id); auto force_field = block.template getData(m_last_applied_force_field_id); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto it = velocity.begin(); - assert(velocity.size() == 3u * ci->numCells()); + auto vel_field = block.template getData(m_velocity_field_id); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const cell = Cell{x, y, z}; - Vector3 vec; - for (uint_t f = 0u; f < 3u; ++f) { - vec[f] = FloatType_c(*it); - ++it; - } - lbm::accessor::Velocity::set(pdf_field, force_field, vec, cell); - lbm::accessor::Vector::set(vel_field, vec, cell); - } - } - } + assert(velocity.size() == 3u * ci->numCells()); + std::vector const values(velocity.begin(), velocity.end()); + lbm::accessor::Velocity::set(pdf_field, vel_field, force_field, values, + *ci); } } @@ -954,10 +952,13 @@ class LBWalberlaImpl : public LBWalberlaBase { if (!bc) return false; - auto field = + auto pdf_field = bc->block->template getData(m_pdf_field_id); + auto force_field = bc->block->template getData(m_last_applied_force_field_id); + auto vel_field = + bc->block->template getData(m_velocity_field_id); auto const vec = to_vector3(force); - lbm::accessor::Vector::set(field, vec, bc->cell); + lbm::accessor::Force::set(pdf_field, vel_field, force_field, vec, bc->cell); return true; } @@ -971,22 +972,15 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const &block = *(lattice.get_blocks()->begin()); auto const field = block.template getData(m_last_applied_force_field_id); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto const n_values = 3u * ci->numCells(); - out.reserve(n_values); + auto const values = lbm::accessor::Vector::get(field, *ci); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const vec = lbm::accessor::Vector::get(field, Cell{x, y, z}); - for (uint_t f = 0u; f < 3u; ++f) { - out.emplace_back(double_c(vec[f])); - } - } - } + assert(values.size() == 3u * ci->numCells()); + if constexpr (std::is_same_v) { + out = std::move(values); + } else { + out = std::vector(values.begin(), values.end()); } - assert(out.size() == n_values); } return out; } @@ -997,25 +991,14 @@ class LBWalberlaImpl : public LBWalberlaBase { if (auto const ci = get_interval(lower_corner, upper_corner)) { auto const &lattice = get_lattice(); auto &block = *(lattice.get_blocks()->begin()); - auto field = + auto pdf_field = block.template getData(m_pdf_field_id); + auto force_field = block.template getData(m_last_applied_force_field_id); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto it = force.begin(); - assert(force.size() == 3u * ci->numCells()); + auto vel_field = block.template getData(m_velocity_field_id); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - Vector3 vec; - for (uint_t f = 0u; f < 3u; ++f) { - vec[f] = FloatType_c(*it); - ++it; - } - lbm::accessor::Vector::set(field, vec, Cell{x, y, z}); - } - } - } + assert(force.size() == 3u * ci->numCells()); + std::vector const values(force.begin(), force.end()); + lbm::accessor::Force::set(pdf_field, vel_field, force_field, values, *ci); } } @@ -1044,11 +1027,16 @@ class LBWalberlaImpl : public LBWalberlaBase { return false; auto pdf_field = bc->block->template getData(m_pdf_field_id); + auto force_field = + bc->block->template getData(m_last_applied_force_field_id); + auto vel_field = + bc->block->template getData(m_velocity_field_id); std::array pop; for (uint_t f = 0u; f < Stencil::Size; ++f) { pop[f] = FloatType_c(population[f]); } - lbm::accessor::Population::set(pdf_field, pop, bc->cell); + lbm::accessor::Population::set(pdf_field, vel_field, force_field, pop, + bc->cell); return true; } @@ -1063,13 +1051,13 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const pdf_field = block.template getData(m_pdf_field_id); auto const values = lbm::accessor::Population::get(pdf_field, *ci); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); + assert(values.size() == stencil_size() * ci->numCells()); if constexpr (std::is_same_v) { out = std::move(values); } else { out = std::vector(values.begin(), values.end()); } - assert(out.size() == stencil_size() * ci->numCells()); } return out; } @@ -1081,10 +1069,14 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const &lattice = get_lattice(); auto &block = *(lattice.get_blocks()->begin()); auto pdf_field = block.template getData(m_pdf_field_id); + auto force_field = + block.template getData(m_last_applied_force_field_id); + auto vel_field = block.template getData(m_velocity_field_id); assert(population.size() == stencil_size() * ci->numCells()); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); std::vector const values(population.begin(), population.end()); - lbm::accessor::Population::set(pdf_field, values, *ci); + lbm::accessor::Population::set(pdf_field, vel_field, force_field, values, + *ci); } } @@ -1123,13 +1115,13 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const pdf_field = block.template getData(m_pdf_field_id); auto const values = lbm::accessor::Density::get(pdf_field, *ci); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); + assert(values.size() == ci->numCells()); if constexpr (std::is_same_v) { out = std::move(values); } else { out = std::vector(values.begin(), values.end()); } - assert(out.size() == ci->numCells()); } return out; } @@ -1316,24 +1308,18 @@ class LBWalberlaImpl : public LBWalberlaBase { auto const &lattice = get_lattice(); auto const &block = *(lattice.get_blocks()->begin()); auto const pdf_field = block.template getData(m_pdf_field_id); - auto const lower_cell = ci->min(); - auto const upper_cell = ci->max(); - auto const n_values = 9u * ci->numCells(); - out.reserve(n_values); + auto values = lbm::accessor::PressureTensor::get(pdf_field, *ci); assert(++(lattice.get_blocks()->begin()) == lattice.get_blocks()->end()); - for (auto x = lower_cell.x(); x <= upper_cell.x(); ++x) { - for (auto y = lower_cell.y(); y <= upper_cell.y(); ++y) { - for (auto z = lower_cell.z(); z <= upper_cell.z(); ++z) { - auto const cell = Cell{x, y, z}; - auto tensor = lbm::accessor::PressureTensor::get(pdf_field, cell); - pressure_tensor_correction(tensor); - for (auto i = 0u; i < 9u; ++i) { - out.emplace_back(tensor[i]); - } - } - } + assert(values.size() == 9u * ci->numCells()); + for (auto it = values.begin(); it != values.end(); std::advance(it, 9l)) { + pressure_tensor_correction(std::span(it, 9ul)); + } + if constexpr (std::is_same_v) { + out = std::move(values); + } else { + out = std::vector(values.begin(), values.end()); } - assert(out.size() == n_values); } return out; } @@ -1444,7 +1430,9 @@ class LBWalberlaImpl : public LBWalberlaBase { template class DensityVTKWriter : public VTKWriter { public: - using VTKWriter::VTKWriter; + using Base = VTKWriter; + using Base::Base; + using Base::evaluate; protected: OutputType evaluate(cell_idx_t const x, cell_idx_t const y, @@ -1459,7 +1447,9 @@ class LBWalberlaImpl : public LBWalberlaBase { template class VelocityVTKWriter : public VTKWriter { public: - using VTKWriter::VTKWriter; + using Base = VTKWriter; + using Base::Base; + using Base::evaluate; protected: OutputType evaluate(cell_idx_t const x, cell_idx_t const y, @@ -1474,11 +1464,14 @@ class LBWalberlaImpl : public LBWalberlaBase { template class PressureTensorVTKWriter : public VTKWriter { public: + using Base = VTKWriter; + using Base::Base; + using Base::evaluate; + PressureTensorVTKWriter(ConstBlockDataID const &block_id, std::string const &id, FloatType unit_conversion, FloatType off_diag_factor) - : VTKWriter::VTKWriter(block_id, id, - unit_conversion), + : Base(block_id, id, unit_conversion), m_off_diag_factor(off_diag_factor) {} protected: @@ -1504,19 +1497,20 @@ class LBWalberlaImpl : public LBWalberlaBase { } else { if (flag_observables & static_cast(OutputVTK::density)) { auto const unit_conversion = FloatType_c(units.at("density")); - vtk_obj.addCellDataWriter(make_shared>( + vtk_obj.addCellDataWriter(std::make_shared>( m_pdf_field_id, "density", unit_conversion)); } if (flag_observables & static_cast(OutputVTK::velocity_vector)) { auto const unit_conversion = FloatType_c(units.at("velocity")); - vtk_obj.addCellDataWriter(make_shared>( + vtk_obj.addCellDataWriter(std::make_shared>( m_velocity_field_id, "velocity_vector", unit_conversion)); } if (flag_observables & static_cast(OutputVTK::pressure_tensor)) { auto const unit_conversion = FloatType_c(units.at("pressure")); - vtk_obj.addCellDataWriter(make_shared>( - m_pdf_field_id, "pressure_tensor", unit_conversion, - pressure_tensor_correction_factor())); + vtk_obj.addCellDataWriter( + std::make_shared>( + m_pdf_field_id, "pressure_tensor", unit_conversion, + pressure_tensor_correction_factor())); } } } diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h index 32650c3e3c..c73cb58c14 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h @@ -41,19 +41,18 @@ #include #include +#include #include #include #ifdef WALBERLA_CXX_COMPILER_IS_GNU #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #ifdef WALBERLA_CXX_COMPILER_IS_CLANG #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #endif namespace walberla { @@ -61,8 +60,8 @@ namespace lbm { namespace accessor { namespace Population { -inline std::array -get(GhostLayerField const *pdf_field, Cell const &cell) { +inline auto get(GhostLayerField const *pdf_field, + Cell const &cell) { double const &xyz0 = pdf_field->get(cell, uint_t{0u}); std::array pop; pop[0u] = pdf_field->getF(&xyz0, uint_t{0u}); @@ -111,6 +110,54 @@ inline void set(GhostLayerField *pdf_field, pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; } +inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField const *force_field, + std::array const &pop, Cell const &cell) { + auto &xyz0 = pdf_field->get(cell, uint_t{0u}); + const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u]; + const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u]; + const double f_2 = pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u]; + const double f_3 = pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u]; + const double f_4 = pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u]; + const double f_5 = pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u]; + const double f_6 = pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u]; + const double f_7 = pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u]; + const double f_8 = pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u]; + const double f_9 = pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u]; + const double f_10 = pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u]; + const double f_11 = pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u]; + const double f_12 = pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u]; + const double f_13 = pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u]; + const double f_14 = pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u]; + const double f_15 = pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u]; + const double f_16 = pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u]; + const double f_17 = pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u]; + const double f_18 = pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; + const auto x = cell.x(); + const auto y = cell.y(); + const auto z = cell.z(); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const double md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000 + momdensity_0; + const double md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000 + momdensity_1; + const double md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000 + momdensity_2; + const auto rho_inv = double{1} / rho; + velocity_field->get(cell, uint_t{0u}) = md_0 * rho_inv; + velocity_field->get(cell, uint_t{1u}) = md_1 * rho_inv; + velocity_field->get(cell, uint_t{2u}) = md_2 * rho_inv; +} + inline void initialize(GhostLayerField *pdf_field, std::array const &pop) { WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, { @@ -137,9 +184,8 @@ inline void initialize(GhostLayerField *pdf_field, }); } -inline std::vector -get(GhostLayerField const *pdf_field, - CellInterval const &ci) { +inline auto get(GhostLayerField const *pdf_field, + CellInterval const &ci) { std::vector out; out.reserve(ci.numCells() * uint_t(19u)); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { @@ -174,31 +220,86 @@ get(GhostLayerField const *pdf_field, inline void set(GhostLayerField *pdf_field, std::vector const &values, CellInterval const &ci) { assert(uint_c(values.size()) == ci.numCells() * uint_t(19u)); - auto values_ptr = values.data(); + auto pop = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + double &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u]; + pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u]; + pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u]; + pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u]; + pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u]; + pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u]; + pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u]; + pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u]; + pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u]; + pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u]; + pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u]; + pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u]; + pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u]; + pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u]; + pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u]; + pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u]; + pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u]; + pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u]; + pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; + std::advance(pop, 19); + } + } + } +} + +inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField const *force_field, + std::vector const &values, CellInterval const &ci) { + assert(uint_c(values.size()) == ci.numCells() * uint_t(19u)); + auto pop = values.data(); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { double &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); - pdf_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u]; - pdf_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u]; - pdf_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u]; - pdf_field->getF(&xyz0, uint_t{3u}) = values_ptr[3u]; - pdf_field->getF(&xyz0, uint_t{4u}) = values_ptr[4u]; - pdf_field->getF(&xyz0, uint_t{5u}) = values_ptr[5u]; - pdf_field->getF(&xyz0, uint_t{6u}) = values_ptr[6u]; - pdf_field->getF(&xyz0, uint_t{7u}) = values_ptr[7u]; - pdf_field->getF(&xyz0, uint_t{8u}) = values_ptr[8u]; - pdf_field->getF(&xyz0, uint_t{9u}) = values_ptr[9u]; - pdf_field->getF(&xyz0, uint_t{10u}) = values_ptr[10u]; - pdf_field->getF(&xyz0, uint_t{11u}) = values_ptr[11u]; - pdf_field->getF(&xyz0, uint_t{12u}) = values_ptr[12u]; - pdf_field->getF(&xyz0, uint_t{13u}) = values_ptr[13u]; - pdf_field->getF(&xyz0, uint_t{14u}) = values_ptr[14u]; - pdf_field->getF(&xyz0, uint_t{15u}) = values_ptr[15u]; - pdf_field->getF(&xyz0, uint_t{16u}) = values_ptr[16u]; - pdf_field->getF(&xyz0, uint_t{17u}) = values_ptr[17u]; - pdf_field->getF(&xyz0, uint_t{18u}) = values_ptr[18u]; - values_ptr += 19u; + const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u]; + const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u]; + const double f_2 = pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u]; + const double f_3 = pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u]; + const double f_4 = pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u]; + const double f_5 = pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u]; + const double f_6 = pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u]; + const double f_7 = pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u]; + const double f_8 = pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u]; + const double f_9 = pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u]; + const double f_10 = pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u]; + const double f_11 = pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u]; + const double f_12 = pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u]; + const double f_13 = pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u]; + const double f_14 = pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u]; + const double f_15 = pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u]; + const double f_16 = pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u]; + const double f_17 = pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u]; + const double f_18 = pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = + -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + + vel0Term + vel1Term + vel2Term; + const double md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000 + momdensity_0; + const double md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000 + momdensity_1; + const double md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000 + momdensity_2; + const auto rho_inv = double{1} / rho; + velocity_field->get(x, y, z, uint_t{0u}) = md_0 * rho_inv; + velocity_field->get(x, y, z, uint_t{1u}) = md_1 * rho_inv; + velocity_field->get(x, y, z, uint_t{2u}) = md_2 * rho_inv; + std::advance(pop, 19); } } } @@ -206,8 +307,8 @@ inline void set(GhostLayerField *pdf_field, } // namespace Population namespace Vector { -inline Vector3 get(GhostLayerField const *vec_field, - Cell const &cell) { +inline auto get(GhostLayerField const *vec_field, + Cell const &cell) { const double &xyz0 = vec_field->get(cell, uint_t{0u}); Vector3 vec; vec[0] = vec_field->getF(&xyz0, uint_t{0u}); @@ -252,9 +353,8 @@ inline void add_to_all(GhostLayerField *vec_field, }); } -inline std::vector -get(GhostLayerField const *vec_field, - CellInterval const &ci) { +inline auto get(GhostLayerField const *vec_field, + CellInterval const &ci) { std::vector out; out.reserve(ci.numCells() * uint_t(3u)); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { @@ -281,7 +381,7 @@ inline void set(GhostLayerField *vec_field, vec_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u]; vec_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u]; vec_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u]; - values_ptr += 3u; + std::advance(values_ptr, 3); } } } @@ -290,8 +390,8 @@ inline void set(GhostLayerField *vec_field, namespace EquilibriumDistribution { inline double get(stencil::Direction const direction, - Vector3 const &u = Vector3(double(0.0)), - double rho = double(1.0)) { + Vector3 const &u = Vector3(double{0}), + double rho = double{1}) { using namespace stencil; switch (direction) { @@ -568,7 +668,7 @@ inline void set(GhostLayerField *pdf_field, vel1Term + vel2Term; // calculate current velocity (before density change) - const double conversion = double(1) / rho; + const double conversion = double{1} / rho; Vector3 velocity; velocity[0u] = momdensity_0 * conversion; velocity[1u] = momdensity_1 * conversion; @@ -656,7 +756,7 @@ inline void set(GhostLayerField *pdf_field, vel0Term + vel1Term + vel2Term; // calculate current velocity (before density change) - const double conversion = double(1) / rho; + const double conversion = double{1} / rho; Vector3 velocity; velocity[0u] = momdensity_0 * conversion; velocity[1u] = momdensity_1 * conversion; @@ -671,7 +771,108 @@ inline void set(GhostLayerField *pdf_field, } // namespace Density namespace Velocity { +inline auto get(GhostLayerField const *pdf_field, + GhostLayerField const *force_field, + Cell const &cell) { + const double &xyz0 = pdf_field->get(cell, uint_t{0u}); + const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const auto x = cell.x(); + const auto y = cell.y(); + const auto z = cell.z(); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const double md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000 + momdensity_0; + const double md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000 + momdensity_1; + const double md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000 + momdensity_2; + const double rho_inv = double{1} / rho; + + return Vector3(md_0 * rho_inv, md_1 * rho_inv, md_2 * rho_inv); +} + +inline auto get(GhostLayerField const *pdf_field, + GhostLayerField const *force_field, + CellInterval const &ci) { + std::vector out; + out.reserve(ci.numCells() * uint_t(3u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = + -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + + vel0Term + vel1Term + vel2Term; + const double md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000 + momdensity_0; + const double md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000 + momdensity_1; + const double md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000 + momdensity_2; + const double rho_inv = double{1} / rho; + out.emplace_back(md_0 * rho_inv); + out.emplace_back(md_1 * rho_inv); + out.emplace_back(md_2 * rho_inv); + } + } + } + return out; +} + inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, GhostLayerField const *force_field, Vector3 const &u, Cell const &cell) { const double &xyz0 = pdf_field->get(cell, uint_t{0u}); @@ -709,15 +910,183 @@ inline void set(GhostLayerField *pdf_field, -force_field->get(x, y, z, 1) * 0.50000000000000000 / rho + u[1]; const double u_2 = -force_field->get(x, y, z, 2) * 0.50000000000000000 / rho + u[2]; + velocity_field->get(x, y, z, uint_t{0u}) = u[0u]; + velocity_field->get(x, y, z, uint_t{1u}) = u[1u]; + velocity_field->get(x, y, z, uint_t{2u}) = u[2u]; Equilibrium::set(pdf_field, Vector3(u_0, u_1, u_2), rho, cell); } + +inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField const *force_field, + std::vector const &values, CellInterval const &ci) { + assert(uint_c(values.size()) == ci.numCells() * uint_t(3u)); + auto u = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + double &pdf_xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + double &vel_xyz0 = velocity_field->get(x, y, z, uint_t{0u}); + const double f_0 = pdf_field->getF(&pdf_xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&pdf_xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&pdf_xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&pdf_xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&pdf_xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&pdf_xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&pdf_xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&pdf_xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&pdf_xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&pdf_xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&pdf_xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&pdf_xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&pdf_xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&pdf_xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&pdf_xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&pdf_xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&pdf_xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&pdf_xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&pdf_xyz0, uint_t{18u}); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double vel2Term = f_12 + f_13 + f_5; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + + vel0Term + vel1Term + vel2Term; + + const double u_0 = + -force_field->get(x, y, z, 0) * 0.50000000000000000 / rho + u[0]; + const double u_1 = + -force_field->get(x, y, z, 1) * 0.50000000000000000 / rho + u[1]; + const double u_2 = + -force_field->get(x, y, z, 2) * 0.50000000000000000 / rho + u[2]; + velocity_field->getF(&vel_xyz0, uint_t{0u}) = u[0u]; + velocity_field->getF(&vel_xyz0, uint_t{1u}) = u[1u]; + velocity_field->getF(&vel_xyz0, uint_t{2u}) = u[2u]; + + std::advance(u, 3); + + Equilibrium::set(pdf_field, Vector3(u_0, u_1, u_2), rho, + Cell{x, y, z}); + } + } + } +} } // namespace Velocity +namespace Force { +inline void set(GhostLayerField const *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField *force_field, + Vector3 const &force, Cell const &cell) { + double const &pdf_xyz0 = pdf_field->get(cell, uint_t{0u}); + double &vel_xyz0 = velocity_field->get(cell, uint_t{0u}); + double &laf_xyz0 = force_field->get(cell, uint_t{0u}); + const double f_0 = pdf_field->getF(&pdf_xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&pdf_xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&pdf_xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&pdf_xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&pdf_xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&pdf_xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&pdf_xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&pdf_xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&pdf_xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&pdf_xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&pdf_xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&pdf_xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&pdf_xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&pdf_xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&pdf_xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&pdf_xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&pdf_xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&pdf_xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&pdf_xyz0, uint_t{18u}); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const double md_0 = force[0u] * 0.50000000000000000 + momdensity_0; + const double md_1 = force[1u] * 0.50000000000000000 + momdensity_1; + const double md_2 = force[2u] * 0.50000000000000000 + momdensity_2; + auto const rho_inv = double{1} / rho; + + force_field->getF(&laf_xyz0, uint_t{0u}) = force[0u]; + force_field->getF(&laf_xyz0, uint_t{1u}) = force[1u]; + force_field->getF(&laf_xyz0, uint_t{2u}) = force[2u]; + + velocity_field->getF(&vel_xyz0, uint_t{0u}) = md_0 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{1u}) = md_1 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{2u}) = md_2 * rho_inv; +} + +inline void set(GhostLayerField const *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField *force_field, + std::vector const &values, CellInterval const &ci) { + assert(uint_c(values.size()) == ci.numCells() * uint_t(3u)); + auto force = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + double const &pdf_xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + double &vel_xyz0 = velocity_field->get(x, y, z, uint_t{0u}); + double &laf_xyz0 = force_field->get(x, y, z, uint_t{0u}); + const double f_0 = pdf_field->getF(&pdf_xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&pdf_xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&pdf_xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&pdf_xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&pdf_xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&pdf_xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&pdf_xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&pdf_xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&pdf_xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&pdf_xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&pdf_xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&pdf_xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&pdf_xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&pdf_xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&pdf_xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&pdf_xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&pdf_xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&pdf_xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&pdf_xyz0, uint_t{18u}); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = + -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + + vel0Term + vel1Term + vel2Term; + const double md_0 = force[0u] * 0.50000000000000000 + momdensity_0; + const double md_1 = force[1u] * 0.50000000000000000 + momdensity_1; + const double md_2 = force[2u] * 0.50000000000000000 + momdensity_2; + auto const rho_inv = double{1} / rho; + + force_field->getF(&laf_xyz0, uint_t{0u}) = force[0u]; + force_field->getF(&laf_xyz0, uint_t{1u}) = force[1u]; + force_field->getF(&laf_xyz0, uint_t{2u}) = force[2u]; + + velocity_field->getF(&vel_xyz0, uint_t{0u}) = md_0 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{1u}) = md_1 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{2u}) = md_2 * rho_inv; + + std::advance(force, 3); + } + } + } +} +} // namespace Force + namespace MomentumDensity { -inline Vector3 -reduce(GhostLayerField const *pdf_field, - GhostLayerField const *force_field) { +inline auto reduce(GhostLayerField const *pdf_field, + GhostLayerField const *force_field) { Vector3 momentumDensity(double{0}); WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); @@ -766,8 +1135,8 @@ reduce(GhostLayerField const *pdf_field, } // namespace MomentumDensity namespace PressureTensor { -inline Matrix3 -get(GhostLayerField const *pdf_field, Cell const &cell) { +inline auto get(GhostLayerField const *pdf_field, + Cell const &cell) { const double &xyz0 = pdf_field->get(cell, uint_t{0u}); const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}); const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}); @@ -816,6 +1185,63 @@ get(GhostLayerField const *pdf_field, Cell const &cell) { return pressureTensor; } + +inline auto get(GhostLayerField const *pdf_field, + CellInterval const &ci) { + std::vector out; + out.reserve(ci.numCells() * uint_t(9u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const double &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + const double f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const double f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const double f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const double f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const double f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const double f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const double f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const double f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const double f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const double f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const double f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const double f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const double f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const double f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const double f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const double f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const double f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const double f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const double f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const double p_0 = + f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9; + const double p_1 = -f_10 - f_7 + f_8 + f_9; + const double p_2 = -f_13 + f_14 + f_17 - f_18; + const double p_3 = -f_10 - f_7 + f_8 + f_9; + const double p_4 = + f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9; + const double p_5 = f_11 - f_12 - f_15 + f_16; + const double p_6 = -f_13 + f_14 + f_17 - f_18; + const double p_7 = f_11 - f_12 - f_15 + f_16; + const double p_8 = + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6; + + out.emplace_back(p_0); + out.emplace_back(p_1); + out.emplace_back(p_2); + + out.emplace_back(p_3); + out.emplace_back(p_4); + out.emplace_back(p_5); + + out.emplace_back(p_6); + out.emplace_back(p_7); + out.emplace_back(p_8); + } + } + } + return out; +} } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu index a9fe53dc6f..62dda87100 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cu @@ -48,13 +48,8 @@ #if defined(__NVCC__) #define RESTRICT __restrict__ -#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) #pragma nv_diagnostic push #pragma nv_diag_suppress 177 // unused variable -#else -#pragma push -#pragma diag_suppress 177 // unused variable -#endif #elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) @@ -62,20 +57,17 @@ #define RESTRICT __restrict__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #else // clang compiling CUDA code in host mode #define RESTRICT __restrict__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #endif #endif #elif defined(__GNUC__) or defined(__GNUG__) #define RESTRICT __restrict__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" #elif defined(_MSC_VER) #define RESTRICT __restrict #else @@ -102,38 +94,13 @@ namespace lbm { namespace accessor { namespace Population { -__global__ void kernel_get_interval( - gpu::FieldAccessor pdf, - double *RESTRICT const pop) { - pdf.set(blockIdx, threadIdx); - if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); - pop[offset + 0u] = pdf.get(0u); - pop[offset + 1u] = pdf.get(1u); - pop[offset + 2u] = pdf.get(2u); - pop[offset + 3u] = pdf.get(3u); - pop[offset + 4u] = pdf.get(4u); - pop[offset + 5u] = pdf.get(5u); - pop[offset + 6u] = pdf.get(6u); - pop[offset + 7u] = pdf.get(7u); - pop[offset + 8u] = pdf.get(8u); - pop[offset + 9u] = pdf.get(9u); - pop[offset + 10u] = pdf.get(10u); - pop[offset + 11u] = pdf.get(11u); - pop[offset + 12u] = pdf.get(12u); - pop[offset + 13u] = pdf.get(13u); - pop[offset + 14u] = pdf.get(14u); - pop[offset + 15u] = pdf.get(15u); - pop[offset + 16u] = pdf.get(16u); - pop[offset + 17u] = pdf.get(17u); - pop[offset + 18u] = pdf.get(18u); - } -} - +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor pdf, - double *RESTRICT const pop) { + double *RESTRICT pop) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); pdf.set(blockIdx, threadIdx); + pop += offset; if (pdf.isValidPosition()) { pop[0u] = pdf.get(0u); pop[1u] = pdf.get(1u); @@ -157,37 +124,38 @@ __global__ void kernel_get( } } -__global__ void kernel_set_interval( +__global__ void kernel_set( gpu::FieldAccessor pdf, - double const *RESTRICT const pop) { + double const *RESTRICT pop) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); pdf.set(blockIdx, threadIdx); + pop += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); - pdf.get(0u) = pop[offset + 0u]; - pdf.get(1u) = pop[offset + 1u]; - pdf.get(2u) = pop[offset + 2u]; - pdf.get(3u) = pop[offset + 3u]; - pdf.get(4u) = pop[offset + 4u]; - pdf.get(5u) = pop[offset + 5u]; - pdf.get(6u) = pop[offset + 6u]; - pdf.get(7u) = pop[offset + 7u]; - pdf.get(8u) = pop[offset + 8u]; - pdf.get(9u) = pop[offset + 9u]; - pdf.get(10u) = pop[offset + 10u]; - pdf.get(11u) = pop[offset + 11u]; - pdf.get(12u) = pop[offset + 12u]; - pdf.get(13u) = pop[offset + 13u]; - pdf.get(14u) = pop[offset + 14u]; - pdf.get(15u) = pop[offset + 15u]; - pdf.get(16u) = pop[offset + 16u]; - pdf.get(17u) = pop[offset + 17u]; - pdf.get(18u) = pop[offset + 18u]; + pdf.get(0u) = pop[0u]; + pdf.get(1u) = pop[1u]; + pdf.get(2u) = pop[2u]; + pdf.get(3u) = pop[3u]; + pdf.get(4u) = pop[4u]; + pdf.get(5u) = pop[5u]; + pdf.get(6u) = pop[6u]; + pdf.get(7u) = pop[7u]; + pdf.get(8u) = pop[8u]; + pdf.get(9u) = pop[9u]; + pdf.get(10u) = pop[10u]; + pdf.get(11u) = pop[11u]; + pdf.get(12u) = pop[12u]; + pdf.get(13u) = pop[13u]; + pdf.get(14u) = pop[14u]; + pdf.get(15u) = pop[15u]; + pdf.get(16u) = pop[16u]; + pdf.get(17u) = pop[17u]; + pdf.get(18u) = pop[18u]; } } -__global__ void kernel_set( +__global__ void kernel_broadcast( gpu::FieldAccessor pdf, - double const *RESTRICT const pop) { + double const *RESTRICT pop) { pdf.set(blockIdx, threadIdx); if (pdf.isValidPosition()) { pdf.get(0u) = pop[0u]; @@ -212,11 +180,59 @@ __global__ void kernel_set( } } +__global__ void kernel_set_vel( + gpu::FieldAccessor pdf, + gpu::FieldAccessor velocity, + gpu::FieldAccessor force, + double const *RESTRICT pop) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); + pdf.set(blockIdx, threadIdx); + velocity.set(blockIdx, threadIdx); + force.set(blockIdx, threadIdx); + pop += offset; + if (pdf.isValidPosition()) { + const double f_0 = pdf.get(0u) = pop[0u]; + const double f_1 = pdf.get(1u) = pop[1u]; + const double f_2 = pdf.get(2u) = pop[2u]; + const double f_3 = pdf.get(3u) = pop[3u]; + const double f_4 = pdf.get(4u) = pop[4u]; + const double f_5 = pdf.get(5u) = pop[5u]; + const double f_6 = pdf.get(6u) = pop[6u]; + const double f_7 = pdf.get(7u) = pop[7u]; + const double f_8 = pdf.get(8u) = pop[8u]; + const double f_9 = pdf.get(9u) = pop[9u]; + const double f_10 = pdf.get(10u) = pop[10u]; + const double f_11 = pdf.get(11u) = pop[11u]; + const double f_12 = pdf.get(12u) = pop[12u]; + const double f_13 = pdf.get(13u) = pop[13u]; + const double f_14 = pdf.get(14u) = pop[14u]; + const double f_15 = pdf.get(15u) = pop[15u]; + const double f_16 = pdf.get(16u) = pop[16u]; + const double f_17 = pdf.get(17u) = pop[17u]; + const double f_18 = pdf.get(18u) = pop[18u]; + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const double md_0 = force.get(0) * 0.50000000000000000 + momdensity_0; + const double md_1 = force.get(1) * 0.50000000000000000 + momdensity_1; + const double md_2 = force.get(2) * 0.50000000000000000 + momdensity_2; + const double rho_inv = double{1} / rho; + velocity.get(0u) = md_0 * rho_inv; + velocity.get(1u) = md_1 * rho_inv; + velocity.get(2u) = md_2 * rho_inv; + } +} +// LCOV_EXCL_STOP + std::array get( gpu::GPUField const *pdf_field, Cell const &cell) { CellInterval ci(cell, cell); - thrust::device_vector dev_data(19u, double{0}); + thrust::device_vector dev_data(19u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); auto kernel = gpu::make_kernel(kernel_get); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); @@ -231,7 +247,7 @@ void set( gpu::GPUField *pdf_field, std::array const &pop, Cell const &cell) { - thrust::device_vector dev_data(pop.data(), pop.data() + 19u); + thrust::device_vector dev_data(pop.begin(), pop.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); CellInterval ci(cell, cell); auto kernel = gpu::make_kernel(kernel_set); @@ -240,13 +256,30 @@ void set( kernel(); } +void set( + gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::array const &pop, + Cell const &cell) { + thrust::device_vector dev_data(pop.begin(), pop.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + CellInterval ci(cell, cell); + auto kernel = gpu::make_kernel(kernel_set_vel); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + void initialize( gpu::GPUField *pdf_field, std::array const &pop) { CellInterval ci = pdf_field->xyzSizeWithGhostLayer(); - thrust::device_vector dev_data(pop.data(), pop.data() + 19u); + thrust::device_vector dev_data(pop.begin(), pop.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set); + auto kernel = gpu::make_kernel(kernel_broadcast); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -257,7 +290,7 @@ std::vector get( CellInterval const &ci) { thrust::device_vector dev_data(ci.numCells() * 19u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_get_interval); + auto kernel = gpu::make_kernel(kernel_get); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(dev_data_ptr); kernel(); @@ -272,82 +305,92 @@ void set( CellInterval const &ci) { thrust::device_vector dev_data(values.begin(), values.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set_interval); + auto kernel = gpu::make_kernel(kernel_set); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); } -} // namespace Population -namespace Vector { -__global__ void kernel_get_interval( - gpu::FieldAccessor vec, - double *const out) { - vec.set(blockIdx, threadIdx); - if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); - out[offset + 0u] = vec.get(0u); - out[offset + 1u] = vec.get(1u); - out[offset + 2u] = vec.get(2u); - } +void set( + gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, + CellInterval const &ci) { + thrust::device_vector dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set_vel); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); } +} // namespace Population +namespace Vector { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor vec, - double *const out) { + double *u_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); vec.set(blockIdx, threadIdx); + u_out += offset; if (vec.isValidPosition()) { - out[0u] = vec.get(0u); - out[1u] = vec.get(1u); - out[2u] = vec.get(2u); + u_out[0u] = vec.get(0u); + u_out[1u] = vec.get(1u); + u_out[2u] = vec.get(2u); } } -__global__ void kernel_set_interval( +__global__ void kernel_set( gpu::FieldAccessor vec, - double const *RESTRICT const u) { + double const *RESTRICT u_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); vec.set(blockIdx, threadIdx); + u_in += offset; if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); - vec.get(0u) = u[offset + 0u]; - vec.get(1u) = u[offset + 1u]; - vec.get(2u) = u[offset + 2u]; + vec.get(0u) = u_in[0u]; + vec.get(1u) = u_in[1u]; + vec.get(2u) = u_in[2u]; } } -__global__ void kernel_set( +__global__ void kernel_broadcast( gpu::FieldAccessor vec, - const double *RESTRICT const u) { + double const *RESTRICT u_in) { vec.set(blockIdx, threadIdx); if (vec.isValidPosition()) { - vec.get(0u) = u[0u]; - vec.get(1u) = u[1u]; - vec.get(2u) = u[2u]; + vec.get(0u) = u_in[0u]; + vec.get(1u) = u_in[1u]; + vec.get(2u) = u_in[2u]; } } -__global__ void kernel_add_interval( +__global__ void kernel_add( gpu::FieldAccessor vec, - double const *RESTRICT const u) { + double const *RESTRICT u_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); vec.set(blockIdx, threadIdx); + u_in += offset; if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); - vec.get(0u) += u[offset + 0u]; - vec.get(1u) += u[offset + 1u]; - vec.get(2u) += u[offset + 2u]; + vec.get(0u) += u_in[0u]; + vec.get(1u) += u_in[1u]; + vec.get(2u) += u_in[2u]; } } -__global__ void kernel_add( +__global__ void kernel_broadcast_add( gpu::FieldAccessor vec, - double const *RESTRICT const u) { + double const *RESTRICT u_in) { vec.set(blockIdx, threadIdx); if (vec.isValidPosition()) { - vec.get(0u) += u[0u]; - vec.get(1u) += u[1u]; - vec.get(2u) += u[2u]; + vec.get(0u) += u_in[0u]; + vec.get(1u) += u_in[1u]; + vec.get(2u) += u_in[2u]; } } +// LCOV_EXCL_STOP Vector3 get( gpu::GPUField const *vec_field, @@ -396,7 +439,7 @@ void initialize( CellInterval ci = vec_field->xyzSizeWithGhostLayer(); thrust::device_vector dev_data(vec.data(), vec.data() + 3u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set); + auto kernel = gpu::make_kernel(kernel_broadcast); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -408,7 +451,7 @@ void add_to_all( CellInterval ci = vec_field->xyzSizeWithGhostLayer(); thrust::device_vector dev_data(vec.data(), vec.data() + 3u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_add); + auto kernel = gpu::make_kernel(kernel_broadcast_add); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -419,7 +462,7 @@ std::vector get( CellInterval const &ci) { thrust::device_vector dev_data(ci.numCells() * 3u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_get_interval); + auto kernel = gpu::make_kernel(kernel_get); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(dev_data_ptr); kernel(); @@ -434,7 +477,7 @@ void set( CellInterval const &ci) { thrust::device_vector dev_data(values.begin(), values.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set_interval); + auto kernel = gpu::make_kernel(kernel_set); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -442,6 +485,7 @@ void set( } // namespace Vector namespace Interpolation { +// LCOV_EXCL_START /** @brief Calculate interpolation weights. */ static __forceinline__ __device__ void calculate_weights( double const *RESTRICT const pos, @@ -535,6 +579,7 @@ __global__ void kernel_set( } } } +// LCOV_EXCL_STOP static dim3 calculate_dim_grid(uint const threads_x, uint const blocks_per_grid_y, @@ -589,6 +634,7 @@ void set( } // namespace Interpolation namespace Equilibrium { +// LCOV_EXCL_START __device__ void kernel_set_device( gpu::FieldAccessor pdf, double const *RESTRICT const u, @@ -614,15 +660,18 @@ __device__ void kernel_set_device( pdf.get(17u) = rho * -0.083333333333333329 * u[0] + rho * -0.083333333333333329 * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[2] * u[2]) + rho * 0.25 * u[0] * u[2]; pdf.get(18u) = rho * -0.083333333333333329 * u[2] + rho * -0.25 * u[0] * u[2] + rho * 0.027777777777777776 + rho * 0.083333333333333329 * u[0] + rho * 0.083333333333333329 * (u[0] * u[0]) + rho * 0.083333333333333329 * (u[2] * u[2]); } +// LCOV_EXCL_STOP } // namespace Equilibrium namespace Density { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor pdf, - double *RESTRICT const out) { + double *RESTRICT rho_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u); pdf.set(blockIdx, threadIdx); + rho_out += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u)); double const f_0 = pdf.get(0u); double const f_1 = pdf.get(1u); double const f_2 = pdf.get(2u); @@ -646,16 +695,17 @@ __global__ void kernel_get( const double vel1Term = f_1 + f_11 + f_15 + f_7; const double vel2Term = f_12 + f_13 + f_5; const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; - out[offset] = rho; + rho_out[0u] = rho; } } __global__ void kernel_set( gpu::FieldAccessor pdf, - double const *RESTRICT const rho_in) { + double const *RESTRICT rho_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u); pdf.set(blockIdx, threadIdx); + rho_in += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u)); double const f_0 = pdf.get(0u); double const f_1 = pdf.get(1u); double const f_2 = pdf.get(2u); @@ -684,12 +734,13 @@ __global__ void kernel_set( const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; // calculate current velocity (before density change) - double const conversion = double(1) / rho; - double const u_old[3] = {momdensity_0 * conversion, momdensity_1 * conversion, momdensity_2 * conversion}; + double const rho_inv = double{1} / rho; + double const u_old[3] = {momdensity_0 * rho_inv, momdensity_1 * rho_inv, momdensity_2 * rho_inv}; - Equilibrium::kernel_set_device(pdf, u_old, rho_in[offset]); + Equilibrium::kernel_set_device(pdf, u_old, rho_in[0u]); } } +// LCOV_EXCL_STOP double get( gpu::GPUField const *pdf_field, @@ -727,7 +778,7 @@ std::vector get( kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(dev_data_ptr); kernel(); - std::vector out(ci.numCells()); + std::vector out(dev_data.size()); thrust::copy(dev_data.begin(), dev_data.end(), out.begin()); return out; } @@ -746,16 +797,63 @@ void set( } // namespace Density namespace Velocity { +// LCOV_EXCL_START +__global__ void kernel_get( + gpu::FieldAccessor pdf, + gpu::FieldAccessor force, + double *RESTRICT u_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); + pdf.set(blockIdx, threadIdx); + force.set(blockIdx, threadIdx); + u_out += offset; + if (pdf.isValidPosition()) { + double const f_0 = pdf.get(0u); + double const f_1 = pdf.get(1u); + double const f_2 = pdf.get(2u); + double const f_3 = pdf.get(3u); + double const f_4 = pdf.get(4u); + double const f_5 = pdf.get(5u); + double const f_6 = pdf.get(6u); + double const f_7 = pdf.get(7u); + double const f_8 = pdf.get(8u); + double const f_9 = pdf.get(9u); + double const f_10 = pdf.get(10u); + double const f_11 = pdf.get(11u); + double const f_12 = pdf.get(12u); + double const f_13 = pdf.get(13u); + double const f_14 = pdf.get(14u); + double const f_15 = pdf.get(15u); + double const f_16 = pdf.get(16u); + double const f_17 = pdf.get(17u); + double const f_18 = pdf.get(18u); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const double md_0 = force.get(0) * 0.50000000000000000 + momdensity_0; + const double md_1 = force.get(1) * 0.50000000000000000 + momdensity_1; + const double md_2 = force.get(2) * 0.50000000000000000 + momdensity_2; + auto const rho_inv = double{1} / rho; + u_out[0u] = md_0 * rho_inv; + u_out[1u] = md_1 * rho_inv; + u_out[2u] = md_2 * rho_inv; + } +} + __global__ void kernel_set( gpu::FieldAccessor pdf, + gpu::FieldAccessor velocity, gpu::FieldAccessor force, - double const *RESTRICT const u_in) { + double const *RESTRICT u_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); pdf.set(blockIdx, threadIdx); + velocity.set(blockIdx, threadIdx); force.set(blockIdx, threadIdx); + u_in += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(3u)); - uint const bufsize = 3u; - double const *RESTRICT const u = u_in + bufsize * offset; double const f_0 = pdf.get(0u); double const f_1 = pdf.get(1u); double const f_2 = pdf.get(2u); @@ -775,6 +873,7 @@ __global__ void kernel_set( double const f_16 = pdf.get(16u); double const f_17 = pdf.get(17u); double const f_18 = pdf.get(18u); + double const *RESTRICT const u = u_in; const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; const double vel1Term = f_1 + f_11 + f_15 + f_7; const double vel2Term = f_12 + f_13 + f_5; @@ -782,15 +881,54 @@ __global__ void kernel_set( const double u_0 = -force.get(0) * 0.50000000000000000 / rho + u[0]; const double u_1 = -force.get(1) * 0.50000000000000000 / rho + u[1]; const double u_2 = -force.get(2) * 0.50000000000000000 / rho + u[2]; + velocity.get(0u) = u_in[0u]; + velocity.get(1u) = u_in[1u]; + velocity.get(2u) = u_in[2u]; + double u_new[3] = {u_0, u_1, u_2}; Equilibrium::kernel_set_device(pdf, u_new, rho); } } +// LCOV_EXCL_STOP + +Vector3 get( + gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, + Cell const &cell) { + CellInterval ci(cell, cell); + thrust::device_vector dev_data(3u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + Vector3 vec; + thrust::copy(dev_data.begin(), dev_data.end(), vec.data()); + return vec; +} + +std::vector get( + gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, + CellInterval const &ci) { + thrust::device_vector dev_data(3u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + return out; +} void set( gpu::GPUField *pdf_field, - gpu::GPUField *force_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, Vector3 const &u, Cell const &cell) { CellInterval ci(cell, cell); @@ -798,22 +936,127 @@ void set( auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); auto kernel = gpu::make_kernel(kernel_set); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + +void set( + gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, + CellInterval const &ci) { + thrust::device_vector dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); } } // namespace Velocity +namespace Force { +// LCOV_EXCL_START +__global__ void kernel_set( + gpu::FieldAccessor pdf, + gpu::FieldAccessor velocity, + gpu::FieldAccessor force, + double const *RESTRICT f_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); + pdf.set(blockIdx, threadIdx); + velocity.set(blockIdx, threadIdx); + force.set(blockIdx, threadIdx); + f_in += offset; + if (pdf.isValidPosition()) { + double const f_0 = pdf.get(0u); + double const f_1 = pdf.get(1u); + double const f_2 = pdf.get(2u); + double const f_3 = pdf.get(3u); + double const f_4 = pdf.get(4u); + double const f_5 = pdf.get(5u); + double const f_6 = pdf.get(6u); + double const f_7 = pdf.get(7u); + double const f_8 = pdf.get(8u); + double const f_9 = pdf.get(9u); + double const f_10 = pdf.get(10u); + double const f_11 = pdf.get(11u); + double const f_12 = pdf.get(12u); + double const f_13 = pdf.get(13u); + double const f_14 = pdf.get(14u); + double const f_15 = pdf.get(15u); + double const f_16 = pdf.get(16u); + double const f_17 = pdf.get(17u); + double const f_18 = pdf.get(18u); + const double vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const double momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const double vel1Term = f_1 + f_11 + f_15 + f_7; + const double momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const double vel2Term = f_12 + f_13 + f_5; + const double momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const double rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const double md_0 = f_in[0u] * 0.50000000000000000 + momdensity_0; + const double md_1 = f_in[1u] * 0.50000000000000000 + momdensity_1; + const double md_2 = f_in[2u] * 0.50000000000000000 + momdensity_2; + auto const rho_inv = double{1} / rho; + + force.get(0u) = f_in[0u]; + force.get(1u) = f_in[1u]; + force.get(2u) = f_in[2u]; + + velocity.get(0u) = md_0 * rho_inv; + velocity.get(1u) = md_1 * rho_inv; + velocity.get(2u) = md_2 * rho_inv; + } +} +// LCOV_EXCL_STOP + +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, + Vector3 const &u, + Cell const &cell) { + CellInterval ci(cell, cell); + thrust::device_vector dev_data(u.data(), u.data() + 3u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, + std::vector const &values, + CellInterval const &ci) { + thrust::device_vector dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} +} // namespace Force + namespace MomentumDensity { +// LCOV_EXCL_START __global__ void kernel_sum( gpu::FieldAccessor pdf, gpu::FieldAccessor force, - double *RESTRICT const out) { + double *RESTRICT out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); pdf.set(blockIdx, threadIdx); force.set(blockIdx, threadIdx); + out += offset; if (pdf.isValidPosition()) { - uint const bufsize = 3u; - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize); double const f_0 = pdf.get(0u); double const f_1 = pdf.get(1u); double const f_2 = pdf.get(2u); @@ -843,11 +1086,12 @@ __global__ void kernel_sum( const double md_0 = force.get(0) * 0.50000000000000000 + momdensity_0; const double md_1 = force.get(1) * 0.50000000000000000 + momdensity_1; const double md_2 = force.get(2) * 0.50000000000000000 + momdensity_2; - out[bufsize * offset + 0u] += md_0; - out[bufsize * offset + 1u] += md_1; - out[bufsize * offset + 2u] += md_2; + out[0u] += md_0; + out[1u] += md_1; + out[2u] += md_2; } } +// LCOV_EXCL_STOP Vector3 reduce( gpu::GPUField const *pdf_field, @@ -870,13 +1114,14 @@ Vector3 reduce( } // namespace MomentumDensity namespace PressureTensor { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor pdf, - double *RESTRICT const out) { + double *RESTRICT p_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 9u); pdf.set(blockIdx, threadIdx); + p_out += offset; if (pdf.isValidPosition()) { - uint const bufsize = 9u; - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize); double const f_0 = pdf.get(0u); double const f_1 = pdf.get(1u); double const f_2 = pdf.get(2u); @@ -905,19 +1150,18 @@ __global__ void kernel_get( const double p_6 = -f_13 + f_14 + f_17 - f_18; const double p_7 = f_11 - f_12 - f_15 + f_16; const double p_8 = f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6; - out[bufsize * offset + 0u] = p_0; - out[bufsize * offset + 1u] = p_1; - out[bufsize * offset + 2u] = p_2; - - out[bufsize * offset + 3u] = p_3; - out[bufsize * offset + 4u] = p_4; - out[bufsize * offset + 5u] = p_5; - - out[bufsize * offset + 6u] = p_6; - out[bufsize * offset + 7u] = p_7; - out[bufsize * offset + 8u] = p_8; + p_out[0u] = p_0; + p_out[1u] = p_1; + p_out[2u] = p_2; + p_out[3u] = p_3; + p_out[4u] = p_4; + p_out[5u] = p_5; + p_out[6u] = p_6; + p_out[7u] = p_7; + p_out[8u] = p_8; } } +// LCOV_EXCL_STOP Matrix3 get( gpu::GPUField const *pdf_field, @@ -930,7 +1174,21 @@ Matrix3 get( kernel.addParam(dev_data_ptr); kernel(); Matrix3 out; - thrust::copy(dev_data.begin(), dev_data.begin() + 9u, out.data()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + return out; +} + +std::vector get( + gpu::GPUField const *pdf_field, + CellInterval const &ci) { + thrust::device_vector dev_data(9u * ci.numCells()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; } } // namespace PressureTensor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh index 6b3723ab30..6ac754f263 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh @@ -43,18 +43,6 @@ #include #include -#ifdef WALBERLA_CXX_COMPILER_IS_GNU -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#endif - -#ifdef WALBERLA_CXX_COMPILER_IS_CLANG -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" -#endif - namespace walberla { namespace lbm { namespace accessor { @@ -66,6 +54,11 @@ std::array get(gpu::GPUField const *pdf_field, /** @brief Set populations on a single cell. */ void set(gpu::GPUField *pdf_field, std::array const &pop, Cell const &cell); +/** @brief Set populations and recalculate velocities on a single cell. */ +void set(gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::array const &pop, Cell const &cell); /** @brief Initialize all cells with the same value. */ void initialize(gpu::GPUField *pdf_field, std::array const &pop); @@ -75,6 +68,11 @@ std::vector get(gpu::GPUField const *pdf_field, /** @brief Set populations on a cell interval. */ void set(gpu::GPUField *pdf_field, std::vector const &values, CellInterval const &ci); +/** @brief Set populations and recalculate velocities on a cell interval. */ +void set(gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, CellInterval const &ci); } // namespace Population namespace Vector { @@ -116,10 +114,32 @@ void set(gpu::GPUField *pdf_field, std::vector const &values, } // namespace Density namespace Velocity { -void set(gpu::GPUField *pdf_field, gpu::GPUField *force_field, - Vector3 const &u, Cell const &cell); +Vector3 get(gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, Cell const &cell); +std::vector get(gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, + CellInterval const &ci); +void set(gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, Vector3 const &u, + Cell const &cell); +void set(gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, CellInterval const &ci); } // namespace Velocity +namespace Force { +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, Vector3 const &u, + Cell const &cell); +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, std::vector const &values, + CellInterval const &ci); +} // namespace Force + namespace DensityAndVelocity { std::tuple> get(gpu::GPUField const *pdf_field, @@ -141,16 +161,10 @@ Vector3 reduce(gpu::GPUField const *pdf_field, namespace PressureTensor { Matrix3 get(gpu::GPUField const *pdf_field, Cell const &cell); +std::vector get(gpu::GPUField const *pdf_field, + CellInterval const &ci); } // namespace PressureTensor } // namespace accessor } // namespace lbm } // namespace walberla - -#ifdef WALBERLA_CXX_COMPILER_IS_GNU -#pragma GCC diagnostic pop -#endif - -#ifdef WALBERLA_CXX_COMPILER_IS_CLANG -#pragma clang diagnostic pop -#endif diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h index 689fe936be..a7711c4f78 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h @@ -41,19 +41,18 @@ #include #include +#include #include #include #ifdef WALBERLA_CXX_COMPILER_IS_GNU #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #ifdef WALBERLA_CXX_COMPILER_IS_CLANG #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #endif namespace walberla { @@ -61,8 +60,8 @@ namespace lbm { namespace accessor { namespace Population { -inline std::array -get(GhostLayerField const *pdf_field, Cell const &cell) { +inline auto get(GhostLayerField const *pdf_field, + Cell const &cell) { float const &xyz0 = pdf_field->get(cell, uint_t{0u}); std::array pop; pop[0u] = pdf_field->getF(&xyz0, uint_t{0u}); @@ -111,6 +110,54 @@ inline void set(GhostLayerField *pdf_field, pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; } +inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField const *force_field, + std::array const &pop, Cell const &cell) { + auto &xyz0 = pdf_field->get(cell, uint_t{0u}); + const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u]; + const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u]; + const float f_2 = pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u]; + const float f_3 = pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u]; + const float f_4 = pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u]; + const float f_5 = pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u]; + const float f_6 = pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u]; + const float f_7 = pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u]; + const float f_8 = pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u]; + const float f_9 = pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u]; + const float f_10 = pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u]; + const float f_11 = pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u]; + const float f_12 = pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u]; + const float f_13 = pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u]; + const float f_14 = pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u]; + const float f_15 = pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u]; + const float f_16 = pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u]; + const float f_17 = pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u]; + const float f_18 = pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; + const auto x = cell.x(); + const auto y = cell.y(); + const auto z = cell.z(); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const float md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000f + momdensity_0; + const float md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000f + momdensity_1; + const float md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000f + momdensity_2; + const auto rho_inv = float{1} / rho; + velocity_field->get(cell, uint_t{0u}) = md_0 * rho_inv; + velocity_field->get(cell, uint_t{1u}) = md_1 * rho_inv; + velocity_field->get(cell, uint_t{2u}) = md_2 * rho_inv; +} + inline void initialize(GhostLayerField *pdf_field, std::array const &pop) { WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdf_field, { @@ -137,9 +184,8 @@ inline void initialize(GhostLayerField *pdf_field, }); } -inline std::vector -get(GhostLayerField const *pdf_field, - CellInterval const &ci) { +inline auto get(GhostLayerField const *pdf_field, + CellInterval const &ci) { std::vector out; out.reserve(ci.numCells() * uint_t(19u)); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { @@ -174,31 +220,86 @@ get(GhostLayerField const *pdf_field, inline void set(GhostLayerField *pdf_field, std::vector const &values, CellInterval const &ci) { assert(uint_c(values.size()) == ci.numCells() * uint_t(19u)); - auto values_ptr = values.data(); + auto pop = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + float &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u]; + pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u]; + pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u]; + pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u]; + pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u]; + pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u]; + pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u]; + pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u]; + pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u]; + pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u]; + pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u]; + pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u]; + pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u]; + pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u]; + pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u]; + pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u]; + pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u]; + pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u]; + pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; + std::advance(pop, 19); + } + } + } +} + +inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField const *force_field, + std::vector const &values, CellInterval const &ci) { + assert(uint_c(values.size()) == ci.numCells() * uint_t(19u)); + auto pop = values.data(); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { float &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); - pdf_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u]; - pdf_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u]; - pdf_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u]; - pdf_field->getF(&xyz0, uint_t{3u}) = values_ptr[3u]; - pdf_field->getF(&xyz0, uint_t{4u}) = values_ptr[4u]; - pdf_field->getF(&xyz0, uint_t{5u}) = values_ptr[5u]; - pdf_field->getF(&xyz0, uint_t{6u}) = values_ptr[6u]; - pdf_field->getF(&xyz0, uint_t{7u}) = values_ptr[7u]; - pdf_field->getF(&xyz0, uint_t{8u}) = values_ptr[8u]; - pdf_field->getF(&xyz0, uint_t{9u}) = values_ptr[9u]; - pdf_field->getF(&xyz0, uint_t{10u}) = values_ptr[10u]; - pdf_field->getF(&xyz0, uint_t{11u}) = values_ptr[11u]; - pdf_field->getF(&xyz0, uint_t{12u}) = values_ptr[12u]; - pdf_field->getF(&xyz0, uint_t{13u}) = values_ptr[13u]; - pdf_field->getF(&xyz0, uint_t{14u}) = values_ptr[14u]; - pdf_field->getF(&xyz0, uint_t{15u}) = values_ptr[15u]; - pdf_field->getF(&xyz0, uint_t{16u}) = values_ptr[16u]; - pdf_field->getF(&xyz0, uint_t{17u}) = values_ptr[17u]; - pdf_field->getF(&xyz0, uint_t{18u}) = values_ptr[18u]; - values_ptr += 19u; + const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}) = pop[0u]; + const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}) = pop[1u]; + const float f_2 = pdf_field->getF(&xyz0, uint_t{2u}) = pop[2u]; + const float f_3 = pdf_field->getF(&xyz0, uint_t{3u}) = pop[3u]; + const float f_4 = pdf_field->getF(&xyz0, uint_t{4u}) = pop[4u]; + const float f_5 = pdf_field->getF(&xyz0, uint_t{5u}) = pop[5u]; + const float f_6 = pdf_field->getF(&xyz0, uint_t{6u}) = pop[6u]; + const float f_7 = pdf_field->getF(&xyz0, uint_t{7u}) = pop[7u]; + const float f_8 = pdf_field->getF(&xyz0, uint_t{8u}) = pop[8u]; + const float f_9 = pdf_field->getF(&xyz0, uint_t{9u}) = pop[9u]; + const float f_10 = pdf_field->getF(&xyz0, uint_t{10u}) = pop[10u]; + const float f_11 = pdf_field->getF(&xyz0, uint_t{11u}) = pop[11u]; + const float f_12 = pdf_field->getF(&xyz0, uint_t{12u}) = pop[12u]; + const float f_13 = pdf_field->getF(&xyz0, uint_t{13u}) = pop[13u]; + const float f_14 = pdf_field->getF(&xyz0, uint_t{14u}) = pop[14u]; + const float f_15 = pdf_field->getF(&xyz0, uint_t{15u}) = pop[15u]; + const float f_16 = pdf_field->getF(&xyz0, uint_t{16u}) = pop[16u]; + const float f_17 = pdf_field->getF(&xyz0, uint_t{17u}) = pop[17u]; + const float f_18 = pdf_field->getF(&xyz0, uint_t{18u}) = pop[18u]; + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = + -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const float md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000f + momdensity_0; + const float md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000f + momdensity_1; + const float md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000f + momdensity_2; + const auto rho_inv = float{1} / rho; + velocity_field->get(x, y, z, uint_t{0u}) = md_0 * rho_inv; + velocity_field->get(x, y, z, uint_t{1u}) = md_1 * rho_inv; + velocity_field->get(x, y, z, uint_t{2u}) = md_2 * rho_inv; + std::advance(pop, 19); } } } @@ -206,8 +307,8 @@ inline void set(GhostLayerField *pdf_field, } // namespace Population namespace Vector { -inline Vector3 get(GhostLayerField const *vec_field, - Cell const &cell) { +inline auto get(GhostLayerField const *vec_field, + Cell const &cell) { const float &xyz0 = vec_field->get(cell, uint_t{0u}); Vector3 vec; vec[0] = vec_field->getF(&xyz0, uint_t{0u}); @@ -252,9 +353,8 @@ inline void add_to_all(GhostLayerField *vec_field, }); } -inline std::vector -get(GhostLayerField const *vec_field, - CellInterval const &ci) { +inline auto get(GhostLayerField const *vec_field, + CellInterval const &ci) { std::vector out; out.reserve(ci.numCells() * uint_t(3u)); for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { @@ -281,7 +381,7 @@ inline void set(GhostLayerField *vec_field, vec_field->getF(&xyz0, uint_t{0u}) = values_ptr[0u]; vec_field->getF(&xyz0, uint_t{1u}) = values_ptr[1u]; vec_field->getF(&xyz0, uint_t{2u}) = values_ptr[2u]; - values_ptr += 3u; + std::advance(values_ptr, 3); } } } @@ -290,8 +390,8 @@ inline void set(GhostLayerField *vec_field, namespace EquilibriumDistribution { inline float get(stencil::Direction const direction, - Vector3 const &u = Vector3(float(0.0)), - float rho = float(1.0)) { + Vector3 const &u = Vector3(float{0}), + float rho = float{1}) { using namespace stencil; switch (direction) { @@ -571,7 +671,7 @@ inline void set(GhostLayerField *pdf_field, vel1Term + vel2Term; // calculate current velocity (before density change) - const float conversion = float(1) / rho; + const float conversion = float{1} / rho; Vector3 velocity; velocity[0u] = momdensity_0 * conversion; velocity[1u] = momdensity_1 * conversion; @@ -659,7 +759,7 @@ inline void set(GhostLayerField *pdf_field, vel1Term + vel2Term; // calculate current velocity (before density change) - const float conversion = float(1) / rho; + const float conversion = float{1} / rho; Vector3 velocity; velocity[0u] = momdensity_0 * conversion; velocity[1u] = momdensity_1 * conversion; @@ -674,7 +774,108 @@ inline void set(GhostLayerField *pdf_field, } // namespace Density namespace Velocity { +inline auto get(GhostLayerField const *pdf_field, + GhostLayerField const *force_field, + Cell const &cell) { + const float &xyz0 = pdf_field->get(cell, uint_t{0u}); + const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const auto x = cell.x(); + const auto y = cell.y(); + const auto z = cell.z(); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const float md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000f + momdensity_0; + const float md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000f + momdensity_1; + const float md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000f + momdensity_2; + const float rho_inv = float{1} / rho; + + return Vector3(md_0 * rho_inv, md_1 * rho_inv, md_2 * rho_inv); +} + +inline auto get(GhostLayerField const *pdf_field, + GhostLayerField const *force_field, + CellInterval const &ci) { + std::vector out; + out.reserve(ci.numCells() * uint_t(3u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = + -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const float md_0 = + force_field->get(x, y, z, 0) * 0.50000000000000000f + momdensity_0; + const float md_1 = + force_field->get(x, y, z, 1) * 0.50000000000000000f + momdensity_1; + const float md_2 = + force_field->get(x, y, z, 2) * 0.50000000000000000f + momdensity_2; + const float rho_inv = float{1} / rho; + out.emplace_back(md_0 * rho_inv); + out.emplace_back(md_1 * rho_inv); + out.emplace_back(md_2 * rho_inv); + } + } + } + return out; +} + inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, GhostLayerField const *force_field, Vector3 const &u, Cell const &cell) { const float &xyz0 = pdf_field->get(cell, uint_t{0u}); @@ -712,15 +913,183 @@ inline void set(GhostLayerField *pdf_field, -force_field->get(x, y, z, 1) * 0.50000000000000000f / rho + u[1]; const float u_2 = -force_field->get(x, y, z, 2) * 0.50000000000000000f / rho + u[2]; + velocity_field->get(x, y, z, uint_t{0u}) = u[0u]; + velocity_field->get(x, y, z, uint_t{1u}) = u[1u]; + velocity_field->get(x, y, z, uint_t{2u}) = u[2u]; Equilibrium::set(pdf_field, Vector3(u_0, u_1, u_2), rho, cell); } + +inline void set(GhostLayerField *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField const *force_field, + std::vector const &values, CellInterval const &ci) { + assert(uint_c(values.size()) == ci.numCells() * uint_t(3u)); + auto u = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + float &pdf_xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + float &vel_xyz0 = velocity_field->get(x, y, z, uint_t{0u}); + const float f_0 = pdf_field->getF(&pdf_xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&pdf_xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&pdf_xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&pdf_xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&pdf_xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&pdf_xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&pdf_xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&pdf_xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&pdf_xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&pdf_xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&pdf_xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&pdf_xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&pdf_xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&pdf_xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&pdf_xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&pdf_xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&pdf_xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&pdf_xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&pdf_xyz0, uint_t{18u}); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float vel2Term = f_12 + f_13 + f_5; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + + const float u_0 = + -force_field->get(x, y, z, 0) * 0.50000000000000000f / rho + u[0]; + const float u_1 = + -force_field->get(x, y, z, 1) * 0.50000000000000000f / rho + u[1]; + const float u_2 = + -force_field->get(x, y, z, 2) * 0.50000000000000000f / rho + u[2]; + velocity_field->getF(&vel_xyz0, uint_t{0u}) = u[0u]; + velocity_field->getF(&vel_xyz0, uint_t{1u}) = u[1u]; + velocity_field->getF(&vel_xyz0, uint_t{2u}) = u[2u]; + + std::advance(u, 3); + + Equilibrium::set(pdf_field, Vector3(u_0, u_1, u_2), rho, + Cell{x, y, z}); + } + } + } +} } // namespace Velocity +namespace Force { +inline void set(GhostLayerField const *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField *force_field, + Vector3 const &force, Cell const &cell) { + float const &pdf_xyz0 = pdf_field->get(cell, uint_t{0u}); + float &vel_xyz0 = velocity_field->get(cell, uint_t{0u}); + float &laf_xyz0 = force_field->get(cell, uint_t{0u}); + const float f_0 = pdf_field->getF(&pdf_xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&pdf_xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&pdf_xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&pdf_xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&pdf_xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&pdf_xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&pdf_xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&pdf_xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&pdf_xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&pdf_xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&pdf_xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&pdf_xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&pdf_xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&pdf_xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&pdf_xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&pdf_xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&pdf_xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&pdf_xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&pdf_xyz0, uint_t{18u}); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const float md_0 = force[0u] * 0.50000000000000000f + momdensity_0; + const float md_1 = force[1u] * 0.50000000000000000f + momdensity_1; + const float md_2 = force[2u] * 0.50000000000000000f + momdensity_2; + auto const rho_inv = float{1} / rho; + + force_field->getF(&laf_xyz0, uint_t{0u}) = force[0u]; + force_field->getF(&laf_xyz0, uint_t{1u}) = force[1u]; + force_field->getF(&laf_xyz0, uint_t{2u}) = force[2u]; + + velocity_field->getF(&vel_xyz0, uint_t{0u}) = md_0 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{1u}) = md_1 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{2u}) = md_2 * rho_inv; +} + +inline void set(GhostLayerField const *pdf_field, + GhostLayerField *velocity_field, + GhostLayerField *force_field, + std::vector const &values, CellInterval const &ci) { + assert(uint_c(values.size()) == ci.numCells() * uint_t(3u)); + auto force = values.data(); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + float const &pdf_xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + float &vel_xyz0 = velocity_field->get(x, y, z, uint_t{0u}); + float &laf_xyz0 = force_field->get(x, y, z, uint_t{0u}); + const float f_0 = pdf_field->getF(&pdf_xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&pdf_xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&pdf_xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&pdf_xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&pdf_xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&pdf_xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&pdf_xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&pdf_xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&pdf_xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&pdf_xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&pdf_xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&pdf_xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&pdf_xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&pdf_xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&pdf_xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&pdf_xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&pdf_xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&pdf_xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&pdf_xyz0, uint_t{18u}); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = + -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = + f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + + vel1Term + vel2Term; + const float md_0 = force[0u] * 0.50000000000000000f + momdensity_0; + const float md_1 = force[1u] * 0.50000000000000000f + momdensity_1; + const float md_2 = force[2u] * 0.50000000000000000f + momdensity_2; + auto const rho_inv = float{1} / rho; + + force_field->getF(&laf_xyz0, uint_t{0u}) = force[0u]; + force_field->getF(&laf_xyz0, uint_t{1u}) = force[1u]; + force_field->getF(&laf_xyz0, uint_t{2u}) = force[2u]; + + velocity_field->getF(&vel_xyz0, uint_t{0u}) = md_0 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{1u}) = md_1 * rho_inv; + velocity_field->getF(&vel_xyz0, uint_t{2u}) = md_2 * rho_inv; + + std::advance(force, 3); + } + } + } +} +} // namespace Force + namespace MomentumDensity { -inline Vector3 -reduce(GhostLayerField const *pdf_field, - GhostLayerField const *force_field) { +inline auto reduce(GhostLayerField const *pdf_field, + GhostLayerField const *force_field) { Vector3 momentumDensity(float{0}); WALBERLA_FOR_ALL_CELLS_XYZ(pdf_field, { const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); @@ -768,8 +1137,8 @@ reduce(GhostLayerField const *pdf_field, } // namespace MomentumDensity namespace PressureTensor { -inline Matrix3 get(GhostLayerField const *pdf_field, - Cell const &cell) { +inline auto get(GhostLayerField const *pdf_field, + Cell const &cell) { const float &xyz0 = pdf_field->get(cell, uint_t{0u}); const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}); const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}); @@ -818,6 +1187,63 @@ inline Matrix3 get(GhostLayerField const *pdf_field, return pressureTensor; } + +inline auto get(GhostLayerField const *pdf_field, + CellInterval const &ci) { + std::vector out; + out.reserve(ci.numCells() * uint_t(9u)); + for (auto x = ci.xMin(); x <= ci.xMax(); ++x) { + for (auto y = ci.yMin(); y <= ci.yMax(); ++y) { + for (auto z = ci.zMin(); z <= ci.zMax(); ++z) { + const float &xyz0 = pdf_field->get(x, y, z, uint_t{0u}); + const float f_0 = pdf_field->getF(&xyz0, uint_t{0u}); + const float f_1 = pdf_field->getF(&xyz0, uint_t{1u}); + const float f_2 = pdf_field->getF(&xyz0, uint_t{2u}); + const float f_3 = pdf_field->getF(&xyz0, uint_t{3u}); + const float f_4 = pdf_field->getF(&xyz0, uint_t{4u}); + const float f_5 = pdf_field->getF(&xyz0, uint_t{5u}); + const float f_6 = pdf_field->getF(&xyz0, uint_t{6u}); + const float f_7 = pdf_field->getF(&xyz0, uint_t{7u}); + const float f_8 = pdf_field->getF(&xyz0, uint_t{8u}); + const float f_9 = pdf_field->getF(&xyz0, uint_t{9u}); + const float f_10 = pdf_field->getF(&xyz0, uint_t{10u}); + const float f_11 = pdf_field->getF(&xyz0, uint_t{11u}); + const float f_12 = pdf_field->getF(&xyz0, uint_t{12u}); + const float f_13 = pdf_field->getF(&xyz0, uint_t{13u}); + const float f_14 = pdf_field->getF(&xyz0, uint_t{14u}); + const float f_15 = pdf_field->getF(&xyz0, uint_t{15u}); + const float f_16 = pdf_field->getF(&xyz0, uint_t{16u}); + const float f_17 = pdf_field->getF(&xyz0, uint_t{17u}); + const float f_18 = pdf_field->getF(&xyz0, uint_t{18u}); + const float p_0 = + f_10 + f_13 + f_14 + f_17 + f_18 + f_3 + f_4 + f_7 + f_8 + f_9; + const float p_1 = -f_10 - f_7 + f_8 + f_9; + const float p_2 = -f_13 + f_14 + f_17 - f_18; + const float p_3 = -f_10 - f_7 + f_8 + f_9; + const float p_4 = + f_1 + f_10 + f_11 + f_12 + f_15 + f_16 + f_2 + f_7 + f_8 + f_9; + const float p_5 = f_11 - f_12 - f_15 + f_16; + const float p_6 = -f_13 + f_14 + f_17 - f_18; + const float p_7 = f_11 - f_12 - f_15 + f_16; + const float p_8 = + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6; + + out.emplace_back(p_0); + out.emplace_back(p_1); + out.emplace_back(p_2); + + out.emplace_back(p_3); + out.emplace_back(p_4); + out.emplace_back(p_5); + + out.emplace_back(p_6); + out.emplace_back(p_7); + out.emplace_back(p_8); + } + } + } + return out; +} } // namespace PressureTensor } // namespace accessor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu index 58c74fca3a..e0c8dd8102 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cu @@ -48,13 +48,8 @@ #if defined(__NVCC__) #define RESTRICT __restrict__ -#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__) #pragma nv_diagnostic push #pragma nv_diag_suppress 177 // unused variable -#else -#pragma push -#pragma diag_suppress 177 // unused variable -#endif #elif defined(__clang__) #if defined(__CUDA__) #if defined(__CUDA_ARCH__) @@ -62,20 +57,17 @@ #define RESTRICT __restrict__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #else // clang compiling CUDA code in host mode #define RESTRICT __restrict__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" #endif #endif #elif defined(__GNUC__) or defined(__GNUG__) #define RESTRICT __restrict__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" #elif defined(_MSC_VER) #define RESTRICT __restrict #else @@ -102,38 +94,13 @@ namespace lbm { namespace accessor { namespace Population { -__global__ void kernel_get_interval( - gpu::FieldAccessor pdf, - float *RESTRICT const pop) { - pdf.set(blockIdx, threadIdx); - if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); - pop[offset + 0u] = pdf.get(0u); - pop[offset + 1u] = pdf.get(1u); - pop[offset + 2u] = pdf.get(2u); - pop[offset + 3u] = pdf.get(3u); - pop[offset + 4u] = pdf.get(4u); - pop[offset + 5u] = pdf.get(5u); - pop[offset + 6u] = pdf.get(6u); - pop[offset + 7u] = pdf.get(7u); - pop[offset + 8u] = pdf.get(8u); - pop[offset + 9u] = pdf.get(9u); - pop[offset + 10u] = pdf.get(10u); - pop[offset + 11u] = pdf.get(11u); - pop[offset + 12u] = pdf.get(12u); - pop[offset + 13u] = pdf.get(13u); - pop[offset + 14u] = pdf.get(14u); - pop[offset + 15u] = pdf.get(15u); - pop[offset + 16u] = pdf.get(16u); - pop[offset + 17u] = pdf.get(17u); - pop[offset + 18u] = pdf.get(18u); - } -} - +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor pdf, - float *RESTRICT const pop) { + float *RESTRICT pop) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); pdf.set(blockIdx, threadIdx); + pop += offset; if (pdf.isValidPosition()) { pop[0u] = pdf.get(0u); pop[1u] = pdf.get(1u); @@ -157,37 +124,38 @@ __global__ void kernel_get( } } -__global__ void kernel_set_interval( +__global__ void kernel_set( gpu::FieldAccessor pdf, - float const *RESTRICT const pop) { + float const *RESTRICT pop) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); pdf.set(blockIdx, threadIdx); + pop += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); - pdf.get(0u) = pop[offset + 0u]; - pdf.get(1u) = pop[offset + 1u]; - pdf.get(2u) = pop[offset + 2u]; - pdf.get(3u) = pop[offset + 3u]; - pdf.get(4u) = pop[offset + 4u]; - pdf.get(5u) = pop[offset + 5u]; - pdf.get(6u) = pop[offset + 6u]; - pdf.get(7u) = pop[offset + 7u]; - pdf.get(8u) = pop[offset + 8u]; - pdf.get(9u) = pop[offset + 9u]; - pdf.get(10u) = pop[offset + 10u]; - pdf.get(11u) = pop[offset + 11u]; - pdf.get(12u) = pop[offset + 12u]; - pdf.get(13u) = pop[offset + 13u]; - pdf.get(14u) = pop[offset + 14u]; - pdf.get(15u) = pop[offset + 15u]; - pdf.get(16u) = pop[offset + 16u]; - pdf.get(17u) = pop[offset + 17u]; - pdf.get(18u) = pop[offset + 18u]; + pdf.get(0u) = pop[0u]; + pdf.get(1u) = pop[1u]; + pdf.get(2u) = pop[2u]; + pdf.get(3u) = pop[3u]; + pdf.get(4u) = pop[4u]; + pdf.get(5u) = pop[5u]; + pdf.get(6u) = pop[6u]; + pdf.get(7u) = pop[7u]; + pdf.get(8u) = pop[8u]; + pdf.get(9u) = pop[9u]; + pdf.get(10u) = pop[10u]; + pdf.get(11u) = pop[11u]; + pdf.get(12u) = pop[12u]; + pdf.get(13u) = pop[13u]; + pdf.get(14u) = pop[14u]; + pdf.get(15u) = pop[15u]; + pdf.get(16u) = pop[16u]; + pdf.get(17u) = pop[17u]; + pdf.get(18u) = pop[18u]; } } -__global__ void kernel_set( +__global__ void kernel_broadcast( gpu::FieldAccessor pdf, - float const *RESTRICT const pop) { + float const *RESTRICT pop) { pdf.set(blockIdx, threadIdx); if (pdf.isValidPosition()) { pdf.get(0u) = pop[0u]; @@ -212,11 +180,59 @@ __global__ void kernel_set( } } +__global__ void kernel_set_vel( + gpu::FieldAccessor pdf, + gpu::FieldAccessor velocity, + gpu::FieldAccessor force, + float const *RESTRICT pop) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 19u); + pdf.set(blockIdx, threadIdx); + velocity.set(blockIdx, threadIdx); + force.set(blockIdx, threadIdx); + pop += offset; + if (pdf.isValidPosition()) { + const float f_0 = pdf.get(0u) = pop[0u]; + const float f_1 = pdf.get(1u) = pop[1u]; + const float f_2 = pdf.get(2u) = pop[2u]; + const float f_3 = pdf.get(3u) = pop[3u]; + const float f_4 = pdf.get(4u) = pop[4u]; + const float f_5 = pdf.get(5u) = pop[5u]; + const float f_6 = pdf.get(6u) = pop[6u]; + const float f_7 = pdf.get(7u) = pop[7u]; + const float f_8 = pdf.get(8u) = pop[8u]; + const float f_9 = pdf.get(9u) = pop[9u]; + const float f_10 = pdf.get(10u) = pop[10u]; + const float f_11 = pdf.get(11u) = pop[11u]; + const float f_12 = pdf.get(12u) = pop[12u]; + const float f_13 = pdf.get(13u) = pop[13u]; + const float f_14 = pdf.get(14u) = pop[14u]; + const float f_15 = pdf.get(15u) = pop[15u]; + const float f_16 = pdf.get(16u) = pop[16u]; + const float f_17 = pdf.get(17u) = pop[17u]; + const float f_18 = pdf.get(18u) = pop[18u]; + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0; + const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1; + const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2; + const float rho_inv = float{1} / rho; + velocity.get(0u) = md_0 * rho_inv; + velocity.get(1u) = md_1 * rho_inv; + velocity.get(2u) = md_2 * rho_inv; + } +} +// LCOV_EXCL_STOP + std::array get( gpu::GPUField const *pdf_field, Cell const &cell) { CellInterval ci(cell, cell); - thrust::device_vector dev_data(19u, float{0}); + thrust::device_vector dev_data(19u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); auto kernel = gpu::make_kernel(kernel_get); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); @@ -231,7 +247,7 @@ void set( gpu::GPUField *pdf_field, std::array const &pop, Cell const &cell) { - thrust::device_vector dev_data(pop.data(), pop.data() + 19u); + thrust::device_vector dev_data(pop.begin(), pop.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); CellInterval ci(cell, cell); auto kernel = gpu::make_kernel(kernel_set); @@ -240,13 +256,30 @@ void set( kernel(); } +void set( + gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::array const &pop, + Cell const &cell) { + thrust::device_vector dev_data(pop.begin(), pop.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + CellInterval ci(cell, cell); + auto kernel = gpu::make_kernel(kernel_set_vel); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + void initialize( gpu::GPUField *pdf_field, std::array const &pop) { CellInterval ci = pdf_field->xyzSizeWithGhostLayer(); - thrust::device_vector dev_data(pop.data(), pop.data() + 19u); + thrust::device_vector dev_data(pop.begin(), pop.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set); + auto kernel = gpu::make_kernel(kernel_broadcast); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -257,7 +290,7 @@ std::vector get( CellInterval const &ci) { thrust::device_vector dev_data(ci.numCells() * 19u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_get_interval); + auto kernel = gpu::make_kernel(kernel_get); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(dev_data_ptr); kernel(); @@ -272,82 +305,92 @@ void set( CellInterval const &ci) { thrust::device_vector dev_data(values.begin(), values.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set_interval); + auto kernel = gpu::make_kernel(kernel_set); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); } -} // namespace Population -namespace Vector { -__global__ void kernel_get_interval( - gpu::FieldAccessor vec, - float *const out) { - vec.set(blockIdx, threadIdx); - if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); - out[offset + 0u] = vec.get(0u); - out[offset + 1u] = vec.get(1u); - out[offset + 2u] = vec.get(2u); - } +void set( + gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, + CellInterval const &ci) { + thrust::device_vector dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set_vel); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); } +} // namespace Population +namespace Vector { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor vec, - float *const out) { + float *u_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); vec.set(blockIdx, threadIdx); + u_out += offset; if (vec.isValidPosition()) { - out[0u] = vec.get(0u); - out[1u] = vec.get(1u); - out[2u] = vec.get(2u); + u_out[0u] = vec.get(0u); + u_out[1u] = vec.get(1u); + u_out[2u] = vec.get(2u); } } -__global__ void kernel_set_interval( +__global__ void kernel_set( gpu::FieldAccessor vec, - float const *RESTRICT const u) { + float const *RESTRICT u_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); vec.set(blockIdx, threadIdx); + u_in += offset; if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); - vec.get(0u) = u[offset + 0u]; - vec.get(1u) = u[offset + 1u]; - vec.get(2u) = u[offset + 2u]; + vec.get(0u) = u_in[0u]; + vec.get(1u) = u_in[1u]; + vec.get(2u) = u_in[2u]; } } -__global__ void kernel_set( +__global__ void kernel_broadcast( gpu::FieldAccessor vec, - const float *RESTRICT const u) { + float const *RESTRICT u_in) { vec.set(blockIdx, threadIdx); if (vec.isValidPosition()) { - vec.get(0u) = u[0u]; - vec.get(1u) = u[1u]; - vec.get(2u) = u[2u]; + vec.get(0u) = u_in[0u]; + vec.get(1u) = u_in[1u]; + vec.get(2u) = u_in[2u]; } } -__global__ void kernel_add_interval( +__global__ void kernel_add( gpu::FieldAccessor vec, - float const *RESTRICT const u) { + float const *RESTRICT u_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); vec.set(blockIdx, threadIdx); + u_in += offset; if (vec.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); - vec.get(0u) += u[offset + 0u]; - vec.get(1u) += u[offset + 1u]; - vec.get(2u) += u[offset + 2u]; + vec.get(0u) += u_in[0u]; + vec.get(1u) += u_in[1u]; + vec.get(2u) += u_in[2u]; } } -__global__ void kernel_add( +__global__ void kernel_broadcast_add( gpu::FieldAccessor vec, - float const *RESTRICT const u) { + float const *RESTRICT u_in) { vec.set(blockIdx, threadIdx); if (vec.isValidPosition()) { - vec.get(0u) += u[0u]; - vec.get(1u) += u[1u]; - vec.get(2u) += u[2u]; + vec.get(0u) += u_in[0u]; + vec.get(1u) += u_in[1u]; + vec.get(2u) += u_in[2u]; } } +// LCOV_EXCL_STOP Vector3 get( gpu::GPUField const *vec_field, @@ -396,7 +439,7 @@ void initialize( CellInterval ci = vec_field->xyzSizeWithGhostLayer(); thrust::device_vector dev_data(vec.data(), vec.data() + 3u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set); + auto kernel = gpu::make_kernel(kernel_broadcast); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -408,7 +451,7 @@ void add_to_all( CellInterval ci = vec_field->xyzSizeWithGhostLayer(); thrust::device_vector dev_data(vec.data(), vec.data() + 3u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_add); + auto kernel = gpu::make_kernel(kernel_broadcast_add); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -419,7 +462,7 @@ std::vector get( CellInterval const &ci) { thrust::device_vector dev_data(ci.numCells() * 3u); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_get_interval); + auto kernel = gpu::make_kernel(kernel_get); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(dev_data_ptr); kernel(); @@ -434,7 +477,7 @@ void set( CellInterval const &ci) { thrust::device_vector dev_data(values.begin(), values.end()); auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); - auto kernel = gpu::make_kernel(kernel_set_interval); + auto kernel = gpu::make_kernel(kernel_set); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*vec_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); @@ -442,6 +485,7 @@ void set( } // namespace Vector namespace Interpolation { +// LCOV_EXCL_START /** @brief Calculate interpolation weights. */ static __forceinline__ __device__ void calculate_weights( float const *RESTRICT const pos, @@ -535,6 +579,7 @@ __global__ void kernel_set( } } } +// LCOV_EXCL_STOP static dim3 calculate_dim_grid(uint const threads_x, uint const blocks_per_grid_y, @@ -589,6 +634,7 @@ void set( } // namespace Interpolation namespace Equilibrium { +// LCOV_EXCL_START __device__ void kernel_set_device( gpu::FieldAccessor pdf, float const *RESTRICT const u, @@ -614,15 +660,18 @@ __device__ void kernel_set_device( pdf.get(17u) = rho * -0.083333333333333329f * u[0] + rho * -0.083333333333333329f * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]) + rho * 0.25f * u[0] * u[2]; pdf.get(18u) = rho * -0.083333333333333329f * u[2] + rho * -0.25f * u[0] * u[2] + rho * 0.027777777777777776f + rho * 0.083333333333333329f * u[0] + rho * 0.083333333333333329f * (u[0] * u[0]) + rho * 0.083333333333333329f * (u[2] * u[2]); } +// LCOV_EXCL_STOP } // namespace Equilibrium namespace Density { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor pdf, - float *RESTRICT const out) { + float *RESTRICT rho_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u); pdf.set(blockIdx, threadIdx); + rho_out += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u)); float const f_0 = pdf.get(0u); float const f_1 = pdf.get(1u); float const f_2 = pdf.get(2u); @@ -646,16 +695,17 @@ __global__ void kernel_get( const float vel1Term = f_1 + f_11 + f_15 + f_7; const float vel2Term = f_12 + f_13 + f_5; const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; - out[offset] = rho; + rho_out[0u] = rho; } } __global__ void kernel_set( gpu::FieldAccessor pdf, - float const *RESTRICT const rho_in) { + float const *RESTRICT rho_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 1u); pdf.set(blockIdx, threadIdx); + rho_in += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(1u)); float const f_0 = pdf.get(0u); float const f_1 = pdf.get(1u); float const f_2 = pdf.get(2u); @@ -684,12 +734,13 @@ __global__ void kernel_set( const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; // calculate current velocity (before density change) - float const conversion = float(1) / rho; - float const u_old[3] = {momdensity_0 * conversion, momdensity_1 * conversion, momdensity_2 * conversion}; + float const rho_inv = float{1} / rho; + float const u_old[3] = {momdensity_0 * rho_inv, momdensity_1 * rho_inv, momdensity_2 * rho_inv}; - Equilibrium::kernel_set_device(pdf, u_old, rho_in[offset]); + Equilibrium::kernel_set_device(pdf, u_old, rho_in[0u]); } } +// LCOV_EXCL_STOP float get( gpu::GPUField const *pdf_field, @@ -727,7 +778,7 @@ std::vector get( kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); kernel.addParam(dev_data_ptr); kernel(); - std::vector out(ci.numCells()); + std::vector out(dev_data.size()); thrust::copy(dev_data.begin(), dev_data.end(), out.begin()); return out; } @@ -746,16 +797,63 @@ void set( } // namespace Density namespace Velocity { +// LCOV_EXCL_START +__global__ void kernel_get( + gpu::FieldAccessor pdf, + gpu::FieldAccessor force, + float *RESTRICT u_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); + pdf.set(blockIdx, threadIdx); + force.set(blockIdx, threadIdx); + u_out += offset; + if (pdf.isValidPosition()) { + float const f_0 = pdf.get(0u); + float const f_1 = pdf.get(1u); + float const f_2 = pdf.get(2u); + float const f_3 = pdf.get(3u); + float const f_4 = pdf.get(4u); + float const f_5 = pdf.get(5u); + float const f_6 = pdf.get(6u); + float const f_7 = pdf.get(7u); + float const f_8 = pdf.get(8u); + float const f_9 = pdf.get(9u); + float const f_10 = pdf.get(10u); + float const f_11 = pdf.get(11u); + float const f_12 = pdf.get(12u); + float const f_13 = pdf.get(13u); + float const f_14 = pdf.get(14u); + float const f_15 = pdf.get(15u); + float const f_16 = pdf.get(16u); + float const f_17 = pdf.get(17u); + float const f_18 = pdf.get(18u); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0; + const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1; + const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2; + auto const rho_inv = float{1} / rho; + u_out[0u] = md_0 * rho_inv; + u_out[1u] = md_1 * rho_inv; + u_out[2u] = md_2 * rho_inv; + } +} + __global__ void kernel_set( gpu::FieldAccessor pdf, + gpu::FieldAccessor velocity, gpu::FieldAccessor force, - float const *RESTRICT const u_in) { + float const *RESTRICT u_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); pdf.set(blockIdx, threadIdx); + velocity.set(blockIdx, threadIdx); force.set(blockIdx, threadIdx); + u_in += offset; if (pdf.isValidPosition()) { - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, uint(3u)); - uint const bufsize = 3u; - float const *RESTRICT const u = u_in + bufsize * offset; float const f_0 = pdf.get(0u); float const f_1 = pdf.get(1u); float const f_2 = pdf.get(2u); @@ -775,6 +873,7 @@ __global__ void kernel_set( float const f_16 = pdf.get(16u); float const f_17 = pdf.get(17u); float const f_18 = pdf.get(18u); + float const *RESTRICT const u = u_in; const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; const float vel1Term = f_1 + f_11 + f_15 + f_7; const float vel2Term = f_12 + f_13 + f_5; @@ -782,15 +881,54 @@ __global__ void kernel_set( const float u_0 = -force.get(0) * 0.50000000000000000f / rho + u[0]; const float u_1 = -force.get(1) * 0.50000000000000000f / rho + u[1]; const float u_2 = -force.get(2) * 0.50000000000000000f / rho + u[2]; + velocity.get(0u) = u_in[0u]; + velocity.get(1u) = u_in[1u]; + velocity.get(2u) = u_in[2u]; + float u_new[3] = {u_0, u_1, u_2}; Equilibrium::kernel_set_device(pdf, u_new, rho); } } +// LCOV_EXCL_STOP + +Vector3 get( + gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, + Cell const &cell) { + CellInterval ci(cell, cell); + thrust::device_vector dev_data(3u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + Vector3 vec; + thrust::copy(dev_data.begin(), dev_data.end(), vec.data()); + return vec; +} + +std::vector get( + gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, + CellInterval const &ci) { + thrust::device_vector dev_data(3u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + return out; +} void set( gpu::GPUField *pdf_field, - gpu::GPUField *force_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, Vector3 const &u, Cell const &cell) { CellInterval ci(cell, cell); @@ -798,22 +936,127 @@ void set( auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); auto kernel = gpu::make_kernel(kernel_set); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + +void set( + gpu::GPUField *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, + CellInterval const &ci) { + thrust::device_vector dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); kernel.addParam(const_cast(dev_data_ptr)); kernel(); } } // namespace Velocity +namespace Force { +// LCOV_EXCL_START +__global__ void kernel_set( + gpu::FieldAccessor pdf, + gpu::FieldAccessor velocity, + gpu::FieldAccessor force, + float const *RESTRICT f_in) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); + pdf.set(blockIdx, threadIdx); + velocity.set(blockIdx, threadIdx); + force.set(blockIdx, threadIdx); + f_in += offset; + if (pdf.isValidPosition()) { + float const f_0 = pdf.get(0u); + float const f_1 = pdf.get(1u); + float const f_2 = pdf.get(2u); + float const f_3 = pdf.get(3u); + float const f_4 = pdf.get(4u); + float const f_5 = pdf.get(5u); + float const f_6 = pdf.get(6u); + float const f_7 = pdf.get(7u); + float const f_8 = pdf.get(8u); + float const f_9 = pdf.get(9u); + float const f_10 = pdf.get(10u); + float const f_11 = pdf.get(11u); + float const f_12 = pdf.get(12u); + float const f_13 = pdf.get(13u); + float const f_14 = pdf.get(14u); + float const f_15 = pdf.get(15u); + float const f_16 = pdf.get(16u); + float const f_17 = pdf.get(17u); + float const f_18 = pdf.get(18u); + const float vel0Term = f_10 + f_14 + f_18 + f_4 + f_8; + const float momdensity_0 = -f_13 - f_17 - f_3 - f_7 - f_9 + vel0Term; + const float vel1Term = f_1 + f_11 + f_15 + f_7; + const float momdensity_1 = -f_10 - f_12 - f_16 - f_2 + f_8 - f_9 + vel1Term; + const float vel2Term = f_12 + f_13 + f_5; + const float momdensity_2 = f_11 + f_14 - f_15 - f_16 - f_17 - f_18 - f_6 + vel2Term; + const float rho = f_0 + f_16 + f_17 + f_2 + f_3 + f_6 + f_9 + vel0Term + vel1Term + vel2Term; + const float md_0 = f_in[0u] * 0.50000000000000000f + momdensity_0; + const float md_1 = f_in[1u] * 0.50000000000000000f + momdensity_1; + const float md_2 = f_in[2u] * 0.50000000000000000f + momdensity_2; + auto const rho_inv = float{1} / rho; + + force.get(0u) = f_in[0u]; + force.get(1u) = f_in[1u]; + force.get(2u) = f_in[2u]; + + velocity.get(0u) = md_0 * rho_inv; + velocity.get(1u) = md_1 * rho_inv; + velocity.get(2u) = md_2 * rho_inv; + } +} +// LCOV_EXCL_STOP + +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, + Vector3 const &u, + Cell const &cell) { + CellInterval ci(cell, cell); + thrust::device_vector dev_data(u.data(), u.data() + 3u); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} + +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, + std::vector const &values, + CellInterval const &ci) { + thrust::device_vector dev_data(values.begin(), values.end()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_set); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*velocity_field, ci)); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*force_field, ci)); + kernel.addParam(const_cast(dev_data_ptr)); + kernel(); +} +} // namespace Force + namespace MomentumDensity { +// LCOV_EXCL_START __global__ void kernel_sum( gpu::FieldAccessor pdf, gpu::FieldAccessor force, - float *RESTRICT const out) { + float *RESTRICT out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 3u); pdf.set(blockIdx, threadIdx); force.set(blockIdx, threadIdx); + out += offset; if (pdf.isValidPosition()) { - uint const bufsize = 3u; - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize); float const f_0 = pdf.get(0u); float const f_1 = pdf.get(1u); float const f_2 = pdf.get(2u); @@ -843,11 +1086,12 @@ __global__ void kernel_sum( const float md_0 = force.get(0) * 0.50000000000000000f + momdensity_0; const float md_1 = force.get(1) * 0.50000000000000000f + momdensity_1; const float md_2 = force.get(2) * 0.50000000000000000f + momdensity_2; - out[bufsize * offset + 0u] += md_0; - out[bufsize * offset + 1u] += md_1; - out[bufsize * offset + 2u] += md_2; + out[0u] += md_0; + out[1u] += md_1; + out[2u] += md_2; } } +// LCOV_EXCL_STOP Vector3 reduce( gpu::GPUField const *pdf_field, @@ -870,13 +1114,14 @@ Vector3 reduce( } // namespace MomentumDensity namespace PressureTensor { +// LCOV_EXCL_START __global__ void kernel_get( gpu::FieldAccessor pdf, - float *RESTRICT const out) { + float *RESTRICT p_out) { + auto const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, 9u); pdf.set(blockIdx, threadIdx); + p_out += offset; if (pdf.isValidPosition()) { - uint const bufsize = 9u; - uint const offset = getLinearIndex(blockIdx, threadIdx, gridDim, blockDim, bufsize); float const f_0 = pdf.get(0u); float const f_1 = pdf.get(1u); float const f_2 = pdf.get(2u); @@ -905,19 +1150,18 @@ __global__ void kernel_get( const float p_6 = -f_13 + f_14 + f_17 - f_18; const float p_7 = f_11 - f_12 - f_15 + f_16; const float p_8 = f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18 + f_5 + f_6; - out[bufsize * offset + 0u] = p_0; - out[bufsize * offset + 1u] = p_1; - out[bufsize * offset + 2u] = p_2; - - out[bufsize * offset + 3u] = p_3; - out[bufsize * offset + 4u] = p_4; - out[bufsize * offset + 5u] = p_5; - - out[bufsize * offset + 6u] = p_6; - out[bufsize * offset + 7u] = p_7; - out[bufsize * offset + 8u] = p_8; + p_out[0u] = p_0; + p_out[1u] = p_1; + p_out[2u] = p_2; + p_out[3u] = p_3; + p_out[4u] = p_4; + p_out[5u] = p_5; + p_out[6u] = p_6; + p_out[7u] = p_7; + p_out[8u] = p_8; } } +// LCOV_EXCL_STOP Matrix3 get( gpu::GPUField const *pdf_field, @@ -930,7 +1174,21 @@ Matrix3 get( kernel.addParam(dev_data_ptr); kernel(); Matrix3 out; - thrust::copy(dev_data.begin(), dev_data.begin() + 9u, out.data()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); + return out; +} + +std::vector get( + gpu::GPUField const *pdf_field, + CellInterval const &ci) { + thrust::device_vector dev_data(9u * ci.numCells()); + auto const dev_data_ptr = thrust::raw_pointer_cast(dev_data.data()); + auto kernel = gpu::make_kernel(kernel_get); + kernel.addFieldIndexingParam(gpu::FieldIndexing::interval(*pdf_field, ci)); + kernel.addParam(dev_data_ptr); + kernel(); + std::vector out(dev_data.size()); + thrust::copy(dev_data.begin(), dev_data.end(), out.data()); return out; } } // namespace PressureTensor diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh index dede426c02..9d25a37a85 100644 --- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh +++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh @@ -43,18 +43,6 @@ #include #include -#ifdef WALBERLA_CXX_COMPILER_IS_GNU -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -#pragma GCC diagnostic ignored "-Wunused-parameter" -#endif - -#ifdef WALBERLA_CXX_COMPILER_IS_CLANG -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-parameter" -#endif - namespace walberla { namespace lbm { namespace accessor { @@ -66,6 +54,10 @@ std::array get(gpu::GPUField const *pdf_field, /** @brief Set populations on a single cell. */ void set(gpu::GPUField *pdf_field, std::array const &pop, Cell const &cell); +/** @brief Set populations and recalculate velocities on a single cell. */ +void set(gpu::GPUField *pdf_field, gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::array const &pop, Cell const &cell); /** @brief Initialize all cells with the same value. */ void initialize(gpu::GPUField *pdf_field, std::array const &pop); @@ -75,6 +67,10 @@ std::vector get(gpu::GPUField const *pdf_field, /** @brief Set populations on a cell interval. */ void set(gpu::GPUField *pdf_field, std::vector const &values, CellInterval const &ci); +/** @brief Set populations and recalculate velocities on a cell interval. */ +void set(gpu::GPUField *pdf_field, gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, CellInterval const &ci); } // namespace Population namespace Vector { @@ -116,10 +112,30 @@ void set(gpu::GPUField *pdf_field, std::vector const &values, } // namespace Density namespace Velocity { -void set(gpu::GPUField *pdf_field, gpu::GPUField *force_field, - Vector3 const &u, Cell const &cell); +Vector3 get(gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, Cell const &cell); +std::vector get(gpu::GPUField const *pdf_field, + gpu::GPUField const *force_field, + CellInterval const &ci); +void set(gpu::GPUField *pdf_field, gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, Vector3 const &u, + Cell const &cell); +void set(gpu::GPUField *pdf_field, gpu::GPUField *velocity_field, + gpu::GPUField const *force_field, + std::vector const &values, CellInterval const &ci); } // namespace Velocity +namespace Force { +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, Vector3 const &u, + Cell const &cell); +void set(gpu::GPUField const *pdf_field, + gpu::GPUField *velocity_field, + gpu::GPUField *force_field, std::vector const &values, + CellInterval const &ci); +} // namespace Force + namespace DensityAndVelocity { std::tuple> get(gpu::GPUField const *pdf_field, gpu::GPUField const *force_field, @@ -141,16 +157,10 @@ Vector3 reduce(gpu::GPUField const *pdf_field, namespace PressureTensor { Matrix3 get(gpu::GPUField const *pdf_field, Cell const &cell); +std::vector get(gpu::GPUField const *pdf_field, + CellInterval const &ci); } // namespace PressureTensor } // namespace accessor } // namespace lbm } // namespace walberla - -#ifdef WALBERLA_CXX_COMPILER_IS_GNU -#pragma GCC diagnostic pop -#endif - -#ifdef WALBERLA_CXX_COMPILER_IS_CLANG -#pragma clang diagnostic pop -#endif diff --git a/src/walberla_bridge/tests/CMakeLists.txt b/src/walberla_bridge/tests/CMakeLists.txt index 4cbf9ce15d..83a7d9d2ee 100644 --- a/src/walberla_bridge/tests/CMakeLists.txt +++ b/src/walberla_bridge/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2023 The ESPResSo project +# Copyright (C) 2020-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,52 +17,44 @@ # along with this program. If not, see . # -include(unit_test) - -function(ESPRESSO_WALBERLA_UNIT_TEST) - cmake_parse_arguments(TEST "" "NAME;NUM_PROC" "SRC;DEPENDS" ${ARGN}) - unit_test(NAME ${TEST_NAME} NUM_PROC ${TEST_NUM_PROC} SRC ${TEST_SRC} DEPENDS - ${TEST_DEPENDS} espresso::walberla espresso::walberla::cpp_flags - espresso::utils) +include(espresso_unit_test) + +function(ESPRESSO_ADD_TEST) + cmake_parse_arguments(TEST "" "SRC;NAME;NUM_PROC" "DEPENDS" ${ARGN}) + espresso_unit_test( + SRC ${TEST_SRC} NAME ${TEST_NAME} NUM_PROC ${TEST_NUM_PROC} DEPENDS + ${TEST_DEPENDS} espresso::walberla espresso::utils) + if(${TEST_SRC} MATCHES ".*\.cu$") + target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags + espresso::walberla_cuda) + else() + target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cpp_flags) + endif() set_target_properties(${TEST_NAME} PROPERTIES CXX_CLANG_TIDY "") target_include_directories(${TEST_NAME} PRIVATE ${WALBERLA_INCLUDE_DIRS} ${walberla_BINARY_DIR}/src) target_link_libraries(${TEST_NAME} PRIVATE ${WALBERLA_LIBS}) endfunction() -espresso_walberla_unit_test(NAME ResourceManager_test SRC - ResourceManager_test.cpp) - -espresso_walberla_unit_test(NAME lb_kernels_unit_tests SRC - lb_kernels_unit_tests.cpp) - -espresso_walberla_unit_test(NAME ek_kernels_unit_tests SRC - ek_kernels_unit_tests.cpp) - -espresso_walberla_unit_test( - NAME LatticeWalberla_unit_tests SRC LatticeWalberla_unit_tests.cpp DEPENDS - Boost::boost Boost::mpi NUM_PROC 2) - -espresso_walberla_unit_test( - NAME LBWalberlaImpl_unit_tests SRC LBWalberlaImpl_unit_tests.cpp DEPENDS - Boost::boost Boost::mpi NUM_PROC 2) - -espresso_walberla_unit_test( - NAME LBWalberlaImpl_bspline_tests SRC LBWalberlaImpl_bspline_tests.cpp - DEPENDS Boost::mpi NUM_PROC 2) +espresso_add_test(SRC ResourceManager_test.cpp) +espresso_add_test(SRC lb_kernels_unit_tests.cpp) +espresso_add_test(SRC ek_kernels_unit_tests.cpp) +espresso_add_test(SRC LatticeWalberla_unit_tests.cpp DEPENDS Boost::mpi + NUM_PROC 2) +espresso_add_test(SRC LBWalberlaImpl_unit_tests.cpp DEPENDS Boost::mpi NUM_PROC + 2) +espresso_add_test(SRC LBWalberlaImpl_bspline_tests.cpp DEPENDS Boost::mpi + NUM_PROC 2) +espresso_add_test(SRC LBWalberlaImpl_flow_tests.cpp DEPENDS Boost::mpi) +espresso_add_test(SRC LBWalberlaImpl_lees_edwards_tests.cpp DEPENDS Boost::mpi) +espresso_add_test(SRC EKinWalberlaImpl_unit_tests.cpp DEPENDS Boost::mpi + NUM_PROC 2) if(NOT (ESPRESSO_BUILD_WITH_ASAN OR ESPRESSO_BUILD_WITH_UBSAN)) - espresso_walberla_unit_test( - NAME LBWalberlaImpl_statistical_tests SRC - LBWalberlaImpl_statistical_tests.cpp DEPENDS Boost::mpi) + espresso_add_test(SRC LBWalberlaImpl_statistical_tests.cpp DEPENDS Boost::mpi) endif() -espresso_walberla_unit_test(NAME LBWalberlaImpl_flow_tests SRC - LBWalberlaImpl_flow_tests.cpp DEPENDS Boost::mpi) - -espresso_walberla_unit_test(NAME LBWalberlaImpl_lees_edwards_test SRC - LBWalberlaImpl_lees_edwards.cpp DEPENDS Boost::mpi) - -espresso_walberla_unit_test( - NAME EKinWalberlaImpl_unit_tests SRC EKinWalberlaImpl_unit_tests.cpp DEPENDS - Boost::boost Boost::mpi NUM_PROC 2) +if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA) + espresso_add_test(SRC LBWalberlaImpl_field_accessors_tests.cu DEPENDS + Boost::mpi) +endif() diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu b/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu new file mode 100644 index 0000000000..5829c49021 --- /dev/null +++ b/src/walberla_bridge/tests/LBWalberlaImpl_field_accessors_tests.cu @@ -0,0 +1,372 @@ +/* + * Copyright (C) 2024 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#define BOOST_TEST_MODULE "field accessors test" +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_NO_MAIN + +#if defined(__NVCC__) +// Fix for https://i10git.cs.fau.de/walberla/walberla/-/issues/244 +#pragma nv_diagnostic push +#pragma nv_diag_suppress 554 // unreachable conversion operator +#endif + +#include +#include +#include + +#include "../src/lattice_boltzmann/LBWalberlaImpl.hpp" +#include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecision.h" +#include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsDoublePrecisionCUDA.cuh" +#include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecision.h" +#include "../src/lattice_boltzmann/generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh" + +#include +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static Utils::Vector3i mpi_shape; + +template + requires std::ranges::sized_range +boost::test_tools::predicate_result almost_equal(R const &val, R const &ref, + typename R::value_type atol) { + assert(val.size() == ref.size()); + assert(not val.empty()); + auto const print_first_n = [](R const &v) { + auto constexpr n = std::size_t{6u}; + std::stringstream stream; + stream << "["; + for (auto i = 0ul, end = std::min(v.size(), n); i < end; ++i) { + if (i) { + stream << ", "; + } + stream << v[i]; + } + if (v.size() > n) { + stream << ", ..."; + } + stream << "]"; + return stream.str(); + }; + boost::test_tools::predicate_result res(true); + for (auto i = 0ul; i < val.size(); ++i) { + if (auto const diff = std::abs(val[i] - ref[i]); diff > atol) { + res = false; + res.message() << "val{" << print_first_n(val) << "} and " << "ref{" + << print_first_n(ref) << "} mismatch: " << "val[" << i + << "]{" << val[i] << "} != " << "ref[" << i << "]{" + << ref[i] << "} " << "(difference{" << diff << "} > delta{" + << atol << "})"; + break; + } + } + return res; +} + +template +boost::test_tools::predicate_result almost_equal(walberla::Vector3 const &a, + walberla::Vector3 const &b, + T atol) { + return almost_equal(std::span(a.data(), 3ul), std::span(b.data(), 3ul), atol); +} + +template +boost::test_tools::predicate_result almost_equal(walberla::Matrix3 const &a, + walberla::Matrix3 const &b, + T atol) { + return almost_equal(std::span(a.data(), 9ul), std::span(b.data(), 9ul), atol); +} + +template + requires std::floating_point +boost::test_tools::predicate_result almost_equal(T a, T b, T atol) { + return almost_equal(std::span(&a, 1ul), std::span(&b, 1ul), atol); +} + +template +class LBWalberlaImplTest : public walberla::LBWalberlaImpl { +public: + using Base = walberla::LBWalberlaImpl; + using Base::Base; + using Base::m_density; + using Base::m_last_applied_force_field_id; + using Base::m_pdf_field_id; + using Base::m_velocity_field_id; +}; + +template struct Fixture { + std::shared_ptr<::LatticeWalberla> lattice; + std::shared_ptr> lbfluid; + + Fixture() { + auto const grid_dim = Utils::Vector3i::broadcast(4); + auto const viscosity = FT(1.5); + auto const density = FT(0.9); + lattice = std::make_shared<::LatticeWalberla>(grid_dim, mpi_shape, 1u); + lbfluid = std::make_shared>( + lattice, viscosity, density); + } + + void runTest() { + auto const bc = walberla::get_block_and_cell(*lattice, {0, 0, 0}, false); + BOOST_CHECK(bc); + auto const cell = bc->cell; + auto const ci = walberla::CellInterval(cell, cell); + auto &block = *(bc->block); + check_getters_setters(block, cell); + check_getters_setters(block, ci); + } + + template + void check_getters_setters(walberla::IBlock &block, CellIt const &it) { + using namespace walberla; + using PdfField = LBWalberlaImplTest::PdfField; + using VectorField = LBWalberlaImplTest::VectorField; + + auto constexpr is_interval = std::is_same_v; + auto constexpr epsilon = std::numeric_limits::epsilon(); + auto constexpr exact = FT{0}; + + auto const make_ref_vector = [](std::initializer_list values) { + if constexpr (is_interval) { + assert(values.size() % 3ul == 0ul); + return std::vector(values); + } else { + assert(values.size() == 3ul); + return Vector3(std::data(values)); + } + }; + auto const make_ref_matrix = [](std::initializer_list values) { + if constexpr (is_interval) { + assert(values.size() % 9ul == 0ul); + return std::vector(values); + } else { + assert(values.size() == 9ul); + return Matrix3(std::data(values)); + } + }; + auto const to_number = [](auto const &value) { + if constexpr (std::is_same_v) { + return value; + } else { + assert(value.size() == 1ul); + return value[0u]; + } + }; + + auto const density = lbfluid->m_density; + auto pdf_field = block.template getData(lbfluid->m_pdf_field_id); + auto velocity_field = + block.template getData(lbfluid->m_velocity_field_id); + auto force_field = block.template getData( + lbfluid->m_last_applied_force_field_id); + + std::conditional_t, std::array> + cur_pop = lbm::accessor::Population::get(pdf_field, it); + std::conditional_t, Vector3> cur_vel; + std::conditional_t, Vector3> cur_laf; + + auto const reset_fields = [&, initial_pop = cur_pop]() { + auto const null_vector = Vector3(FT{0}); + std::array pop; + for (std::size_t i = 0ul; i < pop.size(); ++i) { + pop[i] = initial_pop[i]; + } + lbm::accessor::Population::initialize(pdf_field, pop); + lbm::accessor::Vector::initialize(force_field, null_vector); + lbm::accessor::Vector::initialize(velocity_field, null_vector); + }; + + { + auto diag = FT{0}; + auto const zero = FT{0}; + auto const old_pop = lbm::accessor::Population::get(pdf_field, it); + auto const old_pre = lbm::accessor::PressureTensor::get(pdf_field, it); + auto const old_laf = lbm::accessor::Vector::get(force_field, it); + auto const old_rho = lbm::accessor::Density::get(pdf_field, it); + auto ref_pop = old_pop; + std::transform(old_pop.begin(), old_pop.end(), ref_pop.begin(), + [](auto const &f) { return FT{2} * f; }); + lbm::accessor::Population::set(pdf_field, ref_pop, it); + auto const new_pop = lbm::accessor::Population::get(pdf_field, it); + auto const new_pre = lbm::accessor::PressureTensor::get(pdf_field, it); + auto const new_rho = lbm::accessor::Density::get(pdf_field, it); + BOOST_CHECK(almost_equal(new_pop, ref_pop, exact)); + // clang-format off + diag = density * (FT{1} / FT{3}); + auto const old_pre_ref = make_ref_matrix({diag, zero, zero, + zero, diag, zero, + zero, zero, diag}); + BOOST_CHECK(almost_equal(old_pre, old_pre_ref, epsilon)); + diag = density * (FT{2} / FT{3}); + auto const new_pre_ref = make_ref_matrix({diag, zero, zero, + zero, diag, zero, + zero, zero, diag}); + BOOST_CHECK(almost_equal(new_pre, new_pre_ref, epsilon)); + // clang-format on + auto const old_laf_ref = make_ref_vector({FT{0}, FT{0}, FT{0}}); + auto const new_laf_ref = make_ref_vector({FT{2}, FT{3}, FT{4}}); + lbm::accessor::Vector::set(force_field, new_laf_ref, it); + auto const new_laf = lbm::accessor::Vector::get(force_field, it); + BOOST_CHECK(almost_equal(old_laf, old_laf_ref, exact)); + BOOST_CHECK(almost_equal(new_laf, new_laf_ref, exact)); + lbm::accessor::Vector::set(force_field, old_laf_ref, it); + cur_laf = lbm::accessor::Vector::get(force_field, it); + BOOST_CHECK(almost_equal(cur_laf, old_laf_ref, exact)); + lbm::accessor::Vector::add_to_all(force_field, + Vector3(new_laf_ref.data())); + cur_laf = lbm::accessor::Vector::get(force_field, it); + BOOST_CHECK(almost_equal(cur_laf, new_laf_ref, exact)); + lbm::accessor::Vector::set(force_field, old_laf_ref, it); + cur_laf = lbm::accessor::Vector::get(force_field, it); + BOOST_CHECK(almost_equal(cur_laf, old_laf_ref, exact)); + lbm::accessor::Density::set(pdf_field, {FT{7} * density}, it); + auto const cur_rho = lbm::accessor::Density::get(pdf_field, it); + BOOST_CHECK( + almost_equal(to_number(old_rho), density * FT{1}, FT{20} * epsilon)); + BOOST_CHECK( + almost_equal(to_number(new_rho), density * FT{2}, FT{20} * epsilon)); + BOOST_CHECK( + almost_equal(to_number(cur_rho), density * FT{7}, FT{20} * epsilon)); + } + reset_fields(); + { + // update cached velocities and recalculate populations in a single step + auto const old_pop = lbm::accessor::Population::get(pdf_field, it); + auto const old_vel = lbm::accessor::Vector::get(velocity_field, it); + auto const ref_vel = make_ref_vector({FT(0.001), FT(0.002), FT(0.003)}); + lbm::accessor::Velocity::set(pdf_field, velocity_field, force_field, + ref_vel, it); + auto const new_pop = lbm::accessor::Population::get(pdf_field, it); + auto const new_vel = lbm::accessor::Vector::get(velocity_field, it); + BOOST_CHECK(almost_equal(new_vel, ref_vel, epsilon)); + auto const new_mom = + lbm::accessor::MomentumDensity::reduce(pdf_field, force_field); + auto const ref_mom = Vector3( + ref_vel[0u] * density, ref_vel[1u] * density, ref_vel[2u] * density); + BOOST_CHECK(almost_equal(new_mom, ref_mom, FT{20} * epsilon)); + // update populations and recalculate cached velocities in a single step + lbm::accessor::Population::set(pdf_field, velocity_field, force_field, + old_pop, it); + cur_pop = lbm::accessor::Population::get(pdf_field, it); + cur_vel = lbm::accessor::Vector::get(velocity_field, it); + BOOST_CHECK(almost_equal(cur_pop, old_pop, exact)); + BOOST_CHECK(almost_equal(cur_vel, old_vel, epsilon)); + cur_vel = lbm::accessor::Velocity::get(pdf_field, force_field, it); + BOOST_CHECK(almost_equal(cur_vel, old_vel, epsilon)); + lbm::accessor::Population::set(pdf_field, velocity_field, force_field, + new_pop, it); + cur_pop = lbm::accessor::Population::get(pdf_field, it); + cur_vel = lbm::accessor::Vector::get(velocity_field, it); + BOOST_CHECK(almost_equal(cur_pop, new_pop, exact)); + BOOST_CHECK(almost_equal(cur_vel, new_vel, epsilon)); + cur_vel = lbm::accessor::Velocity::get(pdf_field, force_field, it); + BOOST_CHECK(almost_equal(cur_vel, new_vel, epsilon)); + // update forces and recalculate cached velocities in a single step + auto const ref_laf = make_ref_vector({ref_vel[0u] * FT{2} * density, + ref_vel[1u] * FT{2} * density, + ref_vel[2u] * FT{2} * density}); + lbm::accessor::Population::set(pdf_field, velocity_field, force_field, + old_pop, it); + lbm::accessor::Force::set(pdf_field, velocity_field, force_field, ref_laf, + it); + cur_pop = lbm::accessor::Population::get(pdf_field, it); + cur_vel = lbm::accessor::Vector::get(velocity_field, it); + cur_laf = lbm::accessor::Vector::get(force_field, it); + BOOST_CHECK(almost_equal(cur_pop, old_pop, exact)); + BOOST_CHECK(almost_equal(cur_vel, new_vel, epsilon)); + BOOST_CHECK(almost_equal(cur_laf, ref_laf, epsilon)); + cur_vel = lbm::accessor::Velocity::get(pdf_field, force_field, it); + BOOST_CHECK(almost_equal(cur_vel, new_vel, epsilon)); + // update velocities and recalculate populations in a single step + lbm::accessor::Population::set(pdf_field, velocity_field, force_field, + old_pop, it); + lbm::accessor::Velocity::set(pdf_field, velocity_field, force_field, + ref_vel, it); + cur_pop = lbm::accessor::Population::get(pdf_field, it); + cur_vel = lbm::accessor::Vector::get(velocity_field, it); + cur_laf = lbm::accessor::Vector::get(force_field, it); + BOOST_CHECK(almost_equal(cur_pop, old_pop, epsilon)); + BOOST_CHECK(almost_equal(cur_vel, new_vel, epsilon)); + BOOST_CHECK(almost_equal(cur_laf, ref_laf, epsilon)); + cur_vel = lbm::accessor::Velocity::get(pdf_field, force_field, it); + BOOST_CHECK(almost_equal(cur_vel, new_vel, epsilon)); + } + reset_fields(); + } +}; + +BOOST_AUTO_TEST_CASE(test_custom_predicate) { + std::vector const val = {0, 1, 2, 3, 4, 5, 99, 2}; + std::vector const ref = {0, 1, 2, 3, 4, 5, 7, 80}; + auto const is_true = almost_equal(ref, ref, 0); + auto const is_false = almost_equal(val, ref, 1); + BOOST_REQUIRE(is_true); + BOOST_REQUIRE(not is_false); + BOOST_CHECK_EQUAL(is_true.message(), ""); + BOOST_REQUIRE_EQUAL( + is_false.message(), + "val{[0, 1, 2, 3, 4, 5, ...]} and ref{[0, 1, 2, 3, 4, 5, ...]} " + "mismatch: val[6]{99} != ref[6]{7} (difference{92} > delta{1})"); +} + +using test_types = boost::mpl::list; + +BOOST_AUTO_TEST_CASE_TEMPLATE(macroscopic_accessors, FT, test_types) { + Fixture().runTest(); + Fixture().runTest(); +} + +int main(int argc, char **argv) { + int n_nodes; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &n_nodes); + MPI_Dims_create(n_nodes, 3, mpi_shape.data()); + walberla::mpi_init(); + + auto const res = boost::unit_test::unit_test_main(init_unit_test, argc, argv); + MPI_Finalize(); + return res; +} + +#if defined(__NVCC__) +#pragma nv_diagnostic pop +#endif diff --git a/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards.cpp b/src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards_tests.cpp similarity index 100% rename from src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards.cpp rename to src/walberla_bridge/tests/LBWalberlaImpl_lees_edwards_tests.cpp diff --git a/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp b/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp index 3045e9974c..f45cfb542f 100644 --- a/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp +++ b/src/walberla_bridge/tests/lb_kernels_unit_tests.cpp @@ -19,10 +19,6 @@ #define BOOST_TEST_MODULE waLBerla LB kernels #define BOOST_TEST_DYN_LINK -#include "config/config.hpp" - -#ifdef WALBERLA - #include #include "../src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precision.h" @@ -189,5 +185,3 @@ BOOST_AUTO_TEST_CASE(macroscopic_accessor_equilibrium_distribution) { } } } - -#endif diff --git a/testsuite/CMakeLists.txt b/testsuite/CMakeLists.txt index 027cc3f852..d46e6094db 100644 --- a/testsuite/CMakeLists.txt +++ b/testsuite/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2015-2022 The ESPResSo project +# Copyright (C) 2015-2024 The ESPResSo project # # This file is part of ESPResSo. # @@ -17,8 +17,6 @@ # along with this program. If not, see . # -include(unit_test) - if(ESPRESSO_BUILD_WITH_COVERAGE_PYTHON) set(PYPRESSO_OPTIONS --coverage) endif() diff --git a/testsuite/python/lb.py b/testsuite/python/lb.py index e5a5ebe9ab..86daa7fe51 100644 --- a/testsuite/python/lb.py +++ b/testsuite/python/lb.py @@ -89,13 +89,14 @@ def test_properties(self): self.check_properties(lbf) def check_properties(self, lbf): + density = self.params["density"] agrid = self.params["agrid"] - tau = self.system.time_step + tau = self.params["tau"] # check LB object self.assertAlmostEqual(lbf.tau, tau, delta=self.atol) self.assertAlmostEqual(lbf.agrid, agrid, delta=self.atol) self.assertAlmostEqual(lbf.kinematic_viscosity, 3., delta=self.atol) - self.assertAlmostEqual(lbf.density, 0.85, delta=self.atol) + self.assertAlmostEqual(lbf.density, density, delta=self.atol) self.assertAlmostEqual(lbf.kT, 1.0, delta=self.atol) self.assertEqual(lbf.seed, 42) self.assertEqual( @@ -136,6 +137,57 @@ def check_properties(self, lbf): lbf[0, 0, 0].velocity = [1, 2] with self.assertRaises(TypeError): lbf[0, 1].velocity = [1, 2, 3] + # check node setters update cached velocities (with precision loss) + lbnode = lbf[0, 0, 0] + lbnode.last_applied_force = [0., 0., 0.] + lbnode.velocity = [0., 0., 0.] + old_pop = np.copy(lbnode.population) + old_vel = np.copy(lbnode.velocity) + old_rho = np.copy(lbnode.density) + lbnode.velocity = [1., 2., 3.] + new_pop = np.copy(lbnode.population) + new_vel = np.copy(lbnode.velocity) + lbnode.population = old_pop + np.testing.assert_allclose( + np.copy(lbnode.velocity), old_vel, atol=self.atol * 20.) + lbnode.population = new_pop + np.testing.assert_allclose( + np.copy(lbnode.velocity), new_vel, atol=self.atol * 20.) + lbnode.population = old_pop + lbnode.last_applied_force = [0.4, 0.5, 0.6] + np.testing.assert_allclose( + np.copy(lbnode.velocity), + np.copy([0.4, 0.5, 0.6]) / (agrid / tau * old_rho / 2.), + atol=self.atol * 20.) + lbnode.last_applied_force = [0., 0., 0.] + lbnode.population = old_pop + # check slice setters update cached velocities (with precision loss) + lbslice = lbf[0:5, 0:5, 0:5] + lbslice.last_applied_force = [0., 0., 0.] + lbslice.velocity = [0., 0., 0.] + old_pop = np.copy(lbslice.population) + old_vel = np.copy(lbslice.velocity) + old_rho = np.copy(lbslice.density) + lbslice.velocity = [1., 2., 3.] + new_pop = np.copy(lbslice.population) + new_vel = np.copy(lbslice.velocity) + lbslice.population = old_pop + np.testing.assert_allclose( + np.copy(lbslice.velocity), old_vel, atol=self.atol * 100.) + lbslice.population = new_pop + np.testing.assert_allclose( + np.copy(lbslice.velocity), new_vel, atol=self.atol * 100.) + lbslice.population = old_pop + lbslice.last_applied_force = [0.4, 0.5, 0.6] + vel2force = 2. * tau / agrid + np.testing.assert_allclose( + np.copy(lbslice.velocity), + np.tile(np.copy([0.4, 0.5, 0.6]), (5, 5, 5, 1)) * vel2force / + np.tile(old_rho.reshape((5, 5, 5, 1)), (3,)), + atol=self.atol * 20.) + lbslice.last_applied_force = [0., 0., 0.] + lbslice.population = old_pop + # check node boundary conditions node = lbf[0, 0, 0] self.assertIsNone(node.boundary) self.assertIsNone(node.boundary_force) @@ -373,13 +425,13 @@ def test_lb_node_set_get(self): self.assertAlmostEqual(lbf[0, 0, 0].density, density, delta=1e-4) self.assertEqual(lbf[3, 2, 1].index, (3, 2, 1)) - ext_force_density = [0.1, 0.2, 1.2] - last_applied_force = [0.2, 0.4, 0.6] + ext_force_density = np.array([0.1, 0.2, 1.2]) + last_applied_force = np.array([0.2, 0.4, 0.6]) lbf.ext_force_density = ext_force_density node = lbf[1, 2, 3] node.velocity = v_fluid - node.last_applied_force = last_applied_force np.testing.assert_allclose(np.copy(node.velocity), v_fluid, atol=1e-4) + node.last_applied_force = last_applied_force np.testing.assert_allclose( np.copy(node.last_applied_force), last_applied_force, atol=1e-4) np.testing.assert_allclose( diff --git a/testsuite/python/lb_thermostat.py b/testsuite/python/lb_thermostat.py index 4f0ee2bb62..c67709dd98 100644 --- a/testsuite/python/lb_thermostat.py +++ b/testsuite/python/lb_thermostat.py @@ -204,6 +204,9 @@ def test_friction(self): @utx.skipIfMissingFeatures(["PARTICLE_ANISOTROPY", "THERMOSTAT_PER_PARTICLE"]) def test_exceptions(self): + with self.assertRaisesRegex(RuntimeError, r"set_lb\(\) got an unexpected keyword argument 'act_on_virtual'"): + self.system.thermostat.set_lb( + LB_fluid=self.lbf, act_on_virtual=False) self.system.part.add(pos=[0., 0., 0.], gamma=[1., 2., 3.], id=2) with self.assertRaisesRegex(Exception, r"ERROR: anisotropic particle \(id 2\) coupled to LB"): self.system.integrator.run(1) diff --git a/testsuite/python/test_checkpoint.py b/testsuite/python/test_checkpoint.py index b8d46c49a4..1ab0d748af 100644 --- a/testsuite/python/test_checkpoint.py +++ b/testsuite/python/test_checkpoint.py @@ -158,8 +158,6 @@ def test_lb_fluid(self): np.testing.assert_allclose(np.copy(node.velocity), slip_velocity1) for node in lbf[-1, :, :]: np.testing.assert_allclose(np.copy(node.velocity), slip_velocity2) - for node in lbf[2, :, :]: - np.testing.assert_allclose(np.copy(node.velocity), 0.) # remove boundaries lbf.clear_boundaries() np.testing.assert_equal(