From 7a91cfa355e25094f62220141f0d6ef66281858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= Date: Fri, 14 Jun 2024 23:02:13 +0200 Subject: [PATCH] Redesign P3M grid operations Encapsulate P3M hot loops into lambdas executed via for_each_3d(). Regroup P3M-specific math functions into a single file. --- src/core/electrostatics/p3m.cpp | 243 ++++++------ src/core/electrostatics/p3m_gpu_cuda.cu | 8 +- src/core/electrostatics/p3m_gpu_error_cuda.cu | 55 +-- src/core/magnetostatics/dp3m.cpp | 358 ++++++++---------- src/core/p3m/common.cpp | 44 --- src/core/p3m/common.hpp | 12 +- src/core/p3m/for_each_3d.hpp | 70 ++++ src/core/p3m/influence_function.hpp | 115 +++--- src/core/p3m/influence_function_dipolar.hpp | 219 ++++++----- src/core/p3m/math.hpp | 151 ++++++++ src/core/unit_tests/CMakeLists.txt | 4 +- src/core/unit_tests/fft_test.cpp | 38 ++ src/core/unit_tests/math_test.cpp | 105 +++++ src/core/unit_tests/p3m_test.cpp | 30 +- src/utils/include/utils/Vector.hpp | 34 +- src/utils/include/utils/math/abs.hpp | 40 -- src/utils/include/utils/math/sinc.hpp | 61 --- src/utils/include/utils/math/sqr.hpp | 2 + src/utils/tests/CMakeLists.txt | 2 - src/utils/tests/Vector_test.cpp | 32 +- src/utils/tests/abs_test.cpp | 38 -- src/utils/tests/sinc_test.cpp | 39 -- 22 files changed, 877 insertions(+), 823 deletions(-) create mode 100644 src/core/p3m/for_each_3d.hpp create mode 100644 src/core/p3m/math.hpp create mode 100644 src/core/unit_tests/math_test.cpp delete mode 100644 src/utils/include/utils/math/abs.hpp delete mode 100644 src/utils/include/utils/math/sinc.hpp delete mode 100644 src/utils/tests/abs_test.cpp delete mode 100644 src/utils/tests/sinc_test.cpp diff --git a/src/core/electrostatics/p3m.cpp b/src/core/electrostatics/p3m.cpp index 15cb7028cbc..b16cb96f141 100644 --- a/src/core/electrostatics/p3m.cpp +++ b/src/core/electrostatics/p3m.cpp @@ -36,7 +36,9 @@ #include "p3m/TuningAlgorithm.hpp" #include "p3m/TuningLogger.hpp" #include "p3m/fft.hpp" +#include "p3m/for_each_3d.hpp" #include "p3m/influence_function.hpp" +#include "p3m/math.hpp" #include "BoxGeometry.hpp" #include "LocalBox.hpp" @@ -57,7 +59,6 @@ #include #include #include -#include #include #include @@ -76,9 +77,11 @@ #include #include #include +#include #include #include #include +#include #include void CoulombP3M::count_charged_particles() { @@ -115,7 +118,7 @@ void CoulombP3M::calc_influence_function_force() { auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh}; p3m.g_force = grid_influence_function<1>(p3m.params, start, start + size, - get_system().box_geo->length()); + get_system().box_geo->length_inv()); } /** Calculate the influence function optimized for the energy and the @@ -126,41 +129,38 @@ void CoulombP3M::calc_influence_function_energy() { auto const size = Utils::Vector3i{p3m.fft.plan[3].new_mesh}; p3m.g_energy = grid_influence_function<0>(p3m.params, start, start + size, - get_system().box_geo->length()); + get_system().box_geo->length_inv()); } /** Aliasing sum used by @ref p3m_k_space_error. */ -static auto p3m_tune_aliasing_sums(int nx, int ny, int nz, +static auto p3m_tune_aliasing_sums(Utils::Vector3i const &shift, Utils::Vector3i const &mesh, Utils::Vector3d const &mesh_i, int cao, double alpha_L_i) { - using Utils::sinc; + auto constexpr m_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN); + auto constexpr m_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1); auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i); - auto alias1 = 0.; auto alias2 = 0.; - for (int mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) { - auto const nmx = nx + mx * mesh[0]; - auto const fnmx = mesh_i[0] * nmx; - for (int my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) { - auto const nmy = ny + my * mesh[1]; - auto const fnmy = mesh_i[1] * nmy; - for (int mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) { - auto const nmz = nz + mz * mesh[2]; - auto const fnmz = mesh_i[2] * nmz; - - auto const nm2 = Utils::sqr(nmx) + Utils::sqr(nmy) + Utils::sqr(nmz); - auto const ex = exp(-factor1 * nm2); - auto const ex2 = Utils::sqr(ex); - - auto const U2 = pow(sinc(fnmx) * sinc(fnmy) * sinc(fnmz), 2.0 * cao); - - alias1 += ex2 / nm2; - alias2 += U2 * ex * (nx * nmx + ny * nmy + nz * nmz) / nm2; - } - } - } + + Utils::Vector3i indices{}; + Utils::Vector3i nm{}; + Utils::Vector3d fnm{}; + for_each_3d( + m_start, m_stop, indices, + [&]() { + auto const norm_sq = nm.norm2(); + auto const ex = exp(-factor1 * norm_sq); + auto const energy = std::pow(Utils::product(fnm), 2 * cao); + alias1 += Utils::sqr(ex) / norm_sq; + alias2 += energy * ex * (shift * nm) / norm_sq; + }, + [&](unsigned dim, int n) { + nm[dim] = shift[dim] + n * mesh[dim]; + fnm[dim] = math::sinc(nm[dim] * mesh_i[dim]); + }); + return std::make_pair(alias1, alias2); } @@ -198,51 +198,40 @@ static double p3m_real_space_error(double pref, double r_cut_iL, int n_c_part, static double p3m_k_space_error(double pref, Utils::Vector3i const &mesh, int cao, int n_c_part, double sum_q2, double alpha_L, Utils::Vector3d const &box_l) { - auto const mesh_i = - Utils::hadamard_division(Utils::Vector3d::broadcast(1.), mesh); + + auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao); + auto const mesh_i = 1. / Utils::Vector3d(mesh); auto const alpha_L_i = 1. / alpha_L; + auto const m_stop = mesh / 2; + auto const m_start = -m_stop; + auto indices = Utils::Vector3i{}; + auto values = Utils::Vector3d{}; auto he_q = 0.; - for (int nx = -mesh[0] / 2; nx < mesh[0] / 2; nx++) { - auto const ctan_x = p3m_analytic_cotangent_sum(nx, mesh_i[0], cao); - for (int ny = -mesh[1] / 2; ny < mesh[1] / 2; ny++) { - auto const ctan_y = - ctan_x * p3m_analytic_cotangent_sum(ny, mesh_i[1], cao); - for (int nz = -mesh[2] / 2; nz < mesh[2] / 2; nz++) { - if ((nx != 0) || (ny != 0) || (nz != 0)) { - auto const n2 = Utils::sqr(nx) + Utils::sqr(ny) + Utils::sqr(nz); - auto const cs = - p3m_analytic_cotangent_sum(nz, mesh_i[2], cao) * ctan_y; + for_each_3d( + m_start, m_stop, indices, + [&]() { + if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) { + auto const n2 = indices.norm2(); + auto const cs = Utils::product(values); auto const [alias1, alias2] = - p3m_tune_aliasing_sums(nx, ny, nz, mesh, mesh_i, cao, alpha_L_i); + p3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i); auto const d = alias1 - Utils::sqr(alias2 / cs) / n2; /* at high precision, d can become negative due to extinction; also, don't take values that have no significant digits left*/ - if (d > 0 && (fabs(d / alias1) > ROUND_ERROR_PREC)) + if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC) { he_q += d; + } } - } - } - } + }, + [&values, &mesh_i, cotangent_sum](unsigned dim, int n) { + values[dim] = cotangent_sum(n, mesh_i[dim]); + }); + return 2. * pref * sum_q2 * sqrt(he_q / static_cast(n_c_part)) / (box_l[1] * box_l[2]); } -#ifdef CUDA -static double p3mgpu_k_space_error(double prefactor, - Utils::Vector3i const &mesh, int cao, - int npart, double sum_q2, double alpha_L, - Utils::Vector3d const &box_l) { - auto ks_err = 0.; - if (this_node == 0) { - ks_err = p3m_k_space_error_gpu(prefactor, mesh.data(), cao, npart, sum_q2, - alpha_L, box_l.data()); - } - boost::mpi::broadcast(comm_cart, ks_err, 0); - return ks_err; -} -#endif - void CoulombP3M::init() { assert(p3m.params.mesh >= Utils::Vector3i::broadcast(1)); assert(p3m.params.cao >= 1 and p3m.params.cao <= 7); @@ -422,48 +411,44 @@ CoulombP3M::long_range_pressure(ParticleRange const &particles) { p3m.sm.gather_grid(p3m.rs_mesh.data(), comm_cart, p3m.local_mesh.dim); fft_perform_forw(p3m.rs_mesh.data(), p3m.fft, comm_cart); - auto diagonal = 0.; - int ind = 0; - int j[3]; + auto constexpr m_start = Utils::Vector3i::broadcast(0); auto const half_alpha_inv_sq = Utils::sqr(1. / 2. / p3m.params.alpha); - for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[RX]; j[0]++) { - for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[RY]; j[1]++) { - for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[RZ]; j[2]++) { - auto const kx = 2. * std::numbers::pi * - p3m.d_op[RX][j[KX] + p3m.fft.plan[3].start[KX]] * - box_geo.length_inv()[RX]; - auto const ky = 2. * std::numbers::pi * - p3m.d_op[RY][j[KY] + p3m.fft.plan[3].start[KY]] * - box_geo.length_inv()[RY]; - auto const kz = 2. * std::numbers::pi * - p3m.d_op[RZ][j[KZ] + p3m.fft.plan[3].start[KZ]] * - box_geo.length_inv()[RZ]; - auto const sqk = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz); - - if (sqk != 0.) { - auto const node_k_space_energy = - p3m.g_energy[ind] * (Utils::sqr(p3m.rs_mesh[2 * ind]) + - Utils::sqr(p3m.rs_mesh[2 * ind + 1])); - auto const vterm = -2. * (1. / sqk + half_alpha_inv_sq); - auto const pref = node_k_space_energy * vterm; - node_k_space_pressure_tensor[0] += pref * kx * kx; /* sigma_xx */ - node_k_space_pressure_tensor[1] += pref * kx * ky; /* sigma_xy */ - node_k_space_pressure_tensor[2] += pref * kx * kz; /* sigma_xz */ - node_k_space_pressure_tensor[3] += pref * ky * kx; /* sigma_yx */ - node_k_space_pressure_tensor[4] += pref * ky * ky; /* sigma_yy */ - node_k_space_pressure_tensor[5] += pref * ky * kz; /* sigma_yz */ - node_k_space_pressure_tensor[6] += pref * kz * kx; /* sigma_zx */ - node_k_space_pressure_tensor[7] += pref * kz * ky; /* sigma_zy */ - node_k_space_pressure_tensor[8] += pref * kz * kz; /* sigma_zz */ - diagonal += node_k_space_energy; - } - ind++; - } + auto const m_stop = std::span(p3m.fft.plan[3].new_mesh); + auto const offset = std::span(p3m.fft.plan[3].start); + auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv(); + auto indices = Utils::Vector3i{}; + auto index = std::size_t(0u); + auto diagonal = 0.; + + for_each_3d(m_start, m_stop, indices, [&]() { + auto const kx = p3m.d_op[RX][indices[KX] + offset[KX]] * wavevector[RX]; + auto const ky = p3m.d_op[RY][indices[KY] + offset[KY]] * wavevector[RY]; + auto const kz = p3m.d_op[RZ][indices[KZ] + offset[KZ]] * wavevector[RZ]; + auto const norm_sq = Utils::sqr(kx) + Utils::sqr(ky) + Utils::sqr(kz); + + if (norm_sq != 0.) { + auto const node_k_space_energy = + p3m.g_energy[index] * (Utils::sqr(p3m.rs_mesh[2u * index + 0u]) + + Utils::sqr(p3m.rs_mesh[2u * index + 1u])); + auto const vterm = -2. * (1. / norm_sq + half_alpha_inv_sq); + auto const pref = node_k_space_energy * vterm; + node_k_space_pressure_tensor[0u] += pref * kx * kx; /* sigma_xx */ + node_k_space_pressure_tensor[1u] += pref * kx * ky; /* sigma_xy */ + node_k_space_pressure_tensor[2u] += pref * kx * kz; /* sigma_xz */ + node_k_space_pressure_tensor[4u] += pref * ky * ky; /* sigma_yy */ + node_k_space_pressure_tensor[5u] += pref * ky * kz; /* sigma_yz */ + node_k_space_pressure_tensor[8u] += pref * kz * kz; /* sigma_zz */ + diagonal += node_k_space_energy; } - } - node_k_space_pressure_tensor[0] += diagonal; - node_k_space_pressure_tensor[4] += diagonal; - node_k_space_pressure_tensor[8] += diagonal; + ++index; + }); + + node_k_space_pressure_tensor[0u] += diagonal; + node_k_space_pressure_tensor[4u] += diagonal; + node_k_space_pressure_tensor[8u] += diagonal; + node_k_space_pressure_tensor[3u] = node_k_space_pressure_tensor[1u]; + node_k_space_pressure_tensor[6u] = node_k_space_pressure_tensor[2u]; + node_k_space_pressure_tensor[7u] = node_k_space_pressure_tensor[5u]; } return node_k_space_pressure_tensor * prefactor / (2. * box_geo.volume()); @@ -510,35 +495,35 @@ double CoulombP3M::long_range_kernel(bool force_flag, bool energy_flag, /* === k-space force calculation === */ if (force_flag) { - /* sqrt(-1)*k differentiation */ - int j[3]; - int ind = 0; + /* i*k differentiation */ + auto constexpr m_start = Utils::Vector3i::broadcast(0); + auto const m_stop = std::span(p3m.fft.plan[3].new_mesh); + auto const offset = std::span(p3m.fft.plan[3].start); + auto const wavevector = (2. * std::numbers::pi) * box_geo.length_inv(); + auto indices = Utils::Vector3i{}; + auto index = std::size_t(0u); + /* compute electric field */ // Eq. (3.49) @cite deserno00b - for (j[0] = 0; j[0] < p3m.fft.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < p3m.fft.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < p3m.fft.plan[3].new_mesh[2]; j[2]++) { - auto const rho_hat = std::complex(p3m.rs_mesh[2 * ind + 0], - p3m.rs_mesh[2 * ind + 1]); - auto const phi_hat = p3m.g_force[ind] * rho_hat; - - for (int d = 0; d < 3; d++) { - /* direction in r-space: */ - int d_rs = (d + p3m.ks_pnum) % 3; - /* directions */ - auto const k = 2. * std::numbers::pi * - p3m.d_op[d_rs][j[d] + p3m.fft.plan[3].start[d]] * - box_geo.length_inv()[d_rs]; - - /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */ - p3m.E_mesh[d_rs][2 * ind + 0] = -k * phi_hat.imag(); - p3m.E_mesh[d_rs][2 * ind + 1] = +k * phi_hat.real(); - } - - ind++; - } + for_each_3d(m_start, m_stop, indices, [&]() { + auto const rho_hat = std::complex(p3m.rs_mesh[2u * index + 0u], + p3m.rs_mesh[2u * index + 1u]); + auto const phi_hat = p3m.g_force[index] * rho_hat; + + for (int d = 0; d < 3; d++) { + /* direction in r-space: */ + int d_rs = (d + p3m.ks_pnum) % 3; + /* directions */ + auto const k = + p3m.d_op[d_rs][indices[d] + offset[d]] * wavevector[d_rs]; + + /* i*k*(Re+i*Im) = - Im*k + i*Re*k (i=sqrt(-1)) */ + p3m.E_mesh[d_rs][2u * index + 0u] = -k * phi_hat.imag(); + p3m.E_mesh[d_rs][2u * index + 1u] = +k * phi_hat.real(); } - } + + ++index; + }); /* Back FFT force component mesh */ auto const check_complex = !p3m.params.tuning and check_complex_residuals; @@ -679,8 +664,12 @@ class CoulombTuningAlgorithm : public TuningAlgorithm { #ifdef CUDA auto const &solver = m_system.coulomb.impl->solver; if (has_actor_of_type(solver)) { - ks_err = p3mgpu_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart, - p3m.sum_q2, alpha_L, box_geo.length()); + if (this_node == 0) { + ks_err = + p3m_k_space_error_gpu(m_prefactor, mesh.data(), cao, p3m.sum_qpart, + p3m.sum_q2, alpha_L, box_geo.length().data()); + } + boost::mpi::broadcast(comm_cart, ks_err, 0); } else #endif ks_err = p3m_k_space_error(m_prefactor, mesh, cao, p3m.sum_qpart, diff --git a/src/core/electrostatics/p3m_gpu_cuda.cu b/src/core/electrostatics/p3m_gpu_cuda.cu index a01b64b1496..c734ba0cccd 100644 --- a/src/core/electrostatics/p3m_gpu_cuda.cu +++ b/src/core/electrostatics/p3m_gpu_cuda.cu @@ -53,11 +53,11 @@ #include "electrostatics/p3m_gpu_cuda.cuh" #include "cuda/utils.cuh" +#include "p3m/math.hpp" #include "system/System.hpp" #include #include -#include #include #include @@ -185,15 +185,15 @@ __device__ void static Aliasing_sums_ik(const P3MGpuData p, int NX, int NY, for (MX = -P3M_BRILLOUIN; MX <= P3M_BRILLOUIN; MX++) { NMX = static_cast(((NX > p.mesh[0] / 2) ? NX - p.mesh[0] : NX) + p.mesh[0] * MX); - S1 = int_pow<2 * cao>(Utils::sinc(Meshi[0] * NMX)); + S1 = int_pow<2 * cao>(math::sinc(Meshi[0] * NMX)); for (MY = -P3M_BRILLOUIN; MY <= P3M_BRILLOUIN; MY++) { NMY = static_cast( ((NY > p.mesh[1] / 2) ? NY - p.mesh[1] : NY) + p.mesh[1] * MY); - S2 = S1 * int_pow<2 * cao>(Utils::sinc(Meshi[1] * NMY)); + S2 = S1 * int_pow<2 * cao>(math::sinc(Meshi[1] * NMY)); for (MZ = -P3M_BRILLOUIN; MZ <= P3M_BRILLOUIN; MZ++) { NMZ = static_cast( ((NZ > p.mesh[2] / 2) ? NZ - p.mesh[2] : NZ) + p.mesh[2] * MZ); - S3 = S2 * int_pow<2 * cao>(Utils::sinc(Meshi[2] * NMZ)); + S3 = S2 * int_pow<2 * cao>(math::sinc(Meshi[2] * NMZ)); NM2 = sqr(NMX * Leni[0]) + sqr(NMY * Leni[1]) + sqr(NMZ * Leni[2]); *Nenner += S3; diff --git a/src/core/electrostatics/p3m_gpu_error_cuda.cu b/src/core/electrostatics/p3m_gpu_error_cuda.cu index 6e54e2dfe7e..fbc51fecf4a 100644 --- a/src/core/electrostatics/p3m_gpu_error_cuda.cu +++ b/src/core/electrostatics/p3m_gpu_error_cuda.cu @@ -23,12 +23,12 @@ * The corresponding header file is p3m_gpu_error.hpp. */ +#include "p3m/math.hpp" #include "p3m_gpu_error.hpp" #include "cuda/utils.cuh" #include -#include #include #include @@ -44,49 +44,12 @@ #error CU-file includes mpi.h! This should not happen! #endif -using Utils::int_pow; -using Utils::sqr; - -/** @todo Extend to higher order. This comes from some 1/sin expansion in - * @cite hockney88a - */ - -template -__device__ static double p3m_analytic_cotangent_sum(int n, double mesh_i) { - const double c = sqr(cos(std::numbers::pi * mesh_i * n)); - - switch (cao) { - case 1: - return 1; - case 2: - return (1.0 + c * 2.0) / 3.0; - case 3: - return (2.0 + c * (11.0 + c * 2.0)) / 15.0; - case 4: - return (17.0 + c * (180.0 + c * (114.0 + c * 4.0))) / 315.0; - case 5: - return (62.0 + c * (1072.0 + c * (1452.0 + c * (247.0 + c * 2.0)))) / - 2835.0; - case 6: - return (1382.0 + - c * (35396.0 + - c * (83021.0 + c * (34096.0 + c * (2026.0 + c * 4.0))))) / - 155925.0; - case 7: - return (21844.0 + - c * (776661.0 + - c * (2801040.0 + - c * (2123860.0 + - c * (349500.0 + c * (8166.0 + c * 4.0)))))) / - 6081075.0; - } - - return 0.0; -} - template __global__ void p3m_k_space_error_gpu_kernel_ik(int3 mesh, double3 meshi, double alpha_L, double *he_q) { + using Utils::int_pow; + using Utils::sqr; + const int nx = -mesh.x / 2 + static_cast(blockDim.x * blockIdx.x + threadIdx.x); const int ny = @@ -103,14 +66,14 @@ __global__ void p3m_k_space_error_gpu_kernel_ik(int3 mesh, double3 meshi, if ((nx != 0) || (ny != 0) || (nz != 0)) { const double alpha_L_i = 1. / alpha_L; const double n2 = sqr(nx) + sqr(ny) + sqr(nz); - const double cs = p3m_analytic_cotangent_sum(nz, meshi.z) * - p3m_analytic_cotangent_sum(nx, meshi.x) * - p3m_analytic_cotangent_sum(ny, meshi.y); + const double cs = math::analytic_cotangent_sum(nz, meshi.z) * + math::analytic_cotangent_sum(nx, meshi.x) * + math::analytic_cotangent_sum(ny, meshi.y); const double ex = exp(-sqr(std::numbers::pi * alpha_L_i) * n2); const double ex2 = sqr(ex); const double U2 = - int_pow<2 * cao>(Utils::sinc(meshi.x * nx) * Utils::sinc(meshi.y * ny) * - Utils::sinc(meshi.z * nz)); + int_pow<2 * cao>(math::sinc(meshi.x * nx) * math::sinc(meshi.y * ny) * + math::sinc(meshi.z * nz)); auto const alias1 = ex2 / n2; auto const d = alias1 - sqr(U2 * ex / cs) / n2; diff --git a/src/core/magnetostatics/dp3m.cpp b/src/core/magnetostatics/dp3m.cpp index c5485a2ca1c..4214900fb52 100644 --- a/src/core/magnetostatics/dp3m.cpp +++ b/src/core/magnetostatics/dp3m.cpp @@ -37,6 +37,7 @@ #include "p3m/fft.hpp" #include "p3m/influence_function_dipolar.hpp" #include "p3m/interpolation.hpp" +#include "p3m/math.hpp" #include "p3m/send_mesh.hpp" #include "BoxGeometry.hpp" @@ -56,7 +57,6 @@ #include #include #include -#include #include #include @@ -67,10 +67,13 @@ #include #include #include +#include #include #include +#include #include #include +#include #include void DipolarP3M::count_magnetic_particles() { @@ -272,15 +275,13 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, dipole_assign(particles); /* Gather information for FFT grid inside the nodes domain (inner local * mesh) and perform forward 3D FFT (Charge Assignment Mesh). */ - std::array meshes = {{dp3m.rs_mesh_dip[0].data(), - dp3m.rs_mesh_dip[1].data(), - dp3m.rs_mesh_dip[2].data()}}; - + std::array meshes = {{dp3m.rs_mesh_dip[0u].data(), + dp3m.rs_mesh_dip[1u].data(), + dp3m.rs_mesh_dip[2u].data()}}; dp3m.sm.gather_grid(meshes, comm_cart, dp3m.local_mesh.dim); - - fft_perform_forw(dp3m.rs_mesh_dip[0].data(), dp3m.fft, comm_cart); - fft_perform_forw(dp3m.rs_mesh_dip[1].data(), dp3m.fft, comm_cart); - fft_perform_forw(dp3m.rs_mesh_dip[2].data(), dp3m.fft, comm_cart); + for (auto i = 0u; i < 3u; ++i) { + fft_perform_forw(dp3m.rs_mesh_dip[i].data(), dp3m.fft, comm_cart); + } // Note: after these calls, the grids are in the order yzx and not xyz // anymore!!! } @@ -293,34 +294,32 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, if (dp3m.sum_mu2 > 0.) { /* i*k differentiation for dipolar gradients: * |(\Fourier{\vect{mu}}(k)\cdot \vect{k})|^2 */ - int ind = 0; - int i = 0; - int j[3]; - double node_energy = 0.0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { - node_energy += - dp3m.g_energy[i] * - (Utils::sqr( - dp3m.rs_mesh_dip[0][ind] * - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] + - dp3m.rs_mesh_dip[1][ind] * - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] + - dp3m.rs_mesh_dip[2][ind] * - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]]) + - Utils::sqr( - dp3m.rs_mesh_dip[0][ind + 1] * - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] + - dp3m.rs_mesh_dip[1][ind + 1] * - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] + - dp3m.rs_mesh_dip[2][ind + 1] * - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]])); - ind += 2; - i++; - } - } - } + + auto constexpr m_start = Utils::Vector3i::broadcast(0); + auto const m_stop = std::span(dp3m.fft.plan[3].new_mesh); + auto const offset = std::span(dp3m.fft.plan[3].start); + auto const &d_op = dp3m.d_op[0u]; + auto const &mesh_dip = dp3m.rs_mesh_dip; + auto indices = Utils::Vector3i{}; + auto index = std::size_t(0u); + auto it_energy = dp3m.g_energy.begin(); + auto node_energy = 0.; + for_each_3d(m_start, m_stop, indices, [&]() { + using namespace detail::FFT_indexing; + // Re(mu)*k + auto const re = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + + mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + + mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + ++index; + // Im(mu)*k + auto const im = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + + mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + + mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + ++index; + node_energy += *it_energy * (Utils::sqr(re) + Utils::sqr(im)); + std::advance(it_energy, 1); + }); + node_energy *= dipole_prefac * std::numbers::pi * box_geo.length_inv()[0]; boost::mpi::reduce(comm_cart, node_energy, energy, std::plus<>(), 0); @@ -344,62 +343,45 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, * DIPOLAR TORQUES (k-space) ****************************/ if (dp3m.sum_mu2 > 0.) { - auto const two_pi_L_i = 2. * std::numbers::pi * box_geo.length_inv()[0]; + auto const wavenumber = 2. * std::numbers::pi * box_geo.length_inv()[0u]; + auto constexpr m_start = Utils::Vector3i::broadcast(0); + auto const m_stop = std::span(dp3m.fft.plan[3].new_mesh); + auto const offset = std::span(dp3m.fft.plan[3].start); + auto const &d_op = dp3m.d_op[0u]; + auto const &mesh_dip = dp3m.rs_mesh_dip; + auto indices = Utils::Vector3i{}; + auto index = std::size_t(0u); + /* fill in ks_mesh array for torque calculation */ - int ind = 0; - int i = 0; - int j[3]; - double tmp0, tmp1; - - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; - j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; - j[2]++) { // j[2]=n_x - // tmp0 = Re(mu)*k, tmp1 = Im(mu)*k - - tmp0 = dp3m.rs_mesh_dip[0][ind] * - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] + - dp3m.rs_mesh_dip[1][ind] * - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] + - dp3m.rs_mesh_dip[2][ind] * - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]]; - - tmp1 = dp3m.rs_mesh_dip[0][ind + 1] * - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] + - dp3m.rs_mesh_dip[1][ind + 1] * - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] + - dp3m.rs_mesh_dip[2][ind + 1] * - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]]; - - /* the optimal influence function is the same for torques - and energy */ - dp3m.ks_mesh[ind] = tmp0 * dp3m.g_energy[i]; - dp3m.ks_mesh[ind + 1] = tmp1 * dp3m.g_energy[i]; - ind += 2; - i++; - } - } - } + auto it_energy = dp3m.g_energy.begin(); + index = 0u; + for_each_3d(m_start, m_stop, indices, [&]() { + using namespace detail::FFT_indexing; + // Re(mu)*k + auto const re = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + + mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + + mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + dp3m.ks_mesh[index] = *it_energy * re; + ++index; + // Im(mu)*k + auto const im = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + + mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + + mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + dp3m.ks_mesh[index] = *it_energy * im; + ++index; + std::advance(it_energy, 1); + }); /* Force component loop */ for (int d = 0; d < 3; d++) { - auto const d_rs = (d + dp3m.ks_pnum) % 3; - ind = 0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; j[1]++) { - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; j[2]++) { - dp3m.rs_mesh[ind] = - dp3m.d_op[0][j[d] + dp3m.fft.plan[3].start[d]] * - dp3m.ks_mesh[ind]; - ind++; - dp3m.rs_mesh[ind] = - dp3m.d_op[0][j[d] + dp3m.fft.plan[3].start[d]] * - dp3m.ks_mesh[ind]; - ind++; - } - } - } + index = 0u; + for_each_3d(m_start, m_stop, indices, [&]() { + auto const pos = indices[d] + offset[d]; + dp3m.rs_mesh[index] = d_op[pos] * dp3m.ks_mesh[index]; + ++index; + dp3m.rs_mesh[index] = d_op[pos] * dp3m.ks_mesh[index]; + ++index; + }); /* Back FFT force component mesh */ fft_perform_back(dp3m.rs_mesh.data(), false, dp3m.fft, comm_cart); @@ -407,94 +389,68 @@ double DipolarP3M::long_range_kernel(bool force_flag, bool energy_flag, dp3m.sm.spread_grid(dp3m.rs_mesh.data(), comm_cart, dp3m.local_mesh.dim); /* Assign force component from mesh to particle */ + auto const d_rs = (d + dp3m.ks_pnum) % 3; Utils::integral_parameter( - dp3m.params.cao, dp3m, dipole_prefac * two_pi_L_i, d_rs, particles); + dp3m.params.cao, dp3m, dipole_prefac * wavenumber, d_rs, particles); } /*************************** DIPOLAR FORCES (k-space) ****************************/ - // Compute forces after torques because the algorithm below overwrites the // grids dp3m.rs_mesh_dip ! // Note: I'll do here 9 inverse FFTs. By symmetry, we can reduce this // number to 6 ! /* fill in ks_mesh array for force calculation */ - ind = 0; - i = 0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; - j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; - j[2]++) { // j[2]=n_x - // tmp0 = Im(mu)*k, tmp1 = -Re(mu)*k - tmp0 = dp3m.rs_mesh_dip[0][ind + 1] * - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] + - dp3m.rs_mesh_dip[1][ind + 1] * - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] + - dp3m.rs_mesh_dip[2][ind + 1] * - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]]; - tmp1 = dp3m.rs_mesh_dip[0][ind] * - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] + - dp3m.rs_mesh_dip[1][ind] * - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] + - dp3m.rs_mesh_dip[2][ind] * - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]]; - dp3m.ks_mesh[ind] = tmp0 * dp3m.g_force[i]; - dp3m.ks_mesh[ind + 1] = -tmp1 * dp3m.g_force[i]; - ind += 2; - i++; - } - } - } + auto it_force = dp3m.g_force.begin(); + index = 0u; + for_each_3d(m_start, m_stop, indices, [&]() { + using namespace detail::FFT_indexing; + // Re(mu)*k + auto const re = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + + mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + + mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + ++index; + // Im(mu)*k + auto const im = mesh_dip[0u][index] * d_op[indices[KX] + offset[KX]] + + mesh_dip[1u][index] * d_op[indices[KY] + offset[KY]] + + mesh_dip[2u][index] * d_op[indices[KZ] + offset[KZ]]; + ++index; + dp3m.ks_mesh[index - 2] = *it_force * im; + dp3m.ks_mesh[index - 1] = *it_force * (-re); + std::advance(it_force, 1); + }); /* Force component loop */ - for (int d = 0; d < 3; d++) { /* direction in k-space: */ - auto const d_rs = (d + dp3m.ks_pnum) % 3; - ind = 0; - for (j[0] = 0; j[0] < dp3m.fft.plan[3].new_mesh[0]; - j[0]++) { // j[0]=n_y - for (j[1] = 0; j[1] < dp3m.fft.plan[3].new_mesh[1]; - j[1]++) { // j[1]=n_z - for (j[2] = 0; j[2] < dp3m.fft.plan[3].new_mesh[2]; - j[2]++) { // j[2]=n_x - tmp0 = dp3m.d_op[0][j[d] + dp3m.fft.plan[3].start[d]] * - dp3m.ks_mesh[ind]; - dp3m.rs_mesh_dip[0][ind] = - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] * tmp0; - dp3m.rs_mesh_dip[1][ind] = - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] * tmp0; - dp3m.rs_mesh_dip[2][ind] = - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]] * tmp0; - ind++; - tmp0 = dp3m.d_op[0][j[d] + dp3m.fft.plan[3].start[d]] * - dp3m.ks_mesh[ind]; - dp3m.rs_mesh_dip[0][ind] = - dp3m.d_op[0][j[2] + dp3m.fft.plan[3].start[2]] * tmp0; - dp3m.rs_mesh_dip[1][ind] = - dp3m.d_op[0][j[0] + dp3m.fft.plan[3].start[0]] * tmp0; - dp3m.rs_mesh_dip[2][ind] = - dp3m.d_op[0][j[1] + dp3m.fft.plan[3].start[1]] * tmp0; - ind++; - } - } - } + for (int d = 0; d < 3; d++) { + auto &rs_mesh_dip = dp3m.rs_mesh_dip; + index = 0u; + for_each_3d(m_start, m_stop, indices, [&]() { + using namespace detail::FFT_indexing; + auto const f1 = d_op[indices[d] + offset[d]] * dp3m.ks_mesh[index]; + rs_mesh_dip[0][index] = d_op[indices[KX] + offset[KX]] * f1; + rs_mesh_dip[1][index] = d_op[indices[KY] + offset[KY]] * f1; + rs_mesh_dip[2][index] = d_op[indices[KZ] + offset[KZ]] * f1; + ++index; + auto const f2 = d_op[indices[d] + offset[d]] * dp3m.ks_mesh[index]; + rs_mesh_dip[0][index] = d_op[indices[KX] + offset[KX]] * f2; + rs_mesh_dip[1][index] = d_op[indices[KY] + offset[KY]] * f2; + rs_mesh_dip[2][index] = d_op[indices[KZ] + offset[KZ]] * f2; + ++index; + }); /* Back FFT force component mesh */ - fft_perform_back(dp3m.rs_mesh_dip[0].data(), false, dp3m.fft, - comm_cart); - fft_perform_back(dp3m.rs_mesh_dip[1].data(), false, dp3m.fft, - comm_cart); - fft_perform_back(dp3m.rs_mesh_dip[2].data(), false, dp3m.fft, - comm_cart); + for (auto i = 0u; i < 3u; ++i) { + fft_perform_back(rs_mesh_dip[i].data(), false, dp3m.fft, comm_cart); + } /* redistribute force component mesh */ - std::array meshes = {{dp3m.rs_mesh_dip[0].data(), - dp3m.rs_mesh_dip[1].data(), - dp3m.rs_mesh_dip[2].data()}}; - + std::array meshes = {{rs_mesh_dip[0].data(), + rs_mesh_dip[1].data(), + rs_mesh_dip[2].data()}}; dp3m.sm.spread_grid(meshes, comm_cart, dp3m.local_mesh.dim); /* Assign force component from mesh to particle */ + auto const d_rs = (d + dp3m.ks_pnum) % 3; Utils::integral_parameter( - dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(two_pi_L_i), d_rs, + dp3m.params.cao, dp3m, dipole_prefac * Utils::sqr(wavenumber), d_rs, particles); } } /* if (dp3m.sum_mu2 > 0) */ @@ -590,15 +546,15 @@ void DipolarP3M::calc_influence_function_force() { auto const size = Utils::Vector3i{dp3m.fft.plan[3].new_mesh}; dp3m.g_force = grid_influence_function<3>(dp3m.params, start, start + size, - get_system().box_geo->length()); + get_system().box_geo->length_inv()); } void DipolarP3M::calc_influence_function_energy() { auto const start = Utils::Vector3i{dp3m.fft.plan[3].start}; auto const size = Utils::Vector3i{dp3m.fft.plan[3].new_mesh}; - dp3m.g_energy = grid_influence_function<2>(dp3m.params, start, start + size, - get_system().box_geo->length()); + dp3m.g_energy = grid_influence_function<2>( + dp3m.params, start, start + size, get_system().box_geo->length_inv()); } class DipolarTuningAlgorithm : public TuningAlgorithm { @@ -749,62 +705,68 @@ void DipolarP3M::tune() { } /** Tuning dipolar-P3M */ -static auto dp3m_tune_aliasing_sums(int nx, int ny, int nz, int mesh, +static auto dp3m_tune_aliasing_sums(Utils::Vector3i const &shift, int mesh, double mesh_i, int cao, double alpha_L_i) { - using Utils::sinc; + auto constexpr m_start = Utils::Vector3i::broadcast(-P3M_BRILLOUIN); + auto constexpr m_stop = Utils::Vector3i::broadcast(P3M_BRILLOUIN + 1); auto const factor1 = Utils::sqr(std::numbers::pi * alpha_L_i); - auto alias1 = 0.; auto alias2 = 0.; - for (int mx = -P3M_BRILLOUIN; mx <= P3M_BRILLOUIN; mx++) { - auto const nmx = nx + mx * mesh; - auto const fnmx = mesh_i * nmx; - for (int my = -P3M_BRILLOUIN; my <= P3M_BRILLOUIN; my++) { - auto const nmy = ny + my * mesh; - auto const fnmy = mesh_i * nmy; - for (int mz = -P3M_BRILLOUIN; mz <= P3M_BRILLOUIN; mz++) { - auto const nmz = nz + mz * mesh; - auto const fnmz = mesh_i * nmz; - - auto const nm2 = Utils::sqr(nmx) + Utils::sqr(nmy) + Utils::sqr(nmz); - auto const ex = std::exp(-factor1 * nm2); - - auto const U2 = pow(sinc(fnmx) * sinc(fnmy) * sinc(fnmz), 2. * cao); - - alias1 += Utils::sqr(ex) * nm2; - alias2 += U2 * ex * pow((nx * nmx + ny * nmy + nz * nmz), 3.) / nm2; - } - } - } + + Utils::Vector3i indices{}; + Utils::Vector3i nm{}; + Utils::Vector3d fnm{}; + for_each_3d( + m_start, m_stop, indices, + [&]() { + auto const norm_sq = nm.norm2(); + auto const ex = std::exp(-factor1 * norm_sq); + auto const U2 = std::pow(Utils::product(fnm), 2 * cao); + alias1 += Utils::sqr(ex) * norm_sq; + alias2 += U2 * ex * std::pow(shift * nm, 3) / norm_sq; + }, + [&](unsigned dim, int n) { + nm[dim] = shift[dim] + n * mesh; + fnm[dim] = math::sinc(nm[dim] * mesh_i); + }); + return std::make_pair(alias1, alias2); } /** Calculate the k-space error of dipolar-P3M */ static double dp3m_k_space_error(double box_size, int mesh, int cao, int n_c_part, double sum_q2, double alpha_L) { - double he_q = 0.; - auto const mesh_i = 1. / mesh; - auto const alpha_L_i = 1. / alpha_L; - for (int nx = -mesh / 2; nx < mesh / 2; nx++) - for (int ny = -mesh / 2; ny < mesh / 2; ny++) - for (int nz = -mesh / 2; nz < mesh / 2; nz++) - if ((nx != 0) || (ny != 0) || (nz != 0)) { - auto const n2 = Utils::sqr(nx) + Utils::sqr(ny) + Utils::sqr(nz); - auto const cs = p3m_analytic_cotangent_sum(nx, mesh_i, cao) * - p3m_analytic_cotangent_sum(ny, mesh_i, cao) * - p3m_analytic_cotangent_sum(nz, mesh_i, cao); + auto const cotangent_sum = math::get_analytic_cotangent_sum_kernel(cao); + auto const mesh_i = 1. / static_cast(mesh); + auto const alpha_L_i = 1. / alpha_L; + auto const m_stop = Utils::Vector3i::broadcast(mesh / 2); + auto const m_start = -m_stop; + auto indices = Utils::Vector3i{}; + auto values = Utils::Vector3d{}; + auto he_q = 0.; + + for_each_3d( + m_start, m_stop, indices, + [&]() { + if ((indices[0] != 0) or (indices[1] != 0) or (indices[2] != 0)) { + auto const n2 = indices.norm2(); + auto const cs = Utils::product(values); auto const [alias1, alias2] = - dp3m_tune_aliasing_sums(nx, ny, nz, mesh, mesh_i, cao, alpha_L_i); + dp3m_tune_aliasing_sums(indices, mesh, mesh_i, cao, alpha_L_i); auto const d = alias1 - Utils::sqr(alias2 / cs) / Utils::int_pow<3>(static_cast(n2)); /* at high precision, d can become negative due to extinction; also, don't take values that have no significant digits left*/ - if (d > 0 && (fabs(d / alias1) > ROUND_ERROR_PREC)) + if (d > 0. and std::fabs(d / alias1) > ROUND_ERROR_PREC) he_q += d; } + }, + [&values, &mesh_i, cotangent_sum](unsigned dim, int n) { + values[dim] = cotangent_sum(n, mesh_i); + }); return 8. * Utils::sqr(std::numbers::pi) / 3. * sum_q2 * sqrt(he_q / n_c_part) / Utils::int_pow<4>(box_size); diff --git a/src/core/p3m/common.cpp b/src/core/p3m/common.cpp index bc87e99a6a8..fb8023eba74 100644 --- a/src/core/p3m/common.cpp +++ b/src/core/p3m/common.cpp @@ -28,52 +28,8 @@ #include "LocalBox.hpp" #include -#include #include -#include -#include - -double p3m_analytic_cotangent_sum(int n, double mesh_i, int cao) { - auto const c = - Utils::sqr(std::cos(std::numbers::pi * mesh_i * static_cast(n))); - - switch (cao) { - case 1: { - return 1.0; - } - case 2: { - return (1.0 + c * 2.0) / 3.0; - } - case 3: { - return (2.0 + c * (11.0 + c * 2.0)) / 15.0; - } - case 4: { - return (17.0 + c * (180.0 + c * (114.0 + c * 4.0))) / 315.0; - } - case 5: { - return (62.0 + c * (1072.0 + c * (1452.0 + c * (247.0 + c * 2.0)))) / - 2835.0; - } - case 6: { - return (1382.0 + - c * (35396.0 + - c * (83021.0 + c * (34096.0 + c * (2026.0 + c * 4.0))))) / - 155925.0; - } - case 7: { - return (21844.0 + - c * (776661.0 + - c * (2801040.0 + - c * (2123860.0 + - c * (349500.0 + c * (8166.0 + c * 4.0)))))) / - 6081075.0; - } - default: { - throw std::logic_error("Invalid value cao=" + std::to_string(cao)); - } - } -} void P3MLocalMesh::calc_local_ca_mesh(P3MParameters const ¶ms, LocalBox const &local_geo, double skin, diff --git a/src/core/p3m/common.hpp b/src/core/p3m/common.hpp index e5a42993755..6bc30828ee5 100644 --- a/src/core/p3m/common.hpp +++ b/src/core/p3m/common.hpp @@ -38,6 +38,7 @@ #include +#include #include #include @@ -49,6 +50,7 @@ auto constexpr P3M_EPSILON_METALLIC = 0.0; #include "LocalBox.hpp" #include +#include #include #include @@ -226,16 +228,6 @@ struct P3MLocalMesh { double space_layer); }; -/** One of the aliasing sums used to compute k-space errors. - * Fortunately the one which is most important (because it converges - * most slowly, since it is not damped exponentially) can be - * calculated analytically. The result (which depends on the order of - * the spline interpolation) can be written as an even trigonometric - * polynomial. The results are tabulated here (the employed formula - * is eq. (7.66) in @cite hockney88a). - */ -double p3m_analytic_cotangent_sum(int n, double mesh_i, int cao); - #endif /* P3M || DP3M */ namespace detail { diff --git a/src/core/p3m/for_each_3d.hpp b/src/core/p3m/for_each_3d.hpp new file mode 100644 index 00000000000..9f44871bd54 --- /dev/null +++ b/src/core/p3m/for_each_3d.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2024 The ESPResSo project + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include + +namespace detail { + +constexpr inline void noop_projector(unsigned, int) {} + +template +concept IndexVectorConcept = requires(T vector) { + { vector[0] } -> std::convertible_to; +}; + +} // namespace detail + +/** + * @brief Repeat an operation on every element of a 3D grid. + * + * Intermediate values that depend on the iterated coordinates + * are calculated and stored once per iteration. This is useful + * when the operation is costly. + * + * @param start Initial values for the loop counters. + * @param stop Final values (one-past-the-end) for the loop counters. + * @param counters Loop counters. + * @param kernel Functor to execute. + * @param projector Projection of the current loop counter. + * @tparam Kernel Nullary function. + * @tparam Projector Binary function that takes a nesting depth and a loop + * counter as arguments and projects a value. + */ +template + requires std::invocable and std::invocable +void for_each_3d(detail::IndexVectorConcept auto &&start, + detail::IndexVectorConcept auto &&stop, + detail::IndexVectorConcept auto &&counters, Kernel &&kernel, + Projector &&projector = detail::noop_projector) { + auto &nx = counters[0u]; + auto &ny = counters[1u]; + auto &nz = counters[2u]; + for (nx = start[0u]; nx < stop[0u]; ++nx) { + projector(0u, nx); + for (ny = start[1u]; ny < stop[1u]; ++ny) { + projector(1u, ny); + for (nz = start[2u]; nz < stop[2u]; ++nz) { + projector(2u, nz); + kernel(); + } + } + } +} diff --git a/src/core/p3m/influence_function.hpp b/src/core/p3m/influence_function.hpp index 9c322c23506..24853d43e25 100644 --- a/src/core/p3m/influence_function.hpp +++ b/src/core/p3m/influence_function.hpp @@ -16,15 +16,16 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef ESPRESSO_P3M_INFLUENCE_FUNCTION_HPP -#define ESPRESSO_P3M_INFLUENCE_FUNCTION_HPP + +#pragma once #include "p3m/common.hpp" +#include "p3m/for_each_3d.hpp" +#include "p3m/math.hpp" #include #include #include -#include #include #include @@ -52,44 +53,43 @@ template double G_opt(int cao, double alpha, Utils::Vector3d const &k, Utils::Vector3d const &h) { - using namespace detail::FFT_indexing; - using Utils::int_pow; - using Utils::sinc; - - auto constexpr two_pi = 2. * std::numbers::pi; - auto constexpr two_pi_i = 1. / two_pi; - auto constexpr limit = 30.; auto const k2 = k.norm2(); - if (k2 == 0.0) { - return 0.0; + if (k2 == 0.) { + return 0.; } - double numerator = 0.0; - double denominator = 0.0; - - for (int mx = -m; mx <= m; mx++) { - for (int my = -m; my <= m; my++) { - for (int mz = -m; mz <= m; mz++) { - auto const km = - k + two_pi * Utils::Vector3d{mx / h[RX], my / h[RY], mz / h[RZ]}; - auto const U2 = std::pow(sinc(km[RX] * h[RX] * two_pi_i) * - sinc(km[RY] * h[RY] * two_pi_i) * - sinc(km[RZ] * h[RZ] * two_pi_i), - 2 * cao); - + auto constexpr limit = 30.; + auto constexpr m_start = Utils::Vector3i::broadcast(-m); + auto constexpr m_stop = Utils::Vector3i::broadcast(m + 1); + auto const exponent_prefactor = Utils::sqr(1. / (2. * alpha)); + auto const wavevector = (2. * std::numbers::pi) / h; + auto const wavevector_i = 1. / wavevector; + auto indices = Utils::Vector3i{}; + auto km = Utils::Vector3d{}; + auto fnm = Utils::Vector3d{}; + auto numerator = 0.; + auto denominator = 0.; + + for_each_3d( + m_start, m_stop, indices, + [&]() { + using namespace detail::FFT_indexing; + auto const U2 = std::pow(Utils::product(fnm), 2 * cao); auto const km2 = km.norm2(); - auto const exponent = Utils::sqr(1. / (2. * alpha)) * km2; + auto const exponent = exponent_prefactor * km2; if (exponent < limit) { auto const f3 = std::exp(-exponent) * (4. * std::numbers::pi / km2); - numerator += U2 * f3 * int_pow(k * km); + numerator += U2 * f3 * Utils::int_pow(k * km); } denominator += U2; - } - } - } + }, + [&](unsigned dim, int n) { + km[dim] = k[dim] + n * wavevector[dim]; + fnm[dim] = math::sinc(km[dim] * wavevector_i[dim]); + }); - return numerator / (int_pow(k2) * Utils::sqr(denominator)); + return numerator / (Utils::int_pow(k2) * Utils::sqr(denominator)); } /** @@ -104,20 +104,18 @@ double G_opt(int cao, double alpha, Utils::Vector3d const &k, * * @param params P3M parameters * @param n_start Lower left corner of the grid - * @param n_end Upper right corner of the grid. - * @param box_l Box size + * @param n_stop Upper right corner of the grid. + * @param inv_box_l Inverse box length * @return Values of G_opt at regular grid points. */ template std::vector grid_influence_function(const P3MParameters ¶ms, const Utils::Vector3i &n_start, - const Utils::Vector3i &n_end, - const Utils::Vector3d &box_l) { - using namespace detail::FFT_indexing; + const Utils::Vector3i &n_stop, + const Utils::Vector3d &inv_box_l) { auto const shifts = detail::calc_meshift(params.mesh); - - auto const size = n_end - n_start; + auto const size = n_stop - n_start; /* The influence function grid */ auto g = std::vector(Utils::product(size), 0.); @@ -128,31 +126,24 @@ std::vector grid_influence_function(const P3MParameters ¶ms, return g; } - auto const h = Utils::Vector3d{params.a}; - - Utils::Vector3i n{}; - for (n[0] = n_start[0]; n[0] < n_end[0]; n[0]++) { - for (n[1] = n_start[1]; n[1] < n_end[1]; n[1]++) { - for (n[2] = n_start[2]; n[2] < n_end[2]; n[2]++) { - auto const ind = Utils::get_linear_index(n - n_start, size, - Utils::MemoryOrder::ROW_MAJOR); - if ((n[KX] % (params.mesh[RX] / 2) == 0) && - (n[KY] % (params.mesh[RY] / 2) == 0) && - (n[KZ] % (params.mesh[RZ] / 2) == 0)) { - g[ind] = 0.0; - } else { - auto const k = 2. * std::numbers::pi * - Utils::Vector3d{shifts[RX][n[KX]] / box_l[RX], - shifts[RY][n[KY]] / box_l[RY], - shifts[RZ][n[KZ]] / box_l[RZ]}; - - g[ind] = G_opt(params.cao, params.alpha, k, h); - } - } + auto const wavevector = (2. * std::numbers::pi) * inv_box_l; + auto const half_mesh = params.mesh / 2; + auto indices = Utils::Vector3i{}; + auto index = std::size_t(0u); + + for_each_3d(n_start, n_stop, indices, [&]() { + using namespace detail::FFT_indexing; + if ((indices[KX] % half_mesh[RX] != 0) or + (indices[KY] % half_mesh[RY] != 0) or + (indices[KZ] % half_mesh[RZ] != 0)) { + auto const k = + Utils::Vector3d{{shifts[RX][indices[KX]] * wavevector[RX], + shifts[RY][indices[KY]] * wavevector[RY], + shifts[RZ][indices[KZ]] * wavevector[RZ]}}; + g[index] = G_opt(params.cao, params.alpha, k, params.a); } - } + ++index; + }); return g; } - -#endif diff --git a/src/core/p3m/influence_function_dipolar.hpp b/src/core/p3m/influence_function_dipolar.hpp index e327846d20e..d52a1a6d675 100644 --- a/src/core/p3m/influence_function_dipolar.hpp +++ b/src/core/p3m/influence_function_dipolar.hpp @@ -18,19 +18,19 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef ESPRESSO_CORE_P3M_INFLUENCE_FUNCTION_DIPOLAR_HPP -#define ESPRESSO_CORE_P3M_INFLUENCE_FUNCTION_DIPOLAR_HPP + +#pragma once #include "config/config.hpp" #if defined(DP3M) #include "p3m/common.hpp" +#include "p3m/for_each_3d.hpp" +#include "p3m/math.hpp" #include -#include #include -#include #include #include @@ -54,40 +54,41 @@ template double G_opt_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &shift, Utils::Vector3i const &d_op) { - using Utils::int_pow; - using Utils::sinc; + auto constexpr limit = P3M_BRILLOUIN; auto constexpr exp_limit = 30.; - auto const exponent = 2. * params.cao; - + auto constexpr m_start = Utils::Vector3i::broadcast(-limit); + auto constexpr m_stop = Utils::Vector3i::broadcast(limit + 1); + auto const cao = params.cao; + auto const mesh = params.mesh[0]; + auto const offset = + static_cast(shift) / static_cast(mesh); + auto const f2 = Utils::sqr(std::numbers::pi / params.alpha_L); + auto indices = Utils::Vector3i{}; + auto nm = Utils::Vector3i{}; + auto fnm = Utils::Vector3d{}; auto numerator = 0.; auto denominator = 0.; - auto const f1 = 1. / static_cast(params.mesh[0]); - auto const f2 = Utils::sqr(std::numbers::pi / params.alpha_L); - - for (int mx = -limit; mx <= limit; mx++) { - auto const nmx = shift[0] + params.mesh[0] * mx; - auto const sx = std::pow(sinc(f1 * nmx), exponent); - for (int my = -limit; my <= limit; my++) { - auto const nmy = shift[1] + params.mesh[0] * my; - auto const sy = sx * std::pow(sinc(f1 * nmy), exponent); - for (int mz = -limit; mz <= limit; mz++) { - auto const nmz = shift[2] + params.mesh[0] * mz; - auto const sz = sy * std::pow(sinc(f1 * nmz), exponent); - auto const nm2 = Utils::sqr(nmx) + Utils::sqr(nmy) + Utils::sqr(nmz); - auto const exp_term = f2 * nm2; + for_each_3d( + m_start, m_stop, indices, + [&]() { + auto const norm_sq = nm.norm2(); + auto const sz = std::pow(Utils::product(fnm), 2 * cao); + auto const exp_term = f2 * norm_sq; if (exp_term < exp_limit) { - auto const f3 = sz * std::exp(-exp_term) / nm2; - auto const n_nm = d_op[0] * nmx + d_op[1] * nmy + d_op[2] * nmz; - numerator += f3 * int_pow(n_nm); + auto const f3 = sz * std::exp(-exp_term) / norm_sq; + numerator += f3 * Utils::int_pow(d_op * nm); } denominator += sz; - } - } - } - return numerator / (int_pow(static_cast(d_op.norm2())) * - Utils::sqr(denominator)); + }, + [&](unsigned dim, int n) { + nm[dim] = shift[dim] + n * mesh; + fnm[dim] = math::sinc(offset[dim] + n * mesh); + }); + + return numerator / (Utils::int_pow(static_cast(d_op.norm2())) * + Utils::int_pow<2>(denominator)); } /** @@ -100,17 +101,17 @@ double G_opt_dipolar(P3MParameters const ¶ms, Utils::Vector3i const &shift, * * @param params DP3M parameters * @param n_start Lower left corner of the grid - * @param n_end Upper right corner of the grid. - * @param box_l Box size + * @param n_stop Upper right corner of the grid. + * @param inv_box_l Inverse box length * @return Values of the influence function at regular grid points. */ template std::vector grid_influence_function(P3MParameters const ¶ms, Utils::Vector3i const &n_start, - Utils::Vector3i const &n_end, - Utils::Vector3d const &box_l) { + Utils::Vector3i const &n_stop, + Utils::Vector3d const &inv_box_l) { - auto const size = n_end - n_start; + auto const size = n_stop - n_start; /* The influence function grid */ auto g = std::vector(Utils::product(size), 0.); @@ -121,61 +122,56 @@ std::vector grid_influence_function(P3MParameters const ¶ms, return g; } - double fak1 = Utils::int_pow<3>(static_cast(params.mesh[0])) * 2.0 / - Utils::sqr(box_l[0]); - - auto const shifts = detail::calc_meshift(params.mesh, false); - auto const d_ops = detail::calc_meshift(params.mesh, true); - - Utils::Vector3i n{}; - for (n[0] = n_start[0]; n[0] < n_end[0]; n[0]++) { - for (n[1] = n_start[1]; n[1] < n_end[1]; n[1]++) { - for (n[2] = n_start[2]; n[2] < n_end[2]; n[2]++) { - auto const ind = Utils::get_linear_index(n - n_start, size, - Utils::MemoryOrder::ROW_MAJOR); - - if (((n[0] % (params.mesh[0] / 2) == 0) && - (n[1] % (params.mesh[0] / 2) == 0) && - (n[2] % (params.mesh[0] / 2) == 0))) { - g[ind] = 0.0; - } else { - auto const shift = Utils::Vector3i{shifts[0][n[0]], shifts[0][n[1]], - shifts[0][n[2]]}; - auto const d_op = - Utils::Vector3i{d_ops[0][n[0]], d_ops[0][n[1]], d_ops[0][n[2]]}; - auto const fak2 = G_opt_dipolar(params, shift, d_op); - g[ind] = fak1 * fak2; + auto prefactor = Utils::int_pow<3>(static_cast(params.mesh[0])) * 2. * + Utils::int_pow<2>(inv_box_l[0]); + + auto const offset = detail::calc_meshift(params.mesh, false)[0]; + auto const d_op = detail::calc_meshift(params.mesh, true)[0]; + auto const half_mesh = params.mesh[0] / 2; + auto indices = Utils::Vector3i{}; + auto shift_off = Utils::Vector3i{}; + auto d_op_off = Utils::Vector3i{}; + auto index = std::size_t(0u); + + for_each_3d( + n_start, n_stop, indices, + [&]() { + if (((indices[0] % half_mesh != 0) or (indices[1] % half_mesh != 0) or + (indices[2] % half_mesh != 0))) { + g[index] = prefactor * G_opt_dipolar(params, shift_off, d_op_off); } - } - } - } + ++index; + }, + [&](unsigned dim, int n) { + d_op_off[dim] = d_op[n]; + shift_off[dim] = offset[n]; + }); + return g; } inline double G_opt_dipolar_self_energy(P3MParameters const ¶ms, Utils::Vector3i const &shift) { - using Utils::sinc; + auto constexpr limit = P3M_BRILLOUIN + 1; - auto const exponent = 2. * params.cao; - - auto u_sum = 0.0; - - auto const f1 = 1.0 / static_cast(params.mesh[0]); - - for (int mx = -limit; mx <= limit; mx++) { - auto const nmx = shift[0] + params.mesh[0] * mx; - auto const sx = std::pow(sinc(f1 * nmx), exponent); - for (int my = -limit; my <= limit; my++) { - auto const nmy = shift[1] + params.mesh[0] * my; - auto const sy = sx * std::pow(sinc(f1 * nmy), exponent); - for (int mz = -limit; mz <= limit; mz++) { - auto const nmz = shift[2] + params.mesh[0] * mz; - auto const sz = sy * std::pow(sinc(f1 * nmz), exponent); - u_sum += sz; - } - } - } - return u_sum; + auto constexpr m_start = Utils::Vector3i::broadcast(-limit); + auto constexpr m_stop = Utils::Vector3i::broadcast(limit + 1); + auto const cao = params.cao; + auto const mesh = params.mesh[0]; + auto const offset = + static_cast(shift) / static_cast(mesh); + auto indices = Utils::Vector3i{}; + auto fnm = Utils::Vector3d{}; + auto energy = 0.; + + for_each_3d( + m_start, m_stop, indices, + [&]() { energy += std::pow(Utils::product(fnm), 2 * cao); }, + [&](unsigned dim, int n) { + fnm[dim] = math::sinc(offset[dim] + n * mesh); + }); + + return energy; } /** @@ -183,42 +179,39 @@ inline double G_opt_dipolar_self_energy(P3MParameters const ¶ms, * * @param params DP3M parameters * @param n_start Lower left corner of the grid - * @param n_end Upper right corner of the grid. + * @param n_stop Upper right corner of the grid. * @param g Energies on the grid. * @return Total self-energy. */ inline double grid_influence_function_self_energy( P3MParameters const ¶ms, Utils::Vector3i const &n_start, - Utils::Vector3i const &n_end, std::vector const &g) { - auto const size = n_end - n_start; - - auto const shifts = detail::calc_meshift(params.mesh, false); - auto const d_ops = detail::calc_meshift(params.mesh, true); - - double energy = 0.0; - Utils::Vector3i n{}; - for (n[0] = n_start[0]; n[0] < n_end[0]; n[0]++) { - for (n[1] = n_start[1]; n[1] < n_end[1]; n[1]++) { - for (n[2] = n_start[2]; n[2] < n_end[2]; n[2]++) { - if (((n[0] % (params.mesh[0] / 2) == 0) && - (n[1] % (params.mesh[0] / 2) == 0) && - (n[2] % (params.mesh[0] / 2) == 0))) { - energy += 0.0; - } else { - auto const ind = Utils::get_linear_index( - n - n_start, size, Utils::MemoryOrder::ROW_MAJOR); - auto const shift = Utils::Vector3i{shifts[0][n[0]], shifts[0][n[1]], - shifts[0][n[2]]}; - auto const d_op = - Utils::Vector3i{d_ops[0][n[0]], d_ops[0][n[1]], d_ops[0][n[2]]}; - auto const U2 = G_opt_dipolar_self_energy(params, shift); - energy += g[ind] * U2 * d_op.norm2(); + Utils::Vector3i const &n_stop, std::vector const &g) { + + auto const offset = detail::calc_meshift(params.mesh, false)[0]; + auto const d_op = detail::calc_meshift(params.mesh, true)[0]; + auto const half_mesh = params.mesh[0] / 2; + auto indices = Utils::Vector3i{}; + auto shift_off = Utils::Vector3i{}; + auto d_op_off = Utils::Vector3i{}; + auto index = std::size_t(0u); + auto energy = 0.; + + for_each_3d( + n_start, n_stop, indices, + [&]() { + if (((indices[0] % half_mesh != 0) or (indices[1] % half_mesh != 0) or + (indices[2] % half_mesh != 0))) { + auto const U2 = G_opt_dipolar_self_energy(params, shift_off); + energy += g[index] * U2 * d_op_off.norm2(); } - } - } - } + ++index; + }, + [&](unsigned dim, int n) { + d_op_off[dim] = d_op[n]; + shift_off[dim] = offset[n]; + }); + return energy; } -#endif -#endif +#endif // defined(DP3M) diff --git a/src/core/p3m/math.hpp b/src/core/p3m/math.hpp new file mode 100644 index 00000000000..2da9b38dd19 --- /dev/null +++ b/src/core/p3m/math.hpp @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2010-2024 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group + * + * This file is part of ESPResSo. + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#pragma once + +#include +#include + +#ifndef __CUDACC__ +#include +#endif +#include +#include +#include + +namespace math { + +/** @brief Return the absolute value of x. */ +inline DEVICE_QUALIFIER auto abs(double x) { return fabs(x); } + +/** @brief Return the absolute value of x. */ +inline DEVICE_QUALIFIER auto abs(float x) { return fabsf(x); } + +/** + * @brief Calculate the function @f$ \mathrm{sinc}(x) = \sin(\pi x)/(\pi x) @f$. + * + * (same convention as in @cite hockney88a). In order to avoid divisions + * by 0, arguments whose modulus is smaller than @f$ \epsilon = 0.1 @f$ + * are evaluated by an 8th order Taylor expansion. + * Note that the difference between sinc(x) and this expansion + * is smaller than 0.235e-12, if x is smaller than @f$ \epsilon @f$. + * (The next term in the expansion is the 10th order contribution, i.e. + * @f$ \pi^{10} x^{10}/39916800 \approx 0.2346 \cdot x^{12} @f$). + */ +template DEVICE_QUALIFIER auto sinc(T x) { + auto constexpr epsilon = T(0.1); +#if not defined(__CUDACC__) + using std::sin; +#endif + + auto const pix = std::numbers::pi_v * x; + + if (::math::abs(x) > epsilon) + return sin(pix) / pix; + + auto constexpr factorial = [](int n) consteval { + int acc{1}, c{1}; + while (c < n) { + acc *= ++c; + } + return acc; + }; + + /* Coefficients of the Taylor expansion of sinc */ + auto constexpr c0 = T(+1) / T(factorial(1)); + auto constexpr c2 = T(-1) / T(factorial(3)); + auto constexpr c4 = T(+1) / T(factorial(5)); + auto constexpr c6 = T(-1) / T(factorial(7)); + auto constexpr c8 = T(+1) / T(factorial(9)); + + auto const pix2 = pix * pix; + return c0 + pix2 * (c2 + pix2 * (c4 + pix2 * (c6 + pix2 * c8))); +} + +/** + * @brief One of the aliasing sums used to compute k-space errors. + * Fortunately the one which is most important (because it converges + * most slowly, since it is not damped exponentially) can be calculated + * analytically. The result (which depends on the order of the spline + * interpolation) can be written as an even trigonometric polynomial. + * The results are tabulated here (the employed formula is eq. 7-66 + * p. 233 in @cite hockney88a). + */ +template +DEVICE_QUALIFIER auto analytic_cotangent_sum(int n, double mesh_i) { + static_assert(cao >= 1 and cao <= 7); +#if not defined(__CUDACC__) + using std::cos; +#endif + auto const theta = static_cast(n) * mesh_i * std::numbers::pi; + auto const c = Utils::sqr(cos(theta)); + + if constexpr (cao == 1) { + return 1.; + } + if constexpr (cao == 2) { + return (1. + c * 2.) / 3.; + } + if constexpr (cao == 3) { + return (2. + c * (11. + c * 2.)) / 15.; + } + if constexpr (cao == 4) { + return (17. + c * (180. + c * (114. + c * 4.))) / 315.; + } + if constexpr (cao == 5) { + return (62. + c * (1072. + c * (1452. + c * (247. + c * 2.)))) / 2835.; + } + if constexpr (cao == 6) { + return (1382. + + c * (35396. + c * (83021. + c * (34096. + c * (2026. + c * 4.))))) / + 155925.; + } + return (21844. + + c * (776661. + + c * (2801040. + + c * (2123860. + c * (349500. + c * (8166. + c * 4.)))))) / + 6081075.; +} + +inline auto get_analytic_cotangent_sum_kernel(int cao) { + decltype(&analytic_cotangent_sum<1>) ptr = nullptr; + if (cao == 1) { + ptr = &analytic_cotangent_sum<1>; + } else if (cao == 2) { + ptr = &analytic_cotangent_sum<2>; + } else if (cao == 3) { + ptr = &analytic_cotangent_sum<3>; + } else if (cao == 4) { + ptr = &analytic_cotangent_sum<4>; + } else if (cao == 5) { + ptr = &analytic_cotangent_sum<5>; + } else if (cao == 6) { + ptr = &analytic_cotangent_sum<6>; + } else if (cao == 7) { + ptr = &analytic_cotangent_sum<7>; + } + if (ptr == nullptr) { + throw std::logic_error("Invalid value cao=" + std::to_string(cao)); + } + return ptr; +} + +} // namespace math diff --git a/src/core/unit_tests/CMakeLists.txt b/src/core/unit_tests/CMakeLists.txt index d843da0a636..60fcadae764 100644 --- a/src/core/unit_tests/CMakeLists.txt +++ b/src/core/unit_tests/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2010-2022 The ESPResSo project +# Copyright (C) 2010-2024 The ESPResSo project # Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 # Max-Planck-Institute for Polymer Research, Theory Group # @@ -94,6 +94,8 @@ if(ESPRESSO_BUILD_WITH_FFTW) espresso::core) unit_test(NAME fft_test SRC fft_test.cpp DEPENDS espresso::utils espresso::core) + unit_test(NAME math_test SRC math_test.cpp DEPENDS espresso::utils + espresso::core) endif() if(ESPRESSO_BUILD_WITH_CUDA) diff --git a/src/core/unit_tests/fft_test.cpp b/src/core/unit_tests/fft_test.cpp index 74ce5445867..6e301a48da2 100644 --- a/src/core/unit_tests/fft_test.cpp +++ b/src/core/unit_tests/fft_test.cpp @@ -27,6 +27,7 @@ #include #include "p3m/fft.hpp" +#include "p3m/for_each_3d.hpp" #include @@ -165,6 +166,43 @@ BOOST_AUTO_TEST_CASE(fft_exceptions) { BOOST_CHECK_THROW(allocator.allocate(bad_size), std::bad_array_new_length); } +BOOST_AUTO_TEST_CASE(for_each_3d_test) { + auto const m_start = Utils::Vector3i{{0, -1, 3}}; + auto const m_stop = Utils::Vector3i{{2, 2, 5}}; + auto ref_loop_counters = m_start; + auto indices = Utils::Vector3i{}; + + auto const kernel = [&]() { + BOOST_REQUIRE_EQUAL(indices, ref_loop_counters); + if (++ref_loop_counters[2u] == m_stop[2u]) { + ref_loop_counters[2u] = m_start[2u]; + if (++ref_loop_counters[1u] == m_stop[1u]) { + ref_loop_counters[1u] = m_start[1u]; + if (++ref_loop_counters[0u] == m_stop[0u]) { + ref_loop_counters[0u] = m_start[0u]; + } + } + } + }; + + { + for_each_3d(m_start, m_stop, indices, kernel, [&](unsigned dim, int n) { + BOOST_REQUIRE_GE(dim, 0); + BOOST_REQUIRE_LE(dim, 2); + BOOST_REQUIRE_EQUAL(n, ref_loop_counters[dim]); + }); + + BOOST_REQUIRE_EQUAL(indices, m_stop); + BOOST_REQUIRE_EQUAL(ref_loop_counters, m_start); + } + { + for_each_3d(m_start, m_stop, indices, kernel); + + BOOST_REQUIRE_EQUAL(indices, m_stop); + BOOST_REQUIRE_EQUAL(ref_loop_counters, m_start); + } +} + #else // defined(P3M) || defined(DP3M) int main(int argc, char **argv) {} #endif // defined(P3M) || defined(DP3M) diff --git a/src/core/unit_tests/math_test.cpp b/src/core/unit_tests/math_test.cpp new file mode 100644 index 00000000000..46a638800f1 --- /dev/null +++ b/src/core/unit_tests/math_test.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2017-2024 The ESPResSo project + * + * ESPResSo is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * ESPResSo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define BOOST_TEST_MODULE "Special mathematical functions" +#define BOOST_TEST_DYN_LINK +#include + +#include "p3m/math.hpp" + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +/** @brief Compute the n-th term in the Taylor series of @c tan(x). */ +static auto taylor_series_tangent(int n) { + auto const two_power_2n = std::pow(2., 2 * n); + auto const b2n = boost::math::bernoulli_b2n(n); + auto const f2n = boost::math::factorial(2 * n); + return std::pow(-1., n - 1) * two_power_2n * (two_power_2n - 1.) * b2n / f2n; +} + +/** @brief Compute the n-th term in the Taylor series of @c sinc(x). */ +static auto taylor_series_sinc(int n) { + return std::pow(-1., n) / std::tgamma(2 * n + 2); +} + +BOOST_AUTO_TEST_CASE(abs_test) { + static_assert(std::is_same_v); + static_assert(std::is_same_v); + + BOOST_CHECK_EQUAL(math::abs(+3.1415), std::abs(+3.1415)); + BOOST_CHECK_EQUAL(math::abs(-3.1415), std::abs(-3.1415)); + BOOST_CHECK_EQUAL(math::abs(+3.1415f), std::abs(+3.1415f)); + BOOST_CHECK_EQUAL(math::abs(-3.1415f), std::abs(-3.1415f)); +} + +BOOST_AUTO_TEST_CASE(sinc_test) { + // edge case + BOOST_CHECK_EQUAL(math::sinc(0.0), 1.0); + + // check against standard math functions + auto x = 0.001; + while (x <= 0.11) { + auto const approx = math::sinc(x); + auto const pi_x = std::numbers::pi * x; + auto const exact = std::sin(pi_x) / (pi_x); + BOOST_CHECK_SMALL(approx - exact, 1e-13); + x += 0.01; + } + + // check Taylor expansion + auto const series = std::views::iota(0, 5) | std::views::reverse | + std::views::transform(taylor_series_sinc); + for (auto const x : {1e-6, 1e-5, 1e-4, 1e-3, 1e-2}) { + auto const pix2 = Utils::sqr(std::numbers::pi * x); + auto const ref = std::accumulate( + series.begin(), series.end(), 0., + [pix2](auto const &acc, auto const &c) { return acc * pix2 + c; }); + BOOST_CHECK_SMALL(math::sinc(x) - ref, 1e-13); + } +} + +BOOST_AUTO_TEST_CASE(analytic_cotangent_sum_test) { + auto constexpr tol = 8. * 100. * std::numeric_limits::epsilon(); + auto const kernel = [](int n, double mesh_i, int cao) { + return math::get_analytic_cotangent_sum_kernel(cao)(n, mesh_i); + }; + + // edge case: at theta = 0, aliasing sums are unity + for (auto const cao : std::ranges::iota_view{1, 8}) { + BOOST_CHECK_CLOSE(kernel(0, 0., cao), 1., tol); + } + // edge case: at theta = pi / 2, aliasing sums are equal to the cao-th term + // in the tangent Taylor series + for (auto const cao : std::ranges::iota_view{1, 8}) { + BOOST_CHECK_CLOSE(kernel(1, 0.5, cao), taylor_series_tangent(cao), tol); + } + + // check assertion + for (auto const invalid_cao : {-1, 0, 8}) { + BOOST_CHECK_THROW(kernel(1, 0., invalid_cao), std::logic_error); + } +} diff --git a/src/core/unit_tests/p3m_test.cpp b/src/core/unit_tests/p3m_test.cpp index 5fd3a83915b..3f091d565ce 100644 --- a/src/core/unit_tests/p3m_test.cpp +++ b/src/core/unit_tests/p3m_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2020-2022 The ESPResSo project * * This file is part of ESPResSo. * @@ -17,7 +17,7 @@ * along with this program. If not, see . */ -#define BOOST_TEST_MODULE p3m test +#define BOOST_TEST_MODULE "P3M utility functions" #define BOOST_TEST_DYN_LINK #include @@ -27,8 +27,6 @@ #include #include -#include -#include #include BOOST_AUTO_TEST_CASE(calc_meshift_false) { @@ -60,27 +58,3 @@ BOOST_AUTO_TEST_CASE(calc_meshift_true) { } } } - -#if defined(P3M) || defined(DP3M) -BOOST_AUTO_TEST_CASE(analytic_cotangent_sum) { - auto constexpr kernel = p3m_analytic_cotangent_sum; - auto constexpr tol = 8. * 100. * std::numeric_limits::epsilon(); - - // check only trivial cases - for (auto const cao : {1, 2, 3, 4, 5, 6, 7}) { - BOOST_CHECK_CLOSE(kernel(0, 0., cao), 1., tol); - } - BOOST_CHECK_CLOSE(kernel(1, 0.5, 1), 1., tol); - BOOST_CHECK_CLOSE(kernel(1, 0.5, 2), 1. / 3., tol); - BOOST_CHECK_CLOSE(kernel(1, 0.5, 3), 2. / 15., tol); - BOOST_CHECK_CLOSE(kernel(1, 0.5, 4), 17. / 315., tol); - BOOST_CHECK_CLOSE(kernel(1, 0.5, 5), 62. / 2835., tol); - BOOST_CHECK_CLOSE(kernel(1, 0.5, 6), 1382. / 155925., tol); - BOOST_CHECK_CLOSE(kernel(1, 0.5, 7), 21844. / 6081075., tol); - - // check assertion - for (auto const invalid_cao : {-1, 0, 8}) { - BOOST_CHECK_THROW(kernel(1, 0., invalid_cao), std::logic_error); - } -} -#endif // defined(P3M) || defined(DP3M) diff --git a/src/utils/include/utils/Vector.hpp b/src/utils/include/utils/Vector.hpp index 4553741af6a..06167c901dc 100644 --- a/src/utils/include/utils/Vector.hpp +++ b/src/utils/include/utils/Vector.hpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -79,7 +80,8 @@ template class Vector : public Array { public: template - explicit Vector(Range const &rng) : Vector(std::begin(rng), std::end(rng)) {} + explicit constexpr Vector(Range const &rng) + : Vector(std::begin(rng), std::end(rng)) {} explicit constexpr Vector(T const (&v)[N]) : Base() { copy_init(std::begin(v), std::end(v)); } @@ -103,14 +105,13 @@ template class Vector : public Array { } } - /** - * @brief Create a vector that has all entries set to - * one value. - */ - static Vector broadcast(T const &s) { - Vector ret; - std::fill(ret.begin(), ret.end(), s); - + /** @brief Create a vector that has all entries set to the same value. */ + DEVICE_QUALIFIER static constexpr Vector + broadcast(typename Base::value_type const &value) { + Vector ret{}; + for (std::size_t i = 0u; i != N; ++i) { + ret[i] = value; + } return ret; } @@ -118,6 +119,12 @@ template class Vector : public Array { operator std::vector() const { return as_vector(); } + constexpr std::span as_span() const { + return std::span(const_cast(begin()), size()); + } + + constexpr operator std::span() const { return as_span(); } + template explicit operator Vector() const { Vector ret; @@ -299,6 +306,15 @@ Vector operator/(Vector const &a, T const &b) { return ret; } +template +Vector operator/(T const &a, Vector const &b) { + Vector ret; + + std::transform(std::begin(b), std::end(b), ret.begin(), + [a](T const &val) { return a / val; }); + return ret; +} + template Vector &operator/=(Vector &a, T const &b) { std::transform(std::begin(a), std::end(a), std::begin(a), diff --git a/src/utils/include/utils/math/abs.hpp b/src/utils/include/utils/math/abs.hpp deleted file mode 100644 index 4576f2c9964..00000000000 --- a/src/utils/include/utils/math/abs.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef ESPRESSO_ABS_HPP -#define ESPRESSO_ABS_HPP - -#include "utils/device_qualifier.hpp" - -#ifndef __CUDACC__ -#include -#endif - -namespace Utils { -/** - * @brief Return the absolute value of x. - */ -inline DEVICE_QUALIFIER double abs(double x) { return fabs(x); } - -/** - * @brief Return the absolute value of x. - */ -inline DEVICE_QUALIFIER float abs(float x) { return fabsf(x); } -} // namespace Utils - -#endif // ESPRESSO_DEVICE_MATH_HPP diff --git a/src/utils/include/utils/math/sinc.hpp b/src/utils/include/utils/math/sinc.hpp deleted file mode 100644 index 0a2cf4f47c7..00000000000 --- a/src/utils/include/utils/math/sinc.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2010-2022 The ESPResSo project - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 - * Max-Planck-Institute for Polymer Research, Theory Group - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#pragma once - -#include "utils/device_qualifier.hpp" -#include "utils/math/abs.hpp" - -#include -#include - -namespace Utils { -/** - * @brief Calculates the sinc-function as sin(PI*x)/(PI*x). - * - * (same convention as in @cite hockney88a). In order to avoid - * divisions by 0, arguments, whose modulus is smaller than epsi, will - * be evaluated by an 8th order Taylor expansion of the sinc - * function. Note that the difference between sinc(x) and this - * expansion is smaller than 0.235e-12, if x is smaller than 0.1. (The - * next term in the expansion is the 10th order contribution - * PI^10/39916800 * x^10 = 0.2346...*x^12). This expansion should - * also save time, since it reduces the number of function calls to - * sin(). - */ -template DEVICE_QUALIFIER T sinc(T d) { - const constexpr T epsi = T(0.1); - - const auto PId = std::numbers::pi_v * d; - - if (::Utils::abs(d) > epsi) - return sin(PId) / PId; - - /* Coefficients of the Taylor expansion of sinc */ - const constexpr T c2 = T(-0.1666666666667e-0); - const constexpr T c4 = T(0.8333333333333e-2); - const constexpr T c6 = T(-0.1984126984127e-3); - const constexpr T c8 = T(0.2755731922399e-5); - - const auto PId2 = PId * PId; - return T(1) + PId2 * (c2 + PId2 * (c4 + PId2 * (c6 + PId2 * c8))); -} -} // namespace Utils diff --git a/src/utils/include/utils/math/sqr.hpp b/src/utils/include/utils/math/sqr.hpp index dae6ee6d8af..a5ac0f25969 100644 --- a/src/utils/include/utils/math/sqr.hpp +++ b/src/utils/include/utils/math/sqr.hpp @@ -1,5 +1,7 @@ /* * Copyright (C) 2010-2022 The ESPResSo project + * Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009,2010 + * Max-Planck-Institute for Polymer Research, Theory Group * * This file is part of ESPResSo. * diff --git a/src/utils/tests/CMakeLists.txt b/src/utils/tests/CMakeLists.txt index 2dd87d9f775..89755749fab 100644 --- a/src/utils/tests/CMakeLists.txt +++ b/src/utils/tests/CMakeLists.txt @@ -21,7 +21,6 @@ include(unit_test) -unit_test(NAME abs_test SRC abs_test.cpp DEPENDS espresso::utils) unit_test(NAME Vector_test SRC Vector_test.cpp DEPENDS espresso::utils) unit_test(NAME Factory_test SRC Factory_test.cpp DEPENDS espresso::utils) unit_test(NAME NumeratedContainer_test SRC NumeratedContainer_test.cpp DEPENDS @@ -34,7 +33,6 @@ unit_test(NAME accumulator SRC accumulator.cpp DEPENDS espresso::utils unit_test(NAME int_pow SRC int_pow_test.cpp DEPENDS espresso::utils) unit_test(NAME sgn SRC sgn_test.cpp DEPENDS espresso::utils) unit_test(NAME AS_erfc_part SRC AS_erfc_part_test.cpp DEPENDS espresso::utils) -unit_test(NAME sinc SRC sinc_test.cpp DEPENDS espresso::utils) unit_test(NAME permute_ifield_test SRC permute_ifield_test.cpp DEPENDS espresso::utils) unit_test(NAME vec_rotate SRC vec_rotate_test.cpp DEPENDS espresso::utils) diff --git a/src/utils/tests/Vector_test.cpp b/src/utils/tests/Vector_test.cpp index 4079f4f5d15..64463077fdf 100644 --- a/src/utils/tests/Vector_test.cpp +++ b/src/utils/tests/Vector_test.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -43,7 +44,8 @@ using Utils::Vector; /* Number of nontrivial Baxter permutations of length 2n-1. (A001185) */ -#define TEST_NUMBERS {0, 1, 1, 7, 21, 112, 456, 2603, 13203} +#define TEST_NUMBERS \ + { 0, 1, 1, 7, 21, 112, 456, 2603, 13203 } constexpr int test_numbers[] = TEST_NUMBERS; constexpr std::size_t n_test_numbers = sizeof(test_numbers) / sizeof(int); @@ -187,6 +189,13 @@ BOOST_AUTO_TEST_CASE(algebraic_operators) { BOOST_CHECK(v4 == (v3 /= 2)); } + { + Utils::Vector3i v3{2, 12, 91}; + Utils::Vector3i v4{180, 30, 3}; + auto v5 = 360 / v3; + BOOST_CHECK(v5 == v4); + } + BOOST_CHECK((sqrt(Utils::Vector3d{1., 2., 3.}) == Utils::Vector3d{sqrt(1.), sqrt(2.), sqrt(3.)})); @@ -308,6 +317,27 @@ BOOST_AUTO_TEST_CASE(conversion) { auto const result = Utils::Vector3d{orig.as_vector()}; BOOST_TEST(result == orig); } + + // check span conversion + { + auto const view = static_cast>(orig); + BOOST_TEST(view.data() == orig.data()); + BOOST_TEST(view.size() == orig.size()); + } + + // check span conversion + { + auto const view = std::span(orig); + BOOST_TEST(view.data() == orig.data()); + BOOST_TEST(view.size() == orig.size()); + } + + // check span conversion + { + auto const view = orig.as_span(); + BOOST_TEST(view.data() == orig.data()); + BOOST_TEST(view.size() == orig.size()); + } } BOOST_AUTO_TEST_CASE(vector_product_test) { diff --git a/src/utils/tests/abs_test.cpp b/src/utils/tests/abs_test.cpp deleted file mode 100644 index 94e43728e7b..00000000000 --- a/src/utils/tests/abs_test.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2019-2022 The ESPResSo project - * - * This file is part of ESPResSo. - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#define BOOST_TEST_MODULE abs test -#define BOOST_TEST_DYN_LINK -#include - -#include - -#include -#include - -BOOST_AUTO_TEST_CASE(abs_test) { - using Utils::abs; - - static_assert(std::is_same_v); - static_assert(std::is_same_v); - - BOOST_CHECK_EQUAL(std::abs(3.1415), abs(3.1415)); - BOOST_CHECK_EQUAL(std::abs(-3.1415), abs(-3.1415)); - BOOST_CHECK_EQUAL(std::abs(3.1415f), abs(3.1415f)); - BOOST_CHECK_EQUAL(std::abs(-3.1415f), abs(-3.1415f)); -} diff --git a/src/utils/tests/sinc_test.cpp b/src/utils/tests/sinc_test.cpp deleted file mode 100644 index 063120ca971..00000000000 --- a/src/utils/tests/sinc_test.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (C) 2017-2022 The ESPResSo project - * - * ESPResSo is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * ESPResSo is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#define BOOST_TEST_MODULE Utils::sinc test -#define BOOST_TEST_DYN_LINK -#include - -#include "utils/math/sinc.hpp" - -#include -#include -#include - -BOOST_AUTO_TEST_CASE(zero) { BOOST_CHECK_EQUAL(Utils::sinc(0.0), 1.0); } - -BOOST_AUTO_TEST_CASE(approx) { - auto x = 0.001; - while (x <= 0.11) { - auto const approx = Utils::sinc(x); - auto const pi_x = std::numbers::pi * x; - auto const exact = std::sin(pi_x) / (pi_x); - BOOST_CHECK_SMALL(approx - exact, 1e-13); - x += 0.01; - } -}