Skip to content

Commit

Permalink
CUDA tests work
Browse files Browse the repository at this point in the history
  • Loading branch information
leios committed Dec 20, 2024
1 parent 14cf074 commit 28e7381
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 322 deletions.
330 changes: 9 additions & 321 deletions ext/MollyCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module MollyCUDAExt
using Molly
using CUDA
using Atomix
using KernelAbstractions

CUDA.Const(nl::Molly.NoNeighborList) = nl

Check warning on line 8 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L8

Added line #L8 was not covered by tests

Expand Down Expand Up @@ -37,8 +38,8 @@ function cuda_threads_blocks_specific(n_inters)
return n_threads_gpu, n_blocks

Check warning on line 38 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L35-L38

Added lines #L35 - L38 were not covered by tests
end

function Molly.pairwise_force_gpu!(fs_mat, coords::CuArray{SVector{D, C}}, velocities, atoms,
boundary, pairwise_inters, nbs, step_n, force_units, ::Val{T}) where {D, C, T}
function Molly.pairwise_force_gpu!(fs_mat::AT, coords::AbstractArray{SVector{D, C}}, velocities, atoms,

Check warning on line 41 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L41

Added line #L41 was not covered by tests
boundary, pairwise_inters, nbs, step_n, force_units, ::Val{T}) where {AT <: CuArray, D, C, T}
if typeof(nbs) == NoNeighborList
kernel = @cuda launch=false pairwise_force_kernel_nonl!(

Check warning on line 44 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L43-L44

Added lines #L43 - L44 were not covered by tests
fs_mat, coords, velocities, atoms, boundary, pairwise_inters, step_n,
Expand All @@ -56,6 +57,7 @@ function Molly.pairwise_force_gpu!(fs_mat, coords::CuArray{SVector{D, C}}, veloc
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks pairwise_force_kernel_nl!(

Check warning on line 57 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L56-L57

Added lines #L56 - L57 were not covered by tests
fs_mat, coords, velocities, atoms, boundary, pairwise_inters, nbs, step_n,
Val(D), Val(force_units))

end
return fs_mat

Check warning on line 62 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L62

Added line #L62 was not covered by tests
end
Expand All @@ -71,7 +73,7 @@ function pairwise_force_kernel_nl!(forces, coords_var, velocities_var, atoms_var

@inbounds if inter_i <= length(neighbors)
i, j, special = neighbors[inter_i]
f = sum_pairwise_forces(inters, atoms[i], atoms[j], Val(F), special, coords[i], coords[j],
f = Molly.sum_pairwise_forces(inters, atoms[i], atoms[j], Val(F), special, coords[i], coords[j],

Check warning on line 76 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L74-L76

Added lines #L74 - L76 were not covered by tests
boundary, velocities[i], velocities[j], step_n)
for dim in 1:D
fval = ustrip(f[dim])
Expand All @@ -82,6 +84,7 @@ function pairwise_force_kernel_nl!(forces, coords_var, velocities_var, atoms_var
return nothing

Check warning on line 84 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L84

Added line #L84 was not covered by tests
end


#=
**The No-neighborlist pairwise force summation kernel**: This kernel calculates all the pairwise forces in the system of
`n_atoms` atoms, this is done by dividing the complete matrix of `n_atoms`×`n_atoms` interactions into small tiles. Most
Expand Down Expand Up @@ -122,7 +125,7 @@ That's why the calculations are done in the following order:
h | 1 2 3 4 5 6
```
=#
function pairwise_force_kernel_nonl!(forces::CuArray{T}, coords_var, velocities_var,
function pairwise_force_kernel_nonl!(forces::AbstractArray{T}, coords_var, velocities_var,

Check warning on line 128 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L128

Added line #L128 was not covered by tests
atoms_var, boundary, inters, step_n, ::Val{D}, ::Val{F}) where {T, D, F}
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
Expand All @@ -149,7 +152,7 @@ function pairwise_force_kernel_nonl!(forces::CuArray{T}, coords_var, velocities_
j = j_0_tile + del_j
if i != j
atom_j, coord_j, vel_j = atoms[j], coords[j], velocities[j]
f = sum_pairwise_forces(inters, atom_i, atom_j, Val(F), false, coord_i, coord_j,
f = Molly.sum_pairwise_forces(inters, atom_i, atom_j, Val(F), false, coord_i, coord_j,

Check warning on line 155 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L147-L155

Added lines #L147 - L155 were not covered by tests
boundary, vel_i, vel_j, step_n)
for dim in 1:D
forces_shmem[dim, tidx] += -ustrip(f[dim])
Expand All @@ -174,7 +177,7 @@ function pairwise_force_kernel_nonl!(forces::CuArray{T}, coords_var, velocities_
@inbounds for _ in 1:tilesteps
sync_warp()
atom_j = atoms[j]
f = sum_pairwise_forces(inters, atom_i, atom_j, Val(F), false, coord_i, coord_j,
f = Molly.sum_pairwise_forces(inters, atom_i, atom_j, Val(F), false, coord_i, coord_j,

Check warning on line 180 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L175-L180

Added lines #L175 - L180 were not covered by tests
boundary, vel_i, vel_j, step_n)
for dim in 1:D
forces_shmem[dim, tidx] += -ustrip(f[dim])
Expand All @@ -190,319 +193,4 @@ function pairwise_force_kernel_nonl!(forces::CuArray{T}, coords_var, velocities_
return nothing

Check warning on line 193 in ext/MollyCUDAExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/MollyCUDAExt.jl#L193

Added line #L193 was not covered by tests
end

function Molly.specific_force_gpu!(fs_mat, inter_list::InteractionList1Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, force_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_force_1_atoms_kernel!(fs_mat,
coords, velocities, atoms, boundary, step_n, inter_list.is, inter_list.inters,
Val(D), Val(force_units))
return fs_mat
end

function Molly.specific_force_gpu!(fs_mat, inter_list::InteractionList2Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, force_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_force_2_atoms_kernel!(fs_mat,
coords, velocities, atoms, boundary, step_n, inter_list.is, inter_list.js,
inter_list.inters, Val(D), Val(force_units))
return fs_mat
end

function Molly.specific_force_gpu!(fs_mat, inter_list::InteractionList3Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, force_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_force_3_atoms_kernel!(fs_mat,
coords, velocities, atoms, boundary, step_n, inter_list.is, inter_list.js,
inter_list.ks, inter_list.inters, Val(D), Val(force_units))
return fs_mat
end

function Molly.specific_force_gpu!(fs_mat, inter_list::InteractionList4Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, force_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_force_4_atoms_kernel!(fs_mat,
coords, velocities, atoms, boundary, step_n, inter_list.is, inter_list.js,
inter_list.ks, inter_list.ls, inter_list.inters, Val(D), Val(force_units))
return fs_mat
end

function Molly.specific_force_1_atoms_kernel!(forces, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, inters_var, ::Val{D}, ::Val{F}) where {D, F}
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i = is[inter_i]
fs = force_gpu(inters[inter_i], coords[i], boundary, atoms[i], F, velocities[i], step_n)
if unit(fs.f1[1]) != F
error("wrong force unit returned, was expecting $F")
end
for dim in 1:D
Atomix.@atomic :monotonic forces[dim, i] += ustrip(fs.f1[dim])
end
end
return nothing
end

function Molly.specific_force_2_atoms_kernel!(forces, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, js_var, inters_var, ::Val{D}, ::Val{F}) where {D, F}
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
js = CUDA.Const(js_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i, j = is[inter_i], js[inter_i]
fs = force_gpu(inters[inter_i], coords[i], coords[j], boundary, atoms[i], atoms[j], F,
velocities[i], velocities[j], step_n)
if unit(fs.f1[1]) != F || unit(fs.f2[1]) != F
error("wrong force unit returned, was expecting $F")
end
for dim in 1:D
Atomix.@atomic :monotonic forces[dim, i] += ustrip(fs.f1[dim])
Atomix.@atomic :monotonic forces[dim, j] += ustrip(fs.f2[dim])
end
end
return nothing
end

function Molly.specific_force_3_atoms_kernel!(forces, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, js_var, ks_var, inters_var, ::Val{D}, ::Val{F}) where {D, F}
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
js = CUDA.Const(js_var)
ks = CUDA.Const(ks_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i, j, k = is[inter_i], js[inter_i], ks[inter_i]
fs = force_gpu(inters[inter_i], coords[i], coords[j], coords[k], boundary, atoms[i],
atoms[j], atoms[k], F, velocities[i], velocities[j], velocities[k], step_n)
if unit(fs.f1[1]) != F || unit(fs.f2[1]) != F || unit(fs.f3[1]) != F
error("wrong force unit returned, was expecting $F")
end
for dim in 1:D
Atomix.@atomic :monotonic forces[dim, i] += ustrip(fs.f1[dim])
Atomix.@atomic :monotonic forces[dim, j] += ustrip(fs.f2[dim])
Atomix.@atomic :monotonic forces[dim, k] += ustrip(fs.f3[dim])
end
end
return nothing
end

function Molly.specific_force_4_atoms_kernel!(forces, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, js_var, ks_var, ls_var, inters_var,
::Val{D}, ::Val{F}) where {D, F}
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
js = CUDA.Const(js_var)
ks = CUDA.Const(ks_var)
ls = CUDA.Const(ls_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i, j, k, l = is[inter_i], js[inter_i], ks[inter_i], ls[inter_i]
fs = force_gpu(inters[inter_i], coords[i], coords[j], coords[k], coords[l], boundary,
atoms[i], atoms[j], atoms[k], atoms[l], F, velocities[i], velocities[j],
velocities[k], velocities[l], step_n)
if unit(fs.f1[1]) != F || unit(fs.f2[1]) != F || unit(fs.f3[1]) != F || unit(fs.f4[1]) != F
error("wrong force unit returned, was expecting $F")
end
for dim in 1:D
Atomix.@atomic :monotonic forces[dim, i] += ustrip(fs.f1[dim])
Atomix.@atomic :monotonic forces[dim, j] += ustrip(fs.f2[dim])
Atomix.@atomic :monotonic forces[dim, k] += ustrip(fs.f3[dim])
Atomix.@atomic :monotonic forces[dim, l] += ustrip(fs.f4[dim])
end
end
return nothing
end

function Molly.pairwise_pe_gpu!(pe_vec_nounits, coords::CuArray{SVector{D, C}}, velocities, atoms,
boundary, pairwise_inters, nbs, step_n, energy_units,
::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_pairwise(length(nbs))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks pairwise_pe_kernel!(
pe_vec_nounits, coords, velocities, atoms, boundary, pairwise_inters, nbs,
step_n, Val(energy_units))
return pe_vec_nounits
end

function pairwise_pe_kernel!(energy, coords_var, velocities_var, atoms_var, boundary, inters,
neighbors_var, step_n, ::Val{E}) where E
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
neighbors = CUDA.Const(neighbors_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(neighbors)
i, j, special = neighbors[inter_i]
coord_i, coord_j, vel_i, vel_j = coords[i], coords[j], velocities[i], velocities[j]
dr = vector(coord_i, coord_j, boundary)
pe = Molly.potential_energy_gpu(inters[1], dr, atoms[i], atoms[j], E, special, coord_i, coord_j,
boundary, vel_i, vel_j, step_n)
for inter in inters[2:end]
pe += Molly.potential_energy_gpu(inter, dr, atoms[i], atoms[j], E, special, coord_i, coord_j,
boundary, vel_i, vel_j, step_n)
end
if unit(pe) != E
error("wrong energy unit returned, was expecting $E but got $(unit(pe))")
end
Atomix.@atomic :monotonic energy[1] += ustrip(pe)
end
return nothing
end

function Molly.specific_pe_gpu!(pe_vec_nounits, inter_list::InteractionList1Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, energy_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_pe_1_atoms_kernel!(
pe_vec_nounits, coords, velocities, atoms, boundary, step_n, inter_list.is,
inter_list.inters, Val(energy_units))
return pe_vec_nounits
end

function Molly.specific_pe_gpu!(pe_vec_nounits, inter_list::InteractionList2Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, energy_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_pe_2_atoms_kernel!(
pe_vec_nounits, coords, velocities, atoms, boundary, step_n, inter_list.is,
inter_list.js, inter_list.inters, Val(energy_units))
return pe_vec_nounits
end

function Molly.specific_pe_gpu!(pe_vec_nounits, inter_list::InteractionList3Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, energy_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_pe_3_atoms_kernel!(
pe_vec_nounits, coords, velocities, atoms, boundary, step_n, inter_list.is,
inter_list.js, inter_list.ks, inter_list.inters, Val(energy_units))
return pe_vec_nounits
end

function Molly.specific_pe_gpu!(pe_vec_nounits, inter_list::InteractionList4Atoms, coords::CuArray{SVector{D, C}},
velocities, atoms, boundary, step_n, energy_units, ::Val{T}) where {D, C, T}
n_threads_gpu, n_blocks = cuda_threads_blocks_specific(length(inter_list))
CUDA.@sync @cuda threads=n_threads_gpu blocks=n_blocks specific_pe_4_atoms_kernel!(
pe_vec_nounits, coords, velocities, atoms, boundary, step_n, inter_list.is,
inter_list.js, inter_list.ks, inter_list.ls, inter_list.inters, Val(energy_units))
return pe_vec_nounits
end

function specific_pe_1_atoms_kernel!(energy, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, inters_var, ::Val{E}) where E
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i = is[inter_i]
pe = potential_energy_gpu(inters[inter_i], coords[i], boundary, atoms[i], E,
velocities[i], step_n)
if unit(pe) != E
error("wrong energy unit returned, was expecting $E but got $(unit(pe))")
end
Atomix.@atomic :monotonic energy[1] += ustrip(pe)
end
return nothing
end

function specific_pe_2_atoms_kernel!(energy, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, js_var, inters_var, ::Val{E}) where E
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
js = CUDA.Const(js_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i, j = is[inter_i], js[inter_i]
pe = potential_energy_gpu(inters[inter_i], coords[i], coords[j], boundary, atoms[i],
atoms[j], E, velocities[i], velocities[j], step_n)
if unit(pe) != E
error("wrong energy unit returned, was expecting $E but got $(unit(pe))")
end
Atomix.@atomic :monotonic energy[1] += ustrip(pe)
end
return nothing
end

function specific_pe_3_atoms_kernel!(energy, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, js_var, ks_var, inters_var, ::Val{E}) where E
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
js = CUDA.Const(js_var)
ks = CUDA.Const(ks_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i, j, k = is[inter_i], js[inter_i], ks[inter_i]
pe = potential_energy_gpu(inters[inter_i], coords[i], coords[j], coords[k], boundary,
atoms[i], atoms[j], atoms[k], E, velocities[i], velocities[j],
velocities[k], step_n)
if unit(pe) != E
error("wrong energy unit returned, was expecting $E but got $(unit(pe))")
end
Atomix.@atomic :monotonic energy[1] += ustrip(pe)
end
return nothing
end

function specific_pe_4_atoms_kernel!(energy, coords_var, velocities_var, atoms_var, boundary,
step_n, is_var, js_var, ks_var, ls_var, inters_var, ::Val{E}) where E
coords = CUDA.Const(coords_var)
velocities = CUDA.Const(velocities_var)
atoms = CUDA.Const(atoms_var)
is = CUDA.Const(is_var)
js = CUDA.Const(js_var)
ks = CUDA.Const(ks_var)
ls = CUDA.Const(ls_var)
inters = CUDA.Const(inters_var)

inter_i = (blockIdx().x - 1) * blockDim().x + threadIdx().x

@inbounds if inter_i <= length(is)
i, j, k, l = is[inter_i], js[inter_i], ks[inter_i], ls[inter_i]
pe = potential_energy_gpu(inters[inter_i], coords[i], coords[j], coords[k], coords[l],
boundary, atoms[i], atoms[j], atoms[k], atoms[l], E,
velocities[i], velocities[j], velocities[k], velocities[l],
step_n)
if unit(pe) != E
error("wrong energy unit returned, was expecting $E but got $(unit(pe))")
end
Atomix.@atomic :monotonic energy[1] += ustrip(pe)
end
return nothing
end

end # module
1 change: 0 additions & 1 deletion src/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ function pairwise_force_gpu!(fs_mat, coords::AbstractArray{SVector{D, C}}, veloc
kernel! = pairwise_force_kernel_nl!(backend, n_threads_gpu)
kernel!(fs_mat, coords, velocities, atoms, boundary, pairwise_inters,

Check warning on line 44 in src/kernels.jl

View check run for this annotation

Codecov / codecov/patch

src/kernels.jl#L42-L44

Added lines #L42 - L44 were not covered by tests
nbs, step_n, Val(D), Val(force_units); ndrange = length(nbs))

end
return fs_mat

Check warning on line 47 in src/kernels.jl

View check run for this annotation

Codecov / codecov/patch

src/kernels.jl#L47

Added line #L47 was not covered by tests
end
Expand Down

0 comments on commit 28e7381

Please sign in to comment.