From 8bf24d42cefaf242671f25d1f3b7fed3f4ad01e6 Mon Sep 17 00:00:00 2001
From: Leandro Martinez <lmartine@unicamp.br>
Date: Mon, 9 Dec 2024 12:26:21 -0300
Subject: [PATCH] use ChunkSplitters for splitting batches

---
 Project.toml         |  2 ++
 src/CellListMap.jl   |  1 +
 src/CoreComputing.jl | 21 ++++++++-------------
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/Project.toml b/Project.toml
index fccb315d..dc2114d6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Leandro Martinez <lmartine@unicamp.br> and contributors"]
 version = "0.9.7-DEV"
 
 [deps]
+ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e"
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -19,6 +20,7 @@ TestItems = "1c621080-faea-4a02-84b6-bbd5e436b8fe"
 Aqua = "0.8.5"
 BenchmarkTools = "1.4"
 Chemfiles = "0.10.31"
+ChunkSplitters = "3.1.0"
 Compat = "4.14.0"
 DocStringExtensions = "0.9"
 Documenter = "1.2.1"
diff --git a/src/CellListMap.jl b/src/CellListMap.jl
index c0def25e..dcdc58d3 100644
--- a/src/CellListMap.jl
+++ b/src/CellListMap.jl
@@ -10,6 +10,7 @@ using Setfield: @set!
 using LinearAlgebra: cross, diagm, I
 using Base.Threads: nthreads, @spawn 
 using Base: @lock # not exported in 1.6
+using ChunkSplitters: index_chunks, RoundRobin, Consecutive
 
 export Box
 export CellList, UpdateCellList!
diff --git a/src/CoreComputing.jl b/src/CoreComputing.jl
index 883a9a12..70a0ef98 100644
--- a/src/CoreComputing.jl
+++ b/src/CoreComputing.jl
@@ -1,8 +1,3 @@
-#
-# Parallel thread spliiter
-#
-splitter(first, nbatches, n) = first:nbatches:n
-
 #=
     reduce(output, output_threaded)
 
@@ -118,8 +113,8 @@ end
 #
 # Parallel version for self-pairwise computations
 #
-function batch(f::F, ibatch, nbatches, n_cells_with_real_particles, output_threaded, box, cl, p) where {F}
-    for i in splitter(ibatch, nbatches, n_cells_with_real_particles)
+function batch(f::F, ibatch, cell_indices, output_threaded, box, cl, p) where {F}
+    for i in cell_indices
         cellᵢ = cl.cells[cl.cell_indices_real[i]]
         output_threaded[ibatch] = inner_loop!(f, box, cellᵢ, cl, output_threaded[ibatch], ibatch)
         _next!(p)
@@ -143,8 +138,8 @@ function map_pairwise_parallel!(
     @unpack n_cells_with_real_particles = cl
     nbatches = cl.nbatches.map_computation
     p = show_progress ? Progress(n_cells_with_real_particles, dt=1) : nothing
-    @sync for ibatch in 1:nbatches
-        @spawn batch($f, $ibatch, $nbatches, $n_cells_with_real_particles, $output_threaded, $box, $cl, $p)
+    @sync for (ibatch, cell_indices) in enumerate(index_chunks(1:n_cells_with_real_particles; n=nbatches, split=RoundRobin()))
+        @spawn batch($f, $ibatch, $cell_indices, $output_threaded, $box, $cl, $p)
     end
     return reduce(output, output_threaded)
 end
@@ -168,8 +163,8 @@ end
 #
 # Parallel version for cross-interaction computations
 #
-function batch(f::F, ibatch, nbatches, output_threaded, box, cl, p) where {F}
-    for i in splitter(ibatch, nbatches, length(cl.ref))
+function batch_cross(f::F, ibatch, ref_atom_indices, output_threaded, box, cl, p) where {F}
+    for i in ref_atom_indices
         output_threaded[ibatch] = inner_loop!(f, output_threaded[ibatch], i, box, cl)
         _next!(p)
     end
@@ -187,8 +182,8 @@ function map_pairwise_parallel!(
         output_threaded = [deepcopy(output) for i in 1:nbatches]
     end
     p = show_progress ? Progress(length(cl.ref), dt=1) : nothing
-    @sync for ibatch in 1:nbatches
-        @spawn batch($f, $ibatch, $nbatches, $output_threaded, $box, $cl, $p)
+    @sync for (ibatch, ref_atom_indices) in enumerate(index_chunks(1:length(cl.ref); n=nbatches, split=Consecutive()))
+        @spawn batch_cross($f, $ibatch, $ref_atom_indices, $output_threaded, $box, $cl, $p)
     end
     return reduce(output, output_threaded)
 end