Skip to content

Commit

Permalink
Merge pull request #768 from manodeep/npairs_s_mu_perf
Browse files Browse the repository at this point in the history
Improving npairs(s, mu) performance and testing.
  • Loading branch information
Duncan Campbell authored Jun 13, 2017
2 parents 7c9d606 + fefd22f commit ce11b77
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 118 deletions.
2 changes: 1 addition & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
0.6 (unreleased)
----------------

- No changes yet
- Changed the API for mock_observables.pair_counters.n_pairs_s_mu which now requires ``mu_bins`` to be in the conventional mu=cos(theta_LOS) format instead of mu=sin(theta_LOS).


0.5 (2017-05-31)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@ cimport cython
from libc.math cimport ceil
from libc.math cimport sqrt

__author__ = ('Andrew Hearin', 'Duncan Campbell')
__author__ = ('Andrew Hearin', 'Duncan Campbell', 'Manodeep Sinha')
__all__ = ('npairs_s_mu_engine', )

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def npairs_s_mu_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
s_bins_in, mu_bins_in, cell1_tuple):
r""" Cython engine for counting pairs of points as a function of projected separation.
r""" Cython engine for counting pairs of points as a function of radial separation, s,
and the angle between the line-of-sight (LOS) and s.
Parameters
------------
Expand Down Expand Up @@ -48,10 +49,15 @@ def npairs_s_mu_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
counts : array
Integer array of length len(s_bins) giving the number of pairs
separated by a distance less than the corresponding entry of ``s_bins``.
Notes
-----
mu is defined as the sin(theta_LOS) so that as theta_LOS increases, mu increases.
"""
cdef cnp.float64_t[:] s_bins = s_bins_in
cdef cnp.float64_t[:] mu_bins = mu_bins_in
cdef cnp.float64_t[:] sqr_s_bins = s_bins_in * s_bins_in
cdef cnp.float64_t[:] sqr_mu_bins = mu_bins_in * mu_bins_in

cdef cnp.float64_t xperiod = double_mesh.xperiod
cdef cnp.float64_t yperiod = double_mesh.yperiod
cdef cnp.float64_t zperiod = double_mesh.zperiod
Expand All @@ -60,8 +66,8 @@ def npairs_s_mu_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
cdef int PBCs = double_mesh._PBCs

cdef int Ncell1 = double_mesh.mesh1.ncells
cdef int num_s_bins = len(s_bins)
cdef int num_mu_bins = len(mu_bins)
cdef int num_s_bins = len(sqr_s_bins)
cdef int num_mu_bins = len(sqr_mu_bins)
cdef cnp.int64_t[:,:] counts = np.zeros((num_s_bins, num_mu_bins), dtype=np.int64)
cdef cnp.int64_t[:,:] counts_sum = np.zeros((num_s_bins, num_mu_bins), dtype=np.int64)

Expand Down Expand Up @@ -105,7 +111,9 @@ def npairs_s_mu_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
cdef cnp.float64_t x2shift, y2shift, z2shift, dx, dy, dz, dxy_sq, dz_sq
cdef cnp.float64_t x1tmp, y1tmp, z1tmp, s, mu
cdef int Ni, Nj, i, j, k, l, g, max_k
cdef cnp.float64_t s_max = np.max(s_bins_in), mu_max = np.max(mu_bins_in)
cdef cnp.float64_t sqr_s_max = np.max(sqr_s_bins)
cdef cnp.float64_t sqr_mu_max = np.max(sqr_mu_bins)
cdef cnp.float64_t sqr_s, sqr_mu

cdef cnp.float64_t[:] x_icell1, x_icell2
cdef cnp.float64_t[:] y_icell1, y_icell2
Expand Down Expand Up @@ -172,42 +180,52 @@ def npairs_s_mu_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
z_icell2 = z2[ifirst2:ilast2]

Nj = ilast2 - ifirst2
#loop over points in cell1 points
# loop over points in cell1 points
if Nj > 0:
for i in range(0,Ni):
x1tmp = x_icell1[i] - x2shift
y1tmp = y_icell1[i] - y2shift
z1tmp = z_icell1[i] - z2shift
#loop over points in cell2 points
# loop over points in cell2 points
for j in range(0,Nj):
#calculate the square distance
# calculate the square distance
dx = x1tmp - x_icell2[j]
dy = y1tmp - y_icell2[j]
dz = z1tmp - z_icell2[j]
dxy_sq = dx*dx + dy*dy
dz_sq = dz*dz

#transform to s and mu
s = sqrt(dz_sq + dxy_sq)
if s!=0:
mu = sqrt(dxy_sq)/s
else:
mu=0.0

if (s <= s_max) & (mu <= mu_max):

k = num_s_bins-2
while k!=-1:
if s > s_bins[k]: break
k=k-1
# transform to s and mu
sqr_s = dz_sq + dxy_sq

g = num_mu_bins-2
while g!=-1:
if mu > mu_bins[g]: break
g=g-1
if sqr_s > sqr_s_max:
continue

# Only counts pairs in that bin.
counts[k+1,g+1] += 1
if sqr_s > 0.0:
sqr_mu = dxy_sq/sqr_s
else:
sqr_mu = 0.0

if sqr_mu > sqr_mu_max:
continue

# The loop has been intentionally split up
# Since division is slow,
# computing mu is a bottle-neck.
# Computing the 's' bin however can proceed
# in the meantime.
k = num_s_bins-2
while k!=-1:
if sqr_s > sqr_s_bins[k]: break
k=k-1

g = num_mu_bins-2
while g!=-1:
if sqr_mu > sqr_mu_bins[g]: break
g=g-1

# Only counts pairs in that bin.
counts[k+1,g+1] += 1

# Adds counts for all bins where s < s_bin and mu < mu_bin.
for k in range(num_s_bins):
Expand Down
68 changes: 43 additions & 25 deletions halotools/mock_observables/pair_counters/npairs_s_mu.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@ def npairs_s_mu(sample1, sample2, s_bins, mu_bins, period=None,
verbose=False, num_threads=1, approx_cell1_size=None, approx_cell2_size=None):
r"""
Function counts the number of pairs of points separated by less than
radial separation, *s,* and :math:`\mu\equiv\sin(\theta_{\rm los})`,
where :math:`\theta_{\rm los}` is the line-of-sight angle
between points and :math:`s^2 = r_{\rm parallel}^2 + r_{\rm perp}^2`.
Note that if sample1 == sample2 that the
`~halotools.mock_observables.npairs_s_mu` function double-counts pairs.
If your science application requires sample1==sample2 inputs and also pairs
to not be double-counted, simply divide the final counts by 2.
radial separation, :math:`s`, given by ``s_bins`` and
angular distance, :math:`\mu\equiv\cos(\theta_{\rm los})`, given by ``mu_bins``,
where :math:`\theta_{\rm los}` is the angle between :math:`\vec{s}` and
the line-of-sight (LOS).
The first two dimensions (x, y) define the plane for perpendicular distances.
The third dimension (z) defines the LOS. i.e. x,y positions are on
the plane of the sky, and z is the radial distance coordinate. This is the 'distant
observer' approximation.
A common variation of pair-counting calculations is to count pairs with
separations *between* two different distances *r1* and *r2*. You can retrieve
this information from the `~halotools.mock_observables.npairs_s_mu`
by taking `numpy.diff` of the returned array.
separations *between* two different distances, e.g. [s1 ,s2] and [mu1, mu2].
You can retrieve this information from `~halotools.mock_observables.npairs_s_mu`
by taking `numpy.diff` of the returned array along each axis.
See Notes section for further clarification.
Expand All @@ -60,9 +61,6 @@ def npairs_s_mu(sample1, sample2, s_bins, mu_bins, period=None,
:math:`\cos(\theta_{\rm LOS})` boundaries defining the bins in
which pairs are counted. All values must be between [0,1].
Note that using the sine function is not common convention for
calculating the two point correlation function (see notes).
period : array_like, optional
Length-3 sequence defining the periodic boundary conditions
in each dimension. If you instead provide a single scalar, Lbox,
Expand Down Expand Up @@ -99,15 +97,31 @@ def npairs_s_mu(sample1, sample2, s_bins, mu_bins, period=None,
number of pairs separated by less than (s, mu)
Notes
------
The quantity :math:`\mu` is defined as the :math:`\sin(\theta_{\rm los})`
and not the conventional :math:`\cos(\theta_{\rm los})`. This is
because the pair counter has been optimized under the assumption that its
separation variable (in this case, :math:`\mu`) *increases*
as :math:`\theta_{\rm los})` increases.
-----
Let :math:`\vec{s}` be the radial vector connnecting two points.
The magnitude, :math:`s`, is:
.. math::
s = \sqrt{r_{\parallel}^2+r_{\perp}^2},
where :math:`r_{\parallel}` is the separation parallel to the LOS
and :math:`r_{\perp}` is the separation perpednicular to the LOS. :math:`\mu` is
the cosine of the angle, :math:`\theta_{\rm LOS}`, between the LOS
and :math:`\vec{s}`:
.. math::
\mu = \cos(\theta_{\rm LOS}) \equiv r_{\parallel}/s.
Along the first dimension of ``num_pairs``, :math:`s` increases.
Along the second dimension, :math:`\mu` decreases,
i.e. :math:`\theta_{\rm LOS}` increases.
If sample1 == sample2 that the `~halotools.mock_observables.npairs_s_mu` function
double-counts pairs. If your science application requires sample1==sample2 inputs
and also pairs to not be double-counted, simply divide the final counts by 2.
One final point of clarification concerning double-counting may be in order.
Suppose sample1==sample2 and rbins[0]==0. Then the returned value for this bin
Suppose sample1==sample2 and s_bins[0]==0. Then the returned value for this bin
will be len(sample1), since each sample1 point has distance 0 from itself.
Examples
Expand Down Expand Up @@ -158,6 +172,10 @@ def npairs_s_mu(sample1, sample2, s_bins, mu_bins, period=None,
msg = ("\n Input `mu_bins` must be a monotonically increasing \n"
"1D array with at least two entries")
raise ValueError(msg)
# convert to mu=sin(theta_los) binning used by the cython engine.
mu_bins_prime = np.sin(np.arccos(mu_bins))
mu_bins_prime = np.sort(mu_bins_prime)
# increasing mu_prime now corresponds to increasing theta_LOS

search_xlength, search_ylength, search_zlength = rmax, rmax, rmax

Expand All @@ -174,11 +192,11 @@ def npairs_s_mu(sample1, sample2, s_bins, mu_bins, period=None,
approx_x2cell_size, approx_y2cell_size, approx_z2cell_size,
search_xlength, search_ylength, search_zlength, xperiod, yperiod, zperiod, PBCs)

# # Create a function object that has a single argument, for parallelization purposes
# Create a function object that has a single argument, for parallelization purposes
engine = partial(npairs_s_mu_engine,
double_mesh, x1in, y1in, z1in, x2in, y2in, z2in, s_bins, mu_bins)
double_mesh, x1in, y1in, z1in, x2in, y2in, z2in, s_bins, mu_bins_prime)

# # Calculate the cell1 indices that will be looped over by the engine
# Calculate the cell1 indices that will be looped over by the engine
num_threads, cell1_tuples = _cell1_parallelization_indices(
double_mesh.mesh1.ncells, num_threads)

Expand Down
Loading

0 comments on commit ce11b77

Please sign in to comment.