Skip to content

Commit

Permalink
Merge pull request #814 from duncandc/wp_jackknife
Browse files Browse the repository at this point in the history
adding w_p jackknife mock observable
  • Loading branch information
aphearin authored Oct 17, 2017
2 parents d49ccdb + a6daa52 commit f66a9cd
Show file tree
Hide file tree
Showing 11 changed files with 1,284 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

- Added `num_lines_header` optional keyword argument to TabularAsciiReader

- Added `wp_jackknife` function. See https://github.com/astropy/halotools/pull/814.


0.5 (2017-05-31)
----------------

Expand Down
1 change: 1 addition & 0 deletions halotools/mock_observables/pair_counters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .marked_npairs_3d import marked_npairs_3d
from .marked_npairs_xy_z import marked_npairs_xy_z
from .npairs_jackknife_3d import npairs_jackknife_3d
from .npairs_jackknife_xy_z import npairs_jackknife_xy_z
from .npairs_s_mu import npairs_s_mu
from .weighted_npairs_s_mu import weighted_npairs_s_mu
from .npairs_per_object_3d import npairs_per_object_3d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
from .npairs_per_object_3d_engine import npairs_per_object_3d_engine
from .pairwise_distance_3d_engine import pairwise_distance_3d_engine
from .pairwise_distance_xy_z_engine import pairwise_distance_xy_z_engine
from .npairs_jackknife_xy_z_engine import npairs_jackknife_xy_z_engine
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
"""
"""
from __future__ import (absolute_import, division, print_function, unicode_literals)

import numpy as np
cimport numpy as cnp
cimport cython
from libc.math cimport ceil

__author__ = ('Duncan Campbell', )
__all__ = ('npairs_jackknife_xy_z_engine', )

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def npairs_jackknife_xy_z_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
weights1in, weights2in, jtags1in, jtags2in, cnp.int64_t N_samples, rp_bins, pi_bins, cell1_tuple):
""" Cython engine for counting pairs of points as a function of projected and parallel separation.
Parameters
------------
double_mesh : object
Instance of `~halotools.mock_observables.RectangularDoubleMesh`
x1in, y1in, z1in : arrays
Numpy arrays storing Cartesian coordinates of points in sample 1
x2in, y2in, z2in : arrays
Numpy arrays storing Cartesian coordinates of points in sample 2
weights1in : array
Numpy array storing the weights for points in sample 1
weights2in : array
Numpy array storing the weights for points in sample 2
jtags1in : array
Numpy array storing the subvolume label integers for points in sample 1
jtags2in : array
Numpy array storing the subvolume label integers for points in sample 2
N_samples : int
Total number of cells into which the simulated box has been subdivided
rp_bins : array_like
numpy array of boundaries defining the bins of separation in the xy-plane
:math:`r_{\rm p}` in which pairs are counted.
pi_bins : numpy.array
array defining parallel separation in which to sum the pair counts
cell1_tuple : tuple
Two-element tuple defining the first and last cells in
double_mesh.mesh1 that will be looped over. Intended for use with
python multiprocessing.
Returns
--------
counts : array
Integer array of length len(rbins) giving the number of pairs
separated by a distance less than the corresponding entry of ``rbins``.
"""

cdef cnp.float64_t[:] rp_bins_squared = rp_bins*rp_bins
cdef cnp.float64_t[:] pi_bins_squared = pi_bins*pi_bins
cdef cnp.float64_t xperiod = double_mesh.xperiod
cdef cnp.float64_t yperiod = double_mesh.yperiod
cdef cnp.float64_t zperiod = double_mesh.zperiod
cdef cnp.int64_t first_cell1_element = cell1_tuple[0]
cdef cnp.int64_t last_cell1_element = cell1_tuple[1]
cdef int PBCs = double_mesh._PBCs

cdef int Ncell1 = double_mesh.mesh1.ncells
cdef int num_rp_bins = len(rp_bins)
cdef int num_pi_bins = len(pi_bins)
cdef cnp.float64_t[:,:,:] counts = np.zeros((N_samples+1, num_rp_bins, num_pi_bins), dtype=np.float64)

cdef cnp.float64_t[:] x1 = np.ascontiguousarray(x1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] y1 = np.ascontiguousarray(y1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] z1 = np.ascontiguousarray(z1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] x2 = np.ascontiguousarray(x2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] y2 = np.ascontiguousarray(y2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] z2 = np.ascontiguousarray(z2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)

cdef cnp.float64_t[:] weights1 = np.ascontiguousarray(weights1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] weights2 = np.ascontiguousarray(weights2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)
cdef cnp.int64_t[:] jtags1 = np.ascontiguousarray(jtags1in[double_mesh.mesh1.idx_sorted], dtype=np.int64)
cdef cnp.int64_t[:] jtags2 = np.ascontiguousarray(jtags2in[double_mesh.mesh2.idx_sorted], dtype=np.int64)

cdef cnp.int64_t icell1, icell2
cdef cnp.int64_t[:] cell1_indices = np.ascontiguousarray(double_mesh.mesh1.cell_id_indices, dtype=np.int64)
cdef cnp.int64_t[:] cell2_indices = np.ascontiguousarray(double_mesh.mesh2.cell_id_indices, dtype=np.int64)

cdef cnp.int64_t ifirst1, ilast1, ifirst2, ilast2

cdef int ix2, iy2, iz2, ix1, iy1, iz1
cdef int nonPBC_ix2, nonPBC_iy2, nonPBC_iz2

cdef int num_x2_covering_steps = int(np.ceil(
double_mesh.search_xlength / double_mesh.mesh2.xcell_size))
cdef int num_y2_covering_steps = int(np.ceil(
double_mesh.search_ylength / double_mesh.mesh2.ycell_size))
cdef int num_z2_covering_steps = int(np.ceil(
double_mesh.search_zlength / double_mesh.mesh2.zcell_size))

cdef int leftmost_ix2, rightmost_ix2
cdef int leftmost_iy2, rightmost_iy2
cdef int leftmost_iz2, rightmost_iz2

cdef int num_x1divs = double_mesh.mesh1.num_xdivs
cdef int num_y1divs = double_mesh.mesh1.num_ydivs
cdef int num_z1divs = double_mesh.mesh1.num_zdivs
cdef int num_x2divs = double_mesh.mesh2.num_xdivs
cdef int num_y2divs = double_mesh.mesh2.num_ydivs
cdef int num_z2divs = double_mesh.mesh2.num_zdivs
cdef int num_x2_per_x1 = num_x2divs // num_x1divs
cdef int num_y2_per_y1 = num_y2divs // num_y1divs
cdef int num_z2_per_z1 = num_z2divs // num_z1divs

cdef cnp.float64_t x2shift, y2shift, z2shift, dx, dy, dz, dxy_sq, dz_sq
cdef cnp.float64_t x1tmp, y1tmp, z1tmp

cdef cnp.int64_t j1, j2
cdef cnp.float64_t w1, w2

cdef int Ni, Nj, i, j, k, l, g
cdef cnp.int64_t s

cdef cnp.float64_t[:] x_icell1, x_icell2
cdef cnp.float64_t[:] y_icell1, y_icell2
cdef cnp.float64_t[:] z_icell1, z_icell2
cdef cnp.float64_t[:] w_icell1, w_icell2
cdef cnp.int64_t[:] j_icell1, j_icell2

for icell1 in range(first_cell1_element, last_cell1_element):
ifirst1 = cell1_indices[icell1]
ilast1 = cell1_indices[icell1+1]

#extract the points in cell1
x_icell1 = x1[ifirst1:ilast1]
y_icell1 = y1[ifirst1:ilast1]
z_icell1 = z1[ifirst1:ilast1]

#extract the weights in cell1
w_icell1 = weights1[ifirst1:ilast1]

#extract the subvolume tags in cell1
j_icell1 = jtags1[ifirst1:ilast1]

Ni = ilast1 - ifirst1
if Ni > 0:

ix1 = icell1 // (num_y1divs*num_z1divs)
iy1 = (icell1 - ix1*num_y1divs*num_z1divs) // num_z1divs
iz1 = icell1 - (ix1*num_y1divs*num_z1divs) - (iy1*num_z1divs)

leftmost_ix2 = ix1*num_x2_per_x1 - num_x2_covering_steps
leftmost_iy2 = iy1*num_y2_per_y1 - num_y2_covering_steps
leftmost_iz2 = iz1*num_z2_per_z1 - num_z2_covering_steps

rightmost_ix2 = (ix1+1)*num_x2_per_x1 + num_x2_covering_steps
rightmost_iy2 = (iy1+1)*num_y2_per_y1 + num_y2_covering_steps
rightmost_iz2 = (iz1+1)*num_z2_per_z1 + num_z2_covering_steps

for nonPBC_ix2 in range(leftmost_ix2, rightmost_ix2):
if nonPBC_ix2 < 0:
x2shift = -xperiod*PBCs
elif nonPBC_ix2 >= num_x2divs:
x2shift = +xperiod*PBCs
else:
x2shift = 0.
# Now apply the PBCs
ix2 = nonPBC_ix2 % num_x2divs

for nonPBC_iy2 in range(leftmost_iy2, rightmost_iy2):
if nonPBC_iy2 < 0:
y2shift = -yperiod*PBCs
elif nonPBC_iy2 >= num_y2divs:
y2shift = +yperiod*PBCs
else:
y2shift = 0.
# Now apply the PBCs
iy2 = nonPBC_iy2 % num_y2divs

for nonPBC_iz2 in range(leftmost_iz2, rightmost_iz2):
if nonPBC_iz2 < 0:
z2shift = -zperiod*PBCs
elif nonPBC_iz2 >= num_z2divs:
z2shift = +zperiod*PBCs
else:
z2shift = 0.
# Now apply the PBCs
iz2 = nonPBC_iz2 % num_z2divs

icell2 = ix2*(num_y2divs*num_z2divs) + iy2*num_z2divs + iz2
ifirst2 = cell2_indices[icell2]
ilast2 = cell2_indices[icell2+1]

#extract the points in cell2
x_icell2 = x2[ifirst2:ilast2]
y_icell2 = y2[ifirst2:ilast2]
z_icell2 = z2[ifirst2:ilast2]

#extract the weights in cell1
w_icell2 = weights2[ifirst2:ilast2]

#extract the subvolume tags in cell1
j_icell2 = jtags2[ifirst2:ilast2]

Nj = ilast2 - ifirst2
#loop over points in cell1
if Nj > 0:
for i in range(0,Ni):
x1tmp = x_icell1[i] - x2shift
y1tmp = y_icell1[i] - y2shift
z1tmp = z_icell1[i] - z2shift

w1 = w_icell1[i]
j1 = j_icell1[i]
#loop over points in cell2
for j in range(0,Nj):
#calculate the square distance
dx = x1tmp - x_icell2[j]
dy = y1tmp - y_icell2[j]
dz = z1tmp - z_icell2[j]
dxy_sq = dx*dx + dy*dy
dz_sq = dz*dz

w2 = w_icell2[j]
j2 = j_icell2[j]

for s in range(N_samples+1):
k = num_rp_bins-1
while dxy_sq<=rp_bins_squared[k]:
g = num_pi_bins-1
while dz_sq<=pi_bins_squared[g]:
counts[s,k,g] += jweight(s, j1, j2, w1, w2)
g=g-1
if g<0: break
k=k-1
if k<0: break


return np.array(counts)


cdef inline cnp.float64_t jweight(cnp.int64_t j, cnp.int64_t j1, cnp.int64_t j2,
cnp.float64_t w1, cnp.float64_t w2):
"""
Return the jackknife weighted count.
parameters
----------
j : int
subsample being removed
j1 : int
integer label indicating which subsample point 1 occupies
j2 : int
integer label indicating which subsample point 2 occupies
w1 : float
weight associated with point 1
w2 : float
weight associated with point 2
Returns
-------
w : double
0.0, w1*w2*0.5, or w1*w2
Notes
-----
We use the tag '0' to indicated we want to use the entire sample, i.e. no subsample
should be labeled with a '0'.
jackknife wiehgt is caclulated as follows:
if both points are inside the sample, return w1*w2
if both points are outside the sample, return 0.0
if one point is within and one point is outside the sample, return 0.5*w1*w2
"""
cdef cnp.float64_t result
if j==0:
result = w1 * w2
# both outside the sub-sample
elif (j1 == j2) & (j1 == j):
result = 0.0
# both inside the sub-sample
elif (j1 != j) & (j2 != j):
result = (w1 * w2)
# only one inside the sub-sample
elif (j1 != j2) & ((j1 == j) | (j2 == j)):
result = 0.5*(w1 * w2)

return result



Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ __all__ = ('npairs_xy_z_engine', )
@cython.nonecheck(False)
def npairs_xy_z_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
rp_bins, pi_bins, cell1_tuple):
r""" Cython engine for counting pairs of points as a function of projected separation.
r""" Cython engine for counting pairs of points as a function of projected and parrallel separation.
Parameters
------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"npairs_3d_engine.pyx", "npairs_projected_engine.pyx",
"npairs_xy_z_engine.pyx", "npairs_jackknife_3d_engine.pyx", "npairs_s_mu_engine.pyx",
"pairwise_distance_3d_engine.pyx", "pairwise_distance_xy_z_engine.pyx",
"weighted_npairs_s_mu_engine.pyx")
"weighted_npairs_s_mu_engine.pyx", "npairs_jackknife_xy_z_engine.pyx")
THIS_PKG_NAME = '.'.join(__name__.split('.')[:-1])


Expand Down
Loading

0 comments on commit f66a9cd

Please sign in to comment.