Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding w_p jackknife mock observable #814

Merged
merged 14 commits into from
Oct 17, 2017
Merged
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@

- Added `num_lines_header` optional keyword argument to TabularAsciiReader

- Added `wp_jackknife` function. See https://github.com/astropy/halotools/pull/814.


0.5 (2017-05-31)
----------------

Expand Down
1 change: 1 addition & 0 deletions halotools/mock_observables/pair_counters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .marked_npairs_3d import marked_npairs_3d
from .marked_npairs_xy_z import marked_npairs_xy_z
from .npairs_jackknife_3d import npairs_jackknife_3d
from .npairs_jackknife_xy_z import npairs_jackknife_xy_z
from .npairs_s_mu import npairs_s_mu
from .weighted_npairs_s_mu import weighted_npairs_s_mu
from .npairs_per_object_3d import npairs_per_object_3d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@
from .npairs_per_object_3d_engine import npairs_per_object_3d_engine
from .pairwise_distance_3d_engine import pairwise_distance_3d_engine
from .pairwise_distance_xy_z_engine import pairwise_distance_xy_z_engine
from .npairs_jackknife_xy_z_engine import npairs_jackknife_xy_z_engine
Original file line number Diff line number Diff line change
@@ -0,0 +1,302 @@
"""
"""
from __future__ import (absolute_import, division, print_function, unicode_literals)

import numpy as np
cimport numpy as cnp
cimport cython
from libc.math cimport ceil

__author__ = ('Duncan Campbell', )
__all__ = ('npairs_jackknife_xy_z_engine', )

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def npairs_jackknife_xy_z_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
weights1in, weights2in, jtags1in, jtags2in, cnp.int64_t N_samples, rp_bins, pi_bins, cell1_tuple):
""" Cython engine for counting pairs of points as a function of projected and parallel separation.

Parameters
------------
double_mesh : object
Instance of `~halotools.mock_observables.RectangularDoubleMesh`

x1in, y1in, z1in : arrays
Numpy arrays storing Cartesian coordinates of points in sample 1

x2in, y2in, z2in : arrays
Numpy arrays storing Cartesian coordinates of points in sample 2

weights1in : array
Numpy array storing the weights for points in sample 1

weights2in : array
Numpy array storing the weights for points in sample 2

jtags1in : array
Numpy array storing the subvolume label integers for points in sample 1

jtags2in : array
Numpy array storing the subvolume label integers for points in sample 2

N_samples : int
Total number of cells into which the simulated box has been subdivided

rp_bins : array_like
numpy array of boundaries defining the bins of separation in the xy-plane
:math:`r_{\rm p}` in which pairs are counted.

pi_bins : numpy.array
array defining parallel separation in which to sum the pair counts

cell1_tuple : tuple
Two-element tuple defining the first and last cells in
double_mesh.mesh1 that will be looped over. Intended for use with
python multiprocessing.

Returns
--------
counts : array
Integer array of length len(rbins) giving the number of pairs
separated by a distance less than the corresponding entry of ``rbins``.

"""

cdef cnp.float64_t[:] rp_bins_squared = rp_bins*rp_bins
cdef cnp.float64_t[:] pi_bins_squared = pi_bins*pi_bins
cdef cnp.float64_t xperiod = double_mesh.xperiod
cdef cnp.float64_t yperiod = double_mesh.yperiod
cdef cnp.float64_t zperiod = double_mesh.zperiod
cdef cnp.int64_t first_cell1_element = cell1_tuple[0]
cdef cnp.int64_t last_cell1_element = cell1_tuple[1]
cdef int PBCs = double_mesh._PBCs

cdef int Ncell1 = double_mesh.mesh1.ncells
cdef int num_rp_bins = len(rp_bins)
cdef int num_pi_bins = len(pi_bins)
cdef cnp.float64_t[:,:,:] counts = np.zeros((N_samples+1, num_rp_bins, num_pi_bins), dtype=np.float64)

cdef cnp.float64_t[:] x1 = np.ascontiguousarray(x1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] y1 = np.ascontiguousarray(y1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] z1 = np.ascontiguousarray(z1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] x2 = np.ascontiguousarray(x2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] y2 = np.ascontiguousarray(y2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] z2 = np.ascontiguousarray(z2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)

cdef cnp.float64_t[:] weights1 = np.ascontiguousarray(weights1in[double_mesh.mesh1.idx_sorted], dtype=np.float64)
cdef cnp.float64_t[:] weights2 = np.ascontiguousarray(weights2in[double_mesh.mesh2.idx_sorted], dtype=np.float64)
cdef cnp.int64_t[:] jtags1 = np.ascontiguousarray(jtags1in[double_mesh.mesh1.idx_sorted], dtype=np.int64)
cdef cnp.int64_t[:] jtags2 = np.ascontiguousarray(jtags2in[double_mesh.mesh2.idx_sorted], dtype=np.int64)

cdef cnp.int64_t icell1, icell2
cdef cnp.int64_t[:] cell1_indices = np.ascontiguousarray(double_mesh.mesh1.cell_id_indices, dtype=np.int64)
cdef cnp.int64_t[:] cell2_indices = np.ascontiguousarray(double_mesh.mesh2.cell_id_indices, dtype=np.int64)

cdef cnp.int64_t ifirst1, ilast1, ifirst2, ilast2

cdef int ix2, iy2, iz2, ix1, iy1, iz1
cdef int nonPBC_ix2, nonPBC_iy2, nonPBC_iz2

cdef int num_x2_covering_steps = int(np.ceil(
double_mesh.search_xlength / double_mesh.mesh2.xcell_size))
cdef int num_y2_covering_steps = int(np.ceil(
double_mesh.search_ylength / double_mesh.mesh2.ycell_size))
cdef int num_z2_covering_steps = int(np.ceil(
double_mesh.search_zlength / double_mesh.mesh2.zcell_size))

cdef int leftmost_ix2, rightmost_ix2
cdef int leftmost_iy2, rightmost_iy2
cdef int leftmost_iz2, rightmost_iz2

cdef int num_x1divs = double_mesh.mesh1.num_xdivs
cdef int num_y1divs = double_mesh.mesh1.num_ydivs
cdef int num_z1divs = double_mesh.mesh1.num_zdivs
cdef int num_x2divs = double_mesh.mesh2.num_xdivs
cdef int num_y2divs = double_mesh.mesh2.num_ydivs
cdef int num_z2divs = double_mesh.mesh2.num_zdivs
cdef int num_x2_per_x1 = num_x2divs // num_x1divs
cdef int num_y2_per_y1 = num_y2divs // num_y1divs
cdef int num_z2_per_z1 = num_z2divs // num_z1divs

cdef cnp.float64_t x2shift, y2shift, z2shift, dx, dy, dz, dxy_sq, dz_sq
cdef cnp.float64_t x1tmp, y1tmp, z1tmp

cdef cnp.int64_t j1, j2
cdef cnp.float64_t w1, w2

cdef int Ni, Nj, i, j, k, l, g
cdef cnp.int64_t s

cdef cnp.float64_t[:] x_icell1, x_icell2
cdef cnp.float64_t[:] y_icell1, y_icell2
cdef cnp.float64_t[:] z_icell1, z_icell2
cdef cnp.float64_t[:] w_icell1, w_icell2
cdef cnp.int64_t[:] j_icell1, j_icell2

for icell1 in range(first_cell1_element, last_cell1_element):
ifirst1 = cell1_indices[icell1]
ilast1 = cell1_indices[icell1+1]

#extract the points in cell1
x_icell1 = x1[ifirst1:ilast1]
y_icell1 = y1[ifirst1:ilast1]
z_icell1 = z1[ifirst1:ilast1]

#extract the weights in cell1
w_icell1 = weights1[ifirst1:ilast1]

#extract the subvolume tags in cell1
j_icell1 = jtags1[ifirst1:ilast1]

Ni = ilast1 - ifirst1
if Ni > 0:

ix1 = icell1 // (num_y1divs*num_z1divs)
iy1 = (icell1 - ix1*num_y1divs*num_z1divs) // num_z1divs
iz1 = icell1 - (ix1*num_y1divs*num_z1divs) - (iy1*num_z1divs)

leftmost_ix2 = ix1*num_x2_per_x1 - num_x2_covering_steps
leftmost_iy2 = iy1*num_y2_per_y1 - num_y2_covering_steps
leftmost_iz2 = iz1*num_z2_per_z1 - num_z2_covering_steps

rightmost_ix2 = (ix1+1)*num_x2_per_x1 + num_x2_covering_steps
rightmost_iy2 = (iy1+1)*num_y2_per_y1 + num_y2_covering_steps
rightmost_iz2 = (iz1+1)*num_z2_per_z1 + num_z2_covering_steps

for nonPBC_ix2 in range(leftmost_ix2, rightmost_ix2):
if nonPBC_ix2 < 0:
x2shift = -xperiod*PBCs
elif nonPBC_ix2 >= num_x2divs:
x2shift = +xperiod*PBCs
else:
x2shift = 0.
# Now apply the PBCs
ix2 = nonPBC_ix2 % num_x2divs

for nonPBC_iy2 in range(leftmost_iy2, rightmost_iy2):
if nonPBC_iy2 < 0:
y2shift = -yperiod*PBCs
elif nonPBC_iy2 >= num_y2divs:
y2shift = +yperiod*PBCs
else:
y2shift = 0.
# Now apply the PBCs
iy2 = nonPBC_iy2 % num_y2divs

for nonPBC_iz2 in range(leftmost_iz2, rightmost_iz2):
if nonPBC_iz2 < 0:
z2shift = -zperiod*PBCs
elif nonPBC_iz2 >= num_z2divs:
z2shift = +zperiod*PBCs
else:
z2shift = 0.
# Now apply the PBCs
iz2 = nonPBC_iz2 % num_z2divs

icell2 = ix2*(num_y2divs*num_z2divs) + iy2*num_z2divs + iz2
ifirst2 = cell2_indices[icell2]
ilast2 = cell2_indices[icell2+1]

#extract the points in cell2
x_icell2 = x2[ifirst2:ilast2]
y_icell2 = y2[ifirst2:ilast2]
z_icell2 = z2[ifirst2:ilast2]

#extract the weights in cell1
w_icell2 = weights2[ifirst2:ilast2]

#extract the subvolume tags in cell1
j_icell2 = jtags2[ifirst2:ilast2]

Nj = ilast2 - ifirst2
#loop over points in cell1
if Nj > 0:
for i in range(0,Ni):
x1tmp = x_icell1[i] - x2shift
y1tmp = y_icell1[i] - y2shift
z1tmp = z_icell1[i] - z2shift

w1 = w_icell1[i]
j1 = j_icell1[i]
#loop over points in cell2
for j in range(0,Nj):
#calculate the square distance
dx = x1tmp - x_icell2[j]
dy = y1tmp - y_icell2[j]
dz = z1tmp - z_icell2[j]
dxy_sq = dx*dx + dy*dy
dz_sq = dz*dz

w2 = w_icell2[j]
j2 = j_icell2[j]

for s in range(N_samples+1):
k = num_rp_bins-1
while dxy_sq<=rp_bins_squared[k]:
g = num_pi_bins-1
while dz_sq<=pi_bins_squared[g]:
counts[s,k,g] += jweight(s, j1, j2, w1, w2)
g=g-1
if g<0: break
k=k-1
if k<0: break


return np.array(counts)


cdef inline cnp.float64_t jweight(cnp.int64_t j, cnp.int64_t j1, cnp.int64_t j2,
cnp.float64_t w1, cnp.float64_t w2):
"""
Return the jackknife weighted count.

parameters
----------
j : int
subsample being removed

j1 : int
integer label indicating which subsample point 1 occupies

j2 : int
integer label indicating which subsample point 2 occupies

w1 : float
weight associated with point 1

w2 : float
weight associated with point 2

Returns
-------
w : double
0.0, w1*w2*0.5, or w1*w2

Notes
-----
We use the tag '0' to indicated we want to use the entire sample, i.e. no subsample
should be labeled with a '0'.

jackknife wiehgt is caclulated as follows:
if both points are inside the sample, return w1*w2
if both points are outside the sample, return 0.0
if one point is within and one point is outside the sample, return 0.5*w1*w2
"""
cdef cnp.float64_t result
if j==0:
result = w1 * w2
# both outside the sub-sample
elif (j1 == j2) & (j1 == j):
result = 0.0
# both inside the sub-sample
elif (j1 != j) & (j2 != j):
result = (w1 * w2)
# only one inside the sub-sample
elif (j1 != j2) & ((j1 == j) | (j2 == j)):
result = 0.5*(w1 * w2)

return result



Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ __all__ = ('npairs_xy_z_engine', )
@cython.nonecheck(False)
def npairs_xy_z_engine(double_mesh, x1in, y1in, z1in, x2in, y2in, z2in,
rp_bins, pi_bins, cell1_tuple):
r""" Cython engine for counting pairs of points as a function of projected separation.
r""" Cython engine for counting pairs of points as a function of projected and parrallel separation.

Parameters
------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"npairs_3d_engine.pyx", "npairs_projected_engine.pyx",
"npairs_xy_z_engine.pyx", "npairs_jackknife_3d_engine.pyx", "npairs_s_mu_engine.pyx",
"pairwise_distance_3d_engine.pyx", "pairwise_distance_xy_z_engine.pyx",
"weighted_npairs_s_mu_engine.pyx")
"weighted_npairs_s_mu_engine.pyx", "npairs_jackknife_xy_z_engine.pyx")
THIS_PKG_NAME = '.'.join(__name__.split('.')[:-1])


Expand Down
Loading