Skip to content

Commit

Permalink
Merge pull request #529 from aphearin/cuboid_label_relocation
Browse files Browse the repository at this point in the history
Cuboid label relocation
  • Loading branch information
aphearin committed May 27, 2016
2 parents 10faf3d + c502e1e commit 9b9af05
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 169 deletions.
100 changes: 94 additions & 6 deletions halotools/mock_observables/catalog_analysis_helpers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
# -*- coding: utf-8 -*-

""" Common functions used when analyzing
catalogs of galaxies/halos.
""" Common functions used when analyzing catalogs of galaxies/halos.
"""
from __future__ import (absolute_import, division, print_function, unicode_literals)

Expand All @@ -13,7 +9,7 @@

from ..custom_exceptions import HalotoolsError

__all__ = ('mean_y_vs_x', 'return_xyz_formatted_array')
__all__ = ('mean_y_vs_x', 'return_xyz_formatted_array', 'cuboid_subvolume_labels')
__author__ = ['Andrew Hearin']


Expand Down Expand Up @@ -201,6 +197,98 @@ def return_xyz_formatted_array(x, y, z, period=np.inf, **kwargs):
except KeyError:
return pos

def cuboid_subvolume_labels(sample, Nsub, Lbox):
"""
Return integer labels indicating which cubical subvolume of a larger cubical volume a
set of points occupy.
Parameters
----------
sample : array_like
Npts x 3 numpy array containing 3-D positions of points.
Nsub : array_like
Length-3 numpy array of integers indicating how many times to split the volume
along each dimension. If single integer, N, is supplied, ``Nsub`` is set to
[N,N,N], and the volume is split along each dimension N times. The total number
of subvolumes is then given by numpy.prod(Nsub).
Lbox : array_like
Length-3 numpy array definging the lengths of the sides of the cubical volume
that ``sample`` occupies. If only a single scalar is specified, the volume is assumed
to be a cube with side-length Lbox
Returns
-------
labels : numpy.array
numpy array with integer labels in the range [1,numpy.prod(Nsub)] indicating
the subvolume each point in ``sample`` occupies.
N_sub_vol : int
number of subvolumes.
Examples
--------
For demonstration purposes we create a randomly distributed set of points within a
periodic unit cube.
>>> Npts = 1000
>>> Lbox = 1.0
>>> period = np.array([Lbox,Lbox,Lbox])
>>> x = np.random.random(Npts)
>>> y = np.random.random(Npts)
>>> z = np.random.random(Npts)
We transform our *x, y, z* points into the array shape used by the pair-counter by
taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
is used throughout the `~halotools.mock_observables` sub-package:
>>> sample = np.vstack((x,y,z)).T
Divide the volume into cubes with length 0.25 on a side.
>>> Nsub = [4,4,4]
>>> labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
"""

#process inputs and check for consistency
sample = np.atleast_1d(sample).astype('f8')
try:
assert sample.ndim == 2
assert sample.shape[1] == 3
except AssertionError:
msg = ("Input ``sample`` must have shape (Npts, 3)")
raise TypeError(msg)

Nsub = np.atleast_1d(Nsub).astype('i4')
if len(Nsub) == 1:
Nsub = np.array([Nsub[0], Nsub[0], Nsub[0]])
elif len(Nsub) != 3:
msg = "Input ``Nsub`` must be a scalar or length-3 sequence"
raise TypeError(msg)

Lbox = np.atleast_1d(Lbox).astype('f8')
if len(Lbox) == 1:
Lbox = np.array([Lbox[0]]*3)
elif len(Lbox) != 3:
msg = "Input ``Lbox`` must be a scalar or length-3 sequence"
raise TypeError(msg)

dL = Lbox/Nsub # length of subvolumes along each dimension
N_sub_vol = int(np.prod(Nsub)) # total the number of subvolumes
# create an array of unique integer IDs for each subvolume
inds = np.arange(1, N_sub_vol+1).reshape(Nsub[0], Nsub[1], Nsub[2])

#tag each particle with an integer indicating which subvolume it is in
index = np.floor(sample/dL).astype(int)
#take care of the case where a point falls on the boundary
for i in range(3):
index[:, i] = np.where(index[:, i] == Nsub[i], Nsub[i] - 1, index[:, i])
index = inds[index[:,0],index[:,1],index[:,2]].astype(int)

return index, int(N_sub_vol)




Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,80 @@
#!/usr/bin/env python
""" Module providing unit-testing for the functions in
`~halotools.mock_observables.catalog_analysis_helpers` module.
"""
from __future__ import (absolute_import, division, print_function)

from unittest import TestCase

import numpy as np

from astropy.tests.helper import pytest
from astropy.utils.misc import NumpyRNGContext

from .. import catalog_analysis_helpers as cat_helpers
from ..catalog_analysis_helpers import cuboid_subvolume_labels

from ...sim_manager import FakeSim

from .cf_helpers import generate_locus_of_3d_points

from ...custom_exceptions import HalotoolsError

__all__ = ('TestCatalogAnalysisHelpers', )

fixed_seed = 43

def test_cuboid_subvolume_labels_bounds_checking():
Npts = 100
with NumpyRNGContext(fixed_seed):
good_sample = np.random.random((Npts, 3))
bad_sample = np.random.random((Npts, 2))

good_Nsub1 = 3
good_Nsub2 = (4, 4, 4)
bad_Nsub = (3, 3)

good_Lbox = 1
good_Lbox2 = (1, 1, 1)
bad_Lbox = (3, 3)

with pytest.raises(TypeError) as err:
cuboid_subvolume_labels(bad_sample, good_Nsub1, good_Lbox)
substr = "Input ``sample`` must have shape (Npts, 3)"
assert substr in err.value.args[0]

with pytest.raises(TypeError) as err:
cuboid_subvolume_labels(good_sample, bad_Nsub, good_Lbox2)
substr = "Input ``Nsub`` must be a scalar or length-3 sequence"
assert substr in err.value.args[0]

with pytest.raises(TypeError) as err:
cuboid_subvolume_labels(good_sample, good_Nsub2, bad_Lbox)
substr = "Input ``Lbox`` must be a scalar or length-3 sequence"
assert substr in err.value.args[0]

def test_cuboid_subvolume_labels_correctness():
Npts = 100
Nsub = 2
Lbox = 1

sample = generate_locus_of_3d_points(Npts, xc=0.1, yc=0.1, zc=0.1, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 1)

sample = generate_locus_of_3d_points(Npts, xc=0.9, yc=0.9, zc=0.9, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 8)

sample = generate_locus_of_3d_points(Npts, xc=0.1, yc=0.1, zc=0.9, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 2)

sample = generate_locus_of_3d_points(Npts, xc=0.1, yc=0.9, zc=0.1, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 3)



class TestCatalogAnalysisHelpers(TestCase):
""" Class providing tests of the `~halotools.mock_observables.catalog_analysis_helpers`.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
from astropy.tests.helper import pytest
from astropy.utils.misc import NumpyRNGContext

from ..tpcf_jackknife import tpcf_jackknife, cuboid_subvolume_labels
from ..tpcf_jackknife import tpcf_jackknife
from ..tpcf import tpcf

from ...tests.cf_helpers import generate_locus_of_3d_points

slow = pytest.mark.slow

__all__=['test_tpcf_jackknife_corr_func', 'test_tpcf_jackknife_cov_matrix']
Expand Down Expand Up @@ -56,64 +54,3 @@ def test_tpcf_jackknife_cov_matrix():

assert np.shape(err)==(nbins,nbins), "cov matrix not correct shape"

def test_cuboid_subvolume_labels_bounds_checking():
Npts = 100
with NumpyRNGContext(fixed_seed):
good_sample = np.random.random((Npts, 3))
bad_sample = np.random.random((Npts, 2))

good_Nsub1 = 3
good_Nsub2 = (4, 4, 4)
bad_Nsub = (3, 3)

good_Lbox = 1
good_Lbox2 = (1, 1, 1)
bad_Lbox = (3, 3)

with pytest.raises(TypeError) as err:
cuboid_subvolume_labels(bad_sample, good_Nsub1, good_Lbox)
substr = "Input ``sample`` must have shape (Npts, 3)"
assert substr in err.value.args[0]

with pytest.raises(TypeError) as err:
cuboid_subvolume_labels(good_sample, bad_Nsub, good_Lbox2)
substr = "Input ``Nsub`` must be a scalar or length-3 sequence"
assert substr in err.value.args[0]

with pytest.raises(TypeError) as err:
cuboid_subvolume_labels(good_sample, good_Nsub2, bad_Lbox)
substr = "Input ``Lbox`` must be a scalar or length-3 sequence"
assert substr in err.value.args[0]

def test_cuboid_subvolume_labels_correctness():
Npts = 100
Nsub = 2
Lbox = 1

sample = generate_locus_of_3d_points(Npts, xc=0.1, yc=0.1, zc=0.1, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 1)

sample = generate_locus_of_3d_points(Npts, xc=0.9, yc=0.9, zc=0.9, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 8)

sample = generate_locus_of_3d_points(Npts, xc=0.1, yc=0.1, zc=0.9, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 2)

sample = generate_locus_of_3d_points(Npts, xc=0.1, yc=0.9, zc=0.1, seed = fixed_seed)
labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
assert np.all(labels == 3)












100 changes: 2 additions & 98 deletions halotools/mock_observables/two_point_clustering/tpcf_jackknife.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
from ..mock_observables_helpers import (enforce_sample_has_correct_shape,
get_separation_bins_array, get_period, get_num_threads)
from ..pair_counters.mesh_helpers import _enforce_maximum_search_length

from ..catalog_analysis_helpers import cuboid_subvolume_labels
from ...custom_exceptions import HalotoolsError

__all__ = ('tpcf_jackknife', 'cuboid_subvolume_labels')
__all__ = ('tpcf_jackknife', )
__author__ = ('Duncan Campbell', )


Expand Down Expand Up @@ -469,99 +469,3 @@ def _tpcf_jackknife_process_args(sample1, randoms, rbins,
return sample1, rbins, Nsub, sample2, randoms, period, do_auto, do_cross,\
num_threads, _sample1_is_sample2, PBCs

def cuboid_subvolume_labels(sample, Nsub, Lbox):
"""
Return integer labels indicating which cubical subvolume of a larger cubical volume a
set of points occupy.
Parameters
----------
sample : array_like
Npts x 3 numpy array containing 3-D positions of points.
Nsub : array_like
Length-3 numpy array of integers indicating how many times to split the volume
along each dimension. If single integer, N, is supplied, ``Nsub`` is set to
[N,N,N], and the volume is split along each dimension N times. The total number
of subvolumes is then given by numpy.prod(Nsub).
Lbox : array_like
Length-3 numpy array definging the lengths of the sides of the cubical volume
that ``sample`` occupies. If only a single scalar is specified, the volume is assumed
to be a cube with side-length Lbox
Returns
-------
labels : numpy.array
numpy array with integer labels in the range [1,numpy.prod(Nsub)] indicating
the subvolume each point in ``sample`` occupies.
N_sub_vol : int
number of subvolumes.
Examples
--------
For demonstration purposes we create a randomly distributed set of points within a
periodic unit cube.
>>> Npts = 1000
>>> Lbox = 1.0
>>> period = np.array([Lbox,Lbox,Lbox])
>>> x = np.random.random(Npts)
>>> y = np.random.random(Npts)
>>> z = np.random.random(Npts)
We transform our *x, y, z* points into the array shape used by the pair-counter by
taking the transpose of the result of `numpy.vstack`. This boilerplate transformation
is used throughout the `~halotools.mock_observables` sub-package:
>>> sample = np.vstack((x,y,z)).T
Divide the volume into cubes with length 0.25 on a side.
>>> Nsub = [4,4,4]
>>> labels, N_sub_vol = cuboid_subvolume_labels(sample, Nsub, Lbox)
"""

#process inputs and check for consistency
sample = np.atleast_1d(sample).astype('f8')
try:
assert sample.ndim == 2
assert sample.shape[1] == 3
except AssertionError:
msg = ("Input ``sample`` must have shape (Npts, 3)")
raise TypeError(msg)

Nsub = np.atleast_1d(Nsub).astype('i4')
if len(Nsub) == 1:
Nsub = np.array([Nsub[0], Nsub[0], Nsub[0]])
elif len(Nsub) != 3:
msg = "Input ``Nsub`` must be a scalar or length-3 sequence"
raise TypeError(msg)

Lbox = np.atleast_1d(Lbox).astype('f8')
if len(Lbox) == 1:
Lbox = np.array([Lbox[0]]*3)
elif len(Lbox) != 3:
msg = "Input ``Lbox`` must be a scalar or length-3 sequence"
raise TypeError(msg)

dL = Lbox/Nsub # length of subvolumes along each dimension
N_sub_vol = int(np.prod(Nsub)) # total the number of subvolumes
# create an array of unique integer IDs for each subvolume
inds = np.arange(1, N_sub_vol+1).reshape(Nsub[0], Nsub[1], Nsub[2])

#tag each particle with an integer indicating which subvolume it is in
index = np.floor(sample/dL).astype(int)
#take care of the case where a point falls on the boundary
for i in range(3):
index[:, i] = np.where(index[:, i] == Nsub[i], Nsub[i] - 1, index[:, i])
index = inds[index[:,0],index[:,1],index[:,2]].astype(int)

return index, int(N_sub_vol)





0 comments on commit 9b9af05

Please sign in to comment.