Skip to content

Commit

Permalink
Merge pull request #117 from static-frame/115/block-index-zero-shape
Browse files Browse the repository at this point in the history
`BlockIndex` handling for zero-width arrays
  • Loading branch information
flexatone authored May 17, 2023
2 parents bebdc97 + f86fa4b commit 1d69ef9
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 26 deletions.
9 changes: 9 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ ArrayKit requires the following:
What is New in ArrayKit
-------------------------

0.4.1
............

Updated ``BlockIndex.register()`` to handle 0-column 2D arrays and return False.

Added ``BlockIndex.rows``, ``BlockIndex.columns`` properties.

Updated unset ``BlockIndex.dtype`` to return a float dtype.


0.4.0
............
Expand Down
2 changes: 1 addition & 1 deletion doc/articles/block_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pickle

from arraykit import BlockIndex
# from arraykit import ErrorInitBlocks
# from arraykit import ErrorInitTypeBlocks
from arraykit import shape_filter
from arraykit import resolve_dtype

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from setuptools import setup
from pathlib import Path

AK_VERSION = '0.4.0'
AK_VERSION = '0.4.1'

def get_long_description() -> str:
return '''The ArrayKit library provides utilities for creating and transforming NumPy arrays, implementing performance-critical StaticFrame operations as Python C extensions.
Expand Down
2 changes: 1 addition & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from ._arraykit import __version__
from ._arraykit import ArrayGO as ArrayGO
from ._arraykit import BlockIndex as BlockIndex
from ._arraykit import ErrorInitBlocks as ErrorInitBlocks
from ._arraykit import ErrorInitTypeBlocks as ErrorInitTypeBlocks

from ._arraykit import immutable_filter as immutable_filter
from ._arraykit import mloc as mloc
Expand Down
8 changes: 5 additions & 3 deletions src/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ _T = tp.TypeVar('_T')

__version__: str

class ErrorInitBlocks:
class ErrorInitTypeBlocks:
def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None: ...
def with_traceback(self, tb: Exception) -> Exception: ...
def __setstate__(self) -> None: ...
Expand All @@ -27,10 +27,12 @@ class ArrayGO:

class BlockIndex:
shape: tp.Tuple[int, int]
dtype: tp.Optional[np.dtype]
dtype: np.dtype
rows: int
columns: int

def __init__() -> None: ...
def register(self, __value: object) -> None: ...
def register(self, __value: np.ndarray) -> bool: ...
def to_list(self,) -> tp.List[int]: ...
def to_bytes(self,) -> bytes: ...
def copy(self,) -> 'BlockIndex': ...
Expand Down
42 changes: 31 additions & 11 deletions src/_arraykit.c
Original file line number Diff line number Diff line change
Expand Up @@ -4114,7 +4114,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
//------------------------------------------------------------------------------

static PyTypeObject BlockIndexType;
static PyObject *ErrorInitBlocks;
static PyObject *ErrorInitTypeBlocks;

// NOTE: we use platform size types here, which are appropriate for the values, but might pose issues if trying to pass pickles between 32 and 64 bit machines.
typedef struct BlockIndexRecord {
Expand Down Expand Up @@ -4777,18 +4777,18 @@ BlockIndex_repr(BlockIndexObject *self) {
dt);
}

// Returns NULL on error, None otherwise. This checks and raises on non-array inputs, dimensions other than 1 or 2.
// Returns NULL on error, True if the block should be retained, False if the block has zero columns and should not be retained. This checks and raises on non-array inputs, dimensions other than 1 or 2, and mis-aligned row counts.
static PyObject *
BlockIndex_register(BlockIndexObject *self, PyObject *value) {
if (!PyArray_Check(value)) {
PyErr_Format(ErrorInitBlocks, "Found non-array block: %R", value);
PyErr_Format(ErrorInitTypeBlocks, "Found non-array block: %R", value);
return NULL;
}
PyArrayObject *a = (PyArrayObject *)value;
int ndim = PyArray_NDIM(a);

if (ndim < 1 || ndim > 2) {
PyErr_Format(ErrorInitBlocks, "Array block has invalid dimensions: %i", ndim);
PyErr_Format(ErrorInitTypeBlocks, "Array block has invalid dimensions: %i", ndim);
return NULL;
}
Py_ssize_t increment = ndim == 1 ? 1 : PyArray_DIM(a, 1);
Expand All @@ -4799,13 +4799,17 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
self->row_count = alignment;
}
else if (self->row_count != alignment) {
PyErr_Format(ErrorInitBlocks,
PyErr_Format(ErrorInitTypeBlocks,
"Array block has unaligned row count: found %i, expected %i",
alignment,
self->row_count);
return NULL;
}

if (increment == 0) {
Py_RETURN_FALSE;
}

PyArray_Descr* dt = PyArray_DESCR(a); // borrowed ref
if (self->dtype == NULL) {
Py_INCREF((PyObject*)dt);
Expand All @@ -4829,7 +4833,7 @@ BlockIndex_register(BlockIndexObject *self, PyObject *value) {
self->bir_count++;
}
self->block_count++;
Py_RETURN_NONE;
Py_RETURN_TRUE;
}


Expand Down Expand Up @@ -4942,20 +4946,36 @@ BlockIndex_iter(BlockIndexObject* self) {

// Getter for the `shape` attribute: builds a 2-tuple of (row_count, bir_count), i.e. (rows, columns).
static PyObject *
BlockIndex_shape_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
    // NOTE: the tuple is rebuilt on every access; this could be cached
    PyObject *shape = Py_BuildValue("nn", self->row_count, self->bir_count);
    return shape;
}

// Getter for the `rows` attribute: the stored row count as a Python int.
static PyObject *
BlockIndex_rows_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
    PyObject *rows = PyLong_FromSsize_t(self->row_count);
    return rows;
}

// Getter for the `columns` attribute: the stored column count (bir_count) as a Python int.
static PyObject *
BlockIndex_columns_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
    PyObject *columns = PyLong_FromSsize_t(self->bir_count);
    return columns;
}

// Return the resolved dtype for all registered blocks. If no blocks have been registered, this will return a float64 dtype.
static PyObject *
BlockIndex_dtype_getter(BlockIndexObject *self, void* Py_UNUSED(closure)){
    if (self->dtype != NULL) {
        // Give the caller its own reference to the stored descriptor.
        Py_INCREF(self->dtype);
        return (PyObject*)self->dtype;
    }
    // No dtype has been stored yet: fall back to float64 rather than None.
    // NOTE: could use NPY_DEFAULT_TYPE here; SF defines this explicitly as float64
    // PyArray_DescrFromType returns a new reference, which is passed straight to the caller.
    return (PyObject*)PyArray_DescrFromType(NPY_FLOAT64);
}


// Attribute table for BlockIndex. Each entry gives an attribute name and its getter; the remaining slots (setter, doc, closure) are NULL, so no setter is provided for any attribute.
static struct PyGetSetDef BlockIndex_getset[] = {
{"shape", (getter)BlockIndex_shape_getter, NULL, NULL, NULL},
{"rows", (getter)BlockIndex_rows_getter, NULL, NULL, NULL},
{"columns", (getter)BlockIndex_columns_getter, NULL, NULL, NULL},
{"dtype", (getter)BlockIndex_dtype_getter, NULL, NULL, NULL},
// Sentinel entry terminating the table.
{NULL},
};
Expand Down Expand Up @@ -5380,12 +5400,12 @@ PyInit__arraykit(void)
{
import_array();

ErrorInitBlocks = PyErr_NewExceptionWithDoc(
"arraykit.ErrorInitBlocks",
ErrorInitTypeBlocks = PyErr_NewExceptionWithDoc(
"arraykit.ErrorInitTypeBlocks",
"RuntimeError error in block initialization.",
PyExc_RuntimeError,
NULL);
if (ErrorInitBlocks == NULL) {
if (ErrorInitTypeBlocks == NULL) {
return NULL;
}

Expand All @@ -5411,7 +5431,7 @@ PyInit__arraykit(void)
PyModule_AddObject(m, "BlockIndex", (PyObject *) &BlockIndexType) ||
PyModule_AddObject(m, "ArrayGO", (PyObject *) &ArrayGOType) ||
PyModule_AddObject(m, "deepcopy", deepcopy) ||
PyModule_AddObject(m, "ErrorInitBlocks", ErrorInitBlocks)
PyModule_AddObject(m, "ErrorInitTypeBlocks", ErrorInitTypeBlocks)
){
Py_DECREF(deepcopy);
Py_XDECREF(m);
Expand Down
57 changes: 49 additions & 8 deletions test/test_block_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@
import numpy as np

from arraykit import BlockIndex
from arraykit import ErrorInitBlocks
from arraykit import ErrorInitTypeBlocks


class TestUnit(unittest.TestCase):

def test_block_index_init_a(self) -> None:
bi1 = BlockIndex()
self.assertEqual(bi1.dtype, np.dtype(float))
# print(bi1)

def test_block_index_init_b1(self) -> None:
Expand Down Expand Up @@ -52,19 +53,19 @@ def test_block_index_init_d(self) -> None:

def test_block_index_register_a(self) -> None:
bi1 = BlockIndex()
with self.assertRaises(ErrorInitBlocks):
with self.assertRaises(ErrorInitTypeBlocks):
bi1.register('foo')

with self.assertRaises(ErrorInitBlocks):
with self.assertRaises(ErrorInitTypeBlocks):
bi1.register(3.5)

def test_block_index_register_b(self) -> None:

bi1 = BlockIndex()
with self.assertRaises(ErrorInitBlocks):
with self.assertRaises(ErrorInitTypeBlocks):
bi1.register(np.array(0))

with self.assertRaises(ErrorInitBlocks):
with self.assertRaises(ErrorInitTypeBlocks):
bi1.register(np.arange(12).reshape(2,3,2))


Expand All @@ -76,6 +77,8 @@ def test_block_index_register_c(self) -> None:
self.assertEqual(bi1.to_list(),
[(0, 0), (1, 0), (2, 0), (2, 1)])
self.assertEqual(bi1.shape, (3, 4))
self.assertEqual(bi1.rows, 3)
self.assertEqual(bi1.columns, 4)

def test_block_index_register_d(self) -> None:
bi1 = BlockIndex()
Expand All @@ -87,18 +90,55 @@ def test_block_index_register_d(self) -> None:
[(0, 0), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (2, 0), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (3, 5)]
)
self.assertEqual(bi1.shape, (2, 14))
self.assertEqual(bi1.rows, 2)
self.assertEqual(bi1.columns, 14)

def test_block_index_register_e(self) -> None:
bi1 = BlockIndex()
bi1.register(np.arange(2))
with self.assertRaises(ErrorInitBlocks):
with self.assertRaises(ErrorInitTypeBlocks):
bi1.register(np.arange(12).reshape(3,4))


def test_block_index_register_f(self) -> None:
bi1 = BlockIndex()
a1 = np.arange(20000).reshape(2, 10_000) #.reshape(2, 10_000)
a1 = np.arange(20000).reshape(2, 10_000)
bi1.register(a1)
self.assertEqual(bi1.rows, 2)
self.assertEqual(bi1.columns, 10_000)


def test_block_index_register_g(self) -> None:
bi1 = BlockIndex()
a1 = np.array(()).reshape(4, 0)
self.assertFalse(bi1.register(a1))
self.assertEqual(bi1.shape, (4, 0))
# as no dtype has been registered, we will get default float
self.assertEqual(bi1.dtype, np.dtype(float))

a2 = np.arange(8).reshape(4, 2).astype(bool)
self.assertTrue(bi1.register(a2))
self.assertEqual(bi1.shape, (4, 2))
self.assertEqual(bi1.dtype, np.dtype(bool))


def test_block_index_register_h(self) -> None:
bi1 = BlockIndex()
a1 = np.array(()).reshape(0, 4).astype(bool)
self.assertTrue(bi1.register(a1))
self.assertEqual(bi1.shape, (0, 4))
self.assertEqual(bi1.dtype, np.dtype(bool))

a2 = np.array(()).reshape(0, 0).astype(float)
self.assertFalse(bi1.register(a2))
self.assertEqual(bi1.shape, (0, 4))
# dtype is still bool
self.assertEqual(bi1.dtype, np.dtype(bool))

a3 = np.array(()).reshape(0, 3).astype(int)
self.assertTrue(bi1.register(a3))
self.assertEqual(bi1.shape, (0, 7))
self.assertEqual(bi1.dtype, np.dtype(object))


#---------------------------------------------------------------------------
Expand Down Expand Up @@ -191,10 +231,11 @@ def test_block_index_getitem_a(self) -> None:
bi1 = BlockIndex()
bi1.register(np.arange(12).reshape(2,6))
self.assertEqual(bi1.shape, (2, 6))
self.assertEqual(bi1.columns, 6)

bi1.register(np.arange(4).reshape(2,2))
self.assertEqual(bi1.shape, (2, 8))

self.assertEqual(bi1.columns, 8)

def test_block_index_getitem_b(self) -> None:
bi1 = BlockIndex()
Expand Down
2 changes: 1 addition & 1 deletion test/test_pyi.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def from_module(cls, module):
continue
obj = getattr(module, name)
if isinstance(obj, type): # a class
if name == ak.ErrorInitBlocks.__name__:
if name == ak.ErrorInitTypeBlocks.__name__:
# skip as there is Python version variability
continue
classes[name] = []
Expand Down

0 comments on commit 1d69ef9

Please sign in to comment.