static-frame · chaburkland · Mar 16, 2023 · Mar 21, 2023 · Mar 21, 2023 · Mar 21, 2023
diff --git a/README.rst b/README.rst
@@ -86,6 +86,10 @@ Extended arguments to and functionality in ``split_after_count()`` to support th
 
 Now building wheels for 3.11.
 
+0.1.12
+............
+
+Implemented ``is_sorted()``.
 
 0.2.2
 ............

diff --git a/src/__init__.py b/src/__init__.py
@@ -19,6 +19,7 @@
 from ._arraykit import delimited_to_arrays as delimited_to_arrays
 from ._arraykit import iterable_str_to_array_1d as iterable_str_to_array_1d
 from ._arraykit import get_new_indexers_and_screen as get_new_indexers_and_screen
+from ._arraykit import is_sorted as is_sorted
 from ._arraykit import split_after_count as split_after_count
 from ._arraykit import count_iteration as count_iteration
 from ._arraykit import first_true_1d as first_true_1d

diff --git a/src/__init__.pyi b/src/__init__.pyi
@@ -72,6 +72,7 @@ def resolve_dtype_iter(__dtypes: tp.Iterable[np.dtype]) -> np.dtype: ...
 def isna_element(__value: tp.Any, include_none: bool = True) -> bool: ...
 def dtype_from_element(__value: tp.Optional[tp.Hashable]) -> np.dtype: ...
 def get_new_indexers_and_screen(indexers: np.ndarray, positions: np.ndarray) -> tp.Tuple[np.ndarray, np.ndarray]: ...
+def is_sorted(arr: np.ndarray) -> bool: ...
 
 def first_true_1d(__array: np.ndarray, *, forward: bool) -> int: ...
 def first_true_2d(__array: np.ndarray, *, forward: bool, axis: int) -> np.ndarray: ...
diff --git a/src/_arraykit.c b/src/_arraykit.c
@@ -4031,7 +4031,7 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
     Py_DECREF(element_locations);
 
     // new_positions = order_found[:num_unique]
-    PyObject *new_positions = PySequence_GetSlice((PyObject*)order_found, 0, num_found);
+    PyObject *new_positions = PySequence_GetSlice((PyObject*)order_found, 0, (Py_ssize_t)num_found);
     Py_DECREF(order_found);
     if (new_positions == NULL) {
         return NULL;
@@ -4058,6 +4058,181 @@ get_new_indexers_and_screen(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kw
         return NULL;
 }
 
+//------------------------------------------------------------------------------
+
+# define AK_COMPARE_SIMPLE(a, b) ((a) > (b)) /* true when a is strictly greater than b; parenthesized so it composes with !, && etc. */
+# define AK_COMPARE_COMPLEX(a, b) ((a).real > (b).real || ((a).real == (b).real && (a).imag > (b).imag)) /* strictly greater, ordering by real part, then imaginary part */
+
+/* Body of one `case` in is_sorted(): scans `arr` as `ctype` and returns Py_False
+ * if compare_macro(element, next) holds for any adjacent pair, else Py_True.
+ * Reads the enclosing locals `arr`, `contiguous` and `arr_size`. The bound is
+ * `i + 1 < arr_size` because the unsigned `arr_size - 1` wraps when empty. */
+# define AK_IS_SORTED(ctype, compare_macro)                          \
+    {                                                                \
+        bool ordered_ = true;                                        \
+        NPY_BEGIN_THREADS_DEF;                                       \
+        NPY_BEGIN_THREADS;                                           \
+        if (contiguous) {                                            \
+            ctype* data_ = (ctype*)PyArray_DATA(arr);                \
+            for (size_t i = 0; ordered_ && i + 1 < arr_size; ++i) {  \
+                ctype element = data_[i];                            \
+                ctype next = data_[i + 1];                           \
+                ordered_ = !(compare_macro(element, next));          \
+            }                                                        \
+        }                                                            \
+        else {                                                       \
+            for (size_t i = 0; ordered_ && i + 1 < arr_size; ++i) {  \
+                ctype element = *(ctype*)PyArray_GETPTR1(arr, i);    \
+                ctype next = *(ctype*)PyArray_GETPTR1(arr, i + 1);   \
+                ordered_ = !(compare_macro(element, next));          \
+            }                                                        \
+        }                                                            \
+        NPY_END_THREADS;                                             \
+        if (!ordered_) {                                             \
+            Py_RETURN_FALSE;                                         \
+        }                                                            \
+        Py_RETURN_TRUE;                                              \
+    }
+
+
+/* Return true when the 1-D bytes (NPY_STRING) or unicode (NPY_UNICODE) array
+ * `arr` of `arr_size` items is in non-descending order. The result is a plain
+ * C bool, so no Python object is created here. Items are reached through the
+ * array stride, which serves both layouts; `contiguous` is accepted only for
+ * the caller's existing signature. Unicode items are compared per UCS4 code
+ * point (native byte order assumed) because strncmp() stops at zero bytes. */
+static bool
+AK_is_sorted_string(PyArrayObject* arr, bool contiguous, size_t arr_size)
+{
+    (void)contiguous;
+    bool is_unicode = PyArray_TYPE(arr) == NPY_UNICODE;
+    size_t item_size = (size_t)PyArray_ITEMSIZE(arr);
+    size_t code_points = item_size / sizeof(npy_ucs4);
+    npy_intp stride = PyArray_STRIDE(arr, 0);
+    char *element = PyArray_BYTES(arr);
+    bool sorted = true;
+    NPY_BEGIN_THREADS_DEF;
+    NPY_BEGIN_THREADS;
+    // Counting pairs from 1 keeps an empty array from wrapping `arr_size - 1`.
+    for (size_t i = 1; sorted && i < arr_size; ++i, element += stride) {
+        char *next = element + stride;
+        if (!is_unicode) {
+            sorted = strncmp(element, next, item_size) <= 0;
+            continue;
+        }
+        npy_ucs4 *a = (npy_ucs4*)element;
+        npy_ucs4 *b = (npy_ucs4*)next;
+        size_t j = 0;
+        while (j < code_points && a[j] == b[j]) {
+            ++j;
+        }
+        sorted = (j == code_points) || (a[j] < b[j]);
+    }
+    NPY_END_THREADS;
+    return sorted;
+}
+
+
+/* Strict `a > b` for IEEE 754 binary16 values stored as raw bits in npy_half.
+ * A plain `>` on npy_half compares the bit patterns as unsigned integers and
+ * mis-orders negative values, so compare sign-aware magnitudes instead. NaN is
+ * never greater, matching C's `>` on the other floating-point types. */
+static bool
+AK_half_gt(npy_half a, npy_half b)
+{
+    npy_int32 mag_a = (npy_int32)(a & 0x7FFFu);
+    npy_int32 mag_b = (npy_int32)(b & 0x7FFFu);
+    if (mag_a > 0x7C00 || mag_b > 0x7C00) {
+        return false;
+    }
+    return ((a & 0x8000u) ? -mag_a : mag_a) > ((b & 0x8000u) ? -mag_b : mag_b);
+}
+
+/* is_sorted(array): return True when the 1-D array is in non-descending order.
+ * Complex values order by real part, then imaginary part; a pair involving NaN
+ * is never reported as out of order. Raises ValueError when the array is not
+ * 1-dimensional, is byte-swapped, or has an unsupported dtype (e.g. object). */
+static PyObject *
+is_sorted(PyObject *Py_UNUSED(m), PyObject *arg)
+{
+    AK_CHECK_NUMPY_ARRAY(arg);
+    PyArrayObject *arr = (PyArrayObject*)arg;
+
+    if (PyArray_NDIM(arr) != 1) {
+        PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional");
+        return NULL;
+    }
+    // The scans below reinterpret raw item bytes as native C types.
+    if (!PyArray_ISNOTSWAPPED(arr)) {
+        PyErr_SetString(PyExc_ValueError, "Array must be in native byte order");
+        return NULL;
+    }
+
+    bool contiguous = (bool)PyArray_IS_C_CONTIGUOUS(arr);
+    size_t arr_size = (size_t)PyArray_SIZE(arr);
+    // The scans bound their loops with the unsigned `arr_size - 1`, which wraps
+    // for an empty array; a size of one visits no pairs and still lets an
+    // unsupported dtype reach the error below.
+    if (arr_size == 0) {
+        arr_size = 1;
+    }
+
+    switch (PyArray_TYPE(arr)) {
+        case NPY_BOOL:;
+            AK_IS_SORTED(npy_bool, AK_COMPARE_SIMPLE)
+        case NPY_BYTE:;
+            AK_IS_SORTED(npy_byte, AK_COMPARE_SIMPLE)
+        case NPY_UBYTE:;
+            AK_IS_SORTED(npy_ubyte, AK_COMPARE_SIMPLE)
+        case NPY_SHORT:;
+            AK_IS_SORTED(npy_short, AK_COMPARE_SIMPLE)
+        case NPY_USHORT:;
+            AK_IS_SORTED(npy_ushort, AK_COMPARE_SIMPLE)
+        case NPY_INT:;
+            AK_IS_SORTED(npy_int, AK_COMPARE_SIMPLE)
+        case NPY_UINT:;
+            AK_IS_SORTED(npy_uint, AK_COMPARE_SIMPLE)
+        case NPY_LONG:;
+            AK_IS_SORTED(npy_long, AK_COMPARE_SIMPLE)
+        case NPY_ULONG:;
+            AK_IS_SORTED(npy_ulong, AK_COMPARE_SIMPLE)
+        case NPY_LONGLONG:;
+            AK_IS_SORTED(npy_longlong, AK_COMPARE_SIMPLE)
+        case NPY_ULONGLONG:;
+            AK_IS_SORTED(npy_ulonglong, AK_COMPARE_SIMPLE)
+        case NPY_FLOAT:;
+            AK_IS_SORTED(npy_float, AK_COMPARE_SIMPLE)
+        case NPY_DOUBLE:;
+            AK_IS_SORTED(npy_double, AK_COMPARE_SIMPLE)
+        # ifdef PyFloat128ArrType_Type
+        case NPY_LONGDOUBLE:;
+            AK_IS_SORTED(npy_longdouble, AK_COMPARE_SIMPLE)
+        # endif
+        case NPY_DATETIME:;
+            AK_IS_SORTED(npy_datetime, AK_COMPARE_SIMPLE)
+        case NPY_TIMEDELTA:;
+            AK_IS_SORTED(npy_timedelta, AK_COMPARE_SIMPLE)
+        case NPY_HALF:;
+            AK_IS_SORTED(npy_half, AK_half_gt)
+        case NPY_CFLOAT:;
+            AK_IS_SORTED(npy_complex64, AK_COMPARE_COMPLEX)
+        case NPY_CDOUBLE:;
+            AK_IS_SORTED(npy_complex128, AK_COMPARE_COMPLEX)
+        # ifdef PyComplex256ArrType_Type
+        case NPY_CLONGDOUBLE:;
+            AK_IS_SORTED(npy_complex256, AK_COMPARE_COMPLEX)
+        # endif
+        case NPY_STRING:
+        case NPY_UNICODE:
+            return PyBool_FromLong(AK_is_sorted_string(arr, contiguous, arr_size));
+        default:;
+            PyErr_Format(PyExc_ValueError, "Unsupported dtype: %s",
+                    PyArray_DESCR(arr)->typeobj->tp_name);
+            return NULL;
+    }
+    Py_UNREACHABLE();
+}
+
 //------------------------------------------------------------------------------
 // ArrayGO
 //------------------------------------------------------------------------------
@@ -4364,6 +4539,7 @@ static PyMethodDef arraykit_methods[] =  {
             METH_VARARGS | METH_KEYWORDS,
             NULL},
     {"dtype_from_element", dtype_from_element, METH_O, NULL},
+    {"is_sorted", is_sorted, METH_O, NULL},
     {"get_new_indexers_and_screen",
             (PyCFunction)get_new_indexers_and_screen,
             METH_VARARGS | METH_KEYWORDS,