Falling back to NumPy 1.x

parashardhapola · Aug 1, 2024 · 84e8c63
1 parent f5b1f23
commit 84e8c63
Show file tree

Hide file tree

Showing 9 changed files with 28 additions and 27 deletions.
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.29.3
+0.29.4
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
-numpy
+numpy==1.26.4
 pandas
 scipy
 scikit-learn

diff --git a/scarf/ann.py b/scarf/ann.py
@@ -2,7 +2,7 @@
 import numpy as np
 import pandas as pd
 import dask.array as da
-from numpy.linalg import LinAlgError
+
 from threadpoolctl import threadpool_limits
 from .utils import controlled_compute, logger, tqdmbar
 from .harmony import run_harmony
@@ -206,6 +206,7 @@ def transform_ann(
 
     def _fit_pca(self, disable_scaling, use_for_pca) -> None:
         from sklearn.decomposition import IncrementalPCA
+        from numpy.linalg import LinAlgError
 
         # We fit 1 extra PC dim than specified and then ignore the last PC.
         self._pca = IncrementalPCA(

diff --git a/scarf/datastore/base_datastore.py b/scarf/datastore/base_datastore.py
@@ -347,7 +347,7 @@ def _ini_cell_props(
                     f"({from_assay}) Computing nCounts",
                     self.nthreads,
                 )
-                self.cells.insert(var_name, n_c.astype(np.float_), overwrite=True)
+                self.cells.insert(var_name, n_c.astype(np.float64), overwrite=True)
                 if type(assay) == RNAassay:
                     min_nc = min(n_c)
                     if min(n_c) < assay.sf:
@@ -362,7 +362,7 @@ def _ini_cell_props(
                     f"({from_assay}) Computing nFeatures",
                     self.nthreads,
                 )
-                self.cells.insert(var_name, n_f.astype(np.float_), overwrite=True)
+                self.cells.insert(var_name, n_f.astype(np.float64), overwrite=True)
 
             if type(assay) == RNAassay:
                 if mito_pattern == "":
@@ -493,15 +493,15 @@ def get_cell_vals(
             if vals is None:
                 vals = controlled_compute(
                     assay.normed(cell_idx, feat_idx).mean(axis=1), self.nthreads
-                ).astype(np.float_)
+                ).astype(np.float64)
         else:
             vals = self.cells.fetch(k, key=cell_key)
         if clip_fraction < 0 or clip_fraction > 1:
             raise ValueError(
                 "ERROR: Value for `clip_fraction` parameter should be between 0 and 1"
             )
         if clip_fraction > 0:
-            if vals.dtype in [np.float_, np.uint64]:
+            if vals.dtype in [np.float64, np.uint64]:
                 min_v = np.percentile(vals, 100 * clip_fraction)
                 max_v = np.percentile(vals, 100 - 100 * clip_fraction)
                 vals[vals < min_v] = min_v

diff --git a/scarf/datastore/graph_datastore.py b/scarf/datastore/graph_datastore.py
@@ -2068,7 +2068,7 @@ def load_pca_knn(assay_name):
             (n_cells * n_neighbors, 2),
         )
         zgw = create_zarr_dataset(
-            store, f"weights", (chunk_size,), np.float_, (n_cells * n_neighbors)
+            store, f"weights", (chunk_size,), np.float64, (n_cells * n_neighbors)
         )
 
         zge[:, 0] = merged_graph.row

diff --git a/scarf/plots.py b/scarf/plots.py
@@ -434,7 +434,7 @@ def plot_heatmap(
 def _scatter_fix_type(v: pd.Series, ints_as_cats: bool) -> pd.Series:
     vt = v.dtype
     if v.nunique() == 1:
-        return pd.Series(np.ones(len(v)), index=v.index).astype(np.float_)
+        return pd.Series(np.ones(len(v)), index=v.index).astype(np.float64)
     if vt in [np.bool_]:
         # converting first to int to handle bool
         return v.astype(np.int_).astype("category")
@@ -445,13 +445,13 @@ def _scatter_fix_type(v: pd.Series, ints_as_cats: bool) -> pd.Series:
             logger.warning("Too many categories. set force_ints_as_cats to false")
         return v.astype(np.int_).astype("category")
     else:
-        return v.astype(np.float_)
+        return v.astype(np.float64)
 
 
 def _scatter_fix_mask(v: pd.Series, mask_vals: list, mask_name: str) -> pd.Series:
     if mask_vals is None:
         mask_vals = []
-    mask_vals += [np.NaN]
+    mask_vals += [np.nan]
     iscat = False
     if v.dtype.name == "category":
         iscat = True

diff --git a/scarf/tests/test_datastore.py b/scarf/tests/test_datastore.py
@@ -5,22 +5,22 @@
 
 class TestToyDataStore:
     def test_toy_crdir_metadata(self, toy_crdir_ds):
-        assert np.alltrue(
+        assert np.all(
             toy_crdir_ds.RNA.feats.fetch_all("ids") == ["g1", "g2", "g3", "g4"]
         )
-        assert np.alltrue(toy_crdir_ds.ADT.feats.fetch_all("ids") == ["a1", "a2"])
-        assert np.alltrue(toy_crdir_ds.HTO.feats.fetch_all("ids") == ["h1"])
-        assert np.alltrue(toy_crdir_ds.cells.fetch_all("ids") == ["b1", "b2", "b3"])
+        assert np.all(toy_crdir_ds.ADT.feats.fetch_all("ids") == ["a1", "a2"])
+        assert np.all(toy_crdir_ds.HTO.feats.fetch_all("ids") == ["h1"])
+        assert np.all(toy_crdir_ds.cells.fetch_all("ids") == ["b1", "b2", "b3"])
 
     def test_toy_crdir_rawdata(self, toy_crdir_ds):
-        assert np.alltrue(
+        assert np.all(
             toy_crdir_ds.RNA.rawData.compute()
             == [[5, 0, 0, 2], [3, 3, 0, 7], [3, 3, 0, 7]]
         )
-        assert np.alltrue(
+        assert np.all(
             toy_crdir_ds.ADT.rawData.compute() == [[30, 40], [30, 50], [0, 50]]
         )
-        assert np.alltrue(toy_crdir_ds.HTO.rawData.compute() == [[200], [100], [100]])
+        assert np.all(toy_crdir_ds.HTO.rawData.compute() == [[200], [100], [100]])
 
 
 class TestDataStore:
@@ -70,12 +70,12 @@ def test_graph_indices(self, make_graph, datastore):
     def test_graph_distances(self, make_graph, datastore):
         a = np.load(full_path("knn_distances.npy"))
         b = datastore.z[make_graph]["distances"][:]
-        assert np.alltrue((a - b) < 1e-3)
+        assert np.all((a - b) < 1e-3)
 
     def test_graph_weights(self, make_graph, datastore):
         a = np.load(full_path("knn_weights.npy"))
         b = datastore.z[make_graph]["graph__1.0__1.5"]["weights"][:]
-        assert np.alltrue((a - b) < 1e-5)
+        assert np.all((a - b) < 1e-5)
 
     def test_atac_graph_indices(self, make_atac_graph, atac_datastore):
         a = np.load(full_path("atac_knn_indices.npy"))
@@ -93,7 +93,7 @@ def test_atac_graph_distances(self, make_atac_graph, atac_datastore):
 
         # TODO: activate this when this PR is merged and released in gensim
         # https://github.com/RaRe-Technologies/gensim/pull/3194
-        # assert np.alltrue((a - b) < 1e-5)
+        # assert np.all((a - b) < 1e-5)
 
     def test_leiden_values(self, leiden_clustering, cell_attrs):
         assert len(set(leiden_clustering)) == 10
@@ -117,7 +117,7 @@ def test_umap_values(self, umap, cell_attrs):
         precalc_umap = cell_attrs[["RNA_UMAP1", "RNA_UMAP2"]].values
         assert umap.shape == precalc_umap.shape
         # Disabled the following test because failing on CI
-        # assert np.alltrue((umap - precalc_umap) < 0.1)
+        # assert np.all((umap - precalc_umap) < 0.1)
 
     def test_get_markers(self, marker_search, paris_clustering, datastore):
         precalc_markers = pd.read_csv(full_path("markers_cluster1.csv"), index_col=0)
@@ -173,8 +173,8 @@ def test_run_pseudotime_marker_search(self, pseudotime_markers):
         precalc_markers = pd.read_csv(
             full_path("pseudotime_markers_r_values.csv"), index_col=0
         )
-        assert np.alltrue(precalc_markers.index == pseudotime_markers.index)
-        assert np.alltrue(
+        assert np.all(precalc_markers.index == pseudotime_markers.index)
+        assert np.all(
             precalc_markers.names.values == pseudotime_markers.names.values
         )
         assert np.allclose(

diff --git a/scarf/tests/test_readers.py b/scarf/tests/test_readers.py
@@ -2,11 +2,11 @@
 
 
 def test_toy_crdir_assay_feats_table(toy_crdir_reader):
-    assert np.alltrue(
+    assert np.all(
         toy_crdir_reader.assayFeats.columns
         == np.array(["RNA", "ADT", "RNA", "HTO", "RNA"])
     )
-    assert np.alltrue(
+    assert np.all(
         toy_crdir_reader.assayFeats.values[1:]
         == [[0, 1, 3, 5, 6], [1, 3, 5, 6, 7], [1, 2, 2, 1, 1]]
     )

diff --git a/scarf/writers.py b/scarf/writers.py
@@ -468,7 +468,7 @@ class NaboH5ToZarr:
 
     Attributes:
         h5: A Nabo h5 object.
-        fn: The file name for the Zarr hierarchy.
+        zarr_loc: The file name for the Zarr hierarchy.
         chunkSizes: The requested size of chunks to load into memory and process.
         assayName: The Zarr hierarchy (array or group).
         z: The Zarr hierarchy (array or group).