From a0c98a16ae2bdd1b261ab1edfbc91d42aca38c53 Mon Sep 17 00:00:00 2001 From: ejohnson643 Date: Thu, 28 Oct 2021 19:04:22 -0500 Subject: [PATCH] Made sure that all dictionary keywords for class initializations are COPIED so that defaults don't get overwritten... --- EMBEDR/affinity.py | 6 +- EMBEDR/nearest_neighbors.py | 5 +- .../EMBEDR_Figure_01v1_DimRed_Zoology.py | 381 +++++++++++++++++ EMBEDR/plots/__init__.py | 0 ...igure_01v1_DimRed_Zoology-checkpoint.ipynb | 202 +++++++++ .../EMBEDR_Figure_01v1_DimRed_Zoology.ipynb | 387 ++++++++++++++++++ 6 files changed, 977 insertions(+), 4 deletions(-) create mode 100644 EMBEDR/plots/EMBEDR_Figure_01v1_DimRed_Zoology.py create mode 100644 EMBEDR/plots/__init__.py create mode 100644 projects/Figures/.ipynb_checkpoints/EMBEDR_Figure_01v1_DimRed_Zoology-checkpoint.ipynb create mode 100644 projects/Figures/EMBEDR_Figure_01v1_DimRed_Zoology.ipynb diff --git a/EMBEDR/affinity.py b/EMBEDR/affinity.py index 9d0bcb9..aecd18a 100644 --- a/EMBEDR/affinity.py +++ b/EMBEDR/affinity.py @@ -89,7 +89,7 @@ def __init__(self, random_state=1, verbose=1): - self.kernel_params = kernel_params + self.kernel_params = kernel_params.copy() self.symmetrize = bool(symmetrize) @@ -371,11 +371,11 @@ def _initialize_affinity_matrix(X, tmp_kNN_params = {'n_neighbors': n_neighbors} tmp_kNN_params.update(kNN_params) - aff_obj = aff_class(kernel_params=kernel_params, + aff_obj = aff_class(kernel_params=kernel_params.copy(), symmetrize=symmetrize, normalization=normalization, precomputed=precomputed, - kNN_params=tmp_kNN_params, + kNN_params=tmp_kNN_params.copy(), n_jobs=n_jobs, random_state=random_state, verbose=verbose, diff --git a/EMBEDR/nearest_neighbors.py b/EMBEDR/nearest_neighbors.py index 0b0f14f..34272a9 100644 --- a/EMBEDR/nearest_neighbors.py +++ b/EMBEDR/nearest_neighbors.py @@ -87,7 +87,10 @@ def __init__(self, **kwargs): self.metric = self._check_metric(metric) - self.metric_params = metric_params + if metric_params is not None: + 
self.metric_params = metric_params.copy() + else: + self.metric_params = None self.n_jobs = int(n_jobs) self.random_state = check_random_state(random_state) self.verbose = float(verbose) diff --git a/EMBEDR/plots/EMBEDR_Figure_01v1_DimRed_Zoology.py b/EMBEDR/plots/EMBEDR_Figure_01v1_DimRed_Zoology.py new file mode 100644 index 0000000..5c627a5 --- /dev/null +++ b/EMBEDR/plots/EMBEDR_Figure_01v1_DimRed_Zoology.py @@ -0,0 +1,381 @@ +""" +############################################################################### + Figure: A Zoology of Distortive Effects (v1) +############################################################################### + + Author: Eric Johnson + Date Created: Monday, March 8, 2021 + Date Edited: Thursday, October 28, 2021 + Email: ericjohnson1.2015@u.northwestern.edu + +############################################################################### + + In this figure, we want to show how different cell types get distorted as + we change algorithm and algorithmic parameters. We also want to illustrate + actual underlying variation in basic properties of data in a DR embedding. + + Specifically, in this Figure, I want to make a 2x2 panel figure, showing + t-SNE and UMAP embeddings with select clusters annotated. These panels + will also contain insets showing the number of effective nearest neighbors + (for t-SNE) or the distance to the kth neighbor (for UMAP). + + In this edit I want to formulate this more as a set of functions that can + be run to quickly generate this figure for any data set. 
+ +############################################################################### +""" +from EMBEDR.embedr import EMBEDR, EMBEDR_sweep +import EMBEDR.plotting_utility as putl + +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import os +import scanpy as sc +import seaborn as sns +import warnings + +warnings.filterwarnings("ignore", message="This figure includes Axes that") +warnings.filterwarnings("ignore", message="tight_layout not applied: ") + + +def make_figure(X, cluster_labels, clusters_2_label=None, label_colors=None, + label_sizes=None, DRAs=None, grid_params={}, project_name=None, + project_dir=None, EMBEDR_params={}, n_rows=None, n_cols=2): + + if clusters_2_label is None: + clusters_2_label = [1, 10, 0, 8, 6, 4, 9, 7, 2, 3] ## By size cOnt + clusters_2_label = sorted(clusters_2_label) + + if label_colors is None: + cblind_cmap = sns.color_palette('colorblind') + l2cl = {cl: (ii + 3) % 10 for ii, cl in enumerate(clust_2_label)} + label_colors = [cblind_cmap[l2cl[ll]] if (ll in clust_2_label) + else 'lightgrey' for ll in labels] + + if label_sizes is None: + label_sizes = [3 if (ll in clust_2_label) else 1 for ll in labels] + + if DRAs is None: + ## Set parameters at which to plot data + DRAs = [('tSNE', 7), + ('UMAP', 15), + ('tSNE', 250), + ('UMAP', 400)] + + if project_name is None: + project_name = "EMBEDR_Figure_01v1_DimRedZoology" + if project_dir is None: + project_dir = "./" + + if n_rows is None: + n_rows = int(np.ceil(len(DRAs) / n_cols)) + + fig, back_axis, main_gs, main_axes = set_main_grid(n_rows=n_rows, + n_cols=n_cols, + **grid_params) + + for algNo, (alg, param) in enumerate(DRAs): + print(f"\nPlotting {alg} embedding (param = {param})") + + if alg.lower() in ['tsne', 't-sne']: + embObj = EMBEDR(X=X, + perplexity=param, + DRA='tsne', + n_data_embed=1, + n_jobs=-1, + project_name=project_name, + project_dir=project_dir, + **EMBEDR_params) + Y, _ = embObj.get_tSNE_embedding(X) + + rowNo = int(algNo / n_cols) + colNo = 
int(algNo % n_cols) + ax = main_axes[rowNo][colNo] + + add_plot_color_by_cluster(Y, cluster_labels) + return + + +def set_main_grid(fig_wid=7.2, fig_hgt=5.76, n_rows=2, n_cols=2, + spine_alpha=0, main_wspace=0.005, main_hspace=0.01, + main_spns_2_show='all', main_spn_alpha=0.5, + main_spn_width=1.0): + + fig = plt.figure(figsize=(fig_wid, fig_hgt)) + + back_axis = putl.make_border_axes(fig.add_subplot(111), + spine_alpha=spine_alpha) + + main_gs = fig.add_gridspec(nrows=n_rows, + ncols=n_cols, + wspace=main_wspace, + hspace=main_hspace) + + main_axes = [] + for rowNo in range(n_rows): + axes_row = [] + for colNo in range(n_cols): + ax = fig.add_subplot(main_gs[rowNo, colNo]) + ax = putl.make_border_axes(ax, + spines_2_show=main_spns_2_show, + spine_alpha=main_spn_alpha, + spine_width=main_spn_width) + axes_row.append(ax) + main_axes.append(axes_row) + + return fig, back_axis, main_gs, main_axes + + +def add_plot_color_by_cluster(Y, cluster_labels, ax, label_colors, label_sizes, + clusters_2_label): + + ax.scatter(*Y.T, + c=label_colors, + s=label_sizes, + alpha=scatter_alpha) + + for cNo, cluster in enumerate(clusters_2_label): + good_idx = (cluster_labels == clusters) + + cluster_median = np.median(Y[good_idx], axis=0) + + cluster_number = cNo + 1 + ax.text(*cluster_median, + f"{cluster_number}", + fontsize=12, + fontweight='bold', + va='center', ha='center') + + # text_off = 0 + # text_h = 0 + # for cNo, lab in enumerate(clust_2_label): + # good_idx = (labels == lab) + + # med_Y = np.median(Y[good_idx], axis=0) + + # ax.text(*med_Y, f"{cNo}", + # fontsize=12, + # fontweight='bold', + # va='center', ha='center') + + # ax_width = back_axis.get_window_extent().width + # ax_height = back_axis.get_window_extent().height / fig.dpi + + # pad = 3 + + # rect_width = ax.get_window_extent().width / ax_width + + # if cNo < 5: + # x_loc = rect_width / 2 + # rect_x = 0 + # else: + # x_loc = 1 - (rect_width / 2) + # rect_x = 1 - rect_width + + # cLab = cOnt_labels[lab].title() 
+ # if "Slamf1-Negative" in cLab: + # cLab = " ".join(cLab.split(" Multipotent ")) + # cLab = f"{cNo}: " + cLab + + # bb = ax.text(x_loc, -0.013 - (cNo % 5) * text_h, + # cLab, + # # r"$N=$" + f"{sum(good_idx)}", + # ha='center', va='top', + # fontsize=10, + # transform=back_axis.transAxes) + + # text_h = (bb.get_size() + 2 * pad) / 72. / ax_height + + # # if (cNo % 2) == 1: + # # text_off -= text_h + + # # rect_y = (int(cNo / 2) + 1) * text_h + # rect_y = ((cNo % 5) + 1) * text_h + + # rect = plt.Rectangle((rect_x, -rect_y), + # width=rect_width, + # height=text_h, + # transform=back_axis.transAxes, + # zorder=3, + # fill=True, + # facecolor=cblind_cmap[l2cl[lab]], + # clip_on=False, + # alpha=0.8, + # edgecolor='0.8') + + # back_axis.add_patch(rect) + + +############################################################################### +## RUN THE FILE +############################################################################### +if __name__ == "__main__": + + print("\n\n" + 66 * "=") + print(f"\n\tGenerating EMBEDR Figure 01v1 (Dim. Red. 
Zoology)\n") + print(66 * "=" + "\n\n") + + ########################################################################### + ## Set Runtime Parameters + ########################################################################### + if True: + ## Set the figure base name + name_base = "EMBEDR_Figure_01v1_DimRedZoology" + + ## Select which data to use + seq_type = "FACS" + dataset = "Marrow" + + ## Set parameters at which to plot data + DR_params = [('tSNE', 7), + ('UMAP', 15), + ('tSNE', 250), + ('UMAP', 400)] + + ## Set other parameters + n_components = 2 + + tSNE_exag_iter = 250 + tSNE_n_iter = 1000 - tSNE_exag_iter + + random_seed = 1 + initialization = 'random' + n_jobs = -1 + + ## Data directory + # data_dir = f"../../data/tabula-muris/04_facs_processed_data/" + data_dir = f"../../data/TabulaMuris/" + + ## Figure directory + fig_dir = f"./" + + ## Runtime flags + show_all_axes = False ## Show ALL axes borders + color_by_cluster = True ## Color plot by cell type annotations + color_by_variability = False ## Color plot by kEff / dist to NN + + ########################################################################### + ## Set Figure Parameters + ########################################################################### + if True: + ## Select clusters to label + clust_2_label = [1, 10, 0, 8, 6, 4, 9, 7, 2, 3] ## By size cOnt + clust_2_label = sorted(clust_2_label) + + ## Environment-wide parameters. 
+ plt.close('all') + plt.rcParams['svg.fonttype'] = 'none' + sns.set(color_codes=True) + sns.set_style('whitegrid') + matplotlib.rc("font", size=10) + matplotlib.rc("xtick", labelsize=10) + matplotlib.rc("ytick", labelsize=10) + matplotlib.rc("axes", labelsize=12) + matplotlib.rc("axes", titlesize=16) + matplotlib.rc("legend", fontsize=10) + matplotlib.rc("figure", titlesize=12) + + ## Figure size and gridspec size + my_dpi = 400 + fig_wid = 7.2 ## inches (8 inch-wide paper minus margins) + fig_hgt = 0.8 * fig_wid + + ## Automatically set n_rows based on conditions and n_cols. + n_cols = 2 + n_rows = int(np.ceil(len(DR_params) / n_cols)) + + ## Main gridspec parameters. + if color_by_cluster: ## We need less space if coloring by cluster. + main_wspace = 0.005 + fig_pad = 0.5 + else: + main_wspace = 0.15 + fig_pad = 3 + main_hspace = 0.01 + main_spns_2_show = 'all' + main_spn_alpha = 0.5 + main_spn_width = 1.0 + main_height_ratios = [1, 1] + + ## Toggle for axes borders. + if show_all_axes: + spine_alpha = 1 + else: + spine_alpha = 0 + + ########################################################################### + ## Set Coloration Parameters + ########################################################################### + if True: + + if dataset.lower() == "mnist": + label_colors = [cblind_cmap[ll] if ll in clust_2_label + else 'lightgrey'for ll in metadata] + labels = metadata + + elif dataset.lower() in ['marrow']: + + data_path = f"{seq_type}/Processed_{dataset.title()}.h5ad" + data = sc.read_h5ad(os.path.join(data_dir, data_path)) + + ## CELL ONTOLOGY ANNOTATIONS + cell_ont_meta = data.obs['cell_ontology_class'].values + cell_ont_ids = np.sort(cell_ont_meta.unique()).squeeze() + + cell_ont_counts = data.obs.groupby('cell_ontology_class') + cell_ont_counts = cell_ont_counts['cell_ontology_class'].count() + + cell_ont_ids = sorted(cell_ont_ids, + key=lambda cO: -cell_ont_counts[cO]) + + cell_ont_labels = [f"{cO} (N = {cell_ont_counts[cO]})" + for cO in 
cell_ont_ids] + + cell_ont_cmap = sns.color_palette('husl', len(cell_ont_ids)) + + cell_ont_map = {cO: ii for ii, cO in enumerate(cell_ont_ids)} + + cell_ont_alpha_map = {cO: ii for ii, cO + in enumerate(np.sort(cell_ont_ids))} + + cell_ont_colors = [cell_ont_cmap[cell_ont_map[cO]] + for cO in cell_ont_meta] + + cell_ont_alpha_colors = [cell_ont_cmap[cell_ont_alpha_map[cO]] + for cO in cell_ont_meta] + + ########################################################################### + ## Do The Plotting! + ########################################################################### + if True: + + pass + + + + # parsed_meta = pUtl.parse_metadata(metadata) + + # cOnt_labels = parsed_meta['cell_ont_labels'] + # cOnt_map = parsed_meta['cell_ont_map'] + # cOnt_map_rev = {val: key for key, val in cOnt_map.items()} + + # # labels = metadata['cluster.ids'].values + # # label_colors = [cblind_cmap[cIdMap[ll]] if (ll in clust_2_label) + # # else 'lightgrey' for ll in labels] + + # labels = [cOnt_map[ll] for ll in metadata['cell_ontology_class']] + # labels = np.asarray(labels).squeeze() + + # l2cl = {cl: (ii + 3) % 10 for ii, cl in enumerate(clust_2_label)} + # label_colors = [cblind_cmap[l2cl[ll]] if (ll in clust_2_label) + # else 'lightgrey' for ll in labels] + + # # with open("./Embeddings/temp_labels.pkl", 'rb') as f: + # # labels = pkl.load(f) + # # labels[labels == 21] = 11 + # # l2cl = {cl: ii for ii, cl in enumerate(clust_2_label)} + # # label_colors = [cblind_cmap[l2cl[ll]] if (ll in clust_2_label) + # # else 'lightgrey' for ll in labels] + + # label_sizes = [3 if (ll in clust_2_label) else 1 for ll in labels] \ No newline at end of file diff --git a/EMBEDR/plots/__init__.py b/EMBEDR/plots/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/projects/Figures/.ipynb_checkpoints/EMBEDR_Figure_01v1_DimRed_Zoology-checkpoint.ipynb b/projects/Figures/.ipynb_checkpoints/EMBEDR_Figure_01v1_DimRed_Zoology-checkpoint.ipynb new file mode 100644 index 
0000000..8f2c023 --- /dev/null +++ b/projects/Figures/.ipynb_checkpoints/EMBEDR_Figure_01v1_DimRed_Zoology-checkpoint.ipynb @@ -0,0 +1,202 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee89d7fc-3349-438e-a6b7-509014c08eac", + "metadata": {}, + "source": [ + "# Manuscript Figure 1: A Zoology of Dimensionality Reduction Outcomes\n", + "\n", + "In this notebook I want to re-do the code for the first figure in the paper. As a reminder, this was a Figure that showed how processing the Tabula Muris Marrow data using different algorithms and hyperparameters results in different qualitative features. Specifically, it was a 2x2 figure showing two t-SNE and two UMAP embeddings, each at $k=15$ and $k=400$." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f0301b81-cffd-4f86-ac9a-3ff43ff30e32", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "# import PaperV4_PlottingScripts.plotting_utility as pUtl\n", + "import pandas as pd\n", + "import scanpy as sc\n", + "import seaborn as sns\n", + "from sklearn.metrics import pairwise_distances as pwd\n", + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\", message=\"This figure includes Axes that\")\n", + "warnings.filterwarnings(\"ignore\", message=\"tight_layout not applied: \")" + ] + }, + { + "cell_type": "markdown", + "id": "9f5e4ebb-1778-46e7-a3b3-3b6c698501b9", + "metadata": {}, + "source": [ + "### Set runtime parameters here..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c44db56c-cf98-475d-8dcc-cb71e217f9df", + "metadata": {}, + "outputs": [], + "source": [ + "## Set the figure base name\n", + "name_base = \"EMBEDRFigure_01v1_DimRedZoology\"\n", + "\n", + "## Select which data to use\n", + "seq_type = \"FACS\"\n", + "tissue = \"Marrow\"\n", + "\n", + "## Set parameters at which to plot data\n", + "DR_params = [('tSNE', 7),\n", + " ('UMAP', 15),\n", + " ('tSNE', 250),\n", + " ('UMAP', 400)]\n", + "\n", + "## Set other parameters\n", + "n_components = 2\n", + "\n", + "tSNE_exag_iter = 250\n", + "tSNE_n_iter = 1000 - tSNE_exag_iter\n", + "\n", + "random_seed = 1\n", + "initialization = 'random'\n", + "n_jobs = -1\n", + "\n", + "## Data directory\n", + "data_dir = f\"../../data/tabula-muris/\"\n", + "\n", + "## Figure directory\n", + "fig_dir = f\"./\"\n", + "\n", + "## Runtime flags\n", + "show_all_axes = False ## Show ALL axes borders (useful for gridspec setup)\n", + "color_by_cluster = True ## Color plot by cell type cluster annotations.\n", + "color_by_variability = False ## Color plot by kEff / dist to NN" + ] + }, + { + "cell_type": "markdown", + "id": "0ab48d1a-cec0-40d1-bf96-99f6b43a74c1", + "metadata": {}, + "source": [ + "### Load the data and metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "120c3c28-8cc6-45e0-bfe1-f37b6403ba22", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input data `X` is 4771 x 50!\n" + ] + } + ], + "source": [ + "data = sc.read_h5ad(os.path.join(data_dir, f\"04_facs_processed_data/{seq_type}/Processed_{tissue.title()}.h5ad\"))\n", + "data.obs.head()\n", + "\n", + "X = data.obsm['X_pca']\n", + "n_samples, n_features = X.shape\n", + "print(f\"Input data `X` is {n_samples} x {n_features}!\")" + ] + }, + { + "cell_type": "markdown", + "id": "87a4bb6a-a010-4762-9138-a51d167699f9", + "metadata": {}, + "source": [ + "### Set Figure Parameters" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "92e8a6da-4e96-472e-ab41-886193161b77", + "metadata": {}, + "outputs": [], + "source": [ + "## Environment-wide parameters.\n", + "plt.rcParams['svg.fonttype'] = 'none'\n", + "sns.set(color_codes=True)\n", + "sns.set_style('whitegrid')\n", + "matplotlib.rc(\"font\", size=10)\n", + "matplotlib.rc(\"xtick\", labelsize=10)\n", + "matplotlib.rc(\"ytick\", labelsize=10)\n", + "matplotlib.rc(\"axes\", labelsize=12)\n", + "matplotlib.rc(\"axes\", titlesize=16)\n", + "matplotlib.rc(\"legend\", fontsize=10)\n", + "matplotlib.rc(\"figure\", titlesize=12)\n", + "\n", + "## Figure size and gridspec size\n", + "my_dpi = 400\n", + "fig_wid = 7.2 ## inches (8 inch-wide paper minus margins)\n", + "fig_hgt = 0.8 * fig_wid\n", + "\n", + "## Automatically set n_rows based on conditions and n_cols.\n", + "n_cols = 2\n", + "n_rows = int(np.ceil(len(DR_params) / n_cols))\n", + "\n", + "## Main gridspec parameters.\n", + "if color_by_cluster: ## We need less space if coloring by cluster.\n", + " main_wspace = 0.005\n", + " fig_pad = 0.5\n", + "else:\n", + " main_wspace = 0.15\n", + " fig_pad = 3\n", + "main_hspace = 0.01\n", + "main_spns_2_show = 'all'\n", + "main_spn_alpha = 0.5\n", + "main_spn_width = 1.0\n", + "main_height_ratios = [1, 1]\n", + "\n", + "## Toggle for axes borders.\n", + "if show_all_axes:\n", + " spine_alpha = 1\n", + "else:\n", + " spine_alpha = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b60463f-c312-4e37-bed7-1604d20a41e7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + 
"nbformat_minor": 5 +} diff --git a/projects/Figures/EMBEDR_Figure_01v1_DimRed_Zoology.ipynb b/projects/Figures/EMBEDR_Figure_01v1_DimRed_Zoology.ipynb new file mode 100644 index 0000000..1fd2366 --- /dev/null +++ b/projects/Figures/EMBEDR_Figure_01v1_DimRed_Zoology.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ee89d7fc-3349-438e-a6b7-509014c08eac", + "metadata": {}, + "source": [ + "# Manuscript Figure 1: A Zoology of Dimensionality Reduction Outcomes\n", + "\n", + "In this notebook I want to re-do the code for the first figure in the paper. As a reminder, this was a Figure that showed how processing the Tabula Muris Marrow data using different algorithms and hyperparameters results in different qualitative features. Specifically, it was a 2x2 figure showing two t-SNE and two UMAP embeddings, each at $k=15$ and $k=400$." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "f0301b81-cffd-4f86-ac9a-3ff43ff30e32", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "# import PaperV4_PlottingScripts.plotting_utility as pUtl\n", + "import pandas as pd\n", + "import scanpy as sc\n", + "import seaborn as sns\n", + "from sklearn.metrics import pairwise_distances as pwd\n", + "import warnings\n", + "\n", + "%matplotlib inline\n", + "\n", + "warnings.filterwarnings(\"ignore\", message=\"This figure includes Axes that\")\n", + "warnings.filterwarnings(\"ignore\", message=\"tight_layout not applied: \")" + ] + }, + { + "cell_type": "markdown", + "id": "9f5e4ebb-1778-46e7-a3b3-3b6c698501b9", + "metadata": {}, + "source": [ + "### Set runtime parameters here..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c44db56c-cf98-475d-8dcc-cb71e217f9df", + "metadata": {}, + "outputs": [], + "source": [ + "## Set the figure base name\n", + "name_base = \"EMBEDRFigure_01v1_DimRedZoology\"\n", + "\n", + "## Select which data to use\n", + "seq_type = \"FACS\"\n", + "tissue = \"Marrow\"\n", + "\n", + "## Set parameters at which to plot data\n", + "DR_params = [('tSNE', 7),\n", + " ('UMAP', 15),\n", + " ('tSNE', 250),\n", + " ('UMAP', 400)]\n", + "\n", + "## Set other parameters\n", + "n_components = 2\n", + "\n", + "tSNE_exag_iter = 250\n", + "tSNE_n_iter = 1000 - tSNE_exag_iter\n", + "\n", + "random_seed = 1\n", + "initialization = 'random'\n", + "n_jobs = -1\n", + "\n", + "## Data directory\n", + "data_dir = f\"../../data/tabula-muris/\"\n", + "\n", + "## Figure directory\n", + "fig_dir = f\"./\"\n", + "\n", + "## Runtime flags\n", + "show_all_axes = False ## Show ALL axes borders (useful for gridspec setup)\n", + "color_by_cluster = True ## Color plot by cell type cluster annotations.\n", + "color_by_variability = False ## Color plot by kEff / dist to NN" + ] + }, + { + "cell_type": "markdown", + "id": "0ab48d1a-cec0-40d1-bf96-99f6b43a74c1", + "metadata": {}, + "source": [ + "### Load the data and metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "120c3c28-8cc6-45e0-bfe1-f37b6403ba22", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input data `X` is 4771 x 50!\n" + ] + } + ], + "source": [ + "data = sc.read_h5ad(os.path.join(data_dir, f\"04_facs_processed_data/{seq_type}/Processed_{tissue.title()}.h5ad\"))\n", + "data.obs.head()\n", + "\n", + "X = data.obsm['X_pca']\n", + "n_samples, n_features = X.shape\n", + "print(f\"Input data `X` is {n_samples} x {n_features}!\")" + ] + }, + { + "cell_type": "markdown", + "id": "87a4bb6a-a010-4762-9138-a51d167699f9", + "metadata": {}, + "source": [ + "### Set Figure Parameters" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "92e8a6da-4e96-472e-ab41-886193161b77", + "metadata": {}, + "outputs": [], + "source": [ + "## Environment-wide parameters.\n", + "plt.rcParams['svg.fonttype'] = 'none'\n", + "sns.set(color_codes=True)\n", + "sns.set_style('whitegrid')\n", + "matplotlib.rc(\"font\", size=10)\n", + "matplotlib.rc(\"xtick\", labelsize=10)\n", + "matplotlib.rc(\"ytick\", labelsize=10)\n", + "matplotlib.rc(\"axes\", labelsize=12)\n", + "matplotlib.rc(\"axes\", titlesize=16)\n", + "matplotlib.rc(\"legend\", fontsize=10)\n", + "matplotlib.rc(\"figure\", titlesize=12)\n", + "\n", + "## Figure size and gridspec size\n", + "my_dpi = 400\n", + "fig_wid = 7.2 ## inches (8 inch-wide paper minus margins)\n", + "fig_hgt = 0.8 * fig_wid\n", + "\n", + "## Automatically set n_rows based on conditions and n_cols.\n", + "n_cols = 2\n", + "n_rows = int(np.ceil(len(DR_params) / n_cols))\n", + "\n", + "## Main gridspec parameters.\n", + "if color_by_cluster: ## We need less space if coloring by cluster.\n", + " main_wspace = 0.005\n", + " fig_pad = 0.5\n", + "else:\n", + " main_wspace = 0.15\n", + " fig_pad = 3\n", + "main_hspace = 0.01\n", + "main_spns_2_show = 'all'\n", + "main_spn_alpha = 0.5\n", + "main_spn_width = 1.0\n", + "main_height_ratios = [1, 1]\n", + "\n", + "## Toggle for axes borders.\n", + "if show_all_axes:\n", + " spine_alpha = 1\n", + "else:\n", + " spine_alpha = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "7b60463f-c312-4e37-bed7-1604d20a41e7", + "metadata": {}, + "outputs": [], + "source": [ + "import EMBEDR" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "b7b7ba9a", + "metadata": {}, + "outputs": [], + "source": [ + "import EMBEDR.plots.EMBEDR_Figure_01v1_DimRed_Zoology as F01" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "1cc90167", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + 
"==================================================================\n", + "\n", + "\tGenerating EMBEDR Figure 01v1 (Dim. Red. Zoology)\n", + "\n", + "==================================================================\n", + "\n", + "\n" + ] + } + ], + "source": [ + "%run ../../EMBEDR/plots/EMBEDR_Figure_01v1_DimRed_Zoology.py" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "408982eb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cell_ontology_class\n", + "B cell 44\n", + "Slamf1-negative multipotent progenitor cell 713\n", + "Slamf1-positive multipotent progenitor cell 134\n", + "basophil 25\n", + "common lymphoid progenitor 156\n", + "granulocyte 761\n", + "granulocyte monocyte progenitor cell 134\n", + "granulocytopoietic cell 221\n", + "hematopoietic precursor cell 265\n", + "immature B cell 344\n", + "immature NK T cell 37\n", + "immature T cell 60\n", + "immature natural killer cell 36\n", + "late pro-B cell 306\n", + "macrophage 173\n", + "mature natural killer cell 49\n", + "megakaryocyte-erythroid progenitor cell 55\n", + "monocyte 266\n", + "naive B cell 692\n", + "pre-natural killer cell 22\n", + "precursor B cell 517\n", + "regulatory T cell 27\n", + "Name: cell_ontology_class, dtype: int64" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# cell_ont_meta\n", + "# cell_ont_ids\n", + "cell_ont_counts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dbf01928", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "e8c10905", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EMBEDR_Figure_01v1_DimRed_Zoology.ipynb\r\n", + "\u001b[34mEMBEDR_project\u001b[m\u001b[m/\r\n" + ] + } + ], + "source": [ + "\"../../data/TabulaMuris/FACS/Processed_Marrow.h5ad\"" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + 
"id": "2156ba1c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This works?\n", + "\n", + "This works!\n" + ] + } + ], + "source": [ + "F01.test_function()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "fe7522d7", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAFHCAYAAADnd5hjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAFWklEQVR4nO3asXHjMBBAUejGHaEBlqbS2ABq4iVyRvMSW1/yvZcg2GQTzB9ieDuOYwDAs/2pFwDg/yRAACQECICEAAGQECAAEgIEQOLjarjW2scY+1M2gfe0Pc493AFe2Tbn3M4GlwEaY+xzzvu3rwO/xFrrPsYY7gmc+7wjZzzBAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkLgdx/HlcK21jzH2Zy0Db2h7nHu4A7yybc65nQ0uAwQAP+XjaugLCP5pe5x7uAO8si+/gC4DNMbY55z3b18Hfom11n2MMdwTOPd5R874CQGAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAk
BAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQECAAEgIEQEKAAEgIEAAJAQIgIUAAJAQIgIQAAZAQIAASAgRAQoAASAgQAAkBAiAhQAAkBAiAhAABkBAgABICBEBCgABICBAACQECICFAACQECICEAAGQuB3H8eVwrbWPMfZnLQNvaHuce7gDvLJtzrmdDS4DBAA/xRMcAAkBAiAhQAAkBAiAhAABkPgLzsg7A6LSV7oAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, back_axis, main_gs, main_axes = F01.set_main_grid()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "c9d38ed5", + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "make_figure() missing 2 required positional arguments: 'X' and 'cluster_labels'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/_v/n82vwlfd3wg7_t3361n20xhr0000gn/T/ipykernel_81858/1555668879.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mF01\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmake_figure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: make_figure() missing 2 required positional arguments: 'X' and 'cluster_labels'" + ] + } + ], + "source": [ + "\n", + "F01.make_figure(X, data.obs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f2cc201", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}