From e300f1e5cfba906ef9d7cba9077d676a2f72c57f Mon Sep 17 00:00:00 2001 From: Zeba-Sultana Date: Fri, 10 Jan 2025 18:33:46 +0100 Subject: [PATCH] Added notbooks defining periglomerular regions --- ...lomerularRegions_Functions_allSlides.ipynb | 536 ++++++++++++++++++ ...rularRegions_all8Slides_Concatenated.ipynb | 169 ++++++ 2 files changed, 705 insertions(+) create mode 100644 notebooks/07_preglom_annotation/00_DefiningPeriglomerularRegions_Functions_allSlides.ipynb create mode 100644 notebooks/07_preglom_annotation/01_DefinitionsPeriglomerularRegions_all8Slides_Concatenated.ipynb diff --git a/notebooks/07_preglom_annotation/00_DefiningPeriglomerularRegions_Functions_allSlides.ipynb b/notebooks/07_preglom_annotation/00_DefiningPeriglomerularRegions_Functions_allSlides.ipynb new file mode 100644 index 0000000..fc84df5 --- /dev/null +++ b/notebooks/07_preglom_annotation/00_DefiningPeriglomerularRegions_Functions_allSlides.ipynb @@ -0,0 +1,536 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "54f5eac4-7c10-48f4-8717-b388d15e0208", + "metadata": {}, + "outputs": [], + "source": [ + "#pip install shapely" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dfbe989a-ab3c-455d-92e7-e7e4c80ac36c", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from tqdm import tqdm\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import scanpy as sc\n", + "\n", + "import anndata as ad\n", + "\n", + "from shapely.geometry import MultiPoint, Point\n", + "from shapely.ops import unary_union\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "54139d08-e771-42e9-a074-2fbe0695a03b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/data/projects/zeba/MY_PROJECTS/240130_Xenium_ANCA_SLE_GBM/xenium-cgn/notebooks_zs/02_PeriglomDefinition\n" + ] + } + ], + "source": [ + "!pwd\n", + "\n", + "path1 = './output_dataobjects'\n", + "path2 = './figures'\n", + "\n", + "os.makedirs(path1, exist_ok=True)\n", + "os.makedirs(path2, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cef10439-6e2f-44f8-8f11-e0e78364bc40", + "metadata": {}, + "outputs": [], + "source": [ + "def define_glom_ID(adata, col_glom_num):\n", + " #using the column with glom numbers \"col_glom_num\", creating 'glom_ID' which will give a unique ID to each glomerulus\n", + "\n", + " adata.obs['glom_ID'] = adata.obs[col_glom_num].astype(str)+ \"_\" + adata.obs['Slide_ID'].astype(str)+ \"_\" + adata.obs['Disease'].astype(str) + \"_\" + adata.obs['sample'].astype(str)\n", + "\n", + " adata.obs.glom_ID = adata.obs.glom_ID.astype('category')\n", + " \n", + " return adata" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a0ff47b9-f21c-4e8b-9163-7047dd321202", + "metadata": {}, + "outputs": [], + "source": [ + "def define_expanded_polygons(Slide,NonGlomTag):\n", + " #the first argument should be adata subsetted for each slide separately.\n", + " #second argument is the string that labels cells which fall outside of glom annotation\n", + " \n", + " # dictionary to store the expanded polygons for each 'glom_ID'\n", + " expanded_polygons = {}\n", + " \n", + " # To only take real gloms, filter out rows where 'glom_ID' does not start with the second argument\n", + " Slide_filtered = Slide.obs[~Slide.obs['glom_ID'].str.startswith(NonGlomTag)]\n", + "\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + " # Create a MultiPoint object from the x and y coordinates of the cells\n", + " points = MultiPoint(list(zip(group['x'], group['y'])))\n", + " \n", + " # Create the convex polygon to get the simplest polygon that contains all the points\n", + " polygon = points.convex_hull\n", + " \n", + " # Buffer the polygon to create the expanded region\n", + " expanded_polygon = polygon.buffer(100) # 100 µm expansion\n", + " \n", + " # Store the expanded polygon for this group\n", + " expanded_polygons[glom_ID] = expanded_polygon\n", + " \n", + " return expanded_polygons" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "62051f81-3a84-4e2e-b502-e5c4aba3c9f2", + "metadata": {}, + "outputs": [], + "source": [ + "def define_polygon_flags(Slide,expanded_polygons): \n", + " # empty column for storing the polygon keys\n", + " Slide.obs['polygon_flags'] = ''\n", + " \n", + " for index, row in tqdm(Slide.obs.iterrows()):\n", + " point = Point(row['x'], row['y']) # Create a Point object for the current cell\n", + " \n", + " keys_found = [] # List to store keys of polygons in which the current point lies\n", + " \n", + " # Check this point against each polygon\n", + " for key, polygon in expanded_polygons.items():\n", + " if polygon.contains(point):\n", + " keys_found.append(key)\n", + " \n", + " # Join all found keys with commas and assign to the DataFrame\n", + " Slide.obs.at[index, 'polygon_flags'] = ','.join(keys_found)\n", + " \n", + " return Slide" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9cc500dd-efa5-47f6-8959-1b9e81494727", + "metadata": {}, + "outputs": [], + "source": [ + "def create_glom_periglom_cols(Slide,col_glom_num, NonGlomTag):\n", + " # Create column 'is_in_polygon' , 'is_in_glom' and \"is_in_periglom\"\n", + " Slide.obs['is_in_polygon'] = Slide.obs['polygon_flags'].astype(bool) & Slide.obs['polygon_flags'].notna()\n", + " Slide.obs['is_in_polygon'] = Slide.obs['is_in_polygon'].fillna(False) # Replace NaNs in 'is_in_polygon' with False explicitly\n", + " \n", + " Slide.obs['is_in_glom'] = ~Slide.obs[col_glom_num].str.contains(NonGlomTag)\n", + " \n", + " Slide.obs['is_in_periglom'] = (Slide.obs['is_in_polygon'] == True) & (Slide.obs['is_in_glom'] == False)\n", + " \n", + " return Slide" + ] + }, + { + "cell_type": "markdown", + "id": "f42f4828-1173-4375-91b1-38440bb3e7f0", + "metadata": {}, + "source": [ + "### Dataobject \n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "efb5f5ac-e71d-4dd8-8ebe-64921a9f5f67", + "metadata": {}, + "outputs": [], + "source": [ + "#This is the dataobject with nichePCA based Glom annotation Shared by Behnam On 02 Sep 24 : \n", + "# https://github.com/imsb-uke/xenium-cgn/blob/main/notebooks/06_domain_identification/README.md\n", + "#which points to : epyc/Behnam/xenium-cgn/notebooks/06_domain_identification/adata/adata_nichepca_with_domain_tuned_per_sample_v1.h5ad\n", + "\n", + "adata = sc.read_h5ad(\"/data/projects/Behnam/xenium-cgn/notebooks/06_domain_identification/adata/adata_nichepca_with_domain_tuned_per_sample_v1.h5ad\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5a81d72b-1151-405b-8600-cd9c38ed02da", + "metadata": {}, + "outputs": [], + "source": [ + "adata=define_glom_ID(adata,\"nichepca_glom_no\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e5cb5464-918e-46dc-be5a-435c9673dff9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Other_0011695_SLE_X40', '0_0011695_SLE_X40', '1_0011695_SLE_X40', '2_0011695_SLE_X40', 'Other_0011695_SLE_X39', ..., '9_0011216_ANCA_X17', '10_0011216_ANCA_X17', '11_0011216_ANCA_X17', '12_0011216_ANCA_X17', '13_0011216_ANCA_X17']\n", + "Length: 833\n", + "Categories (833, object): ['0_0011216_ANCA_X17', '0_0011216_ANCA_X18', '0_0011216_ANCA_X19', '0_0011216_ANCA_X20', ..., 'Other_0018775_ANCA_X63', 'Other_0018775_Cntrl_X57', 'Other_0018775_SLE_X61', 'Other_0018775_SLE_X62']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs.glom_ID.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f33f8f84-be93-4781-972d-8150279c4ba9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "770" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(adata.obs.glom_ID.unique())-63" + ] + }, + { + "cell_type": "raw", + "id": "86b96738-446c-4641-9f45-78fe92e50dc2", + "metadata": {}, + "source": [ + "In Manual annotation there were : 791 gloms\n", + "In niche PCA based annotation there are 821 gloms (Need to subtract 63 becuse there are 63 samples each of which will have \"Other\" as the first item for glom annotation number.for all regions outside the gloms)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "fe8a56ec-d388-4966-9fd6-65cbbf4a72ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['0011695', '0018775', '0011707', '0011287', '0011762', '0011284', '0011546', '0011216']\n", + "Categories (8, object): ['0011216', '0011284', '0011287', '0011546', '0011695', '0011707', '0011762', '0018775']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata.obs.Slide_ID.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e28031ce-2c23-48d4-91fe-3f13561ae789", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_966/4150475473.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + "/tmp/ipykernel_966/1889237325.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", + " Slide.obs['polygon_flags'] = ''\n", + "303811it [04:11, 1210.37it/s]\n" + ] + } + ], + "source": [ + "Slide1_0011762=adata[adata.obs['Slide_ID']=='0011762',:]\n", + "\n", + "Slide1_0011762_expanded_polygons = define_expanded_polygons(Slide1_0011762, \"Other\")\n", + "#Slide1_0011762_expanded_polygons\n", + "\n", + "Slide1_0011762 = define_polygon_flags(Slide1_0011762,Slide1_0011762_expanded_polygons)\n", + "\n", + "Slide1_0011762 = create_glom_periglom_cols(Slide1_0011762,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide1_0011762.write('./output_dataobjects/Slide1_0011762_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9c5d0d99-03a0-45f8-8031-3c93558f387b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_966/4150475473.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + "/tmp/ipykernel_966/1889237325.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", + " Slide.obs['polygon_flags'] = ''\n", + "407812it [08:22, 811.71it/s]\n" + ] + } + ], + "source": [ + "Slide3_0011546=adata[adata.obs['Slide_ID']=='0011546',:]\n", + "\n", + "Slide3_0011546_expanded_polygons = define_expanded_polygons(Slide3_0011546, \"Other\")\n", + "#Slide3_0011546_expanded_polygons\n", + "\n", + "Slide3_0011546 = define_polygon_flags(Slide3_0011546,Slide3_0011546_expanded_polygons)\n", + "\n", + "Slide3_0011546 = create_glom_periglom_cols(Slide3_0011546,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide3_0011546.write('./output_dataobjects/Slide3_0011546_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9a819e05-3e62-4303-8fb4-7d27e6e8c1d4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_966/4150475473.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + "/tmp/ipykernel_966/1889237325.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", + " Slide.obs['polygon_flags'] = ''\n", + "502183it [08:41, 963.20it/s] \n" + ] + } + ], + "source": [ + "Slide4_0011216=adata[adata.obs['Slide_ID']=='0011216',:]\n", + "\n", + "Slide4_0011216_expanded_polygons = define_expanded_polygons(Slide4_0011216, \"Other\")\n", + "#Slide4_0011216_expanded_polygons\n", + "\n", + "Slide4_0011216 = define_polygon_flags(Slide4_0011216,Slide4_0011216_expanded_polygons)\n", + "\n", + "Slide4_0011216 = create_glom_periglom_cols(Slide4_0011216,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide4_0011216.write('./output_dataobjects/Slide4_0011216_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "6f56af30-eb74-457d-8d9d-9266b5e320ab", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_966/4150475473.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + "/tmp/ipykernel_966/1889237325.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", + " Slide.obs['polygon_flags'] = ''\n", + "396654it [06:39, 993.97it/s] \n" + ] + } + ], + "source": [ + "Slide5_0011707=adata[adata.obs['Slide_ID']=='0011707',:]\n", + "\n", + "Slide5_0011707_expanded_polygons = define_expanded_polygons(Slide5_0011707, \"Other\")\n", + "#Slide5_0011707_expanded_polygons\n", + "\n", + "Slide5_0011707 = define_polygon_flags(Slide5_0011707,Slide5_0011707_expanded_polygons)\n", + "\n", + "Slide5_0011707 = create_glom_periglom_cols(Slide5_0011707,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide5_0011707.write('./output_dataobjects/Slide5_0011707_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f58dfb99-cd6a-468b-a6a4-42b43a1401be", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_966/4150475473.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + "/tmp/ipykernel_966/1889237325.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", + " Slide.obs['polygon_flags'] = ''\n", + "428899it [06:36, 1080.74it/s]\n" + ] + } + ], + "source": [ + "Slide6_0011695=adata[adata.obs['Slide_ID']=='0011695',:]\n", + "\n", + "Slide6_0011695_expanded_polygons = define_expanded_polygons(Slide6_0011695, \"Other\")\n", + "#Slide6_0011695_expanded_polygons\n", + "\n", + "Slide6_0011695 = define_polygon_flags(Slide6_0011695,Slide6_0011695_expanded_polygons)\n", + "\n", + "Slide6_0011695 = create_glom_periglom_cols(Slide6_0011695,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide6_0011695.write('./output_dataobjects/Slide6_0011695_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93a144f3-3841-49dd-be78-3ac5a401bf91", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1106/4150475473.py:11: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", + " for glom_ID, group in Slide_filtered.groupby('glom_ID'):\n", + "/tmp/ipykernel_1106/1889237325.py:3: ImplicitModificationWarning: Trying to modify attribute `.obs` of view, initializing view as actual.\n", + " Slide.obs['polygon_flags'] = ''\n", + "69183it [01:10, 960.80it/s] " + ] + } + ], + "source": [ + "Slide7_0011284=adata[adata.obs['Slide_ID']=='0011284',:]\n", + "\n", + "Slide7_0011284_expanded_polygons = define_expanded_polygons(Slide7_0011284, \"Other\")\n", + "#Slide7_0011284_expanded_polygons\n", + "\n", + "Slide7_0011284 = define_polygon_flags(Slide7_0011284,Slide7_0011284_expanded_polygons)\n", + "\n", + "Slide7_0011284 = create_glom_periglom_cols(Slide7_0011284,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide7_0011284.write('./output_dataobjects/Slide7_0011284_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa4d42a6-a83f-4f70-9385-0983337fe5be", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "203937it [03:12, 1043.89it/s]" + ] + } + ], + "source": [ + "Slide8_0011287=adata[adata.obs['Slide_ID']=='0011287',:]\n", + "\n", + "Slide8_0011287_expanded_polygons = define_expanded_polygons(Slide8_0011287, \"Other\")\n", + "#Slide8_0011287_expanded_polygons\n", + "\n", + "Slide8_0011287 = define_polygon_flags(Slide8_0011287,Slide8_0011287_expanded_polygons)\n", + "\n", + "Slide8_0011287 = create_glom_periglom_cols(Slide8_0011287,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide8_0011287.write('./output_dataobjects/Slide8_0011287_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a5d7f69-8007-4557-bbe2-620ea024e3ac", + "metadata": {}, + "outputs": [], + "source": [ + "Slide9_0018775=adata[adata.obs['Slide_ID']=='0018775',:]\n", + "\n", + "Slide9_0018775_expanded_polygons = define_expanded_polygons(Slide9_0018775, \"Other\")\n", + "#Slide9_0018775_expanded_polygons\n", + "\n", + "Slide9_0018775 = define_polygon_flags(Slide9_0018775,Slide9_0018775_expanded_polygons)\n", + "\n", + "Slide9_0018775 = create_glom_periglom_cols(Slide9_0018775,col_glom_num=\"nichepca_glom_no\", NonGlomTag=\"Other\")\n", + "\n", + "Slide9_0018775.write('./output_dataobjects/Slide9_0018775_PeriglomDefined.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c617d26d-4a0d-410f-96e5-89c6129ea399", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25371a15-4624-4abb-bf87-a54c018bd48c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/07_preglom_annotation/01_DefinitionsPeriglomerularRegions_all8Slides_Concatenated.ipynb b/notebooks/07_preglom_annotation/01_DefinitionsPeriglomerularRegions_all8Slides_Concatenated.ipynb new file mode 100644 index 0000000..b609883 --- /dev/null +++ b/notebooks/07_preglom_annotation/01_DefinitionsPeriglomerularRegions_all8Slides_Concatenated.ipynb @@ -0,0 +1,169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0c16a45b-46b9-495c-b5b1-d7b3a6cf73f6", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "import scanpy as sc\n", + "#import squidpy as sq\n", + "\n", + "import anndata as ad\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c02fd82d-c392-43c4-8253-2b628b2e0929", + "metadata": {}, + "outputs": [], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2f1056c-8ad0-49e5-a3be-44ccde5bebef", + "metadata": {}, + "outputs": [], + "source": [ + "PeriGlom_Slides = [\n", + " './output_dataobjects/Slide1_0011762_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide3_0011546_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide4_0011216_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide5_0011707_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide6_0011695_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide7_0011284_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide8_0011287_PeriglomDefined.h5ad',\n", + " './output_dataobjects/Slide9_0018775_PeriglomDefined.h5ad'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e9be2df4-2dcc-4456-86e7-7f90d686faf1", + "metadata": {}, + "outputs": [], + "source": [ + "PeriGlom_Slides_list = []\n", + "\n", + "for data_file in PeriGlom_Slides:\n", + " slide = sc.read_h5ad(filename=data_file)\n", + " PeriGlom_Slides_list.append(slide)\n", + "\n", + "# Concatenate the list of AnnData objects into one\n", + "#adata = PeriGlom_Slides_list[0].concat(PeriGlom_Slides_list[1:],\n", + "# index_unique = None,\n", + "# merge='same',\n", + "# uns_merge='same' )\n", + "\n", + "adata_withuns = ad.concat(PeriGlom_Slides_list,index_unique = None,\n", + " merge='unique',\n", + " uns_merge='unique')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7362ef53-e90c-4608-b324-308159095159", + "metadata": {}, + "outputs": [], + "source": [ + "Slide_IDs = [\n", + " adata_withuns.obs['Slide_ID'] == \"0011762\", #Slide1\n", + " adata_withuns.obs['Slide_ID'] == \"0011546\", #Slide3\n", + " adata_withuns.obs['Slide_ID'] == \"0011216\", #Slide4\n", + " adata_withuns.obs['Slide_ID'] == \"0011707\", #Slide5\n", + " adata_withuns.obs['Slide_ID'] == \"0011695\", #Slide6\n", + " adata_withuns.obs['Slide_ID'] == \"0011284\", #Slide7\n", + " adata_withuns.obs['Slide_ID'] == \"0011287\", #Slide8\n", + " adata_withuns.obs['Slide_ID'] == \"0018775\" #Slide9\n", + "]\n", + "\n", + "# Corresponding slide numbers\n", + "Slide_nums = [\"Slide1\", \"Slide3\", \"Slide4\",\"Slide5\", \"Slide6\", \"Slide7\", \"Slide8\", \"Slide9\"]\n", + "\n", + "# Default value if none of the Slide_IDs match\n", + "default_value = \"Other\"\n", + "\n", + "# Assign Slide_num based on Slide_IDs. # this ias based on the order in which conditions are written\n", + "adata_withuns.obs['Slide_num'] = np.select(Slide_IDs, Slide_nums, default=default_value)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec3bf53f-8e35-4d8c-8a99-45126ee4b21a", + "metadata": {}, + "outputs": [], + "source": [ + "# Update the Disease labels as were used previously\n", + "conditions = [\n", + " adata_withuns.obs['Disease'] == 'SLE', \n", + " adata_withuns.obs['Disease'] == 'ANCA', \n", + " adata_withuns.obs['Disease'] == 'GBM' \n", + "]\n", + "\n", + "# Corresponding new values for each condition\n", + "new_values = [\n", + " 'Sle', # Update 'SLE' to 'Sle'\n", + " 'Anca', # Update 'ANCA' to 'Anca'\n", + " 'Gbm' # Update 'GBM' to 'Gbm'\n", + "]\n", + "\n", + "# Use numpy.select to apply conditions\n", + "adata_withuns.obs['Disease'] = np.select(conditions, new_values, default=adata_withuns.obs['Disease'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45d5afa6-b141-4d67-bcb3-45289cbb9111", + "metadata": {}, + "outputs": [], + "source": [ + "adata_withuns.write('./output_dataobjects/Slides_ALL_PeriglomDefined_withuns.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6750e17a-2902-4152-8b82-bd25149db1d1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}