Skip to content

Commit

Permalink
paths
Browse files Browse the repository at this point in the history
  • Loading branch information
robinredX committed Dec 5, 2024
1 parent 107790c commit d2f08d1
Show file tree
Hide file tree
Showing 12 changed files with 42 additions and 275 deletions.
4 changes: 2 additions & 2 deletions notebooks/01_cell_segmentation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ Includes scripts to refine cell segmentation with Baysor. Run following scripts
4. `prepare_anndata.ipynb`
5. `merge_objects.ipynb`


In the working directly, the .h5ad object will be saved at `xenium_outs/merged_raw.h5ad` and the cell boundaries will be saved at `merged_raw.h5ad`.
The required input is the raw Xenium output in the `data/raw` folder at the root of the repository. Segmentation results, including cell boundaries, will be saved in the `data/outputs` folder.
The raw .h5ad object containing all gene expression data will be saved at `data/xenium_outs/merged_raw.h5ad`.
2 changes: 1 addition & 1 deletion notebooks/01_cell_segmentation/merge_objects.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"metadata": {},
"outputs": [],
"source": [
"files = [os.path.join(\"xenium_outs\", folder) for folder in os.listdir(\"xenium_outs\") if \".h5ad\" in folder]"
"files = [os.path.join(\"../../data/xenium_outs\", folder) for folder in os.listdir(\"../../data/xenium_outs\") if \".h5ad\" in folder]"
]
},
{
Expand Down
32 changes: 7 additions & 25 deletions notebooks/01_cell_segmentation/prepare_anndata.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,10 @@
"execution_count": 3,
"id": "252e75f0-b828-4e76-adb7-f104946285be",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['outputs/output-XETG00088__0011287__Region_1__20240206__171802',\n",
" 'outputs/output-XETG00088__0011695__Region_1__20240202__104242',\n",
" 'outputs/output-XETG00088__0018775__Region_1__20240216__134551',\n",
" 'outputs/output-XETG00088__0011284__Region_1__20240206__171802',\n",
" 'outputs/output-XETG00088__0011546__Region_1__20240130__161548',\n",
" 'outputs/output-XETG00088__0011707__Region_1__20240202__104242',\n",
" 'outputs/output-XETG00088__0011216__Region_1__20240130__161548',\n",
" 'outputs/output-XETG00088__0011762__Region_1__20240124__111313']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"folders = [os.path.join(\"outputs\", folder) for folder in os.listdir(\"outputs\") if \"output\" in folder and \"segmentation_cell_stats.csv\" in os.listdir(os.path.join(\"outputs\", folder))]\n",
"folders"
"folders = [os.path.join(\"outputs\", folder) for folder in os.listdir(\"../../data/outputs\") if \"output\" in folder \\\n",
" and \"segmentation_cell_stats.csv\" in os.listdir(os.path.join(\"../../data/outputs\", folder))]"
]
},
{
Expand All @@ -71,8 +53,8 @@
"metadata": {},
"outputs": [],
"source": [
"if not os.path.exists(\"xenium_outs\"):\n",
" os.mkdir(\"xenium_outs\")"
"if not os.path.exists(\"../../data/xenium_outs\"):\n",
" os.mkdir(\"../../data/xenium_outs\")"
]
},
{
Expand Down Expand Up @@ -249,9 +231,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "xenium_cgn",
"language": "python",
"name": "python3"
"name": "xenium_cgn"
},
"language_info": {
"codemirror_mode": {
Expand Down
4 changes: 2 additions & 2 deletions notebooks/01_cell_segmentation/prepare_transcripts_csv.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
" df.to_csv(outfile)\n",
"\n",
"def process_xenium_data(xenraw_dir, slide_ids):\n",
" output_dir = Path('transcript_codes')\n",
" output_dir = Path('\"../../data/transcripts_codes')\n",
" output_dir.mkdir(exist_ok=True)\n",
" \n",
" xenraw_dir = Path(xenraw_dir)\n",
Expand Down Expand Up @@ -47,7 +47,7 @@
"metadata": {},
"outputs": [],
"source": [
"xenraw_dir = \"/data/projects/zeba/MY_PROJECTS/240130_Xenium_ANCA_SLE_GBM/raw_data\"\n",
"xenraw_dir = \"../../data/raw\"\n",
"slide_ids = ['0011216', '0011284', '0011287', '0011546', '0011695', '0011707', '0011762', '0018775']\n",
" \n",
"process_xenium_data(xenraw_dir, slide_ids)"
Expand Down
32 changes: 2 additions & 30 deletions notebooks/01_cell_segmentation/save_mtx.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,36 +17,8 @@
"metadata": {},
"outputs": [],
"source": [
"folders = [os.path.join(\"outputs\", folder) for folder in os.listdir(\"outputs\") \\\n",
" if \"output-\" in folder and \"baysor_mtx\" not in os.listdir(os.path.join(\"outputs\", folder))]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "361b7218-d02e-4289-aa3e-7a7fd03cbe59",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['outputs/output-XETG00088__0011287__Region_1__20240206__171802',\n",
" 'outputs/output-XETG00088__0011695__Region_1__20240202__104242',\n",
" 'outputs/output-XETG00088__0018775__Region_1__20240216__134551',\n",
" 'outputs/output-XETG00088__0011284__Region_1__20240206__171802',\n",
" 'outputs/output-XETG00088__0011546__Region_1__20240130__161548',\n",
" 'outputs/output-XETG00088__0011707__Region_1__20240202__104242',\n",
" 'outputs/output-XETG00088__0011216__Region_1__20240130__161548',\n",
" 'outputs/output-XETG00088__0011762__Region_1__20240124__111313']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"folders"
"folders = [os.path.join(\"../../data/outputs\", folder) for folder in os.listdir(\"../../data/outputs\") \\\n",
" if \"output-\" in folder and \"baysor_mtx\" not in os.listdir(os.path.join(\"../../data/outputs\", folder))]"
]
},
{
Expand Down
File renamed without changes.
9 changes: 0 additions & 9 deletions notebooks/04_annotate_biopsies/README.md

This file was deleted.

10 changes: 10 additions & 0 deletions notebooks/04_cleaning_and_label_biopsies/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
## Processing and sample annotations

Includes scripts to clean up folded biopsy regions and image blurs after annotating them on DAPI images in Napari. Run the following scripts in order.

1. `process.ipynb`
2. `attach_dapi.ipynb`
3. `clean_and_assign_biopsy_ID.ipynb`

The .h5ad object will be saved at `data/xenium_outs/merged_processed_cleaned.h5ad`, relative to the root of the repository.
The required input file is `data/xenium_outs/merged_raw_SamplesAnnotated.h5ad`, generated in step 02.
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,7 @@
"metadata": {},
"outputs": [],
"source": [
"adata = sc.read(\"xenium_outs/merged_processed.h5ad\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bfce4d26-7179-434b-b9fe-75d956662815",
"metadata": {},
"outputs": [],
"source": [
"del adata.uns[\"spatial\"]"
"adata = sc.read(\"../../data/xenium_outs/merged_processed_integrated.h5ad\")"
]
},
{
Expand All @@ -46,7 +36,7 @@
"metadata": {},
"outputs": [],
"source": [
"raw_dir = \"/data/projects/zeba/MY_PROJECTS/240130_Xenium_ANCA_SLE_GBM/raw_data\"\n",
"raw_dir = \"../../data/raw\"\n",
"sample_folders = [os.path.join(raw_dir, folder) for folder in os.listdir(raw_dir) if \"human_ANCA\" in folder]"
]
},
Expand Down Expand Up @@ -77,16 +67,6 @@
"ids_folders = [e for e in ids_folders if e in adata.obs.Slide_ID.unique()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "823f3174-139c-4def-a919-b2c2d9cf83a1",
"metadata": {},
"outputs": [],
"source": [
"ids_folders"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -207,154 +187,27 @@
{
"cell_type": "code",
"execution_count": null,
"id": "248121fe-5cc5-4af8-a926-ba53be7a7154",
"metadata": {},
"outputs": [],
"source": [
"adata"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d9a4a5b-624e-4b1b-a6c4-7e7a4937399d",
"metadata": {},
"outputs": [],
"source": [
"custom_cmap = {\n",
" 'podocyte': '#950404FF',\n",
" 'mesangial cell': '#C38961FF',\n",
" 'PEC': '#9F5630FF',\n",
" 'PapE': '#FFB274FF',\n",
" 'VSM/P': '#FF7200FF',\n",
" 'EC': '#8C37E5FF',\n",
" 'PT': '#53505AFF',\n",
" 'ATL': '#495564FF',\n",
" 'CNT': '#5C616FFF',\n",
" 'DCT': '#B5BCCBFF',\n",
" 'DTL': '#DCDCE3FF',\n",
" 'TAL': '#B8B69EFF',\n",
" 'FIB': '#007A99FF',\n",
" 'IC': '#00A9CCFF',\n",
" 'NEU': '#32E3FFFF',\n",
" 'PC': '#99F8FFFF',\n",
" 'T': '#648C16FF',\n",
" 'NKC/T': '#7EA13EFF',\n",
" 'B': '#8BAC54FF',\n",
" 'PL': '#A1BA77FF',\n",
" 'N': '#FF0000FF',\n",
" 'MAC': '#FF0000FF',\n",
" 'MDC': '#FF0000FF',\n",
" 'ncMON': '#FF0000FF',\n",
" 'MAST': '#FF6600FF',\n",
" 'cDC': '#FF9900FF',\n",
" 'pDC': '#FFCC00FF',\n",
" 'cycMNP': '#FFCC00FF',\n",
" 'cycNKC/T': '#FFFF00FF'\n",
"}\n",
"\n",
"cell_types_order = ['podocyte', 'PEC', 'mesangial cell', 'PapE', 'VSM/P','EC' ,\n",
" 'PT', 'ATL', 'CNT', 'DCT', 'DTL', 'TAL',\n",
" 'FIB', 'IC', 'NEU', 'PC',\n",
" 'T', 'NKC/T','B', 'PL',\n",
" 'N', 'MAC', 'MDC', 'ncMON', 'MAST',\n",
" 'cDC', 'pDC', 'cycMNP', 'cycNKC/T']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d655a1dc-9bcd-4ed7-a03c-8f5f90cf4bc3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "29fbc04b-7d28-41cf-8503-525d9c554898",
"metadata": {},
"outputs": [],
"source": [
"adata.obs[\"celltype_l1\"] = adata.obs[\"celltype_l1\"].cat.reorder_categories(cell_types_order)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24a39eea-64e1-4f55-a301-5bfd15ab5585",
"metadata": {},
"outputs": [],
"source": [
"colors = []\n",
"for cat in adata.obs[\"celltype_l1\"].cat.categories:\n",
" colors.append(custom_cmap[cat])\n",
"adata.uns[\"celltype_l1_colors\"] = colors"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7b1b5de-1ccc-404d-a1d7-6e5357da3acd",
"metadata": {},
"outputs": [],
"source": [
"sc.pl.umap(adata, color=\"celltype_l1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82d6888e-311a-480e-ada3-da6e994befeb",
"metadata": {},
"outputs": [],
"source": [
"adata.write(\"xenium_outs/merged_processed.h5ad\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32111734-fd1e-4b39-abc1-2e0962db9ab5",
"metadata": {},
"outputs": [],
"source": [
"with open(\"colormap_celltypes.json\", \"w\") as f:\n",
" json.dump(custom_cmap, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "180fea52-c3df-4618-a574-d48e247894c7",
"id": "be3f70ba-b9b3-4a4f-ac98-5c41c70f3f34",
"metadata": {},
"outputs": [],
"source": [
"cell_types_order = ['podocyte', 'PEC', 'mesangial cell', 'PapE', 'VSM/P','EC' ,\n",
" 'PT', 'ATL', 'CNT', 'DCT', 'DTL', 'TAL',\n",
" 'FIB', 'IC', 'NEU', 'PC',\n",
" 'T', 'NKC/T','B', 'PL',\n",
" 'N', 'MAC', 'MDC', 'ncMON', 'MAST',\n",
" 'cDC', 'pDC', 'cycMNP', 'cycNKC/T']\n",
"import pickle\n",
"with open(\"order_celltypes.json\", \"w\") as f:\n",
" json.dump(cell_types_order, f)"
"adata.write(\"../../data/xenium_outs/merged_processed_integrated_v2.h5ad\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b6a872a-a686-4c3b-84d5-1b8399df46f1",
"id": "578defa6-da6c-4186-9eda-78015f82ea56",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "xenium_cgn",
"language": "python",
"name": "python3"
"name": "xenium_cgn"
},
"language_info": {
"codemirror_mode": {
Expand Down
Loading

0 comments on commit d2f08d1

Please sign in to comment.