-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a3e291d
commit dcd3a2e
Showing
16 changed files
with
2,610 additions
and
20 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "338742b7-7103-4475-adf1-0b6122dd3abb", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!pip install mygene statannotations scrublet scanpy scvelo decoupler matplotlib_venn goatools gseapy scperturb biomart PyComplexHeatmap statsmodels omnipath git+https://github.com/saezlab/pypath.git --quiet" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "bdf19927-f5a5-49e3-85ee-9b22bbed4b30", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<style>.container { width:95% !important; }</style>" | ||
], | ||
"text/plain": [ | ||
"<IPython.core.display.HTML object>" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
} | ||
], | ||
"source": [ | ||
"import subprocess\n", | ||
"import os\n", | ||
"import sys\n", | ||
"import matplotlib.backends.backend_pdf\n", | ||
"import scanpy as sc\n", | ||
"import matplotlib.pyplot as pl\n", | ||
"import anndata as ad\n", | ||
"import pandas as pd\n", | ||
"import numpy as np\n", | ||
"import seaborn as sns\n", | ||
"import scvelo as scv\n", | ||
"scv.settings.verbosity=1\n", | ||
"\n", | ||
"from pathlib import Path\n", | ||
"\n", | ||
"# Jupyter stuff\n", | ||
"from tqdm.notebook import tqdm\n", | ||
"from IPython.display import clear_output\n", | ||
"from IPython.core.display import display, HTML\n", | ||
"display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n", | ||
"\n", | ||
"%matplotlib inline\n", | ||
"\n", | ||
"# Custom functions\n", | ||
"sys.path.insert(1, '../')\n", | ||
"from utils import *\n", | ||
"\n", | ||
"# scperturb package\n", | ||
"sys.path.insert(1, '../package/src/')\n", | ||
"from scperturb import *\n", | ||
"\n", | ||
"from pathlib import Path\n", | ||
"figure_path = Path('../figures/')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "1276f674-d382-4a04-a5a1-89034a5d6892", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"TEMPDIR = Path('/scratch/peidli/scPerturb/')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "c604eabe-3e9e-48f7-a090-3fd76bc847f3", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"['GSE206741_cell_metadata.tsv',\n", | ||
" 'GSE206741_count_matrix.mtx',\n", | ||
" 'GSE206741_gene_metadata.tsv']" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"sorted([file.name for file in (TEMPDIR / 'LotfollahiTheis2023/').glob('*')])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 14, | ||
"id": "027e6124-715a-453c-81a2-e74b7166bfcf", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from scipy.io import mmread\n", | ||
"from scipy.sparse import csr_matrix\n", | ||
"obs = pd.read_csv(TEMPDIR / f'LotfollahiTheis2023/GSE206741_cell_metadata.tsv', index_col=0, sep='\\t')\n", | ||
"var = pd.read_csv(TEMPDIR / f'LotfollahiTheis2023/GSE206741_gene_metadata.tsv', index_col=0, sep='\\t')\n", | ||
"X = csr_matrix(mmread(TEMPDIR / f'LotfollahiTheis2023/GSE206741_count_matrix.mtx'))\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 43, | ||
"id": "1697c169-08b2-4714-9d8d-f4c18980d494", | ||
"metadata": { | ||
"tags": [] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"adata = sc.AnnData(X.T, obs, var)\n", | ||
"\n", | ||
"adata.var.set_index('gene_short_name', inplace=True, drop=False)\n", | ||
"adata.var.columns = ['ensembl_id']\n", | ||
"adata.var.index.name = 'gene_symbol'\n", | ||
"\n", | ||
"adata.obs['perturbation'] = ['_'.join(np.sort([d1,d2])).replace('\\xa0','') for d1, d2 in zip(adata.obs.Drug1, adata.obs.Drug2)]\n", | ||
"adata.obs['perturbation'] = [x.replace('DMSO_', '').replace('_DMSO', '').replace('DMSO', 'control') for x in adata.obs.perturbation]\n", | ||
"adata.obs.index.name = 'cell_barcode'\n", | ||
"adata.obs.rename({\n", | ||
" 'n.umi': 'ncounts', \n", | ||
"}, axis=1, inplace=True)\n", | ||
"adata.obs.drop(['sample', 'Drug1', 'Drug2'], axis=1, inplace=True)\n", | ||
"adata.obs = adata.obs[['perturbation', 'Size_Factor', 'ncounts', 'RT_well', 'Well']]\n", | ||
"adata.obs['nperts'] = [p.count('_')+1-p.count('control') if type(p)==str else 0 for p in adata.obs.perturbation]\n", | ||
"adata.obs['perturbation_type'] = 'drug'\n", | ||
"adata.obs['disease'] = \"lung adenocarcinoma\"\n", | ||
"adata.obs['cancer'] = True\n", | ||
"adata.obs['tissue_type']=\"cell_line\"\n", | ||
"adata.obs[\"cell_line\"] = \"A549\"\n", | ||
"adata.obs[\"celltype\"] = 'lung epthelial cells'\n", | ||
"adata.obs['organism'] = 'human'\n", | ||
"#annotate_qc(adata, species='human')\n", | ||
"#assert_annotations(adata)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "88c75250-16de-427c-b0a1-02b5a4cd75db", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "fd6705a5-e938-41d9-af0b-c08651210975", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "08ad720e-25ee-42b5-a4b1-b5e6ea5091d3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6a27157d-43c2-4017-85c6-1333e05dd182", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.9" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.