Skip to content

Commit

Permalink
Add LotfollahiTheis2023
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanpeidli committed Dec 1, 2023
1 parent a3e291d commit dcd3a2e
Show file tree
Hide file tree
Showing 16 changed files with 2,610 additions and 20 deletions.
572 changes: 572 additions & 0 deletions dataset_processing/notebooks/DraegerKampmann2022.ipynb

Large diffs are not rendered by default.

792 changes: 792 additions & 0 deletions dataset_processing/notebooks/LiangWang2023.ipynb

Large diffs are not rendered by default.

214 changes: 214 additions & 0 deletions dataset_processing/notebooks/LotfollahiTheis2023.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "338742b7-7103-4475-adf1-0b6122dd3abb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!pip install mygene statannotations scrublet scanpy scvelo decoupler matplotlib_venn goatools gseapy scperturb biomart PyComplexHeatmap statsmodels omnipath git+https://github.com/saezlab/pypath.git --quiet"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bdf19927-f5a5-49e3-85ee-9b22bbed4b30",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"<style>.container { width:95% !important; }</style>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import subprocess\n",
"import os\n",
"import sys\n",
"import matplotlib.backends.backend_pdf\n",
"import scanpy as sc\n",
"import matplotlib.pyplot as pl\n",
"import anndata as ad\n",
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"import scvelo as scv\n",
"scv.settings.verbosity=1\n",
"\n",
"from pathlib import Path\n",
"\n",
"# Jupyter stuff\n",
"from tqdm.notebook import tqdm\n",
"from IPython.display import clear_output\n",
"from IPython.core.display import display, HTML\n",
"display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
"\n",
"%matplotlib inline\n",
"\n",
"# Custom functions\n",
"sys.path.insert(1, '../')\n",
"from utils import *\n",
"\n",
"# scperturb package\n",
"sys.path.insert(1, '../package/src/')\n",
"from scperturb import *\n",
"\n",
"from pathlib import Path\n",
"figure_path = Path('../figures/')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1276f674-d382-4a04-a5a1-89034a5d6892",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"TEMPDIR = Path('/scratch/peidli/scPerturb/')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c604eabe-3e9e-48f7-a090-3fd76bc847f3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['GSE206741_cell_metadata.tsv',\n",
" 'GSE206741_count_matrix.mtx',\n",
" 'GSE206741_gene_metadata.tsv']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted([file.name for file in (TEMPDIR / 'LotfollahiTheis2023/').glob('*')])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "027e6124-715a-453c-81a2-e74b7166bfcf",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from scipy.io import mmread\n",
"from scipy.sparse import csr_matrix\n",
"obs = pd.read_csv(TEMPDIR / f'LotfollahiTheis2023/GSE206741_cell_metadata.tsv', index_col=0, sep='\\t')\n",
"var = pd.read_csv(TEMPDIR / f'LotfollahiTheis2023/GSE206741_gene_metadata.tsv', index_col=0, sep='\\t')\n",
"X = csr_matrix(mmread(TEMPDIR / f'LotfollahiTheis2023/GSE206741_count_matrix.mtx'))\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "1697c169-08b2-4714-9d8d-f4c18980d494",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"adata = sc.AnnData(X.T, obs, var)\n",
"\n",
"adata.var.set_index('gene_short_name', inplace=True, drop=False)\n",
"adata.var.columns = ['ensembl_id']\n",
"adata.var.index.name = 'gene_symbol'\n",
"\n",
"adata.obs['perturbation'] = ['_'.join(np.sort([d1,d2])).replace('\\xa0','') for d1, d2 in zip(adata.obs.Drug1, adata.obs.Drug2)]\n",
"adata.obs['perturbation'] = [x.replace('DMSO_', '').replace('_DMSO', '').replace('DMSO', 'control') for x in adata.obs.perturbation]\n",
"adata.obs.index.name = 'cell_barcode'\n",
"adata.obs.rename({\n",
" 'n.umi': 'ncounts', \n",
"}, axis=1, inplace=True)\n",
"adata.obs.drop(['sample', 'Drug1', 'Drug2'], axis=1, inplace=True)\n",
"adata.obs = adata.obs[['perturbation', 'Size_Factor', 'ncounts', 'RT_well', 'Well']]\n",
"adata.obs['nperts'] = [p.count('_')+1-p.count('control') if type(p)==str else 0 for p in adata.obs.perturbation]\n",
"adata.obs['perturbation_type'] = 'drug'\n",
"adata.obs['disease'] = \"lung adenocarcinoma\"\n",
"adata.obs['cancer'] = True\n",
"adata.obs['tissue_type']=\"cell_line\"\n",
"adata.obs[\"cell_line\"] = \"A549\"\n",
"adata.obs[\"celltype\"] = 'lung epthelial cells'\n",
"adata.obs['organism'] = 'human'\n",
"#annotate_qc(adata, species='human')\n",
"#assert_annotations(adata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88c75250-16de-427c-b0a1-02b5a4cd75db",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd6705a5-e938-41d9-af0b-c08651210975",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "08ad720e-25ee-42b5-a4b1-b5e6ea5091d3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6a27157d-43c2-4017-85c6-1333e05dd182",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit dcd3a2e

Please sign in to comment.