Skip to content

Commit

Permalink
Merge pull request #111 from PyEED/graph-db
Browse files Browse the repository at this point in the history
Graph db
  • Loading branch information
haeussma authored Feb 7, 2025
2 parents af3e981 + d8603d5 commit f718a27
Show file tree
Hide file tree
Showing 146 changed files with 44,156 additions and 7,649 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Lint workflow: on every pull request, install the project with its dev
# dependencies via Poetry, then run Ruff (lint + format check) and mypy.
name: Lint

on: pull_request

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        # v5 replaces v4, which runs on the deprecated Node 16 runtime.
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install poetry
          poetry install --with dev

      - name: Run Ruff
        run: |
          poetry run ruff check .
          poetry run ruff format --check .

      - name: Run mypy
        run: |
          poetry run mypy src/
9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ __pycache__/

# C extensions
*.so
.vscode/
.huggingface/

# Distribution / packaging
.Python
Expand Down Expand Up @@ -158,4 +160,9 @@ pyrightconfig.json

poetry.lock

.ruff_cache
.ruff_cache

# Test python files
test.py

docker-compose.yml
11 changes: 4 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
[![Documentation](https://github.com/PyEED/pyeed/actions/workflows/make_docs.yaml/badge.svg)](https://github.com/PyEED/pyeed/actions/workflows/make_docs.yaml)

## About 📖
pyEED is a toolkit enabling object-oriented analysis of protein sequences, instead of working with sequences in a file-oriented fashion. This will enable the user to easily access and manipulate sequence information and to perform analyses on the sequence data.
pyeed is a toolkit enabling object-oriented analysis of protein sequences, instead of working with sequences in a file-oriented fashion. This will enable the user to easily access and manipulate sequence information and to perform analyses on the sequence data.
This library is currently under development and thus the API is subject to change.

![PyEED](./docs/figs/pyeed-model.png)


## Installation ⚙️

Expand All @@ -20,9 +22,4 @@ pip install git+https://github.com/PyEED/pyeed.git

## Quick start 🚀

The library is currently being refactored; the quick start will be updated soon!

## Documentation 📘

Check out the [documentation](https://pyeed.github.io/pyeed/) for in-depth information on how to set up `pyeed`,
use the built-in tools, and store sequence data in databases.
### Launch Neo4j database via Docker and mount to a local directory
17 changes: 17 additions & 0 deletions docker/blast/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# BLAST+ image extended with Python and the FastAPI service in app.py, which
# listens on port 6001.
#
# NOTE(review): the default tag "latest" keeps the behaviour of the previously
# untagged FROM. For reproducible builds pass a concrete release, e.g.
#   docker build --build-arg BLAST_VERSION=<release> .
ARG BLAST_VERSION=latest
FROM ncbi/blast:${BLAST_VERSION}

# Install Python, then FastAPI and uvicorn (the server app.py starts).
# "update" and "install" share one layer so the package index is never stale,
# and the apt lists are removed in that same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
RUN pip3 install --no-cache-dir fastapi uvicorn

# Add the Python script to the container
COPY app.py /usr/local/bin/app.py

# Set the working directory
WORKDIR /usr/local/bin

# app.py writes its temporary query/result files below /usr/local/bin/data,
# so the directory has to exist in the image.
RUN mkdir -p /usr/local/bin/data

# Disable Python output buffering so log lines show up immediately
ENV PYTHONUNBUFFERED=1

# Port the uvicorn server in app.py binds to (documentation only)
EXPOSE 6001

# Run the Python server script
CMD ["python3", "app.py"]
147 changes: 147 additions & 0 deletions docker/blast/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import logging
import os
import subprocess
import sys

from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import RedirectResponse

# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Root logging setup: DEBUG and above are written to stdout as
# "<LEVEL> - <message>".
logging.basicConfig(
    stream=sys.stdout,
    format="%(levelname)s - %(message)s",
    level=logging.DEBUG,
)
# Module-level logger shared by the endpoints.
logger = logging.getLogger(__name__)


def to_fasta(seq: str) -> str:
    """Wrap a raw sequence in a single-record FASTA string.

    The record always carries the header ``>query_sequence``; the sequence
    follows on the next line and no trailing newline is appended.
    """
    header = ">query_sequence"
    return "{}\n{}".format(header, seq)


def _check_db_path_correct(db_path: str, db_name: str) -> None:
    """Ensure ``db_path`` points at an existing directory.

    Args:
        db_path: Filesystem path expected to be the database directory.
        db_name: Accepted for interface symmetry; not inspected here.

    Raises:
        HTTPException: status 400 when the path is missing or is not a
            directory.
    """
    # A directory necessarily exists, so this one test covers the happy path.
    if os.path.isdir(db_path):
        return

    if os.path.exists(db_path):
        reason = f"Database path is not a directory: {db_path}"
    else:
        reason = f"Database path does not exist: {db_path}"
    raise HTTPException(status_code=400, detail=reason)


@app.get("/")
async def read_root() -> RedirectResponse:
    """Redirect the service root to the interactive API docs at ``/docs``.

    The return annotation now states what is actually returned (it used to be
    ``None`` with the mismatch hidden behind a ``type: ignore``).
    """
    logger.debug("Entering root endpoint")
    return RedirectResponse(url="/docs")


def _blast_help(program: str) -> str:
    """Run ``<program> -help`` and return the text it prints on stdout.

    Args:
        program: Name of the BLAST+ executable to query.

    Returns:
        The captured standard output of the help command.

    Raises:
        HTTPException: status 400 when the command exits with a non-zero
            status, status 500 when the executable cannot be started.
    """
    command = [program, "-help"]
    logger.debug(f"Running command: {command}")

    try:
        # check=True is what makes CalledProcessError reachable; without it a
        # failing command was silently returned as (possibly empty) stdout.
        result = subprocess.run(command, capture_output=True, check=True, text=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"{program} help command failed: {e.stderr}")
        raise HTTPException(status_code=400, detail=f"Command failed: {e.stderr}")
    except OSError as e:
        # Raised when the executable is missing or not runnable.
        logger.error(f"{program} help command failed: {e}")
        raise HTTPException(status_code=500, detail=f"Command failed: {e}")

    return result.stdout


@app.get("/blastp_help")
def blastp_help() -> str:
    """Return the help text of ``blastp``."""
    logger.debug("Entering /blastp_help endpoint")
    return _blast_help("blastp")


@app.get("/blastn_help")
def blastn_help() -> str:
    """Return the help text of ``blastn``."""
    logger.debug("Entering /blastn_help endpoint")
    return _blast_help("blastn")


@app.post("/blast")
async def run_blast(request: Request) -> dict[str, str]:
    """Run a BLAST search with the parameters given in the JSON request body.

    The body must be a JSON object with these keys:
        mode: BLAST program to run; one of blastn, blastp, blastx, tblastn,
            tblastx.
        sequence: Query sequence; written to a one-record FASTA file.
        db_path: Directory that contains the BLAST database.
        db_name: Database name inside ``db_path``.
        evalue: Passed to ``-evalue`` (converted with ``float``).
        max_target_seqs: Passed to ``-max_target_seqs`` (converted with ``int``).
        num_threads: Passed to ``-num_threads`` (converted with ``int``).

    Returns:
        ``{"result": <text>}`` where the text is the content of the BLAST
        output file (``-outfmt 6``).

    Raises:
        HTTPException: status 400 for a malformed body, missing or invalid
            parameters, an unsupported ``mode`` or a bad ``db_path``;
            status 500 when running BLAST or handling its files fails.
    """
    # Local import keeps this block self-contained; tempfile is stdlib.
    import tempfile

    # ``mode`` becomes the executable name, so only known BLAST+ search
    # programs may be requested.
    allowed_modes = ("blastn", "blastp", "blastx", "tblastn", "tblastx")
    # Directory for per-request scratch files (same location as before).
    data_dir = "/usr/local/bin/data"

    # --- parse and validate the request: client errors are reported as 400 ---
    try:
        data = await request.json()
        logger.debug(f"Received request data: {data}")

        for key in ("mode", "sequence", "db_path", "db_name"):
            if not isinstance(data[key], str):
                raise TypeError(f"{key!r} must be a string")

        mode = data["mode"]
        sequence = data["sequence"]
        logger.debug(f"Sequence received: {sequence}")
        db_path = data["db_path"]
        db_name = data["db_name"]
        evalue = float(data["evalue"])
        max_target_seqs = int(data["max_target_seqs"])
        num_threads = int(data["num_threads"])
    except (KeyError, TypeError, ValueError) as e:
        # KeyError: missing key; TypeError: wrong JSON type;
        # ValueError: invalid JSON or a value that is not a number.
        logger.error(f"Invalid BLAST request: {e!r}")
        raise HTTPException(
            status_code=400, detail=f"Invalid request parameters: {e!r}"
        )

    if mode not in allowed_modes:
        raise HTTPException(status_code=400, detail=f"Unsupported BLAST mode: {mode!r}")

    # Called outside the generic handler below so its 400 response is not
    # converted into a 500.
    _check_db_path_correct(db_path, db_name)

    # --- run BLAST: server-side failures are reported as 500 ---
    try:
        os.makedirs(data_dir, exist_ok=True)

        # A private scratch directory per request; it is removed on exit from
        # the ``with`` block even when BLAST fails.
        with tempfile.TemporaryDirectory(dir=data_dir) as workdir:
            query_path = os.path.join(workdir, "query.fasta")
            result_path = os.path.join(workdir, "result.out")

            # Create FASTA file
            fasta = to_fasta(sequence)
            with open(query_path, "w") as file:
                file.write(fasta)
            logger.debug(f" file content: {fasta}")

            # debug list all files in db path
            logger.debug(f"files in db path: {os.listdir(db_path)}")

            # Run BLAST
            command = [
                mode,
                "-query",
                query_path,
                "-db",
                f"{db_path}/{db_name}",
                "-evalue",
                str(evalue),
                "-outfmt",
                "6",
                "-num_threads",
                str(num_threads),
                "-out",
                result_path,
                "-max_target_seqs",
                str(max_target_seqs),
            ]

            logger.debug(f"Running command: {command}")
            subprocess.run(command, capture_output=True, check=True, text=True)

            # Read results
            with open(result_path, "r") as file:
                result_data = file.read()

        return {"result": result_data}

    except subprocess.CalledProcessError as e:
        # Surface BLAST's own error output instead of only the exit status.
        logger.error(f"Error running BLAST: {e.stderr}")
        raise HTTPException(status_code=500, detail=e.stderr or str(e))
    except Exception as e:
        logger.error(f"Error running BLAST: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))


if __name__ == "__main__":
    # Started as a script: serve this module's ``app`` with uvicorn on all
    # interfaces, port 6001, with auto-reload switched on.
    from uvicorn import run as serve

    serve("app:app", host="0.0.0.0", port=6001, reload=True)
4 changes: 4 additions & 0 deletions docker/blast/reload_development.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Rebuild the BLAST image from scratch and restart the "blast" container.
# Run it from the directory that contains the Dockerfile (build context ".").
#
# The host directories mounted into the container can be overridden through
# the environment; the defaults are the previously hard-coded paths:
#   BLAST_DB_DIR      -> /blast/db         (default: /home/ala/BA/mydb)
#   BLAST_CUSTOM_DIR  -> /blast/db/custom  (default: /mnt/databases_shared/)
set -eu

BLAST_DB_DIR="${BLAST_DB_DIR:-/home/ala/BA/mydb}"
BLAST_CUSTOM_DIR="${BLAST_CUSTOM_DIR:-/mnt/databases_shared/}"

# The container may not exist yet (first run), so these two steps are allowed
# to fail without aborting the script.
sudo docker stop blast || true
# "docker rm" is the documented command for removing a container.
sudo docker rm blast || true

# With "set -e" a failed build stops here instead of starting a stale image.
sudo docker build --no-cache -t blast_image .
sudo docker run --name blast \
    --volume "${BLAST_DB_DIR}:/blast/db" \
    --volume "${BLAST_CUSTOM_DIR}:/blast/db/custom" \
    -p 6001:6001 \
    blast_image
Binary file added docker/blast/test_db/protein_db.pdb
Binary file not shown.
Binary file added docker/blast/test_db/protein_db.phr
Binary file not shown.
Binary file added docker/blast/test_db/protein_db.pin
Binary file not shown.
24 changes: 24 additions & 0 deletions docker/blast/test_db/protein_db.pjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"version": "1.2",
"dbname": "protein_db",
"dbtype": "Protein",
"db-version": 5,
"description": "Protein Database",
"number-of-letters": 893,
"number-of-sequences": 10,
"last-updated": "2025-01-20T15:57:00",
"number-of-volumes": 1,
"bytes-total": 51054,
"bytes-to-cache": 1088,
"files": [
"protein_db.pdb",
"protein_db.phr",
"protein_db.pin",
"protein_db.pog",
"protein_db.pos",
"protein_db.pot",
"protein_db.psq",
"protein_db.ptf",
"protein_db.pto"
]
}
Binary file added docker/blast/test_db/protein_db.pog
Binary file not shown.
Binary file added docker/blast/test_db/protein_db.pos
Binary file not shown.
Binary file added docker/blast/test_db/protein_db.psq
Binary file not shown.
Binary file added docker/blast/test_db/protein_db.ptf
Binary file not shown.
Binary file added docker/blast/test_db/protein_db.pto
Binary file not shown.
20 changes: 20 additions & 0 deletions docker/blast/test_files/protein_sequences.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
>seq1
MSEQVAAVAKLRAKASEAAKEAKAREAAKKLAEAAKKAKAKEAAKRAEAKLAEKAKAAKRAEAKAAKEAKRAAAKRAEAKLAEKAKAAK
>seq2
MADLKQAKALQAAKELRAALKEAQAKRAAKELRAAKELRAAKLKAELRAAKAAKLEAAKRAELAAKRAEAKRAAELAAKAELRAEAKLA
>seq3
MAKLQAELRAAKQAKELRAKAKLEAALKEARAKALKEARAKALKEAQAKELRAAKLEAKAKRAEALQAKELRAAKAAKEAKRAAKLRAE
>seq4
MAAKLQEKAAKLEAAKRAEKAAKRAEAAKRAAELAAKRAELRAAKLEAAKELRAELRAAKLEAAKRAAAKLQEKAKLAEKAAKEAKRAA
>seq5
MAAKLRAKLAEKAAKRAAAKLQEKAAKRAAAKLQEKAAKELRAEKAAKRAELRAEKAAKLQEKRAEAAKLQEKRAELAAKELRAEKRAA
>seq6
MSRAELKAAKRAAAKLRAKAAKRAELAAKLQEKRAAELAAKLEAAKELRAAKLEAAKLRAAKLEAAKLEAAKELRAEKRAEKAAKLEAA
>seq7
MSRAAKLEAALKEAAKRAEALKEAAKRAEAKLEAAKLQEKAKLEAAKLQEKAAKLEAAKLEAAKLEAAKLQEKAAKLEAALKELRAEKAA
>seq8
MRAAKLEAAKLQEKAAKRAELRAELRAEKAAKLQEKAAKLQEKRAEKAAKLQEKAAKLQEKRAEKAAKLQEKAAKLQEKRAEKRAELAA
>seq9
MAAKLEAAKLQEKAAKLQEKAAKLQEKRAAELRAKLQEKRAAELAAKLQEKRAEKAAKLQEKAAKLQEKRAEKAAKLQEKAAKLQEKRAA
>seq10
MAKLQEKRAEKAAKLQEKAAKLQEKRAEKAAKLQEKRAEKAAKLQEKRAEKAAKLQEKRAEKAAKLQEKRAEKAAKLQEKRAEKAAKLQA
2 changes: 2 additions & 0 deletions docker/blast/test_files/query.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>query1
MSEQVAAVAKLRAKASEAAKEAKAREAAKKLAEAAKKAKAKEAAKRAEAKLAEKAKAAKRAEAKAAKEAKRAAAKRAEAKLAEKAKAAK
16 changes: 16 additions & 0 deletions docker/clustalo/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Python 3.12 image with the Clustal Omega 1.2.4 binary; starts app.py
# (presumably a FastAPI service, given the packages installed below — the
# script itself is not part of this file).
FROM python:3.12

WORKDIR /app/api

# Install clustalo: download the prebuilt binary straight to its final
# location instead of into the WORKDIR and moving it afterwards.
# NOTE(review): the download is plain HTTP and not checksum-verified; add a
# SHA-256 check once a trusted digest for this binary is available.
RUN wget -q -O /usr/local/bin/clustalo \
        http://www.clustal.org/omega/clustalo-1.2.4-Ubuntu-x86_64 \
    && chmod +x /usr/local/bin/clustalo

# Install python dependencies without keeping pip's download cache in the layer
RUN pip install --no-cache-dir fastapi python-multipart uvicorn

COPY app.py .


CMD ["python", "app.py"]
Loading

0 comments on commit f718a27

Please sign in to comment.