Skip to content

Commit

Permalink
Workable
Browse files Browse the repository at this point in the history
  • Loading branch information
unkcpz committed Aug 12, 2024
1 parent fb4e1a9 commit 16b4180
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 31 deletions.
33 changes: 32 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ ARG FULL_STACK_VER=2024.1021
ARG UV_VER=0.2.27
ARG QE_VER=7.2
ARG QE_DIR=/opt/conda/envs/quantum-espresso-${QE_VER}
ARG HQ_VER=0.19.0

ARG UV_CACHE_DIR=/tmp/uv_cache
ARG QE_APP_SRC=/tmp/quantum-espresso
Expand Down Expand Up @@ -43,19 +44,37 @@ RUN --mount=from=uv,source=/uv,target=/bin/uv \

# STAGE 3
# - Prepare AiiDA profile and localhost computer
# - Prepare hq computer using hyperqueue as scheduler
# - Install QE codes and pseudopotentials
# - Archive home folder
FROM build_deps AS home_build
ARG QE_DIR
ARG HQ_VER

# Install hq binary
RUN wget -c -O hq.tar.gz https://github.com/It4innovations/hyperqueue/releases/download/v${HQ_VER}/hq-v${HQ_VER}-linux-x64.tar.gz && \
tar xf hq.tar.gz -C /opt/conda/

ENV PSEUDO_FOLDER=/tmp/pseudo
RUN mkdir -p ${PSEUDO_FOLDER} && \
python -m aiidalab_qe download-pseudos --dest ${PSEUDO_FOLDER}

ENV UV_CONSTRAINT=${PIP_CONSTRAINT}
# Install aiida-hyperqueue
# XXX: fix this once aiida-hyperqueue has been released
RUN --mount=from=uv,source=/uv,target=/bin/uv \
--mount=from=build_deps,source=${UV_CACHE_DIR},target=${UV_CACHE_DIR},rw \
git clone https://github.com/aiidateam/aiida-hyperqueue && \
uv pip install --system --strict --compile-bytecode --cache-dir=${UV_CACHE_DIR} aiida-hyperqueue

COPY ./before-notebook.d/* /usr/local/bin/before-notebook.d/

# TODO: Remove PGSQL and daemon log files, and other unneeded files
RUN --mount=from=qe_conda_env,source=${QE_DIR},target=${QE_DIR} \
bash /usr/local/bin/before-notebook.d/20_start-postgresql.sh && \
bash /usr/local/bin/before-notebook.d/40_prepare-aiida.sh && \
python -m aiidalab_qe install-qe && \
bash /usr/local/bin/before-notebook.d/41_setup-hq-computer.sh && \
python -m aiidalab_qe install-qe --computer local-hq && \
python -m aiidalab_qe install-pseudos --source ${PSEUDO_FOLDER} && \
verdi daemon stop && \
mamba run -n aiida-core-services pg_ctl stop && \
Expand All @@ -82,6 +101,18 @@ RUN --mount=from=uv,source=/uv,target=/bin/uv \
--mount=from=build_deps,source=${QE_APP_SRC},target=${QE_APP_SRC},rw \
uv pip install --strict --system --compile-bytecode --cache-dir=${UV_CACHE_DIR} ${QE_APP_SRC}

# TODO: it seems this needs to be done twice
# ENV UV_CONSTRAINT=${PIP_CONSTRAINT}
# # Install the aiida-hyperqueue
# # XXX: fix me after release aiida-hyperqueue
# RUN --mount=from=uv,source=/uv,target=/bin/uv \
# --mount=from=build_deps,source=${UV_CACHE_DIR},target=${UV_CACHE_DIR},rw \
# git clone https://github.com/aiidateam/aiida-hyperqueue && \
# uv pip install --system --strict --compile-bytecode --cache-dir=${UV_CACHE_DIR} aiida-hyperqueue

# copy hq binary
COPY --from=home_build /opt/conda/hq /usr/local/bin/

COPY --from=qe_conda_env ${QE_DIR} ${QE_DIR}

USER root
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@

set -x

# XXX: the daemon needs to be started later
verdi daemon stop || echo "stop fail"

# Setup hyperqueue computer if needed
HQ_COMPUTER="local-hq"
LOCALHOST_MPI_PROCS_PER_MACHINE=2
# XXX: hardcode the number of MPI processes, or read it from the OCI runtime?
LOCAL_MPI_PROCS=2
LOCAL_MEM=2560

verdi show computer ${HQ_COMPUTER}
if [[ $? -eq 0 ]]; then
computer_list=$(verdi computer list)
if echo ${computer_list} | grep -q ${HQ_COMPUTER}; then
echo "${HQ_COMPUTER} already setup"
else
# computer
# XXX: should the memory be upper-bounded?
verdi computer show ${HQ_COMPUTER} || verdi computer setup \
--non-interactive \
--label "${HQ_COMPUTER}" \
Expand All @@ -19,10 +25,16 @@ else
--transport core.local \
--scheduler hyperqueue \
--work-dir /home/${NB_USER}/aiida_run/ \
--mpirun-command "mpirun -np {num_cpus}" \
--mpiprocs-per-machine ${LOCALHOST_MPI_PROCS_PER_MACHINE}
--mpirun-command "mpirun -np {tot_num_mpiprocs}" \
--mpiprocs-per-machine ${LOCAL_MPI_PROCS}

verdi computer configure core.local "${HQ_COMPUTER}" \
--non-interactive \
--safe-interval 5.0
fi

# Start hq server with a worker
nohup hq server start 1>$HOME/.hq-stdout 2>$HOME/.hq-stderr &
nohup hq worker start --cpus=${LOCAL_MPI_PROCS} --resource "mem=sum(${LOCAL_MEM})" --no-detect-resources &

verdi daemon start || echo "start fail"
7 changes: 4 additions & 3 deletions src/aiidalab_qe/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from aiida import load_profile
from aiidalab_qe.common.setup_codes import codes_are_setup
from aiidalab_qe.common.setup_codes import install_and_setup as install_qe_codes
from aiidalab_qe.common.setup_codes import install_and_setup as install_and_setup_qe_codes

# The default profile name of AiiDAlab container.
_DEFAULT_PROFILE = "default"
Expand All @@ -20,11 +20,12 @@ def cli():

@cli.command()
@click.option("-f", "--force", is_flag=True)
@click.option("--computer")
@click.option("-p", "--profile", default=_DEFAULT_PROFILE)
def install_qe(force, profile):
def install_qe(force, profile, computer):
load_profile(profile)
try:
for msg in install_qe_codes(force=force):
for msg in install_and_setup_qe_codes(target_computer=computer, force=force):
click.echo(msg)
assert codes_are_setup()
click.secho("Codes are setup!", fg="green")
Expand Down
40 changes: 18 additions & 22 deletions src/aiidalab_qe/common/setup_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,7 @@ def get_qe_env():

def qe_installed():
env_exist = get_qe_env().exists()
proc = subprocess.run(
["conda", "list", "-n", f"{get_qe_env()}", "qe"],
check=False,
capture_output=True,
)
proc = subprocess.run(["conda", "list", "-n", f"{get_qe_env().name}", "qe"], check=True, capture_output=True,)

# XXX: the substring check for "qe" is not future-proof: similarly named packages such as qe-tool would also match. Is there a better solution, e.g. parsing JSON output?
return env_exist and "qe" in str(proc.stdout)
Expand Down Expand Up @@ -106,13 +102,13 @@ def _generate_header_to_setup_code():
return header_code


def _generate_string_to_setup_code(code_name, computer_name="localhost"):
def _generate_string_to_setup_code(code_name, computer="localhost"):
"""Generate the Python string to setup an AiiDA code for a given computer.
Tries to load an existing code and if not existent,
generates Python code to create and store a new code setup."""
try:
load_code(f"{code_name}-{QE_VERSION}@{computer_name}")
load_code(f"{code_name}-{QE_VERSION}@{computer}")
except NotExistent:
label = f"{code_name}-{QE_VERSION}"
description = f"{code_name}.x ({QE_VERSION}) setup by AiiDAlab."
Expand All @@ -131,7 +127,7 @@ def _generate_string_to_setup_code(code_name, computer_name="localhost"):
code.store()
""".format( # noqa: UP032
computer_name,
computer,
label,
description,
filepath_executable,
Expand All @@ -154,7 +150,7 @@ def setup_codes():
raise RuntimeError(f"Failed to setup codes: {error}") from None


def install_and_setup(force=False, target_computer="localhost"):
def install_and_setup(target_computer, force=False):
"""Install Quantum ESPRESSO and the corresponding AiiDA codes.
Args:
Expand All @@ -168,8 +164,8 @@ def install_and_setup(force=False, target_computer="localhost"):
if not force and FN_DO_NOT_SETUP.exists():
raise RuntimeError("Installation failed in previous attempt.")

_install()
_setup(target_computer)
yield from _install()
yield from _setup(target_computer)


def _install():
Expand All @@ -186,17 +182,17 @@ def _install():
"is not available."
)

if not qe_installed():
# First, install Quantum ESPRESSO.
yield "Installing QE..."
try:
install_qe()
except subprocess.CalledProcessError as error:
raise RuntimeError(
f"Failed to create conda environment: {error}"
) from None
else:
return
if qe_installed():
return

# Install Quantum ESPRESSO.
yield "Installing QE..."
try:
install_qe()
except subprocess.CalledProcessError as error:
raise RuntimeError(
f"Failed to create conda environment: {error}"
) from None

except Timeout:
# Assume that the installation was triggered by a different process.
Expand Down

0 comments on commit 16b4180

Please sign in to comment.