From edc38fa0582b7b585939adefdc4864c0ed120d72 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Tue, 23 Jan 2024 14:42:24 +0100 Subject: [PATCH 1/7] Do ewc teach in k8s --- vagrant/.gitignore | 1 + vagrant/Dockerfile | 8 ++ vagrant/README.md | 200 ++++++++++++++++++++++++++++++++++++ vagrant/Vagrantfile | 28 +++++ vagrant/config.yaml.example | 79 ++++++++++++++ vagrant/nbgrader_config.py | 6 ++ 6 files changed, 322 insertions(+) create mode 100644 vagrant/.gitignore create mode 100644 vagrant/Dockerfile create mode 100644 vagrant/README.md create mode 100644 vagrant/Vagrantfile create mode 100644 vagrant/config.yaml.example create mode 100644 vagrant/nbgrader_config.py diff --git a/vagrant/.gitignore b/vagrant/.gitignore new file mode 100644 index 0000000..5b6b072 --- /dev/null +++ b/vagrant/.gitignore @@ -0,0 +1 @@ +config.yaml diff --git a/vagrant/Dockerfile b/vagrant/Dockerfile new file mode 100644 index 0000000..bd44b6b --- /dev/null +++ b/vagrant/Dockerfile @@ -0,0 +1,8 @@ +FROM jupyter/minimal-notebook:latest + +# install apptainer or docker + + +# install ngshare stuff +COPY nbgrader_config.py /etc/jupyter/nbgrader_config.py +RUN python3 -m pip install ngshare_exchange diff --git a/vagrant/README.md b/vagrant/README.md new file mode 100644 index 0000000..015e6ec --- /dev/null +++ b/vagrant/README.md @@ -0,0 +1,200 @@ +# k8s + +Try to run +- JupyterHub for Kubernetes +- nbgrader +- own conda/pip deps +- run containerized ewatercycle models +- inside vagrant with hyper-v +- micro8ks as kubernetes deployment + +## Boot + +```shell +vagrant up +``` + +https://jet.dev/blog/spin-up-local-kubernetes-cluster-agrant/ + +``` +vagrant ssh microk8s_a +ip route | grep default | grep eth0 | cut -d' ' -f9 +172.19.226.152 +vagrant ssh microk8s_b +ip route | grep default | grep eth0 | cut -d' ' -f9 +172.19.234.158 + +#a +sudo -i +echo "172.19.234.158 microk8s-b" >> /etc/hosts +exit + +microk8s add-node + +#b +microk8s join 
172.19.226.152:25000/13a97d6ef692d3eadb078866e78f2acd/6da73c5b9623 + +#a +microk8s kubectl get nodes + +microk8s enable metallb +# Use range inside hyperv default switch +172.19.231.83-172.19.231.93 +microk8s enable hostpath-storage +``` +# NFS + +https://microk8s.io/docs/how-to-nfs + +``` +sudo apt-get install nfs-kernel-server +sudo mkdir -p /srv/nfs +sudo chown nobody:nogroup /srv/nfs +sudo chmod 0777 /srv/nfs +sudo mv /etc/exports /etc/exports.bak +echo '/srv/nfs 172.19.0.0/16(rw,sync,no_subtree_check)' | sudo tee /etc/exports +sudo systemctl restart nfs-kernel-server +microk8s enable helm3 +microk8s helm3 repo add csi-driver-nfs https://raw.githubusercontent.com/kubernetes-csi/csi-driver-nfs/master/charts +microk8s helm3 repo update +microk8s helm3 install csi-driver-nfs csi-driver-nfs/csi-driver-nfs \ + --namespace kube-system \ + --set kubeletDir=/var/snap/microk8s/common/var/lib/kubelet +microk8s kubectl wait pod --selector app.kubernetes.io/name=csi-driver-nfs --for condition=ready --namespace kube-system +microk8s kubectl get csidrivers +``` + +``` + cat sc-nfs.yaml +# sc-nfs.yaml +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: nfs-csi +provisioner: nfs.csi.k8s.io +parameters: + server: 172.19.226.152 + share: /srv/nfs +reclaimPolicy: Delete +volumeBindingMode: Immediate +mountOptions: + - hard + - nfsvers=4.1 +``` + +``` +microk8s kubectl apply -f - < sc-nfs.yaml +``` + +``` +# pvc-nfs.yaml +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: my-pvc +spec: + storageClassName: nfs-csi + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 5Gi +``` + +``` +microk8s kubectl apply -f - < pvc-nfs.yaml +microk8s kubectl describe pvc my-pvc +``` + +After login to JH /home/shared is mounted nfs volume + +# Setup JH + +https://z2jh.jupyter.org/en/stable/jupyterhub/installation.html + +Use namespace `teach` and helm release name `teach1`. 
+ +``` +touch config.yaml +microk8s helm repo add jupyterhub https://hub.jupyter.org/helm-chart/ +microk8s helm repo update +microk8s helm upgrade --cleanup-on-fail \ + --install teach1 jupyterhub/jupyterhub \ + --namespace teach \ + --create-namespace \ + --version=3.2.1 \ + --values config.yaml + +microk8s kubectl config set-context $(microk8s kubectl config current-context) --namespace teach +microk8s kubectl --namespace teach get service proxy-public + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +proxy-public LoadBalancer 10.152.183.208 172.19.231.83 80:30233/TCP 2m51s +``` + +Login on http://172.19.231.83 with any username:password + +# Update config.yaml + + +``` +microk8s helm upgrade --cleanup-on-fail \ + teach1 jupyterhub/jupyterhub \ + --namespace teach \ + --version=3.2.1 \ + --values config.yaml +``` + +# Dashboard + +``` +microk8s dashboard-proxy +``` + +Goto public ip of node A with port+token in console output. + +# ngshare + +https://ngshare.readthedocs.io/en/latest/user_guide/install_z2jh.html + +``` +microk8s helm repo add ngshare https://libretexts.github.io/ngshare-helm-repo/ +microk8s helm repo update +``` + +``` + cat config.ngshare.yaml +deployment: + # Resource limitations for the pod + resources: + limits: + cpu: 100m + memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + +ngshare: + hub_api_token: demo_token_9wRp0h4BLzAnC88jjBfpH0fa4QV9tZNI + # Please change the line below with the namespace your Z2JH helm chart is installed under + # You can omit this value if you're installing ngshare in the same namespace + hub_api_url: http://hub.teach.svc.cluster.local:8081/hub/api + admins: + - sverhoeven + +pvc: + # Amount of storage to allocate + storage: 1Gi + ``` + +``` +microk8s helm install ngshare ngshare/ngshare --namespace teach -f config.ngshare.yaml +``` + +# TODO + +- ewatercycle image +- nbgrader +- run model in container + - singleuser container can run apptainer/podman inside + - 
https://www.redhat.com/sysadmin/podman-inside-kubernetes \ No newline at end of file diff --git a/vagrant/Vagrantfile b/vagrant/Vagrantfile new file mode 100644 index 0000000..a7a8e40 --- /dev/null +++ b/vagrant/Vagrantfile @@ -0,0 +1,28 @@ +Vagrant.configure("2") do |config| + config.vm.box = "generic/ubuntu2204" + config.vm.provider "hyperv" + config.vm.network "public_network", use_dhcp_assigned_default_route: true + config.vm.provider "hyperv" do |h| + h.maxmemory = 8096 + h.cpus = 4 + h.enable_virtualization_extensions = true + h.linked_clone = true + end + config.vm.provision "shell", inline: <<-EOF + snap install microk8s --classic + microk8s status --wait-ready + usermod -a -G microk8s vagrant + EOF + config.vm.define "microk8s_a" do |microk8s_a| + microk8s_a.vm.hostname = "microk8s-a" + microk8s_a.vm.provider "virtualbox" do |vb| + vb.name = "microk8s-a" + end + end + config.vm.define "microk8s_b" do |microk8s_b| + microk8s_b.vm.hostname = "microk8s-b" + microk8s_b.vm.provider "virtualbox" do |vb| + vb.name = "microk8s-b" + end + end +end diff --git a/vagrant/config.yaml.example b/vagrant/config.yaml.example new file mode 100644 index 0000000..8623d89 --- /dev/null +++ b/vagrant/config.yaml.example @@ -0,0 +1,79 @@ +# This file can update the JupyterHub Helm chart's default configuration values. +# +# For reference see the configuration reference and default values, but make +# sure to refer to the Helm chart version of interest to you! 
+# +# Introduction to YAML: https://www.youtube.com/watch?v=cdLNKUoMc6c +# Chart config reference: https://zero-to-jupyterhub.readthedocs.io/en/stable/resources/reference.html +# Chart default values: https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/HEAD/jupyterhub/values.yaml +# Available chart versions: https://hub.jupyter.org/helm-chart/ +# +hub: +# containerSecurityContext: +# privigleged: true +# runAsUser: 1000 # default user id in jupyter images + config: + Authenticator: + admin_user: + - sverhoeven + allowed_users: + - sverhoeven + GitHubOAuthenticator: + client_id: + client_secret: + oauth_callback_url: http:///hub/oauth_callback + JupyterHub: + authenticator_class: github + allowed_organizations: + - eWaterCycle + scope: + - read:org + extraConfig: + ngshare.py: | + c.JupyterHub.services.append({ + 'name': 'ngshare', + 'url': 'http://ngshare.teach.svc.cluster.local:8080', + 'api_token': '3VEgEzkhFkQsdZNI7zhnyMW6U0a2xsZq', + 'oauth_no_confirm': True}) +singleuser: + image: + # You should replace the "latest" tag with a fixed version from: + # https://hub.docker.com/r/jupyter/datascience-notebook/tags/ + # Inspect the Dockerfile at: + # https://github.com/jupyter/docker-stacks/tree/HEAD/datascience-notebook/Dockerfile + name: jupyter/datascience-notebook + tag: latest + storage: + extraVolumes: + - name: jupyterhub-shared + persistentVolumeClaim: + claimName: my-pvc + extraVolumeMounts: + - name: jupyterhub-shared + mountPath: /home/shared + extraFiles: + # jupyter_notebook_config reference: https://jupyter-notebook.readthedocs.io/en/stable/config.html + jupyter_notebook_config.json: + mountPath: /etc/jupyter/jupyter_notebook_config.json + # data is a YAML structure here but will be rendered to JSON file as our + # file extension is ".json". 
+ data: + MappingKernelManager: + # cull_idle_timeout: timeout (in seconds) after which an idle kernel is + # considered ready to be culled + cull_idle_timeout: 1200 # default: 0 + + # cull_interval: the interval (in seconds) on which to check for idle + # kernels exceeding the cull timeout value + cull_interval: 120 # default: 300 + + # cull_connected: whether to consider culling kernels which have one + # or more connections + cull_connected: true # default: false + + # cull_busy: whether to consider culling kernels which are currently + # busy running some code + cull_busy: false # default: false + networkPolicy: + egressAllowRules: + privateIPs: true \ No newline at end of file diff --git a/vagrant/nbgrader_config.py b/vagrant/nbgrader_config.py new file mode 100644 index 0000000..97e133e --- /dev/null +++ b/vagrant/nbgrader_config.py @@ -0,0 +1,6 @@ +from ngshare_exchange import configureExchange +c=get_config() +configureExchange(c, 'http://ngshare.teach.svc.cluster.local:8080/services/ngshare') +# Add the following line to let students access courses without configuration +# For more information, read Notes for Instructors in the documentation +c.CourseDirectory.course_id = '*' From b184238d778dbc814fb30155a39ff9ba6ab6426e Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Tue, 23 Jan 2024 14:55:48 +0100 Subject: [PATCH 2/7] More todos --- vagrant/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vagrant/Dockerfile b/vagrant/Dockerfile index bd44b6b..2a1c6e5 100644 --- a/vagrant/Dockerfile +++ b/vagrant/Dockerfile @@ -6,3 +6,7 @@ FROM jupyter/minimal-notebook:latest # install ngshare stuff COPY nbgrader_config.py /etc/jupyter/nbgrader_config.py RUN python3 -m pip install ngshare_exchange + +# install ewatercycle + its deps + +# install extra jupyter extensions like lsp \ No newline at end of file From 7407dc007748e8c7686373fceab3df25677ec101 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Wed, 24 Jan 2024 12:39:11 +0100 Subject: 
[PATCH 3/7] Use custom image --- vagrant/.gitignore | 1 + vagrant/Dockerfile | 38 ++++++++++++++++++++++++++++++++++--- vagrant/README.md | 19 ++++++++++++------- vagrant/config.yaml.example | 11 +++++++---- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/vagrant/.gitignore b/vagrant/.gitignore index 5b6b072..48b8928 100644 --- a/vagrant/.gitignore +++ b/vagrant/.gitignore @@ -1 +1,2 @@ config.yaml +.vagrant/ diff --git a/vagrant/Dockerfile b/vagrant/Dockerfile index 2a1c6e5..927344c 100644 --- a/vagrant/Dockerfile +++ b/vagrant/Dockerfile @@ -1,12 +1,44 @@ +# docker build -t ghcr.io/ewatercycle/teaching:latest . +# docker run -ti --rm --privileged ghcr.io/ewatercycle/teaching:latest bash + +# Following does not work +# docker run -ti --rm --cap-add SYS_ADMIN --cap-add MKNOD --cap-add SYS_CHROOT --cap-add SETFCAP ghcr.io/ewatercycle/teaching:latest bash +# apptainer run docker://ghcr.io/ewatercycle/leakybucket-grpc4bmi:v0.0.1 +# FATAL: container creation failed: mount hook function failure: mount /proc/self/fd/3->/var/lib/apptainer/mnt/session/rootfs error: while mounting image /proc/self/fd/3: squashfuse_ll exited with status 1: fuse: device not found, try 'modprobe fuse' first + +# apptainer pull docker://ghcr.io/ewatercycle/leakybucket-grpc4bmi:v0.0.1 +# ipython +# from grpc4bmi.bmi_client_apptainer import BmiClientApptainer +# model = BmiClientApptainer('docker://ghcr.io/ewatercycle/leakybucket-grpc4bmi:v0.0.1', work_dir='/tmp') +# model.get_component_name() +# del model + +# echo $CR_PAT | docker login ghcr.io -u sverhoeven --password-stdin +# docker push ghcr.io/ewatercycle/teaching:latest + FROM jupyter/minimal-notebook:latest -# install apptainer or docker +LABEL org.opencontainers.image.source=https://github.com/eWaterCycle/teaching +LABEL org.opencontainers.image.description="eWatercycle teaching" +LABEL org.opencontainers.image.licenses=Apache-2.0 + +USER root +# install apptainer or podman +# RUN apt update && apt install -y podman && ln 
-s /usr/bin/podman /usr/bin/docker +RUN apt update && apt install -y libfuse2 uidmap squashfs-tools squashfuse fuse2fs fuse-overlayfs fakeroot +RUN wget https://github.com/apptainer/apptainer/releases/download/v1.2.5/apptainer_1.2.5_amd64.deb && \ +dpkg -i apptainer_1.2.5_amd64.deb && rm apptainer_1.2.5_amd64.deb # install ngshare stuff COPY nbgrader_config.py /etc/jupyter/nbgrader_config.py -RUN python3 -m pip install ngshare_exchange +# need fork of ngshare to work with latest jupyter +RUN mamba install -y nbgrader && pip install git+https://github.com/lauri3k/ngshare_exchange.git#master # install ewatercycle + its deps +RUN mamba install -y esmvaltool-python && pip install ewatercycle-leakybucket +# TODO other models and utils + +# TODO install extra jupyter extensions like lsp -# install extra jupyter extensions like lsp \ No newline at end of file +USER 1000 diff --git a/vagrant/README.md b/vagrant/README.md index 015e6ec..c43a7b7 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -7,6 +7,7 @@ Try to run - run containerized ewatercycle models - inside vagrant with hyper-v - micro8ks as kubernetes deployment +- mount dcache ## Boot @@ -19,20 +20,20 @@ https://jet.dev/blog/spin-up-local-kubernetes-cluster-agrant/ ``` vagrant ssh microk8s_a ip route | grep default | grep eth0 | cut -d' ' -f9 -172.19.226.152 +172.19.224.255 vagrant ssh microk8s_b ip route | grep default | grep eth0 | cut -d' ' -f9 -172.19.234.158 +172.19.224.136 #a sudo -i -echo "172.19.234.158 microk8s-b" >> /etc/hosts +echo "172.19.224.136 microk8s-b" >> /etc/hosts exit microk8s add-node #b -microk8s join 172.19.226.152:25000/13a97d6ef692d3eadb078866e78f2acd/6da73c5b9623 +microk8s join 172.19.224.255:25000/ca5b88a8fce3cd45bff6aa8eb435d140/699b0df3a535 #a microk8s kubectl get nodes @@ -41,13 +42,14 @@ microk8s enable metallb # Use range inside hyperv default switch 172.19.231.83-172.19.231.93 microk8s enable hostpath-storage + microk8s enable rbac # As advised at 
https://z2jh.jupyter.org/en/stable/administrator/security.html#use-role-based-access-control-rbac ``` # NFS https://microk8s.io/docs/how-to-nfs ``` -sudo apt-get install nfs-kernel-server +sudo apt-get install -y nfs-kernel-server sudo mkdir -p /srv/nfs sudo chown nobody:nogroup /srv/nfs sudo chmod 0777 /srv/nfs @@ -74,7 +76,7 @@ metadata: name: nfs-csi provisioner: nfs.csi.k8s.io parameters: - server: 172.19.226.152 + server: 172.19.224.255 share: /srv/nfs reclaimPolicy: Delete volumeBindingMode: Immediate @@ -197,4 +199,7 @@ microk8s helm install ngshare ngshare/ngshare --namespace teach -f config.ngsh - nbgrader - run model in container - singleuser container can run apptainer/podman inside - - https://www.redhat.com/sysadmin/podman-inside-kubernetes \ No newline at end of file + - https://www.redhat.com/sysadmin/podman-inside-kubernetes +- mount dcache + https://github.com/wunderio/csi-rclone + https://github.com/simplyzee/kube-rclone diff --git a/vagrant/config.yaml.example b/vagrant/config.yaml.example index 8623d89..4a7ae94 100644 --- a/vagrant/config.yaml.example +++ b/vagrant/config.yaml.example @@ -9,9 +9,6 @@ # Available chart versions: https://hub.jupyter.org/helm-chart/ # hub: -# containerSecurityContext: -# privigleged: true -# runAsUser: 1000 # default user id in jupyter images config: Authenticator: admin_user: @@ -28,6 +25,11 @@ hub: - eWaterCycle scope: - read:org + # TODO fix below, as is jupyterhub gives error when starting server + # KubeSpawner: + # container_security_context: + # privileged: true # TODO less privileged + # runAsUser: 1000 extraConfig: ngshare.py: | c.JupyterHub.services.append({ @@ -41,7 +43,8 @@ singleuser: # https://hub.docker.com/r/jupyter/datascience-notebook/tags/ # Inspect the Dockerfile at: # https://github.com/jupyter/docker-stacks/tree/HEAD/datascience-notebook/Dockerfile - name: jupyter/datascience-notebook + # name: jupyter/datascience-notebook + name: ghcr.io/ewatercycle/teaching tag: latest storage: extraVolumes: 
From 42812562039cbba39b53828ac64aa0fd363c974d Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Wed, 24 Jan 2024 13:28:26 +0100 Subject: [PATCH 4/7] More todos --- vagrant/README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vagrant/README.md b/vagrant/README.md index c43a7b7..eaf95e2 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -136,7 +136,7 @@ proxy-public LoadBalancer 10.152.183.208 172.19.231.83 80:30233/TCP 2m Login on http://172.19.231.83 with any username:password -# Update config.yaml +# Upgrade config.yaml ``` @@ -193,10 +193,13 @@ pvc: microk8s helm install ngshare ngshare/ngshare --namespace teach -f config.ngshare.yaml ``` + # TODO - ewatercycle image -- nbgrader +- nbgrader with ngshare +- for ngshare-course-management cli create web gui, like a jupyterlab extension, as cli might be too complex for teachers +- instead of ngshare use nfs volume thats read/write for all users. Is less secure but easier to use. - run model in container - singleuser container can run apptainer/podman inside - https://www.redhat.com/sysadmin/podman-inside-kubernetes From 480f9f3e3def397e90aeb5467ec4f31bef1f5d8e Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Mon, 29 Jan 2024 13:44:48 +0100 Subject: [PATCH 5/7] privileged=true can start apptainer container --- vagrant/README.md | 69 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/vagrant/README.md b/vagrant/README.md index eaf95e2..bc14dc4 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -20,27 +20,26 @@ https://jet.dev/blog/spin-up-local-kubernetes-cluster-agrant/ ``` vagrant ssh microk8s_a ip route | grep default | grep eth0 | cut -d' ' -f9 -172.19.224.255 +172.26.149.178 vagrant ssh microk8s_b ip route | grep default | grep eth0 | cut -d' ' -f9 -172.19.224.136 +172.26.146.78 #a sudo -i -echo "172.19.224.136 microk8s-b" >> /etc/hosts +echo "172.26.146.78 microk8s-b" >> /etc/hosts exit microk8s 
add-node #b -microk8s join 172.19.224.255:25000/ca5b88a8fce3cd45bff6aa8eb435d140/699b0df3a535 +microk8s join 172.26.149.178:25000/ca5b88a8fce3cd45bff6aa8eb435d140/699b0df3a535 #a microk8s kubectl get nodes -microk8s enable metallb # Use range inside hyperv default switch -172.19.231.83-172.19.231.93 +microk8s enable metallb:172.26.145.83-172.26.145.93 microk8s enable hostpath-storage microk8s enable rbac # As advised at https://z2jh.jupyter.org/en/stable/administrator/security.html#use-role-based-access-control-rbac ``` @@ -49,12 +48,14 @@ microk8s enable hostpath-storage https://microk8s.io/docs/how-to-nfs ``` +# on a +sudo apt update sudo apt-get install -y nfs-kernel-server sudo mkdir -p /srv/nfs sudo chown nobody:nogroup /srv/nfs sudo chmod 0777 /srv/nfs sudo mv /etc/exports /etc/exports.bak -echo '/srv/nfs 172.19.0.0/16(rw,sync,no_subtree_check)' | sudo tee /etc/exports +echo '/srv/nfs 172.0.0.0/8(rw,sync,no_subtree_check)' | sudo tee /etc/exports sudo systemctl restart nfs-kernel-server microk8s enable helm3 microk8s helm3 repo add csi-driver-nfs https://raw.githubusercontent.com/kubernetes-csi/csi-driver-nfs/master/charts @@ -76,7 +77,7 @@ metadata: name: nfs-csi provisioner: nfs.csi.k8s.io parameters: - server: 172.19.224.255 + server: 172.26.149.178 share: /srv/nfs reclaimPolicy: Delete volumeBindingMode: Immediate @@ -193,6 +194,58 @@ pvc: microk8s helm install ngshare ngshare/ngshare --namespace teach -f config.ngshare.yaml ``` +## Container in container + +``` +# fuse-device-plugin.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: fuse-device-plugin-daemonset + namespace: kube-system +spec: + selector: + matchLabels: + name: fuse-device-plugin-ds + template: + metadata: + labels: + name: fuse-device-plugin-ds + spec: + hostNetwork: true + containers: + - image: soolaugust/fuse-device-plugin:v1.0 + name: fuse-device-plugin-ctr + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + volumeMounts: + - name: 
device-plugin + mountPath: /var/lib/kubelet/device-plugins + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + imagePullSecrets: + - name: registry-secret +``` + +``` +microk8s kubectl apply -f - < fuse-device-plugin.yaml +``` + +``` +apptainer run docker://alpine:latest cat /etc/os-release +apptainer run docker://ghcr.io/ewatercycle/leakybucket-grpc4bmi:v0.0.1 +``` + +```python +from grpc4bmi.bmi_client_apptainer import BmiClientApptainer +model = BmiClientApptainer('docker://ghcr.io/ewatercycle/leakybucket-grpc4bmi:v0.0.1', work_dir='/tmp') +model.get_component_name() +del model +``` # TODO From b359590a7d3b236e4c370c9f03a0962eecda0bb7 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Tue, 6 Feb 2024 16:02:59 +0100 Subject: [PATCH 6/7] Some tinkering with capabilities and rclone --- apptainer.yaml | 23 +++++++++++++ rclone.yaml.example | 67 +++++++++++++++++++++++++++++++++++++ vagrant/.gitignore | 1 + vagrant/README.md | 66 +++++++++++++++++++++++++++++++----- vagrant/Vagrantfile | 4 ++- vagrant/config.yaml.example | 40 ++++++++++++++++++---- 6 files changed, 184 insertions(+), 17 deletions(-) create mode 100644 apptainer.yaml create mode 100644 rclone.yaml.example diff --git a/apptainer.yaml b/apptainer.yaml new file mode 100644 index 0000000..1552382 --- /dev/null +++ b/apptainer.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Pod +metadata: + name: apptainer +spec: + securityContext: + runAsUser: 1000 + runAsNonRoot: true + restartPolicy: Never + containers: + - name: apptainercontainer + image: ghcr.io/ewatercycle/teaching:latest + # command: + # - apptainer + # - run + # - docker://alpine:latest cat /etc/os-release + securityContext: + allowPrivilegeEscalation: true + privileged: true + resources: + limits: + cpu: "1" + memory: "4Gi" diff --git a/rclone.yaml.example b/rclone.yaml.example new file mode 100644 index 0000000..5532f11 --- /dev/null +++ b/rclone.yaml.example @@ -0,0 +1,67 @@ +apiVersion: v1 +kind: 
PersistentVolume +metadata: + name: data-rclone-example + labels: + name: data-rclone-example +spec: + accessModes: + - ReadOnlyMany + storageClassName: rclone + capacity: + storage: 10Gi + csi: + driver: csi-rclone + volumeHandle: data-id + volumeAttributes: + remote: "my-dcache" + remotePath: "/" + # TODO pass --read-only and --cache-dir + configData: | + [my-dcache] + type = webdav + url = https://webdav.grid.surfsara.nl:2880 + vendor = other + user = + pass = + bearer_token = .... +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: data-rclone-example +spec: + accessModes: + - ReadOnlyMany + resources: + requests: + storage: 10Gi + storageClassName: rclone + selector: + matchLabels: + name: data-rclone-example + +# From ewc infra +# # /usr/bin/rclone mount \ +# --config=/etc/rclone.conf \ +# --allow-other \ +# --read-only \ +# --cache-dir {{ rclone_cache_dir }} \ +# --vfs-cache-max-size {{ rclone_max_gsize }}G\ +# --vfs-cache-mode full \ +# {{ dcache_rclone_name }}:{{ dcache_root }} {{ data_root }} + +# From jupyter server log + # rclone mount +# my-dcache:/ +# /var/snap/microk8s/common/var/lib/kubelet/pods/81d7b6af-b58d-4eef-87f3-e97a0afec474/volumes/kubernetes.io~csi/data-rclone-example/mount +# --daemon +# --config /tmp/rclone.conf3139822866 +# --dir-cache-time=5s --vfs-cache-mode=writes +# --allow-non-empty=true --allow-other=true +# --cache-info-age=72h --cache-chunk-clean-interval=15m + +# Warning FailedMount 21s kubelet +# MountVolume.SetUp failed for volume "data-rclone-example" : rpc error: code = Internal desc = mounting failed: exit status 1 cmd: +# 'rclone' remote: 'my-dcache:/' targetpath: /var/snap/microk8s/common/var/lib/kubelet/pods/ae09680d-d77b-462a-8146-61d817ac3451/volumes/kubernetes.io~csi/data-rclone-example/mount +# output: "2024/02/06 14:36:37 Fatal error: mount not ready\n" \ No newline at end of file diff --git a/vagrant/.gitignore b/vagrant/.gitignore index 48b8928..02f25b1 100644 --- a/vagrant/.gitignore +++ 
b/vagrant/.gitignore @@ -1,2 +1,3 @@ config.yaml .vagrant/ +rclone.yaml diff --git a/vagrant/README.md b/vagrant/README.md index bc14dc4..63eecc4 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -12,6 +12,7 @@ Try to run ## Boot ```shell +winget install Hashicorp.Vagrant --version 2.4.0 vagrant up ``` @@ -20,28 +21,29 @@ https://jet.dev/blog/spin-up-local-kubernetes-cluster-agrant/ ``` vagrant ssh microk8s_a ip route | grep default | grep eth0 | cut -d' ' -f9 -172.26.149.178 +172.31.180.222 vagrant ssh microk8s_b ip route | grep default | grep eth0 | cut -d' ' -f9 -172.26.146.78 +172.31.178.224 #a sudo -i -echo "172.26.146.78 microk8s-b" >> /etc/hosts +echo "172.31.178.224 microk8s-b" >> /etc/hosts exit microk8s add-node #b -microk8s join 172.26.149.178:25000/ca5b88a8fce3cd45bff6aa8eb435d140/699b0df3a535 +microk8s join 172.31.180.222:25000/ca5b88a8fce3cd45bff6aa8eb435d140/699b0df3a535 #a microk8s kubectl get nodes # Use range inside hyperv default switch -microk8s enable metallb:172.26.145.83-172.26.145.93 +microk8s enable metallb:172.31.180.200-172.31.180.221 microk8s enable hostpath-storage - microk8s enable rbac # As advised at https://z2jh.jupyter.org/en/stable/administrator/security.html#use-role-based-access-control-rbac +# As advised at https://z2jh.jupyter.org/en/stable/administrator/security.html#use-role-based-access-control-rbac +microk8s enable rbac ``` # NFS @@ -77,7 +79,7 @@ metadata: name: nfs-csi provisioner: nfs.csi.k8s.io parameters: - server: 172.26.149.178 + server: 172.31.180.222 share: /srv/nfs reclaimPolicy: Delete volumeBindingMode: Immediate @@ -166,7 +168,7 @@ microk8s helm repo update ``` ``` - cat config.ngshare.yaml +# cat config.ngshare.yaml deployment: # Resource limitations for the pod resources: @@ -188,12 +190,22 @@ ngshare: pvc: # Amount of storage to allocate storage: 1Gi - ``` +``` ``` microk8s helm install ngshare ngshare/ngshare --namespace teach -f config.ngshare.yaml ``` +In Jupyter terminal as admin: + +``` 
+ngshare-course-management create_course hydrology101 sverhoeven +echo "c.CourseDirectory.course_id = 'hydrology101'" > nbgrader_config.py +# restart server +``` + +Gives permission denied error. Maybe token or admins are wrong. + ## Container in container ``` @@ -247,6 +259,42 @@ model.get_component_name() del model ``` +### Try to run apptainer container inside a pod without Jupyter distractions. + +``` +microk8s kubectl apply -f apptainer.yaml +microk8s kubectl describe pod apptainer +microk8s kubectl get pods apptainer +microk8s kubectl logs apptainer +microk8s kubectl exec -it apptainer -- apptainer run docker://alpine:latest cat /etc/os-release +# should output alpine os release +microk8s kubectl delete pod apptainer +``` + +# dcache / rclone + +Mount on a machine. + +``` +./rclone-v1.65.2-linux-amd64/rclone mount my-dcache:/ /home/vagrant/rr/data --config ./c --read-only +``` +This works + +Try https://github.com/wunderio/csi-rclone + +``` +# on a +git clone https://github.com/wunderio/csi-rclone.git +cd csi-rclone +microk8s kubectl apply -f deploy/kubernetes/1.19 +microk8s kubectl apply -f < rclone.yaml +``` + +did not work + +try https://github.com/simplyzee/kube-rclone next + + # TODO - ewatercycle image diff --git a/vagrant/Vagrantfile b/vagrant/Vagrantfile index a7a8e40..cacb947 100644 --- a/vagrant/Vagrantfile +++ b/vagrant/Vagrantfile @@ -1,6 +1,7 @@ Vagrant.configure("2") do |config| config.vm.box = "generic/ubuntu2204" config.vm.provider "hyperv" + # config.vm.box_version = "4.3.10" config.vm.network "public_network", use_dhcp_assigned_default_route: true config.vm.provider "hyperv" do |h| h.maxmemory = 8096 @@ -9,7 +10,8 @@ Vagrant.configure("2") do |config| h.linked_clone = true end config.vm.provision "shell", inline: <<-EOF - snap install microk8s --classic + apt reinstall -y snapd + snap install microk8s --classic --stable microk8s status --wait-ready usermod -a -G microk8s vagrant EOF diff --git a/vagrant/config.yaml.example 
b/vagrant/config.yaml.example index 4a7ae94..adba788 100644 --- a/vagrant/config.yaml.example +++ b/vagrant/config.yaml.example @@ -16,9 +16,9 @@ hub: allowed_users: - sverhoeven GitHubOAuthenticator: - client_id: - client_secret: - oauth_callback_url: http:///hub/oauth_callback + client_id: .... + client_secret: .... + oauth_callback_url: http://172.31.180.200/hub/oauth_callback JupyterHub: authenticator_class: github allowed_organizations: @@ -26,10 +26,30 @@ hub: scope: - read:org # TODO fix below, as is jupyterhub gives error when starting server - # KubeSpawner: - # container_security_context: - # privileged: true # TODO less privileged - # runAsUser: 1000 + # do `Rootless Podman with the privileged flag set` chapter at + # https://www.redhat.com/sysadmin/podman-inside-kubernetes + KubeSpawner: + privileged: true + allow_privilege_escalation: true + container_security_context: + privileged: true # TODO less privileged + runAsUser: 1000 + + # Got ERROR : Failed to set mount propagation: Permission denied with: + # securityContext: + # allowPrivilegeEscalation: true + # capabilities: + # add: + # - "SYS_ADMIN" + # - "MKNOD" + # - "SYS_CHROOT" + # - "SETFCAP" + # resources: + # limits: + # github.com/fuse: 1 + + # extra_resource_limits: + # github.com/fuse: 1 extraConfig: ngshare.py: | c.JupyterHub.services.append({ @@ -51,10 +71,16 @@ singleuser: - name: jupyterhub-shared persistentVolumeClaim: claimName: my-pvc + # - name: dcache-ro + # persistentVolumeClaim: + # claimName: data-rclone-example extraVolumeMounts: - name: jupyterhub-shared mountPath: /home/shared + # - name: dcache-ro + # mountPath: /home/data extraFiles: + # TODO add /etc/jupyter/nbgrader_config.py here see https://ngshare.readthedocs.io/en/latest/user_guide/install_z2jh.html#installing-ngshare-exchange # jupyter_notebook_config reference: https://jupyter-notebook.readthedocs.io/en/stable/config.html jupyter_notebook_config.json: mountPath: /etc/jupyter/jupyter_notebook_config.json From 
ba90410e77adaa3dcd749aef3d53b168365cc60c Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Fri, 21 Jun 2024 16:34:16 +0200 Subject: [PATCH 7/7] Tried to run model container and connect from notebook. Sadly could not get network to work --- vagrant/README.md | 91 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/vagrant/README.md b/vagrant/README.md index 63eecc4..a6143fc 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -146,7 +146,7 @@ Login on http://172.19.231.83 with any username:password microk8s helm upgrade --cleanup-on-fail \ teach1 jupyterhub/jupyterhub \ --namespace teach \ - --version=3.2.1 \ + --version=3.3.7 \ --values config.yaml ``` @@ -307,3 +307,92 @@ try https://github.com/simplyzee/kube-rclone next - mount dcache https://github.com/wunderio/csi-rclone https://github.com/simplyzee/kube-rclone + +# Spinup leaky bucket bmi container + +And try to connect to it from a Jupyter notebook running in another pod +and get files from notebook pod to model pod + +```sh +cat <<EOF > leakybucket-bmi-pod.yaml +apiVersion: v1 +kind: Pod +metadata: + name: leakybucket-bmi-pod + labels: + app: leakybucket +spec: + containers: + - name: leakybucket-bmi-container + image: ghcr.io/ewatercycle/leakybucket-grpc4bmi:v0.0.1 + ports: + - containerPort: 55555 +EOF +``` + +``` +cat <<EOF > leakybucket-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: leakybucket-service + namespace: teach +spec: + selector: + app: leakybucket + ports: + - protocol: TCP + port: 55555 + targetPort: 55555 + name: grpc +EOF +``` + +``` +cat <<EOF > allow-jupyter-me-to-leakybucket.yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-jupyter-me-to-leakybucket + namespace: teach # Assuming the leakybucket service and the Jupyter pod are in the same namespace +spec: + podSelector: + matchLabels: + app: leakybucket # This selects the pods targeted by the leakybucket service + policyTypes: + - Ingress + 
ingress: + - from: + - podSelector: + matchLabels: + hub.jupyter.org/username: me # This allows pods with this label to connect + ports: + - protocol: TCP + port: 55555 # Assuming you want to allow access to the port 55555 which the leakybucket service exposes +EOF +``` + +```shell +microk8s kubectl apply -f leakybucket-bmi-pod.yaml +microk8s kubectl apply -f leakybucket-service.yaml +microk8s kubectl apply -f allow-jupyter-me-to-leakybucket.yaml +``` + +```shell +microk8s kubectl get pods +microk8s kubectl get svc leakybucket-service +microk8s kubectl get pod leakybucket-bmi-pod -o=jsonpath='{.status.podIP}' +``` + +Run on machine running model or in notebook with +```python +!pip install --user grpc4bmi +from grpc4bmi.bmi_grpc_client import BmiClient +import grpc +u = "leakybucket-service.teach.svc.cluster.local:55555" +mymodel = BmiClient(grpc.insecure_channel(u)) +# works from machine but not from notebook +# TODO unable to connect, connection times out +myname = mymodel.get_component_name() +myname +``` \ No newline at end of file