diff --git a/docs/examples/python_sdk_examples.ipynb b/docs/examples/python_sdk_examples.ipynb new file mode 100644 index 00000000..a3cf38d8 --- /dev/null +++ b/docs/examples/python_sdk_examples.ipynb @@ -0,0 +1,362 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use the KServe Python SDK with ModelMesh\n", + "\n", + "This sample assumes ModelMesh Serving was deployed using the [quickstart guide](https://github.com/kserve/modelmesh-serving/blob/main/docs/quickstart.md)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from kubernetes import client\n", + "from kserve import constants\n", + "from kserve import V1beta1InferenceService\n", + "from kserve import V1beta1InferenceServiceSpec\n", + "from kserve import V1beta1PredictorSpec\n", + "from kserve import V1beta1SKLearnSpec\n", + "from kserve import V1beta1TFServingSpec\n", + "from kserve import V1beta1StorageSpec\n", + "from kserve import KServeClient" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The variables being set are `namespace`, `name`, and `protocol_version`\n", + "- `namespace` is the namespace the `InferenceService` will be deployed to\n", + "- `name` will be the name of the `InferenceService`\n", + "- For ModelMesh, the [`v2`](https://kserve.github.io/website/master/modelserving/data_plane/v2_protocol/) protocol must be used, since ModelMesh doesn't support the default [`v1`](https://kserve.github.io/website/master/modelserving/data_plane/v1_protocol/) protocol" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "namespace = 'modelmesh-serving'\n", + "name='mnist-sample'\n", + "protocol_version='v2'" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define `InferenceService` specifying ModelMesh as the 
deploymentMode\n", + "\n", + "While both the KServe controller and ModelMesh controller will reconcile InferenceService resources, the ModelMesh controller will only handle those InferenceServices with the `serving.kserve.io/deploymentMode: ModelMesh` annotation. Otherwise, the KServe controller will handle reconciliation. The KServe controller will not reconcile an InferenceService with the `serving.kserve.io/deploymentMode: ModelMesh` annotation, and will defer under the assumption that the ModelMesh controller will handle it." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "isvc = V1beta1InferenceService(\n", + " api_version=constants.KSERVE_V1BETA1,\n", + " kind=constants.KSERVE_KIND,\n", + " metadata=client.V1ObjectMeta(\n", + " name=name, \n", + " namespace=namespace,\n", + " annotations={\n", + " 'serving.kserve.io/deploymentMode': 'ModelMesh'\n", + " }\n", + " ),\n", + " spec=V1beta1InferenceServiceSpec(\n", + " predictor=V1beta1PredictorSpec(\n", + " sklearn=V1beta1SKLearnSpec(\n", + " protocol_version=protocol_version,\n", + " storage=V1beta1StorageSpec(\n", + " key='localMinIO',\n", + " path='sklearn/mnist-svm.joblib'\n", + " )\n", + " )\n", + " )\n", + " )\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create `InferenceService`" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "kserve = KServeClient()\n", + "isvc = kserve.create(isvc)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "name: mnist-sample\n", + "uid: 45aa2662-4507-401e-98fa-aa78e3cea14e\n", + "key: localMinIO\n", + "path: sklearn/mnist-svm.joblib\n" + ] + } + ], + "source": [ + "print(\"name: \", isvc[\"metadata\"][\"name\"])\n", + "print(\"uid: \", isvc[\"metadata\"][\"uid\"])\n", + "print(\"key: 
\", isvc[\"spec\"][\"predictor\"][\"sklearn\"][\"storage\"][\"key\"])\n", + "print(\"path: \", isvc[\"spec\"][\"predictor\"][\"sklearn\"][\"storage\"][\"path\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check `InferenceService` status after deploying\n", + "It will be in a `Pending` state while the associated serving runtime pods start up." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "deployed_isvc = kserve.get(name, namespace=namespace)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Pending\n" + ] + } + ], + "source": [ + "print(deployed_isvc[\"status\"][\"modelStatus\"][\"states\"][\"activeModelState\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check `InferenceService` status once it's ready\n", + "Once the serving runtime pods are started, the `InferenceService` should be in `Loaded` state." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "kserve.wait_isvc_ready(name, namespace=namespace)\n", + "deployed_isvc = kserve.get(name, namespace=namespace)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded\n" + ] + } + ], + "source": [ + "print(deployed_isvc[\"status\"][\"modelStatus\"][\"states\"][\"activeModelState\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Update `InferenceService` and point it to a different model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "updated_spec=V1beta1InferenceServiceSpec(\n", + " predictor=V1beta1PredictorSpec(\n", + " tensorflow=V1beta1TFServingSpec(\n", + " protocol_version=protocol_version,\n", + " storage=V1beta1StorageSpec(\n", + " key='localMinIO',\n", + " path='tensorflow/mnist.savedmodel'\n", + " )\n", + " )\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "updated_isvc = V1beta1InferenceService(api_version= constants.KSERVE_V1BETA1,\n", + " kind=constants.KSERVE_KIND,\n", + " metadata=client.V1ObjectMeta(name=name, namespace=namespace, annotations={'serving.kserve.io/deploymentMode': 'ModelMesh'}), # replace() submits the whole object, so re-declare the ModelMesh annotation\n", + " spec=updated_spec)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "replaced_isvc = kserve.replace(name, updated_isvc, namespace=namespace)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "key: localMinIO\n", + "path: tensorflow/mnist.savedmodel\n" + ] + } + ], + "source": [ + "print(\"key: \", replaced_isvc[\"spec\"][\"predictor\"][\"tensorflow\"][\"storage\"][\"key\"])\n", + "print(\"path: \", 
replaced_isvc[\"spec\"][\"predictor\"][\"tensorflow\"][\"storage\"][\"path\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wait for the InferenceService to be ready." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "kserve.wait_isvc_ready(name, namespace=namespace)\n", + "updated = kserve.get(name, namespace=namespace)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded\n" + ] + } + ], + "source": [ + "print(updated[\"status\"][\"modelStatus\"][\"states\"][\"activeModelState\"])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete `InferenceService`" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "delete_isvc = kserve.delete(name, namespace=namespace)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Success\n" + ] + } + ], + "source": [ + "print(delete_isvc[\"status\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}