specification/protocol/open_inference_rest.yaml

# Copyright 2023 The KServe Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

openapi: 3.0.0
info:
  title: Data Plane
  version: '2.0'
  description: 'https://github.com/kserve/open-inference-protocol/blob/main/specification/protocol/inference_rest.md'
  license:
    name: Apache 2.0
    url:  'https://www.apache.org/licenses/LICENSE-2.0'
servers: []
paths:
  /v2/health/live:
    get:
      summary: Server Live
      responses:
        '200':
          description: OK
      operationId: get-v2-health-live
      description: The “server live” API indicates if the inference server is able to receive and respond to metadata and inference requests. The “server live” API can be used directly to implement the Kubernetes livenessProbe.
  /v2/health/ready:
    get:
      summary: Server Ready
      tags: []
      responses:
        '200':
          description: OK
      operationId: get-v2-health-ready
      description: The “server ready” health API indicates if all the models are ready for inferencing. The “server ready” health API can be used directly to implement the Kubernetes readinessProbe.
  '/v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}/ready':
    parameters:
      - schema:
          type: string
        name: MODEL_NAME
        in: path
        required: true
      - schema:
          type: string
        name: MODEL_VERSION
        in: path
        required: true
    get:
      summary: Model Ready
      tags: []
      responses:
        '200':
          description: OK
      operationId: get-v2-models-$-modelName-versions-$-modelVersion-ready
      description: The “model ready” health API indicates if a specific model is ready for inferencing. The model name and (optionally) version must be available in the URL. If a version is not provided the server may choose a version based on its own policies.
  /v2/:
    get:
      summary: Server Metadata
      tags: []
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/metadata_server_response'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/metadata_server_error_response'
      operationId: get-v2
      description: 'The server metadata endpoint provides information about the server. A server metadata request is made with an HTTP GET to a server metadata endpoint. In the corresponding response the HTTP body contains the [Server Metadata Response JSON Object](#server-metadata-response-json-object) or the [Server Metadata Response JSON Error Object](#server-metadata-response-json-error-object).'
  '/v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}':
    parameters:
      - schema:
          type: string
        name: MODEL_NAME
        in: path
        required: true
      - schema:
          type: string
        name: MODEL_VERSION
        in: path
        required: true
    get:
      summary: Model Metadata
      tags: []
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/metadata_model_response'
      operationId: get-v2-models-$-modelName-versions-$-modelVersion
      description: 'The per-model metadata endpoint provides information about a model. A model metadata request is made with an HTTP GET to a model metadata endpoint. In the corresponding response the HTTP body contains the [Model Metadata Response JSON Object](#model-metadata-response-json-object) or the [Model Metadata Response JSON Error Object](#model-metadata-response-json-error-object). The model name and (optionally) version must be available in the URL. If a version is not provided the server may choose a version based on its own policies or return an error.'
  '/v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}/infer':
    parameters:
      - schema:
          type: string
        name: MODEL_NAME
        in: path
        required: true
      - schema:
          type: string
        name: MODEL_VERSION
        in: path
        required: true
    post:
      summary: Inference
      operationId: post-v2-models-$-MODEL_NAME-versions-$-MODEL_VERSION-infer
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/inference_response'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/inference_error_response'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/inference_request'
      description: 'An inference request is made with an HTTP POST to an inference endpoint. In the request the HTTP body contains the [Inference Request JSON Object](#inference-request-json-object). In the corresponding response the HTTP body contains the [Inference Response JSON Object](#inference-response-json-object) or [Inference Response JSON Error Object](#inference-response-json-error-object). See [Inference Request Examples](#inference-request-examples) for some example HTTP/REST requests and responses.'
components:
  schemas:
    metadata_server_response:
      title: metadata_server_response
      type: object
      description: ''
      x-examples: {}
      properties:
        name:
          type: string
        version:
          type: string
        extensions:
          type: array
          items:
            type: string
      required:
        - name
        - version
        - extensions
    metadata_server_error_response:
      title: metadata_server_error_response
      type: object
      properties:
        error:
          type: string
      required:
        - error
    metadata_model_response:
      title: metadata_model_response
      type: object
      properties:
        name:
          type: string
        versions:
          type: array
          items:
            type: string
        platform:
          type: string
        inputs:
          type: array
          items:
            $ref: '#/components/schemas/metadata_tensor'
        outputs:
          type: array
          items:
            $ref: '#/components/schemas/metadata_tensor'
        properties:
          type: object
          additionalProperties:
            type: string
      required:
        - name
        - platform
    metadata_tensor:
      title: metadata_tensor
      type: object
      properties:
        name:
          type: string
        datatype:
          type: string
        shape:
          type: array
          items:
            type: integer
      required:
        - name
        - datatype
        - shape
    metadata_model_error_response:
      title: metadata_model_error_response
      type: object
      properties:
        error:
          type: string
      required:
        - error
    inference_request:
      title: inference_request
      type: object
      x-examples:
        Example 1:
          id: '42'
          inputs:
            - name: input0
              shape:
                - 2
                - 2
              datatype: UINT32
              data:
                - 1
                - 2
                - 3
                - 4
            - name: input1
              shape:
                - 3
              datatype: BOOL
              data:
                - true
          outputs:
            - name: output0
        Example 2:
          id: '42'
          outputs:
            - name: output0
              shape:
                - 3
                - 2
              datatype: FP32
              data:
                - 1
                - 1.1
                - 2
                - 2.1
                - 3
                - 3.1
      properties:
        id:
          type: string
        parameters:
          $ref: '#/components/schemas/parameters'
        inputs:
          type: array
          items:
            $ref: '#/components/schemas/request_input'
        outputs:
          type: array
          items:
            $ref: '#/components/schemas/request_output'
      required:
        - inputs
    parameters:
      title: parameters
      x-examples: {}
      type: object
    request_input:
      title: request_input
      type: object
      properties:
        name:
          type: string
        shape:
          type: array
          items:
            type: integer
        datatype:
          type: string
        parameters:
          $ref: '#/components/schemas/parameters'
        data:
          $ref: '#/components/schemas/tensor_data'
      required:
        - name
        - shape
        - datatype
        - data
    tensor_data:
      title: tensor_data
      type: array
      items:
        anyOf:
          - $ref: '#/components/schemas/tensor_data'
          - type: number
          - type: string
          - type: boolean
    request_output:
      title: request_output
      type: object
      properties:
        name:
          type: string
        parameters:
          $ref: '#/components/schemas/parameters'
      required:
        - name
    response_output:
      title: response_output
      type: object
      properties:
        name:
          type: string
        shape:
          type: array
          items:
            type: integer
        datatype:
          type: string
        parameters:
          $ref: '#/components/schemas/parameters'
        data:
          $ref: '#/components/schemas/tensor_data'
      required:
        - name
        - shape
        - datatype
        - data
    inference_response:
      title: inference_response
      type: object
      properties:
        model_name:
          type: string
        model_version:
          type: string
        id:
          type: string
        parameters:
          $ref: '#/components/schemas/parameters'
        outputs:
          type: array
          items:
            $ref: '#/components/schemas/response_output'
      required:
        - model_name
        - outputs
    inference_error_response:
      title: inference_error_response
      type: object
      properties:
        error:
          type: string