Skip to content

Commit

Permalink
Update generate_rest.yaml
Browse files (browse the repository at this point in the history)
Signed-off-by: Gavrish Prabhu <[email protected]>
  • Loading branch information
gavrissh authored Jan 16, 2024
1 parent e6977a6 commit 24d9129
Showing 1 changed file with 75 additions and 37 deletions.
112 changes: 75 additions & 37 deletions specification/protocol/generate_rest.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,40 @@ info:
version: 1.0.0
components:
schemas:
GenerateErrorResponse:
type: object
required:
- error
properties:
error:
type: string
GenerateParameters:
type: object
additionalProperties: {}
properties:
temperature:
type: number
format: float
default: 1
minimum: 0
description: What sampling temperature to use, higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
top_p:
type: number
format: float
maximum: 1
minimum: 0
description: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
max_tokens:
type: integer
format: int32
default: 20
minimum: 1
description: The maximum number of tokens to generate in the completion.
stop:
type: array
items:
type: string
description: Sequences where the API will stop generating further tokens.
GenerateRequest:
type: object
required:
Expand All @@ -13,36 +47,10 @@ components:
text_input:
type: string
parameters:
$ref: '#/components/schemas/GenerateParameters'
GenerateParameters:
allOf:
$ref: '#/components/schemas/GenerateParameters'
type: object
additionalProperties: {}
properties:
temperature:
type: number
format: float
default: 1
minimum: 0
description: What sampling temperature to use, higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
top_p:
type: number
format: float
maximum: 1
minimum: 0
description: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
max_tokens:
type: integer
format: int32
default: 20
minimum: 1
description: The maximum number of tokens to generate in the completion.
stop:
type: array
items:
type: string
description: Sequences where the API will stop generating further tokens.
allOf:
- $ref: '#/components/schemas/GenerateParameters'
logprob:
type: boolean
GenerateResponse:
type: object
required:
Expand All @@ -55,6 +63,8 @@ components:
type: string
model_version:
type: string
logprobs:
$ref: '#/components/schemas/Logprobs'
GenerateStreamResponse:
type: object
required:
Expand All @@ -69,18 +79,41 @@ components:
type: string
finish_reason:
type: string
GenerateErrorResponse:
logprobs:
$ref: '#/components/schemas/Logprobs'
Logprobs:
type: array
items:
$ref: '#/components/schemas/Token'
Token:
type: object
required:
- error
- id
- text
- logprob
- special
properties:
error:
id:
type: integer
format: int32
minimum: 0
logprob:
type: number
format: float
special:
type: boolean
text:
type: string
paths:
/v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate:
/v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}/generate:
post:
parameters:
- name: model_name
- name: MODEL_NAME
required: true
in: path
schema:
type: string
- name: MODEL_VERSION
required: true
in: path
schema:
Expand Down Expand Up @@ -130,10 +163,15 @@ paths:
example:
error: Incomplete generation

/v2/models/${MODEL_NAME}[/versions/${MODEL_VERSION}]/generate_stream:
/v2/models/${MODEL_NAME}/versions/${MODEL_VERSION}/generate_stream:
post:
parameters:
- name: model_name
- name: MODEL_NAME
required: true
in: path
schema:
type: string
- name: MODEL_VERSION
required: true
in: path
schema:
Expand Down

0 comments on commit 24d9129

Please sign in to comment.