diff --git a/src/resources/beta/assistants.ts b/src/resources/beta/assistants.ts
index c0827848e..a24cee045 100644
--- a/src/resources/beta/assistants.ts
+++ b/src/resources/beta/assistants.ts
@@ -142,6 +142,31 @@ export interface Assistant {
    */
   tools: Array<AssistantTool>;

+  /**
+   * Specifies the format that the model must output. Compatible with
+   * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+   *
+   * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+   * message the model generates is valid JSON.
+   *
+   * **Important:** when using JSON mode, you **must** also instruct the model to
+   * produce JSON yourself via a system or user message. Without this, the model may
+   * generate an unending stream of whitespace until the generation reaches the token
+   * limit, resulting in a long-running and seemingly "stuck" request. Also note that
+   * the message content may be partially cut off if `finish_reason="length"`, which
+   * indicates the generation exceeded `max_tokens` or the conversation exceeded the
+   * max context length.
+   */
+  response_format?: ThreadsAPI.AssistantResponseFormatOption | null;
+
+  /**
+   * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+   * make the output more random, while lower values like 0.2 will make it more
+   * focused and deterministic.
+   */
+  temperature?: number | null;
+
   /**
    * A set of resources that are used by the assistant's tools. The resources are
    * specific to the type of tool. For example, the `code_interpreter` tool requires
@@ -149,6 +174,15 @@ export interface Assistant {
    * IDs.
    */
   tool_resources?: Assistant.ToolResources | null;
+
+  /**
+   * An alternative to sampling with temperature, called nucleus sampling, where the
+   * model considers the results of the tokens with top_p probability mass. So 0.1
+   * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
+   */
+  top_p?: number | null;
 }

 export namespace Assistant {
@@ -1012,7 +1046,7 @@ export interface AssistantCreateParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1158,7 +1192,7 @@ export interface AssistantUpdateParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
diff --git a/src/resources/beta/threads/runs/runs.ts b/src/resources/beta/threads/runs/runs.ts
index 9e42f8a20..48cfac546 100644
--- a/src/resources/beta/threads/runs/runs.ts
+++ b/src/resources/beta/threads/runs/runs.ts
@@ -409,7 +409,7 @@ export interface Run {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -446,7 +446,7 @@ export interface Run {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -459,6 +459,10 @@ export interface Run {
    */
   tools: Array<AssistantTool>;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy: Run.TruncationStrategy | null;

   /**
@@ -534,6 +538,10 @@ export namespace Run {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -620,7 +628,7 @@ export interface RunCreateParamsBase {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -629,7 +637,7 @@ export interface RunCreateParamsBase {
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -673,7 +681,7 @@ export interface RunCreateParamsBase {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -706,7 +714,7 @@ export interface RunCreateParamsBase {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -722,9 +730,15 @@ export interface RunCreateParamsBase {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunCreateParams.TruncationStrategy | null;
 }

@@ -770,6 +784,10 @@ export namespace RunCreateParams {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -865,7 +883,7 @@ export interface RunCreateAndPollParams {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -874,7 +892,7 @@
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -918,7 +936,7 @@ export interface RunCreateAndPollParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -944,7 +962,7 @@
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -960,9 +978,15 @@
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunCreateAndPollParams.TruncationStrategy | null;
 }

@@ -1008,6 +1032,10 @@ export namespace RunCreateAndPollParams {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -1056,7 +1084,7 @@ export interface RunCreateAndStreamParams {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -1065,7 +1093,7 @@
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -1109,7 +1137,7 @@
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1135,7 +1163,7 @@
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -1151,9 +1179,15 @@
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunCreateAndStreamParams.TruncationStrategy | null;
 }

@@ -1199,6 +1233,10 @@ export namespace RunCreateAndStreamParams {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -1247,7 +1285,7 @@ export interface RunStreamParams {
    * The maximum number of completion tokens that may be used over the course of the
    * run. The run will make a best effort to use only the number of completion tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * completion tokens specified, the run will end with status `complete`. See
+   * completion tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_completion_tokens?: number | null;
@@ -1256,7 +1294,7 @@
    * The maximum number of prompt tokens that may be used over the course of the run.
    * The run will make a best effort to use only the number of prompt tokens
    * specified, across multiple turns of the run. If the run exceeds the number of
-   * prompt tokens specified, the run will end with status `complete`. See
+   * prompt tokens specified, the run will end with status `incomplete`. See
    * `incomplete_details` for more info.
    */
   max_prompt_tokens?: number | null;
@@ -1300,7 +1338,7 @@
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1326,7 +1364,7 @@
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -1342,9 +1380,15 @@
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: RunStreamParams.TruncationStrategy | null;
 }

@@ -1390,6 +1434,10 @@
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
diff --git a/src/resources/beta/threads/threads.ts b/src/resources/beta/threads/threads.ts
index f3590ed80..6f1e761de 100644
--- a/src/resources/beta/threads/threads.ts
+++ b/src/resources/beta/threads/threads.ts
@@ -131,7 +131,7 @@ export interface AssistantResponseFormat {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -170,7 +170,7 @@ export interface AssistantToolChoiceFunction {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -511,7 +511,7 @@ export interface ThreadCreateAndRunParamsBase {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -549,7 +549,7 @@ export interface ThreadCreateAndRunParamsBase {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -575,9 +575,15 @@ export interface ThreadCreateAndRunParamsBase {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: ThreadCreateAndRunParams.TruncationStrategy | null;
 }

@@ -745,6 +751,10 @@ export namespace ThreadCreateAndRunParams {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -854,7 +864,7 @@ export interface ThreadCreateAndRunPollParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -885,7 +895,7 @@ export interface ThreadCreateAndRunPollParams {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -911,9 +921,15 @@
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: ThreadCreateAndRunPollParams.TruncationStrategy | null;
 }

@@ -1081,6 +1097,10 @@ export namespace ThreadCreateAndRunPollParams {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
@@ -1169,7 +1189,7 @@ export interface ThreadCreateAndRunStreamParams {
   /**
    * Specifies the format that the model must output. Compatible with
    * [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-   * all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+   * all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
    *
    * Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
    * message the model generates is valid JSON.
@@ -1200,7 +1220,7 @@ export interface ThreadCreateAndRunStreamParams {
    * Controls which (if any) tool is called by the model. `none` means the model will
    * not call any tools and instead generates a message. `auto` is the default value
    * and means the model can pick between generating a message or calling a tool.
-   * Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+   * Specifying a particular tool like `{"type": "file_search"}` or
    * `{"type": "function", "function": {"name": "my_function"}}` forces the model to
    * call that tool.
    */
@@ -1226,9 +1246,15 @@ export interface ThreadCreateAndRunStreamParams {
    * An alternative to sampling with temperature, called nucleus sampling, where the
    * model considers the results of the tokens with top_p probability mass. So 0.1
    * means only the tokens comprising the top 10% probability mass are considered.
+   *
+   * We generally recommend altering this or temperature but not both.
    */
   top_p?: number | null;

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   truncation_strategy?: ThreadCreateAndRunStreamParams.TruncationStrategy | null;
 }

@@ -1396,6 +1422,10 @@ export namespace ThreadCreateAndRunStreamParams {
     }
   }

+  /**
+   * Controls for how a thread will be truncated prior to the run. Use this to
+   * control the initial context window of the run.
+   */
   export interface TruncationStrategy {
     /**
      * The truncation strategy to use for the thread. The default is `auto`. If set to
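Reviewer note (not part of the patch): a minimal usage sketch of the parameters this diff documents, combining JSON mode, the sampling controls, the token budgets, and `truncation_strategy` on a single run via the SDK's `createAndRunPoll` helper. The assistant ID, message content, and numeric budgets below are placeholders, not values from the diff.

```ts
import OpenAI from 'openai';

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

async function main() {
  // createAndRunPoll creates the run and polls until it reaches a terminal state.
  const run = await openai.beta.threads.createAndRunPoll({
    assistant_id: 'asst_abc123', // placeholder assistant ID
    thread: {
      // JSON mode requires that the prompt itself ask for JSON (see the
      // response_format docstring), hence the explicit instruction here.
      messages: [{ role: 'user', content: 'Reply with a JSON object listing three colors.' }],
    },
    response_format: { type: 'json_object' },

    // Sampling controls now exposed on runs; per the docstrings, alter
    // temperature or top_p, but not both.
    temperature: 0.2,

    // If either budget is exhausted mid-run, the run ends with status
    // `incomplete` (not `complete`), with details in `incomplete_details`.
    max_prompt_tokens: 2000,
    max_completion_tokens: 500,

    // Truncation controls the initial context window of the run: here, only
    // the 10 most recent thread messages are kept.
    truncation_strategy: { type: 'last_messages', last_messages: 10 },
  });

  console.log(run.status, run.incomplete_details);
}

main().catch(console.error);
```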