Showing 3 changed files with 179 additions and 0 deletions.
@@ -0,0 +1,25 @@
name: Test with Deno

on: [push, pull_request, workflow_dispatch]

jobs:
  test:
    runs-on: ubuntu-22.04
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
      - uses: denoland/setup-deno@v2
        with:
          deno-version: v2.x

      - run: deno --version

      - name: Prepare LLM
        uses: ./.github/actions/prepare-llm
        timeout-minutes: 3

      - run: echo 'Which planet in our solar system is the largest?' | ./ask-llm.ts | grep -i jupiter
        timeout-minutes: 7
        env:
          LLM_API_BASE_URL: 'http://127.0.0.1:8080/v1'
          LLM_DEBUG: 1
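
The last step pipes a question into ask-llm.ts and greps the answer for "jupiter". Outside CI, roughly the same smoke test can be reproduced with a small Deno harness. This is only a sketch, not part of this commit; it assumes an OpenAI-compatible server is already listening on 127.0.0.1:8080 and that ask-llm.ts is executable:

    #!/usr/bin/env -S deno run --allow-run
    // Hypothetical local harness mirroring the workflow's smoke test.
    const command = new Deno.Command('./ask-llm.ts', {
        env: { LLM_API_BASE_URL: 'http://127.0.0.1:8080/v1' },
        stdin: 'piped',
        stdout: 'piped',
    });
    const child = command.spawn();

    // Feed the question on stdin, then close it so the script sees EOF and exits.
    const writer = child.stdin.getWriter();
    await writer.write(new TextEncoder().encode('Which planet in our solar system is the largest?\n'));
    await writer.close();

    const { stdout } = await child.output();
    const transcript = new TextDecoder().decode(stdout);
    console.log(/jupiter/i.test(transcript) ? 'PASS' : 'FAIL');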
@@ -0,0 +1,152 @@
#!/usr/bin/env -S deno run --allow-env --allow-net

import readline from 'node:readline';
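
// Configuration is read from the environment: LLM_API_BASE_URL selects the
// endpoint, LLM_API_KEY (or OPENAI_API_KEY) authenticates the requests,
// LLM_CHAT_MODEL overrides the default model, LLM_STREAMING=no disables
// streamed completions, and LLM_DEBUG enables per-answer timing output.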
const LLM_API_BASE_URL = process.env.LLM_API_BASE_URL || 'https://api.openai.com/v1';
const LLM_API_KEY = process.env.LLM_API_KEY || process.env.OPENAI_API_KEY;
const LLM_CHAT_MODEL = process.env.LLM_CHAT_MODEL;
const LLM_STREAMING = process.env.LLM_STREAMING !== 'no';

const LLM_DEBUG = process.env.LLM_DEBUG;

/**
 * Represents a chat message.
 *
 * @typedef {Object} Message
 * @property {'system'|'user'|'assistant'} role
 * @property {string} content
 */

/**
 * A callback function to stream the completion.
 *
 * @callback CompletionHandler
 * @param {string} text
 * @returns {void}
 */

/**
 * Generates a chat completion using a RESTful LLM API service.
 *
 * @param {Array<Message>} messages - List of chat messages.
 * @param {CompletionHandler=} handler - An optional callback to stream the completion.
 * @returns {Promise<string>} The completion generated by the LLM.
 */
const chat = async (messages, handler) => {
    const url = `${LLM_API_BASE_URL}/chat/completions`;
    const auth = LLM_API_KEY ? { 'Authorization': `Bearer ${LLM_API_KEY}` } : {};
    const model = LLM_CHAT_MODEL || 'gpt-4o-mini';
    const stop = ['<|im_end|>', '<|end|>', '<|eot_id|>'];
    const max_tokens = 200;
    const temperature = 0;
    const stream = LLM_STREAMING && typeof handler === 'function';
    const response = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', ...auth },
        body: JSON.stringify({ messages, model, stop, max_tokens, temperature, stream })
    });
    if (!response.ok) {
        throw new Error(`HTTP error ${response.status}: ${response.statusText}`);
    }
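
    // Without streaming, the endpoint replies with a single JSON document
    // shaped like { choices: [{ message: { role, content } }] }.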
    if (!stream) {
        const data = await response.json();
        const { choices } = data;
        const first = choices[0];
        const { message } = first;
        const { content } = message;
        const answer = content.trim();
        handler && handler(answer);
        return answer;
    }
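
    // With streaming, the reply arrives as server-sent events: each event is
    // a line such as `data: {"choices":[{"delta":{"content":"..."}}]}` and
    // the stream is terminated by `data: [DONE]`.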
    const parse = (line) => {
        let partial = null;
        const prefix = line.substring(0, 6);
        if (prefix === 'data: ') {
            const payload = line.substring(6);
            try {
                const { choices } = JSON.parse(payload);
                const [choice] = choices;
                const { delta } = choice;
                partial = delta?.content;
            } catch (e) {
                // ignore malformed payloads; the caller buffers them and retries
            }
        }
        return partial;
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();

    let answer = '';
    let buffer = '';
    while (true) {
        const { value, done } = await reader.read();
        if (done) {
            break;
        }
        // Decode with stream: true so multi-byte characters split across
        // chunks are not mangled.
        const lines = decoder.decode(value, { stream: true }).split('\n');
        for (let i = 0; i < lines.length; ++i) {
            const line = buffer + lines[i];
            if (line[0] === ':') {
                buffer = '';
                continue;
            }
            if (line === 'data: [DONE]') {
                break;
            }
            if (line.length > 0) {
                const partial = parse(line.trim());
                if (partial === null) {
                    // Incomplete event; keep it and prepend the next chunk.
                    buffer = line;
                } else if (partial && partial.length > 0) {
                    buffer = '';
                    if (answer.length < 1) {
                        const leading = partial.trim();
                        answer = leading;
                        handler && (leading.length > 0) && handler(leading);
                    } else {
                        answer += partial;
                        handler && handler(partial);
                    }
                }
            }
        }
    }
    return answer;
};

const SYSTEM_PROMPT = 'Answer the question politely and concisely.';

(async () => {
    console.log(`Using LLM at ${LLM_API_BASE_URL}.`);
    console.log('Press Ctrl+D to exit.');
    console.log();

    const messages = [];
    messages.push({ role: 'system', content: SYSTEM_PROMPT });

    let loop = true;
    const io = readline.createInterface({ input: process.stdin, output: process.stdout });
    io.on('close', () => { loop = false; });

    const qa = () => {
        io.question('>> ', async (question) => {
            messages.push({ role: 'user', content: question });
            const start = Date.now();
            const answer = await chat(messages, (str) => process.stdout.write(str));
            messages.push({ role: 'assistant', content: answer.trim() });
            console.log();
            const elapsed = Date.now() - start;
            LLM_DEBUG && console.log(`[${elapsed} ms]`);
            console.log();
            loop && qa();
        });
    };

    qa();
})();
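
For reference, this is the request body that chat() posts under the defaults above. It is a standalone sketch, not output from a real run; the model name shown is only the fallback used when LLM_CHAT_MODEL is unset:

    // Example payload chat() would send for the workflow's test question.
    const examplePayload = {
        messages: [
            { role: 'system', content: 'Answer the question politely and concisely.' },
            { role: 'user', content: 'Which planet in our solar system is the largest?' }
        ],
        model: 'gpt-4o-mini',
        stop: ['<|im_end|>', '<|end|>', '<|eot_id|>'],
        max_tokens: 200,
        temperature: 0,
        stream: true
    };
    console.log(JSON.stringify(examplePayload, null, 4));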