-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve Workspace agent functions and prompt #14426
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,11 +19,62 @@ import { inject, injectable } from '@theia/core/shared/inversify'; | |
import { FileService } from '@theia/filesystem/lib/browser/file-service'; | ||
import { FileStat } from '@theia/filesystem/lib/common/files'; | ||
import { WorkspaceService } from '@theia/workspace/lib/browser'; | ||
import { FILE_CONTENT_FUNCTION_ID, GET_WORKSPACE_FILE_LIST_FUNCTION_ID } from '../common/functions'; | ||
import { FILE_CONTENT_FUNCTION_ID, GET_WORKSPACE_DIRECTORY_STRUCTURE_FUNCTION_ID, GET_WORKSPACE_FILE_LIST_FUNCTION_ID } from '../common/functions'; | ||
|
||
function shouldExclude(stat: FileStat): boolean { | ||
const excludedFolders = ['node_modules', 'lib']; | ||
return stat.resource.path.base.startsWith('.') || excludedFolders.includes(stat.resource.path.base); | ||
} | ||
|
||
@injectable() | ||
export class GetWorkspaceDirectoryStructure implements ToolProvider { | ||
static ID = GET_WORKSPACE_DIRECTORY_STRUCTURE_FUNCTION_ID; | ||
|
||
getTool(): ToolRequest { | ||
return { | ||
id: GetWorkspaceDirectoryStructure.ID, | ||
name: GetWorkspaceDirectoryStructure.ID, | ||
description: `Retrieve the complete directory structure of the workspace, listing only directories (no file contents). This structure excludes specific directories, | ||
such as node_modules and hidden files, ensuring paths are within workspace boundaries.`, | ||
handler: () => this.getDirectoryStructure() | ||
}; | ||
} | ||
|
||
@inject(WorkspaceService) | ||
protected workspaceService: WorkspaceService; | ||
|
||
@inject(FileService) | ||
protected readonly fileService: FileService; | ||
|
||
private async getDirectoryStructure(): Promise<string[]> { | ||
const wsRoots = await this.workspaceService.roots; | ||
|
||
if (wsRoots.length === 0) { | ||
throw new Error('Workspace root not found'); | ||
JonasHelming marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
const workspaceRootUri = wsRoots[0].resource; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With just looking into the first workspace root, we don't really support multi-root workspaces. I think it'd be good to try supporting multi-root workspaces here. We could try to just make this explicit to the LLM:
And then in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer making "mutiple workspaces support" a follow-up, added it here: #14119 |
||
|
||
return this.buildDirectoryStructure(workspaceRootUri); | ||
} | ||
|
||
private async buildDirectoryStructure(uri: URI, prefix: string = ''): Promise<string[]> { | ||
const stat = await this.fileService.resolve(uri); | ||
const result: string[] = []; | ||
|
||
if (stat && stat.isDirectory && stat.children) { | ||
for (const child of stat.children) { | ||
if (!child.isDirectory || shouldExclude(child)) { continue; }; | ||
const path = `${prefix}${child.resource.path.base}/`; | ||
result.push(path); | ||
result.push(...await this.buildDirectoryStructure(child.resource, `${path}`)); | ||
} | ||
} | ||
|
||
return result; | ||
} | ||
} | ||
|
||
/** | ||
* A Function that can read the contents of a File from the Workspace. | ||
*/ | ||
@injectable() | ||
export class FileContentFunction implements ToolProvider { | ||
static ID = FILE_CONTENT_FUNCTION_ID; | ||
|
@@ -32,13 +83,15 @@ export class FileContentFunction implements ToolProvider { | |
return { | ||
id: FileContentFunction.ID, | ||
name: FileContentFunction.ID, | ||
description: 'Get the content of the file', | ||
description: `The relative path to the target file within the workspace. This path is resolved from the workspace root, and only files within the workspace boundaries | ||
are accessible. Attempting to access paths outside the workspace will result in an error.`, | ||
parameters: { | ||
type: 'object', | ||
properties: { | ||
file: { | ||
type: 'string', | ||
description: 'The path of the file to retrieve content for', | ||
description: `Return the content of a specified file within the workspace. The file path must be provided relative to the workspace root. Only files within | ||
workspace boundaries are accessible; attempting to access files outside the workspace will return an error.`, | ||
} | ||
} | ||
}, | ||
|
@@ -61,15 +114,36 @@ export class FileContentFunction implements ToolProvider { | |
} | ||
|
||
private async getFileContent(file: string): Promise<string> { | ||
const uri = new URI(file); | ||
const fileContent = await this.fileService.read(uri); | ||
return fileContent.value; | ||
const wsRoots = await this.workspaceService.roots; | ||
|
||
if (wsRoots.length === 0) { | ||
throw new Error('Workspace root not found'); | ||
} | ||
|
||
const workspaceRootUri = wsRoots[0].resource; | ||
|
||
const targetUri = workspaceRootUri.resolve(file); | ||
|
||
if (!targetUri.toString().startsWith(workspaceRootUri.toString())) { | ||
throw new Error('Access outside of the workspace is not allowed'); | ||
} | ||
JonasHelming marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
try { | ||
const fileStat = await this.fileService.resolve(targetUri); | ||
|
||
if (!fileStat || fileStat.isDirectory) { | ||
return JSON.stringify({ error: 'File not found' }); | ||
} | ||
|
||
const fileContent = await this.fileService.read(targetUri); | ||
return fileContent.value; | ||
|
||
} catch (error) { | ||
return JSON.stringify({ error: 'File not found' }); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* A Function that lists all files in the workspace. | ||
*/ | ||
@injectable() | ||
export class GetWorkspaceFileList implements ToolProvider { | ||
static ID = GET_WORKSPACE_FILE_LIST_FUNCTION_ID; | ||
|
@@ -78,9 +152,22 @@ export class GetWorkspaceFileList implements ToolProvider { | |
return { | ||
id: GetWorkspaceFileList.ID, | ||
name: GetWorkspaceFileList.ID, | ||
description: 'List all files in the workspace', | ||
|
||
handler: () => this.getProjectFileList() | ||
parameters: { | ||
type: 'object', | ||
properties: { | ||
path: { | ||
type: 'string', | ||
description: `Optional relative path to a directory within the workspace. If no path is specified, the function lists contents directly in the workspace | ||
root. Paths are resolved within workspace boundaries only; paths outside the workspace or unvalidated paths will result in an error.` | ||
} | ||
} | ||
}, | ||
description: `List files and directories within a specified workspace directory. Paths are relative to the workspace root, and only workspace-contained paths are | ||
allowed. If no path is provided, the root contents are listed. Paths outside the workspace will result in an error.`, | ||
handler: (arg_string: string) => { | ||
const args = JSON.parse(arg_string); | ||
return this.getProjectFileList(args.path); | ||
} | ||
}; | ||
} | ||
|
||
|
@@ -90,45 +177,51 @@ export class GetWorkspaceFileList implements ToolProvider { | |
@inject(FileService) | ||
protected readonly fileService: FileService; | ||
|
||
async getProjectFileList(): Promise<string[]> { | ||
// Get all files from the workspace service as a flat list of qualified file names | ||
async getProjectFileList(path?: string): Promise<string[]> { | ||
const wsRoots = await this.workspaceService.roots; | ||
const result: string[] = []; | ||
for (const root of wsRoots) { | ||
result.push(...await this.listFilesRecursively(root.resource)); | ||
|
||
if (wsRoots.length === 0) { | ||
throw new Error('Workspace root not found'); | ||
} | ||
|
||
const workspaceRootUri = wsRoots[0].resource; | ||
const targetUri = path ? workspaceRootUri.resolve(path) : workspaceRootUri; | ||
|
||
if (!targetUri.toString().startsWith(workspaceRootUri.toString())) { | ||
throw new Error('Access outside of the workspace is not allowed'); | ||
} | ||
JonasHelming marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
try { | ||
const stat = await this.fileService.resolve(targetUri); | ||
if (!stat || !stat.isDirectory) { | ||
return ['Error: Directory not found']; | ||
} | ||
return await this.listFilesDirectly(targetUri, workspaceRootUri); | ||
|
||
} catch (error) { | ||
return ['Error: Directory not found']; | ||
} | ||
return result; | ||
} | ||
|
||
private async listFilesRecursively(uri: URI): Promise<string[]> { | ||
private async listFilesDirectly(uri: URI, workspaceRootUri: URI): Promise<string[]> { | ||
const stat = await this.fileService.resolve(uri); | ||
const result: string[] = []; | ||
|
||
if (stat && stat.isDirectory) { | ||
if (this.exclude(stat)) { | ||
if (shouldExclude(stat)) { | ||
return result; | ||
} | ||
const children = await this.fileService.resolve(uri); | ||
if (children.children) { | ||
for (const child of children.children) { | ||
result.push(child.resource.toString()); | ||
result.push(...await this.listFilesRecursively(child.resource)); | ||
const relativePath = workspaceRootUri.relative(child.resource); | ||
if (relativePath) { | ||
result.push(relativePath.toString()); | ||
} | ||
} | ||
} | ||
} | ||
return result; | ||
} | ||
|
||
// Exclude folders which are not relevant to the AI Agent | ||
private exclude(stat: FileStat): boolean { | ||
if (stat.resource.path.base.startsWith('.')) { | ||
return true; | ||
} | ||
if (stat.resource.path.base === 'node_modules') { | ||
return true; | ||
} | ||
if (stat.resource.path.base === 'lib') { | ||
return true; | ||
} | ||
return false; | ||
return result; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know this was there before, but it'd be better to at least put the function as a method of the tool, so adopters can at least customize this hard-coded list of folders to be excluded.
Ideally it should even be an injectable service, maybe with a default implementation that either provides a commonly useful list or looks into the gitignore.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe two settings:
consider .gitignore: boolean
ignore directories: String[]
plus an injectable service?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that would be great plus extra points :-)
To me it'd be important that platform adopters can easily customize the filter list via injection and that there are reasonable defaults for Theia IDE. Configuration options for the Theia IDE user is then extra nice on top.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, did the services and added user settings as a follow-up (#14119)