Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Fixes for Prompt studio Indexing and tool runs #1052

Merged
merged 31 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
efe74c8
Roll to latest SDK
gaya3-zipstack Dec 4, 2024
3ba42c2
Commit pdm.lock changes
gaya3-zipstack Dec 4, 2024
0796874
Merge main
gaya3-zipstack Dec 6, 2024
12cd28e
Remove pandoc and tesseract
gaya3-zipstack Dec 10, 2024
bc4deae
Revert change
gaya3-zipstack Dec 11, 2024
3def519
Merge remote-tracking branch 'origin' into feature/remote_storage
gaya3-zipstack Dec 11, 2024
26b2fcb
Roll tool versions
gaya3-zipstack Dec 11, 2024
ea0fc25
Roll tool versions
gaya3-zipstack Dec 11, 2024
bc9f145
Roll version
gaya3-zipstack Dec 11, 2024
587322f
Commit pdm.lock changes
gaya3-zipstack Dec 11, 2024
b7f17e0
Merge from main
gaya3-zipstack Dec 12, 2024
e5a4890
Remove remote storage tool registry
gaya3-zipstack Dec 12, 2024
435249e
Merge remote-tracking branch 'origin' into feature/remote_storage
gaya3-zipstack Dec 20, 2024
d5cb94d
Use EnvHelper for env standardisation
gaya3-zipstack Dec 26, 2024
97bccef
Commit pdm.lock changes
gaya3-zipstack Dec 27, 2024
21dc09b
Minor improvement
gaya3-zipstack Dec 31, 2024
f3806dd
Merge remote-tracking branch 'origin' into feature/remote_storage
gaya3-zipstack Dec 31, 2024
0bee7c3
Merge branch 'feature/remote_storage' of https://github.com/Zipstack/…
gaya3-zipstack Dec 31, 2024
ae8b076
Resolve conflicts
gaya3-zipstack Jan 2, 2025
a6c654b
Indexing and env fixes
gaya3-zipstack Jan 7, 2025
9b60731
Indexing and env fixes
gaya3-zipstack Jan 7, 2025
bdf42ec
Correcting sample env
gaya3-zipstack Jan 7, 2025
d916d3d
Merge branch 'main' into feature/remote_storage
gaya3-zipstack Jan 8, 2025
123b5c5
Roll SDK version
gaya3-zipstack Jan 8, 2025
012eca8
Merge from main
gaya3-zipstack Jan 8, 2025
0440ed5
SDK version roll
gaya3-zipstack Jan 8, 2025
9864e3d
Add sample env
gaya3-zipstack Jan 8, 2025
2123c30
Add sample env
gaya3-zipstack Jan 8, 2025
000598a
Merge remote-tracking branch 'origin' into feature/remote_storage
gaya3-zipstack Jan 8, 2025
6f85236
Lock file check in
gaya3-zipstack Jan 9, 2025
bf01dc3
Merge branch 'main' into feature/remote_storage
hari-kuriakose Jan 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 80 additions & 79 deletions backend/pdm.lock

Large diffs are not rendered by default.

88 changes: 63 additions & 25 deletions backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,18 +381,36 @@ def index_document(
process_text = None
if text_processor:
process_text = text_processor.process
doc_id = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
tool_id=tool_id,
file_path=file_path,
org_id=org_id,
document_id=document_id,
is_summary=is_summary,
reindex=True,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
doc_id = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
tool_id=tool_id,
file_path=file_path,
org_id=org_id,
document_id=document_id,
is_summary=is_summary,
reindex=True,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
else:
fs_instance = FileStorageHelper.initialize_file_storage(
type=FileStorageType.PERMANENT
)
doc_id = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
tool_id=tool_id,
file_path=file_path,
org_id=org_id,
document_id=document_id,
is_summary=is_summary,
reindex=True,
run_id=run_id,
user_id=user_id,
process_text=process_text,
fs=fs_instance,
)

elapsed_time = time.time() - start_time
logger.info(
Expand Down Expand Up @@ -870,7 +888,7 @@ def _fetch_response(
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
file_hash = ToolUtils.get_hash_from_file(file_path=doc_path)
else:
file_hash = ToolUtils.get_hash_from_file(file_path=doc_path, fs=fs_instance)
file_hash = fs_instance.get_hash_from_file(path=doc_path)

payload = {
TSPKeys.TOOL_SETTINGS: tool_settings,
Expand Down Expand Up @@ -1123,17 +1141,34 @@ def _fetch_single_pass_response(
if not default_profile:
raise DefaultProfileError()

index_result = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
file_path=file_path,
tool_id=tool_id,
org_id=org_id,
is_summary=tool.summarize_as_source,
document_id=document_id,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
index_result = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
file_path=file_path,
tool_id=tool_id,
org_id=org_id,
is_summary=tool.summarize_as_source,
document_id=document_id,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
else:
fs_instance = FileStorageHelper.initialize_file_storage(
type=FileStorageType.PERMANENT
)
index_result = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
file_path=file_path,
tool_id=tool_id,
org_id=org_id,
is_summary=tool.summarize_as_source,
document_id=document_id,
run_id=run_id,
user_id=user_id,
process_text=process_text,
fs=fs_instance,
)
if index_result.get("status") == IndexingStatus.PENDING_STATUS.value:
return {
"status": IndexingStatus.PENDING_STATUS.value,
Expand Down Expand Up @@ -1174,7 +1209,10 @@ def _fetch_single_pass_response(
if tool.summarize_as_source:
path = Path(file_path)
file_path = str(path.parent / TSPKeys.SUMMARIZE / (path.stem + ".txt"))
file_hash = ToolUtils.get_hash_from_file(file_path=file_path)
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
file_hash = ToolUtils.get_hash_from_file(file_path=file_path)
else:
file_hash = fs_instance.get_hash_from_file(path=file_path)

payload = {
TSPKeys.TOOL_SETTINGS: tool_settings,
Expand Down
2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-socketio==5.9.0", # For log_events
"social-auth-app-django==5.3.0", # For OAuth
"social-auth-core==4.4.2", # For OAuth
"unstract-sdk~=0.54.0rc11",
"unstract-sdk~=0.54.0rc12",
# ! IMPORTANT!
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
Expand Down
14 changes: 6 additions & 8 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ REMOTE_PROMPT_STUDIO_FILE_PATH=

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.54"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.55"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.54"
STRUCTURE_TOOL_IMAGE_TAG="0.0.55"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Expand Down Expand Up @@ -160,12 +160,10 @@ API_EXECUTION_DIR_PREFIX="unstract/api"

# Storage Provider for Workflow Execution
# Valid options: MINIO, S3, etc.
WORKFLOW_EXECUTION_FS_PROVIDER="MINIO"
WORKFLOW_EXECUTION_FS_CREDENTIAL='{"endpoint_url": "", "key": "", "secret": ""}'
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider":"minio","credentials": {"endpoint_url":"http://unstract-minio:9000","key":"XXX","secret":"XXX"}}'

# Storage Provider for API Execution
API_STORAGE_FS_PROVIDER="MINIO"
API_STORAGE_FS_CREDENTIAL='{"endpoint_url": "", "key": "", "secret": ""}'
API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "XXX", "secret": "XXX"}}'

# Optional: Legacy storage path (if applicable)
LEGACY_STORAGE_PATH="/path/to/legacy/storage"
# Storage Provider for Tool registry
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'
Loading
Loading