Skip to content

Commit

Permalink
[FIX] Fixes for Prompt studio Indexing and tool runs (#1052)
Browse files Browse the repository at this point in the history
* Roll to latest SDK

* Commit pdm.lock changes

* Remove pandoc and tesseract

* Revert change

* Roll tool versions

* Roll tool versions

* Roll version

* Commit pdm.lock changes

* Remove remote storage tool registry

* Use EnvHelper for env standardisation

* Commit pdm.lock changes

* Minor improvement

* Indexing and env fixes

* Indexing and env fixes

* Correcting sample env

* Roll SDK version

* SDK version roll

* Add sample env

* Add sample env

* Lock file check in

---------

Co-authored-by: gaya3-zipstack <[email protected]>
Co-authored-by: Hari John Kuriakose <[email protected]>
  • Loading branch information
3 people authored Jan 9, 2025
1 parent 4e194de commit 159bef0
Show file tree
Hide file tree
Showing 30 changed files with 455 additions and 444 deletions.
159 changes: 80 additions & 79 deletions backend/pdm.lock

Large diffs are not rendered by default.

88 changes: 63 additions & 25 deletions backend/prompt_studio/prompt_studio_core_v2/prompt_studio_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,18 +381,36 @@ def index_document(
process_text = None
if text_processor:
process_text = text_processor.process
doc_id = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
tool_id=tool_id,
file_path=file_path,
org_id=org_id,
document_id=document_id,
is_summary=is_summary,
reindex=True,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
doc_id = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
tool_id=tool_id,
file_path=file_path,
org_id=org_id,
document_id=document_id,
is_summary=is_summary,
reindex=True,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
else:
fs_instance = FileStorageHelper.initialize_file_storage(
type=FileStorageType.PERMANENT
)
doc_id = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
tool_id=tool_id,
file_path=file_path,
org_id=org_id,
document_id=document_id,
is_summary=is_summary,
reindex=True,
run_id=run_id,
user_id=user_id,
process_text=process_text,
fs=fs_instance,
)

elapsed_time = time.time() - start_time
logger.info(
Expand Down Expand Up @@ -870,7 +888,7 @@ def _fetch_response(
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
file_hash = ToolUtils.get_hash_from_file(file_path=doc_path)
else:
file_hash = ToolUtils.get_hash_from_file(file_path=doc_path, fs=fs_instance)
file_hash = fs_instance.get_hash_from_file(path=doc_path)

payload = {
TSPKeys.TOOL_SETTINGS: tool_settings,
Expand Down Expand Up @@ -1123,17 +1141,34 @@ def _fetch_single_pass_response(
if not default_profile:
raise DefaultProfileError()

index_result = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
file_path=file_path,
tool_id=tool_id,
org_id=org_id,
is_summary=tool.summarize_as_source,
document_id=document_id,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
index_result = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
file_path=file_path,
tool_id=tool_id,
org_id=org_id,
is_summary=tool.summarize_as_source,
document_id=document_id,
run_id=run_id,
user_id=user_id,
process_text=process_text,
)
else:
fs_instance = FileStorageHelper.initialize_file_storage(
type=FileStorageType.PERMANENT
)
index_result = PromptStudioHelper.dynamic_indexer(
profile_manager=default_profile,
file_path=file_path,
tool_id=tool_id,
org_id=org_id,
is_summary=tool.summarize_as_source,
document_id=document_id,
run_id=run_id,
user_id=user_id,
process_text=process_text,
fs=fs_instance,
)
if index_result.get("status") == IndexingStatus.PENDING_STATUS.value:
return {
"status": IndexingStatus.PENDING_STATUS.value,
Expand Down Expand Up @@ -1174,7 +1209,10 @@ def _fetch_single_pass_response(
if tool.summarize_as_source:
path = Path(file_path)
file_path = str(path.parent / TSPKeys.SUMMARIZE / (path.stem + ".txt"))
file_hash = ToolUtils.get_hash_from_file(file_path=file_path)
if not check_feature_flag_status(FeatureFlag.REMOTE_FILE_STORAGE):
file_hash = ToolUtils.get_hash_from_file(file_path=file_path)
else:
file_hash = fs_instance.get_hash_from_file(path=file_path)

payload = {
TSPKeys.TOOL_SETTINGS: tool_settings,
Expand Down
2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies = [
"python-socketio==5.9.0", # For log_events
"social-auth-app-django==5.3.0", # For OAuth
"social-auth-core==4.4.2", # For OAuth
"unstract-sdk~=0.54.0rc11",
"unstract-sdk~=0.54.0rc12",
# ! IMPORTANT!
# Indirect local dependencies usually need to be added in their own projects
# as: https://pdm-project.org/latest/usage/dependency/#local-dependencies.
Expand Down
14 changes: 6 additions & 8 deletions backend/sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ REMOTE_PROMPT_STUDIO_FILE_PATH=

# Structure Tool Image (Runs prompt studio exported tools)
# https://hub.docker.com/r/unstract/tool-structure
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.54"
STRUCTURE_TOOL_IMAGE_URL="docker:unstract/tool-structure:0.0.55"
STRUCTURE_TOOL_IMAGE_NAME="unstract/tool-structure"
STRUCTURE_TOOL_IMAGE_TAG="0.0.54"
STRUCTURE_TOOL_IMAGE_TAG="0.0.55"

# Feature Flags
EVALUATION_SERVER_IP=unstract-flipt
Expand Down Expand Up @@ -160,12 +160,10 @@ API_EXECUTION_DIR_PREFIX="unstract/api"

# Storage Provider for Workflow Execution
# Valid options: MINIO, S3, etc.
WORKFLOW_EXECUTION_FS_PROVIDER="MINIO"
WORKFLOW_EXECUTION_FS_CREDENTIAL='{"endpoint_url": "", "key": "", "secret": ""}'
WORKFLOW_EXECUTION_FILE_STORAGE_CREDENTIALS='{"provider":"minio","credentials": {"endpoint_url":"http://unstract-minio:9000","key":"XXX","secret":"XXX"}}'

# Storage Provider for API Execution
API_STORAGE_FS_PROVIDER="MINIO"
API_STORAGE_FS_CREDENTIAL='{"endpoint_url": "", "key": "", "secret": ""}'
API_FILE_STORAGE_CREDENTIALS='{"provider": "minio", "credentials": {"endpoint_url": "http://unstract-minio:9000", "key": "XXX", "secret": "XXX"}}'

# Optional: Legacy storage path (if applicable)
LEGACY_STORAGE_PATH="/path/to/legacy/storage"
# Storage Provider for Tool registry
TOOL_REGISTRY_STORAGE_CREDENTIALS='{"provider":"local"}'
Loading

0 comments on commit 159bef0

Please sign in to comment.