Skip to content

Commit

Permalink
Merge pull request #464 from i-dot-ai/bugfix/many-type-errors
Browse files (browse the repository at this point in the history)
Fix many type errors
  • Loading branch information
brunns authored May 28, 2024
2 parents 6174452 + 7ce9a5d commit d69bdd1
Show file tree
Hide file tree
Showing 15 changed files with 31 additions and 268 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ test-core-api:

test-redbox:
poetry install --no-root --no-ansi --with api,dev --without ai,worker
poetry run pytest redbox/tests --cov=redbox -v --cov-report=term-missing --cov-fail-under=45
poetry run pytest redbox/tests --cov=redbox -v --cov-report=term-missing --cov-fail-under=80

test-worker:
poetry install --no-root --no-ansi --with worker,dev --without ai,api
poetry run pytest worker/tests --cov=worker -v --cov-report=term-missing --cov-fail-under=40

test-django:
docker compose up -d --wait db minio
docker compose run django-app venv/bin/pytest tests/ --ds redbox_app.settings -v --cov=redbox_app.redbox_core --cov-fail-under 60 -o log_cli=true
docker compose run django-app venv/bin/pytest tests/ --ds redbox_app.settings -v --cov=redbox_app.redbox_core --cov-fail-under 80 -o log_cli=true

test-integration:
docker compose down
Expand Down Expand Up @@ -67,7 +67,7 @@ safe:
poetry run mypy ./django_app --ignore-missing-imports

checktypes:
poetry run mypy redbox worker --ignore-missing-imports
poetry run mypy redbox worker --ignore-missing-imports --no-incremental

check-migrations:
docker compose build django-app
Expand Down
Empty file removed redbox/export/__init__.py
Empty file.
106 changes: 0 additions & 106 deletions redbox/export/docx.py

This file was deleted.

68 changes: 0 additions & 68 deletions redbox/llm/llm_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from datetime import UTC, datetime
from typing import Any

from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.memory import ConversationBufferMemory
Expand All @@ -17,15 +14,6 @@
STUFF_DOCUMENT_PROMPT,
WITH_SOURCES_PROMPT,
)
from redbox.llm.prompts.spotlight import SPOTLIGHT_COMBINATION_TASK_PROMPT
from redbox.llm.spotlight.spotlight import (
key_actions_task,
key_discussion_task,
key_people_task,
summary_task,
)
from redbox.models.file import File
from redbox.models.spotlight import Spotlight, SpotlightTask


class LLMHandler:
Expand Down Expand Up @@ -121,59 +109,3 @@ def chat_with_rag(
callbacks=callbacks or [],
)
return result, docs_with_sources_chain

def get_spotlight_tasks(self, files: list[File], file_hash: str) -> Spotlight:
    """Build a Spotlight for the given files with the standard task set.

    Args:
        files: Files handed to the Spotlight unchanged.
        file_hash: Hash value handed to the Spotlight unchanged.

    Returns:
        A Spotlight whose tasks are, in order, the summary, key discussion,
        key actions and key people tasks.
    """
    # Order matters to callers that iterate the tasks; keep it fixed.
    default_tasks = [
        summary_task,
        key_discussion_task,
        key_actions_task,
        key_people_task,
    ]
    return Spotlight(files=files, file_hash=file_hash, tasks=default_tasks)

def run_spotlight_task(
    self,
    spotlight: Spotlight,
    task: SpotlightTask,
    user_info: dict,
    callbacks: list | None = None,
    map_reduce: bool = False,
    token_max: int = 100_000,
) -> tuple[Any, StuffDocumentsChain | MapReduceDocumentsChain]:
    """Run one spotlight task over the spotlight's documents.

    Both a "stuff" chain and a map-reduce chain are always constructed;
    ``map_reduce`` only decides which of the two is executed and returned.

    Args:
        spotlight: Source of the input documents (via ``to_documents()``).
        task: Task whose ``prompt_template`` drives the per-document LLM chain.
        user_info: Passed to the chain as the ``user_info`` input.
        callbacks: Optional callbacks for the run; ``None`` becomes ``[]``.
        map_reduce: When true, run the map-reduce chain instead of the
            stuff chain.
        token_max: Forwarded to the ReduceDocumentsChain as ``token_max``.

    Returns:
        A ``(result, chain)`` tuple: the chain's output and the chain that
        produced it.
    """
    # Chain applied to the documents with the task's own prompt.
    per_task_llm_chain = LLMChain(llm=self.llm, prompt=task.prompt_template)  # type: ignore
    stuff_chain = StuffDocumentsChain(llm_chain=per_task_llm_chain, document_variable_name="text")

    # Reduction side: the same stuff chain is used to combine and to collapse.
    combination_llm_chain = LLMChain(llm=self.llm, prompt=SPOTLIGHT_COMBINATION_TASK_PROMPT)
    combiner = StuffDocumentsChain(llm_chain=combination_llm_chain, document_variable_name="text")
    reducer = ReduceDocumentsChain(
        combine_documents_chain=combiner,
        collapse_documents_chain=combiner,
        token_max=token_max,
    )
    map_reduce_chain = MapReduceDocumentsChain(
        llm_chain=per_task_llm_chain,
        reduce_documents_chain=reducer,
        document_variable_name="text",
        return_intermediate_steps=False,
    )

    # Both strategies take exactly the same inputs, so pick the chain once.
    selected_chain = map_reduce_chain if map_reduce else stuff_chain
    result = selected_chain.run(
        user_info=user_info,
        current_date=datetime.now(tz=UTC).date().isoformat(),
        input_documents=spotlight.to_documents(),
        callbacks=callbacks or [],
    )
    return result, selected_chain
10 changes: 0 additions & 10 deletions redbox/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
)
from redbox.models.persona import ChatPersona
from redbox.models.settings import Settings
from redbox.models.spotlight import (
Spotlight,
SpotlightComplete,
SpotlightTask,
SpotlightTaskComplete,
)

__all__ = [
"ChatMessage",
Expand All @@ -36,10 +30,6 @@
"EmbeddingModelInfo",
"File",
"FileStatus",
"Spotlight",
"SpotlightComplete",
"SpotlightTask",
"SpotlightTaskComplete",
"Settings",
"EmbeddingResponse",
"EmbedQueueItem",
Expand Down
3 changes: 2 additions & 1 deletion redbox/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class PersistableModel(BaseModel):
created_datetime: datetime = Field(default_factory=datetime.utcnow)
creator_user_uuid: UUID

@computed_field
@computed_field # type: ignore[misc] # Remove if https://github.com/python/mypy/issues/1362 is fixed.
@property # Needed for type checking - see https://docs.pydantic.dev/2.0/usage/computed_fields/
def model_type(self) -> str:
"""Return the name of the model class.
Expand Down
2 changes: 1 addition & 1 deletion redbox/models/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ class APIErrorResponse(BaseModel):


class APIError404(APIErrorResponse):
type: AnyUrl = "error/not-found"
type: AnyUrl = AnyUrl("http://example.com/error/not-found")
status: int = 404
title: str = "File not found"
6 changes: 4 additions & 2 deletions redbox/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,13 @@ class Chunk(PersistableModel):
metadata: Metadata | None = Field(description="subset of the unstructured Element.Metadata object", default=None)
embedding: list[float] | None = Field(description="the vector representation of the text", default=None)

@computed_field
@computed_field # type: ignore[misc] # Remove if https://github.com/python/mypy/issues/1362 is fixed.
@property # Needed for type checking - see https://docs.pydantic.dev/2.0/usage/computed_fields/
def text_hash(self) -> str:
return hashlib.md5(self.text.encode(encoding="UTF-8", errors="strict"), usedforsecurity=False).hexdigest()

@computed_field
@computed_field # type: ignore[misc] # Remove if https://github.com/python/mypy/issues/1362 is fixed.
@property # Needed for type checking - see https://docs.pydantic.dev/2.0/usage/computed_fields/
def token_count(self) -> int:
return len(encoding.encode(self.text))

Expand Down
3 changes: 0 additions & 3 deletions redbox/models/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,9 @@ class Settings(BaseSettings):
dev_mode: bool = False
django_settings_module: str = "redbox_app.settings"
debug: bool = True
django_secret_key: str
django_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "WARNING"
environment: Literal["LOCAL", "DEV", "PREPROD", "PROD"] = "LOCAL"
postgres_user: str = "redbox-core"
postgres_db: str = "redbox-core"
postgres_password: str
postgres_host: str = "db"
contact_email: str = "[email protected]"
core_api_host: str = "core-api"
Expand Down
58 changes: 0 additions & 58 deletions redbox/models/spotlight.py

This file was deleted.

Loading

0 comments on commit d69bdd1

Please sign in to comment.