Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix many type errors #464

Merged
Merged 7 commits on May 28, 2024.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ test-core-api:

test-redbox:
poetry install --no-root --no-ansi --with api,dev --without ai,worker
poetry run pytest redbox/tests --cov=redbox -v --cov-report=term-missing --cov-fail-under=45
poetry run pytest redbox/tests --cov=redbox -v --cov-report=term-missing --cov-fail-under=80

test-worker:
poetry install --no-root --no-ansi --with worker,dev --without ai,api
poetry run pytest worker/tests --cov=worker -v --cov-report=term-missing --cov-fail-under=40

test-django:
docker compose up -d --wait db minio
docker compose run django-app venv/bin/pytest tests/ --ds redbox_app.settings -v --cov=redbox_app.redbox_core --cov-fail-under 60 -o log_cli=true
docker compose run django-app venv/bin/pytest tests/ --ds redbox_app.settings -v --cov=redbox_app.redbox_core --cov-fail-under 80 -o log_cli=true

test-integration:
docker compose down
Expand Down Expand Up @@ -67,7 +67,7 @@ safe:
poetry run mypy ./django_app --ignore-missing-imports

checktypes:
poetry run mypy redbox worker --ignore-missing-imports
poetry run mypy redbox worker --ignore-missing-imports --no-incremental

check-migrations:
docker compose build django-app
Expand Down
Empty file removed redbox/export/__init__.py
Empty file.
106 changes: 0 additions & 106 deletions redbox/export/docx.py

This file was deleted.

68 changes: 0 additions & 68 deletions redbox/llm/llm_base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
from datetime import UTC, datetime
from typing import Any

from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.memory import ConversationBufferMemory
Expand All @@ -17,15 +14,6 @@
STUFF_DOCUMENT_PROMPT,
WITH_SOURCES_PROMPT,
)
from redbox.llm.prompts.spotlight import SPOTLIGHT_COMBINATION_TASK_PROMPT
from redbox.llm.spotlight.spotlight import (
key_actions_task,
key_discussion_task,
key_people_task,
summary_task,
)
from redbox.models.file import File
from redbox.models.spotlight import Spotlight, SpotlightTask


class LLMHandler:
wpfl-dbt marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -121,59 +109,3 @@ def chat_with_rag(
callbacks=callbacks or [],
)
return result, docs_with_sources_chain

def get_spotlight_tasks(self, files: list[File], file_hash: str) -> Spotlight:
return Spotlight(
files=files,
file_hash=file_hash,
tasks=[
summary_task,
key_discussion_task,
key_actions_task,
key_people_task,
],
)

def run_spotlight_task(
self,
spotlight: Spotlight,
task: SpotlightTask,
user_info: dict,
callbacks: list | None = None,
map_reduce: bool = False,
token_max: int = 100_000,
) -> tuple[Any, StuffDocumentsChain | MapReduceDocumentsChain]:
map_chain = LLMChain(llm=self.llm, prompt=task.prompt_template) # type: ignore
regular_chain = StuffDocumentsChain(llm_chain=map_chain, document_variable_name="text")

reduce_chain = LLMChain(llm=self.llm, prompt=SPOTLIGHT_COMBINATION_TASK_PROMPT)
combine_documents_chain = StuffDocumentsChain(llm_chain=reduce_chain, document_variable_name="text")
reduce_documents_chain = ReduceDocumentsChain(
combine_documents_chain=combine_documents_chain,
collapse_documents_chain=combine_documents_chain,
token_max=token_max,
)
map_reduce_chain = MapReduceDocumentsChain(
llm_chain=map_chain,
reduce_documents_chain=reduce_documents_chain,
document_variable_name="text",
return_intermediate_steps=False,
)

if map_reduce:
result = map_reduce_chain.run(
user_info=user_info,
current_date=datetime.now(tz=UTC).date().isoformat(),
input_documents=spotlight.to_documents(),
callbacks=callbacks or [],
)
return result, map_reduce_chain
else:
result = regular_chain.run(
user_info=user_info,
current_date=datetime.now(tz=UTC).date().isoformat(),
input_documents=spotlight.to_documents(),
callbacks=callbacks or [],
)

return result, regular_chain
10 changes: 0 additions & 10 deletions redbox/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@
)
from redbox.models.persona import ChatPersona
from redbox.models.settings import Settings
from redbox.models.spotlight import (
Spotlight,
SpotlightComplete,
SpotlightTask,
SpotlightTaskComplete,
)

__all__ = [
"ChatMessage",
Expand All @@ -36,10 +30,6 @@
"EmbeddingModelInfo",
"File",
"FileStatus",
"Spotlight",
"SpotlightComplete",
"SpotlightTask",
"SpotlightTaskComplete",
"Settings",
"EmbeddingResponse",
"EmbedQueueItem",
Expand Down
3 changes: 2 additions & 1 deletion redbox/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ class PersistableModel(BaseModel):
created_datetime: datetime = Field(default_factory=datetime.utcnow)
creator_user_uuid: UUID

@computed_field
@computed_field # type: ignore[misc] # Remove if https://github.com/python/mypy/issues/1362 is fixed.
@property # Needed for type checking - see https://docs.pydantic.dev/2.0/usage/computed_fields/
def model_type(self) -> str:
"""Return the name of the model class.
Expand Down
2 changes: 1 addition & 1 deletion redbox/models/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ class APIErrorResponse(BaseModel):


class APIError404(APIErrorResponse):
type: AnyUrl = "error/not-found"
type: AnyUrl = AnyUrl("http://example.com/error/not-found")
status: int = 404
title: str = "File not found"
6 changes: 4 additions & 2 deletions redbox/models/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,13 @@ class Chunk(PersistableModel):
metadata: Metadata | None = Field(description="subset of the unstructured Element.Metadata object", default=None)
embedding: list[float] | None = Field(description="the vector representation of the text", default=None)

@computed_field
@computed_field # type: ignore[misc] # Remove if https://github.com/python/mypy/issues/1362 is fixed.
@property # Needed for type checking - see https://docs.pydantic.dev/2.0/usage/computed_fields/
def text_hash(self) -> str:
return hashlib.md5(self.text.encode(encoding="UTF-8", errors="strict"), usedforsecurity=False).hexdigest()

@computed_field
@computed_field # type: ignore[misc] # Remove if https://github.com/python/mypy/issues/1362 is fixed.
@property # Needed for type checking - see https://docs.pydantic.dev/2.0/usage/computed_fields/
def token_count(self) -> int:
return len(encoding.encode(self.text))

Expand Down
3 changes: 0 additions & 3 deletions redbox/models/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,9 @@ class Settings(BaseSettings):
dev_mode: bool = False
django_settings_module: str = "redbox_app.settings"
debug: bool = True
django_secret_key: str
django_log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "WARNING"
wpfl-dbt marked this conversation as resolved.
Show resolved Hide resolved
environment: Literal["LOCAL", "DEV", "PREPROD", "PROD"] = "LOCAL"
postgres_user: str = "redbox-core"
postgres_db: str = "redbox-core"
postgres_password: str
postgres_host: str = "db"
contact_email: str = "[email protected]"
core_api_host: str = "core-api"
Expand Down
58 changes: 0 additions & 58 deletions redbox/models/spotlight.py

This file was deleted.

Loading
Loading