Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update file expiry with RAG chat #435

Merged
merged 4 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 5.0.6 on 2024-05-22 14:07

from django.db import migrations


def reset_last_referenced(apps, schema_editor):
# Required as we're moving from expiry date, which is 30 days further forward
File = apps.get_model("redbox_core", "File")
for file in File.objects.all():
file.last_referenced = file.created_at
file.save()


class Migration(migrations.Migration):
dependencies = [
("redbox_core", "0009_file_expiry_date"),
]

operations = [
migrations.RenameField(
model_name="file",
old_name="expiry_date",
new_name="last_referenced",
),
migrations.RunPython(reset_last_referenced, migrations.RunPython.noop)
]
16 changes: 10 additions & 6 deletions django_app/redbox_app/redbox_core/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import uuid
from datetime import timedelta
from datetime import datetime, timedelta

import boto3
from botocore.config import Config
Expand Down Expand Up @@ -66,18 +66,18 @@ class File(UUIDPrimaryKeyBase, TimeStampedModel):
user = models.ForeignKey(User, on_delete=models.CASCADE)
original_file_name = models.TextField(max_length=2048, blank=True, null=True)
core_file_uuid = models.UUIDField(null=True)
expiry_date = models.DateTimeField(blank=True, null=True)
last_referenced = models.DateTimeField(blank=True, null=True)

def __str__(self) -> str: # pragma: no cover
return f"{self.original_file_name} {self.user}"

def save(self, *args, **kwargs):
if not self.expiry_date:
if not self.last_referenced:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we need this part if we have the migration? (or vice versa)

if self.created_at:
# Needed to populate the initial expiry date for existing Files
self.expiry_date = self.created_at + timedelta(seconds=settings.FILE_EXPIRY_IN_SECONDS)
# Needed to populate the initial last_referenced field for existing Files
self.last_referenced = self.created_at
else:
self.expiry_date = timezone.now() + timedelta(seconds=settings.FILE_EXPIRY_IN_SECONDS)
self.last_referenced = timezone.now()
super().save(*args, **kwargs)

def delete(self, using=None, keep_parents=False): # noqa: ARG002 # remove at Python 3.12
Expand Down Expand Up @@ -130,6 +130,10 @@ def get_processing_status_text(self) -> str:
"Unknown",
)

@property
def expiry_date(self) -> datetime:
return self.last_referenced + timedelta(seconds=settings.FILE_EXPIRY_IN_SECONDS)


class ChatHistory(UUIDPrimaryKeyBase, TimeStampedModel):
name = models.TextField(max_length=1024, null=False, blank=False)
Expand Down
5 changes: 5 additions & 0 deletions django_app/redbox_app/redbox_core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from django.http import HttpRequest, HttpResponse, JsonResponse
from django.shortcuts import redirect, render
from django.urls import reverse
from django.utils import timezone
from django.utils.datastructures import MultiValueDictKeyError
from django.views.decorators.http import require_http_methods
from redbox_app.redbox_core.client import CoreApiClient
Expand Down Expand Up @@ -215,6 +216,10 @@ def post_message(request: HttpRequest) -> HttpResponse:
files: list[File] = File.objects.filter(core_file_uuid__in=doc_uuids, user=request.user)
llm_message.source_files.set(files)

for file in files:
252afh marked this conversation as resolved.
Show resolved Hide resolved
file.last_referenced = timezone.now()
file.save()

return redirect(reverse(chats_view, args=(session.id,)))


Expand Down
35 changes: 22 additions & 13 deletions django_app/tests/test_models.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
from datetime import UTC, datetime, timedelta
from datetime import datetime, timedelta, timezone

import pytest
from django.conf import settings
from redbox_app.redbox_core.models import File
from django.core.files.uploadedfile import SimpleUploadedFile
from redbox_app.redbox_core.models import File, ProcessingStatusEnum


@pytest.mark.django_db
def test_file_model_expiry_date(uploaded_file: File):
# Tests the initial value of the expiry_date
expected_expiry_date = uploaded_file.created_at + timedelta(seconds=settings.FILE_EXPIRY_IN_SECONDS)
# The expiry_date should be FILE_EXPIRY_IN_SECONDS ahead of created_at
def test_file_model_last_referenced(peter_rabbit, s3_client): # noqa: ARG001
mock_file = SimpleUploadedFile("test.txt", b"these are the file contents")

new_file = File.objects.create(
processing_status=ProcessingStatusEnum.uploaded,
original_file=mock_file,
user=peter_rabbit,
original_file_name="test.txt",
)

# Tests the initial value of the last_referenced
expected_last_referenced = new_file.created_at
# The last_referenced should be FILE_EXPIRY_IN_SECONDS ahead of created_at
# these fields are set during the same save() process
# this test accounts for a delay between creating the fields
assert abs(uploaded_file.expiry_date - expected_expiry_date) < timedelta(seconds=1)
assert abs(new_file.last_referenced - expected_last_referenced) < timedelta(seconds=1)

# Tests that the expiry_date can be updated
new_date = datetime(2028, 1, 1, tzinfo=UTC)
uploaded_file.expiry_date = new_date
uploaded_file.save()
assert uploaded_file.expiry_date == new_date
# Tests that the last_referenced field can be updated
new_date = datetime(2028, 1, 1, tzinfo=timezone.utc)
new_file.last_referenced = new_date
new_file.save()
assert new_file.last_referenced == new_date
14 changes: 12 additions & 2 deletions django_app/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,16 +244,22 @@ def test_post_message_to_new_session(alice: User, client: Client, requests_mock:


@pytest.mark.django_db
def test_post_message_to_existing_session(chat_history: ChatHistory, client: Client, requests_mock: Mocker):
def test_post_message_to_existing_session(
chat_history: ChatHistory, client: Client, requests_mock: Mocker, uploaded_file: File
):
# Given
client.force_login(chat_history.users)
session_id = chat_history.id
rag_url = f"http://{settings.CORE_API_HOST}:{settings.CORE_API_PORT}/chat/rag"
requests_mock.register_uri(
"POST",
rag_url,
json={"output_text": "Good afternoon, Mr. Amor.", "source_documents": []},
json={
"output_text": "Good afternoon, Mr. Amor.",
"source_documents": [{"file_uuid": str(uploaded_file.core_file_uuid)}],
},
)
initial_file_expiry_date = File.objects.get(core_file_uuid=uploaded_file.core_file_uuid).expiry_date

# When
response = client.post("/post-message/", {"message": "Are you there?", "session-id": session_id})
Expand All @@ -264,6 +270,10 @@ def test_post_message_to_existing_session(chat_history: ChatHistory, client: Cli
assert (
ChatMessage.objects.get(chat_history__id=session_id, role=ChatRoleEnum.ai).text == "Good afternoon, Mr. Amor."
)
assert (
ChatMessage.objects.get(chat_history__id=session_id, role=ChatRoleEnum.ai).source_files.first() == uploaded_file
)
assert initial_file_expiry_date != File.objects.get(core_file_uuid=uploaded_file.core_file_uuid).expiry_date
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this more utilitarian by adding a last_used date, so it can be used for more than expiry?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, good idea :)



@pytest.mark.django_db
Expand Down
Loading