Skip to content

Commit

Permalink
added code to remove duplicates first
Browse files (browse the repository at this point in the history)
  • Loading branch information
gecBurton committed Jan 15, 2025
1 parent 8473b0b commit 97943a8
Showing 1 changed file with 18 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
# Generated by Django 5.1.3 on 2025-01-07 13:37
from collections import defaultdict

from django.db import migrations, models

def remove_duplicate_citations(apps, schema_editor):
    """Delete Citation rows that repeat another row's (file, chat message) pair.

    For every group of citations sharing the same ``file_id`` and
    ``chat_message_id``, the row with the lowest ``id`` is kept and all
    other rows in the group are deleted.

    Args:
        apps: App registry passed in by ``migrations.RunPython``; used to
            look up the ``redbox_core.Citation`` model.
        schema_editor: Passed in by ``migrations.RunPython``; unused.
    """
    Citation = apps.get_model("redbox_core", "Citation")

    # Step 1: Group citation ids by their (file, chat message) pair.
    # Only the three columns that are needed get fetched, and rows are
    # streamed in id order so that "the first one" is deterministic.
    # NOTE(review): pairs containing NULL are grouped like any other value,
    # so rows sharing a NULL file/chat message count as duplicates - confirm
    # that this is intended.
    ids_by_pair = defaultdict(list)
    rows = Citation.objects.values_list("id", "file_id", "chat_message_id").order_by("id")
    for citation_id, file_id, chat_message_id in rows.iterator():
        ids_by_pair[(file_id, chat_message_id)].append(citation_id)

    # Step 2: Remove duplicates.
    # ids[0] (the lowest id of each group) is retained, ids[1:] are deleted.
    surplus_ids = [
        citation_id
        for ids in ids_by_pair.values()
        for citation_id in ids[1:]
    ]

    # Delete in bounded batches: far fewer queries than one per group, while
    # keeping each IN (...) list small enough for backends that cap the
    # number of query parameters.
    batch_size = 500
    for start in range(0, len(surplus_ids), batch_size):
        batch = surplus_ids[start:start + batch_size]
        Citation.objects.filter(id__in=batch).delete()


def back_populate_new_source_files(apps, schema_editor):
ChatMessage = apps.get_model("redbox_core", "ChatMessage")
for chat_message in ChatMessage.objects.all():
Expand All @@ -16,6 +33,7 @@ class Migration(migrations.Migration):
]

operations = [
migrations.RunPython(remove_duplicate_citations, migrations.RunPython.noop),
migrations.AddField(
model_name='chatmessage',
name='new_source_files',
Expand Down

0 comments on commit 97943a8

Please sign in to comment.