From c6c5151bb123f9152d6bbcc92912d8c2ea9ed7da Mon Sep 17 00:00:00 2001
From: Gaspard Lonchampt
Date: Tue, 12 Nov 2024 17:08:19 +0100
Subject: [PATCH] feat(anonymisation): POC and command for mission table

---
Review notes (ignored by `git am`, not part of the commit message):
- COPY and DELETE now run in the session's own transaction, which is
  committed on success; only the ids that were copied are deleted.
- Failures are re-raised after rollback so the CLI does not log "done".
- MissionAnonymized.__repr__ used to return an empty string.
- The "index" blob ids below were not recomputed after these edits.

 app/commands.py                               | 25 +++++-
 app/models/__init__.py                        |  1 +
 app/models/anonymized/mission.py              | 23 +++++
 app/services/anonymize_tables.py              | 84 +++++++++++++++++++
 ...870f7b9399_add_mission_anonymized_table.py | 39 +++++++++
 5 files changed, 171 insertions(+), 1 deletion(-)
 create mode 100644 app/models/anonymized/mission.py
 create mode 100644 app/services/anonymize_tables.py
 create mode 100644 migrations/versions/c8870f7b9399_add_mission_anonymized_table.py

diff --git a/app/commands.py b/app/commands.py
index 41d4a596..0c098fdd 100644
--- a/app/commands.py
+++ b/app/commands.py
@@ -336,7 +336,6 @@ def sync_brevo_command(pipeline_names, verbose):
     """
     Command to sync companies between the database and Brevo.
     You can specify one or more pipeline names as arguments.
-    Example usage: flask sync_brevo "Test Dev churn"
     """
     from app.services.sync_companies_with_brevo import (
         sync_companies_with_brevo,
@@ -356,3 +355,27 @@ def sync_brevo_command(pipeline_names, verbose):
     sync_companies_with_brevo(brevo, list(pipeline_names), verbose=verbose)
 
     app.logger.info("Process sync companies with Brevo done")
+
+
+@app.cli.command("migrate_anonymize_mission", with_appcontext=True)
+@click.argument("time_interval")
+def migrate_anonymize_mission(time_interval):
+    """
+    Move old missions to the anonymized table.
+
+    TIME_INTERVAL is a SQL comparison applied to mission.creation_time,
+    e.g.: flask migrate_anonymize_mission "< now() - interval '1 year'"
+    """
+    from app.services.anonymize_tables import migrate_anonymize_mission
+
+    if not time_interval:
+        print(
+            "Please provide time interval e.g. : '< now() - interval '1 year' "
+        )
+        return
+
+    app.logger.info("Process with data migration and anonymization began")
+
+    migrate_anonymize_mission(time_interval)
+
+    app.logger.info("Process with data migration and anonymization done")
diff --git a/app/models/__init__.py b/app/models/__init__.py
index 27985818..c982f813 100644
--- a/app/models/__init__.py
+++ b/app/models/__init__.py
@@ -30,3 +30,4 @@ from .scenario_testing import ScenarioTesting
 
 from .user_survey_actions import UserSurveyActions
 from .user_agreement import UserAgreement
+from .anonymized.mission import MissionAnonymized
diff --git a/app/models/anonymized/mission.py b/app/models/anonymized/mission.py
new file mode 100644
index 00000000..53fb11fa
--- /dev/null
+++ b/app/models/anonymized/mission.py
@@ -0,0 +1,23 @@
+from app import db
+from app.models.mission import Mission
+
+
+class MissionAnonymized(Mission):
+    """Anonymized copy of a mission, stored in its own table."""
+
+    backref_base_name = "mission_anonymized"
+    # Concrete table inheritance: every column lives in this class's own
+    # table instead of being joined with the parent ``mission`` table.
+    __mapper_args__ = {"concrete": True}
+
+    id = db.Column(db.Integer, primary_key=True)
+    name = db.Column(db.String, nullable=True)
+    submitter_id = db.Column(db.Integer, nullable=True)
+    company_id = db.Column(db.Integer, nullable=True)
+    vehicle_id = db.Column(db.Integer, nullable=True)
+    creation_time = db.Column(db.DateTime, nullable=True)
+    reception_time = db.Column(db.DateTime, nullable=True)
+    context = db.Column(db.JSON, nullable=True)
+
+    def __repr__(self):
+        return f"<MissionAnonymized id={self.id}>"
diff --git a/app/services/anonymize_tables.py b/app/services/anonymize_tables.py
new file mode 100644
index 00000000..9f4f4506
--- /dev/null
+++ b/app/services/anonymize_tables.py
@@ -0,0 +1,84 @@
+import csv
+import json
+from io import StringIO
+
+from sqlalchemy import text
+
+from app import db
+
+COPY_QUERY = """
+    COPY mission_anonymized (
+        id, name, submitter_id, company_id, vehicle_id,
+        creation_time, reception_time, context
+    )
+    FROM STDIN WITH (FORMAT CSV)
+"""
+
+
+def migrate_anonymize_mission(interval: str):
+    """
+    Move missions matching ``interval`` to ``mission_anonymized``.
+
+    The copy and the delete run in the session's single transaction, so
+    either both are committed or neither is.
+
+    :param interval: SQL comparison applied to ``mission.creation_time``,
+        e.g. ``< now() - interval '1 year'``. It is interpolated into the
+        query as raw SQL, so it must only come from a trusted operator.
+    :raises Exception: any database error, once the transaction has been
+        rolled back.
+    """
+    interval = interval.replace('"', "")
+    select_query = f"""
+        SELECT
+            id,
+            anon.fake_last_name() AS name,
+            NULL AS submitter_id,
+            NULL AS company_id,
+            vehicle_id,
+            date_trunc('month', creation_time) AS creation_time,
+            date_trunc('month', reception_time) AS reception_time,
+            context::jsonb AS context -- Convertir context en JSON valide
+        FROM mission
+        WHERE creation_time {interval};
+    """
+
+    try:
+        rows = db.session.execute(text(select_query)).fetchall()
+        if not rows:
+            print("No data to migrate.")
+            return
+
+        csv_buffer = StringIO()
+        csv_writer = csv.writer(csv_buffer)
+        for row in rows:
+            values = list(row)
+            # COPY needs JSON text, not the repr of a Python container.
+            if isinstance(values[-1], (dict, list)):
+                values[-1] = json.dumps(values[-1])
+            csv_writer.writerow(values)
+        csv_buffer.seek(0)
+
+        # Use the DBAPI connection that backs the session, so that COPY
+        # and DELETE belong to the same transaction.
+        cursor = db.session.connection().connection.cursor()
+        try:
+            cursor.copy_expert(COPY_QUERY, csv_buffer)
+        finally:
+            cursor.close()
+
+        # Delete exactly the rows that were copied.
+        db.session.execute(
+            text("DELETE FROM mission WHERE id = ANY(:ids)"),
+            {"ids": [row[0] for row in rows]},
+        )
+        db.session.commit()
+        print("Anonymized data migration successful.")
+
+    except Exception as e:
+        db.session.rollback()
+        print(f"Transaction failed, rolling back changes: {e}")
+        raise
+
+    finally:
+        db.session.close()
diff --git a/migrations/versions/c8870f7b9399_add_mission_anonymized_table.py b/migrations/versions/c8870f7b9399_add_mission_anonymized_table.py
new file mode 100644
index 00000000..859d73c4
--- /dev/null
+++ b/migrations/versions/c8870f7b9399_add_mission_anonymized_table.py
@@ -0,0 +1,39 @@
+"""Add mission_anonymized table
+
+Revision ID: c8870f7b9399
+Revises: bd643a8d5269
+Create Date: 2024-11-07 12:34:16.339041
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "c8870f7b9399"
+down_revision = "bd643a8d5269"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "mission_anonymized",
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("name", sa.String(), nullable=True),
+        sa.Column("submitter_id", sa.Integer(), nullable=True),
+        sa.Column("company_id", sa.Integer(), nullable=True),
+        sa.Column("vehicle_id", sa.Integer(), nullable=True),
+        sa.Column("creation_time", sa.DateTime(), nullable=True),
+        sa.Column("reception_time", sa.DateTime(), nullable=True),
+        sa.Column("context", sa.JSON(), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("mission_anonymized")
+    # ### end Alembic commands ###