Skip to content

Commit

Permalink
feat(anonymisation): POC and command for mission table
Browse files Browse the repository at this point in the history
  • Loading branch information
gaspard-lonchampt committed Nov 13, 2024
1 parent be18158 commit c6c5151
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 1 deletion.
23 changes: 22 additions & 1 deletion app/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,6 @@ def sync_brevo_command(pipeline_names, verbose):
"""
Command to sync companies between the database and Brevo.
You can specify one or more pipeline names as arguments.
Example usage: flask sync_brevo "Test Dev churn"
"""
from app.services.sync_companies_with_brevo import (
sync_companies_with_brevo,
Expand All @@ -356,3 +355,25 @@ def sync_brevo_command(pipeline_names, verbose):
sync_companies_with_brevo(brevo, list(pipeline_names), verbose=verbose)

app.logger.info("Process sync companies with Brevo done")


@app.cli.command("migrate_anonymize_mission", with_appcontext=True)
@click.argument("time_interval")
def migrate_anonymize_mission(time_interval):
    """
    Migrate data to anonymized tables.
    You can specify time interval as arguments.
    """
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    # The service function shares this command's name; import it under an
    # alias so the two cannot be confused inside this body.
    from app.services.anonymize_tables import (
        migrate_anonymize_mission as run_mission_anonymization,
    )

    if time_interval:
        app.logger.info("Process with data migration and anonymization began")
        # `time_interval` is forwarded untouched; the service embeds it in
        # its SQL WHERE clause.
        run_mission_anonymization(time_interval)
        app.logger.info("Process with data migration and anonymization done")
        return

    # Empty argument: show a usage hint and do nothing.
    usage_hint = (
        "Please provide time interval e.g. : '< now() - interval '1 year' "
    )
    print(usage_hint)
1 change: 1 addition & 0 deletions app/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@
from .scenario_testing import ScenarioTesting
from .user_survey_actions import UserSurveyActions
from .user_agreement import UserAgreement
from .anonymized.mission import MissionAnonymized
19 changes: 19 additions & 0 deletions app/models/anonymized/mission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from app import db
from app.models.mission import Mission


class MissionAnonymized(Mission):
    """Anonymized counterpart of ``Mission``.

    Declared as a subclass of ``Mission`` with SQLAlchemy concrete-table
    inheritance, so every column this model needs is redeclared here. All
    columns except the primary key are nullable.
    """

    # NOTE(review): presumably read by Mission (or a shared base/mixin) to
    # name its relationship backrefs - confirm against the parent class.
    backref_base_name = "mission_anonymized"
    # "concrete": True asks SQLAlchemy to map this subclass to a table of its
    # own instead of sharing or joining the parent's table.
    __mapper_args__ = {"concrete": True}

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String, nullable=True)
    # Plain integers: no ForeignKey is declared on these *_id columns.
    submitter_id = db.Column(db.Integer, nullable=True)
    company_id = db.Column(db.Integer, nullable=True)
    vehicle_id = db.Column(db.Integer, nullable=True)
    creation_time = db.Column(db.DateTime, nullable=True)
    reception_time = db.Column(db.DateTime, nullable=True)
    context = db.Column(db.JSON, nullable=True)

    def __repr__(self):
        # Debug representation showing id, name and creation time.
        return f"<MissionAnonymized(id={self.id}, name={self.name}, creation_time={self.creation_time})>"
82 changes: 82 additions & 0 deletions app/services/anonymize_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import csv
from io import StringIO
from sqlalchemy import text
from app import db
import json


def migrate_anonymize_mission(interval: str):
    """Move missions matching ``interval`` into ``mission_anonymized``.

    Matching rows are selected with their name replaced by a fake last name,
    ``submitter_id``/``company_id`` nulled and both timestamps truncated to
    the month. They are bulk-loaded into ``mission_anonymized`` with COPY and
    then deleted from ``mission``.

    The SELECT, the COPY and the DELETE all run on the session's own
    connection and are committed together, so the originals are only removed
    if their anonymized copies were stored. Only the rows that were actually
    copied are deleted (by id), not whatever matches the predicate later.

    Args:
        interval: SQL fragment appended after ``WHERE creation_time``, e.g.
            ``< now() - interval '1 year'``. Double quotes are stripped.

    Returns:
        None. Failures are rolled back and reported on stdout; they are not
        propagated to the caller.
    """
    # NOTE(security): `interval` is spliced verbatim into the SQL text below.
    # It is an operator + expression, so it cannot be passed as a bind
    # parameter; it must only ever come from a trusted operator.
    interval = interval.replace('"', "")
    select_query = f"""
        SELECT
            id,
            anon.fake_last_name() AS name,
            NULL AS submitter_id,
            NULL AS company_id,
            vehicle_id,
            date_trunc('month', creation_time) AS creation_time,
            date_trunc('month', reception_time) AS reception_time,
            context::jsonb AS context -- Convertir context en JSON valide
        FROM mission
        WHERE creation_time {interval};
    """

    try:
        rows = db.session.execute(text(select_query)).fetchall()

        if not rows:
            print("No data to migrate.")
            return

        # Remember exactly which rows were copied so the DELETE cannot remove
        # a mission that was never archived.
        migrated_ids = [row[0] for row in rows]
        csv_buffer = _rows_to_csv(rows)

        # Use the DBAPI connection backing the session: the COPY then joins
        # the session's transaction instead of running on a separate
        # connection that would need (and previously never got) its own
        # commit.
        dbapi_connection = db.session.connection().connection
        cursor = dbapi_connection.cursor()
        try:
            cursor.copy_expert(
                """
                COPY mission_anonymized (id, name, submitter_id, company_id, vehicle_id, creation_time, reception_time, context)
                FROM STDIN WITH (FORMAT CSV)
                """,
                csv_buffer,
            )
        except Exception as e:
            print(f"Error when copying mass data: {e}")
            raise
        finally:
            cursor.close()
            csv_buffer.close()

        db.session.execute(
            text("DELETE FROM mission WHERE id = ANY(:ids)"),
            {"ids": migrated_ids},
        )

        # Single commit covering both the COPY and the DELETE.
        db.session.commit()
        print("Anonymized data migration successful.")

    except Exception as e:
        db.session.rollback()
        print(f"Transaction failed, rolling back changes: {e}")

    finally:
        db.session.close()


def _rows_to_csv(rows):
    """Serialize result rows to an in-memory CSV buffer rewound to offset 0.

    The last column is expected to be the JSON ``context``: dict and list
    values are dumped as JSON text so COPY receives valid JSON rather than a
    Python repr. ``None`` values are written as empty fields by ``csv``.
    """
    csv_buffer = StringIO()
    csv_writer = csv.writer(csv_buffer)

    for row in rows:
        values = list(row)
        if isinstance(values[-1], (dict, list)):
            values[-1] = json.dumps(values[-1])
        csv_writer.writerow(values)

    csv_buffer.seek(0)
    return csv_buffer
39 changes: 39 additions & 0 deletions migrations/versions/c8870f7b9399_add_mission_anonymized_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Add mission_anonymized table
Revision ID: c8870f7b9399
Revises: bd643a8d5269
Create Date: 2024-11-07 12:34:16.339041
"""
from alembic import op
import sqlalchemy as sa


# Revision identifiers, used by Alembic.
# `revision` is this migration's id; `down_revision` is the migration it
# revises (see the "Revises" line in the module docstring).
revision = "c8870f7b9399"
down_revision = "bd643a8d5269"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``mission_anonymized`` table."""
    # Every column other than the primary key is nullable; order here is the
    # column order of the created table.
    nullable_columns = (
        ("name", sa.String()),
        ("submitter_id", sa.Integer()),
        ("company_id", sa.Integer()),
        ("vehicle_id", sa.Integer()),
        ("creation_time", sa.DateTime()),
        ("reception_time", sa.DateTime()),
        ("context", sa.JSON()),
    )

    op.create_table(
        "mission_anonymized",
        sa.Column("id", sa.Integer(), nullable=False),
        *(
            sa.Column(column_name, column_type, nullable=True)
            for column_name, column_type in nullable_columns
        ),
        sa.PrimaryKeyConstraint("id"),
    )


def downgrade():
    """Drop the ``mission_anonymized`` table created by ``upgrade``."""
    table_name = "mission_anonymized"
    op.drop_table(table_name)

0 comments on commit c6c5151

Please sign in to comment.