From 1f1a4f002d24c67a75f837c1955518c224343e67 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 6 Sep 2023 16:25:16 -0700 Subject: [PATCH] Prototype of set primary key, refs #1 --- datasette_edit_schema/__init__.py | 69 +++++++++++++++++-- .../templates/edit_schema_table.html | 21 ++++++ datasette_edit_schema/utils.py | 41 +++++++++++ 3 files changed, 124 insertions(+), 7 deletions(-) diff --git a/datasette_edit_schema/__init__.py b/datasette_edit_schema/__init__.py index dda1627..f57c7bf 100644 --- a/datasette_edit_schema/__init__.py +++ b/datasette_edit_schema/__init__.py @@ -3,7 +3,7 @@ from datasette.utils import sqlite3 from urllib.parse import quote_plus, unquote_plus import sqlite_utils -from .utils import get_primary_keys, potential_foreign_keys +from .utils import get_primary_keys, potential_foreign_keys, potential_primary_keys # Don't attempt to detect foreign keys on tables larger than this: FOREIGN_KEY_DETECTION_LIMIT = 10_000 @@ -221,8 +221,11 @@ def transform_the_table(conn): return await update_foreign_keys( request, datasette, database, table, formdata ) - - if "delete_table" in formdata: + elif formdata.get("action") == "update_primary_key": + return await update_primary_key( + request, datasette, database, table, formdata + ) + elif "delete_table" in formdata: return await delete_table(request, datasette, database, table) elif "add_column" in formdata: return await add_column(request, datasette, database, table, formdata) @@ -231,17 +234,17 @@ def transform_the_table(conn): else: return Response.html("Unknown operation", status=400) - def get_columns_and_schema_and_fks(conn): + def get_columns_and_schema_and_fks_and_pks(conn): t = sqlite_utils.Database(conn)[table] pks = set(t.pks) columns = [ {"name": column, "type": dtype, "is_pk": column in pks} for column, dtype in t.columns_dict.items() ] - return columns, t.schema, t.foreign_keys + return columns, t.schema, t.foreign_keys, t.pks - columns, schema, foreign_keys = await database.execute_fn( - get_columns_and_schema_and_fks + columns, schema, foreign_keys, pks = await database.execute_fn( + get_columns_and_schema_and_fks_and_pks ) foreign_keys_by_column = {} for fk in foreign_keys: @@ -281,6 +284,13 @@ def get_columns_and_schema_and_fks(conn): for column in columns if not column["is_pk"] ] + + # Anything not a float or an existing PK could be the next PK, but + # for smaller tables we cut those down to just unique columns + potential_pks = [ + c["name"] for c in columns if c["type"] is not float and not c["is_pk"] + ] + potential_fks = [] # Only scan for potential foreign keys if there are less than 10,000 # rows - since execute_fn() does not yet support time limits if ( @@ -300,6 +310,13 @@ def get_columns_and_schema_and_fks(conn): ) for info in column_foreign_keys: info["suggestions"] = potential_fks.get(info["name"], []) + # Now do potential primary keys against non-float columns + non_float_columns = [ + c["name"] for c in columns if c["type"] is not float and not c["is_pk"] + ] + potential_pks = await database.execute_fn( + lambda conn: potential_primary_keys(conn, table, non_float_columns) + ) # Add 'options' to those for info in column_foreign_keys: @@ -368,6 +385,9 @@ def get_columns_and_schema_and_fks(conn): ], "foreign_keys": foreign_keys, "column_foreign_keys": column_foreign_keys, + "potential_pks": potential_pks, + "is_rowid_table": bool(pks == ["rowid"]), + "current_pk": pks[0] if len(pks) == 1 else None, }, request=request, ) @@ -516,3 +536,38 @@ def run(conn): message, ) return Response.redirect(request.path) + + +async def update_primary_key(request, datasette, database, table, formdata): + primary_key = formdata["primary_key"] + if not primary_key: + datasette.add_message(request, "Primary key is required", datasette.ERROR) + return Response.redirect(request.path) + + def run(conn): + db = sqlite_utils.Database(conn) + with conn: + if primary_key not in db[table].columns_dict: + return "Column '{}' does not exist".format(primary_key) + # Make sure it's unique + sql = 'select count(*) - count(distinct("{}")) from "{}"'.format( + primary_key, table + ) + should_be_zero = db.execute(sql).fetchone()[0] + if should_be_zero: + return "Column '{}' is not unique".format(primary_key) + db[table].transform(pk=primary_key) + return None + + error = await database.execute_write_fn(run, block=True) + if error: + datasette.add_message(request, error, datasette.ERROR) + else: + datasette.add_message( + request, + "Primary key for '{}' is now '{}'".format( + table, + formdata["primary_key"], + ), + ) + return Response.redirect(request.path) diff --git a/datasette_edit_schema/templates/edit_schema_table.html b/datasette_edit_schema/templates/edit_schema_table.html index e5f09af..3b7d73d 100644 --- a/datasette_edit_schema/templates/edit_schema_table.html +++ b/datasette_edit_schema/templates/edit_schema_table.html @@ -148,6 +148,27 @@

Update foreign key relationships

+{% if potential_pks %} +

{% if is_rowid_table %}Set a primary key{% else %}Change the primary key{% endif %}

+ +

The primary key column uniquely identifies each row in the table.

+ +
+ + + + +

+
+{% endif %} +

Delete table

diff --git a/datasette_edit_schema/utils.py b/datasette_edit_schema/utils.py index 648c43d..7c9fb74 100644 --- a/datasette_edit_schema/utils.py +++ b/datasette_edit_schema/utils.py @@ -26,6 +26,9 @@ def potential_foreign_keys(conn, table_name, columns, other_table_pks): for column in columns: potentials[column] = [] for other_table, other_column, _ in other_table_pks: + # Search for a value in this column that does not exist in the other table, + # terminate early as soon as we find one since that shows this is not a + # good foreign key candidate. query = """ select "{table}"."{column}" from "{table}" @@ -45,3 +48,41 @@ def potential_foreign_keys(conn, table_name, columns, other_table_pks): if cursor.fetchone() is None: potentials[column].append((other_table, other_column)) return potentials + + +def potential_primary_keys(conn, table_name, columns, max_string_len=128): + # First we run a query to check the max length of each column + if it has any nulls + selects = [] + for column in columns: + selects.append("max(length(\"{}\")) as 'maxlen.{}'".format(column, column)) + selects.append( + "sum(case when \"{}\" is null then 1 else 0 end) as 'nulls.{}'".format( + column, column + ) + ) + sql = 'select {} from "{}"'.format(", ".join(selects), table_name) + cursor = conn.cursor() + cursor.execute(sql) + row = cursor.fetchone() + potential_columns = [] + for i, column in enumerate(columns): + maxlen = row[i * 2] + nulls = row[i * 2 + 1] + if maxlen < max_string_len and nulls == 0: + potential_columns.append(column) + if not potential_columns: + return [] + # Count distinct values in each of our candidate columns + selects = ["count(*) as _count"] + for column in potential_columns: + selects.append("count(distinct \"{}\") as 'distinct.{}'".format(column, column)) + sql = 'select {} from "{}"'.format(", ".join(selects), table_name) + cursor.execute(sql) + row = cursor.fetchone() + count = row[0] + potential_pks = [] + for i, column in enumerate(potential_columns): + distinct = row[i + 1] + if distinct == count: + potential_pks.append(column) + return potential_pks