Skip to content

Commit

Permalink
Prototype of set primary key, refs #1
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 6, 2023
1 parent c24aa8e commit 1f1a4f0
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 7 deletions.
69 changes: 62 additions & 7 deletions datasette_edit_schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from datasette.utils import sqlite3
from urllib.parse import quote_plus, unquote_plus
import sqlite_utils
from .utils import get_primary_keys, potential_foreign_keys
from .utils import get_primary_keys, potential_foreign_keys, potential_primary_keys

# Don't attempt to detect foreign keys on tables larger than this:
FOREIGN_KEY_DETECTION_LIMIT = 10_000
Expand Down Expand Up @@ -221,8 +221,11 @@ def transform_the_table(conn):
return await update_foreign_keys(
request, datasette, database, table, formdata
)

if "delete_table" in formdata:
elif formdata.get("action") == "update_primary_key":
return await update_primary_key(
request, datasette, database, table, formdata
)
elif "delete_table" in formdata:
return await delete_table(request, datasette, database, table)
elif "add_column" in formdata:
return await add_column(request, datasette, database, table, formdata)
Expand All @@ -231,17 +234,17 @@ def transform_the_table(conn):
else:
return Response.html("Unknown operation", status=400)

def get_columns_and_schema_and_fks(conn):
def get_columns_and_schema_and_fks_and_pks(conn):
t = sqlite_utils.Database(conn)[table]
pks = set(t.pks)
columns = [
{"name": column, "type": dtype, "is_pk": column in pks}
for column, dtype in t.columns_dict.items()
]
return columns, t.schema, t.foreign_keys
return columns, t.schema, t.foreign_keys, t.pks

columns, schema, foreign_keys = await database.execute_fn(
get_columns_and_schema_and_fks
columns, schema, foreign_keys, pks = await database.execute_fn(
get_columns_and_schema_and_fks_and_pks
)
foreign_keys_by_column = {}
for fk in foreign_keys:
Expand Down Expand Up @@ -281,6 +284,13 @@ def get_columns_and_schema_and_fks(conn):
for column in columns
if not column["is_pk"]
]

# Anything not a float or an existing PK could be the next PK, but
# for smaller tables we cut those down to just unique columns
potential_pks = [
c["name"] for c in columns if c["type"] is not float and not c["is_pk"]
]
potential_fks = []
# Only scan for potential foreign keys if there are less than 10,000
# rows - since execute_fn() does not yet support time limits
if (
Expand All @@ -300,6 +310,13 @@ def get_columns_and_schema_and_fks(conn):
)
for info in column_foreign_keys:
info["suggestions"] = potential_fks.get(info["name"], [])
# Now do potential primary keys against non-float columns
non_float_columns = [
c["name"] for c in columns if c["type"] is not float and not c["is_pk"]
]
potential_pks = await database.execute_fn(
lambda conn: potential_primary_keys(conn, table, non_float_columns)
)

# Add 'options' to those
for info in column_foreign_keys:
Expand Down Expand Up @@ -368,6 +385,9 @@ def get_columns_and_schema_and_fks(conn):
],
"foreign_keys": foreign_keys,
"column_foreign_keys": column_foreign_keys,
"potential_pks": potential_pks,
"is_rowid_table": bool(pks == ["rowid"]),
"current_pk": pks[0] if len(pks) == 1 else None,
},
request=request,
)
Expand Down Expand Up @@ -516,3 +536,38 @@ def run(conn):
message,
)
return Response.redirect(request.path)


async def update_primary_key(request, datasette, database, table, formdata):
primary_key = formdata["primary_key"]
if not primary_key:
datasette.add_message(request, "Primary key is required", datasette.ERROR)
return Response.redirect(request.path)

def run(conn):
db = sqlite_utils.Database(conn)
with conn:
if primary_key not in db[table].columns_dict:
return "Column '{}' does not exist".format(primary_key)
# Make sure it's unique
sql = 'select count(*) - count(distinct("{}")) from "{}"'.format(
primary_key, table
)
should_be_zero = db.execute(sql).fetchone()[0]
if should_be_zero:
return "Column '{}' is not unique".format(primary_key)
db[table].transform(pk=primary_key)
return None

error = await database.execute_write_fn(run, block=True)
if error:
datasette.add_message(request, error, datasette.ERROR)
else:
datasette.add_message(
request,
"Primary key for '{}' is now '{}'".format(
table,
formdata["primary_key"],
),
)
return Response.redirect(request.path)
21 changes: 21 additions & 0 deletions datasette_edit_schema/templates/edit_schema_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,27 @@ <h2>Update foreign key relationships</h2>
<input type="submit" value="Update foreign keys">
</form>

{% if potential_pks %}
<h2>{% if is_rowid_table %}Set a primary key{% else %}Change the primary key{% endif %}</h2>

<p>The primary key column uniquely identifies each row in the table.</p>

<form action="{{ base_url }}-/edit-schema/{{ database.name|quote_plus }}/{{ table|quote_plus }}" method="post">
<input type="hidden" name="csrftoken" value="{{ csrftoken() }}">
<input type="hidden" name="action" value="update_primary_key">
<label for="primary_key">Primary key column</label>
<select id="primary_key" name="primary_key">
{% if current_pk %}
<option selected="selected" value="{{ current_pk }}">{{ current_pk }} (current)</option>
{% endif %}
{% for pk in potential_pks %}
<option>{{ pk }}</option>
{% endfor %}
</select>
<p><input type="submit" value="Set primary key"></p>
</form>
{% endif %}

<h2>Delete table</h2>

<form action="{{ base_url }}-/edit-schema/{{ database.name|quote_plus }}/{{ table|quote_plus }}" method="post">
Expand Down
41 changes: 41 additions & 0 deletions datasette_edit_schema/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def potential_foreign_keys(conn, table_name, columns, other_table_pks):
for column in columns:
potentials[column] = []
for other_table, other_column, _ in other_table_pks:
# Search for a value in this column that does not exist in the other table,
# terminate early as soon as we find one since that shows this is not a
# good foreign key candidate.
query = """
select "{table}"."{column}"
from "{table}"
Expand All @@ -45,3 +48,41 @@ def potential_foreign_keys(conn, table_name, columns, other_table_pks):
if cursor.fetchone() is None:
potentials[column].append((other_table, other_column))
return potentials


def potential_primary_keys(conn, table_name, columns, max_string_len=128):
# First we run a query to check the max length of each column + if it has any nulls
selects = []
for column in columns:
selects.append("max(length(\"{}\")) as 'maxlen.{}'".format(column, column))
selects.append(
"sum(case when \"{}\" is null then 1 else 0 end) as 'nulls.{}'".format(
column, column
)
)
sql = 'select {} from "{}"'.format(", ".join(selects), table_name)
cursor = conn.cursor()
cursor.execute(sql)
row = cursor.fetchone()
potential_columns = []
for i, column in enumerate(columns):
maxlen = row[i * 2]
nulls = row[i * 2 + 1]
if maxlen < max_string_len and nulls == 0:
potential_columns.append(column)
if not potential_columns:
return []
# Count distinct values in each of our candidate columns
selects = ["count(*) as _count"]
for column in potential_columns:
selects.append("count(distinct \"{}\") as 'distinct.{}'".format(column, column))
sql = 'select {} from "{}"'.format(", ".join(selects), table_name)
cursor.execute(sql)
row = cursor.fetchone()
count = row[0]
potential_pks = []
for i, column in enumerate(potential_columns):
distinct = row[i + 1]
if distinct == count:
potential_pks.append(column)
return potential_pks

0 comments on commit 1f1a4f0

Please sign in to comment.