-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Redirect recipes during indexing when an earlier-known-origin-URL is discovered (#84)
- Loading branch information
1 parent
db1c0e1
commit 03e19f2
Showing
7 changed files
with
130 additions
and
2 deletions.
There are no files selected for viewing
59 changes: 59 additions & 0 deletions
59
migrations/versions/0ed6bcd27647_add_hash_ids_to_url_models.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
"""Add hash-ids to URL models | ||
Revision ID: 0ed6bcd27647 | ||
Revises: a34cbcbedf7c | ||
Create Date: 2023-12-12 18:26:21.409576 | ||
""" | ||
from alembic import op | ||
from pymmh3 import hash_bytes | ||
import sqlalchemy as sa | ||
|
||
from reciperadar.models.url import BaseURL | ||
|
||
|
||
# revision identifiers, used by Alembic. | ||
revision = '0ed6bcd27647' | ||
down_revision = 'a34cbcbedf7c' | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade():
    """Add an ``id`` column to both URL tables and backfill it from each URL.

    The column is created as a nullable string, so rows that exist before
    the backfill remain valid while it runs.  For every ``url`` value found
    in a table, the value is hashed with ``hash_bytes``, the result is
    UTF-8 encoded and passed to ``BaseURL.generate_id``, and the outcome is
    written to the ``id`` of every row in that table with a matching ``url``.
    """
    # Schema change first: both tables gain the same nullable string column.
    for table_name in ('crawl_urls', 'recipe_urls'):
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.add_column(sa.Column('id', sa.String(), nullable=True))

    bind = op.get_bind()

    # One (read, write) statement pair per table, processed in table order.
    backfill_queries = (
        (
            'SELECT url FROM crawl_urls',
            'UPDATE crawl_urls SET id = :generated_id WHERE url = :url',
        ),
        (
            'SELECT url FROM recipe_urls',
            'UPDATE recipe_urls SET id = :generated_id WHERE url = :url',
        ),
    )
    for select_sql, update_sql in backfill_queries:
        # Fetch the complete result set before issuing any UPDATE statement.
        rows = bind.execute(sa.text(select_sql)).fetchall()
        for (url,) in rows:
            generated_id = BaseURL.generate_id(hash_bytes(url).encode("utf-8"))
            bind.execute(
                sa.text(update_sql),
                parameters={"generated_id": generated_id, "url": url},
            )
|
||
|
||
def downgrade():
    """Drop the ``id`` column from ``recipe_urls`` and then ``crawl_urls``."""
    # Tables are handled in the opposite order to the one used by upgrade().
    for table_name in ('recipe_urls', 'crawl_urls'):
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.drop_column('id')
50 changes: 50 additions & 0 deletions
50
migrations/versions/330fe0f5e304_make_url_hash_id_required_and_indexed.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
"""Make URL hash-id required and indexed | ||
Revision ID: 330fe0f5e304 | ||
Revises: 0ed6bcd27647 | ||
Create Date: 2023-12-12 18:41:13.134572 | ||
""" | ||
from alembic import op | ||
import sqlalchemy as sa | ||
|
||
|
||
# revision identifiers, used by Alembic. | ||
revision = '330fe0f5e304' | ||
down_revision = '0ed6bcd27647' | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade():
    """Make ``id`` mandatory and indexed on ``crawl_urls`` and ``recipe_urls``.

    For each table the existing ``id`` column (declared as ``VARCHAR``) is
    altered to ``nullable=False`` and a non-unique index on ``id`` is
    created inside the same batch operation.
    """
    # (table, index name) pairs; the index name goes through batch_op.f().
    indexed_tables = (
        ('crawl_urls', 'ix_crawl_urls_id'),
        ('recipe_urls', 'ix_recipe_urls_id'),
    )
    for table_name, index_name in indexed_tables:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.alter_column(
                'id',
                existing_type=sa.VARCHAR(),
                nullable=False,
            )
            batch_op.create_index(batch_op.f(index_name), ['id'], unique=False)
|
||
|
||
def downgrade():
    """Remove the ``id`` indexes and allow ``id`` to be NULL again.

    ``recipe_urls`` is processed before ``crawl_urls``.  Within each table
    the index is dropped first, then the ``VARCHAR`` ``id`` column is
    altered back to ``nullable=True``.
    """
    # (table, index name) pairs; the index name goes through batch_op.f().
    indexed_tables = (
        ('recipe_urls', 'ix_recipe_urls_id'),
        ('crawl_urls', 'ix_crawl_urls_id'),
    )
    for table_name, index_name in indexed_tables:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            batch_op.drop_index(batch_op.f(index_name))
            batch_op.alter_column(
                'id',
                existing_type=sa.VARCHAR(),
                nullable=True,
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters