Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better Mac support. Better support for Trino and UUIDs #28

Merged
merged 6 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Build the stack
run: docker-compose up -d mysql postgres trino clickhouse vertica
run: docker compose up -d mysql postgres trino clickhouse vertica

- name: Install Poetry
run: pip install poetry
Expand Down Expand Up @@ -74,7 +74,7 @@ jobs:
env:
# SNOWFLAKE_URI: '${{ secrets.SNOWFLAKE_URI }}'
# PRESTO_URI: '${{ secrets.PRESTO_URI }}'
TRINO_URI: '${{ secrets.TRINO_URI }}'
TRINO_URI: 'trino://[email protected]:8081/postgresql/public'
# BIGQUERY_URI: '${{ secrets.BIGQUERY_URI }}'
CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
VERTICA_URI: 'vertica://vertica:Password1@localhost:5433/vertica'
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/ci_full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: Build the stack
run: docker-compose up -d mysql postgres trino vertica # presto clickhouse
run: docker compose up -d mysql postgres trino vertica # presto clickhouse

- name: Install Poetry
run: pip install poetry
Expand All @@ -66,6 +66,7 @@ jobs:

- name: Run unit tests
env:
TRINO_URI: 'trino://[email protected]:8081/postgresql/public'
SNOWFLAKE_URI: '${{ secrets.SNOWFLAKE_URI }}'
# PRESTO_URI: '${{ secrets.PRESTO_URI }}'
# CLICKHOUSE_URI: 'clickhouse://clickhouse:Password1@localhost:9000/clickhouse'
Expand Down
438 changes: 266 additions & 172 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ dsnparse = "*"
click = ">=8.1"
rich = "*"
toml = ">=0.10.2"
sqeleton = ">=0.1.5"
sqeleton = "^0.1.6"
mysql-connector-python = {version=">=8.0.29", optional=true}
psycopg2 = {version="*", optional=true}
psycopg2-binary = {version="*", optional=true}
snowflake-connector-python = {version=">=2.7.2", optional=true}
cryptography = {version="*", optional=true}
trino = {version=">=0.314.0", optional=true}
Expand All @@ -45,7 +45,7 @@ parameterized = "*"
unittest-parallel = "*"
# preql = ">=0.2.19"
mysql-connector-python = "*"
psycopg2 = "*"
psycopg2-binary = "*"
snowflake-connector-python = ">=2.7.2"
cryptography = "*"
trino = ">=0.314.0"
Expand All @@ -60,7 +60,7 @@ duckdb = ">=0.6.0"
# When adding, update also: README + dev deps just above
preql = ["preql"]
mysql = ["mysql-connector-python"]
postgresql = ["psycopg2"]
postgresql = ["psycopg2-binary"]
snowflake = ["snowflake-connector-python", "cryptography"]
presto = ["presto-python-client"]
oracle = ["cx_Oracle"]
Expand All @@ -70,7 +70,7 @@ clickhouse = ["clickhouse-driver"]
vertica = ["vertica-python"]
duckdb = ["duckdb"]

all = ["mysql-connector-python", "psycopg2", "snowflake-connector-python", "cryptography", "presto-python-client", "cx_Oracle", "trino", "clickhouse-driver", "vertica-python", "duckdb"]
all = ["mysql-connector-python", "psycopg2-binary", "snowflake-connector-python", "cryptography", "presto-python-client", "cx_Oracle", "trino", "clickhouse-driver", "vertica-python", "duckdb"]

[build-system]
requires = ["poetry-core>=1.0.0"]
Expand Down
14 changes: 14 additions & 0 deletions reladiff/table_segment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .utils import safezip, Vector
from sqeleton.utils import ArithString, split_space
from sqeleton.databases import Database, DbPath, DbKey, DbTime
from sqeleton.abcs.database_types import String_UUID
from sqeleton.schema import Schema, create_schema
from sqeleton.queries import Count, Checksum, SKIP, table, this, Expr, min_, max_, Code
from sqeleton.queries.extras import ApplyFuncAndNormalizeAsString, NormalizeAsString
Expand Down Expand Up @@ -165,13 +166,26 @@ def with_schema(self, refine: bool = True, allow_empty_table: bool = False) -> "
return self._with_raw_schema(
self.database.query_table_schema(self.table_path), refine=refine, allow_empty_table=allow_empty_table
)

def _cast_col_value(self, col, value):
"""Cast the value to the right type, based on the type of the column

Currently only used to support native vs string UUID values.
"""
assert self._schema
t = self._schema[col]
if isinstance(t, String_UUID):
return str(value)
return value

def _make_key_range(self):
if self.min_key is not None:
for mn, k in safezip(self.min_key, self.key_columns):
mn = self._cast_col_value(k, mn)
yield mn <= this[k]
if self.max_key is not None:
for k, mx in safezip(self.key_columns, self.max_key):
mx = self._cast_col_value(k, mx)
yield this[k] < mx

def _make_update_range(self):
Expand Down
57 changes: 41 additions & 16 deletions tests/test_database_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from sqeleton.utils import number_to_human
from sqeleton.queries import table, commit, this, Code
from sqeleton.queries.api import insert_rows_in_batches
from sqeleton.queries.api import insert_rows_in_batches, Cast

from reladiff import databases as db
from reladiff.query_utils import drop_table
Expand Down Expand Up @@ -64,11 +64,14 @@ def init_conns():
"double precision",
"numeric(6,3)",
],
"uuid": [
"uuid_string": [
"text",
"varchar(100)",
"char(100)",
],
"uuid_native": [
"uuid",
],
"boolean": [
"boolean",
],
Expand Down Expand Up @@ -97,11 +100,12 @@ def init_conns():
"numeric",
"numeric(65, 10)",
],
"uuid": [
"uuid_string": [
"varchar(100)",
"char(100)",
"varbinary(100)",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand All @@ -118,9 +122,10 @@ def init_conns():
# "DOUBLE",
# 'DECIMAL'
],
"uuid": [
"uuid_string": [
"VARCHAR(100)",
],
"uuid_native": [],
"boolean": [
"BOOLEAN",
],
Expand All @@ -136,9 +141,10 @@ def init_conns():
"float64",
"bignumeric",
],
"uuid": [
"uuid_string": [
"STRING",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand Down Expand Up @@ -167,10 +173,11 @@ def init_conns():
"float",
"numeric",
],
"uuid": [
"uuid_string": [
"varchar",
"varchar(100)",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand All @@ -189,11 +196,12 @@ def init_conns():
"float8",
"numeric",
],
"uuid": [
"uuid_string": [
"text",
"varchar(100)",
"char(100)",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand All @@ -214,12 +222,13 @@ def init_conns():
"double precision",
"Number(5, 2)",
],
"uuid": [
"uuid_string": [
"CHAR(100)",
"VARCHAR(100)",
"NCHAR(100)",
"NVARCHAR2(100)",
],
"uuid_native": [],
"boolean": [], # Oracle has no boolean type
},
db.Presto: {
Expand All @@ -240,10 +249,11 @@ def init_conns():
"decimal(10,2)",
"decimal(30,6)",
],
"uuid": [
"uuid_string": [
"varchar",
"char(100)",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand All @@ -267,9 +277,10 @@ def init_conns():
"DOUBLE",
"DECIMAL(6, 2)",
],
"uuid": [
"uuid_string": [
"STRING",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand All @@ -289,10 +300,13 @@ def init_conns():
"decimal(10,2)",
"decimal(30,6)",
],
"uuid": [
"uuid_string": [
"varchar",
"char(100)",
],
"uuid_native": [
"uuid",
],
"boolean": [
"boolean",
],
Expand All @@ -317,9 +331,10 @@ def init_conns():
"Float32",
"Float64",
],
"uuid": [
"uuid_string": [
"String",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand All @@ -337,10 +352,11 @@ def init_conns():
"float",
"float8",
],
"uuid": [
"uuid_string": [
"varchar(100)",
"char(100)",
],
"uuid_native": [],
"boolean": [
"boolean",
],
Expand Down Expand Up @@ -468,12 +484,17 @@ def __len__(self):
def __iter__(self):
return (uuid.uuid1(i) for i in range(self.max))

class UUID_Faker_String(UUID_Faker):
def __iter__(self):
return (str(uuid.uuid1(i)) for i in range(self.max))


TYPE_SAMPLES = {
"int": IntFaker(N_SAMPLES),
"datetime": DateTimeFaker(N_SAMPLES),
"float": FloatFaker(N_SAMPLES),
"uuid": UUID_Faker(N_SAMPLES),
"uuid_string": UUID_Faker_String(N_SAMPLES),
"uuid_native": UUID_Faker(N_SAMPLES),
"boolean": BooleanFaker(N_SAMPLES),
}

Expand Down Expand Up @@ -654,20 +675,24 @@ def test_types(self, source_db, target_db, source_type, target_type, type_catego
insertion_source_duration = time.monotonic() - start

values_in_source = PaginatedTable(src_table_path, src_conn)
if source_db is db.Presto or source_db is db.Trino:
if source_db is db.Presto: # or source_db is db.Trino:
if source_type.startswith("decimal"):
values_in_source = ((a, Decimal(b)) for a, b in values_in_source)
elif source_type.startswith("timestamp"):
values_in_source = ((a, datetime.fromisoformat(b.rstrip(" UTC"))) for a, b in values_in_source)

if type_category.startswith("uuid_native"):
if target_db is db.Trino:
values_in_source = [(a, Cast(b, Code("uuid"))) for a, b in values_in_source]

start = time.monotonic()
if not BENCHMARK:
drop_table(dst_conn, dst_table_path)
_create_table_with_indexes(dst_conn, dst_table_path, target_type)
_insert_to_table(dst_conn, dst_table_path, values_in_source, target_type)
insertion_target_duration = time.monotonic() - start

if type_category == "uuid":
if type_category.startswith("uuid_"):
self.table = TableSegment(self.src_conn, src_table_path, ("col",), None, ("id",), case_sensitive=False)
self.table2 = TableSegment(self.dst_conn, dst_table_path, ("col",), None, ("id",), case_sensitive=False)
else:
Expand Down
20 changes: 10 additions & 10 deletions tests/test_diff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,11 +354,11 @@ def setUp(self):

src_table = self.src_table

self.new_uuid = uuid.uuid1(32132131)
self.new_uuid = str(uuid.uuid1(32132131))

self.connection.query(
[
src_table.insert_rows((uuid.uuid1(i), str(i)) for i in range(100)),
src_table.insert_rows((str(uuid.uuid1(i)), str(i)) for i in range(100)),
table(self.table_dst_path).create(src_table),
src_table.insert_row(self.new_uuid, "This one is different"),
commit,
Expand Down Expand Up @@ -540,10 +540,10 @@ def setUp(self):

values = []
for i in range(10):
uuid_value = uuid.uuid1(i)
uuid_value = str(uuid.uuid1(i))
values.append((uuid_value, uuid_value))

self.null_uuid = uuid.uuid1(32132131)
self.null_uuid = str(uuid.uuid1(32132131))

self.connection.query(
[
Expand Down Expand Up @@ -572,10 +572,10 @@ def setUp(self):

src_table = self.src_table

self.null_uuid = uuid.uuid1(1)
self.null_uuid = str(uuid.uuid1(1))
self.connection.query(
[
src_table.insert_row(uuid.uuid1(1), "1"),
src_table.insert_row(str(uuid.uuid1(1)), "1"),
table(self.table_dst_path).create(src_table),
src_table.insert_row(self.null_uuid, None), # Add a row where a column has NULL value
commit,
Expand Down Expand Up @@ -624,7 +624,7 @@ def setUp(self):

self.diffs = []
for i in range(0, 8):
pk = uuid.uuid1(i)
pk = str(uuid.uuid1(i))
src_row = (str(pk), str(i), None)
dst_row = (str(pk), str(i) + "-different", None)

Expand Down Expand Up @@ -685,9 +685,9 @@ class TestTableTableEmpty(DiffTestCase):
def setUp(self):
super().setUp()

self.null_uuid = uuid.uuid1(1)
self.null_uuid = str(uuid.uuid1(1))

diffs = [(uuid.uuid1(i), str(i)) for i in range(100)]
diffs = [(str(uuid.uuid1(i)), str(i)) for i in range(100)]
self.connection.query([self.src_table.insert_rows(diffs), commit])

self.a = table_segment(self.connection, self.table_src_path, "id", "text_comment", case_sensitive=False)
Expand Down Expand Up @@ -901,7 +901,7 @@ class TestCompoundKeyAlphanum(DiffTestCase):
def setUp(self):
super().setUp()

rows = [(uuid.uuid1(i), i, str(i)) for i in range(100)]
rows = [(str(uuid.uuid1(i)), i, str(i)) for i in range(100)]
rows2 = list(rows)
x = rows2[9]
rows2[9] = (x[0], 9000, x[2])
Expand Down
Loading