Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved tests
Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 10, 2023
1 parent ff87a8b commit 10b6476
Show file tree
Hide file tree
Showing 6 changed files with 816 additions and 141 deletions.
81 changes: 70 additions & 11 deletions parquet_integration/write_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,22 @@ def case_nested() -> Tuple[dict, pa.Schema, str]:
[{"a": "e"}],
]

struct_list_nullable = pa.StructArray.from_arrays(
[pa.array(string)],
fields=[("a", pa.list_(pa.utf8()))],
)

list_struct_list_nullable = [
[{"a": ["a"]}, {"a": ["b"]}],
None,
[{"a": ["b"]}, None, {"a": ["b"]}],
[{"a": None}, {"a": None}, {"a": None}],
[],
[{"a": ["d"]}, {"a": [None]}, {"a": ["c", "d"]}],
None,
[{"a": []}],
]

fields = [
pa.field("list_int64", pa.list_(pa.int64())),
pa.field("list_int64_required", pa.list_(pa.field("item", pa.int64(), False))),
Expand All @@ -196,6 +212,14 @@ def case_nested() -> Tuple[dict, pa.Schema, str]:
"list_struct_nullable",
pa.list_(pa.struct([("a", pa.utf8())])),
),
pa.field(
"struct_list_nullable",
pa.struct([("a", pa.list_(pa.utf8()))]),
),
pa.field(
"list_struct_list_nullable",
pa.list_(pa.struct([("a", pa.list_(pa.utf8()))])),
),
]
schema = pa.schema(fields)
return (
Expand All @@ -212,6 +236,8 @@ def case_nested() -> Tuple[dict, pa.Schema, str]:
"list_nested_inner_required_i64": items_required_nested,
"list_nested_inner_required_required_i64": items_required_nested_2,
"list_struct_nullable": list_struct_nullable,
"struct_list_nullable": struct_list_nullable,
"list_struct_list_nullable": list_struct_list_nullable,
},
schema,
f"nested_nullable_10.parquet",
Expand Down Expand Up @@ -263,7 +289,9 @@ def case_struct() -> Tuple[dict, pa.Schema, str]:
struct_nullable = pa.StructArray.from_arrays(
[pa.array(string), pa.array(boolean)],
fields=struct_fields,
mask=pa.array([False, False, True, False, False, False, False, False, False, False]),
mask=pa.array(
[False, False, True, False, False, False, False, False, False, False]
),
)

return (
Expand All @@ -277,7 +305,20 @@ def case_struct() -> Tuple[dict, pa.Schema, str]:
"struct_struct_nullable": pa.StructArray.from_arrays(
[struct, pa.array(boolean)],
names=["f1", "f2"],
mask=pa.array([False, False, True, False, False, False, False, False, False, False]),
mask=pa.array(
[
False,
False,
True,
False,
False,
False,
False,
False,
False,
False,
]
),
),
},
schema,
Expand All @@ -288,30 +329,48 @@ def case_struct() -> Tuple[dict, pa.Schema, str]:
def case_nested_edge():
simple = [[0, 1]]
null = [None]
empty = [[]]

struct_list_nullable = pa.StructArray.from_arrays(
[pa.array([["a", "b", None, "c"]])],
fields=[
("f1", pa.list_(pa.utf8())),
],
fields=[("f1", pa.list_(pa.utf8()))],
)

list_struct_list_nullable = pa.ListArray.from_arrays([0, 1], struct_list_nullable)

fields = [
pa.field("simple", pa.list_(pa.int64())),
pa.field("null", pa.list_(pa.field("item", pa.int64(), True))),
pa.field("empty", pa.list_(pa.field("item", pa.int64(), True))),
pa.field(
"struct_list_nullable",
pa.struct(
[("f1", pa.list_(pa.utf8()))],
),
),
pa.field(
"struct_list_nullable",
pa.struct([
("f1", pa.list_(pa.utf8())),
]),
)
"list_struct_list_nullable",
pa.list_(
pa.field(
"item",
pa.struct(
[
("f1", pa.list_(pa.utf8())),
]
),
True,
)
),
),
]
schema = pa.schema(fields)
return (
{
"simple": simple,
"null": null,
"empty": empty,
"struct_list_nullable": struct_list_nullable,
"list_struct_list_nullable": list_struct_list_nullable,
},
schema,
f"nested_edge_nullable_10.parquet",
Expand Down Expand Up @@ -413,7 +472,7 @@ def case_benches_required(size):


# for read benchmarks
for i in range(10, 22, 2):
for i in range(22, 22, 2):
# two pages (dict)
write_pyarrow(case_benches(2**i), 1, True, False, None)
# single page
Expand Down
Loading

0 comments on commit 10b6476

Please sign in to comment.