Skip to content

Commit

Permalink
Add large utf8 example
Browse files: browse the repository at this point in the history
  • Loading branch information
Dandandan committed Sep 19, 2021
1 parent 859ee78 commit 1fbe8e3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 0 deletions.
5 changes: 5 additions & 0 deletions benches/read_parquet.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,6 +42,11 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| read_decompressed_pages(&buffer, size * 8, 2).unwrap())
});

let a = format!("read utf8 large emoji 2^{}", i);
c.bench_function(&a, |b| {
b.iter(|| read_decompressed_pages(&buffer, size * 8, 6).unwrap())
});

let a = format!("read bool 2^{}", i);
c.bench_function(&a, |b| {
b.iter(|| read_decompressed_pages(&buffer, size * 8, 3).unwrap())
Expand Down
3 changes: 3 additions & 0 deletions parquet_integration/write_parquet.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@ def case_basic_nullable(size=1):
float64 = [0.0, 1.0, None, 3.0, None, 5.0, 6.0, 7.0, None, 9.0]
string = ["Hello", None, "aa", "", None, "abc", None, None, "def", "aaa"]
boolean = [True, None, False, False, None, True, None, None, True, True]
string_large_emoji = ["😃🌚🕳👊🚅🚑🎐🚘✨⛎⛹📔🔫😭🀄️🏗🚵🍒⏮🏎🎼🌥🌀🕎⛴💀™️📈🍋🅿️🌉✅⚜🏓🏜💅📖🚾🛤☺️☑️🕊🌁📡💵📮🌷💡🍩🏬💫🏩🍵🎼◽️❌♥️🛌🕹🍰🗄💷▪️🔲🏛👡👽🍭🛤▶️🍫😵🏔🎁🌫☎️✈️〰️👚🐫🍺🎢🔵👊🗒🆘🎡💌♋️➕♉️🖐🎶📒™️👛😆👠🐛🌫🦄⚫️😕🍙🕠♨️➿🔰💺🕳😶👳😙🌧🍽🏘🐰🍗🍲🐏🌂🌆🗂🚀👓↘️📀🔰"] * 10

fields = [
pa.field("int64", pa.int64()),
Expand All @@ -18,6 +19,7 @@ def case_basic_nullable(size=1):
pa.field("bool", pa.bool_()),
pa.field("date", pa.timestamp("ms")),
pa.field("uint32", pa.uint32()),
pa.field("string_large_emoji", pa.utf8()),
]
schema = pa.schema(fields)

Expand All @@ -29,6 +31,7 @@ def case_basic_nullable(size=1):
"bool": boolean * size,
"date": int64 * size,
"uint32": int64 * size,
"string_large_emoji": string_large_emoji * size,
},
schema,
f"basic_nullable_{size*10}.parquet",
Expand Down

0 comments on commit 1fbe8e3

Please sign in to comment.