-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add clickbench queries to sqllogictest coverage #6836
Merged
Merged
Changes from 3 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
e7f253c
Add clickbench queries to sqllogictest coverage
alamb 86c346d
rowsort
alamb 3ea141f
Update datafusion/core/tests/sqllogictests/test_files/clickbench.slt
alamb 9dcf165
fix typo -- :facepalm:
alamb 038a908
Merge remote-tracking branch 'apache/main' into alamb/clickhouse_queries
alamb ac72e62
Update queries now that they pass
alamb File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Binary file not shown.
270 changes: 270 additions & 0 deletions
270
datafusion/core/tests/sqllogictests/test_files/clickbench.slt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,270 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
|
||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
|
||
# This file contains the clickbench schema and queries | ||
# and the first 10 rows of data. Since ClickBench contains case sensitive queries | ||
# this is also a good test of that usecase too | ||
|
||
# create.sql came from | ||
# https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/create.sql | ||
# Data file made with DuckDB: | ||
# COPY (SELECT * FROM 'hits.parquet' LIMIT 10) TO 'clickbench_hits_10.parquet' (FORMAT PARQUET); | ||
|
||
statement ok | ||
CREATE EXTERNAL TABLE hits | ||
STORED AS PARQUET | ||
LOCATION 'tests/data/clickbench_hits_10.parqet'; | ||
|
||
|
||
# queries.sql came from | ||
# https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/queries.sql | ||
|
||
query I | ||
SELECT COUNT(*) FROM hits; | ||
---- | ||
10 | ||
|
||
query I | ||
SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; | ||
---- | ||
0 | ||
|
||
query IIR | ||
SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; | ||
---- | ||
0 10 0 | ||
|
||
query R | ||
SELECT AVG("UserID") FROM hits; | ||
---- | ||
-304548765855551600 | ||
|
||
query I | ||
SELECT COUNT(DISTINCT "UserID") FROM hits; | ||
---- | ||
5 | ||
|
||
query I | ||
SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; | ||
---- | ||
1 | ||
|
||
query DD | ||
SELECT MIN("EventDate"::INT::DATE), MAX("EventDate"::INT::DATE) FROM hits; | ||
---- | ||
2013-07-15 2013-07-15 | ||
|
||
query II | ||
SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; | ||
---- | ||
|
||
# https://github.com/apache/arrow-datafusion/issues/6790 | ||
query error DataFusion error: Optimizer rule 'simplify_expressions' failed | ||
SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; | ||
|
||
query IIIRI rowsort | ||
SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; | ||
---- | ||
197 0 2 0 1 | ||
229 0 1 0 1 | ||
39 0 1 0 1 | ||
839 0 6 0 2 | ||
|
||
# https://github.com/apache/arrow-datafusion/issues/6790 | ||
query error DataFusion error: Optimizer rule 'simplify_expressions' failed | ||
SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; | ||
|
||
# https://github.com/apache/arrow-datafusion/issues/6790 | ||
query error DataFusion error: Optimizer rule 'simplify_expressions' failed | ||
SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; | ||
|
||
query TI | ||
SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; | ||
---- | ||
|
||
query error DataFusion error: Optimizer rule 'simplify_expressions' failed | ||
SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; | ||
|
||
query ITI | ||
SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; | ||
---- | ||
|
||
query II rowsort | ||
SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; | ||
---- | ||
-2461439046089301801 5 | ||
376160620089546609 1 | ||
427738049800818189 1 | ||
519640690937130534 2 | ||
7418527520126366595 1 | ||
|
||
query ITI rowsort | ||
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; | ||
---- | ||
-2461439046089301801 (empty) 5 | ||
376160620089546609 (empty) 1 | ||
427738049800818189 (empty) 1 | ||
519640690937130534 (empty) 2 | ||
7418527520126366595 (empty) 1 | ||
|
||
query ITI rowsort | ||
SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; | ||
---- | ||
-2461439046089301801 (empty) 5 | ||
376160620089546609 (empty) 1 | ||
427738049800818189 (empty) 1 | ||
519640690937130534 (empty) 2 | ||
7418527520126366595 (empty) 1 | ||
|
||
query IRTI rowsort | ||
SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; | ||
---- | ||
-2461439046089301801 18 (empty) 1 | ||
-2461439046089301801 33 (empty) 1 | ||
-2461439046089301801 38 (empty) 1 | ||
-2461439046089301801 56 (empty) 1 | ||
-2461439046089301801 58 (empty) 1 | ||
376160620089546609 30 (empty) 1 | ||
427738049800818189 40 (empty) 1 | ||
519640690937130534 26 (empty) 1 | ||
519640690937130534 36 (empty) 1 | ||
7418527520126366595 18 (empty) 1 | ||
|
||
query I | ||
SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; | ||
---- | ||
|
||
query I | ||
SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; | ||
---- | ||
0 | ||
|
||
query TTI | ||
SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; | ||
---- | ||
|
||
query TTTII | ||
SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; | ||
---- | ||
|
||
query IITIIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIII | ||
SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10; | ||
---- | ||
|
||
query T | ||
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10; | ||
---- | ||
|
||
query T | ||
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; | ||
---- | ||
|
||
query T | ||
SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10; | ||
---- | ||
|
||
query IRI | ||
SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; | ||
---- | ||
|
||
query TRIT | ||
SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; | ||
---- | ||
|
||
query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII | ||
SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; | ||
---- | ||
0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 | ||
|
||
query IIIIR | ||
SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; | ||
---- | ||
|
||
query IIIIR | ||
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; | ||
---- | ||
|
||
query IIIIR rowsort | ||
SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; | ||
---- | ||
4894690465724379622 1568366281 1 0 0 | ||
5206346422301499756 -1216690514 1 0 0 | ||
6308646140879811077 -1216690514 1 0 0 | ||
6635790769678439148 1427531677 1 0 0 | ||
6864353419233967042 1568366281 1 0 0 | ||
8120543446287442873 -1216690514 1 0 0 | ||
8156744413230856864 -1216690514 1 0 0 | ||
8740403056911509777 1615432634 1 0 0 | ||
8924809397503602651 -1216690514 1 0 0 | ||
9110818468285196899 -1216690514 1 0 0 | ||
|
||
query TI rowsort | ||
SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; | ||
---- | ||
(empty) 5 | ||
http://afisha.mail.ru/catalog/314/women.ru/ency=1&page3/?errovat-pinniki 1 | ||
http://bonprix.ru/index.ru/cinema/art/0 986 424 233 сезон 1 | ||
http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1 | ||
http://holodilnik.ru/russia/05jul2013&model=0 1 | ||
http://tours/Ekategoriya%2F&sr=http://slovareniye 1 | ||
|
||
query ITI rowsort | ||
SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; | ||
---- | ||
1 (empty) 5 | ||
1 http://afisha.mail.ru/catalog/314/women.ru/ency=1&page3/?errovat-pinniki 1 | ||
1 http://bonprix.ru/index.ru/cinema/art/0 986 424 233 сезон 1 | ||
1 http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1 | ||
1 http://holodilnik.ru/russia/05jul2013&model=0 1 | ||
1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1 | ||
|
||
query IIIII rowsort | ||
SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; | ||
---- | ||
-1216690514 -1216690515 -1216690516 -1216690517 6 | ||
1427531677 1427531676 1427531675 1427531674 1 | ||
1568366281 1568366280 1568366279 1568366278 2 | ||
1615432634 1615432633 1615432632 1615432631 1 | ||
|
||
query TI | ||
SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10; | ||
---- | ||
|
||
query TI | ||
SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10; | ||
---- | ||
|
||
query TI | ||
SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; | ||
---- | ||
|
||
query IIITTI | ||
SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; | ||
---- | ||
|
||
query IDI | ||
SELECT "URLHash", "EventDate"::INT::DATE, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate"::INT::DATE ORDER BY PageViews DESC LIMIT 10 OFFSET 100; | ||
---- | ||
|
||
query III | ||
SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; | ||
---- | ||
|
||
query PI | ||
SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-14' AND "EventDate"::INT::DATE <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000; | ||
---- |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are fixed by #6827