From 36cc64ef4e81833d6e69f069655588df896c7e60 Mon Sep 17 00:00:00 2001 From: zhongjingxiong Date: Mon, 6 Nov 2023 12:24:58 +0800 Subject: [PATCH 1/3] feat: add example to ci --- .github/workflows/rust.yml | 14 +------- ci/scripts/rust_example.sh | 35 +++++++++++++++++++ .../{ => external_dependency}/catalog.rs | 0 .../dataframe-to-s3.rs | 0 .../{ => external_dependency}/query-aws-s3.rs | 0 .../examples/{ => flight}/flight_client.rs | 0 .../examples/{ => flight}/flight_server.rs | 0 .../{ => flight}/flight_sql_server.rs | 0 datafusion-examples/examples/simple_udwf.rs | 2 +- 9 files changed, 37 insertions(+), 14 deletions(-) create mode 100755 ci/scripts/rust_example.sh rename datafusion-examples/examples/{ => external_dependency}/catalog.rs (100%) rename datafusion-examples/examples/{ => external_dependency}/dataframe-to-s3.rs (100%) rename datafusion-examples/examples/{ => external_dependency}/query-aws-s3.rs (100%) rename datafusion-examples/examples/{ => flight}/flight_client.rs (100%) rename datafusion-examples/examples/{ => flight}/flight_server.rs (100%) rename datafusion-examples/examples/{ => flight}/flight_sql_server.rs (100%) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 6c3d60bd4399..485d179571e3 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -139,19 +139,7 @@ jobs: # test datafusion-sql examples cargo run --example sql # test datafusion-examples - cargo run --example avro_sql --features=datafusion/avro - cargo run --example csv_sql - cargo run --example custom_datasource - cargo run --example dataframe - cargo run --example dataframe_in_memory - cargo run --example deserialize_to_struct - cargo run --example expr_api - cargo run --example parquet_sql - cargo run --example parquet_sql_multiple_files - cargo run --example memtable - cargo run --example rewrite_expr - cargo run --example simple_udf - cargo run --example simple_udaf + ci/scripts/rust_example.sh - name: Verify Working Directory Clean 
run: git diff --exit-code diff --git a/ci/scripts/rust_example.sh b/ci/scripts/rust_example.sh new file mode 100755 index 000000000000..053bdfe4e994 --- /dev/null +++ b/ci/scripts/rust_example.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex +cd datafusion-examples/examples/ +files=$(ls .) +for filename in $files +do + example_name=`basename $filename ".rs"` + # Skip tests that rely on external storage and flight + # TODO: Currently, catalog.rs is placed in the external_dependency directory because there is a problem parsing + # the parquet file of the external parquet-test that it currently relies on. + # We will wait for this issue (https://github.com/apache/arrow-datafusion/issues/8041) to be resolved. + if [ ! 
-d $filename ]; then + cargo run --example $example_name + fi +done + +cargo fmt --all -- --check diff --git a/datafusion-examples/examples/catalog.rs b/datafusion-examples/examples/external_dependency/catalog.rs similarity index 100% rename from datafusion-examples/examples/catalog.rs rename to datafusion-examples/examples/external_dependency/catalog.rs diff --git a/datafusion-examples/examples/dataframe-to-s3.rs b/datafusion-examples/examples/external_dependency/dataframe-to-s3.rs similarity index 100% rename from datafusion-examples/examples/dataframe-to-s3.rs rename to datafusion-examples/examples/external_dependency/dataframe-to-s3.rs diff --git a/datafusion-examples/examples/query-aws-s3.rs b/datafusion-examples/examples/external_dependency/query-aws-s3.rs similarity index 100% rename from datafusion-examples/examples/query-aws-s3.rs rename to datafusion-examples/examples/external_dependency/query-aws-s3.rs diff --git a/datafusion-examples/examples/flight_client.rs b/datafusion-examples/examples/flight/flight_client.rs similarity index 100% rename from datafusion-examples/examples/flight_client.rs rename to datafusion-examples/examples/flight/flight_client.rs diff --git a/datafusion-examples/examples/flight_server.rs b/datafusion-examples/examples/flight/flight_server.rs similarity index 100% rename from datafusion-examples/examples/flight_server.rs rename to datafusion-examples/examples/flight/flight_server.rs diff --git a/datafusion-examples/examples/flight_sql_server.rs b/datafusion-examples/examples/flight/flight_sql_server.rs similarity index 100% rename from datafusion-examples/examples/flight_sql_server.rs rename to datafusion-examples/examples/flight/flight_sql_server.rs diff --git a/datafusion-examples/examples/simple_udwf.rs b/datafusion-examples/examples/simple_udwf.rs index 39042a35629b..d1cbcc7c4389 100644 --- a/datafusion-examples/examples/simple_udwf.rs +++ b/datafusion-examples/examples/simple_udwf.rs @@ -36,7 +36,7 @@ async fn 
create_context() -> Result { // declare a table in memory. In spark API, this corresponds to createDataFrame(...). println!("pwd: {}", std::env::current_dir().unwrap().display()); - let csv_path = "datafusion/core/tests/data/cars.csv".to_string(); + let csv_path = "../../datafusion/core/tests/data/cars.csv".to_string(); let read_options = CsvReadOptions::default().has_header(true); ctx.register_csv("cars", &csv_path, read_options).await?; From 69de29afff1344dd5508962149c35f2384edda73 Mon Sep 17 00:00:00 2001 From: zhongjingxiong Date: Mon, 6 Nov 2023 14:08:02 +0800 Subject: [PATCH 2/3] nit --- ci/scripts/rust_example.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/rust_example.sh b/ci/scripts/rust_example.sh index 053bdfe4e994..fe3696f20865 100755 --- a/ci/scripts/rust_example.sh +++ b/ci/scripts/rust_example.sh @@ -19,6 +19,8 @@ set -ex cd datafusion-examples/examples/ +cargo fmt --all -- --check + files=$(ls .) for filename in $files do @@ -31,5 +33,3 @@ do cargo run --example $example_name fi done - -cargo fmt --all -- --check From 5a4bef2bd82559dfed5fd5dbf990a1271c4cc9e4 Mon Sep 17 00:00:00 2001 From: zhongjingxiong Date: Tue, 7 Nov 2023 11:22:30 +0800 Subject: [PATCH 3/3] addr comments --- datafusion-examples/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index bfed3976c946..9f7c9f99d14e 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -44,16 +44,18 @@ cargo run --example csv_sql - [`avro_sql.rs`](examples/avro_sql.rs): Build and run a query plan from a SQL statement against a local AVRO file - [`csv_sql.rs`](examples/csv_sql.rs): Build and run a query plan from a SQL statement against a local CSV file +- [`catalog.rs`](examples/external_dependency/catalog.rs): Register the table into a custom catalog - [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom 
datasource (TableProvider) - [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame against a local parquet file +- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 - [`dataframe_in_memory.rs`](examples/dataframe_in_memory.rs): Run a query using a DataFrame against data in memory - [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results into rust structs using serde - [`expr_api.rs`](examples/expr_api.rs): Use the `Expr` construction and simplification API -- [`flight_sql_server.rs`](examples/flight_sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients +- [`flight_sql_server.rs`](examples/flight/flight_sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients - [`memtable.rs`](examples/memtable.rs): Create an query data in memory using SQL and `RecordBatch`es - [`parquet_sql.rs`](examples/parquet_sql.rs): Build and run a query plan from a SQL statement against a local Parquet file - [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs): Build and run a query plan from a SQL statement against multiple local Parquet files -- [`query-aws-s3.rs`](examples/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 +- [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 - [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP - [`rewrite_expr.rs`](examples/rewrite_expr.rs): Define and invoke a custom Query Optimizer pass - [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) @@ -62,4 +64,4 @@ cargo run --example csv_sql ## Distributed -- [`flight_client.rs`](examples/flight_client.rs) and 
[`flight_server.rs`](examples/flight_server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol. +- [`flight_client.rs`](examples/flight/flight_client.rs) and [`flight_server.rs`](examples/flight/flight_server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol.