Skip to content

Commit

Permalink
Add json-functions as optional extension
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Sep 16, 2024
1 parent b419d8c commit a3099ee
Show file tree
Hide file tree
Showing 10 changed files with 456 additions and 54 deletions.
189 changes: 157 additions & 32 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ color-eyre = "0.6.3"
crossterm = { version = "0.28.1", features = ["event-stream"] }
datafusion = "41.0.0"
datafusion-common = "41.0.0"
datafusion-functions-json = { version = "0.41.0", optional = true }
deltalake = { version = "0.19.0", features = ["datafusion"], optional = true }
directories = "5.0.1"
env_logger = "0.11.5"
Expand All @@ -43,13 +44,15 @@ url = { version = "2.5.2", optional = true }

[dev-dependencies]
assert_cmd = "2.0.16"
insta = { version = "1.40.0", features = ["yaml"] }
predicates = "3.1.2"
tempfile = "3.2.0"

[features]
deltalake = ["dep:deltalake"]
flightsql = ["dep:arrow-flight", "dep:tonic"]
s3 = ["object_store/aws", "url"]
functions-json = ["dep:datafusion-functions-json"]
url = ["dep:url"]

[[bin]]
Expand Down
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,18 @@ Currently, the only supported packaging is on [crates.io](https://crates.io/sear

Once installed you can run `dft` to start the application.

#### Features
#### Optional Features (Rust Crate Features)

`dft` has several optional (conditionally compiled features) integrations which are controlled by [Rust Crate Features]

To build with all features, you can run

```shell
cargo install --path . --all-features
````

[Rust Crate Features]: https://doc.rust-lang.org/cargo/reference/features.html


#### S3 (`--features=s3`)

Expand Down Expand Up @@ -143,6 +154,18 @@ Register deltalake tables. For example:
CREATE EXTERNAL TABLE table_name STORED AS DELTATABLE LOCATION 's3://bucket/table'
```

#### Json Functions (`--features=function-json`)

Adds functions from [datafusion-function-json] for querying JSON strings in DataFusion in `dft`. For example:

```sql
select * from foo where json_get(attributes, 'bar')::string='ham'
(show examples of using operators too)
```
[datafusion-function-json]: https://github.com/datafusion-contrib/datafusion-functions-json
### Config
The `dft` configuration is stored in `~/.config/dft/config.toml`
Expand Down
39 changes: 20 additions & 19 deletions src/app/state/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ pub mod tabs;

use crate::app::state::tabs::sql::SQLTabState;
use crate::app::ui::SelectedTab;
use crate::config::get_data_dir;
use log::{debug, error, info};
use std::path::PathBuf;

Expand All @@ -46,7 +45,6 @@ impl Default for Tabs {
pub struct AppState<'app> {
pub config: AppConfig,
pub should_quit: bool,
pub data_dir: PathBuf,
pub sql_tab: SQLTabState<'app>,
#[cfg(feature = "flightsql")]
pub flightsql_tab: FlightSQLTabState<'app>,
Expand All @@ -57,7 +55,6 @@ pub struct AppState<'app> {

pub fn initialize<'app>(config_path: PathBuf) -> AppState<'app> {
debug!("Initializing state");
let data_dir = get_data_dir();
debug!("Config path: {:?}", config_path);
let config = if config_path.exists() {
debug!("Config exists");
Expand All @@ -82,24 +79,28 @@ pub fn initialize<'app>(config_path: PathBuf) -> AppState<'app> {
debug!("No config, using default");
AppConfig::default()
};
AppState::new(config)
}

let tabs = Tabs::default();

let sql_tab_state = SQLTabState::new();
#[cfg(feature = "flightsql")]
let flightsql_tab_state = FlightSQLTabState::new();
let logs_tab_state = LogsTabState::default();
let history_tab_state = HistoryTabState::default();
impl<'app> AppState<'app> {
pub fn new(config: AppConfig) -> Self {
let tabs = Tabs::default();

AppState {
config,
data_dir,
tabs,
sql_tab: sql_tab_state,
let sql_tab_state = SQLTabState::new();
#[cfg(feature = "flightsql")]
flightsql_tab: flightsql_tab_state,
logs_tab: logs_tab_state,
history_tab: history_tab_state,
should_quit: false,
let flightsql_tab_state = FlightSQLTabState::new();
let logs_tab_state = LogsTabState::default();
let history_tab_state = HistoryTabState::default();

AppState {
config,
tabs,
sql_tab: sql_tab_state,
#[cfg(feature = "flightsql")]
flightsql_tab: flightsql_tab_state,
logs_tab: logs_tab_state,
history_tab: history_tab_state,
should_quit: false,
}
}
}
11 changes: 9 additions & 2 deletions src/execution/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,19 @@ impl ExecutionContext {
/// Construct a new `ExecutionContext` with the specified configuration
pub fn try_new(config: &ExecutionConfig) -> Result<Self> {
let mut builder = DftSessionStateBuilder::new();
for extension in enabled_extensions() {
let extensions = enabled_extensions();
for extension in &extensions {
builder = extension.register(config, builder)?;
}

let state = builder.build()?;
let session_ctx = SessionContext::new_with_state(state);
let mut session_ctx = SessionContext::new_with_state(state);

// Apply any additional setup to the session context (e.g. registering
// functions)
for extension in &extensions {
extension.register_on_ctx(config, &mut session_ctx)?;
}

Ok(Self {
session_ctx,
Expand Down
50 changes: 50 additions & 0 deletions src/extensions/functions_json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! [datafusion-function-json] Integration: [JsonFunctionsExtension]
//!
//! [datafusion-function-json]: https://github.com/datafusion-contrib/datafusion-functions-json
use crate::config::ExecutionConfig;
use crate::extensions::{DftSessionStateBuilder, Extension};
use datafusion::prelude::SessionContext;
use datafusion_common::Result;

#[derive(Debug, Default)]
pub struct JsonFunctionsExtension {}

impl JsonFunctionsExtension {
pub fn new() -> Self {
Self {}
}
}

impl Extension for JsonFunctionsExtension {
fn register(
&self,
_config: &ExecutionConfig,
builder: DftSessionStateBuilder,
) -> datafusion_common::Result<DftSessionStateBuilder> {
//
Ok(builder)
}

fn register_on_ctx(&self, _config: &ExecutionConfig, ctx: &mut SessionContext) -> Result<()> {
datafusion_functions_json::register_all(ctx)?;
Ok(())
}
}
12 changes: 12 additions & 0 deletions src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@
use crate::config::ExecutionConfig;
use datafusion::common::Result;
use datafusion::prelude::SessionContext;
use std::fmt::Debug;

mod builder;
#[cfg(feature = "deltalake")]
mod deltalake;
#[cfg(feature = "functions-json")]
mod functions_json;
#[cfg(feature = "s3")]
mod s3;

Expand All @@ -36,6 +39,13 @@ pub trait Extension: Debug {
_config: &ExecutionConfig,
_builder: DftSessionStateBuilder,
) -> Result<DftSessionStateBuilder>;

/// Registers this extension after the SessionContext has been created
/// (this is to match the historic way many extensions were registered)
/// TODO file a ticket upstream to use the builder pattern
fn register_on_ctx(&self, _config: &ExecutionConfig, _ctx: &mut SessionContext) -> Result<()> {
Ok(())
}
}

/// Return all extensions currently enabled
Expand All @@ -45,5 +55,7 @@ pub fn enabled_extensions() -> Vec<Box<dyn Extension>> {
Box::new(s3::AwsS3Extension::new()),
#[cfg(feature = "deltalake")]
Box::new(deltalake::DeltaLakeExtension::new()),
#[cfg(feature = "functions-json")]
Box::new(functions_json::JsonFunctionsExtension::new()),
]
}
76 changes: 76 additions & 0 deletions tests/extension_cases/functions_json.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Tests for datafusion-function-json integration
use crate::TestExecution;

static TEST_TABLE: &str = r#"
CREATE TABLE test_table (
id INT,
json_col VARCHAR
) AS VALUES
(1, '{}'),
(2, '{ "a": 1 }'),
(3, '{ "a": 2 }'),
(4, '{ "a": 1, "b": 2 }'),
(5, '{ "a": 1, "b": 2, "c": 3 }')
"#;

/// Ensure one of the functions `json_contains` function is properly registered
#[tokio::test]
async fn test_basic() {
let mut execution = TestExecution::new().with_setup(TEST_TABLE).await;

let actual = execution
.run_and_format("SELECT id, json_contains(json_col, 'b') as json_contains FROM test_table")
.await;

insta::assert_yaml_snapshot!(actual, @r###"
- +----+---------------+
- "| id | json_contains |"
- +----+---------------+
- "| 1 | false |"
- "| 2 | false |"
- "| 3 | false |"
- "| 4 | true |"
- "| 5 | true |"
- +----+---------------+
"###);
}

/// ensure the json operators like -> are properly registered
#[tokio::test]
async fn test_operators() {
let mut execution = TestExecution::new().with_setup(TEST_TABLE).await;

let actual = execution
.run_and_format("SELECT id, json_col->'a' as json_col_a FROM test_table")
.await;

insta::assert_yaml_snapshot!(actual, @r###"
- +----+------------+
- "| id | json_col_a |"
- +----+------------+
- "| 1 | {null=} |"
- "| 2 | {int=1} |"
- "| 3 | {int=2} |"
- "| 4 | {int=1} |"
- "| 5 | {int=1} |"
- +----+------------+
"###);
}
19 changes: 19 additions & 0 deletions tests/extension_cases/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[cfg(feature = "functions-json")]
mod functions_json;
Loading

0 comments on commit a3099ee

Please sign in to comment.