Skip to content

Commit

Permalink
Add related source code locations to errors (apache#13664)
Browse files Browse the repository at this point in the history
* Reset branch to no changes other than SqlRel

* feat: improve `Diagnostic` ergonomics

* feat: 'table not found' diagnostic in `SqlToRel`

* feat: unresolved fields

* feat: union with different number of columns

* feat: `Spans`

* feat: adjust field not found error

* feat: incompatible types in binary expr

* fix: tests not compiling

* feat: ambiguous column

* feat: possible references

* feat: group by

* chore: fmt

* feat: relay `Spans` in `create_col_from_scalar_expr`

* chore: cargo check

* chore: cargo fmt

* feat: diagnostic can only be either error or warning

* feat: simplify to one `Diagnostic` per error

* test: add tests

* chore: add license headers

* chore: update CLI lockfile

* chore: taplo format

* fix: doctest

* chore: configs.md

* fix: test

* fix: clippy, by removing smallvec

* fix: update slt

* feat: support all binary expressions

* refactor: move diagnostic tests to datafusion-sql

* chore: format

* fix: tests

* feat: pr feedback

* fix: ci checks

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
2 people authored and Sergey Zhukov committed Feb 3, 2025
1 parent bccf94b commit 866843b
Show file tree
Hide file tree
Showing 45 changed files with 1,187 additions and 229 deletions.
79 changes: 29 additions & 50 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 75 additions & 6 deletions datafusion/common/src/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,33 @@

//! Column
use arrow_schema::{Field, FieldRef};

use crate::error::_schema_err;
use crate::utils::{parse_identifiers_normalized, quote_identifier};
use crate::{DFSchema, Result, SchemaError, TableReference};
use crate::{DFSchema, Diagnostic, Result, SchemaError, Spans, TableReference};
use arrow_schema::{Field, FieldRef};
use std::collections::HashSet;
use std::convert::Infallible;
use std::fmt;
use std::str::FromStr;

/// A named reference to a qualified field in a schema.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Column {
/// relation/table reference.
pub relation: Option<TableReference>,
/// field/column name.
pub name: String,
/// Original source code location, if known
pub spans: Spans,
}

impl fmt::Debug for Column {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Column")
.field("relation", &self.relation)
.field("name", &self.name)
.finish()
}
}

impl Column {
Expand All @@ -50,6 +60,7 @@ impl Column {
Self {
relation: relation.map(|r| r.into()),
name: name.into(),
spans: Spans::new(),
}
}

Expand All @@ -58,6 +69,7 @@ impl Column {
Self {
relation: None,
name: name.into(),
spans: Spans::new(),
}
}

Expand All @@ -68,6 +80,7 @@ impl Column {
Self {
relation: None,
name: name.into(),
spans: Spans::new(),
}
}

Expand Down Expand Up @@ -99,7 +112,11 @@ impl Column {
// identifiers will be treated as an unqualified column name
_ => return None,
};
Some(Self { relation, name })
Some(Self {
relation,
name,
spans: Spans::new(),
})
}

/// Deserialize a fully qualified name string into a column
Expand All @@ -113,6 +130,7 @@ impl Column {
Self {
relation: None,
name: flat_name,
spans: Spans::new(),
},
)
}
Expand All @@ -124,6 +142,7 @@ impl Column {
Self {
relation: None,
name: flat_name,
spans: Spans::new(),
},
)
}
Expand Down Expand Up @@ -239,7 +258,30 @@ impl Column {

// If not due to USING columns then due to ambiguous column name
return _schema_err!(SchemaError::AmbiguousReference {
field: Column::new_unqualified(self.name),
field: Column::new_unqualified(&self.name),
})
.map_err(|err| {
let mut diagnostic = Diagnostic::new_error(
format!("column '{}' is ambiguous", &self.name),
self.spans().first(),
);
// TODO If [`DFSchema`] had spans, we could show the
// user which columns are candidates, or which table
// they come from. For now, let's list the table names
// only.
for qualified_field in qualified_fields {
let (Some(table), _) = qualified_field else {
continue;
};
diagnostic.add_note(
format!(
"possible reference to '{}' in table '{}'",
&self.name, table
),
None,
);
}
err.with_diagnostic(diagnostic)
});
}
}
Expand All @@ -254,6 +296,33 @@ impl Column {
.collect(),
})
}

/// Returns a reference to the set of locations in the SQL query where this
/// column appears, if known.
pub fn spans(&self) -> &Spans {
&self.spans
}

/// Returns a mutable reference to the set of locations in the SQL query
/// where this column appears, if known.
pub fn spans_mut(&mut self) -> &mut Spans {
&mut self.spans
}

/// Replaces the set of locations in the SQL query where this column
/// appears, if known.
pub fn with_spans(mut self, spans: Spans) -> Self {
self.spans = spans;
self
}

/// Qualifies the column with the given table reference.
pub fn with_relation(&self, relation: TableReference) -> Self {
Self {
relation: Some(relation),
..self.clone()
}
}
}

impl From<&str> for Column {
Expand Down
5 changes: 5 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ config_namespace! {
/// specified. The Arrow type system does not have a notion of maximum
/// string length and thus DataFusion can not enforce such limits.
pub support_varchar_with_length: bool, default = true

/// When set to true, the source locations relative to the original SQL
/// query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected
/// and recorded in the logical plan nodes.
pub collect_spans: bool, default = false
}
}

Expand Down
5 changes: 1 addition & 4 deletions datafusion/common/src/dfschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -502,10 +502,7 @@ impl DFSchema {
Ok((fields_without_qualifier[0].0, fields_without_qualifier[0].1))
} else {
_schema_err!(SchemaError::AmbiguousReference {
field: Column {
relation: None,
name: name.to_string(),
},
field: Column::new_unqualified(name.to_string(),),
})
}
}
Expand Down
Loading

0 comments on commit 866843b

Please sign in to comment.