diff --git a/Cargo.toml b/Cargo.toml
index 7df2c14d4d0..7b620163ac9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,7 +47,7 @@ regex-syntax = { version = "^0.6", optional = true }
 streaming-iterator = { version = "0.1", optional = true }
 fallible-streaming-iterator = { version = "0.1", optional = true }
 
-json-deserializer = { version = "0.3", optional = true }
+json-deserializer = { version = "0.4", optional = true, features = ["preserve_order"] }
 indexmap = { version = "^1.6", optional = true }
 
 # used to print columns in a nice columnar format
diff --git a/src/io/json/read/infer_schema.rs b/src/io/json/read/infer_schema.rs
index 2ea4f840583..38c147728a1 100644
--- a/src/io/json/read/infer_schema.rs
+++ b/src/io/json/read/infer_schema.rs
@@ -1,5 +1,4 @@
 use std::borrow::Borrow;
-use std::collections::BTreeMap;
 
 use indexmap::map::IndexMap as HashMap;
 use indexmap::set::IndexSet as HashSet;
@@ -30,7 +29,7 @@ fn filter_map_nulls(dt: DataType) -> Option {
     }
 }
 
-fn infer_object(inner: &BTreeMap) -> Result {
+fn infer_object(inner: &HashMap) -> Result {
     let fields = inner
         .iter()
         .filter_map(|(key, value)| {
diff --git a/tests/it/io/ndjson/mod.rs b/tests/it/io/ndjson/mod.rs
index 14e8d6378dc..0f90da3c14f 100644
--- a/tests/it/io/ndjson/mod.rs
+++ b/tests/it/io/ndjson/mod.rs
@@ -311,3 +311,25 @@ fn case(case: &str) -> (String, Box) {
         _ => todo!(),
     }
 }
+
+/// Checks that the struct type inferred from NDJSON rows lists its fields in
+/// the order the keys appear in the input, not in sorted key order.
+#[test]
+fn infer_object() -> Result<()> {
+    // The third row omits "bools"; the expected schema still lists it last.
+    let data = r#"{"i64": 1, "f64": 0.1, "utf8": "foo1", "bools": true}
+    {"i64": 2, "f64": 0.2, "utf8": "foo2", "bools": false}
+    {"i64": 3, "f64": 0.3, "utf8": "foo3"}
+    {"i64": 4, "f64": 0.4, "utf8": "foo4", "bools": false}
+    "#;
+    let i64_fld = Field::new("i64", DataType::Int64, true);
+    let f64_fld = Field::new("f64", DataType::Float64, true);
+    let utf8_fld = Field::new("utf8", DataType::Utf8, true);
+    let bools_fld = Field::new("bools", DataType::Boolean, true);
+
+    let expected = DataType::Struct(vec![i64_fld, f64_fld, utf8_fld, bools_fld]);
+    let actual = infer(data)?;
+
+    assert_eq!(expected, actual);
+    Ok(())
+}