pydantic · sydney-runkle · Jun 19, 2024 · Jun 16, 2024 · Jun 16, 2024 · Jun 16, 2024
diff --git a/src/validators/model.rs b/src/validators/model.rs
@@ -204,6 +204,7 @@ impl Validator for ModelValidator {
             for field_name in validated_fields_set {
                 fields_set.add(field_name)?;
             }
+            state.fields_set_count = Some(fields_set.len());
         }
 
         force_setattr(py, model, intern!(py, DUNDER_DICT), validated_dict.to_object(py))?;
@@ -241,10 +242,13 @@ impl ModelValidator {
             } else {
                 PySet::new_bound(py, [&String::from(ROOT_FIELD)])?
             };
-            force_setattr(py, self_instance, intern!(py, DUNDER_FIELDS_SET_KEY), fields_set)?;
+            force_setattr(py, self_instance, intern!(py, DUNDER_FIELDS_SET_KEY), &fields_set)?;
             force_setattr(py, self_instance, intern!(py, ROOT_FIELD), &output)?;
+            state.fields_set_count = Some(fields_set.len());
         } else {
-            let (model_dict, model_extra, fields_set) = output.extract(py)?;
+            let (model_dict, model_extra, fields_set): (Bound<PyAny>, Bound<PyAny>, Bound<PyAny>) =
+                output.extract(py)?;
+            state.fields_set_count = fields_set.len().ok();
             set_model_attrs(self_instance, &model_dict, &model_extra, &fields_set)?;
         }
         self.call_post_init(py, self_instance.clone(), input, state.extra())
@@ -281,11 +285,13 @@ impl ModelValidator {
             } else {
                 PySet::new_bound(py, [&String::from(ROOT_FIELD)])?
             };
-            force_setattr(py, &instance, intern!(py, DUNDER_FIELDS_SET_KEY), fields_set)?;
+            force_setattr(py, &instance, intern!(py, DUNDER_FIELDS_SET_KEY), &fields_set)?;
             force_setattr(py, &instance, intern!(py, ROOT_FIELD), output)?;
+            state.fields_set_count = Some(fields_set.len());
         } else {
             let (model_dict, model_extra, val_fields_set) = output.extract(py)?;
             let fields_set = existing_fields_set.unwrap_or(&val_fields_set);
+            state.fields_set_count = fields_set.len().ok();
             set_model_attrs(&instance, &model_dict, &model_extra, fields_set)?;
         }
         self.call_post_init(py, instance, input, state.extra())

diff --git a/src/validators/typed_dict.rs b/src/validators/typed_dict.rs
@@ -165,6 +165,7 @@ impl Validator for TypedDictValidator {
 
         {
             let state = &mut state.rebind_extra(|extra| extra.data = Some(output_dict.clone()));
+            let mut fields_set_count: usize = 0;
 
             for field in &self.fields {
                 let op_key_value = match dict.get_item(&field.lookup_key) {
@@ -186,6 +187,7 @@ impl Validator for TypedDictValidator {
                     match field.validator.validate(py, value.borrow_input(), state) {
                         Ok(value) => {
                             output_dict.set_item(&field.name_py, value)?;
+                            fields_set_count += 1;
                         }
                         Err(ValError::Omit) => continue,
                         Err(ValError::LineErrors(line_errors)) => {
@@ -227,6 +229,8 @@ impl Validator for TypedDictValidator {
                     Err(err) => return Err(err),
                 }
             }
+
+            state.fields_set_count = (fields_set_count != 0).then_some(fields_set_count);
         }
 
         if let Some(used_keys) = used_keys {

diff --git a/src/validators/union.rs b/src/validators/union.rs
@@ -111,7 +111,9 @@ impl UnionValidator {
         let strict = state.strict_or(self.strict);
         let mut errors = MaybeErrors::new(self.custom_error.as_ref());
 
-        let mut success = None;
+        // tracks the best successful match seen so far, as
+        // (validated value, its exactness, its fields-set count if one was recorded)
+        let mut success: Option<(Py<PyAny>, Exactness, Option<usize>)> = None;
 
         for (choice, label) in &self.choices {
             let state = &mut state.rebind_extra(|extra| {
@@ -134,16 +136,20 @@ impl UnionValidator {
                     _ => {
                         // success should always have an exactness
                         debug_assert_ne!(state.exactness, None);
+
                         let new_exactness = state.exactness.unwrap_or(Exactness::Lax);
-                        // if the new result has higher exactness than the current success, replace it
-                        if success
-                            .as_ref()
-                            .map_or(true, |(_, current_exactness)| *current_exactness < new_exactness)
-                        {
-                            // TODO: is there a possible optimization here, where once there has
-                            // been one success, we turn on strict mode, to avoid unnecessary
-                            // coercions for further validation?
-                            success = Some((new_success, new_exactness));
+                        let new_fields_set = state.fields_set_count;
+
+                        let new_success_is_best_match: bool =
+                            success.as_ref().map_or(true, |(_, cur_exactness, cur_fields_set)| {
+                                match (*cur_fields_set, new_fields_set) {
+                                    (Some(cur), Some(new)) if cur != new => cur < new,
+                                    _ => *cur_exactness < new_exactness,
+                                }
+                            });
+
+                        if new_success_is_best_match {
+                            success = Some((new_success, new_exactness, new_fields_set));
                         }
                     }
                 },
@@ -158,7 +164,7 @@ impl UnionValidator {
         }
         state.exactness = old_exactness;
 
-        if let Some((success, exactness)) = success {
+        if let Some((success, exactness, _fields_set)) = success {
             state.floor_exactness(exactness);
             return Ok(success);
         }

diff --git a/src/validators/validation_state.rs b/src/validators/validation_state.rs
@@ -18,6 +18,7 @@ pub enum Exactness {
 pub struct ValidationState<'a, 'py> {
     pub recursion_guard: &'a mut RecursionState,
     pub exactness: Option<Exactness>,
+    pub fields_set_count: Option<usize>,
     // deliberately make Extra readonly
     extra: Extra<'a, 'py>,
 }
@@ -27,6 +28,7 @@ impl<'a, 'py> ValidationState<'a, 'py> {
         Self {
             recursion_guard, // Don't care about exactness unless doing union validation
             exactness: None,
+            fields_set_count: None,
             extra,
         }
     }

diff --git a/tests/validators/test_union.py b/tests/validators/test_union.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 from datetime import date, time
 from enum import Enum, IntEnum
-from typing import Any
+from typing import Any, Optional, Union
 from uuid import UUID
 
 import pytest
@@ -170,13 +170,15 @@ def test_model_a(self, schema_validator: SchemaValidator):
         assert m.b == 'hello'
         assert not hasattr(m, 'c')
 
-    def test_model_b_ignored(self, schema_validator: SchemaValidator):
-        # first choice works, so second choice is not used
+    def test_model_b_preferred(self, schema_validator: SchemaValidator):
+        # Note: this differs from the previous smart union behavior, where the
+        # first match would be preferred. However, we believe it is better
+        # to prefer the match with the greatest number of valid fields set.
         m = schema_validator.validate_python({'a': 1, 'b': 'hello', 'c': 2.0})
-        assert isinstance(m, self.ModelA)
+        assert isinstance(m, self.ModelB)
         assert m.a == 1
         assert m.b == 'hello'
-        assert not hasattr(m, 'c')
+        assert m.c == 2.0
 
     def test_model_b_not_ignored(self, schema_validator: SchemaValidator):
         m1 = self.ModelB()
@@ -803,3 +805,157 @@ class ModelA:
     assert isinstance(m, ModelA)
     assert m.a == 42
     assert validator.validate_python(True) is True
+
+
+def test_union_with_subclass() -> None:
+    class ModelA:
+        a: int
+
+    class ModelB(ModelA):
+        b: int
+
+    model_a_schema = core_schema.model_schema(
+        ModelA, core_schema.model_fields_schema(fields={'a': core_schema.model_field(core_schema.int_schema())})
+    )
+    model_b_schema = core_schema.model_schema(
+        ModelB,
+        core_schema.model_fields_schema(
+            fields={
+                'a': core_schema.model_field(core_schema.int_schema()),
+                'b': core_schema.model_field(core_schema.int_schema()),
+            }
+        ),
+    )
+
+    for choices in [[model_a_schema, model_b_schema], [model_b_schema, model_a_schema]]:
+        validator = SchemaValidator(schema=core_schema.union_schema(choices))
+        assert isinstance(validator.validate_python({'a': 1}), ModelA)
+        assert isinstance(validator.validate_python({'a': 1, 'b': 2}), ModelB)
+
+        # confirm that a model that matches in lax mode with 2 fields
+        # is preferred over a model that matches in strict mode with 1 field
+        assert isinstance(validator.validate_python({'a': '1', 'b': '2'}), ModelB)
+        assert isinstance(validator.validate_python({'a': '1', 'b': 2}), ModelB)
+        assert isinstance(validator.validate_python({'a': 1, 'b': '2'}), ModelB)
+        assert isinstance(validator.validate_python({'a': 1, 'b': 2}), ModelB)
+
+
+def test_union_with_default() -> None:
+    class ModelA:
+        a: int = 0
+
+    class ModelB:
+        b: int = 0
+
+    val = SchemaValidator(
+        {
+            'type': 'union',
+            'choices': [
+                {
+                    'type': 'model',
+                    'cls': ModelA,
+                    'schema': {
+                        'type': 'model-fields',
+                        'fields': {
+                            'a': {
+                                'type': 'model-field',
+                                'schema': {'type': 'default', 'schema': {'type': 'int'}, 'default': 0},
+                            },
+                        },
+                    },
+                },
+                {
+                    'type': 'model',
+                    'cls': ModelB,
+                    'schema': {
+                        'type': 'model-fields',
+                        'fields': {
+                            'b': {
+                                'type': 'model-field',
+                                'schema': {'type': 'default', 'schema': {'type': 'int'}, 'default': 0},
+                            },
+                        },
+                    },
+                },
+            ],
+        }
+    )
+
+    assert isinstance(val.validate_python({'a': 1}), ModelA)
+    assert isinstance(val.validate_python({'b': 1}), ModelB)
+
+    # defaults to leftmost choice if there's a tie
+    assert isinstance(val.validate_python({}), ModelA)
+
+
+def test_optional_union_with_members_having_defaults() -> None:
+    class ModelA:
+        a: int = 0
+
+    class ModelB:
+        b: int = 0
+
+    class WrapModel:
+        val: Optional[Union[ModelA, ModelB]] = None
+
+    val = SchemaValidator(
+        {
+            'type': 'model',
+            'cls': WrapModel,
+            'schema': {
+                'type': 'model-fields',
+                'fields': {
+                    'val': {
+                        'type': 'model-field',
+                        'schema': {
+                            'type': 'default',
+                            'schema': {
+                                'type': 'union',
+                                'choices': [
+                                    {
+                                        'type': 'model',
+                                        'cls': ModelA,
+                                        'schema': {
+                                            'type': 'model-fields',
+                                            'fields': {
+                                                'a': {
+                                                    'type': 'model-field',
+                                                    'schema': {
+                                                        'type': 'default',
+                                                        'schema': {'type': 'int'},
+                                                        'default': 0,
+                                                    },
+                                                }
+                                            },
+                                        },
+                                    },
+                                    {
+                                        'type': 'model',
+                                        'cls': ModelB,
+                                        'schema': {
+                                            'type': 'model-fields',
+                                            'fields': {
+                                                'b': {
+                                                    'type': 'model-field',
+                                                    'schema': {
+                                                        'type': 'default',
+                                                        'schema': {'type': 'int'},
+                                                        'default': 0,
+                                                    },
+                                                }
+                                            },
+                                        },
+                                    },
+                                ],
+                            },
+                            'default': None,
+                        },
+                    }
+                },
+            },
+        }
+    )
+
+    assert isinstance(val.validate_python({'val': {'a': 1}}).val, ModelA)
+    assert isinstance(val.validate_python({'val': {'b': 1}}).val, ModelB)
+    assert val.validate_python({}).val is None