Skip to content

Commit 8af9c53

Browse files
timsaucerclaude
andcommitted
refactor(udf): scalar from_parts now takes Vec<DataType>, matching udaf/udwf
`PythonFunctionScalarUDF::from_parts` previously took `Vec<Field>` for inputs only to call `.data_type()` on every entry inside `new` — nullability and metadata went straight in the bin because `Signature::exact` cannot represent them. The codec carried full Field info through IPC but threw it away one frame later. Take `Vec<DataType>` instead. The codec extracts data types from the decoded IPC schema and feeds them in directly. `return_field` is still a `Field` so per-output nullability/metadata round-trips intact, the same way `state_fields` is preserved on the aggregate side. The Python constructor path (`PyScalarUDF::new`) still goes through `new` with `Vec<Field>` since PyArrow already hands it the full Field list. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b5684bc commit 8af9c53

2 files changed

Lines changed: 15 additions & 9 deletions

File tree

crates/core/src/codec.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -527,10 +527,10 @@ fn decode_python_scalar_udf(py: Python<'_>, payload: &[u8]) -> PyResult<PythonFu
527527

528528
let input_schema = schema_from_ipc_bytes(&input_schema_bytes)
529529
.map_err(|e| pyo3::exceptions::PyValueError::new_err(format!("{e}")))?;
530-
let input_fields: Vec<Field> = input_schema
530+
let input_types: Vec<arrow::datatypes::DataType> = input_schema
531531
.fields()
532532
.iter()
533-
.map(|f| f.as_ref().clone())
533+
.map(|f| f.data_type().clone())
534534
.collect();
535535

536536
let return_schema = schema_from_ipc_bytes(&return_schema_bytes)
@@ -552,7 +552,7 @@ fn decode_python_scalar_udf(py: Python<'_>, payload: &[u8]) -> PyResult<PythonFu
552552
Ok(PythonFunctionScalarUDF::from_parts(
553553
name,
554554
func,
555-
input_fields,
555+
input_types,
556556
return_field,
557557
volatility,
558558
))

crates/core/src/udf.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,18 +79,24 @@ impl PythonFunctionScalarUDF {
7979
}
8080

8181
/// Reconstruct a `PythonFunctionScalarUDF` from the parts emitted
82-
/// by the codec. `input_fields` here only contributes `data_type`
83-
/// info (collapsed into `Signature::exact`); their names,
84-
/// nullability, and metadata are not retained, so the decoder is
85-
/// free to fabricate them from `Vec<DataType>`.
82+
/// by the codec. Inputs collapse to `Vec<DataType>` because
83+
/// `Signature::exact` cannot carry per-input nullability or
84+
/// metadata — the encoder is free to discard that side of the
85+
/// schema. `return_field` is kept as a `Field` so the post-decode
86+
/// nullability and metadata match the sender's instance.
8687
pub(crate) fn from_parts(
8788
name: String,
8889
func: Py<PyAny>,
89-
input_fields: Vec<Field>,
90+
input_types: Vec<DataType>,
9091
return_field: Field,
9192
volatility: Volatility,
9293
) -> Self {
93-
Self::new(name, func, input_fields, return_field, volatility)
94+
Self {
95+
name,
96+
func,
97+
signature: Signature::exact(input_types, volatility),
98+
return_field: Arc::new(return_field),
99+
}
94100
}
95101
}
96102

0 commit comments

Comments
 (0)