Skip to content

Commit fafefd0

Browse files
timsaucerclaude
andcommitted
fix(udf): log Python __eq__ exceptions instead of silently mapping to unequal
`PartialEq` on `PythonFunctionScalarUDF` / `PythonFunctionAggregateUDF` / `PythonFunctionWindowUDF` used `unwrap_or(false)` when the underlying Python `__eq__` raised. Two same-callable UDFs whose `__eq__` happened to throw — e.g. a deliberately strict `__eq__` that runs validation — would compare unequal silently, breaking expression dedup and cache lookups without leaving any trace for an operator to investigate. Rust's `PartialEq` cannot return `Result`, so `false` remains the conservative choice (better to over-distinguish than wrongly merge), but the exception is now logged at `log::debug` with the UDF's registered name and the exception text. Mark each call site with a `FIXME` pointing at the upstream `*UDFImpl` traits — when those expose a fallible `PartialEq` we can drop the fallback and surface the error. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f432d6b commit fafefd0

3 files changed

Lines changed: 47 additions & 3 deletions

File tree

crates/core/src/udaf.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,10 +243,22 @@ impl PartialEq for PythonFunctionAggregateUDF {
243243
// `__eq__` only for two distinct callables.
244244
&& (self.accumulator.as_ptr() == other.accumulator.as_ptr()
245245
|| Python::attach(|py| {
246+
// See `PythonFunctionScalarUDF::eq` for the
247+
// rationale on swallowing the exception as `false`
248+
// and logging at `debug`. FIXME: revisit if
249+
// upstream `AggregateUDFImpl` exposes a fallible
250+
// `PartialEq`.
246251
self.accumulator
247252
.bind(py)
248253
.eq(other.accumulator.bind(py))
249-
.unwrap_or(false)
254+
.unwrap_or_else(|e| {
255+
log::debug!(
256+
target: "datafusion_python::udaf",
257+
"PythonFunctionAggregateUDF {:?} __eq__ raised; treating as unequal: {e}",
258+
self.name,
259+
);
260+
false
261+
})
250262
}))
251263
}
252264
}

crates/core/src/udf.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,27 @@ impl PartialEq for PythonFunctionScalarUDF {
112112
// circuits before touching the GIL.
113113
&& (self.func.as_ptr() == other.func.as_ptr()
114114
|| Python::attach(|py| {
115-
self.func.bind(py).eq(other.func.bind(py)).unwrap_or(false)
115+
// Rust's `PartialEq` cannot return `Result`, so we
116+
// have to pick a side when Python `__eq__` raises.
117+
// `false` is the conservative choice — better to
118+
// report two UDFs as distinct than to wrongly
119+
// merge them — but the silent miss can still
120+
// surface as expression-dedup or cache-lookup
121+
// anomalies. Log at `debug` so the failure is
122+
// observable without flooding production logs.
123+
// FIXME: revisit if upstream `ScalarUDFImpl`
124+
// exposes a fallible `PartialEq`.
125+
self.func
126+
.bind(py)
127+
.eq(other.func.bind(py))
128+
.unwrap_or_else(|e| {
129+
log::debug!(
130+
target: "datafusion_python::udf",
131+
"PythonFunctionScalarUDF {:?} __eq__ raised; treating as unequal: {e}",
132+
self.name,
133+
);
134+
false
135+
})
116136
}))
117137
}
118138
}

crates/core/src/udwf.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,10 +329,22 @@ impl PartialEq for PythonFunctionWindowUDF {
329329
// `__eq__` only for two distinct callables.
330330
&& (self.evaluator.as_ptr() == other.evaluator.as_ptr()
331331
|| Python::attach(|py| {
332+
// See `PythonFunctionScalarUDF::eq` for the
333+
// rationale on swallowing the exception as `false`
334+
// and logging at `debug`. FIXME: revisit if
335+
// upstream `WindowUDFImpl` exposes a fallible
336+
// `PartialEq`.
332337
self.evaluator
333338
.bind(py)
334339
.eq(other.evaluator.bind(py))
335-
.unwrap_or(false)
340+
.unwrap_or_else(|e| {
341+
log::debug!(
342+
target: "datafusion_python::udwf",
343+
"PythonFunctionWindowUDF {:?} __eq__ raised; treating as unequal: {e}",
344+
self.name,
345+
);
346+
false
347+
})
336348
}))
337349
}
338350
}

0 commit comments

Comments
 (0)