Skip to content

Commit dfcf3bc

Browse files
timsaucerclaude
andcommitted
chore: replace blanket S301 allow with per-line noqa at pickle.loads
The pickle-Expr work added `S301` (suspicious-pickle-usage) to the `per-file-ignores` for both `python/tests/*` and `examples/*`. That silenced ruff's pickle-usage warning everywhere in those trees, not just at the call sites that genuinely need it — a future test that adds an unrelated `pickle.loads` on untrusted bytes would pass review without ever pinging the linter. Drop the blanket allows and tag each intentional `pickle.loads` call with a per-line `# noqa: S301` instead (12 sites total: 9 in `test_pickle_expr.py`, 2 in `_pickle_multiprocessing_helpers.py`, 1 in the FFI example test). New `pickle.loads` calls now have to justify themselves with their own noqa, keeping S301 a real signal. Production code (`Expr.__reduce__`) only calls `pickle.dumps`, which S301 does not cover, so no noqa is needed outside tests/examples. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cbfa652 commit dfcf3bc

4 files changed

Lines changed: 12 additions & 14 deletions

File tree

examples/datafusion-ffi-example/python/tests/_test_pickle_strict_ffi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_strict_ffi_udf_pickle_roundtrip_via_thread_locals():
9898

9999
set_worker_ctx(receiver)
100100
try:
101-
restored = pickle.loads(blob)
101+
restored = pickle.loads(blob) # noqa: S301
102102
finally:
103103
clear_worker_ctx()
104104

pyproject.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ extend-allowed-calls = ["datafusion.lit", "lit"]
127127
"PT011",
128128
"RUF015",
129129
"S101",
130-
"S301",
131130
"S608",
132131
"SLF",
133132
]
@@ -141,7 +140,6 @@ extend-allowed-calls = ["datafusion.lit", "lit"]
141140
"PLR2004",
142141
"RUF015",
143142
"S101",
144-
"S301",
145143
"T201",
146144
"W505",
147145
]

python/tests/_pickle_multiprocessing_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def unpickle_and_describe(blob: bytes) -> str:
6969
"""Unpickle a proto-bytes blob and return its canonical name."""
7070
import pickle
7171

72-
expr = pickle.loads(blob)
72+
expr = pickle.loads(blob) # noqa: S301
7373
return expr.canonical_name()
7474

7575

@@ -82,7 +82,7 @@ def unpickle_and_evaluate(blob: bytes, batch: list[int]) -> list[int]:
8282
"""
8383
import pickle
8484

85-
expr = pickle.loads(blob)
85+
expr = pickle.loads(blob) # noqa: S301
8686
ctx = SessionContext()
8787
df = ctx.from_pydict({"a": batch})
8888
out = df.with_column("result", expr).select("result")

python/tests/test_pickle_expr.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class TestProtoRoundTrip:
6767
def test_builtin_round_trip(self):
6868
e = col("a") + lit(1)
6969
blob = pickle.dumps(e)
70-
decoded = pickle.loads(blob)
70+
decoded = pickle.loads(blob) # noqa: S301
7171
assert decoded.canonical_name() == e.canonical_name()
7272

7373
def test_to_bytes_from_bytes(self):
@@ -107,14 +107,14 @@ def test_udf_decodes_into_fresh_ctx(self):
107107
def test_udf_decodes_via_pickle_with_no_worker_ctx(self):
108108
e = _double_udf()(col("a"))
109109
blob = pickle.dumps(e)
110-
decoded = pickle.loads(blob)
110+
decoded = pickle.loads(blob) # noqa: S301
111111
assert "double" in decoded.canonical_name()
112112

113113
def test_udf_decodes_via_pickle_with_worker_ctx(self):
114114
set_worker_ctx(SessionContext())
115115
e = _double_udf()(col("a"))
116116
blob = pickle.dumps(e)
117-
decoded = pickle.loads(blob)
117+
decoded = pickle.loads(blob) # noqa: S301
118118
assert "double" in decoded.canonical_name()
119119

120120
def test_closure_capturing_udf_names_match(self):
@@ -132,7 +132,7 @@ def fn(arr):
132132
)
133133
e = u(col("a"))
134134
blob = pickle.dumps(e)
135-
decoded = pickle.loads(blob)
135+
decoded = pickle.loads(blob) # noqa: S301
136136
# Round-trip names match; functional verification of captured state
137137
# happens in test_pickle_multiprocessing via an actual UDF call.
138138
assert decoded.canonical_name() == e.canonical_name()
@@ -189,15 +189,15 @@ def test_agg_udf_decodes_via_pickle_with_no_worker_ctx(self):
189189
u = self._build_aggregate_udf()
190190
e = u(col("a"))
191191
blob = pickle.dumps(e)
192-
decoded = pickle.loads(blob)
192+
decoded = pickle.loads(blob) # noqa: S301
193193
assert "count_all" in decoded.canonical_name()
194194

195195
def test_agg_udf_evaluates_after_roundtrip(self):
196196
"""End-to-end: the decoded aggregate UDF runs and merges across
197197
partitions, exercising the round-tripped state-field schema."""
198198
u = self._build_aggregate_udf()
199199
e = u(col("a"))
200-
decoded = pickle.loads(pickle.dumps(e))
200+
decoded = pickle.loads(pickle.dumps(e)) # noqa: S301
201201

202202
ctx = SessionContext()
203203
df = ctx.from_pydict({"a": [1, 2, 3, 4, 5]})
@@ -242,7 +242,7 @@ def test_window_udf_decodes_via_pickle_with_no_worker_ctx(self):
242242
u = self._build_window_udf()
243243
e = u(col("a"))
244244
blob = pickle.dumps(e)
245-
decoded = pickle.loads(blob)
245+
decoded = pickle.loads(blob) # noqa: S301
246246
assert "count_up" in decoded.canonical_name()
247247

248248

@@ -344,7 +344,7 @@ def test_sender_ctx_strict_roundtrip_via_pickle(self):
344344
worker.register_udf(u)
345345
set_worker_ctx(worker)
346346
try:
347-
decoded = pickle.loads(blob)
347+
decoded = pickle.loads(blob) # noqa: S301
348348
finally:
349349
clear_worker_ctx()
350350

@@ -369,7 +369,7 @@ def test_sender_ctx_strict_pickle_accepted_by_inline_worker_with_registry(self):
369369
worker.register_udf(u)
370370
set_worker_ctx(worker)
371371
try:
372-
decoded = pickle.loads(blob)
372+
decoded = pickle.loads(blob) # noqa: S301
373373
finally:
374374
clear_worker_ctx()
375375

0 commit comments

Comments
 (0)