@@ -100,22 +100,37 @@ pub mod window;
100100
101101use sort_expr:: { to_sort_expressions, PySortExpr } ;
102102
103+ // Define the new RawExpr struct and implement Debug trait
104+ #[ derive( Debug , Clone ) ]
105+ pub struct RawExpr {
106+ pub expr : Expr ,
107+ }
108+
109+ // Implement conversion from RawExpr to Expr
110+ impl From < RawExpr > for Expr {
111+ fn from ( raw_expr : RawExpr ) -> Expr {
112+ raw_expr. expr
113+ }
114+ }
115+
103116/// A PyExpr that can be used on a DataFrame
104117#[ pyclass( name = "Expr" , module = "datafusion.expr" , subclass) ]
105118#[ derive( Debug , Clone ) ]
106119pub struct PyExpr {
107- pub expr : Expr ,
120+ pub raw_expr : RawExpr ,
108121}
109122
110123impl From < PyExpr > for Expr {
111124 fn from ( expr : PyExpr ) -> Expr {
112- expr. expr
125+ expr. raw_expr . expr
113126 }
114127}
115128
116129impl From < Expr > for PyExpr {
117130 fn from ( expr : Expr ) -> PyExpr {
118- PyExpr { expr }
131+ PyExpr {
132+ raw_expr : RawExpr { expr }
133+ }
119134 }
120135}
121136
@@ -129,7 +144,7 @@ impl PyExpr {
129144 /// Return the specific expression
130145 fn to_variant < ' py > ( & self , py : Python < ' py > ) -> PyResult < Bound < ' py , PyAny > > {
131146 Python :: with_gil ( |_| {
132- match & self . expr {
147+ match & self . raw_expr . expr {
133148 Expr :: Alias ( alias) => Ok ( PyAlias :: from ( alias. clone ( ) ) . into_bound_py_any ( py) ?) ,
134149 Expr :: Column ( col) => Ok ( PyColumn :: from ( col. clone ( ) ) . into_bound_py_any ( py) ?) ,
135150 Expr :: ScalarVariable ( data_type, variables) => {
@@ -194,72 +209,72 @@ impl PyExpr {
194209 /// Returns the name of this expression as it should appear in a schema. This name
195210 /// will not include any CAST expressions.
196211 fn schema_name ( & self ) -> PyResult < String > {
197- Ok ( format ! ( "{}" , self . expr. schema_name( ) ) )
212+ Ok ( format ! ( "{}" , self . raw_expr . expr. schema_name( ) ) )
198213 }
199214
200215 /// Returns a full and complete string representation of this expression.
201216 fn canonical_name ( & self ) -> PyResult < String > {
202- Ok ( format ! ( "{}" , self . expr) )
217+ Ok ( format ! ( "{}" , self . raw_expr . expr) )
203218 }
204219
205220 /// Returns the name of the Expr variant.
206221 /// Ex: 'IsNotNull', 'Literal', 'BinaryExpr', etc
207222 fn variant_name ( & self ) -> PyResult < & str > {
208- Ok ( self . expr . variant_name ( ) )
223+ Ok ( self . raw_expr . expr . variant_name ( ) )
209224 }
210225
211226 fn __richcmp__ ( & self , other : PyExpr , op : CompareOp ) -> PyExpr {
212227 let expr = match op {
213- CompareOp :: Lt => self . expr . clone ( ) . lt ( other. expr ) ,
214- CompareOp :: Le => self . expr . clone ( ) . lt_eq ( other. expr ) ,
215- CompareOp :: Eq => self . expr . clone ( ) . eq ( other. expr ) ,
216- CompareOp :: Ne => self . expr . clone ( ) . not_eq ( other. expr ) ,
217- CompareOp :: Gt => self . expr . clone ( ) . gt ( other. expr ) ,
218- CompareOp :: Ge => self . expr . clone ( ) . gt_eq ( other. expr ) ,
228+ CompareOp :: Lt => self . raw_expr . expr . clone ( ) . lt ( other. raw_expr . expr ) ,
229+ CompareOp :: Le => self . raw_expr . expr . clone ( ) . lt_eq ( other. raw_expr . expr ) ,
230+ CompareOp :: Eq => self . raw_expr . expr . clone ( ) . eq ( other. raw_expr . expr ) ,
231+ CompareOp :: Ne => self . raw_expr . expr . clone ( ) . not_eq ( other. raw_expr . expr ) ,
232+ CompareOp :: Gt => self . raw_expr . expr . clone ( ) . gt ( other. raw_expr . expr ) ,
233+ CompareOp :: Ge => self . raw_expr . expr . clone ( ) . gt_eq ( other. raw_expr . expr ) ,
219234 } ;
220235 expr. into ( )
221236 }
222237
223238 fn __repr__ ( & self ) -> PyResult < String > {
224- Ok ( format ! ( "Expr({})" , self . expr) )
239+ Ok ( format ! ( "Expr({})" , self . raw_expr . expr) )
225240 }
226241
227242 fn __add__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
228- Ok ( ( self . expr . clone ( ) + rhs. expr ) . into ( ) )
243+ Ok ( ( self . raw_expr . expr . clone ( ) + rhs. raw_expr . expr ) . into ( ) )
229244 }
230245
231246 fn __sub__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
232- Ok ( ( self . expr . clone ( ) - rhs. expr ) . into ( ) )
247+ Ok ( ( self . raw_expr . expr . clone ( ) - rhs. raw_expr . expr ) . into ( ) )
233248 }
234249
235250 fn __truediv__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
236- Ok ( ( self . expr . clone ( ) / rhs. expr ) . into ( ) )
251+ Ok ( ( self . raw_expr . expr . clone ( ) / rhs. raw_expr . expr ) . into ( ) )
237252 }
238253
239254 fn __mul__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
240- Ok ( ( self . expr . clone ( ) * rhs. expr ) . into ( ) )
255+ Ok ( ( self . raw_expr . expr . clone ( ) * rhs. raw_expr . expr ) . into ( ) )
241256 }
242257
243258 fn __mod__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
244- let expr = self . expr . clone ( ) % rhs. expr ;
259+ let expr = self . raw_expr . expr . clone ( ) % rhs. raw_expr . expr ;
245260 Ok ( expr. into ( ) )
246261 }
247262
248263 fn __and__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
249- Ok ( self . expr . clone ( ) . and ( rhs. expr ) . into ( ) )
264+ Ok ( self . raw_expr . expr . clone ( ) . and ( rhs. raw_expr . expr ) . into ( ) )
250265 }
251266
252267 fn __or__ ( & self , rhs : PyExpr ) -> PyResult < PyExpr > {
253- Ok ( self . expr . clone ( ) . or ( rhs. expr ) . into ( ) )
268+ Ok ( self . raw_expr . expr . clone ( ) . or ( rhs. raw_expr . expr ) . into ( ) )
254269 }
255270
256271 fn __invert__ ( & self ) -> PyResult < PyExpr > {
257- let expr = !self . expr . clone ( ) ;
272+ let expr = !self . raw_expr . expr . clone ( ) ;
258273 Ok ( expr. into ( ) )
259274 }
260275
261276 fn __getitem__ ( & self , key : & str ) -> PyResult < PyExpr > {
262- Ok ( self . expr . clone ( ) . field ( key) . into ( ) )
277+ Ok ( self . raw_expr . expr . clone ( ) . field ( key) . into ( ) )
263278 }
264279
265280 #[ staticmethod]
@@ -274,34 +289,34 @@ impl PyExpr {
274289
275290 /// assign a name to the PyExpr
276291 pub fn alias ( & self , name : & str ) -> PyExpr {
277- self . expr . clone ( ) . alias ( name) . into ( )
292+ self . raw_expr . expr . clone ( ) . alias ( name) . into ( )
278293 }
279294
280295 /// Create a sort PyExpr from an existing PyExpr.
281296 #[ pyo3( signature = ( ascending=true , nulls_first=true ) ) ]
282297 pub fn sort ( & self , ascending : bool , nulls_first : bool ) -> PySortExpr {
283- self . expr . clone ( ) . sort ( ascending, nulls_first) . into ( )
298+ self . raw_expr . expr . clone ( ) . sort ( ascending, nulls_first) . into ( )
284299 }
285300
286301 pub fn is_null ( & self ) -> PyExpr {
287- self . expr . clone ( ) . is_null ( ) . into ( )
302+ self . raw_expr . expr . clone ( ) . is_null ( ) . into ( )
288303 }
289304
290305 pub fn is_not_null ( & self ) -> PyExpr {
291- self . expr . clone ( ) . is_not_null ( ) . into ( )
306+ self . raw_expr . expr . clone ( ) . is_not_null ( ) . into ( )
292307 }
293308
294309 pub fn cast ( & self , to : PyArrowType < DataType > ) -> PyExpr {
295310 // self.expr.cast_to() requires DFSchema to validate that the cast
296311 // is supported, omit that for now
297- let expr = Expr :: Cast ( Cast :: new ( Box :: new ( self . expr . clone ( ) ) , to. 0 ) ) ;
312+ let expr = Expr :: Cast ( Cast :: new ( Box :: new ( self . raw_expr . expr . clone ( ) ) , to. 0 ) ) ;
298313 expr. into ( )
299314 }
300315
301316 #[ pyo3( signature = ( low, high, negated=false ) ) ]
302317 pub fn between ( & self , low : PyExpr , high : PyExpr , negated : bool ) -> PyExpr {
303318 let expr = Expr :: Between ( Between :: new (
304- Box :: new ( self . expr . clone ( ) ) ,
319+ Box :: new ( self . raw_expr . expr . clone ( ) ) ,
305320 negated,
306321 Box :: new ( low. into ( ) ) ,
307322 Box :: new ( high. into ( ) ) ,
@@ -313,7 +328,7 @@ impl PyExpr {
313328 /// could include user defined functions or types. RexType identifies the row
314329 /// as one of the possible valid `RexTypes`.
315330 pub fn rex_type ( & self ) -> PyResult < RexType > {
316- Ok ( match self . expr {
331+ Ok ( match self . raw_expr . expr {
317332 Expr :: Alias ( ..) => RexType :: Alias ,
318333 Expr :: Column ( ..) => RexType :: Reference ,
319334 Expr :: ScalarVariable ( ..) | Expr :: Literal ( ..) => RexType :: Literal ,
@@ -352,16 +367,16 @@ impl PyExpr {
352367 /// Given the current `Expr` return the DataTypeMap which represents the
353368 /// PythonType, Arrow DataType, and SqlType Enum which represents
354369 pub fn types ( & self ) -> PyResult < DataTypeMap > {
355- Self :: _types ( & self . expr )
370+ Self :: _types ( & self . raw_expr . expr )
356371 }
357372
358373 /// Extracts the Expr value into a PyObject that can be shared with Python
359374 pub fn python_value ( & self , py : Python ) -> PyResult < PyObject > {
360- match & self . expr {
375+ match & self . raw_expr . expr {
361376 Expr :: Literal ( scalar_value) => scalar_to_pyarrow ( scalar_value, py) ,
362377 _ => Err ( py_type_err ( format ! (
363378 "Non Expr::Literal encountered in types: {:?}" ,
364- & self . expr
379+ & self . raw_expr . expr
365380 ) ) ) ,
366381 }
367382 }
@@ -370,10 +385,10 @@ impl PyExpr {
370385 /// store those operands in different datastructures. This function examines the Expr variant and returns
371386 /// the operands to the calling logic as a Vec of PyExpr instances.
372387 pub fn rex_call_operands ( & self ) -> PyResult < Vec < PyExpr > > {
373- match & self . expr {
388+ match & self . raw_expr . expr {
374389 // Expr variants that are themselves the operand to return
375390 Expr :: Column ( ..) | Expr :: ScalarVariable ( ..) | Expr :: Literal ( ..) => {
376- Ok ( vec ! [ PyExpr :: from( self . expr. clone( ) ) ] )
391+ Ok ( vec ! [ PyExpr :: from( self . raw_expr . expr. clone( ) ) ] )
377392 }
378393
379394 Expr :: Alias ( alias) => Ok ( vec ! [ PyExpr :: from( * alias. expr. clone( ) ) ] ) ,
@@ -470,14 +485,14 @@ impl PyExpr {
470485 | Expr :: Placeholder { .. }
471486 | Expr :: Exists { .. } => Err ( py_runtime_err ( format ! (
472487 "Unimplemented Expr type: {}" ,
473- self . expr
488+ self . raw_expr . expr
474489 ) ) ) ,
475490 }
476491 }
477492
478493 /// Extracts the operator associated with a RexType::Call
479494 pub fn rex_call_operator ( & self ) -> PyResult < String > {
480- Ok ( match & self . expr {
495+ Ok ( match & self . raw_expr . expr {
481496 Expr :: BinaryExpr ( BinaryExpr {
482497 left : _,
483498 op,
@@ -520,7 +535,7 @@ impl PyExpr {
520535 _ => {
521536 return Err ( py_type_err ( format ! (
522537 "Catch all triggered in get_operator_name: {:?}" ,
523- & self . expr
538+ & self . raw_expr . expr
524539 ) ) )
525540 }
526541 } )
@@ -533,34 +548,34 @@ impl PyExpr {
533548 // Expression Function Builder functions
534549
535550 pub fn order_by ( & self , order_by : Vec < PySortExpr > ) -> PyExprFuncBuilder {
536- self . expr
551+ self . raw_expr . expr
537552 . clone ( )
538553 . order_by ( to_sort_expressions ( order_by) )
539554 . into ( )
540555 }
541556
542557 pub fn filter ( & self , filter : PyExpr ) -> PyExprFuncBuilder {
543- self . expr . clone ( ) . filter ( filter. expr . clone ( ) ) . into ( )
558+ self . raw_expr . expr . clone ( ) . filter ( filter. raw_expr . expr . clone ( ) ) . into ( )
544559 }
545560
546561 pub fn distinct ( & self ) -> PyExprFuncBuilder {
547- self . expr . clone ( ) . distinct ( ) . into ( )
562+ self . raw_expr . expr . clone ( ) . distinct ( ) . into ( )
548563 }
549564
550565 pub fn null_treatment ( & self , null_treatment : NullTreatment ) -> PyExprFuncBuilder {
551- self . expr
566+ self . raw_expr . expr
552567 . clone ( )
553568 . null_treatment ( Some ( null_treatment. into ( ) ) )
554569 . into ( )
555570 }
556571
557572 pub fn partition_by ( & self , partition_by : Vec < PyExpr > ) -> PyExprFuncBuilder {
558- let partition_by = partition_by. iter ( ) . map ( |e| e. expr . clone ( ) ) . collect ( ) ;
559- self . expr . clone ( ) . partition_by ( partition_by) . into ( )
573+ let partition_by = partition_by. iter ( ) . map ( |e| e. raw_expr . expr . clone ( ) ) . collect ( ) ;
574+ self . raw_expr . expr . clone ( ) . partition_by ( partition_by) . into ( )
560575 }
561576
562577 pub fn window_frame ( & self , window_frame : PyWindowFrame ) -> PyExprFuncBuilder {
563- self . expr . clone ( ) . window_frame ( window_frame. into ( ) ) . into ( )
578+ self . raw_expr . expr . clone ( ) . window_frame ( window_frame. into ( ) ) . into ( )
564579 }
565580
566581 #[ pyo3( signature = ( partition_by=None , window_frame=None , order_by=None , null_treatment=None ) ) ]
@@ -571,7 +586,7 @@ impl PyExpr {
571586 order_by : Option < Vec < PySortExpr > > ,
572587 null_treatment : Option < NullTreatment > ,
573588 ) -> PyDataFusionResult < PyExpr > {
574- match & self . expr {
589+ match & self . raw_expr . expr {
575590 Expr :: AggregateFunction ( agg_fn) => {
576591 let window_fn = Expr :: WindowFunction ( WindowFunction :: new (
577592 WindowFunctionDefinition :: AggregateUDF ( agg_fn. func . clone ( ) ) ,
@@ -587,15 +602,15 @@ impl PyExpr {
587602 )
588603 }
589604 Expr :: WindowFunction ( _) => add_builder_fns_to_window (
590- self . expr . clone ( ) ,
605+ self . raw_expr . expr . clone ( ) ,
591606 partition_by,
592607 window_frame,
593608 order_by,
594609 null_treatment,
595610 ) ,
596611 _ => Err (
597612 PyDataFusionError :: ExecutionError ( datafusion:: error:: DataFusionError :: Plan (
598- format ! ( "Using {} with `over` is not allowed. Must use an aggregate or window function." , self . expr. variant_name( ) ) ,
613+ format ! ( "Using {} with `over` is not allowed. Must use an aggregate or window function." , self . raw_expr . expr. variant_name( ) ) ,
599614 ) )
600615 ) ,
601616 }
@@ -624,7 +639,7 @@ impl PyExprFuncBuilder {
624639 }
625640
626641 pub fn filter ( & self , filter : PyExpr ) -> PyExprFuncBuilder {
627- self . builder . clone ( ) . filter ( filter. expr . clone ( ) ) . into ( )
642+ self . builder . clone ( ) . filter ( filter. raw_expr . expr . clone ( ) ) . into ( )
628643 }
629644
630645 pub fn distinct ( & self ) -> PyExprFuncBuilder {
@@ -639,7 +654,7 @@ impl PyExprFuncBuilder {
639654 }
640655
641656 pub fn partition_by ( & self , partition_by : Vec < PyExpr > ) -> PyExprFuncBuilder {
642- let partition_by = partition_by. iter ( ) . map ( |e| e. expr . clone ( ) ) . collect ( ) ;
657+ let partition_by = partition_by. iter ( ) . map ( |e| e. raw_expr . expr . clone ( ) ) . collect ( ) ;
643658 self . builder . clone ( ) . partition_by ( partition_by) . into ( )
644659 }
645660
@@ -657,7 +672,7 @@ impl PyExprFuncBuilder {
657672
658673impl PyExpr {
659674 pub fn _column_name ( & self , plan : & LogicalPlan ) -> PyDataFusionResult < String > {
660- let field = Self :: expr_to_field ( & self . expr , plan) ?;
675+ let field = Self :: expr_to_field ( & self . raw_expr . expr , plan) ?;
661676 Ok ( field. name ( ) . to_owned ( ) )
662677 }
663678
0 commit comments