
Commit d56df99

Merge branch 'googleapis:main' into output_schema
2 parents: 5ec67a6 + 46994d7

13 files changed: +355 -63 lines


bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 6 additions & 1 deletion
@@ -22,6 +22,7 @@
 import sqlglot.expressions as sge
 
 from bigframes import operations as ops
+from bigframes.core.compile.constants import UNIT_TO_US_CONVERSION_FACTORS
 from bigframes.core.compile.sqlglot.expressions.op_registration import OpRegistration
 from bigframes.core.compile.sqlglot.expressions.typed_expr import TypedExpr
 
@@ -618,7 +619,11 @@ def _(op: ops.ToTimestampOp, expr: TypedExpr) -> sge.Expression:
 
 @UNARY_OP_REGISTRATION.register(ops.ToTimedeltaOp)
 def _(op: ops.ToTimedeltaOp, expr: TypedExpr) -> sge.Expression:
-    return sge.Interval(this=expr.expr, unit=sge.Identifier(this="SECOND"))
+    value = expr.expr
+    factor = UNIT_TO_US_CONVERSION_FACTORS[op.unit]
+    if factor != 1:
+        value = sge.Mul(this=value, expression=sge.convert(factor))
+    return sge.Interval(this=value, unit=sge.Identifier(this="MICROSECOND"))
 
 
 @UNARY_OP_REGISTRATION.register(ops.UnixMicros)
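
For context, a minimal standalone sketch of what the new ToTimedeltaOp lowering does: scale the input to microseconds using the unit's conversion factor, then wrap it in a MICROSECOND interval. The factor mapping below is a hypothetical stand-in; the real UNIT_TO_US_CONVERSION_FACTORS is imported from bigframes.core.compile.constants and is not shown in this diff.

import sqlglot.expressions as sge

# Hypothetical stand-in for bigframes' UNIT_TO_US_CONVERSION_FACTORS.
UNIT_TO_US_CONVERSION_FACTORS = {"us": 1, "ms": 1_000, "s": 1_000_000}

def to_timedelta_interval(column: str, unit: str) -> sge.Expression:
    # Mirror the compiler change: multiply by the per-unit factor (skipped
    # when it is 1) and emit an INTERVAL ... MICROSECOND expression.
    value: sge.Expression = sge.to_identifier(column)
    factor = UNIT_TO_US_CONVERSION_FACTORS[unit]
    if factor != 1:
        value = sge.Mul(this=value, expression=sge.convert(factor))
    return sge.Interval(this=value, unit=sge.Identifier(this="MICROSECOND"))

# Renders roughly as: INTERVAL int64_col * 1000000 MICROSECOND
print(to_timedelta_interval("int64_col", "s").sql(dialect="bigquery"))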

bigframes/dataframe.py

Lines changed: 10 additions & 3 deletions
@@ -2797,10 +2797,17 @@ def where(self, cond, other=None):
         )
 
         # Execute it with the DataFrame when cond or/and other is callable.
+        # It can be either a plain python function or remote/managed function.
         if callable(cond):
-            cond = cond(self)
+            if hasattr(cond, "bigframes_bigquery_function"):
+                cond = self.apply(cond, axis=1)
+            else:
+                cond = cond(self)
         if callable(other):
-            other = other(self)
+            if hasattr(other, "bigframes_bigquery_function"):
+                other = self.apply(other, axis=1)
+            else:
+                other = other(self)
 
         aligned_block, (_, _) = self._block.join(cond._block, how="left")
         # No left join is needed when 'other' is None or constant.
@@ -2813,7 +2820,7 @@ def where(self, cond, other=None):
         labels = aligned_block.column_labels[:self_len]
         self_col = {x: ex.deref(y) for x, y in zip(labels, ids)}
 
-        if isinstance(cond, bigframes.series.Series) and cond.name in self_col:
+        if isinstance(cond, bigframes.series.Series):
             # This is when 'cond' is a valid series.
             y = aligned_block.value_columns[self_len]
             cond_col = {x: ex.deref(y) for x in self_col.keys()}
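
A short usage sketch of the new dispatch, assuming an authenticated BigQuery DataFrames session; the DataFrame below is illustrative, while is_sum_positive_mf refers to the kind of managed/remote function defined in the tests added later in this commit.

import bigframes.pandas as bpd

# Illustrative data; the added system tests use the `scalar_types` sample table.
df = bpd.DataFrame({"int64_col": [1, -2, 3], "int64_too": [4, 5, -6]})

# A plain Python callable keeps the old behaviour: it is called with the
# DataFrame itself.
masked = df.where(lambda d: d > 0)

# A managed/remote function exposes the `bigframes_bigquery_function`
# attribute, so `where` now evaluates it row-wise via df.apply(cond, axis=1)
# before building the mask (uncomment once such a function exists):
# masked_udf = df.where(is_sum_positive_mf, 0)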

bigframes/functions/_function_session.py

Lines changed: 0 additions & 4 deletions
@@ -555,10 +555,6 @@ def wrapper(func):
             warnings.warn(msg, category=bfe.FunctionConflictTypeHintWarning)
             py_sig = py_sig.replace(return_annotation=output_type)
 
-        # Try to get input types via type annotations.
-
-        # The function will actually be receiving a pandas Series, but allow both
-        # BigQuery DataFrames and pandas object types for compatibility.
         # The function will actually be receiving a pandas Series, but allow
         # both BigQuery DataFrames and pandas object types for compatibility.
         is_row_processor = False

tests/system/large/functions/test_managed_function.py

Lines changed: 112 additions & 0 deletions
@@ -963,3 +963,115 @@ def float_parser(row):
         cleanup_function_assets(
             float_parser_mf, session.bqclient, ignore_failures=False
         )
+
+
+def test_managed_function_df_where(session, dataset_id, scalars_dfs):
+    try:
+
+        # The return type has to be bool type for callable where condition.
+        def is_sum_positive(a, b):
+            return a + b > 0
+
+        is_sum_positive_mf = session.udf(
+            input_types=[int, int],
+            output_type=bool,
+            dataset=dataset_id,
+            name=prefixer.create_prefix(),
+        )(is_sum_positive)
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+        int64_cols = ["int64_col", "int64_too"]
+
+        bf_int64_df = scalars_df[int64_cols]
+        bf_int64_df_filtered = bf_int64_df.dropna()
+        pd_int64_df = scalars_pandas_df[int64_cols]
+        pd_int64_df_filtered = pd_int64_df.dropna()
+
+        # Use callable condition in dataframe.where method.
+        bf_result = bf_int64_df_filtered.where(is_sum_positive_mf).to_pandas()
+        # Pandas doesn't support such case, use following as workaround.
+        pd_result = pd_int64_df_filtered.where(pd_int64_df_filtered.sum(axis=1) > 0)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+        # Make sure the read_gbq_function path works for this function.
+        is_sum_positive_ref = session.read_gbq_function(
+            function_name=is_sum_positive_mf.bigframes_bigquery_function
+        )
+
+        bf_result_gbq = bf_int64_df_filtered.where(
+            is_sum_positive_ref, -bf_int64_df_filtered
+        ).to_pandas()
+        pd_result_gbq = pd_int64_df_filtered.where(
+            pd_int64_df_filtered.sum(axis=1) > 0, -pd_int64_df_filtered
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(
+            bf_result_gbq, pd_result_gbq, check_dtype=False
+        )
+
+    finally:
+        # Clean up the gcp assets created for the managed function.
+        cleanup_function_assets(
+            is_sum_positive_mf, session.bqclient, ignore_failures=False
+        )
+
+
+def test_managed_function_df_where_series(session, dataset_id, scalars_dfs):
+    try:
+
+        # The return type has to be bool type for callable where condition.
+        def is_sum_positive_series(s):
+            return s["int64_col"] + s["int64_too"] > 0
+
+        is_sum_positive_series_mf = session.udf(
+            input_types=bigframes.series.Series,
+            output_type=bool,
+            dataset=dataset_id,
+            name=prefixer.create_prefix(),
+        )(is_sum_positive_series)
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+        int64_cols = ["int64_col", "int64_too"]
+
+        bf_int64_df = scalars_df[int64_cols]
+        bf_int64_df_filtered = bf_int64_df.dropna()
+        pd_int64_df = scalars_pandas_df[int64_cols]
+        pd_int64_df_filtered = pd_int64_df.dropna()
+
+        # Use callable condition in dataframe.where method.
+        bf_result = bf_int64_df_filtered.where(is_sum_positive_series).to_pandas()
+        pd_result = pd_int64_df_filtered.where(is_sum_positive_series)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+        # Make sure the read_gbq_function path works for this function.
+        is_sum_positive_series_ref = session.read_gbq_function(
+            function_name=is_sum_positive_series_mf.bigframes_bigquery_function,
+            is_row_processor=True,
+        )
+
+        # This is for callable `other` arg in dataframe.where method.
+        def func_for_other(x):
+            return -x
+
+        bf_result_gbq = bf_int64_df_filtered.where(
+            is_sum_positive_series_ref, func_for_other
+        ).to_pandas()
+        pd_result_gbq = pd_int64_df_filtered.where(
+            is_sum_positive_series, func_for_other
+        )
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(
+            bf_result_gbq, pd_result_gbq, check_dtype=False
+        )
+
+    finally:
+        # Clean up the gcp assets created for the managed function.
+        cleanup_function_assets(
+            is_sum_positive_series_mf, session.bqclient, ignore_failures=False
+        )

tests/system/large/functions/test_remote_function.py

Lines changed: 83 additions & 0 deletions
@@ -2847,3 +2847,86 @@ def foo(x: int) -> int:
     finally:
         # clean up the gcp assets created for the remote function
         cleanup_function_assets(foo, session.bqclient, session.cloudfunctionsclient)
+
+
+@pytest.mark.flaky(retries=2, delay=120)
+def test_remote_function_df_where(session, dataset_id, scalars_dfs):
+    try:
+
+        # The return type has to be bool type for callable where condition.
+        def is_sum_positive(a, b):
+            return a + b > 0
+
+        is_sum_positive_mf = session.remote_function(
+            input_types=[int, int],
+            output_type=bool,
+            dataset=dataset_id,
+            reuse=False,
+            cloud_function_service_account="default",
+        )(is_sum_positive)
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+        int64_cols = ["int64_col", "int64_too"]
+
+        bf_int64_df = scalars_df[int64_cols]
+        bf_int64_df_filtered = bf_int64_df.dropna()
+        pd_int64_df = scalars_pandas_df[int64_cols]
+        pd_int64_df_filtered = pd_int64_df.dropna()
+
+        # Use callable condition in dataframe.where method.
+        bf_result = bf_int64_df_filtered.where(is_sum_positive_mf, 0).to_pandas()
+        # Pandas doesn't support such case, use following as workaround.
+        pd_result = pd_int64_df_filtered.where(pd_int64_df_filtered.sum(axis=1) > 0, 0)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+    finally:
+        # Clean up the gcp assets created for the remote function.
+        cleanup_function_assets(
+            is_sum_positive_mf, session.bqclient, ignore_failures=False
+        )
+
+
+@pytest.mark.flaky(retries=2, delay=120)
+def test_remote_function_df_where_series(session, dataset_id, scalars_dfs):
+    try:
+
+        # The return type has to be bool type for callable where condition.
+        def is_sum_positive_series(s):
+            return s["int64_col"] + s["int64_too"] > 0
+
+        is_sum_positive_series_mf = session.remote_function(
+            input_types=bigframes.series.Series,
+            output_type=bool,
+            dataset=dataset_id,
+            reuse=False,
+            cloud_function_service_account="default",
+        )(is_sum_positive_series)
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+        int64_cols = ["int64_col", "int64_too"]
+
+        bf_int64_df = scalars_df[int64_cols]
+        bf_int64_df_filtered = bf_int64_df.dropna()
+        pd_int64_df = scalars_pandas_df[int64_cols]
+        pd_int64_df_filtered = pd_int64_df.dropna()
+
+        # This is for callable `other` arg in dataframe.where method.
+        def func_for_other(x):
+            return -x
+
+        # Use callable condition in dataframe.where method.
+        bf_result = bf_int64_df_filtered.where(
+            is_sum_positive_series, func_for_other
+        ).to_pandas()
+        pd_result = pd_int64_df_filtered.where(is_sum_positive_series, func_for_other)
+
+        # Ignore any dtype difference.
+        pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+    finally:
+        # Clean up the gcp assets created for the remote function.
+        cleanup_function_assets(
+            is_sum_positive_series_mf, session.bqclient, ignore_failures=False
+        )

tests/unit/core/compile/sqlglot/conftest.py

Lines changed: 1 addition & 0 deletions
@@ -89,6 +89,7 @@ def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
         bigquery.SchemaField("string_col", "STRING"),
         bigquery.SchemaField("time_col", "TIME"),
         bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
+        bigquery.SchemaField("duration_col", "INTEGER"),
     ]
 
 
tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_mul_timedelta/out.sql

Lines changed: 26 additions & 14 deletions
@@ -2,30 +2,42 @@ WITH `bfcte_0` AS (
   SELECT
     `int64_col` AS `bfcol_0`,
     `rowindex` AS `bfcol_1`,
-    `timestamp_col` AS `bfcol_2`
+    `timestamp_col` AS `bfcol_2`,
+    `duration_col` AS `bfcol_3`
   FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
 ), `bfcte_1` AS (
   SELECT
     *,
-    `bfcol_1` AS `bfcol_6`,
-    `bfcol_2` AS `bfcol_7`,
-    `bfcol_0` AS `bfcol_8`,
-    CAST(FLOOR(86400000000 * `bfcol_0`) AS INT64) AS `bfcol_9`
+    `bfcol_1` AS `bfcol_8`,
+    `bfcol_2` AS `bfcol_9`,
+    `bfcol_0` AS `bfcol_10`,
+    INTERVAL `bfcol_3` MICROSECOND AS `bfcol_11`
   FROM `bfcte_0`
 ), `bfcte_2` AS (
   SELECT
     *,
-    `bfcol_6` AS `bfcol_14`,
-    `bfcol_7` AS `bfcol_15`,
     `bfcol_8` AS `bfcol_16`,
     `bfcol_9` AS `bfcol_17`,
-    CAST(FLOOR(`bfcol_8` * 86400000000) AS INT64) AS `bfcol_18`
+    `bfcol_10` AS `bfcol_18`,
+    `bfcol_11` AS `bfcol_19`,
+    CAST(FLOOR(`bfcol_11` * `bfcol_10`) AS INT64) AS `bfcol_20`
   FROM `bfcte_1`
+), `bfcte_3` AS (
+  SELECT
+    *,
+    `bfcol_16` AS `bfcol_26`,
+    `bfcol_17` AS `bfcol_27`,
+    `bfcol_18` AS `bfcol_28`,
+    `bfcol_19` AS `bfcol_29`,
+    `bfcol_20` AS `bfcol_30`,
+    CAST(FLOOR(`bfcol_18` * `bfcol_19`) AS INT64) AS `bfcol_31`
+  FROM `bfcte_2`
 )
 SELECT
-  `bfcol_14` AS `rowindex`,
-  `bfcol_15` AS `timestamp_col`,
-  `bfcol_16` AS `int64_col`,
-  `bfcol_17` AS `timedelta_mul_numeric`,
-  `bfcol_18` AS `numeric_mul_timedelta`
-FROM `bfcte_2`
+  `bfcol_26` AS `rowindex`,
+  `bfcol_27` AS `timestamp_col`,
+  `bfcol_28` AS `int64_col`,
+  `bfcol_29` AS `duration_col`,
+  `bfcol_30` AS `timedelta_mul_numeric`,
+  `bfcol_31` AS `numeric_mul_timedelta`
+FROM `bfcte_3`
