7474import bigframes .dtypes
7575import bigframes .exceptions as bfe
7676import bigframes .formatting_helpers as formatter
77+ import bigframes .functions
7778import bigframes .operations as ops
7879import bigframes .operations .aggregations as agg_ops
7980import bigframes .operations .ai
@@ -4470,15 +4471,17 @@ def _prepare_export(
44704471 return array_value , id_overrides
44714472
44724473 def map (self , func , na_action : Optional [str ] = None ) -> DataFrame :
4473- if not callable (func ):
4474+ if not isinstance (func , bigframes . functions . BigqueryCallableRoutine ):
44744475 raise TypeError ("the first argument must be callable" )
44754476
44764477 if na_action not in {None , "ignore" }:
44774478 raise ValueError (f"na_action={ na_action } not supported" )
44784479
44794480 # TODO(shobs): Support **kwargs
44804481 return self ._apply_unary_op (
4481- ops .RemoteFunctionOp (func = func , apply_on_null = (na_action is None ))
4482+ ops .RemoteFunctionOp (
4483+ function_def = func .udf_def , apply_on_null = (na_action is None )
4484+ )
44824485 )
44834486
44844487 def apply (self , func , * , axis = 0 , args : typing .Tuple = (), ** kwargs ):
@@ -4492,13 +4495,18 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
44924495 )
44934496 warnings .warn (msg , category = bfe .FunctionAxisOnePreviewWarning )
44944497
4495- if not hasattr (func , "bigframes_bigquery_function" ):
4498+ if not isinstance (
4499+ func ,
4500+ (
4501+ bigframes .functions .BigqueryCallableRoutine ,
4502+ bigframes .functions .BigqueryCallableRowRoutine ,
4503+ ),
4504+ ):
44964505 raise ValueError (
44974506 "For axis=1 a BigFrames BigQuery function must be used."
44984507 )
44994508
4500- is_row_processor = getattr (func , "is_row_processor" )
4501- if is_row_processor :
4509+ if func .is_row_processor :
45024510 # Early check whether the dataframe dtypes are currently supported
45034511 # in the bigquery function
45044512 # NOTE: Keep in sync with the value converters used in the gcf code
@@ -4552,7 +4560,7 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
45524560
45534561 # Apply the function
45544562 result_series = rows_as_json_series ._apply_unary_op (
4555- ops .RemoteFunctionOp (func = func , apply_on_null = True )
4563+ ops .RemoteFunctionOp (function_def = func . udf_def , apply_on_null = True )
45564564 )
45574565 else :
45584566 # This is a special case where we are providing not-pandas-like
@@ -4567,7 +4575,7 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
45674575 # compatible with the data types of the input params
45684576 # 3. The order of the columns in the dataframe must correspond
45694577 # to the order of the input params in the function
4570- udf_input_dtypes = getattr ( func , "input_dtypes" )
4578+ udf_input_dtypes = func . udf_def . signature . bf_input_types
45714579 if len (udf_input_dtypes ) != len (self .columns ):
45724580 raise ValueError (
45734581 f"BigFrames BigQuery function takes { len (udf_input_dtypes )} "
@@ -4581,25 +4589,11 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
45814589
45824590 series_list = [self [col ] for col in self .columns ]
45834591 result_series = series_list [0 ]._apply_nary_op (
4584- ops .NaryRemoteFunctionOp (func = func ), series_list [1 :]
4592+ ops .NaryRemoteFunctionOp (function_def = func . udf_def ), series_list [1 :]
45854593 )
45864594 result_series .name = None
45874595
4588- # If the result type is string but the function output is intended
4589- # to be an array, reconstruct the array from the string assuming it
4590- # is a json serialized form of the array.
4591- if bigframes .dtypes .is_string_like (
4592- result_series .dtype
4593- ) and bigframes .dtypes .is_array_like (func .output_dtype ):
4594- import bigframes .bigquery as bbq
4595-
4596- result_dtype = bigframes .dtypes .arrow_dtype_to_bigframes_dtype (
4597- func .output_dtype .pyarrow_dtype .value_type
4598- )
4599- result_series = bbq .json_extract_string_array (
4600- result_series , value_dtype = result_dtype
4601- )
4602-
4596+ result_series = func ._post_process_series (result_series )
46034597 return result_series
46044598
46054599 # At this point column-wise or element-wise bigquery function operation will
0 commit comments