Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
datafusion-functions 37.0.0 - Docs.rs
[go: Go Back, main page]

datafusion-functions 37.0.0

Function packages for the DataFusion query engine
Documentation
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

/// macro that exports a list of function names as:
/// 1. individual functions in an `expr_fn` module
/// 2. a single function that returns a list of all functions
///
/// Equivalent to
/// ```text
/// pub mod expr_fn {
///     use super::*;
///     /// Return encode(arg)
///     pub fn encode(args: Vec<Expr>) -> Expr {
///         super::encode().call(args)
///     }
///  ...
/// /// Return a list of all functions in this package
/// pub(crate) fn functions() -> Vec<Arc<ScalarUDF>> {
///     vec![
///       encode(),
///       decode()
///    ]
/// }
/// ```
macro_rules! export_functions {
    ($(($FUNC:ident,  $($arg:ident)*, $DOC:expr)),*) => {
        pub mod expr_fn {
            $(
                #[doc = $DOC]
                /// Return $name(arg)
                pub fn $FUNC($($arg: datafusion_expr::Expr),*) -> datafusion_expr::Expr {
                    super::$FUNC().call(vec![$($arg),*],)
                }
            )*
        }

        /// Return a list of all functions in this package
        pub fn functions() -> Vec<std::sync::Arc<datafusion_expr::ScalarUDF>> {
            vec![
                $(
                    $FUNC(),
                )*
            ]
        }
    };
}

/// Creates a singleton `ScalarUDF` of the `$UDF` function named `$GNAME` and a
/// function named `$NAME` which returns that function named $NAME.
///
/// This is used to ensure creating the list of `ScalarUDF` only happens once.
macro_rules! make_udf_function {
    ($UDF:ty, $GNAME:ident, $NAME:ident) => {
        /// Singleton instance of the function
        static $GNAME: std::sync::OnceLock<std::sync::Arc<datafusion_expr::ScalarUDF>> =
            std::sync::OnceLock::new();

        /// Return a [`ScalarUDF`] for [`$UDF`]
        ///
        /// [`ScalarUDF`]: datafusion_expr::ScalarUDF
        pub fn $NAME() -> std::sync::Arc<datafusion_expr::ScalarUDF> {
            $GNAME
                .get_or_init(|| {
                    std::sync::Arc::new(datafusion_expr::ScalarUDF::new_from_impl(
                        <$UDF>::new(),
                    ))
                })
                .clone()
        }
    };
}

/// Macro creates a sub module if the feature is not enabled
///
/// The rationale for providing stub functions is to help users to configure datafusion
/// properly (so they get an error telling them why a function is not available)
/// instead of getting a cryptic "no function found" message at runtime.

macro_rules! make_stub_package {
    ($name:ident, $feature:literal) => {
        #[cfg(not(feature = $feature))]
        #[doc = concat!("Disabled. Enable via feature flag `", $feature, "`")]
        pub mod $name {
            use datafusion_expr::ScalarUDF;
            use log::debug;
            use std::sync::Arc;

            /// Returns an empty list of functions when the feature is not enabled
            pub fn functions() -> Vec<Arc<ScalarUDF>> {
                debug!("{} functions disabled", stringify!($name));
                vec![]
            }
        }
    };
}

/// Invokes a function on each element of an array and returns the result as a new array
///
/// $ARG: ArrayRef
/// $NAME: name of the function (for error messages)
/// $ARGS_TYPE: the type of array to cast the argument to
/// $RETURN_TYPE: the type of array to return
/// $FUNC: the function to apply to each element of $ARG
///
macro_rules! make_function_scalar_inputs_return_type {
    ($ARG: expr, $NAME:expr, $ARG_TYPE:ident, $RETURN_TYPE:ident, $FUNC: block) => {{
        let arg = downcast_arg!($ARG, $NAME, $ARG_TYPE);

        arg.iter()
            .map(|a| match a {
                Some(a) => Some($FUNC(a)),
                _ => None,
            })
            .collect::<$RETURN_TYPE>()
    }};
}

/// Downcast an argument to a specific array type, returning an internal error
/// if the cast fails
///
/// $ARG: ArrayRef
/// $NAME: name of the argument (for error messages)
/// $ARRAY_TYPE: the type of array to cast the argument to
macro_rules! downcast_arg {
    ($ARG:expr, $NAME:expr, $ARRAY_TYPE:ident) => {{
        $ARG.as_any().downcast_ref::<$ARRAY_TYPE>().ok_or_else(|| {
            DataFusionError::Internal(format!(
                "could not cast {} to {}",
                $NAME,
                std::any::type_name::<$ARRAY_TYPE>()
            ))
        })?
    }};
}

/// Macro to create a unary math UDF.
///
/// A unary math function takes an argument of type Float32 or Float64,
/// applies a unary floating function to the argument, and returns a value of the same type.
///
/// $UDF: the name of the UDF struct that implements `ScalarUDFImpl`
/// $GNAME: a singleton instance of the UDF
/// $NAME: the name of the function
/// $UNARY_FUNC: the unary function to apply to the argument
macro_rules! make_math_unary_udf {
    ($UDF:ident, $GNAME:ident, $NAME:ident, $UNARY_FUNC:ident) => {
        make_udf_function!($NAME::$UDF, $GNAME, $NAME);

        mod $NAME {
            use arrow::array::{ArrayRef, Float32Array, Float64Array};
            use arrow::datatypes::DataType;
            use datafusion_common::{exec_err, DataFusionError, Result};
            use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
            use std::any::Any;
            use std::sync::Arc;

            #[derive(Debug)]
            pub struct $UDF {
                signature: Signature,
            }

            impl $UDF {
                pub fn new() -> Self {
                    use DataType::*;
                    Self {
                        signature: Signature::uniform(
                            1,
                            vec![Float64, Float32],
                            Volatility::Immutable,
                        ),
                    }
                }
            }

            impl ScalarUDFImpl for $UDF {
                fn as_any(&self) -> &dyn Any {
                    self
                }
                fn name(&self) -> &str {
                    stringify!($NAME)
                }

                fn signature(&self) -> &Signature {
                    &self.signature
                }

                fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
                    let arg_type = &arg_types[0];

                    match arg_type {
                        DataType::Float32 => Ok(DataType::Float32),
                        // For other types (possible values float64/null/int), use Float64
                        _ => Ok(DataType::Float64),
                    }
                }

                fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
                    let args = ColumnarValue::values_to_arrays(args)?;

                    let arr: ArrayRef = match args[0].data_type() {
                        DataType::Float64 => {
                            Arc::new(make_function_scalar_inputs_return_type!(
                                &args[0],
                                self.name(),
                                Float64Array,
                                Float64Array,
                                { f64::$UNARY_FUNC }
                            ))
                        }
                        DataType::Float32 => {
                            Arc::new(make_function_scalar_inputs_return_type!(
                                &args[0],
                                self.name(),
                                Float32Array,
                                Float32Array,
                                { f32::$UNARY_FUNC }
                            ))
                        }
                        other => {
                            return exec_err!(
                                "Unsupported data type {other:?} for function {}",
                                self.name()
                            )
                        }
                    };
                    Ok(ColumnarValue::Array(arr))
                }
            }
        }
    };
}