Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
udf.rs - source
[go: Go Back, main page]

datafusion_expr/
udf.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! [`ScalarUDF`]: Scalar User Defined Functions
19
20use crate::expr::schema_name_from_exprs_comma_separated_without_space;
21use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
22use crate::sort_properties::{ExprProperties, SortProperties};
23use crate::{ColumnarValue, Documentation, Expr, Signature};
24use arrow::datatypes::{DataType, Field, FieldRef};
25use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue};
26use datafusion_expr_common::interval_arithmetic::Interval;
27use std::any::Any;
28use std::cmp::Ordering;
29use std::fmt::Debug;
30use std::hash::{DefaultHasher, Hash, Hasher};
31use std::sync::Arc;
32
/// Logical representation of a Scalar User Defined Function.
///
/// A scalar function produces a single row output for each row of input. This
/// struct contains the information DataFusion needs to plan and invoke
/// functions you supply such as name, type signature, return type, and actual
/// implementation.
///
/// 1. For simple use cases, use [`create_udf`] (examples in [`simple_udf.rs`]).
///
/// 2. For advanced use cases, use [`ScalarUDFImpl`] which provides full API
///    access (examples in [`advanced_udf.rs`]).
///
/// See [`Self::call`] to create an `Expr` which invokes a `ScalarUDF` with arguments.
///
/// Cloning a `ScalarUDF` only clones the [`Arc`] that points at the shared
/// implementation.
///
/// # API Note
///
/// This is a separate struct from [`ScalarUDFImpl`] to maintain backwards
/// compatibility with the older API.
///
/// [`create_udf`]: crate::expr_fn::create_udf
/// [`simple_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/simple_udf.rs
/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
#[derive(Debug, Clone)]
pub struct ScalarUDF {
    /// Shared implementation that every method of this wrapper delegates to
    inner: Arc<dyn ScalarUDFImpl>,
}
59
60impl PartialEq for ScalarUDF {
61    fn eq(&self, other: &Self) -> bool {
62        self.inner.equals(other.inner.as_ref())
63    }
64}
65
66// Manual implementation based on `ScalarUDFImpl::equals`
67impl PartialOrd for ScalarUDF {
68    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
69        match self.name().partial_cmp(other.name()) {
70            Some(Ordering::Equal) => self.signature().partial_cmp(other.signature()),
71            cmp => cmp,
72        }
73    }
74}
75
// Marker impl: equality delegates to `ScalarUDFImpl::equals`, which is
// documented to be reflexive, symmetric and transitive.
impl Eq for ScalarUDF {}
77
78impl Hash for ScalarUDF {
79    fn hash<H: Hasher>(&self, state: &mut H) {
80        self.inner.hash_value().hash(state)
81    }
82}
83
84impl ScalarUDF {
85    /// Create a new `ScalarUDF` from a `[ScalarUDFImpl]` trait object
86    ///
87    /// Note this is the same as using the `From` impl (`ScalarUDF::from`)
88    pub fn new_from_impl<F>(fun: F) -> ScalarUDF
89    where
90        F: ScalarUDFImpl + 'static,
91    {
92        Self::new_from_shared_impl(Arc::new(fun))
93    }
94
95    /// Create a new `ScalarUDF` from a `[ScalarUDFImpl]` trait object
96    pub fn new_from_shared_impl(fun: Arc<dyn ScalarUDFImpl>) -> ScalarUDF {
97        Self { inner: fun }
98    }
99
100    /// Return the underlying [`ScalarUDFImpl`] trait object for this function
101    pub fn inner(&self) -> &Arc<dyn ScalarUDFImpl> {
102        &self.inner
103    }
104
105    /// Adds additional names that can be used to invoke this function, in
106    /// addition to `name`
107    ///
108    /// If you implement [`ScalarUDFImpl`] directly you should return aliases directly.
109    pub fn with_aliases(self, aliases: impl IntoIterator<Item = &'static str>) -> Self {
110        Self::new_from_impl(AliasedScalarUDFImpl::new(Arc::clone(&self.inner), aliases))
111    }
112
113    /// Returns a [`Expr`] logical expression to call this UDF with specified
114    /// arguments.
115    ///
116    /// This utility allows easily calling UDFs
117    ///
118    /// # Example
119    /// ```no_run
120    /// use datafusion_expr::{col, lit, ScalarUDF};
121    /// # fn my_udf() -> ScalarUDF { unimplemented!() }
122    /// let my_func: ScalarUDF = my_udf();
123    /// // Create an expr for `my_func(a, 12.3)`
124    /// let expr = my_func.call(vec![col("a"), lit(12.3)]);
125    /// ```
126    pub fn call(&self, args: Vec<Expr>) -> Expr {
127        Expr::ScalarFunction(crate::expr::ScalarFunction::new_udf(
128            Arc::new(self.clone()),
129            args,
130        ))
131    }
132
133    /// Returns this function's name.
134    ///
135    /// See [`ScalarUDFImpl::name`] for more details.
136    pub fn name(&self) -> &str {
137        self.inner.name()
138    }
139
140    /// Returns this function's display_name.
141    ///
142    /// See [`ScalarUDFImpl::display_name`] for more details
143    pub fn display_name(&self, args: &[Expr]) -> Result<String> {
144        self.inner.display_name(args)
145    }
146
147    /// Returns this function's schema_name.
148    ///
149    /// See [`ScalarUDFImpl::schema_name`] for more details
150    pub fn schema_name(&self, args: &[Expr]) -> Result<String> {
151        self.inner.schema_name(args)
152    }
153
154    /// Returns the aliases for this function.
155    ///
156    /// See [`ScalarUDF::with_aliases`] for more details
157    pub fn aliases(&self) -> &[String] {
158        self.inner.aliases()
159    }
160
161    /// Returns this function's [`Signature`] (what input types are accepted).
162    ///
163    /// See [`ScalarUDFImpl::signature`] for more details.
164    pub fn signature(&self) -> &Signature {
165        self.inner.signature()
166    }
167
168    /// The datatype this function returns given the input argument types.
169    /// This function is used when the input arguments are [`DataType`]s.
170    ///
171    ///  # Notes
172    ///
173    /// If a function implement [`ScalarUDFImpl::return_field_from_args`],
174    /// its [`ScalarUDFImpl::return_type`] should raise an error.
175    ///
176    /// See [`ScalarUDFImpl::return_type`] for more details.
177    pub fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
178        self.inner.return_type(arg_types)
179    }
180
181    /// Return the datatype this function returns given the input argument types.
182    ///
183    /// See [`ScalarUDFImpl::return_field_from_args`] for more details.
184    pub fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
185        self.inner.return_field_from_args(args)
186    }
187
188    /// Do the function rewrite
189    ///
190    /// See [`ScalarUDFImpl::simplify`] for more details.
191    pub fn simplify(
192        &self,
193        args: Vec<Expr>,
194        info: &dyn SimplifyInfo,
195    ) -> Result<ExprSimplifyResult> {
196        self.inner.simplify(args, info)
197    }
198
199    #[allow(deprecated)]
200    pub fn is_nullable(&self, args: &[Expr], schema: &dyn ExprSchema) -> bool {
201        self.inner.is_nullable(args, schema)
202    }
203
204    /// Invoke the function on `args`, returning the appropriate result.
205    ///
206    /// See [`ScalarUDFImpl::invoke_with_args`] for details.
207    pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
208        self.inner.invoke_with_args(args)
209    }
210
211    /// Get the circuits of inner implementation
212    pub fn short_circuits(&self) -> bool {
213        self.inner.short_circuits()
214    }
215
216    /// Computes the output interval for a [`ScalarUDF`], given the input
217    /// intervals.
218    ///
219    /// # Parameters
220    ///
221    /// * `inputs` are the intervals for the inputs (children) of this function.
222    ///
223    /// # Example
224    ///
225    /// If the function is `ABS(a)`, and the input interval is `a: [-3, 2]`,
226    /// then the output interval would be `[0, 3]`.
227    pub fn evaluate_bounds(&self, inputs: &[&Interval]) -> Result<Interval> {
228        self.inner.evaluate_bounds(inputs)
229    }
230
231    /// Updates bounds for child expressions, given a known interval for this
232    /// function. This is used to propagate constraints down through an expression
233    /// tree.
234    ///
235    /// # Parameters
236    ///
237    /// * `interval` is the currently known interval for this function.
238    /// * `inputs` are the current intervals for the inputs (children) of this function.
239    ///
240    /// # Returns
241    ///
242    /// A `Vec` of new intervals for the children, in order.
243    ///
244    /// If constraint propagation reveals an infeasibility for any child, returns
245    /// [`None`]. If none of the children intervals change as a result of
246    /// propagation, may return an empty vector instead of cloning `children`.
247    /// This is the default (and conservative) return value.
248    ///
249    /// # Example
250    ///
251    /// If the function is `ABS(a)`, the current `interval` is `[4, 5]` and the
252    /// input `a` is given as `[-7, 3]`, then propagation would return `[-5, 3]`.
253    pub fn propagate_constraints(
254        &self,
255        interval: &Interval,
256        inputs: &[&Interval],
257    ) -> Result<Option<Vec<Interval>>> {
258        self.inner.propagate_constraints(interval, inputs)
259    }
260
261    /// Calculates the [`SortProperties`] of this function based on its
262    /// children's properties.
263    pub fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
264        self.inner.output_ordering(inputs)
265    }
266
267    pub fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
268        self.inner.preserves_lex_ordering(inputs)
269    }
270
271    /// See [`ScalarUDFImpl::coerce_types`] for more details.
272    pub fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
273        self.inner.coerce_types(arg_types)
274    }
275
276    /// Returns the documentation for this Scalar UDF.
277    ///
278    /// Documentation can be accessed programmatically as well as
279    /// generating publicly facing documentation.
280    pub fn documentation(&self) -> Option<&Documentation> {
281        self.inner.documentation()
282    }
283}
284
285impl<F> From<F> for ScalarUDF
286where
287    F: ScalarUDFImpl + 'static,
288{
289    fn from(fun: F) -> Self {
290        Self::new_from_impl(fun)
291    }
292}
293
/// Arguments passed to [`ScalarUDFImpl::invoke_with_args`] when invoking a
/// scalar function.
pub struct ScalarFunctionArgs {
    /// The evaluated arguments to the function
    pub args: Vec<ColumnarValue>,
    /// Field associated with each arg, if it exists
    pub arg_fields: Vec<FieldRef>,
    /// The number of rows in the record batch being evaluated
    pub number_rows: usize,
    /// The return field of the scalar function (as produced by `return_type`
    /// or `return_field_from_args`) when creating the physical expression
    /// from the logical expression
    pub return_field: FieldRef,
}
308
309impl ScalarFunctionArgs {
310    /// The return type of the function. See [`Self::return_field`] for more
311    /// details.
312    pub fn return_type(&self) -> &DataType {
313        self.return_field.data_type()
314    }
315}
316
/// Information about arguments passed to the function
///
/// This structure contains metadata about how the function was called
/// such as the type of the arguments, any scalar arguments and if the
/// arguments can (ever) be null
///
/// See [`ScalarUDFImpl::return_field_from_args`] for more information
#[derive(Debug)]
pub struct ReturnFieldArgs<'a> {
    /// The fields (which carry the data types) of the arguments to the function
    pub arg_fields: &'a [FieldRef],
    /// Is argument `i` to the function a scalar (constant)?
    ///
    /// If the argument `i` is not a scalar, it will be None
    ///
    /// For example, if a function is called like `my_function(column_a, 5)`
    /// this field will be `[None, Some(ScalarValue::Int32(Some(5)))]`
    pub scalar_arguments: &'a [Option<&'a ScalarValue>],
}
336
337/// Trait for implementing user defined scalar functions.
338///
339/// This trait exposes the full API for implementing user defined functions and
340/// can be used to implement any function.
341///
342/// See [`advanced_udf.rs`] for a full example with complete implementation and
343/// [`ScalarUDF`] for other available options.
344///
345/// [`advanced_udf.rs`]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
346///
347/// # Basic Example
348/// ```
349/// # use std::any::Any;
350/// # use std::sync::LazyLock;
351/// # use arrow::datatypes::DataType;
352/// # use datafusion_common::{DataFusionError, plan_err, Result};
353/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility};
354/// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF};
355/// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH;
356/// /// This struct for a simple UDF that adds one to an int32
357/// #[derive(Debug)]
358/// struct AddOne {
359///   signature: Signature,
360/// }
361///
362/// impl AddOne {
363///   fn new() -> Self {
364///     Self {
365///       signature: Signature::uniform(1, vec![DataType::Int32], Volatility::Immutable),
366///      }
367///   }
368/// }
369///
370/// static DOCUMENTATION: LazyLock<Documentation> = LazyLock::new(|| {
371///         Documentation::builder(DOC_SECTION_MATH, "Add one to an int32", "add_one(2)")
372///             .with_argument("arg1", "The int32 number to add one to")
373///             .build()
374///     });
375///
376/// fn get_doc() -> &'static Documentation {
377///     &DOCUMENTATION
378/// }
379///
380/// /// Implement the ScalarUDFImpl trait for AddOne
381/// impl ScalarUDFImpl for AddOne {
382///    fn as_any(&self) -> &dyn Any { self }
383///    fn name(&self) -> &str { "add_one" }
384///    fn signature(&self) -> &Signature { &self.signature }
385///    fn return_type(&self, args: &[DataType]) -> Result<DataType> {
386///      if !matches!(args.get(0), Some(&DataType::Int32)) {
387///        return plan_err!("add_one only accepts Int32 arguments");
388///      }
389///      Ok(DataType::Int32)
390///    }
391///    // The actual implementation would add one to the argument
392///    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
393///         unimplemented!()
394///    }
395///    fn documentation(&self) -> Option<&Documentation> {
396///         Some(get_doc())
397///     }
398/// }
399///
400/// // Create a new ScalarUDF from the implementation
401/// let add_one = ScalarUDF::from(AddOne::new());
402///
403/// // Call the function `add_one(col)`
404/// let expr = add_one.call(vec![col("a")]);
405/// ```
406pub trait ScalarUDFImpl: Debug + Send + Sync {
407    // Note: When adding any methods (with default implementations), remember to add them also
408    // into the AliasedScalarUDFImpl below!
409
410    /// Returns this object as an [`Any`] trait object
411    fn as_any(&self) -> &dyn Any;
412
413    /// Returns this function's name
414    fn name(&self) -> &str;
415
416    /// Returns the user-defined display name of function, given the arguments
417    ///
418    /// This can be used to customize the output column name generated by this
419    /// function.
420    ///
421    /// Defaults to `name(args[0], args[1], ...)`
422    fn display_name(&self, args: &[Expr]) -> Result<String> {
423        let names: Vec<String> = args.iter().map(ToString::to_string).collect();
424        // TODO: join with ", " to standardize the formatting of Vec<Expr>, <https://github.com/apache/datafusion/issues/10364>
425        Ok(format!("{}({})", self.name(), names.join(",")))
426    }
427
428    /// Returns the name of the column this expression would create
429    ///
430    /// See [`Expr::schema_name`] for details
431    fn schema_name(&self, args: &[Expr]) -> Result<String> {
432        Ok(format!(
433            "{}({})",
434            self.name(),
435            schema_name_from_exprs_comma_separated_without_space(args)?
436        ))
437    }
438
439    /// Returns the function's [`Signature`] for information about what input
440    /// types are accepted and the function's Volatility.
441    fn signature(&self) -> &Signature;
442
443    /// What [`DataType`] will be returned by this function, given the types of
444    /// the arguments.
445    ///
446    /// # Notes
447    ///
448    /// If you provide an implementation for [`Self::return_field_from_args`],
449    /// DataFusion will not call `return_type` (this function). In such cases
450    /// is recommended to return [`DataFusionError::Internal`].
451    ///
452    /// [`DataFusionError::Internal`]: datafusion_common::DataFusionError::Internal
453    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType>;
454
455    /// What type will be returned by this function, given the arguments?
456    ///
457    /// By default, this function calls [`Self::return_type`] with the
458    /// types of each argument.
459    ///
460    /// # Notes
461    ///
462    /// For the majority of UDFs, implementing [`Self::return_type`] is sufficient,
463    /// as the result type is typically a deterministic function of the input types
464    /// (e.g., `sqrt(f32)` consistently yields `f32`). Implementing this method directly
465    /// is generally unnecessary unless the return type depends on runtime values.
466    ///
467    /// This function can be used for more advanced cases such as:
468    ///
469    /// 1. specifying nullability
470    /// 2. return types based on the **values** of the arguments (rather than
471    ///    their **types**.
472    ///
473    /// # Example creating `Field`
474    ///
475    /// Note the name of the [`Field`] is ignored, except for structured types such as
476    /// `DataType::Struct`.
477    ///
478    /// ```rust
479    /// # use std::sync::Arc;
480    /// # use arrow::datatypes::{DataType, Field, FieldRef};
481    /// # use datafusion_common::Result;
482    /// # use datafusion_expr::ReturnFieldArgs;
483    /// # struct Example{}
484    /// # impl Example {
485    /// fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
486    ///   // report output is only nullable if any one of the arguments are nullable
487    ///   let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
488    ///   let field = Arc::new(Field::new("ignored_name", DataType::Int32, true));
489    ///   Ok(field)
490    /// }
491    /// # }
492    /// ```
493    ///
494    /// # Output Type based on Values
495    ///
496    /// For example, the following two function calls get the same argument
497    /// types (something and a `Utf8` string) but return different types based
498    /// on the value of the second argument:
499    ///
500    /// * `arrow_cast(x, 'Int16')` --> `Int16`
501    /// * `arrow_cast(x, 'Float32')` --> `Float32`
502    ///
503    /// # Requirements
504    ///
505    /// This function **must** consistently return the same type for the same
506    /// logical input even if the input is simplified (e.g. it must return the same
507    /// value for `('foo' | 'bar')` as it does for ('foobar').
508    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
509        let data_types = args
510            .arg_fields
511            .iter()
512            .map(|f| f.data_type())
513            .cloned()
514            .collect::<Vec<_>>();
515        let return_type = self.return_type(&data_types)?;
516        Ok(Arc::new(Field::new(self.name(), return_type, true)))
517    }
518
519    #[deprecated(
520        since = "45.0.0",
521        note = "Use `return_field_from_args` instead. if you use `is_nullable` that returns non-nullable with `return_type`, you would need to switch to `return_field_from_args`, you might have error"
522    )]
523    fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool {
524        true
525    }
526
527    /// Invoke the function returning the appropriate result.
528    ///
529    /// # Performance
530    ///
531    /// For the best performance, the implementations should handle the common case
532    /// when one or more of their arguments are constant values (aka
533    /// [`ColumnarValue::Scalar`]).
534    ///
535    /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
536    /// to arrays, which will likely be simpler code, but be slower.
537    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue>;
538
539    /// Returns any aliases (alternate names) for this function.
540    ///
541    /// Aliases can be used to invoke the same function using different names.
542    /// For example in some databases `now()` and `current_timestamp()` are
543    /// aliases for the same function. This behavior can be obtained by
544    /// returning `current_timestamp` as an alias for the `now` function.
545    ///
546    /// Note: `aliases` should only include names other than [`Self::name`].
547    /// Defaults to `[]` (no aliases)
548    fn aliases(&self) -> &[String] {
549        &[]
550    }
551
552    /// Optionally apply per-UDF simplification / rewrite rules.
553    ///
554    /// This can be used to apply function specific simplification rules during
555    /// optimization (e.g. `arrow_cast` --> `Expr::Cast`). The default
556    /// implementation does nothing.
557    ///
558    /// Note that DataFusion handles simplifying arguments and  "constant
559    /// folding" (replacing a function call with constant arguments such as
560    /// `my_add(1,2) --> 3` ). Thus, there is no need to implement such
561    /// optimizations manually for specific UDFs.
562    ///
563    /// # Arguments
564    /// * `args`: The arguments of the function
565    /// * `info`: The necessary information for simplification
566    ///
567    /// # Returns
568    /// [`ExprSimplifyResult`] indicating the result of the simplification NOTE
569    /// if the function cannot be simplified, the arguments *MUST* be returned
570    /// unmodified
571    fn simplify(
572        &self,
573        args: Vec<Expr>,
574        _info: &dyn SimplifyInfo,
575    ) -> Result<ExprSimplifyResult> {
576        Ok(ExprSimplifyResult::Original(args))
577    }
578
579    /// Returns true if some of this `exprs` subexpressions may not be evaluated
580    /// and thus any side effects (like divide by zero) may not be encountered.
581    ///
582    /// Setting this to true prevents certain optimizations such as common
583    /// subexpression elimination
584    fn short_circuits(&self) -> bool {
585        false
586    }
587
588    /// Computes the output [`Interval`] for a [`ScalarUDFImpl`], given the input
589    /// intervals.
590    ///
591    /// # Parameters
592    ///
593    /// * `children` are the intervals for the children (inputs) of this function.
594    ///
595    /// # Example
596    ///
597    /// If the function is `ABS(a)`, and the input interval is `a: [-3, 2]`,
598    /// then the output interval would be `[0, 3]`.
599    fn evaluate_bounds(&self, _input: &[&Interval]) -> Result<Interval> {
600        // We cannot assume the input datatype is the same of output type.
601        Interval::make_unbounded(&DataType::Null)
602    }
603
604    /// Updates bounds for child expressions, given a known [`Interval`]s for this
605    /// function.
606    ///
607    /// This function is used to propagate constraints down through an
608    /// expression tree.
609    ///
610    /// # Parameters
611    ///
612    /// * `interval` is the currently known interval for this function.
613    /// * `inputs` are the current intervals for the inputs (children) of this function.
614    ///
615    /// # Returns
616    ///
617    /// A `Vec` of new intervals for the children, in order.
618    ///
619    /// If constraint propagation reveals an infeasibility for any child, returns
620    /// [`None`]. If none of the children intervals change as a result of
621    /// propagation, may return an empty vector instead of cloning `children`.
622    /// This is the default (and conservative) return value.
623    ///
624    /// # Example
625    ///
626    /// If the function is `ABS(a)`, the current `interval` is `[4, 5]` and the
627    /// input `a` is given as `[-7, 3]`, then propagation would return `[-5, 3]`.
628    fn propagate_constraints(
629        &self,
630        _interval: &Interval,
631        _inputs: &[&Interval],
632    ) -> Result<Option<Vec<Interval>>> {
633        Ok(Some(vec![]))
634    }
635
636    /// Calculates the [`SortProperties`] of this function based on its children's properties.
637    fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
638        if !self.preserves_lex_ordering(inputs)? {
639            return Ok(SortProperties::Unordered);
640        }
641
642        let Some(first_order) = inputs.first().map(|p| &p.sort_properties) else {
643            return Ok(SortProperties::Singleton);
644        };
645
646        if inputs
647            .iter()
648            .skip(1)
649            .all(|input| &input.sort_properties == first_order)
650        {
651            Ok(*first_order)
652        } else {
653            Ok(SortProperties::Unordered)
654        }
655    }
656
657    /// Returns true if the function preserves lexicographical ordering based on
658    /// the input ordering.
659    ///
660    /// For example, `concat(a || b)` preserves lexicographical ordering, but `abs(a)` does not.
661    fn preserves_lex_ordering(&self, _inputs: &[ExprProperties]) -> Result<bool> {
662        Ok(false)
663    }
664
665    /// Coerce arguments of a function call to types that the function can evaluate.
666    ///
667    /// This function is only called if [`ScalarUDFImpl::signature`] returns
668    /// [`crate::TypeSignature::UserDefined`]. Most UDFs should return one of
669    /// the other variants of [`TypeSignature`] which handle common cases.
670    ///
671    /// See the [type coercion module](crate::type_coercion)
672    /// documentation for more details on type coercion
673    ///
674    /// [`TypeSignature`]: crate::TypeSignature
675    ///
676    /// For example, if your function requires a floating point arguments, but the user calls
677    /// it like `my_func(1::int)` (i.e. with `1` as an integer), coerce_types can return `[DataType::Float64]`
678    /// to ensure the argument is converted to `1::double`
679    ///
680    /// # Parameters
681    /// * `arg_types`: The argument types of the arguments  this function with
682    ///
683    /// # Return value
684    /// A Vec the same length as `arg_types`. DataFusion will `CAST` the function call
685    /// arguments to these specific types.
686    fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
687        not_impl_err!("Function {} does not implement coerce_types", self.name())
688    }
689
690    /// Return true if this scalar UDF is equal to the other.
691    ///
692    /// Allows customizing the equality of scalar UDFs.
693    /// Must be consistent with [`Self::hash_value`] and follow the same rules as [`Eq`]:
694    ///
695    /// - reflexive: `a.equals(a)`;
696    /// - symmetric: `a.equals(b)` implies `b.equals(a)`;
697    /// - transitive: `a.equals(b)` and `b.equals(c)` implies `a.equals(c)`.
698    ///
699    /// By default, compares [`Self::name`] and [`Self::signature`].
700    fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
701        self.name() == other.name() && self.signature() == other.signature()
702    }
703
704    /// Returns a hash value for this scalar UDF.
705    ///
706    /// Allows customizing the hash code of scalar UDFs. Similarly to [`Hash`] and [`Eq`],
707    /// if [`Self::equals`] returns true for two UDFs, their `hash_value`s must be the same.
708    ///
709    /// By default, hashes [`Self::name`] and [`Self::signature`].
710    fn hash_value(&self) -> u64 {
711        let hasher = &mut DefaultHasher::new();
712        self.name().hash(hasher);
713        self.signature().hash(hasher);
714        hasher.finish()
715    }
716
717    /// Returns the documentation for this Scalar UDF.
718    ///
719    /// Documentation can be accessed programmatically as well as generating
720    /// publicly facing documentation.
721    fn documentation(&self) -> Option<&Documentation> {
722        None
723    }
724}
725
/// ScalarUDF that adds an alias to the underlying function. It is better to
/// implement [`ScalarUDFImpl`], which supports aliases, directly if possible.
#[derive(Debug)]
struct AliasedScalarUDFImpl {
    /// The wrapped implementation that calls are forwarded to
    inner: Arc<dyn ScalarUDFImpl>,
    /// The aliases reported by `inner` followed by the additional ones
    aliases: Vec<String>,
}
733
734impl AliasedScalarUDFImpl {
735    pub fn new(
736        inner: Arc<dyn ScalarUDFImpl>,
737        new_aliases: impl IntoIterator<Item = &'static str>,
738    ) -> Self {
739        let mut aliases = inner.aliases().to_vec();
740        aliases.extend(new_aliases.into_iter().map(|s| s.to_string()));
741        Self { inner, aliases }
742    }
743}
744
745impl ScalarUDFImpl for AliasedScalarUDFImpl {
746    fn as_any(&self) -> &dyn Any {
747        self
748    }
749
750    fn name(&self) -> &str {
751        self.inner.name()
752    }
753
754    fn display_name(&self, args: &[Expr]) -> Result<String> {
755        self.inner.display_name(args)
756    }
757
758    fn schema_name(&self, args: &[Expr]) -> Result<String> {
759        self.inner.schema_name(args)
760    }
761
762    fn signature(&self) -> &Signature {
763        self.inner.signature()
764    }
765
766    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
767        self.inner.return_type(arg_types)
768    }
769
770    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
771        self.inner.return_field_from_args(args)
772    }
773
774    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
775        self.inner.invoke_with_args(args)
776    }
777
778    fn aliases(&self) -> &[String] {
779        &self.aliases
780    }
781
782    fn simplify(
783        &self,
784        args: Vec<Expr>,
785        info: &dyn SimplifyInfo,
786    ) -> Result<ExprSimplifyResult> {
787        self.inner.simplify(args, info)
788    }
789
790    fn short_circuits(&self) -> bool {
791        self.inner.short_circuits()
792    }
793
794    fn evaluate_bounds(&self, input: &[&Interval]) -> Result<Interval> {
795        self.inner.evaluate_bounds(input)
796    }
797
798    fn propagate_constraints(
799        &self,
800        interval: &Interval,
801        inputs: &[&Interval],
802    ) -> Result<Option<Vec<Interval>>> {
803        self.inner.propagate_constraints(interval, inputs)
804    }
805
806    fn output_ordering(&self, inputs: &[ExprProperties]) -> Result<SortProperties> {
807        self.inner.output_ordering(inputs)
808    }
809
810    fn preserves_lex_ordering(&self, inputs: &[ExprProperties]) -> Result<bool> {
811        self.inner.preserves_lex_ordering(inputs)
812    }
813
814    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
815        self.inner.coerce_types(arg_types)
816    }
817
818    fn equals(&self, other: &dyn ScalarUDFImpl) -> bool {
819        if let Some(other) = other.as_any().downcast_ref::<AliasedScalarUDFImpl>() {
820            self.inner.equals(other.inner.as_ref()) && self.aliases == other.aliases
821        } else {
822            false
823        }
824    }
825
826    fn hash_value(&self) -> u64 {
827        let hasher = &mut DefaultHasher::new();
828        self.inner.hash_value().hash(hasher);
829        self.aliases.hash(hasher);
830        hasher.finish()
831    }
832
833    fn documentation(&self) -> Option<&Documentation> {
834        self.inner.documentation()
835    }
836}
837
838// Scalar UDF doc sections for use in public documentation
839pub mod scalar_doc_sections {
840    use crate::DocSection;
841
842    pub fn doc_sections() -> Vec<DocSection> {
843        vec![
844            DOC_SECTION_MATH,
845            DOC_SECTION_CONDITIONAL,
846            DOC_SECTION_STRING,
847            DOC_SECTION_BINARY_STRING,
848            DOC_SECTION_REGEX,
849            DOC_SECTION_DATETIME,
850            DOC_SECTION_ARRAY,
851            DOC_SECTION_STRUCT,
852            DOC_SECTION_MAP,
853            DOC_SECTION_HASHING,
854            DOC_SECTION_UNION,
855            DOC_SECTION_OTHER,
856        ]
857    }
858
859    pub const fn doc_sections_const() -> &'static [DocSection] {
860        &[
861            DOC_SECTION_MATH,
862            DOC_SECTION_CONDITIONAL,
863            DOC_SECTION_STRING,
864            DOC_SECTION_BINARY_STRING,
865            DOC_SECTION_REGEX,
866            DOC_SECTION_DATETIME,
867            DOC_SECTION_ARRAY,
868            DOC_SECTION_STRUCT,
869            DOC_SECTION_MAP,
870            DOC_SECTION_HASHING,
871            DOC_SECTION_UNION,
872            DOC_SECTION_OTHER,
873        ]
874    }
875
876    pub const DOC_SECTION_MATH: DocSection = DocSection {
877        include: true,
878        label: "Math Functions",
879        description: None,
880    };
881
882    pub const DOC_SECTION_CONDITIONAL: DocSection = DocSection {
883        include: true,
884        label: "Conditional Functions",
885        description: None,
886    };
887
888    pub const DOC_SECTION_STRING: DocSection = DocSection {
889        include: true,
890        label: "String Functions",
891        description: None,
892    };
893
894    pub const DOC_SECTION_BINARY_STRING: DocSection = DocSection {
895        include: true,
896        label: "Binary String Functions",
897        description: None,
898    };
899
900    pub const DOC_SECTION_REGEX: DocSection = DocSection {
901        include: true,
902        label: "Regular Expression Functions",
903        description: Some(
904            r#"Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions)
905regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax)
906(minus support for several features including look-around and backreferences).
907The following regular expression functions are supported:"#,
908        ),
909    };
910
911    pub const DOC_SECTION_DATETIME: DocSection = DocSection {
912        include: true,
913        label: "Time and Date Functions",
914        description: None,
915    };
916
917    pub const DOC_SECTION_ARRAY: DocSection = DocSection {
918        include: true,
919        label: "Array Functions",
920        description: None,
921    };
922
923    pub const DOC_SECTION_STRUCT: DocSection = DocSection {
924        include: true,
925        label: "Struct Functions",
926        description: None,
927    };
928
929    pub const DOC_SECTION_MAP: DocSection = DocSection {
930        include: true,
931        label: "Map Functions",
932        description: None,
933    };
934
935    pub const DOC_SECTION_HASHING: DocSection = DocSection {
936        include: true,
937        label: "Hashing Functions",
938        description: None,
939    };
940
941    pub const DOC_SECTION_OTHER: DocSection = DocSection {
942        include: true,
943        label: "Other Functions",
944        description: None,
945    };
946
947    pub const DOC_SECTION_UNION: DocSection = DocSection {
948        include: true,
949        label: "Union Functions",
950        description: Some("Functions to work with the union data type, also know as tagged unions, variant types, enums or sum types. Note: Not related to the SQL UNION operator"),
951    };
952}