// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#![doc(
    html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg",
    html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg"
)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
// Make cheap clones clear: https://github.com/apache/datafusion/issues/11143
#![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))]
#![warn(missing_docs, clippy::needless_borrow)]

//! [DataFusion] is an extensible query engine written in Rust that
//! uses [Apache Arrow] as its in-memory format. DataFusion's target users are
//! developers building fast and feature-rich database and analytic systems,
//! customized to particular workloads. See [use cases] for examples.
//!
//! "Out of the box," DataFusion offers [SQL] and [`DataFrame`] APIs,
//! excellent [performance], built-in support for CSV, Parquet, JSON, and Avro,
//! extensive customization, and a great community.
//! [Python Bindings] are also available.
//!
//! DataFusion features a full query planner, a columnar, streaming, multi-threaded,
//! vectorized execution engine, and partitioned data sources. You can
//! customize DataFusion at almost all points, including additional data sources,
//! query languages, functions, custom operators and more.
//! See the [Architecture] section below for more details.
//!
//! [DataFusion]: https://datafusion.apache.org/
//! [Apache Arrow]: https://arrow.apache.org
//! [use cases]: https://datafusion.apache.org/user-guide/introduction.html#use-cases
//! [SQL]: https://datafusion.apache.org/user-guide/sql/index.html
//! [`DataFrame`]: dataframe::DataFrame
//! [performance]: https://benchmark.clickhouse.com/
//! [Python Bindings]: https://github.com/apache/datafusion-python
//! [Architecture]: #architecture
//!
//! # Examples
//!
//! The main entry point for interacting with DataFusion is the
//! [`SessionContext`]. [`Expr`]s represent expressions such as `a + b`.
//!
//! [`SessionContext`]: execution::context::SessionContext
//!
//! ## DataFrame
//!
//! To execute a query against data stored
//! in a CSV file using a [`DataFrame`]:
//!
//! ```rust
//! # use datafusion::prelude::*;
//! # use datafusion::error::Result;
//! # use datafusion::functions_aggregate::expr_fn::min;
//! # use datafusion::arrow::array::RecordBatch;
//!
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//!
//! // create the dataframe
//! let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?;
//!
//! // create a plan
//! let df = df.filter(col("a").lt_eq(col("b")))?
//!            .aggregate(vec![col("a")], vec![min(col("b"))])?
//!            .limit(0, Some(100))?;
//!
//! // execute the plan
//! let results: Vec<RecordBatch> = df.collect().await?;
//!
//! // format the results
//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?
//!    .to_string();
//!
//! let expected = vec![
//!     "+---+----------------+",
//!     "| a | min(?table?.b) |",
//!     "+---+----------------+",
//!     "| 1 | 2              |",
//!     "+---+----------------+"
//! ];
//!
//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
//! # Ok(())
//! # }
//! ```
//!
//! ## SQL
//!
//! To execute a query against a CSV file using [SQL]:
//!
//! ```
//! # use datafusion::prelude::*;
//! # use datafusion::error::Result;
//! # use datafusion::arrow::array::RecordBatch;
//!
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//!
//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
//!
//! // create a plan
//! let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?;
//!
//! // execute the plan
//! let results: Vec<RecordBatch> = df.collect().await?;
//!
//! // format the results
//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?
//!   .to_string();
//!
//! let expected = vec![
//!     "+---+----------------+",
//!     "| a | min(example.b) |",
//!     "+---+----------------+",
//!     "| 1 | 2              |",
//!     "+---+----------------+"
//! ];
//!
//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
//! # Ok(())
//! # }
//! ```
//!
//! ## More Examples
//!
//! There are many additional annotated examples of using DataFusion in the [datafusion-examples] directory.
//!
//! [datafusion-examples]: https://github.com/apache/datafusion/tree/main/datafusion-examples
//!
//! # Architecture
//!
//! <!-- NOTE: The goal of this section is to provide a high level
//! overview of how DataFusion is organized and then link to other
//! sections of the docs with more details -->
//!
//! You can find a formal description of DataFusion's architecture in our
//! [SIGMOD 2024 Paper].
//!
//! [SIGMOD 2024 Paper]: https://dl.acm.org/doi/10.1145/3626246.3653368
//!
//! ## Design Goals
//! DataFusion's architecture goals are:
//!
//! 1. Work "out of the box": Provide a very fast, world class query engine with
//!    minimal setup or required configuration.
//!
//! 2. Customizable everything: All behavior should be customizable by
//!    implementing traits.
//!
//! 3. Architecturally boring 🥱: Follow industrial best practice rather than
//!    trying cutting edge, but unproven, techniques.
//!
//! With these principles, users start with a basic, high-performance engine
//! and specialize it over time to suit their needs and available engineering
//! capacity.
//!
//! ## Overview Presentations
//!
//! The following presentations offer high level overviews of the
//! different components and how they interact together.
//!
//! - [Apr 2023]: The Apache DataFusion Architecture talks
//!   - _Query Engine_: [recording](https://youtu.be/NVKujPxwSBA) and [slides](https://docs.google.com/presentation/d/1D3GDVas-8y0sA4c8EOgdCvEjVND4s2E7I6zfs67Y4j8/edit#slide=id.p)
//!   - _Logical Plan and Expressions_: [recording](https://youtu.be/EzZTLiSJnhY) and [slides](https://docs.google.com/presentation/d/1ypylM3-w60kVDW7Q6S99AHzvlBgciTdjsAfqNP85K30)
//!   - _Physical Plan and Execution_: [recording](https://youtu.be/2jkWU3_w6z0) and [slides](https://docs.google.com/presentation/d/1cA2WQJ2qg6tx6y4Wf8FH2WVSm9JQ5UgmBWATHdik0hg)
//! - [July 2022]: DataFusion and Arrow: Supercharge Your Data Analytical Tool with a Rusty Query Engine: [recording](https://www.youtube.com/watch?v=Rii1VTn3seQ) and [slides](https://docs.google.com/presentation/d/1q1bPibvu64k2b7LPi7Yyb0k3gA1BiUYiUbEklqW1Ckc/view#slide=id.g11054eeab4c_0_1165)
//! - [March 2021]: The DataFusion architecture is described in _Query Engine Design and the Rust-Based DataFusion in Apache Arrow_: [recording](https://www.youtube.com/watch?v=K6eCAVEk4kU) (DataFusion content starts [~ 15 minutes in](https://www.youtube.com/watch?v=K6eCAVEk4kU&t=875s)) and [slides](https://www.slideshare.net/influxdata/influxdb-iox-tech-talks-query-engine-design-and-the-rustbased-datafusion-in-apache-arrow-244161934)
//! - [February 2021]: How DataFusion is used within the Ballista Project is described in _Ballista: Distributed Compute with Rust and Apache Arrow_: [recording](https://www.youtube.com/watch?v=ZZHQaOap9pQ)
//!
//! ## Customization and Extension
//!
//! DataFusion is designed to be highly extensible, so you can
//! start with a working, full featured engine, and then
//! specialize any behavior for your use case. For example,
//! some projects may add custom [`ExecutionPlan`] operators, or create their own
//! query language that directly creates [`LogicalPlan`] rather than using the
//! built in SQL planner, [`SqlToRel`].
//!
//! To achieve this, DataFusion supports extension at many points:
//!
//! * read from any datasource ([`TableProvider`])
//! * define your own catalogs, schemas, and table lists ([`catalog`] and [`CatalogProvider`])
//! * build your own query language or plans ([`LogicalPlanBuilder`])
//! * declare and use user-defined functions ([`ScalarUDF`], [`AggregateUDF`], and [`WindowUDF`])
//! * add custom plan rewrite passes ([`AnalyzerRule`], [`OptimizerRule`] and [`PhysicalOptimizerRule`])
//! * extend the planner to use user-defined logical and physical nodes ([`QueryPlanner`])
//!
//! You can find examples of each of them in the [datafusion-examples] directory,
//! and a minimal sketch of the first extension point follows below.
//!
//! [`TableProvider`]: crate::datasource::TableProvider
//! [`CatalogProvider`]: crate::catalog::CatalogProvider
//! [`LogicalPlanBuilder`]: datafusion_expr::logical_plan::builder::LogicalPlanBuilder
//! [`ScalarUDF`]: crate::logical_expr::ScalarUDF
//! [`AggregateUDF`]: crate::logical_expr::AggregateUDF
//! [`WindowUDF`]: crate::logical_expr::WindowUDF
//! [`QueryPlanner`]: execution::context::QueryPlanner
//! [`OptimizerRule`]: datafusion_optimizer::optimizer::OptimizerRule
//! [`AnalyzerRule`]:  datafusion_optimizer::analyzer::AnalyzerRule
//! [`PhysicalOptimizerRule`]: datafusion_physical_optimizer::PhysicalOptimizerRule
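//!
//! For example, every table DataFusion reads comes from something that
//! implements [`TableProvider`]. A minimal sketch using the built in
//! [`MemTable`] (a custom data source would implement the same trait and be
//! registered the same way):
//!
//! ```rust
//! # use std::sync::Arc;
//! # use datafusion::arrow::array::{Int32Array, RecordBatch};
//! # use datafusion::arrow::datatypes::{DataType, Field, Schema};
//! # use datafusion::datasource::memory::MemTable;
//! # use datafusion::error::Result;
//! # use datafusion::prelude::*;
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
//! let batch = RecordBatch::try_new(
//!     schema.clone(),
//!     vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
//! )?;
//!
//! // MemTable implements TableProvider over in-memory RecordBatches
//! let provider = MemTable::try_new(schema, vec![vec![batch]])?;
//! let ctx = SessionContext::new();
//! ctx.register_table("t", Arc::new(provider))?;
//!
//! let df = ctx.sql("SELECT a FROM t WHERE a > 1").await?;
//! df.show().await?;
//! # Ok(())
//! # }
//! ```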
//!
//! ## Query Planning and Execution Overview
//!
//! ### SQL
//!
//! ```text
//!                 Parsed with            SqlToRel creates
//!                 sqlparser              initial plan
//! ┌───────────────┐           ┌─────────┐             ┌─────────────┐
//! │   SELECT *    │           │Query {  │             │Project      │
//! │   FROM ...    │──────────▶│..       │────────────▶│  TableScan  │
//! │               │           │}        │             │    ...      │
//! └───────────────┘           └─────────┘             └─────────────┘
//!
//!   SQL String                 sqlparser               LogicalPlan
//!                              AST nodes
//! ```
//!
//! 1. The query string is parsed to an Abstract Syntax Tree (AST)
//!    [`Statement`] using [sqlparser].
//!
//! 2. The AST is converted by [`SqlToRel`] to a [`LogicalPlan`] and logical
//!    expressions ([`Expr`]s) that compute the desired result. This phase
//!    also includes name and type resolution ("binding").
//!
//! [`Statement`]: https://docs.rs/sqlparser/latest/sqlparser/ast/enum.Statement.html
//!
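//! For example, a minimal sketch of observing the result of this phase (the
//! [`LogicalPlan`] a SQL string was planned to) through the [`DataFrame`]
//! returned by [`SessionContext`]'s `sql` method:
//!
//! ```rust
//! # use datafusion::prelude::*;
//! # use datafusion::error::Result;
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
//!
//! // `sql` parses the string with sqlparser and plans it with SqlToRel;
//! // the returned DataFrame wraps the resulting LogicalPlan
//! let df = ctx.sql("SELECT a FROM example WHERE a <= 5").await?;
//! println!("{}", df.logical_plan().display_indent());
//! # Ok(())
//! # }
//! ```
//!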
//! ### DataFrame
//!
//! When executing plans using the [`DataFrame`] API, the process is
//! identical to that of SQL, except the DataFrame API builds the
//! [`LogicalPlan`] directly using [`LogicalPlanBuilder`]. Systems
//! that have their own custom query languages typically also build
//! [`LogicalPlan`] directly.
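//!
//! A minimal sketch of building a [`LogicalPlan`] directly with
//! [`LogicalPlanBuilder`], without any SQL:
//!
//! ```rust
//! # use datafusion::error::Result;
//! # use datafusion::logical_expr::{lit, LogicalPlanBuilder};
//! # fn main() -> Result<()> {
//! // build the equivalent of `SELECT 1 AS x` as a plan
//! let plan = LogicalPlanBuilder::empty(true)
//!     .project(vec![lit(1).alias("x")])?
//!     .build()?;
//! println!("{}", plan.display_indent());
//! # Ok(())
//! # }
//! ```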
//!
//! ### Planning
//!
//! ```text
//!             AnalyzerRules and      PhysicalPlanner          PhysicalOptimizerRules
//!             OptimizerRules         creates ExecutionPlan    improve performance
//!             rewrite plan
//! ┌─────────────┐        ┌─────────────┐      ┌─────────────────┐        ┌─────────────────┐
//! │Project      │        │Project(x, y)│      │ProjectExec      │        │ProjectExec      │
//! │  TableScan  │──...──▶│  TableScan  │─────▶│  ...            │──...──▶│  ...            │
//! │    ...      │        │    ...      │      │   DataSourceExec│        │   DataSourceExec│
//! └─────────────┘        └─────────────┘      └─────────────────┘        └─────────────────┘
//!
//!  LogicalPlan            LogicalPlan         ExecutionPlan             ExecutionPlan
//! ```
//!
//! To process large datasets with many rows as efficiently as
//! possible, significant effort is spent planning and
//! optimizing, in the following manner:
//!
//! 1. The [`LogicalPlan`] is checked and rewritten to enforce
//!    semantic rules, such as type coercion, by [`AnalyzerRule`]s.
//!
//! 2. The [`LogicalPlan`] is rewritten by [`OptimizerRule`]s, such as
//!    projection and filter pushdown, to improve its efficiency.
//!
//! 3. The [`LogicalPlan`] is converted to an [`ExecutionPlan`] by a
//!    [`PhysicalPlanner`].
//!
//! 4. The [`ExecutionPlan`] is rewritten by
//!    [`PhysicalOptimizerRule`]s, such as sort and join selection, to
//!    improve its efficiency. A sketch of driving all four steps
//!    appears after this list.
//!
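//! For example, a sketch that runs this whole pipeline, using the
//! [`DataFrame`] method `create_physical_plan` to apply the analyzer,
//! optimizer, and physical planner and return the final [`ExecutionPlan`]:
//!
//! ```rust
//! # use datafusion::physical_plan::displayable;
//! # use datafusion::prelude::*;
//! # use datafusion::error::Result;
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?;
//! let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a").await?;
//!
//! // runs AnalyzerRules, OptimizerRules, the PhysicalPlanner and
//! // PhysicalOptimizerRules, yielding an Arc<dyn ExecutionPlan>
//! let physical_plan = df.create_physical_plan().await?;
//! println!("{}", displayable(physical_plan.as_ref()).indent(true));
//! # Ok(())
//! # }
//! ```
//!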
//! ## Data Sources
//!
//! ```text
//! Planning       │
//! requests       │            TableProvider::scan
//! information    │            creates an
//! such as schema │            ExecutionPlan
//!                │
//!                ▼
//!   ┌─────────────────────────┐         ┌───────────────┐
//!   │                         │         │               │
//!   │impl TableProvider       │────────▶│DataSourceExec │
//!   │                         │         │               │
//!   └─────────────────────────┘         └───────────────┘
//!         TableProvider
//!         (built in or user provided)    ExecutionPlan
//! ```
//!
//! A [`TableProvider`] provides information for planning and
//! an [`ExecutionPlan`] for execution. DataFusion includes [`ListingTable`],
//! which supports reading several common file formats, and you can support any
//! new file format by implementing the [`TableProvider`] trait. See also:
//!
//! 1. [`ListingTable`]: Reads data from Parquet, JSON, CSV, or AVRO
//!    files.  Supports single files or multiple files with HIVE style
//!    partitioning, optional compression, directly reading from remote
//!    object store and more.
//!
//! 2. [`MemTable`]: Reads data from in memory [`RecordBatch`]es.
//!
//! 3. [`StreamingTable`]: Reads data from potentially unbounded inputs.
//!
//! [`ListingTable`]: crate::datasource::listing::ListingTable
//! [`MemTable`]: crate::datasource::memory::MemTable
//! [`StreamingTable`]: crate::catalog::streaming::StreamingTable
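//!
//! For example, a sketch of reading files through [`ListingTable`] via the
//! `register_parquet` convenience method (the file path here is hypothetical,
//! so the example is not run):
//!
//! ```rust,no_run
//! # use datafusion::prelude::*;
//! # use datafusion::error::Result;
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//! // register_parquet creates a ListingTable (a TableProvider) under the hood
//! ctx.register_parquet("example", "path/to/example.parquet", ParquetReadOptions::default()).await?;
//! let df = ctx.sql("SELECT * FROM example LIMIT 10").await?;
//! df.show().await?;
//! # Ok(())
//! # }
//! ```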
//!
//! ## Plan Representations
//!
//! ### Logical Plans
//! Logical planning yields [`LogicalPlan`] nodes and [`Expr`]s
//! representing expressions which are [`Schema`] aware and represent statements
//! independent of how they are physically executed.
//! A [`LogicalPlan`] is a Directed Acyclic Graph (DAG) of other
//! [`LogicalPlan`]s, each potentially containing embedded [`Expr`]s.
//!
//! `LogicalPlan`s can be rewritten with the [`TreeNode`] API; see the
//! [`tree_node module`] for more details.
//!
//! [`Expr`]s can also be rewritten with the [`TreeNode`] API and simplified using
//! [`ExprSimplifier`]. Examples of working with and executing `Expr`s can be
//! found in the [`expr_api`.rs] example, and a short sketch follows below.
//!
//! [`TreeNode`]: datafusion_common::tree_node::TreeNode
//! [`tree_node module`]: datafusion_expr::logical_plan::tree_node
//! [`ExprSimplifier`]: crate::optimizer::simplify_expressions::ExprSimplifier
//! [`expr_api`.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/expr_api.rs
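//!
//! A minimal sketch of walking an [`Expr`] tree with the [`TreeNode`] API,
//! collecting the columns it references:
//!
//! ```rust
//! # use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion};
//! # use datafusion::error::Result;
//! # use datafusion::prelude::*;
//! # fn main() -> Result<()> {
//! // the expression `a + b > 5`
//! let expr = (col("a") + col("b")).gt(lit(5));
//!
//! // Expr implements TreeNode, so `apply` visits the expression tree
//! let mut columns = vec![];
//! expr.apply(|e| {
//!     if let Expr::Column(c) = e {
//!         columns.push(c.name.clone());
//!     }
//!     Ok(TreeNodeRecursion::Continue)
//! })?;
//! assert_eq!(columns, vec!["a", "b"]);
//! # Ok(())
//! # }
//! ```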
//!
//! ### Physical Plans
//!
//! An [`ExecutionPlan`] (sometimes referred to as a "physical plan")
//! is a plan that can be executed against data. It is a DAG of other
//! [`ExecutionPlan`]s, each potentially containing expressions that implement the
//! [`PhysicalExpr`] trait.
//!
//! Compared to a [`LogicalPlan`], an [`ExecutionPlan`] has additional concrete
//! information about how to perform calculations (e.g. hash vs merge
//! join), and how data flows during execution (e.g. partitioning and
//! sortedness).
//!
//! [cp_solver] performs range propagation analysis on [`PhysicalExpr`]s, and
//! [`PruningPredicate`] can prove that certain boolean [`PhysicalExpr`]s used for
//! filtering can never be `true` using additional statistical information.
//!
//! [cp_solver]: crate::physical_expr::intervals::cp_solver
//! [`PruningPredicate`]: datafusion_physical_optimizer::pruning::PruningPredicate
//! [`PhysicalExpr`]: crate::physical_plan::PhysicalExpr
//!
//! ## Execution
//!
//! ```text
//!            ExecutionPlan::execute             Calling next() on the
//!            produces a stream                  stream produces the data
//!
//! ┌────────────────┐      ┌─────────────────────────┐         ┌────────────┐
//! │ProjectExec     │      │impl                     │    ┌───▶│RecordBatch │
//! │  ...           │─────▶│SendableRecordBatchStream│────┤    └────────────┘
//! │  DataSourceExec│      │                         │    │    ┌────────────┐
//! └────────────────┘      └─────────────────────────┘    ├───▶│RecordBatch │
//!               ▲                                        │    └────────────┘
//! ExecutionPlan │                                        │         ...
//!               │                                        │
//!               │                                        │    ┌────────────┐
//!             PhysicalOptimizerRules                     ├───▶│RecordBatch │
//!             request information                        │    └────────────┘
//!             such as partitioning                       │    ┌ ─ ─ ─ ─ ─ ─
//!                                                        └───▶ None        │
//!                                                             └ ─ ─ ─ ─ ─ ─
//! ```
//!
//! [`ExecutionPlan`]s process data using the [Apache Arrow] memory
//! format, making heavy use of functions from the [arrow]
//! crate. Values are represented with [`ColumnarValue`], which are either
//! [`ScalarValue`] (single constant values) or [`ArrayRef`] (Arrow
//! Arrays).
//!
//! Calling [`execute`] produces one or more partitions of data,
//! each as a [`SendableRecordBatchStream`], which implements a pull based execution
//! API. Calling [`next()`]`.await` will incrementally compute and return the next
//! [`RecordBatch`]. Balanced parallelism is achieved using [Volcano style]
//! "Exchange" operations implemented by [`RepartitionExec`].
//!
//! While some recent research such as [Morsel-Driven Parallelism] describes challenges
//! with the pull style Volcano execution model on NUMA architectures, in practice DataFusion achieves
//! similar scalability as systems that use push driven schedulers [such as DuckDB].
//! See the [DataFusion paper in SIGMOD 2024] for more details.
//!
//! [`execute`]: physical_plan::ExecutionPlan::execute
//! [`SendableRecordBatchStream`]: crate::physical_plan::SendableRecordBatchStream
//! [`ColumnarValue`]: datafusion_expr::ColumnarValue
//! [`ScalarValue`]: crate::scalar::ScalarValue
//! [`ArrayRef`]: arrow::array::ArrayRef
//! [`Stream`]: futures::stream::Stream
//!
//! See the [implementors of `ExecutionPlan`] for a list of physical operators available.
//!
//! [`RepartitionExec`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/repartition/struct.RepartitionExec.html
//! [Volcano style]: https://w6113.github.io/files/papers/volcanoparallelism-89.pdf
//! [Morsel-Driven Parallelism]: https://db.in.tum.de/~leis/papers/morsels.pdf
//! [DataFusion paper in SIGMOD 2024]: https://github.com/apache/datafusion/files/15149988/DataFusion_Query_Engine___SIGMOD_2024-FINAL-mk4.pdf
//! [such as DuckDB]: https://github.com/duckdb/duckdb/issues/1583
//! [implementors of `ExecutionPlan`]: https://docs.rs/datafusion/latest/datafusion/physical_plan/trait.ExecutionPlan.html#implementors
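//!
//! A minimal sketch of pulling [`RecordBatch`]es out of this pull based API,
//! using the [`DataFrame`] convenience method `execute_stream` (which merges
//! all partitions into a single stream):
//!
//! ```rust
//! # use datafusion::prelude::*;
//! # use datafusion::error::Result;
//! # use futures::StreamExt;
//! # #[tokio::main]
//! # async fn main() -> Result<()> {
//! let ctx = SessionContext::new();
//! let df = ctx.sql("SELECT 1 AS x").await?;
//!
//! // execute_stream returns a SendableRecordBatchStream;
//! // each call to next().await yields the next RecordBatch
//! let mut stream = df.execute_stream().await?;
//! while let Some(batch) = stream.next().await {
//!     println!("{} rows", batch?.num_rows());
//! }
//! # Ok(())
//! # }
//! ```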
//!
//! ## Streaming Execution
//!
//! DataFusion is a "streaming" query engine, which means `ExecutionPlan`s incrementally
//! read from their input(s) and compute output one [`RecordBatch`] at a time
//! by continually polling [`SendableRecordBatchStream`]s. Output and
//! intermediate `RecordBatch`es each have approximately `batch_size` rows,
//! which amortizes per-batch overhead of execution.
//!
//! Note that certain operations, sometimes called "pipeline breakers"
//! (for example full sorts or hash aggregations), are fundamentally non streaming and
//! must read their input fully before producing **any** output. As much as possible,
//! other operators read a single [`RecordBatch`] from their input to produce a
//! single `RecordBatch` as output.
//!
//! For example, given this SQL query:
//!
//! ```sql
//! SELECT date_trunc('month', time) FROM data WHERE id IN (10,20,30);
//! ```
//!
//! The diagram below shows the call sequence when a consumer calls [`next()`] to
//! get the next `RecordBatch` of output. While it is possible that some
//! steps run on different threads, typically tokio will use the same thread
//! that called `next()` to read from the input, apply the filter, and
//! return the results without interleaving any other operations. This results
//! in excellent cache locality, as the same CPU core that produces the data often
//! consumes it immediately as well.
//!
//! ```text
//!
//! Step 3: FilterExec calls next()       Step 2: ProjectionExec calls
//!         on input Stream                  next() on input Stream
//!         ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─      ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐
//!                            │                                               Step 1: Consumer
//!         ▼                        ▼                           │               calls next()
//! ┏━━━━━━━━━━━━━━━━┓     ┏━━━━━┻━━━━━━━━━━━━━┓      ┏━━━━━━━━━━━━━━━━━━━━━━━━┓
//! ┃                ┃     ┃                   ┃      ┃                        ◀ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
//! ┃  DataSource    ┃     ┃                   ┃      ┃                        ┃
//! ┃    (e.g.       ┃     ┃    FilterExec     ┃      ┃     ProjectionExec     ┃
//! ┃ ParquetSource) ┃     ┃id IN (10, 20, 30) ┃      ┃date_trunc('month',time)┃
//! ┃                ┃     ┃                   ┃      ┃                        ┣ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ▶
//! ┃                ┃     ┃                   ┃      ┃                        ┃
//! ┗━━━━━━━━━━━━━━━━┛     ┗━━━━━━━━━━━┳━━━━━━━┛      ┗━━━━━━━━━━━━━━━━━━━━━━━━┛
//!         │                  ▲                                 ▲          Step 6: ProjectionExec
//!                            │     │                           │        computes date_trunc into a
//!         └ ─ ─ ─ ─ ─ ─ ─ ─ ─       ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─          new RecordBatch returned
//!              ┌─────────────────────┐                ┌─────────────┐          to the client
//!              │     RecordBatch     │                │ RecordBatch │
//!              └─────────────────────┘                └─────────────┘
//!
//!           Step 4: DataSource returns a        Step 5: FilterExec returns a new
//!                single RecordBatch            RecordBatch with only matching rows
//! ```
//!
//! [`next()`]: futures::StreamExt::next
//!
//! ## Thread Scheduling, CPU / IO Thread Pools, and [Tokio] [`Runtime`]s
//!
//! DataFusion automatically runs each plan with multiple CPU cores using
//! a [Tokio] [`Runtime`] as a thread pool. While tokio is most commonly used
//! for asynchronous network I/O, the combination of an efficient, work-stealing
//! scheduler and first class compiler support for automatic continuation
//! generation (`async`) also makes it a compelling choice for CPU intensive
//! applications, as explained in the [Using Rustlang's Async Tokio
//! Runtime for CPU-Bound Tasks] blog.
//!
//! The number of cores used is determined by the `target_partitions`
//! configuration setting, which defaults to the number of CPU cores.
//! While preparing for execution, DataFusion tries to create this many distinct
//! `async` [`Stream`]s for each `ExecutionPlan`.
//! The `Stream`s for certain `ExecutionPlan`s, such as [`RepartitionExec`]
//! and [`CoalescePartitionsExec`], spawn [Tokio] [`task`]s that are run by
//! threads managed by the `Runtime`.
//! Many DataFusion `Stream`s perform CPU intensive processing.
//!
//! Using `async` for CPU intensive tasks makes it easy for [`TableProvider`]s
//! to perform network I/O using standard Rust `async` during execution.
//! However, this design also makes it very easy to mix CPU intensive and latency
//! sensitive I/O work on the same thread pool ([`Runtime`]).
//! Using the same (default) `Runtime` is convenient, and often works well for
//! initial development and processing local files, but it can lead to problems
//! under load and/or when reading from network sources such as AWS S3.
//!
//! If your system does not fully utilize either the CPU or network bandwidth
//! during execution, or you see significantly higher tail (e.g. p99) latencies
//! responding to network requests, **it is likely you need to use a different
//! `Runtime` for CPU intensive DataFusion plans**. This effect can be especially
//! pronounced when running several queries concurrently.
//!
//! As shown in the following figure, using the same `Runtime` for both CPU
//! intensive processing and network requests can introduce significant
//! delays in responding to those network requests. Delays in processing network
//! requests can and do lead network flow control to throttle the available
//! bandwidth in response.
//!
//! ```text
//!                                                                          Legend
//!
//!                                                                          ┏━━━━━━┓
//!                            Processing network request                    ┃      ┃  CPU bound work
//!                            is delayed due to processing                  ┗━━━━━━┛
//!                            CPU bound work                                ┌─┐
//!                                                                          │ │       Network request
//!                                         ││                               └─┘       processing
//!
//!                                         ││
//!                                ─ ─ ─ ─ ─  ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
//!                               │                                            │
//!
//!                               ▼                                            ▼
//! ┌─────────────┐           ┌─┐┌─┐┏━━━━━━━━━━━━━━━━━━━┓┏━━━━━━━━━━━━━━━━━━━┓┌─┐
//! │             │thread 1   │ ││ │┃     Decoding      ┃┃     Filtering     ┃│ │
//! │             │           └─┘└─┘┗━━━━━━━━━━━━━━━━━━━┛┗━━━━━━━━━━━━━━━━━━━┛└─┘
//! │             │           ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
//! │Tokio Runtime│thread 2   ┃   Decoding   ┃     Filtering     ┃   Decoding   ┃       ...
//! │(thread pool)│           ┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━┛
//! │             │     ...                               ...
//! │             │           ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓┌─┐ ┏━━━━━━━━━━━━━━┓
//! │             │thread N   ┃     Decoding      ┃     Filtering     ┃│ │ ┃   Decoding   ┃
//! └─────────────┘           ┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━┛└─┘ ┗━━━━━━━━━━━━━━┛
//!                           ─────────────────────────────────────────────────────────────▶
//!                                                                                           time
//! ```
//!
//! The bottleneck resulting from network throttling can be avoided
//! by using separate [`Runtime`]s for the different types of work, as shown
//! in the diagram below.
//!
//! ```text
//!                    A separate thread pool processes network       Legend
//!                    requests, reducing the latency for
//!                    processing each request                        ┏━━━━━━┓
//!                                                                   ┃      ┃  CPU bound work
//!                                         │                         ┗━━━━━━┛
//!                                          │                        ┌─┐
//!                               ┌ ─ ─ ─ ─ ┘                         │ │       Network request
//!                                  ┌ ─ ─ ─ ┘                        └─┘       processing
//!                               │
//!                               ▼  ▼
//! ┌─────────────┐           ┌─┐┌─┐┌─┐
//! │             │thread 1   │ ││ ││ │
//! │             │           └─┘└─┘└─┘
//! │Tokio Runtime│                                          ...
//! │(thread pool)│thread 2
//! │             │
//! │"IO Runtime" │     ...
//! │             │                                                   ┌─┐
//! │             │thread N                                           │ │
//! └─────────────┘                                                   └─┘
//!                           ─────────────────────────────────────────────────────────────▶
//!                                                                                           time
//!
//! ┌─────────────┐           ┏━━━━━━━━━━━━━━━━━━━┓┏━━━━━━━━━━━━━━━━━━━┓
//! │             │thread 1   ┃     Decoding      ┃┃     Filtering     ┃
//! │             │           ┗━━━━━━━━━━━━━━━━━━━┛┗━━━━━━━━━━━━━━━━━━━┛
//! │Tokio Runtime│           ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
//! │(thread pool)│thread 2   ┃   Decoding   ┃     Filtering     ┃   Decoding   ┃       ...
//! │             │           ┗━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━┛
//! │ CPU Runtime │     ...                               ...
//! │             │           ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
//! │             │thread N   ┃     Decoding      ┃     Filtering     ┃   Decoding   ┃
//! └─────────────┘           ┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━┛
//!                          ─────────────────────────────────────────────────────────────▶
//!                                                                                           time
//! ```
//!
//! Note that DataFusion does not use [`tokio::task::spawn_blocking`] for
//! CPU-bound work, because `spawn_blocking` is designed for blocking **IO**,
//! not for CPU-bound tasks. Among other challenges, spawned blocking
//! tasks can't yield waiting for input (can't call `await`), so they
//! can't be used to limit the number of concurrent CPU bound tasks or
//! keep the processing pipeline on the same core.
//!
//! [Tokio]:  https://tokio.rs
//! [`Runtime`]: tokio::runtime::Runtime
//! [`task`]: tokio::task
//! [Using Rustlang's Async Tokio Runtime for CPU-Bound Tasks]: https://thenewstack.io/using-rustlangs-async-tokio-runtime-for-cpu-bound-tasks/
//! [`RepartitionExec`]: physical_plan::repartition::RepartitionExec
//! [`CoalescePartitionsExec`]: physical_plan::coalesce_partitions::CoalescePartitionsExec
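//!
//! A sketch of one way to set this up: build a dedicated multi-threaded
//! [`Runtime`] for DataFusion's CPU-heavy work and `block_on` the query
//! there, keeping the default runtime free for network handlers (the thread
//! count here is illustrative):
//!
//! ```rust
//! # use datafusion::error::Result;
//! # use datafusion::prelude::*;
//! # fn main() -> Result<()> {
//! // dedicated runtime for CPU intensive plan execution
//! let cpu_runtime = tokio::runtime::Builder::new_multi_thread()
//!     .worker_threads(4)
//!     .thread_name("datafusion-cpu")
//!     .enable_all()
//!     .build()?;
//!
//! let results = cpu_runtime.block_on(async {
//!     let ctx = SessionContext::new();
//!     ctx.sql("SELECT 1 AS x").await?.collect().await
//! })?;
//! assert_eq!(results[0].num_rows(), 1);
//! # Ok(())
//! # }
//! ```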
//!
//! ## State Management and Configuration
//!
//! [`ConfigOptions`] contain options to control DataFusion's
//! execution.
//!
//! [`ConfigOptions`]: datafusion_common::config::ConfigOptions
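//!
//! A minimal sketch of setting a few of these options through `SessionConfig`
//! when creating a context (the values are illustrative):
//!
//! ```rust
//! # use datafusion::prelude::*;
//! let config = SessionConfig::new()
//!     .with_batch_size(4096)
//!     .with_target_partitions(8);
//! let ctx = SessionContext::new_with_config(config);
//! ```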
//!
//! The state required to execute queries is managed by the following
//! structures:
//!
//! 1. [`SessionContext`]: State needed to create [`LogicalPlan`]s, such
//!    as the table definitions and the function registries.
//!
//! 2. [`TaskContext`]: State needed for execution, such as the
//!    [`MemoryPool`], [`DiskManager`], and [`ObjectStoreRegistry`].
//!
//! 3. [`ExecutionProps`]: Per-execution properties and data (such as
//!    starting timestamps, etc).
//!
//! [`SessionContext`]: crate::execution::context::SessionContext
//! [`TaskContext`]: crate::execution::context::TaskContext
//! [`ExecutionProps`]: crate::execution::context::ExecutionProps
//!
//! ### Resource Management
//!
//! The amount of memory and temporary local disk space used by
//! DataFusion when running a plan can be controlled using the
//! [`MemoryPool`] and [`DiskManager`]. Other runtime options can be
//! found on [`RuntimeEnv`].
//!
//! [`DiskManager`]: crate::execution::DiskManager
//! [`MemoryPool`]: crate::execution::memory_pool::MemoryPool
//! [`RuntimeEnv`]: crate::execution::runtime_env::RuntimeEnv
//! [`ObjectStoreRegistry`]: crate::datasource::object_store::ObjectStoreRegistry
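//!
//! For example, a sketch of capping memory use with a [`MemoryPool`] via
//! `RuntimeEnvBuilder` (the 2 GB limit is illustrative):
//!
//! ```rust
//! # use datafusion::error::Result;
//! # use datafusion::execution::runtime_env::RuntimeEnvBuilder;
//! # use datafusion::prelude::*;
//! # fn main() -> Result<()> {
//! // limit DataFusion to 2 GB of memory (spilling operators may use
//! // the DiskManager once the MemoryPool is exhausted)
//! let runtime_env = RuntimeEnvBuilder::new()
//!     .with_memory_limit(2 * 1024 * 1024 * 1024, 1.0)
//!     .build_arc()?;
//! let ctx = SessionContext::new_with_config_rt(SessionConfig::new(), runtime_env);
//! # Ok(())
//! # }
//! ```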
//!
//! ## Crate Organization
//!
//! Most users interact with DataFusion via this crate (`datafusion`), which re-exports
//! all functionality needed to build and execute queries.
//!
//! Three other crates provide additional functionality and
//! must be used directly:
//! * [`datafusion_proto`]: Plan serialization and deserialization
//! * [`datafusion_substrait`]: Support for the Substrait plan serialization format
//! * [`datafusion_sqllogictest`]: The DataFusion SQL logic test runner
//!
//! [`datafusion_proto`]: https://crates.io/crates/datafusion-proto
//! [`datafusion_substrait`]: https://crates.io/crates/datafusion-substrait
//! [`datafusion_sqllogictest`]: https://crates.io/crates/datafusion-sqllogictest
//!
//! DataFusion is internally split into multiple sub crates to
//! enforce modularity and improve compilation times. See the
//! [list of modules](#modules) for all available sub-crates. Major ones are:
//!
//! * [datafusion_common]: Common traits and types
//! * [datafusion_catalog]: Catalog APIs such as [`SchemaProvider`] and [`CatalogProvider`]
//! * [datafusion_execution]: State and structures needed for execution
//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structures
//! * [datafusion_functions]: Scalar function packages
//! * [datafusion_functions_aggregate]: Aggregate functions such as `MIN`, `MAX`, `SUM`, etc
//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s
//! * [datafusion_functions_table]: Table Functions such as `GENERATE_SERIES`
//! * [datafusion_functions_window]: Window functions such as `ROW_NUMBER`, `RANK`, etc
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
//! * [datafusion_physical_plan]: [`ExecutionPlan`] and related plan structures
//! * [datafusion_physical_optimizer]: [`PhysicalOptimizerRule`]s to rewrite [`ExecutionPlan`]s
//! * [datafusion_sql]: SQL planner ([`SqlToRel`])
//!
//! [`SchemaProvider`]: datafusion_catalog::SchemaProvider
//! [`CatalogProvider`]: datafusion_catalog::CatalogProvider
//!
//! ## Citing DataFusion in Academic Papers
//!
//! You can use the following citation to reference DataFusion in academic papers:
//!
//! ```text
//! @inproceedings{lamb2024apache,
//!   title={Apache Arrow DataFusion: A Fast, Embeddable, Modular Analytic Query Engine},
//!   author={Lamb, Andrew and Shen, Yijie and Heres, Dani{\"e}l and Chakraborty, Jayjeet and Kabak, Mehmet Ozan and Hsieh, Liang-Chi and Sun, Chao},
//!   booktitle={Companion of the 2024 International Conference on Management of Data},
//!   pages={5--17},
//!   year={2024}
//! }
//! ```
//!
//! [sqlparser]: https://docs.rs/sqlparser/latest/sqlparser
//! [`SqlToRel`]: sql::planner::SqlToRel
//! [`Expr`]: datafusion_expr::Expr
//! [`LogicalPlan`]: datafusion_expr::LogicalPlan
//! [`AnalyzerRule`]: datafusion_optimizer::analyzer::AnalyzerRule
//! [`OptimizerRule`]: optimizer::optimizer::OptimizerRule
//! [`ExecutionPlan`]: physical_plan::ExecutionPlan
//! [`PhysicalPlanner`]: physical_planner::PhysicalPlanner
//! [`PhysicalOptimizerRule`]: datafusion_physical_optimizer::PhysicalOptimizerRule
//! [`Schema`]: arrow::datatypes::Schema
//! [`PhysicalExpr`]: physical_plan::PhysicalExpr
//! [`RecordBatch`]: arrow::array::RecordBatch
//! [`RecordBatchReader`]: arrow::record_batch::RecordBatchReader
//! [`Array`]: arrow::array::Array

/// DataFusion crate version
pub const DATAFUSION_VERSION: &str = env!("CARGO_PKG_VERSION");

extern crate core;
extern crate sqlparser;

pub mod catalog_common;
pub mod dataframe;
pub mod datasource;
pub mod error;
pub mod execution;
pub mod physical_planner;
pub mod prelude;
pub mod scalar;

// re-export dependencies from arrow-rs to minimize version maintenance for crate users
pub use arrow;
#[cfg(feature = "parquet")]
pub use parquet;

// re-export DataFusion sub-crates at the top level. Use `pub use *`
// so that the contents of the subcrates appear in rustdocs
// for details, see https://github.com/apache/datafusion/issues/6648

/// re-export of [`datafusion_common`] crate
pub mod common {
    pub use datafusion_common::*;

    /// re-export of [`datafusion_common_runtime`] crate
    pub mod runtime {
        pub use datafusion_common_runtime::*;
    }
}

// Backwards compatibility
pub use common::config;

// NB datafusion execution is re-exported in the `execution` module

/// re-export of [`datafusion_catalog`] crate
pub mod catalog {
    pub use datafusion_catalog::*;
}

/// re-export of [`datafusion_expr`] crate
pub mod logical_expr {
    pub use datafusion_expr::*;
}

/// re-export of [`datafusion_expr_common`] crate
pub mod logical_expr_common {
    pub use datafusion_expr_common::*;
}

/// re-export of [`datafusion_optimizer`] crate
pub mod optimizer {
    pub use datafusion_optimizer::*;
}

/// re-export of [`datafusion_physical_optimizer`] crate
pub mod physical_optimizer {
    pub use datafusion_physical_optimizer::*;
}

/// re-export of [`datafusion_physical_expr_common`] crate
pub mod physical_expr_common {
    pub use datafusion_physical_expr_common::*;
}

/// re-export of [`datafusion_physical_expr`] crate
pub mod physical_expr {
    pub use datafusion_physical_expr::*;
}

/// re-export of [`datafusion_physical_plan`] crate
pub mod physical_plan {
    pub use datafusion_physical_plan::*;
}

// Reexport testing macros for compatibility
pub use datafusion_common::assert_batches_eq;
pub use datafusion_common::assert_batches_sorted_eq;

/// re-export of [`datafusion_sql`] crate
pub mod sql {
    pub use datafusion_sql::*;
}

/// re-export of [`datafusion_functions`] crate
pub mod functions {
    pub use datafusion_functions::*;
}

/// re-export of [`datafusion_functions_nested`] crate, if "nested_expressions" feature is enabled
pub mod functions_nested {
    #[cfg(feature = "nested_expressions")]
    pub use datafusion_functions_nested::*;
}

/// re-export of [`datafusion_functions_nested`] crate as [`functions_array`] for backward compatibility, if "nested_expressions" feature is enabled
#[deprecated(since = "41.0.0", note = "use datafusion-functions-nested instead")]
pub mod functions_array {
    #[cfg(feature = "nested_expressions")]
    pub use datafusion_functions_nested::*;
}

/// re-export of [`datafusion_functions_aggregate`] crate
pub mod functions_aggregate {
    pub use datafusion_functions_aggregate::*;
}

/// re-export of [`datafusion_functions_window`] crate
pub mod functions_window {
    pub use datafusion_functions_window::*;
}

/// re-export of [`datafusion_functions_table`] crate
pub mod functions_table {
    pub use datafusion_functions_table::*;
}

/// re-export of variable provider for `@name` and `@@name` style runtime values.
pub mod variable {
    pub use datafusion_expr::var_provider::{VarProvider, VarType};
}

#[cfg(not(target_arch = "wasm32"))]
pub mod test;

mod schema_equivalence;
pub mod test_util;

#[cfg(doctest)]
doc_comment::doctest!("../../../README.md", readme_example_test);

// Instructions for Documentation Examples
//
// The following commands test the examples from the user guide as part of
// `cargo test --doc`
//
// # Adding new tests:
//
// Simply add code like this to your .md file and ensure your md file is
// included in the lists below.
//
// ```rust
// <code here will be tested>
// ```
//
// Note that sometimes it helps to author the doctest as a standalone program
// first, and then copy it into the user guide.
//
// # Debugging Test Failures
//
// Unfortunately, the line numbers reported by doctest do not correspond to the
// line numbers in the .md files. Thus, if a doctest fails, use the name of
// the test to find the relevant file in the list below, and then find the
// example in that file to fix.
//
// For example, if `user_guide_expressions(line 123)` fails,
// go to `docs/source/user-guide/expressions.md` to find the relevant problem.
//
#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/concepts-readings-events.md",
    user_guide_concepts_readings_events
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/configs.md",
    user_guide_configs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/crate-configuration.md",
    user_guide_crate_configuration
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/dataframe.md",
    user_guide_dataframe
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/example-usage.md",
    user_guide_example_usage
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/explain-usage.md",
    user_guide_explain_usage
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/expressions.md",
    user_guide_expressions
);

#[cfg(doctest)]
doc_comment::doctest!("../../../docs/source/user-guide/faq.md", user_guide_faq);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/introduction.md",
    user_guide_introduction
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/datasources.md",
    user_guide_cli_datasource
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/installation.md",
    user_guide_cli_installation
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/overview.md",
    user_guide_cli_overview
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/cli/usage.md",
    user_guide_cli_usage
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/features.md",
    user_guide_features
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/aggregate_functions.md",
    user_guide_sql_aggregate_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/data_types.md",
    user_guide_sql_data_types
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/ddl.md",
    user_guide_sql_ddl
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/dml.md",
    user_guide_sql_dml
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/explain.md",
    user_guide_sql_explain
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/information_schema.md",
    user_guide_sql_information_schema
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/operators.md",
    user_guide_sql_operators
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/prepared_statements.md",
    user_guide_prepared_statements
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/scalar_functions.md",
    user_guide_sql_scalar_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/select.md",
    user_guide_sql_select
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/special_functions.md",
    user_guide_sql_special_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/subqueries.md",
    user_guide_sql_subqueries
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/window_functions.md",
    user_guide_sql_window_functions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/user-guide/sql/write_options.md",
    user_guide_sql_write_options
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/adding-udfs.md",
    library_user_guide_adding_udfs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/api-health.md",
    library_user_guide_api_health
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/building-logical-plans.md",
    library_user_guide_building_logical_plans
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/catalogs.md",
    library_user_guide_catalogs
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/custom-table-providers.md",
    library_user_guide_custom_table_providers
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/extending-operators.md",
    library_user_guide_extending_operators
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/extensions.md",
    library_user_guide_extensions
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/index.md",
    library_user_guide_index
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/profiling.md",
    library_user_guide_profiling
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/query-optimizer.md",
    library_user_guide_query_optimizer
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/using-the-dataframe-api.md",
    library_user_guide_dataframe_api
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/using-the-sql-api.md",
    library_user_guide_sql_api
);

#[cfg(doctest)]
doc_comment::doctest!(
    "../../../docs/source/library-user-guide/working-with-exprs.md",
    library_user_guide_working_with_exprs
);