Struct datafusion::execution::context::SessionContext
source · pub struct SessionContext {
pub session_start_time: DateTime<Utc>,
pub state: Arc<RwLock<SessionState>>,
/* private fields */
}
Expand description
SessionContext is the main interface for executing queries with DataFusion. It represents the connection between a user and a DataFusion/Ballista cluster. The context provides the following functionality:
- Create DataFrame from a CSV or Parquet data source.
- Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
- Register a custom data source that can be referenced from a SQL query.
- Execute a SQL query.
The following example demonstrates how to use the context to execute a query against a CSV data source using the DataFrame API:
use datafusion::prelude::*;
let ctx = SessionContext::new();
let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new()).await?;
let df = df.filter(col("a").lt_eq(col("b")))?
.aggregate(vec![col("a")], vec![min(col("b"))])?
.limit(0, Some(100))?;
let results = df.collect();
The following example demonstrates how to execute the same query using SQL:
use datafusion::prelude::*;
let mut ctx = SessionContext::new();
ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new()).await?;
let results = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100").await?;
Fields§
§session_start_time: DateTime<Utc>
Session start time
state: Arc<RwLock<SessionState>>
Shared session state for the session
Implementations§
source§impl SessionContext
impl SessionContext
sourcepub async fn refresh_catalogs(&self) -> Result<()>
pub async fn refresh_catalogs(&self) -> Result<()>
Finds any ListSchemaProviders and instructs them to reload tables from “disk”
sourcepub fn with_config(config: SessionConfig) -> Self
pub fn with_config(config: SessionConfig) -> Self
Creates a new session context using the provided session configuration.
sourcepub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self
pub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self
Creates a new session context using the provided configuration and RuntimeEnv.
sourcepub fn with_state(state: SessionState) -> Self
pub fn with_state(state: SessionState) -> Self
Creates a new session context using the provided session state.
sourcepub fn register_batch(
&self,
table_name: &str,
batch: RecordBatch
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn register_batch(
&self,
table_name: &str,
batch: RecordBatch
) -> Result<Option<Arc<dyn TableProvider>>>
Registers the RecordBatch as the specified table name
sourcepub fn runtime_env(&self) -> Arc<RuntimeEnv>
pub fn runtime_env(&self) -> Arc<RuntimeEnv>
Return the RuntimeEnv used to run queries with this SessionContext
sourcepub fn config_options(&self) -> Arc<RwLock<ConfigOptions>>
pub fn config_options(&self) -> Arc<RwLock<ConfigOptions>>
Return a handle to the shared configuration options
sourcepub fn session_id(&self) -> String
pub fn session_id(&self) -> String
Return the session_id of this Session
sourcepub fn copied_config(&self) -> SessionConfig
pub fn copied_config(&self) -> SessionConfig
Return a copied version of config for this Session
sourcepub async fn sql(&self, sql: &str) -> Result<Arc<DataFrame>>
pub async fn sql(&self, sql: &str) -> Result<Arc<DataFrame>>
Creates a DataFrame
that will execute a SQL query.
This method is async
because queries of type CREATE EXTERNAL TABLE
might require the schema to be inferred.
sourcepub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
pub fn create_logical_plan(&self, sql: &str) -> Result<LogicalPlan>
Creates a logical plan.
This function is intended for internal use and should not be called directly.
sourcepub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
pub fn register_variable(
&mut self,
variable_type: VarType,
provider: Arc<dyn VarProvider + Send + Sync>
)
Registers a variable provider within this context.
sourcepub fn register_udf(&mut self, f: ScalarUDF)
pub fn register_udf(&mut self, f: ScalarUDF)
Registers a scalar UDF within this context.
Note in SQL queries, function names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_FUNC(x)...
will look for a function named "my_func"
SELECT "my_FUNC"(x)
will look for a function named "my_FUNC"
sourcepub fn register_udaf(&mut self, f: AggregateUDF)
pub fn register_udaf(&mut self, f: AggregateUDF)
Registers an aggregate UDF within this context.
Note in SQL queries, aggregate names are looked up using lowercase unless the query uses quotes. For example,
SELECT MY_UDAF(x)...
will look for an aggregate named "my_udaf"
SELECT "my_UDAF"(x)
will look for an aggregate named "my_UDAF"
sourcepub async fn read_avro(
&self,
table_path: impl AsRef<str>,
options: AvroReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_avro(
&self,
table_path: impl AsRef<str>,
options: AvroReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame
for reading an Avro data source.
sourcepub async fn read_json(
&mut self,
table_path: impl AsRef<str>,
options: NdJsonReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_json(
&mut self,
table_path: impl AsRef<str>,
options: NdJsonReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame
for reading a JSON data source.
sourcepub fn read_empty(&self) -> Result<Arc<DataFrame>>
pub fn read_empty(&self) -> Result<Arc<DataFrame>>
Creates an empty DataFrame.
sourcepub async fn read_csv(
&self,
table_path: impl AsRef<str>,
options: CsvReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_csv(
&self,
table_path: impl AsRef<str>,
options: CsvReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame
for reading a CSV data source.
sourcepub async fn read_parquet(
&self,
table_path: impl AsRef<str>,
options: ParquetReadOptions<'_>
) -> Result<Arc<DataFrame>>
pub async fn read_parquet(
&self,
table_path: impl AsRef<str>,
options: ParquetReadOptions<'_>
) -> Result<Arc<DataFrame>>
Creates a DataFrame
for reading a Parquet data source.
sourcepub fn read_table(
&self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<DataFrame>>
pub fn read_table(
&self,
provider: Arc<dyn TableProvider>
) -> Result<Arc<DataFrame>>
Creates a DataFrame
for reading a custom TableProvider.
sourcepub fn read_batch(&self, batch: RecordBatch) -> Result<Arc<DataFrame>>
pub fn read_batch(&self, batch: RecordBatch) -> Result<Arc<DataFrame>>
Creates a DataFrame
for reading a RecordBatch
sourcepub async fn register_listing_table(
&self,
name: &str,
table_path: impl AsRef<str>,
options: ListingOptions,
provided_schema: Option<SchemaRef>,
sql_definition: Option<String>
) -> Result<()>
pub async fn register_listing_table(
&self,
name: &str,
table_path: impl AsRef<str>,
options: ListingOptions,
provided_schema: Option<SchemaRef>,
sql_definition: Option<String>
) -> Result<()>
Registers a ListingTable
that can assemble multiple files
from locations in an ObjectStore
instance into a single
table.
This method is async
because it might need to resolve the schema.
sourcepub async fn register_csv(
&self,
name: &str,
table_path: &str,
options: CsvReadOptions<'_>
) -> Result<()>
pub async fn register_csv(
&self,
name: &str,
table_path: &str,
options: CsvReadOptions<'_>
) -> Result<()>
Registers a CSV file as a table which can be referenced from SQL statements executed against this context.
sourcepub async fn register_json(
&self,
name: &str,
table_path: &str,
options: NdJsonReadOptions<'_>
) -> Result<()>
pub async fn register_json(
&self,
name: &str,
table_path: &str,
options: NdJsonReadOptions<'_>
) -> Result<()>
Registers a JSON file as a table that can be referenced from SQL statements executed against this context.
sourcepub async fn register_parquet(
&self,
name: &str,
table_path: &str,
options: ParquetReadOptions<'_>
) -> Result<()>
pub async fn register_parquet(
&self,
name: &str,
table_path: &str,
options: ParquetReadOptions<'_>
) -> Result<()>
Registers a Parquet file as a table that can be referenced from SQL statements executed against this context.
sourcepub async fn register_avro(
&self,
name: &str,
table_path: &str,
options: AvroReadOptions<'_>
) -> Result<()>
pub async fn register_avro(
&self,
name: &str,
table_path: &str,
options: AvroReadOptions<'_>
) -> Result<()>
Registers an Avro file as a table that can be referenced from SQL statements executed against this context.
sourcepub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
pub fn register_catalog(
&self,
name: impl Into<String>,
catalog: Arc<dyn CatalogProvider>
) -> Option<Arc<dyn CatalogProvider>>
Registers a named catalog using a custom CatalogProvider
so that
it can be referenced from SQL statements executed against this
context.
Returns the CatalogProvider
previously registered for this
name, if any
sourcepub fn catalog_names(&self) -> Vec<String> ⓘ
pub fn catalog_names(&self) -> Vec<String> ⓘ
Retrieves the list of available catalog names.
sourcepub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
pub fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>>
Retrieves a CatalogProvider
instance by name
sourcepub fn register_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn register_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>,
provider: Arc<dyn TableProvider>
) -> Result<Option<Arc<dyn TableProvider>>>
Registers a TableProvider
as a table that can be
referenced from SQL statements executed against this context.
Returns the TableProvider
previously registered for this
reference, if any
sourcepub fn deregister_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
pub fn deregister_table<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Option<Arc<dyn TableProvider>>>
Deregisters the given table.
Returns the registered provider, if any
sourcepub fn table_exist<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<bool>
pub fn table_exist<'a>(
&'a self,
table_ref: impl Into<TableReference<'a>>
) -> Result<bool>
Return true if the specified table exists in the schema provider.
sourcepub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<DataFrame>>
pub fn table<'a>(
&self,
table_ref: impl Into<TableReference<'a>>
) -> Result<Arc<DataFrame>>
Retrieves a DataFrame
representing a table previously
registered by calling the register_table
function.
Returns an error if no table has been registered with the provided reference.
sourcepub fn tables(&self) -> Result<HashSet<String>>
👎Deprecated: Please use the catalog provider interface (SessionContext::catalog) to examine available catalogs, schemas, and tables
pub fn tables(&self) -> Result<HashSet<String>>
Deprecated: Please use the catalog provider interface (SessionContext::catalog) to examine available catalogs, schemas, and tables.
Returns the set of available tables in the default catalog and schema.
Use table
to get a specific table.
sourcepub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
pub fn optimize(&self, plan: &LogicalPlan) -> Result<LogicalPlan>
Optimizes the logical plan by applying optimizer rules.
sourcepub async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
pub async fn create_physical_plan(
&self,
logical_plan: &LogicalPlan
) -> Result<Arc<dyn ExecutionPlan>>
Creates a physical plan from a logical plan.
sourcepub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
pub async fn write_csv(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
Executes a query and writes the results to a partitioned CSV file.
sourcepub async fn write_json(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
pub async fn write_json(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>
) -> Result<()>
Executes a query and writes the results to a partitioned JSON file.
sourcepub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>,
writer_properties: Option<WriterProperties>
) -> Result<()>
pub async fn write_parquet(
&self,
plan: Arc<dyn ExecutionPlan>,
path: impl AsRef<str>,
writer_properties: Option<WriterProperties>
) -> Result<()>
Executes a query and writes the results to a partitioned Parquet file.
sourcepub fn task_ctx(&self) -> Arc<TaskContext>
pub fn task_ctx(&self) -> Arc<TaskContext>
Get a new TaskContext to run in this session
sourcepub fn state(&self) -> SessionState
pub fn state(&self) -> SessionState
Get a copy of the SessionState
of this SessionContext
Trait Implementations§
source§impl Clone for SessionContext
impl Clone for SessionContext
source§fn clone(&self) -> SessionContext
fn clone(&self) -> SessionContext
1.0.0 · source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
source§impl Default for SessionContext
impl Default for SessionContext
source§impl From<&SessionContext> for TaskContext
impl From<&SessionContext> for TaskContext
Create a new task context instance from SessionContext