use datafusion::execution::SessionStateDefaults;
use datafusion_common::{not_impl_err, HashSet, Result};
use datafusion_expr::{
aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF,
DocSection, Documentation, ScalarUDF, WindowUDF,
};
use itertools::Itertools;
use std::env::args;
use std::fmt::Write as _;
fn main() -> Result<()> {
let args: Vec<String> = args().collect();
if args.len() != 2 {
panic!(
"Usage: {} type (one of 'aggregate', 'scalar', 'window')",
args[0]
);
}
let function_type = args[1].trim().to_lowercase();
let docs = match function_type.as_str() {
"aggregate" => print_aggregate_docs(),
"scalar" => print_scalar_docs(),
"window" => print_window_docs(),
_ => {
panic!("Unknown function type: {function_type}")
}
}?;
println!("{docs}");
Ok(())
}
/// Renders the markdown documentation for the default aggregate functions,
/// grouped by the aggregate doc sections.
fn print_aggregate_docs() -> Result<String> {
    // Clone each UDF out of its shared handle so it can be boxed as a
    // `DocProvider` trait object.
    let providers: Vec<Box<dyn DocProvider>> =
        SessionStateDefaults::default_aggregate_functions()
            .into_iter()
            .map(|udf| Box::new(udf.as_ref().clone()) as Box<dyn DocProvider>)
            .collect();
    print_docs(providers, aggregate_doc_sections::doc_sections())
}
/// Renders the markdown documentation for the default scalar functions,
/// grouped by the scalar doc sections.
fn print_scalar_docs() -> Result<String> {
    // Clone each UDF out of its shared handle so it can be boxed as a
    // `DocProvider` trait object.
    let providers: Vec<Box<dyn DocProvider>> =
        SessionStateDefaults::default_scalar_functions()
            .into_iter()
            .map(|udf| Box::new(udf.as_ref().clone()) as Box<dyn DocProvider>)
            .collect();
    print_docs(providers, scalar_doc_sections::doc_sections())
}
/// Renders the markdown documentation for the default window functions,
/// grouped by the window doc sections.
fn print_window_docs() -> Result<String> {
    // Clone each UDF out of its shared handle so it can be boxed as a
    // `DocProvider` trait object.
    let providers: Vec<Box<dyn DocProvider>> =
        SessionStateDefaults::default_window_functions()
            .into_iter()
            .map(|udf| Box::new(udf.as_ref().clone()) as Box<dyn DocProvider>)
            .collect();
    print_docs(providers, window_doc_sections::doc_sections())
}
/// Writes the doc-attribute rendering of `documentation` to `{name}.txt` in
/// the current working directory, replacing any previous content.
///
/// # Panics
///
/// Panics if the file cannot be created or written.
// No caller is visible in this file; the lint is silenced so the helper can
// stay available without tripping the dead-code warning.
#[allow(dead_code)]
fn save_doc_code_text(documentation: &Documentation, name: &str) {
    let attr_text = documentation.to_doc_attribute();
    let file_path = format!("{name}.txt");
    // `fs::write` creates the file or truncates an existing one in a single
    // call, which avoids the exists-then-remove race of doing it by hand.
    std::fs::write(&file_path, attr_text.as_bytes())
        .unwrap_or_else(|e| panic!("failed to write {file_path}: {e}"));
}
/// Renders markdown documentation for `providers`, grouped by `doc_sections`.
///
/// For every section that has at least one documented provider this emits:
/// a `##` heading (plus the section description, when present), a linked
/// index of all names and aliases in sorted order, and then one `###` entry
/// per name. Alias entries only point back at the function they alias; full
/// entries list the description, syntax, arguments, example, alternative
/// syntax, aliases and related functions, as far as each is available.
///
/// Sections without any documented provider are skipped.
///
/// # Errors
///
/// Returns a "not implemented" error (after listing the offenders on stderr)
/// when any provider has no documentation at all.
fn print_docs(
    providers: Vec<Box<dyn DocProvider>>,
    doc_sections: Vec<DocSection>,
) -> Result<String> {
    let mut docs = "".to_string();

    // Find undocumented functions once, up front. Doing this while filtering
    // a section (as a side effect) would miss them whenever no section ends
    // up with a documented provider.
    let mut providers_with_no_docs = HashSet::new();
    for f in &providers {
        if f.get_documentation().is_none() {
            providers_with_no_docs.insert(f.get_name());
        }
    }

    for doc_section in doc_sections {
        // Documented providers that belong to this section.
        let section_providers = providers
            .iter()
            .filter(|f| {
                matches!(
                    f.get_documentation(),
                    Some(documentation) if documentation.doc_section == doc_section
                )
            })
            .collect::<Vec<_>>();
        if section_providers.is_empty() {
            continue;
        }

        let _ = writeln!(docs, "\n## {} \n", doc_section.label);
        if let Some(description) = doc_section.description {
            let _ = writeln!(docs, "{description}");
        }

        // Index of every name and alias in the section.
        let names = get_names_and_aliases(&section_providers);
        for name in &names {
            let _ = writeln!(docs, "- [{name}](#{name})");
        }

        for name in names {
            let f = section_providers
                .iter()
                .find(|f| f.get_name() == name || f.get_aliases().contains(&name))
                .expect("every listed name comes from a provider in this section");
            let aliases = f.get_aliases();

            // An alias gets a short entry that links to the real function.
            if aliases.contains(&name) {
                let fname = f.get_name();
                let _ = writeln!(docs, r#"### `{name}`"#);
                let _ = writeln!(docs, "_Alias of [{fname}](#{fname})._");
                continue;
            }

            // Only documented providers survive the section filter above.
            let Some(documentation) = f.get_documentation() else {
                unreachable!()
            };

            let _ = write!(
                docs,
                r#"
### `{}`
{}
```sql
{}
```
"#,
                name, documentation.description, documentation.syntax_example
            );

            if let Some(args) = &documentation.arguments {
                let _ = writeln!(docs, "#### Arguments\n");
                for (arg_name, arg_desc) in args {
                    let _ = writeln!(docs, "- **{arg_name}**: {arg_desc}");
                }
            }

            if let Some(example) = &documentation.sql_example {
                let _ = writeln!(
                    docs,
                    r#"
#### Example
{example}
"#
                );
            }

            if let Some(alt_syntax) = &documentation.alternative_syntax {
                let _ = writeln!(docs, "#### Alternative Syntax\n");
                for syntax in alt_syntax {
                    let _ = writeln!(docs, "```sql\n{syntax}\n```");
                }
            }

            if !aliases.is_empty() {
                let _ = writeln!(docs, "#### Aliases");
                for alias in &aliases {
                    // Escape underscores so markdown does not read them as emphasis.
                    let _ = writeln!(docs, "- {}", alias.replace("_", r#"\_"#));
                }
            }

            if let Some(related_udfs) = &documentation.related_udfs {
                let _ = writeln!(docs, "\n**Related functions**:");
                for related in related_udfs {
                    let _ = writeln!(docs, "- [{related}](#{related})");
                }
            }
        }
    }

    if !providers_with_no_docs.is_empty() {
        eprintln!("INFO: The following functions do not have documentation:");
        for f in &providers_with_no_docs {
            eprintln!(" - {f}");
        }
        not_impl_err!("Some functions do not have documentation. Please implement `documentation` for: {providers_with_no_docs:?}")
    } else {
        Ok(docs)
    }
}
/// Uniform view over the function kinds this tool documents
/// (`AggregateUDF`, `ScalarUDF` and `WindowUDF`), so `print_docs` can render
/// them through a single `Box<dyn DocProvider>` list.
trait DocProvider {
/// Returns the function's primary name as an owned string.
fn get_name(&self) -> String;
/// Returns the function's alias names; empty when it has none.
fn get_aliases(&self) -> Vec<String>;
/// Returns the function's documentation, or `None` when it provides none.
fn get_documentation(&self) -> Option<&Documentation>;
}
/// Exposes an aggregate function's name, aliases and documentation to the
/// doc printer by forwarding to the UDF's own accessors.
impl DocProvider for AggregateUDF {
    fn get_name(&self) -> String {
        self.name().to_owned()
    }

    fn get_aliases(&self) -> Vec<String> {
        self.aliases().iter().map(ToString::to_string).collect()
    }

    fn get_documentation(&self) -> Option<&Documentation> {
        self.documentation()
    }
}
/// Exposes a scalar function's name, aliases and documentation to the doc
/// printer by forwarding to the UDF's own accessors.
impl DocProvider for ScalarUDF {
    fn get_name(&self) -> String {
        self.name().to_owned()
    }

    fn get_aliases(&self) -> Vec<String> {
        self.aliases().iter().map(ToString::to_string).collect()
    }

    fn get_documentation(&self) -> Option<&Documentation> {
        self.documentation()
    }
}
/// Exposes a window function's name, aliases and documentation to the doc
/// printer by forwarding to the UDF's own accessors.
impl DocProvider for WindowUDF {
    fn get_name(&self) -> String {
        self.name().to_owned()
    }

    fn get_aliases(&self) -> Vec<String> {
        self.aliases().iter().map(ToString::to_string).collect()
    }

    fn get_documentation(&self) -> Option<&Documentation> {
        self.documentation()
    }
}
/// Returns every primary name and alias of `functions` as one sorted list.
///
/// Each function contributes its name followed by all of its aliases; the
/// combined list is then sorted. Duplicates are not removed.
#[allow(clippy::borrowed_box)]
#[allow(clippy::ptr_arg)]
fn get_names_and_aliases(functions: &Vec<&Box<dyn DocProvider>>) -> Vec<String> {
    let mut all_names: Vec<String> = Vec::new();
    for provider in functions {
        all_names.push(provider.get_name());
        // Extending with an empty alias list is a no-op, so functions
        // without aliases need no special case.
        all_names.extend(provider.get_aliases());
    }
    all_names.into_iter().sorted().collect_vec()
}