Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ regex = "1.12"
rstest = "0.26.1"
serde_json = "1"
sqlparser = { version = "0.59.0", default-features = false, features = ["std", "visitor"] }
strum = "0.27.2"
strum_macros = "0.27.2"
tempfile = "3"
testcontainers = { version = "0.25.2", features = ["default"] }
testcontainers-modules = { version = "0.13" }
Expand Down
2 changes: 2 additions & 0 deletions datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ object_store = { workspace = true, features = ["aws", "http"] }
prost = { workspace = true }
rand = { workspace = true }
serde_json = { workspace = true }
strum = { workspace = true }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are not net-new workspace dependencies (see the Cargo.lock file) -- we already have them in the workspace, so I think it is OK to add them here.

strum_macros = { workspace = true }
tempfile = { workspace = true }
test-utils = { path = "../test-utils" }
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
Expand Down
64 changes: 14 additions & 50 deletions datafusion-examples/examples/builtin_functions/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,67 +34,31 @@ mod date_time;
mod function_factory;
mod regexp;

use std::str::FromStr;

use datafusion::error::{DataFusionError, Result};
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{Display, EnumIter, EnumString, VariantNames};

/// Selects which example of this group to run.
///
/// The derives are imported from `strum_macros`; the code in this file relies
/// on them for iterating over the variants (`ExampleKind::iter()`), parsing a
/// variant from a string, formatting a variant with `{}`, and listing the
/// variant names (`ExampleKind::VARIANTS`).
/// `serialize_all = "snake_case"` selects snake_case for those string forms
/// (e.g. `FunctionFactory` <-> `function_factory`).
#[derive(EnumIter, EnumString, Display, VariantNames)]
#[strum(serialize_all = "snake_case")]
enum ExampleKind {
// Pseudo-example: runs every other variant in turn.
All,
DateTime,
FunctionFactory,
Regexp,
}

impl AsRef<str> for ExampleKind {
fn as_ref(&self) -> &str {
match self {
Self::All => "all",
Self::DateTime => "date_time",
Self::FunctionFactory => "function_factory",
Self::Regexp => "regexp",
}
}
}

impl FromStr for ExampleKind {
type Err = DataFusionError;

fn from_str(s: &str) -> Result<Self> {
match s {
"all" => Ok(Self::All),
"date_time" => Ok(Self::DateTime),
"function_factory" => Ok(Self::FunctionFactory),
"regexp" => Ok(Self::Regexp),
_ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))),
}
}
}

impl ExampleKind {
const ALL_VARIANTS: [Self; 4] = [
Self::All,
Self::DateTime,
Self::FunctionFactory,
Self::Regexp,
];

const RUNNABLE_VARIANTS: [Self; 3] =
[Self::DateTime, Self::FunctionFactory, Self::Regexp];

const EXAMPLE_NAME: &str = "builtin_functions";

fn variants() -> Vec<&'static str> {
Self::ALL_VARIANTS
.iter()
.map(|example| example.as_ref())
.collect()
fn runnable() -> impl Iterator<Item = ExampleKind> {
ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All))
}

async fn run(&self) -> Result<()> {
match self {
ExampleKind::All => {
for example in ExampleKind::RUNNABLE_VARIANTS {
println!("Running example: {}", example.as_ref());
for example in ExampleKind::runnable() {
println!("Running example: {example}");
Box::pin(example.run()).await?;
}
}
Expand All @@ -111,14 +75,14 @@ async fn main() -> Result<()> {
let usage = format!(
"Usage: cargo run --example {} -- [{}]",
ExampleKind::EXAMPLE_NAME,
ExampleKind::variants().join("|")
ExampleKind::VARIANTS.join("|")
);

let arg = std::env::args().nth(1).ok_or_else(|| {
eprintln!("{usage}");
DataFusionError::Execution("Missing argument".to_string())
})?;
let example: ExampleKind = std::env::args()
.nth(1)
.ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))?
.parse()
.map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?;

let example = arg.parse::<ExampleKind>()?;
example.run().await
}
83 changes: 14 additions & 69 deletions datafusion-examples/examples/custom_data_source/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ mod custom_file_format;
mod default_column_values;
mod file_stream_provider;

use std::str::FromStr;

use datafusion::error::{DataFusionError, Result};
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{Display, EnumIter, EnumString, VariantNames};

#[derive(EnumIter, EnumString, Display, VariantNames)]
#[strum(serialize_all = "snake_case")]
enum ExampleKind {
All,
CsvJsonOpener,
Expand All @@ -57,75 +59,18 @@ enum ExampleKind {
FileStreamProvider,
}

impl AsRef<str> for ExampleKind {
fn as_ref(&self) -> &str {
match self {
Self::All => "all",
Self::CsvJsonOpener => "csv_json_opener",
Self::CsvSqlStreaming => "csv_sql_streaming",
Self::CustomDatasource => "custom_datasource",
Self::CustomFileCasts => "custom_file_casts",
Self::CustomFileFormat => "custom_file_format",
Self::DefaultColumnValues => "default_column_values",
Self::FileStreamProvider => "file_stream_provider",
}
}
}

impl FromStr for ExampleKind {
type Err = DataFusionError;

fn from_str(s: &str) -> Result<Self> {
match s {
"all" => Ok(Self::All),
"csv_json_opener" => Ok(Self::CsvJsonOpener),
"csv_sql_streaming" => Ok(Self::CsvSqlStreaming),
"custom_datasource" => Ok(Self::CustomDatasource),
"custom_file_casts" => Ok(Self::CustomFileCasts),
"custom_file_format" => Ok(Self::CustomFileFormat),
"default_column_values" => Ok(Self::DefaultColumnValues),
"file_stream_provider" => Ok(Self::FileStreamProvider),
_ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))),
}
}
}

impl ExampleKind {
const ALL_VARIANTS: [Self; 8] = [
Self::All,
Self::CsvJsonOpener,
Self::CsvSqlStreaming,
Self::CustomDatasource,
Self::CustomFileCasts,
Self::CustomFileFormat,
Self::DefaultColumnValues,
Self::FileStreamProvider,
];

const RUNNABLE_VARIANTS: [Self; 7] = [
Self::CsvJsonOpener,
Self::CsvSqlStreaming,
Self::CustomDatasource,
Self::CustomFileCasts,
Self::CustomFileFormat,
Self::DefaultColumnValues,
Self::FileStreamProvider,
];

const EXAMPLE_NAME: &str = "custom_data_source";

fn variants() -> Vec<&'static str> {
Self::ALL_VARIANTS
.iter()
.map(|example| example.as_ref())
.collect()
fn runnable() -> impl Iterator<Item = ExampleKind> {
ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All))
}

async fn run(&self) -> Result<()> {
match self {
ExampleKind::All => {
for example in ExampleKind::RUNNABLE_VARIANTS {
println!("Running example: {}", example.as_ref());
for example in ExampleKind::runnable() {
println!("Running example: {example}");
Box::pin(example.run()).await?;
}
}
Expand Down Expand Up @@ -158,14 +103,14 @@ async fn main() -> Result<()> {
let usage = format!(
"Usage: cargo run --example {} -- [{}]",
ExampleKind::EXAMPLE_NAME,
ExampleKind::variants().join("|")
ExampleKind::VARIANTS.join("|")
);

let arg = std::env::args().nth(1).ok_or_else(|| {
eprintln!("{usage}");
DataFusionError::Execution("Missing argument".to_string())
})?;
let example: ExampleKind = std::env::args()
.nth(1)
.ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))?
.parse()
.map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?;

let example = arg.parse::<ExampleKind>()?;
example.run().await
}
95 changes: 14 additions & 81 deletions datafusion-examples/examples/data_io/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,12 @@ mod parquet_index;
mod query_http_csv;
mod remote_catalog;

use std::str::FromStr;

use datafusion::error::{DataFusionError, Result};
use strum::{IntoEnumIterator, VariantNames};
use strum_macros::{Display, EnumIter, EnumString, VariantNames};

#[derive(EnumIter, EnumString, Display, VariantNames)]
#[strum(serialize_all = "snake_case")]
enum ExampleKind {
All,
Catalog,
Expand All @@ -66,87 +68,18 @@ enum ExampleKind {
RemoteCatalog,
}

impl AsRef<str> for ExampleKind {
fn as_ref(&self) -> &str {
match self {
Self::All => "all",
Self::Catalog => "catalog",
Self::JsonShredding => "json_shredding",
Self::ParquetAdvIdx => "parquet_adv_idx",
Self::ParquetEmbIdx => "parquet_emb_idx",
Self::ParquetEnc => "parquet_enc",
Self::ParquetEncWithKms => "parquet_enc_with_kms",
Self::ParquetExecVisitor => "parquet_exec_visitor",
Self::ParquetIdx => "parquet_idx",
Self::QueryHttpCsv => "query_http_csv",
Self::RemoteCatalog => "remote_catalog",
}
}
}

impl FromStr for ExampleKind {
type Err = DataFusionError;

fn from_str(s: &str) -> Result<Self> {
match s {
"all" => Ok(Self::All),
"catalog" => Ok(Self::Catalog),
"json_shredding" => Ok(Self::JsonShredding),
"parquet_adv_idx" => Ok(Self::ParquetAdvIdx),
"parquet_emb_idx" => Ok(Self::ParquetEmbIdx),
"parquet_enc" => Ok(Self::ParquetEnc),
"parquet_enc_with_kms" => Ok(Self::ParquetEncWithKms),
"parquet_exec_visitor" => Ok(Self::ParquetExecVisitor),
"parquet_idx" => Ok(Self::ParquetIdx),
"query_http_csv" => Ok(Self::QueryHttpCsv),
"remote_catalog" => Ok(Self::RemoteCatalog),
_ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))),
}
}
}

impl ExampleKind {
const ALL_VARIANTS: [Self; 11] = [
Self::All,
Self::Catalog,
Self::JsonShredding,
Self::ParquetAdvIdx,
Self::ParquetEmbIdx,
Self::ParquetEnc,
Self::ParquetEncWithKms,
Self::ParquetExecVisitor,
Self::ParquetIdx,
Self::QueryHttpCsv,
Self::RemoteCatalog,
];

const RUNNABLE_VARIANTS: [Self; 10] = [
Self::Catalog,
Self::JsonShredding,
Self::ParquetAdvIdx,
Self::ParquetEmbIdx,
Self::ParquetEnc,
Self::ParquetEncWithKms,
Self::ParquetExecVisitor,
Self::ParquetIdx,
Self::QueryHttpCsv,
Self::RemoteCatalog,
];

const EXAMPLE_NAME: &str = "data_io";

fn variants() -> Vec<&'static str> {
Self::ALL_VARIANTS
.iter()
.map(|example| example.as_ref())
.collect()
fn runnable() -> impl Iterator<Item = ExampleKind> {
ExampleKind::iter().filter(|v| !matches!(v, ExampleKind::All))
}

async fn run(&self) -> Result<()> {
match self {
ExampleKind::All => {
for example in ExampleKind::RUNNABLE_VARIANTS {
println!("Running example: {}", example.as_ref());
for example in ExampleKind::runnable() {
println!("Running example: {example}");
Box::pin(example.run()).await?;
}
}
Expand Down Expand Up @@ -178,14 +111,14 @@ async fn main() -> Result<()> {
let usage = format!(
"Usage: cargo run --example {} -- [{}]",
ExampleKind::EXAMPLE_NAME,
ExampleKind::variants().join("|")
ExampleKind::VARIANTS.join("|")
);

let arg = std::env::args().nth(1).ok_or_else(|| {
eprintln!("{usage}");
DataFusionError::Execution("Missing argument".to_string())
})?;
let example: ExampleKind = std::env::args()
.nth(1)
.ok_or_else(|| DataFusionError::Execution(format!("Missing argument. {usage}")))?
.parse()
.map_err(|_| DataFusionError::Execution(format!("Unknown example. {usage}")))?;

let example = arg.parse::<ExampleKind>()?;
example.run().await
}
Loading