Conversation
Benchmarks: PolarSignals Profiling
Summary
Detailed Results Table
|
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨Benchmark |
Polar Signals Profiling Results
Latest Run
Previous Runs (2)
Powered by Polar Signals Cloud |
Benchmarks: Random Access
Summary
|
| if: matrix.remote_storage == null || github.event.pull_request.head.repo.fork == true | ||
| shell: bash | ||
| env: | ||
| VORTEX_USE_SCAN_API: "1" |
There was a problem hiding this comment.
Should this be 0, or should the old one be removed?
| loop { | ||
| // Try to pull from the current child's split stream. | ||
| if let Some(ref mut child_stream) = current_stream { | ||
| match child_stream.next().await { | ||
| Some(Ok(split)) => { | ||
| if let Some(ref mut s) = state | ||
| && let Some(ref mut limit) = s.remaining_limit | ||
| { | ||
| let est = split.row_count_estimate(); | ||
| *limit = limit.saturating_sub(est.upper.unwrap_or(est.lower)); | ||
| } | ||
| return Some((Ok(split), (state, current_stream))); | ||
| } | ||
| Some(Err(e)) => { | ||
| return Some((Err(e), (None, None))); | ||
| } | ||
| None => { | ||
| // Current child exhausted, move to next. | ||
| drop(current_stream.take()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| let s = state.as_mut()?; | ||
|
|
||
| if s.remaining_limit.is_some_and(|l| l == 0) { | ||
| return None; | ||
| } | ||
|
|
||
| // Get the next data source. | ||
| let source = match s.next_source().await { | ||
| Ok(Some(source)) => source, | ||
| Ok(None) => return None, | ||
| Err(e) => return Some((Err(e), (None, None))), | ||
| }; | ||
|
|
||
| if source.dtype() != &s.dtype { | ||
| return Some(( | ||
| Err(vortex_err!( | ||
| "MultiDataSource dtype mismatch: expected {}, got {}", | ||
| s.dtype, | ||
| source.dtype() | ||
| )), | ||
| (None, None), | ||
| )); | ||
| } | ||
|
|
||
| let mut child_request = s.request.clone(); | ||
| child_request.limit = s.remaining_limit; | ||
| let child_scan = match source.scan(child_request) { | ||
| Ok(scan) => scan, | ||
| Err(e) => return Some((Err(e), (None, None))), | ||
| }; | ||
|
|
||
| current_stream = Some(child_scan.splits()); | ||
| } |
There was a problem hiding this comment.
This can be extracted into a helper method that takes &mut current_stream; then you can do:

impl DataSourceScan for MultiDataSourceScan {
    fn splits(self: Box<Self>) -> SplitStream {
        stream::unfold(
            (*self, None::<SplitStream>),
            |(mut scan, mut current_stream)| async move {
                let result = scan.next_split(&mut current_stream).await?;
                Some((result, (scan, current_stream)))
            },
        )
        .boxed()
    }
}

It doesn't seem like you need it to be Some(*self).
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Signed-off-by: Nicholas Gates <nick@nickgates.com>
Experiment to use the Scan API from DuckDB and DataFusion integrations.