Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 86 additions & 15 deletions backends-velox/benchmark/ColumnarTableCacheBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,94 @@
OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/parquet table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 16773 17024 401 1.2 838.7 1.0X
enable columnar table cache 9985 10051 65 2.0 499.3 1.0X
disable columnar table cache 17377 18043 1147 0.6 1737.7 1.0X
enable columnar table cache 2975 3304 361 3.4 297.5 1.0X


OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache column pruning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/parquet table cache column pruning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 16429 16873 688 1.2 821.5 1.0X
enable columnar table cache 15118 15495 456 1.3 755.9 1.0X
disable columnar table cache 20768 20972 307 0.5 2076.8 1.0X
enable columnar table cache 3778 3988 297 2.6 377.8 1.0X


OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/parquet table cache filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 22895 23527 722 0.9 1144.7 1.0X
enable columnar table cache 16673 17462 765 1.2 833.7 1.0X
disable columnar table cache 22681 22850 248 0.4 2268.1 1.0X
enable columnar table cache 4242 4353 98 2.4 424.2 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/csv table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 40502 40834 301 0.2 4050.2 1.0X
enable columnar table cache 30146 30620 471 0.3 3014.6 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/csv table cache column pruning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 42245 42431 165 0.2 4224.5 1.0X
enable columnar table cache 30667 31005 395 0.3 3066.7 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/csv table cache filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 43929 44071 127 0.2 4392.9 1.0X
enable columnar table cache 31077 31505 376 0.3 3107.7 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/json table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 44659 44947 274 0.2 4465.9 1.0X
enable columnar table cache 28467 28798 502 0.4 2846.7 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/json table cache column pruning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 46961 47499 465 0.2 4696.1 1.0X
enable columnar table cache 29230 29620 427 0.3 2923.0 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
numeric/json table cache filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 49106 49767 638 0.2 4910.6 1.0X
enable columnar table cache 29061 29906 742 0.3 2906.1 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
wide-string/parquet table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 40888 41183 267 0.0 20444.2 1.0X
enable columnar table cache 11863 16043 NaN 0.2 5931.7 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
wide-string/csv table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 82433 83101 849 0.0 41216.5 1.0X
enable columnar table cache 86708 86768 58 0.0 43353.8 1.0X


OpenJDK 64-Bit Server VM 17.0.18+8-Ubuntu-124.04.1 on Linux 6.6.87.2-microsoft-standard-WSL2
Intel(R) Xeon(R) Platinum 8473C
wide-string/json table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 70729 74004 NaN 0.0 35364.4 1.0X
enable columnar table cache 54856 56680 1607 0.0 27428.2 1.0X
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,66 @@ package org.apache.spark.sql.execution.benchmark
import org.apache.gluten.config.GlutenConfig

import org.apache.spark.benchmark.Benchmark
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{col, repeat}
import org.apache.spark.storage.StorageLevel

import java.io.File

/**
* Benchmark to measure performance for columnar table cache. To run this benchmark:
* Benchmark to measure performance for columnar table cache across source formats and schema
* shapes. To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
* }}}
*
* Matrix:
* - sources : parquet (velox-native columnar), csv, json (row-based fallback per GLUTEN-3456)
* - shapes : numeric (5 cols mixed), wide-string (16 x ~200 char) -- the GLUTEN-3488 hazard
* - cases : count / column pruning / filter for numeric; count only for wide-string
*/
object ColumnarTableCacheBenchmark extends SqlBasedBenchmark {
private val numRows = 20L * 1000 * 1000
private val numRows = 10L * 1000 * 1000
private val wideStringRows = 2L * 1000 * 1000
private val wideStringCols = 16
private val wideStringLen = 200

private val sources = Seq("parquet", "csv", "json")

private def numericDf(): DataFrame = {
spark
.range(numRows)
.selectExpr(
"cast(id as int) as c0",
"cast(id as double) as c1",
"id as c2",
"cast(id as string) as c3",
"uuid() as c4")
}

private def wideStringDf(): DataFrame = {
val cols = (0 until wideStringCols).map(
i =>
repeat(col("id").cast("string"), wideStringLen / 8 + 1).as(s"c$i"))
spark.range(wideStringRows).select(cols: _*)
}

private def writeSource(df: DataFrame, source: String, dir: File): String = {
val path = new File(dir, source).getCanonicalPath
val writer = df.write.format(source)
val w = if (source == "csv") writer.option("header", "true") else writer
w.save(path)
path
}

private def readSource(source: String, path: String): DataFrame = {
val reader = spark.read.format(source)
val r = if (source == "csv") {
reader.option("header", "true").option("inferSchema", "true")
} else reader
r.load(path)
}

private def doBenchmark(name: String, cardinality: Long)(f: => Unit): Unit = {
val benchmark = new Benchmark(name, cardinality, output = output)
Expand All @@ -50,38 +99,43 @@ object ColumnarTableCacheBenchmark extends SqlBasedBenchmark {
override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
withTempPath {
f =>
spark
.range(numRows)
.selectExpr(
"cast(id as int) as c0",
"cast(id as double) as c1",
"id as c2",
"cast(id as string) as c3",
"uuid() as c4")
.write
.parquet(f.getCanonicalPath)
// --- numeric workload: write once per source, run 3 cases ---
val numericPaths =
sources.map(src => src -> writeSource(numericDf(), src, new File(f, "numeric"))).toMap

doBenchmark("table cache count", numRows) {
spark.read.parquet(f.getCanonicalPath).persist(StorageLevel.MEMORY_ONLY).count()
spark.catalog.clearCache()
sources.foreach {
src =>
val path = numericPaths(src)
doBenchmark(s"numeric/$src table cache count", numRows) {
readSource(src, path).persist(StorageLevel.MEMORY_ONLY).count()
spark.catalog.clearCache()
}
doBenchmark(s"numeric/$src table cache column pruning", numRows) {
val cached = readSource(src, path).persist(StorageLevel.MEMORY_ONLY)
cached.select("c1", "c2").noop()
cached.select("c0", "c3").noop()
spark.catalog.clearCache()
}
doBenchmark(s"numeric/$src table cache filter", numRows) {
val cached = readSource(src, path).persist(StorageLevel.MEMORY_ONLY)
cached.where("c1 % 100 > 10").noop()
cached.where("c1 % 100 > 20").noop()
spark.catalog.clearCache()
}
}

doBenchmark("table cache column pruning", numRows) {
val cached = spark.read
.parquet(f.getCanonicalPath)
.persist(StorageLevel.MEMORY_ONLY)
cached.select("c1", "c2").noop()
cached.select("c0", "c3").noop()
spark.catalog.clearCache()
}
// --- wide-string workload: GLUTEN-3488 hazard reproducer, count only ---
val wsPaths = sources.map {
src => src -> writeSource(wideStringDf(), src, new File(f, "widestring"))
}.toMap

doBenchmark("table cache filter", numRows) {
val cached = spark.read
.parquet(f.getCanonicalPath)
.persist(StorageLevel.MEMORY_ONLY)
cached.where("c1 % 100 > 10").noop()
cached.where("c1 % 100 > 20").noop()
spark.catalog.clearCache()
sources.foreach {
src =>
val path = wsPaths(src)
doBenchmark(s"wide-string/$src table cache count", wideStringRows) {
readSource(src, path).persist(StorageLevel.MEMORY_ONLY).count()
spark.catalog.clearCache()
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,7 @@ object GlutenConfig extends ConfigRegistry {
buildStaticConf("spark.gluten.sql.columnar.tableCache")
.doc("Enable or disable columnar table cache.")
.booleanConf
.createWithDefault(false)
.createWithDefault(true)

val COLUMNAR_TABLE_CACHE_PARTITION_STATS_ENABLED =
buildConf("spark.gluten.sql.columnar.tableCache.partitionStats.enabled")
Expand Down
Loading