Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -6027,6 +6027,12 @@
],
"sqlState" : "42836"
},
"RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE" : {
"message" : [
"Recursive file loading is not supported when the data source has explicit partition columns. Either remove the option \"recursiveFileLookup\", or read the data without supplying partition columns (for example, do not read a partitioned table)."
],
"sqlState" : "0A000"
},
"RECURSIVE_PROTOBUF_SCHEMA" : {
"message" : [
"Found recursive reference in Protobuf schema, which can not be processed by Spark by default: <fieldDescriptor>. try setting the option `recursive.fields.max.depth` 1 to 10. Going beyond 10 levels of recursion is not allowed."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3426,6 +3426,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
"newPath" -> newPath.map(toSQLId).mkString(" -> ")))
}

/**
 * Error returned when the `recursiveFileLookup` option is used together with a
 * data source that has explicit partition columns. Carries the error condition
 * `RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE` (SQLSTATE 0A000,
 * feature not supported); the condition's message template takes no parameters,
 * hence the empty parameter map.
 */
def recursiveFileLookupNotSupportedForPartitionedDataSourceError(): Throwable =
  new AnalysisException(
    errorClass = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE",
    messageParameters = Map.empty)

def notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError(
viewNameParts: Seq[String],
attr: Attribute): Throwable = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.ArrayImplicits._

Expand Down Expand Up @@ -89,8 +90,7 @@ abstract class PartitioningAwareFileIndex(
PartitionDirectory(InternalRow.empty, allFiles().toArray.filter(isNonEmptyFile))) :: Nil
} else {
if (recursiveFileLookup) {
throw new IllegalArgumentException(
"Datasource with partition do not allow recursive file loading.")
throw QueryCompilationErrors.recursiveFileLookupNotSupportedForPartitionedDataSourceError()
}
prunePartitions(partitionFilters, partitionSpec()).map {
case PartitionPath(values, path) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,29 @@ class FileBasedDataSourceSuite extends SharedSparkSession
assert(fileList.toSet === expectedFileList.toSet)
}

// Regression test: reading a partitioned catalog table with
// "recursiveFileLookup" enabled must fail analysis with the dedicated error
// condition rather than a generic IllegalArgumentException.
test("recursiveFileLookup with a partitioned catalog table is rejected") {
withTable("part_tbl") {
// Create a table whose layout has explicit partition columns so that the
// partition-aware file index takes the partitioned code path.
sql(
"""
|CREATE TABLE part_tbl (id INT, value STRING)
|USING parquet
|PARTITIONED BY (year INT)
|""".stripMargin)
// Two partitions so the read genuinely has partition directories to scan.
sql("INSERT INTO part_tbl PARTITION (year = 2024) VALUES (1, 'a')")
sql("INSERT INTO part_tbl PARTITION (year = 2025) VALUES (2, 'b')")
// The failure is expected at file listing time, so collect() is needed to
// force the scan; checkError also verifies the (empty) message parameters.
checkError(
exception = intercept[AnalysisException] {
spark.read
.option("recursiveFileLookup", "true")
.table("part_tbl")
.collect()
},
condition = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE",
parameters = Map.empty[String, String]
)
}
}

test("Return correct results when data columns overlap with partition columns") {
Seq("parquet", "orc", "json").foreach { format =>
withTempPath { path =>
Expand Down