Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
83dcf21
[SPARK-56750] default path config
srielau May 6, 2026
8c57bb6
Address review comments
srielau May 7, 2026
df6c073
Address review-2 comments
srielau May 7, 2026
f7f9972
Address review-2 follow-ups
srielau May 7, 2026
d50da6e
Drop duplicate check at lookup; static-only at SET PATH
srielau May 7, 2026
7e789c4
Honor explicit SET PATH for system entry ordering; decouple SC default
srielau May 7, 2026
94e41a5
Drop explicit/implicit PATH distinction for fast-path kinds
srielau May 7, 2026
758ea64
Move resolution-order logic from Coordination to Strategy layer
srielau May 7, 2026
9d24154
Drop SPARK-56750 prefix from in-code lock-order comments
srielau May 7, 2026
b83d03c
Trim unrelated diff noise
srielau May 7, 2026
f345a53
Reject PATH token in DEFAULT_PATH config.
srielau May 8, 2026
ad6dbd6
Align single-pass count(*) rewrite with PATH shadowing.
srielau May 8, 2026
90abd8c
Wrap Scaladoc to fix line length (Scalastyle) in CatalogManager
srielau May 8, 2026
92af0a0
[SPARK-56750][SQL] Single-pass analyzer fixes for PATH and SQL UDFs
srielau May 8, 2026
251545d
[SPARK-56750][SQL] Replace session function kinds lambda with Catalog…
srielau May 9, 2026
c956d57
[SPARK-56750][SQL] Fix Scalastyle line length in SessionCatalog
srielau May 9, 2026
51437fb
[SPARK-56750][SQL] Revert single-pass resolver wiring for PATH PR
srielau May 11, 2026
7215160
Merge branch 'master' into SPARK-56750-default-path
srielau May 12, 2026
0614d10
[SPARK-56750][SQL] Drop AGENTS.md and Cursor rule added for lint remi…
srielau May 12, 2026
e14212a
Merge upstream/master into SPARK-56750-default-path (DEFAULT_PATH lan…
srielau May 12, 2026
b058edb
Merge origin/SPARK-56750-default-path (AGENTS drop) after upstream/ma…
srielau May 12, 2026
3adf8e1
[SPARK-56750][SQL] Fix Scaladoc links for JavaUnidoc (cloud-fan review)
srielau May 12, 2026
ac3927c
[SPARK-56750][SQL] Fix Scalastyle line length and Javadoc Catalogs link
srielau May 12, 2026
e7a9452
[SPARK-56750][SQL] Fix JavaUnidoc links for PATH catalog docs
srielau May 12, 2026
306bd3b
[SPARK-56750][SQL] Wrap CatalogPlugin Javadoc for 100-char checkstyle
srielau May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ singleTableSchema
: colTypeList EOF
;

singlePathElementList
: pathElement (COMMA pathElement)* EOF
;

singleRoutineParamList
: colDefinitionList EOF
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
* views, and functions.
* <p>
* Catalog implementations must implement this marker interface to be loaded by
* {@link Catalogs#load(String, SQLConf)}. The loader will instantiate catalog classes using the
* {@link org.apache.spark.sql.connector.catalog.Catalogs#load(String,SQLConf)}.
* The loader will instantiate catalog classes using the
* required public no-arg constructor. After creating an instance, it will be configured by calling
* {@link #initialize(String, CaseInsensitiveStringMap)}.
* {@link #initialize(String,CaseInsensitiveStringMap)}.
* <p>
* Catalog implementations are registered to a name by adding a configuration option to Spark:
* {@code spark.sql.catalog.catalog-name=com.example.YourCatalogClass}. All configuration properties
Expand All @@ -56,8 +57,8 @@ public interface CatalogPlugin {
/**
* Called to get this catalog's name.
* <p>
* This method is only called after {@link #initialize(String, CaseInsensitiveStringMap)} is
* called to pass the catalog's name.
* This method is only called after
* {@link #initialize(String,CaseInsensitiveStringMap)} is called to pass the catalog's name.
*/
String name();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1976,14 +1976,15 @@ class Analyzer(
* This is used for special syntax transformations (e.g., COUNT(*) -> COUNT(1)) that
* should only apply to builtin functions, not to user-defined functions.
*
* In legacy mode (sessionOrder="first"), temp functions shadow builtins, so an
* unqualified name that matches a temp function should NOT be treated as builtin.
* When the effective SQL PATH puts `system.session` before `system.builtin`, temp
* functions shadow builtins, so an unqualified name that matches a temp function
* should NOT be treated as builtin.
*/
private def matchesFunctionName(nameParts: Seq[String], expectedName: String): Boolean = {
if (!FunctionResolution.isUnqualifiedOrBuiltinFunctionName(nameParts, expectedName)) {
return false
}
if (nameParts.size == 1 && conf.sessionFunctionResolutionOrder == "first") {
if (nameParts.size == 1 && functionResolution.isSessionBeforeBuiltinInPath) {
Comment thread
srielau marked this conversation as resolved.
val v1Catalog = catalogManager.v1SessionCatalog
!v1Catalog.isTemporaryFunction(FunctionIdentifier(nameParts.head))
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ class FunctionResolution(
nameParts.length == 3 &&
nameParts.head.equalsIgnoreCase(CatalogManager.SYSTEM_CATALOG_NAME)

/**
* True iff `system.session` is searched before `system.builtin` in the effective SQL PATH.
*
* Drives the `count(*) -> count(1)` rewrite (which must skip transformation when a temp
* `count` shadows the builtin) and the `SessionCatalog` security check that blocks creating
* a temp function with a builtin's name. Reads the live PATH via `CatalogManager` and
* applies the same kinds extraction that drives `SessionCatalog`'s fast-path provider, so
* the predicate stays in sync with the lookup loop's actual order.
*/
def isSessionBeforeBuiltinInPath: Boolean = {
val path = catalogManager.sqlResolutionPathEntries(
catalogManager.currentCatalog.name(), catalogManager.currentNamespace.toSeq)
CatalogManager.systemFunctionKindsFromPath(path).headOption
.contains(org.apache.spark.sql.catalyst.catalog.SessionCatalog.Temp)
}

/**
* Produces the ordered list of candidate names for resolution. Expansion happens in two cases:
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ import org.apache.spark.sql.catalyst.util.CollationFactory
*/
class FunctionResolver(
expressionResolver: ExpressionResolver,
functionResolution: FunctionResolution,
protected val functionResolution: FunctionResolution,
aggregateExpressionResolver: AggregateExpressionResolver,
binaryArithmeticResolver: BinaryArithmeticResolver)
extends TreeNodeResolver[UnresolvedFunction, Expression]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ package org.apache.spark.sql.catalyst.analysis.resolver

import java.util.Locale

import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{
FunctionResolution,
ResolvedStar,
Star,
UnresolvedFunction,
Expand All @@ -35,6 +37,7 @@ import org.apache.spark.sql.internal.SQLConf
*/
trait FunctionResolverUtils {
protected def expressionResolver: ExpressionResolver
protected def functionResolution: FunctionResolution
protected def conf: SQLConf

private val scopes = expressionResolver.getNameScopes
Expand Down Expand Up @@ -99,7 +102,21 @@ trait FunctionResolverUtils {
unresolvedFunction: UnresolvedFunction,
normalizeFunctionName: Boolean = true
): Boolean = {
!unresolvedFunction.isDistinct && isCount(unresolvedFunction, normalizeFunctionName)
!unresolvedFunction.isDistinct &&
isCount(unresolvedFunction, normalizeFunctionName) &&
!isUnqualifiedCountShadowedByTemp(unresolvedFunction)
}

/**
* Keep single-pass behavior aligned with fixed-point: when PATH puts system.session before
* system.builtin and a temp `count` exists, unqualified `count(*)` must not be rewritten to
* `count(1)`.
*/
private def isUnqualifiedCountShadowedByTemp(unresolvedFunction: UnresolvedFunction): Boolean = {
unresolvedFunction.nameParts.length == 1 &&
functionResolution.isSessionBeforeBuiltinInPath &&
functionResolution.catalogManager.v1SessionCatalog
.isTemporaryFunction(FunctionIdentifier(unresolvedFunction.nameParts.head))
}

private def isCount(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import org.apache.spark.sql.errors.QueryCompilationErrors
*/
class HigherOrderFunctionResolver(
protected val expressionResolver: ExpressionResolver,
functionResolution: FunctionResolution)
protected val functionResolution: FunctionResolution)
extends TreeNodeResolver[UnresolvedFunction, Expression]
with ProducesUnresolvedSubtree
with CoercesExpressionTypes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,67 @@ class SessionCatalog(
identifier.copy(funcName = "") == SESSION_NAMESPACE_TEMPLATE

/**
* Session function kinds in resolution order for unqualified lookups.
* Matches [[SQLConf.sessionFunctionResolutionOrder]]: "first" (session first),
* "second" (default), "last" (builtin only; session tried after persistent).
* When set, unqualified builtin/temp function resolution uses this fixed kind order instead of
* [[catalogManagerForSessionFunctionKinds]] / [[SQLConf.systemPathOrder]]. For unit tests only;
* production relies on the catalog manager binding.
*/
private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] = {
conf.sessionFunctionResolutionOrder match {
case "first" => Seq(Temp, Builtin)
case "last" => Seq(Builtin)
case _ => Seq(Builtin, Temp) // "second" (default)
}
@volatile private var sessionFunctionKindsTestOverride: Option[Seq[SessionFunctionKind]] = None

/**
* Live PATH for session function kinds. Set from
* [[org.apache.spark.sql.connector.catalog.CatalogManager]]'s constructor via
* [[bindCatalogManagerForSessionFunctionKinds]] so unqualified lookups and the security check
* that blocks temp functions from shadowing builtins read the effective SQL PATH (post-`SET
* PATH`, with [[SQLConf.DEFAULT_PATH]] and [[SQLConf.defaultPathOrder]] fallbacks already
* applied).
*
* When unset (e.g. standalone [[SessionCatalog]] in tests), kinds derive from
* [[SQLConf.systemPathOrder]] -- the seeded default path -- without assuming other legacy
* resolution-order conf beyond seeding `defaultPathOrder`.
*/
@volatile private var catalogManagerForSessionFunctionKinds: Option[CatalogManager] = None

/**
* Wire live PATH-derived session function kinds from the session [[CatalogManager]].
* Called once from [[org.apache.spark.sql.connector.catalog.CatalogManager]]'s constructor.
*/
private[sql] def bindCatalogManagerForSessionFunctionKinds(cm: CatalogManager): Unit = {
catalogManagerForSessionFunctionKinds = Some(cm)
}

/**
* Pin session function kinds for tests (`None` clears). Uses `private[sql]` so tests under the
* `org.apache.spark.sql` package can control ordering without a public catalog API.
*/
private[sql] def setSessionFunctionKindsTestOverride(
kinds: Option[Seq[SessionFunctionKind]]): Unit = {
sessionFunctionKindsTestOverride = kinds
}

/**
* Session function kinds in resolution order for unqualified lookups: test override if set,
* else live PATH from [[catalogManagerForSessionFunctionKinds]], else
* [[SQLConf.systemPathOrder]].
*/
private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] =
sessionFunctionKindsTestOverride.getOrElse {
catalogManagerForSessionFunctionKinds match {
case Some(cm) =>
CatalogManager.systemFunctionKindsFromPath(
cm.sqlResolutionPathEntries(cm.currentCatalog.name(), cm.currentNamespace.toSeq))
case None =>
CatalogManager.systemFunctionKindsFromPath(conf.systemPathOrder)
}
}

/**
* True iff the effective SQL PATH searches `system.session` before `system.builtin`. Used
* to gate the security check that blocks temporary functions from silently shadowing a
* builtin of the same name.
*/
private def sessionFirstInPath: Boolean =
sessionFunctionKindsInResolutionOrder.headOption.contains(Temp)

/**
* Checks if a namespace represents temporary functions.
*/
Expand Down Expand Up @@ -2080,12 +2129,11 @@ class SessionCatalog(
qualifyIdentifier(func)
}

// Security check: When legacy mode is enabled, block SQL-created temporary functions
// from shadowing builtin functions (to preserve master behavior)
// Scala UDFs are still allowed to shadow in legacy mode
// We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists
val sessionFirst = conf.sessionFunctionResolutionOrder == "first"
if (func.database.isEmpty && sessionFirst && !overrideIfExists) {
// Security check: when the effective SQL PATH searches `system.session` before
// `system.builtin`, block creating an unqualified temporary function whose name
// collides with a builtin so it cannot silently shadow that builtin via unqualified
// resolution. We throw ROUTINE_ALREADY_EXISTS to indicate the conflict.
if (func.database.isEmpty && sessionFirstInPath && !overrideIfExists) {
val funcName = func.funcName
// Check if function exists in builtin namespace (extensions are stored as builtins)
val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName)
Expand Down Expand Up @@ -2195,10 +2243,11 @@ class SessionCatalog(
// Use FunctionIdentifier with session namespace for temporary functions
val tempIdentifier = tempFunctionIdentifier(function.name.funcName)

// Security check: When legacy mode is enabled, block SQL-created temporary functions
// from shadowing builtin functions (including extensions) as a safeguard
// We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists
if ((conf.sessionFunctionResolutionOrder == "first") && !overrideIfExists) {
// Security check: when the effective SQL PATH searches `system.session` before
// `system.builtin`, block creating an unqualified temporary function whose name
// collides with a builtin (including extensions) so it cannot silently shadow that
// builtin via unqualified resolution.
if (sessionFirstInPath && !overrideIfExists) {
val funcName = function.name.funcName
// Check if function exists in builtin namespace (extensions are stored as builtins)
val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName)
Expand Down Expand Up @@ -2499,7 +2548,12 @@ class SessionCatalog(
* Look up the `ExpressionInfo` of the given function by name.
* Resolution order follows the configured path (e.g. builtin then session).
*/
def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = synchronized {
def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = {
// Intentionally not `synchronized` on this [[SessionCatalog]]. Resolution order may call
// into [[CatalogManager]] (e.g. [[CatalogManager.sqlResolutionPathEntries]]), which can
// synchronize on the manager; another
// thread can hold that lock and call into this catalog (e.g. via `setCurrentNamespace`),
// which would deadlock if this method also synchronized on `this`.
lookupFunctionWithShadowing(name, tableFunctionRegistry, checkBuiltinOperators = false)
}

Expand Down Expand Up @@ -2650,7 +2704,11 @@ class SessionCatalog(
/**
* Look up the [[ExpressionInfo]] associated with the specified function, assuming it exists.
*/
def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = synchronized {
def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = {
// Intentionally not `synchronized` on this [[SessionCatalog]] (see
// [[lookupBuiltinOrTempTableFunction]]): unqualified builtin/temp resolution uses
// [[sessionFunctionKindsInResolutionOrder]] / [[CatalogManager]] and must not run under
// this catalog's intrinsic lock.
if (name.database.isEmpty) {
lookupBuiltinOrTempFunction(name.funcName)
.orElse(lookupBuiltinOrTempTableFunction(name.funcName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin
import org.apache.spark.sql.catalyst.plans.logical.{CompoundPlanStatement, LogicalPlan}
import org.apache.spark.sql.catalyst.trees.Origin
import org.apache.spark.sql.connector.catalog.PathElement
import org.apache.spark.sql.errors.QueryParsingErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StructType
Expand Down Expand Up @@ -110,6 +111,18 @@ abstract class AbstractSqlParser extends AbstractParser with ParserInterface {
}
}

/**
* Parse the right-hand side of `SET PATH = ...` (a comma-separated list of path elements).
* Used by [[org.apache.spark.sql.connector.catalog.CatalogManager]] to honor the
* [[SQLConf.DEFAULT_PATH]] conf without re-implementing the SET PATH grammar.
*/
private[sql] def parsePathElements(sqlText: String): Seq[PathElement] = parse(sqlText) { parser =>
val ctx = parser.singlePathElementList()
withErrorHandling(ctx, Some(sqlText)) {
astBuilder.visitSinglePathElementList(ctx)
}
}

def withErrorHandling[T](ctx: ParserRuleContext, sqlText: Option[String])(toResult: => T): T = {
withOrigin(ctx, sqlText) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, EvaluateUnresolvedInlineTable, IntervalUtils}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampWithoutTimeZone}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, SupportsNamespaces, TableCatalog, TableWritePrivilege}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, PathElement, SupportsNamespaces, TableCatalog, TableWritePrivilege}
import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
Expand Down Expand Up @@ -708,6 +708,26 @@ class AstBuilder extends DataTypeAstBuilder
visitMultipartIdentifier(ctx.multipartIdentifier)
}

override def visitSinglePathElementList(
ctx: SinglePathElementListContext): Seq[PathElement] = withOrigin(ctx) {
ctx.pathElement().asScala.map(visitPathElement).toSeq
}

override def visitPathElement(ctx: PathElementContext): PathElement = withOrigin(ctx) {
if (ctx.DEFAULT_PATH() != null) PathElement.DefaultPath
else if (ctx.SYSTEM_PATH() != null) PathElement.SystemPath
else if (ctx.PATH() != null) PathElement.PathRef
else if (ctx.CURRENT_DATABASE() != null || ctx.CURRENT_SCHEMA() != null) {
PathElement.CurrentSchema
} else {
val parts = visitMultipartIdentifier(ctx.multipartIdentifier())
if (parts.length < 2) {
throw QueryCompilationErrors.invalidSqlPathSchemaReferenceError(parts.mkString("."))
}
PathElement.SchemaInPath(parts)
}
}

override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) {
typedVisit[DataType](ctx.dataType)
}
Expand Down
Loading