41 changes: 41 additions & 0 deletions python/pyspark/sql/tests/test_catalog.py
@@ -588,6 +588,47 @@ def test_catalog_analyze_table(self):
            spark.sql(f"INSERT INTO {t} VALUES (1)")
            spark.catalog.analyzeTable(t, noScan=True)

    def test_path_current_path_disabled(self):
        # current_path() is a regular builtin and resolves even when
        # spark.sql.path.enabled is false. The DataFrame and SQL surfaces must agree.
        from pyspark.sql.functions import current_path

        spark = self.spark
        with self.sql_conf({"spark.sql.path.enabled": False}):
            sql_form = spark.sql("SELECT current_path()").collect()[0][0]
            self.assertIsInstance(sql_form, str)
            self.assertNotEqual(sql_form, "")
            api_form = spark.range(1).select(current_path()).collect()[0][0]
            self.assertEqual(sql_form, api_form)

    def test_path_set_path_and_current_path(self):
        # SET PATH is parsed and applied; current_path() reflects it on both the
        # SQL and DataFrame surfaces. Restores DEFAULT_PATH on exit.
        from pyspark.sql.functions import current_path

        spark = self.spark
        with self.sql_conf({"spark.sql.path.enabled": True}):
            try:
                spark.sql("SET PATH = spark_catalog.default, system.builtin")
                sql_form = spark.sql("SELECT current_path()").collect()[0][0]
                self.assertEqual(sql_form, "spark_catalog.default,system.builtin")
                api_form = spark.range(1).select(current_path()).collect()[0][0]
                self.assertEqual(sql_form, api_form)
            finally:
                spark.sql("SET PATH = DEFAULT_PATH")

    def test_path_set_path_rejected_when_disabled(self):
        # SET PATH must raise UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED when the
        # feature flag is off (covers both the classic and Connect error paths).
        spark = self.spark
        with self.sql_conf({"spark.sql.path.enabled": False}):
            with self.assertRaises(AnalysisException) as ctx:
                spark.sql("SET PATH = spark_catalog.default")
            self.assertEqual(
                ctx.exception.getCondition(),
                "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED",
            )


class CatalogTests(CatalogTestsMixin, ReusedSQLTestCase):
    pass
@@ -0,0 +1,98 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.catalog

import org.json4s.JsonAST.{JArray, JObject, JString}
import org.json4s.jackson.JsonMethods.{compact, render}

import org.apache.spark.SparkFunSuite

/**
 * Unit tests for [[SqlPathFormat]] -- the helper that converts the raw JSON-array-of-arrays
 * path stored on view / SQL function metadata into the JSON-object form used by DESCRIBE
 * AS JSON and the human-readable form used by DESCRIBE EXTENDED.
 */
class SqlPathFormatSuite extends SparkFunSuite {

  private def compactJson(v: JArray): String = compact(render(v))

  test("toDescribeJson: maps each [catalog, ns...] entry to a JSON object") {
    val stored =
      """[["spark_catalog","default"],["system","builtin"]]"""
    val result = SqlPathFormat.toDescribeJson(stored)
      .getOrElse(fail(s"Expected a JSON value, got None for: $stored"))
    val expected = JArray(List(
      JObject("catalog_name" -> JString("spark_catalog"),
        "namespace" -> JArray(List(JString("default")))),
      JObject("catalog_name" -> JString("system"),
        "namespace" -> JArray(List(JString("builtin"))))))
    assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected))
  }

  test("toDescribeJson: multi-level entry maps to a catalog head plus namespace tail") {
    val stored = """[["cat1","db","sub"]]"""
    val result = SqlPathFormat.toDescribeJson(stored)
      .getOrElse(fail("Expected a JSON value"))
    val expected = JArray(List(
      JObject("catalog_name" -> JString("cat1"),
        "namespace" -> JArray(List(JString("db"), JString("sub"))))))
    assert(compactJson(result.asInstanceOf[JArray]) == compactJson(expected))
  }

  test("toDescribeJson: empty array returns None") {
    assert(SqlPathFormat.toDescribeJson("[]").isEmpty)
  }

  test("toDescribeJson: malformed payloads return None") {
    Seq(
      "",
      "not_json",
      "{}",
      """{"foo":1}""",
      """[1, 2, 3]"""
    ).foreach { payload =>
      assert(SqlPathFormat.toDescribeJson(payload).isEmpty, s"payload=$payload")
    }
  }

  test("formatForDisplay: renders plain identifiers without backticks") {
    val json = SqlPathFormat.toDescribeJson(
      """[["spark_catalog","default"],["system","builtin"]]""")
      .getOrElse(fail("Expected a JSON value"))
    val rendered = SqlPathFormat.formatForDisplay(json)
      .getOrElse(fail("Expected a display string"))
    assert(rendered == "spark_catalog.default, system.builtin")
  }

  test("formatForDisplay: backticks identifiers that need quoting") {
    val json = SqlPathFormat.toDescribeJson(
      """[["spark_catalog","weird.schema"]]""")
      .getOrElse(fail("Expected a JSON value"))
    val rendered = SqlPathFormat.formatForDisplay(json)
      .getOrElse(fail("Expected a display string"))
    assert(rendered == "spark_catalog.`weird.schema`")
  }

  test("formatForDisplay: round-trips multi-level namespaces") {
    val json = SqlPathFormat.toDescribeJson("""[["cat","db","ns"]]""")
      .getOrElse(fail("Expected a JSON value"))
    val rendered = SqlPathFormat.formatForDisplay(json)
      .getOrElse(fail("Expected a display string"))
    assert(rendered == "cat.db.ns")
  }
}
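
Aside for reviewers: the conversion this suite pins down can be summarized in a few lines. The sketch below is hypothetical (SqlPathFormatSketch is not the PR's object, and the real helper may differ in structure and error handling), but it matches the suite's expectations, assuming json4s, which Spark already depends on:

import org.json4s._
import org.json4s.jackson.JsonMethods.parse

object SqlPathFormatSketch {
  // Parse the stored JSON-array-of-arrays; yield None unless every entry is a
  // non-empty array of strings, mirroring the malformed-payload tests above.
  def toDescribeJson(stored: String): Option[JValue] = {
    scala.util.Try(parse(stored)).toOption.flatMap {
      case JArray(entries) if entries.nonEmpty =>
        val converted = entries.map {
          case JArray(parts) if parts.nonEmpty && parts.forall(_.isInstanceOf[JString]) =>
            val strs = parts.collect { case JString(s) => s }
            Some(JObject(
              "catalog_name" -> JString(strs.head),
              "namespace" -> JArray(strs.tail.map(JString(_)))))
          case _ => None
        }
        if (converted.forall(_.isDefined)) Some(JArray(converted.flatten)) else None
      case _ => None
    }
  }
}

The all-or-nothing check on converted entries is what makes a mixed payload like [1, 2, 3] collapse to None rather than yield a partial result, which is the behavior the malformed-payload test asserts.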
@@ -22,9 +22,11 @@ import java.net.URI
import scala.jdk.CollectionConverters._

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, FakeV2SessionCatalog, NoSuchNamespaceException}
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog => V1InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.connector.catalog.CatalogManager.{CurrentSchemaEntry, LiteralPathEntry}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.util.CaseInsensitiveStringMap

@@ -150,6 +152,115 @@ class CatalogManagerSuite extends SparkFunSuite with SQLHelper {
      assert(CatalogManager.deserializePathEntries(payload).isEmpty, s"payload=$payload")
    }
  }

  test("serializePathEntries round-trips through deserialize for typical inputs") {
    val cases = Seq(
      Seq(Seq("spark_catalog", "default"), Seq("system", "builtin")),
      Seq(Seq("system", "session")),
      Seq.empty[Seq[String]])
    cases.foreach { entries =>
      val payload = CatalogManager.serializePathEntries(entries)
      val parsed = CatalogManager.deserializePathEntries(payload)
        .getOrElse(fail(s"Expected payload to round-trip: $payload"))
      assert(parsed === entries, s"Round-trip mismatch for $entries; got $parsed")
    }
  }

  test("serializePathEntries round-trips multi-level and quoted identifiers") {
    val entries = Seq(
      Seq("cat", "ns1", "ns2"),
      Seq("spark_catalog", "sch.with.dots"),
      Seq("spark_catalog", "schema with spaces"))
    val payload = CatalogManager.serializePathEntries(entries)
    val parsed = CatalogManager.deserializePathEntries(payload)
      .getOrElse(fail(s"Expected payload to round-trip: $payload"))
    assert(parsed === entries)
  }

  test("deserializePathEntriesOrFail raises a clear AnalysisException for bad payloads") {
    val e = intercept[AnalysisException] {
      CatalogManager.deserializePathEntriesOrFail(
        storedPathStr = "{bad-json",
        objectType = "view",
        objectName = "default.v_broken")
    }
    assert(e.getMessage.contains("Invalid stored SQL path metadata for view"))
    assert(e.getMessage.contains("default.v_broken"))
  }

  // ---------------------------------------------------------------------------
  // Direct unit tests for [[PathElement.validateNoStaticDuplicates]]. The end-to-end
  // `SetPathSuite` exercises this via SQL, but the duplicate-detection rules
  // (literal-vs-literal, current_schema-vs-current_schema, case-sensitivity) are pure
  // data and benefit from focused tests close to the implementation.
  // ---------------------------------------------------------------------------

  private def literalEntry(parts: String*): LiteralPathEntry = LiteralPathEntry(parts.toSeq)

  test("validateNoStaticDuplicates: no duplicates returns the input unchanged") {
    val entries = Seq(
      literalEntry("spark_catalog", "default"),
      literalEntry("system", "builtin"),
      CurrentSchemaEntry)
    assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries)
  }

  test("validateNoStaticDuplicates: duplicate literal under case-insensitive collation") {
    val entries = Seq(
      literalEntry("spark_catalog", "default"),
      literalEntry("Spark_Catalog", "DEFAULT"))
    val e = intercept[AnalysisException] {
      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
    }
    assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY")
    assert(e.getMessageParameters.get("pathEntry") == "Spark_Catalog.DEFAULT")
  }

  test("validateNoStaticDuplicates: case-sensitive mode keeps differently cased entries") {
    val entries = Seq(
      literalEntry("spark_catalog", "DEFAULT"),
      literalEntry("spark_catalog", "default"))
    assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = true) === entries)
  }

  test("validateNoStaticDuplicates: repeated CurrentSchemaEntry is rejected") {
    val entries = Seq(CurrentSchemaEntry, CurrentSchemaEntry)
    val e = intercept[AnalysisException] {
      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
    }
    assert(e.getCondition == "DUPLICATE_SQL_PATH_ENTRY")
    assert(e.getMessageParameters.get("pathEntry") == "current_schema")
  }

  test("validateNoStaticDuplicates: literal-vs-CurrentSchemaEntry collision is tolerated") {
    // The CurrentSchemaEntry marker resolves dynamically against USE SCHEMA, so a literal
    // that happens to match the live current schema is intentionally not flagged here.
    val entries = Seq(
      literalEntry("spark_catalog", "default"),
      CurrentSchemaEntry,
      literalEntry("system", "builtin"))
    assert(PathElement.validateNoStaticDuplicates(entries, caseSensitive = false) === entries)
  }

  test("validateNoStaticDuplicates: identifier containing a dot is quoted in the error") {
    val entries = Seq(
      literalEntry("spark_catalog", "weird.schema"),
      literalEntry("spark_catalog", "weird.schema"))
    val e = intercept[AnalysisException] {
      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
    }
    assert(e.getMessageParameters.get("pathEntry") == "spark_catalog.`weird.schema`")
  }

  test("validateNoStaticDuplicates: multi-level namespace duplicate is flagged") {
    val entries = Seq(
      literalEntry("cat", "db", "ns"),
      literalEntry("cat", "db", "ns"))
    val e = intercept[AnalysisException] {
      PathElement.validateNoStaticDuplicates(entries, caseSensitive = false)
    }
    assert(e.getMessageParameters.get("pathEntry") == "cat.db.ns")
  }
}
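
// Reviewer note: a minimal, self-contained sketch of a duplicate check that is consistent
// with the tests above. Everything below is illustrative, not this PR's implementation:
// the *Sketch types stand in for PathElement / LiteralPathEntry / CurrentSchemaEntry, and
// the AnalysisException(errorClass, messageParameters) constructor is assumed.
object StaticDuplicateCheckSketch {
  import java.util.Locale

  sealed trait PathElementSketch
  case class LiteralEntrySketch(parts: Seq[String]) extends PathElementSketch
  case object CurrentSchemaSketch extends PathElementSketch

  // Backtick-quote any part that would not parse as a plain identifier.
  private def quoteIfNeeded(part: String): String =
    if (part.matches("[a-zA-Z0-9_]+")) part else s"`${part.replace("`", "``")}`"

  private def duplicateError(pathEntry: String): AnalysisException =
    new AnalysisException(
      errorClass = "DUPLICATE_SQL_PATH_ENTRY",
      messageParameters = Map("pathEntry" -> pathEntry))

  // Flags literal-vs-literal duplicates (case-folded unless caseSensitive) and repeated
  // current_schema markers; literal-vs-marker overlaps resolve dynamically, so they pass.
  def validateNoStaticDuplicates(
      entries: Seq[PathElementSketch],
      caseSensitive: Boolean): Seq[PathElementSketch] = {
    val seenLiterals = scala.collection.mutable.HashSet.empty[Seq[String]]
    var seenCurrentSchema = false
    entries.foreach {
      case LiteralEntrySketch(parts) =>
        val key = if (caseSensitive) parts else parts.map(_.toLowerCase(Locale.ROOT))
        if (!seenLiterals.add(key)) {
          throw duplicateError(parts.map(quoteIfNeeded).mkString("."))
        }
      case CurrentSchemaSketch =>
        if (seenCurrentSchema) throw duplicateError("current_schema")
        seenCurrentSchema = true
    }
    entries
  }
}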

class DummyCatalog extends CatalogPlugin {
@@ -0,0 +1,97 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.connect

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.connect.test.{ConnectFunSuite, RemoteSparkSession, SQLHelper}
import org.apache.spark.sql.functions.current_path

/**
 * End-to-end coverage for the SQL Standard PATH feature over Spark Connect.
 *
 * SET PATH and the frozen-path semantics for persisted views / SQL functions are implemented
 * entirely server-side, but the analyzer state (`AnalysisContext`) that carries the pinned path
 * must survive plan reification across the gRPC boundary. These tests exercise the public
 * surface against a real Connect client so regressions there are caught:
 *  - `SET PATH = ...` is parsed and applied to the session,
 *  - `current_path()` (SQL and the DataFrame builtin) reflects it,
 *  - a persisted view created under one path resolves its body under the frozen path even when
 *    the invoker switches the session path.
 */
class SqlPathE2ETestSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelper {

  test("SET PATH and current_path() round-trip over Connect") {
    withSQLConf("spark.sql.path.enabled" -> "true") {
      try {
        spark.sql("SET PATH = spark_catalog.default, system.builtin")
        val sqlPath = spark.sql("SELECT current_path()").head().getString(0)
        assert(
          sqlPath == "spark_catalog.default,system.builtin",
          s"current_path() over Connect should reflect SET PATH; got: $sqlPath")

        // The DataFrame builtin should agree with the SQL form.
        val apiPath = spark.range(1).select(current_path()).head().getString(0)
        assert(
          apiPath == sqlPath,
          s"functions.current_path() should match SQL current_path(); got: $apiPath vs $sqlPath")
      } finally {
        spark.sql("SET PATH = DEFAULT_PATH")
      }
    }
  }

  test("Persisted view body uses frozen path over Connect") {
    withSQLConf("spark.sql.path.enabled" -> "true") {
      withDatabase("connect_path_a", "connect_path_b") {
        spark.sql("CREATE DATABASE connect_path_a")
        spark.sql("CREATE DATABASE connect_path_b")
        spark.sql("CREATE TABLE connect_path_a.frozen_t USING parquet AS SELECT 1 AS id")
        spark.sql("CREATE TABLE connect_path_b.frozen_t USING parquet AS SELECT 2 AS id")
        withView("default.v_path_connect") {
          try {
            // Create the view under PATH = a.
            spark.sql("SET PATH = spark_catalog.connect_path_a, system.builtin")
            spark.sql("CREATE VIEW default.v_path_connect AS SELECT id FROM frozen_t")

            // Switch the session path to b; bare `frozen_t` now resolves through b,
            // but the view's frozen path keeps it pinned to a.
            spark.sql("SET PATH = spark_catalog.connect_path_b, system.builtin")
            val bare = spark.sql("SELECT id FROM frozen_t").head().getInt(0)
            assert(bare == 2, s"Bare `frozen_t` should follow live PATH = b; got: $bare")
            val viaView = spark.sql("SELECT id FROM default.v_path_connect").head().getInt(0)
            assert(
              viaView == 1,
              s"View body should resolve via the frozen creation-time PATH; got: $viaView")
          } finally {
            spark.sql("SET PATH = DEFAULT_PATH")
          }
        }
      }
    }
  }

  test("SET PATH is rejected over Connect when feature is disabled") {
    withSQLConf("spark.sql.path.enabled" -> "false") {
      val ex = intercept[AnalysisException] {
        spark.sql("SET PATH = spark_catalog.default")
      }
      assert(
        ex.getCondition == "UNSUPPORTED_FEATURE.SET_PATH_WHEN_DISABLED",
        s"Expected SET_PATH_WHEN_DISABLED, got: ${ex.getCondition}")
    }
  }
}
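
Closing note: the frozen-path behavior exercised above boils down to a pin-then-restore discipline in the analyzer. A minimal, self-contained sketch of that pattern follows; SessionPathState is hypothetical, standing in for whatever server-side state actually carries the pinned path in this PR:

// Hypothetical stand-in for the analyzer state that carries the session path.
final class SessionPathState(initial: Seq[Seq[String]]) {
  @volatile private var path: Seq[Seq[String]] = initial

  def current: Seq[Seq[String]] = path

  // Resolve a view body under its creation-time path, then restore the live
  // session path no matter how resolution exits.
  def withPinnedPath[T](pinned: Seq[Seq[String]])(body: => T): T = {
    val saved = path
    path = pinned
    try body finally path = saved
  }
}

Whatever the real mechanism looks like, the Connect tests above depend only on the observable contract: bare references follow the live path, view bodies follow the pinned one.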