apache · AnishMahto · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 22, 2026
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
@@ -203,18 +203,36 @@
     ],
     "sqlState" : "22023"
   },
+  "AUTOCDC_KEY_NOT_IN_SELECTED_SCHEMA" : {
+    "message" : [
+      "Using <caseSensitivity> column name comparison, the AutoCDC key column `<keyColumnName>` is not present in the flow's selected source schema. AutoCDC requires every key column to be present in the source change-data feed and retained by any configured column selection."
+    ],
+    "sqlState" : "22023"
+  },
   "AUTOCDC_MULTIPART_COLUMN_IDENTIFIER" : {
     "message" : [
       "Expected a single column identifier; got the multi-part identifier <columnName> (parts: <nameParts>)."
     ],
     "sqlState" : "42703"
   },
-  "AUTOCDC_RESERVED_COLUMN_NAME_CONFLICT" : {
+  "AUTOCDC_MULTIPLE_FLOWS_TO_TARGET" : {
+    "message" : [
+      "Invalid AutoCDC destination <tableName> with multiple flows: <flows>. An AutoCDC target table must have exactly one flow writing to it."
+    ],
+    "sqlState" : "42000"
+  },
+  "AUTOCDC_RESERVED_COLUMN_NAME_PREFIX_CONFLICT" : {
     "message" : [
-      "Using <caseSensitivity> column name comparison, the column `<columnName>` in the <schemaName> schema conflicts with the reserved AutoCDC column name `<reservedColumnName>`. Rename or remove the column."
+      "The column `<columnName>` in the <schemaName> schema collides with the reserved AutoCDC column name prefix `<reservedColumnNamePrefix>` (using <caseSensitivity> column name comparison). Rename or remove the column."
     ],
     "sqlState" : "42710"
   },
+  "AUTOCDC_SCD2_NOT_SUPPORTED" : {
+    "message" : [
+      "AutoCDC flows do not currently support SCD Type 2 transformations."
+    ],
+    "sqlState" : "0A000"
+  },
   "AVRO_CANNOT_WRITE_NULL_FIELD" : {
     "message" : [
       "Cannot write null value for field <name> defined as non-null Avro data type <dataType>.",
@@ -3687,6 +3705,11 @@
       "Flow <flowIdentifier> returns an invalid relation type."
     ],
     "subClass" : {
+      "AUTOCDC_RELATION_FOR_TEMPORARY_VIEW" : {
+        "message" : [
+          "AutoCDC flows must target a streaming table because their reconciliation semantics require a streaming-table sink, but the flow <flowIdentifier> attempts to write an AutoCDC relation to the temporary view <viewIdentifier>."
+        ]
+      },
       "BATCH_RELATION_FOR_STREAMING_TABLE" : {
         "message" : [
           "Streaming tables may only be defined by streaming relations, but the flow <flowIdentifier> attempts to write a batch relation to the streaming table <tableIdentifier>. Consider using the STREAM operator in Spark-SQL to convert the batch relation into a streaming relation, or populating the streaming table with an append once-flow instead."

diff --git a/...nnect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala b/...nnect/server/src/main/scala/org/apache/spark/sql/connect/pipelines/PipelinesHandler.scala
@@ -34,7 +34,7 @@ import org.apache.spark.sql.connect.service.SessionHolder
 import org.apache.spark.sql.execution.command.{ShowCatalogsCommand, ShowNamespacesCommand}
 import org.apache.spark.sql.pipelines.Language.Python
 import org.apache.spark.sql.pipelines.common.RunState.{CANCELED, FAILED}
-import org.apache.spark.sql.pipelines.graph.{AllTables, FlowAnalysis, GraphIdentifierManager, GraphRegistrationContext, IdentifierHelper, NoTables, PipelineUpdateContextImpl, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, SomeTables, SqlGraphRegistrationContext, Table, TableFilter, TemporaryView, UnresolvedFlow}
+import org.apache.spark.sql.pipelines.graph.{AllTables, FlowAnalysis, GraphIdentifierManager, GraphRegistrationContext, IdentifierHelper, NoTables, PipelineUpdateContextImpl, QueryContext, QueryOrigin, QueryOriginType, Sink, SinkImpl, SomeTables, SqlGraphRegistrationContext, Table, TableFilter, TemporaryView, UntypedFlow}
 import org.apache.spark.sql.pipelines.logging.{PipelineEvent, RunProgress}
 import org.apache.spark.sql.types.StructType
 
@@ -371,7 +371,7 @@ private[connect] object PipelinesHandler extends Logging {
       case proto.PipelineCommand.DefineFlow.DetailsCase.RELATION_FLOW_DETAILS =>
         val relationFlowDetails = flow.getRelationFlowDetails
         graphElementRegistry.registerFlow(
-          UnresolvedFlow(
+          UntypedFlow(
             identifier = flowIdentifier,
             destinationIdentifier = destinationIdentifier,
             func = FlowAnalysis.createFlowFunctionFromLogicalPlan(

diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgs.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/ChangeArgs.scala
@@ -120,7 +120,7 @@ object ColumnSelection {
 }
 
 /** User-facing case-sensitivity labels surfaced in AutoCDC error messages. */
-private[autocdc] object CaseSensitivityLabels {
+private[pipelines] object CaseSensitivityLabels {
   val CaseSensitive: String = "case-sensitive"
   val CaseInsensitive: String = "case-insensitive"
 

diff --git a/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala b/sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/autocdc/Scd1BatchProcessor.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.pipelines.autocdc
 
 import org.apache.spark.SparkException
-import org.apache.spark.sql.{functions => F, AnalysisException}
+import org.apache.spark.sql.{functions => F}
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.catalyst.util.QuotingUtils
 import org.apache.spark.sql.classic.DataFrame
@@ -89,9 +89,6 @@ case class Scd1BatchProcessor(
    * column.
    */
   def extendMicrobatchRowsWithCdcMetadata(validatedMicrobatch: DataFrame): DataFrame = {
-    // Proactively validate the reserved CDC metadata column does not exist in the microbatch.
-    validateCdcMetadataColumnNotPresent(validatedMicrobatch)
-
     val rowDeleteSequence: Column = changeArgs.deleteCondition match {
       case Some(deleteCondition) =>
         F.when(deleteCondition, changeArgs.sequencing).otherwise(F.lit(null))
@@ -211,31 +208,18 @@ case class Scd1BatchProcessor(
       joinType = "left_anti"
     )
   }
-
-  private def validateCdcMetadataColumnNotPresent(microbatch: DataFrame): Unit = {
-    val microbatchSqlConf = microbatch.sparkSession.sessionState.conf
-    val resolver = microbatchSqlConf.resolver
-
-    microbatch.schema.fieldNames
-      .find(resolver(_, Scd1BatchProcessor.cdcMetadataColName))
-      .foreach { conflictingColumnName =>
-        throw new AnalysisException(
-          errorClass = "AUTOCDC_RESERVED_COLUMN_NAME_CONFLICT",
-          messageParameters = Map(
-            "caseSensitivity" -> CaseSensitivityLabels.of(microbatchSqlConf.caseSensitiveAnalysis),
-            "columnName" -> conflictingColumnName,
-            "schemaName" -> "microbatch",
-            "reservedColumnName" -> Scd1BatchProcessor.cdcMetadataColName
-          )
-        )
-      }
-  }
 }
 
 object Scd1BatchProcessor {
-  // Columns prefixed with `__spark_autocdc_` are reserved for internal SDP AutoCDC processing.
-  private[autocdc] val winningRowColName: String = "__spark_autocdc_winning_row"
-  private[autocdc] val cdcMetadataColName: String = "__spark_autocdc_metadata"
+  /**
+   * Reserved column-name prefix for internal SDP AutoCDC processing. Source change-data-feed
+   * dataframes must not contain any columns starting with this prefix; the invariant is
+   * enforced at [[org.apache.spark.sql.pipelines.graph.AutoCdcMergeFlow]] construction.
+   */
+  private[pipelines] val reservedColumnNamePrefix: String = "__spark_autocdc_"
+
+  private[autocdc] val winningRowColName: String = s"${reservedColumnNamePrefix}winning_row"
+  private[pipelines] val cdcMetadataColName: String = s"${reservedColumnNamePrefix}metadata"
 
   private[autocdc] val cdcDeleteSequenceFieldName: String = "deleteSequence"
   private[autocdc] val cdcUpsertSequenceFieldName: String = "upsertSequence"
@@ -251,7 +235,7 @@ object Scd1BatchProcessor {
   /**
    * Schema of the CDC metadata struct column for SCD1.
    */
-  private def cdcMetadataColSchema(sequencingType: DataType): StructType =
+  private[pipelines] def cdcMetadataColSchema(sequencingType: DataType): StructType =
     StructType(
       Seq(
         // The sequencing of the event if it represents a delete, null otherwise.
@@ -265,7 +249,7 @@ object Scd1BatchProcessor {
    * Construct the CDC metadata struct column for SCD1, following the exact schema and field
    * ordering defined by [[cdcMetadataColSchema]].
    */
-  private[autocdc] def constructCdcMetadataCol(
+  private[pipelines] def constructCdcMetadataCol(
       deleteSequence: Column,
       upsertSequence: Column,
       sequencingType: DataType): Column = {

diff --git a/...lines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala b/...lines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala
@@ -176,7 +176,7 @@ private class FlowResolver(rawGraph: DataflowGraph) {
           } else {
             f
           }
-          convertResolvedToTypedFlow(flowToResolve, maybeNewFuncResult)
+          resolveFlow(flowToResolve, maybeNewFuncResult)
 
         // If the flow failed due to an UnresolvedDatasetException, it means that one of the
         // flow's inputs wasn't available. After other flows are resolved, these inputs
@@ -199,9 +199,18 @@ private class FlowResolver(rawGraph: DataflowGraph) {
       }
   }
 
-  private def convertResolvedToTypedFlow(
+  private def resolveFlow(
       flow: UnresolvedFlow,
       funcResult: FlowFunctionResult): ResolvedFlow = {
+    flow match {
+      case acf: AutoCdcFlow => new AutoCdcMergeFlow(acf, funcResult)
+      case utf: UntypedFlow => transformUntypedFlowToResolvedFlow(utf, funcResult)
+    }
+  }
+
+  private def transformUntypedFlowToResolvedFlow(
+      flow: UntypedFlow,
+      funcResult: FlowFunctionResult): ResolvedFlow = {
     flow match {
       case _ if flow.once => new AppendOnceFlow(flow, funcResult)
       case _ if funcResult.dataFrame.get.isStreaming =>
@@ -210,7 +219,7 @@ private class FlowResolver(rawGraph: DataflowGraph) {
         // then get their results overwritten.
         val mustBeAppend = rawGraph.flowsTo(flow.destinationIdentifier).size > 1
         new StreamingFlow(flow, funcResult, mustBeAppend = mustBeAppend)
-      case _: UnresolvedFlow => new CompleteFlow(flow, funcResult)
+      case _ => new CompleteFlow(flow, funcResult)
     }
   }
 }