
Commit 0036037

[SPARK-54157][SQL] Fix refresh of DSv2 tables in Dataset

1 parent 8c76795  commit 0036037

13 files changed: +1082 −14 lines

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 25 additions & 0 deletions

@@ -2165,6 +2165,31 @@
     ],
     "sqlState" : "42000"
   },
+  "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS" : {
+    "message" : [
+      "Detected incompatible changes to table <tableName> after DataFrame/Dataset has been resolved and analyzed, meaning the underlying plan is out of sync. Please, re-create DataFrame/Dataset before attempting to execute the query again."
+    ],
+    "subClass" : {
+      "COLUMNS_MISMATCH" : {
+        "message" : [
+          "Data columns have changed:",
+          "<errors>"
+        ]
+      },
+      "METADATA_COLUMNS_MISMATCH" : {
+        "message" : [
+          "Metadata columns have changed:",
+          "<errors>"
+        ]
+      },
+      "TABLE_ID_MISMATCH" : {
+        "message" : [
+          "Table ID has changed from <capturedTableId> to <detectedTableId>."
+        ]
+      }
+    },
+    "sqlState" : "51024"
+  },
   "INCOMPATIBLE_VIEW_SCHEMA_CHANGE" : {
     "message" : [
       "The SQL query of view <viewName> has an incompatible schema change and column <colName> cannot be resolved. Expected <expectedNum> columns named <colName> but got <actualCols>.",
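
For context, this error class surfaces when a table captured during analysis no longer matches the catalog at execution time. Below is a hypothetical repro sketch, not part of the commit: the catalog and table names are illustrative, and whether the error actually fires depends on the connector exposing table IDs.

// Hypothetical repro, assuming a DSv2 catalog "cat" whose tables implement id().
val df = spark.table("cat.db.t")            // analysis captures the table's id and columns
spark.sql("DROP TABLE cat.db.t")
spark.sql("CREATE TABLE cat.db.t (x INT)")  // same name, but a new metastore entity
df.collect()                                // may now fail with
// INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.TABLE_ID_MISMATCH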

sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/Table.java

Lines changed: 9 additions & 0 deletions

@@ -50,6 +50,15 @@ public interface Table {
    */
   String name();
 
+  /**
+   * An ID of the table that can be used to reliably check whether two table objects refer to the
+   * same metastore entity. If a table is dropped and recreated with the same name, the new table
+   * ID must be different. This method must return null if the connector does not support table IDs.
+   */
+  default String id() {
+    return null;
+  }
+
   /**
    * Returns the schema of this table. If the table is not readable and doesn't have a schema, an
    * empty schema can be returned here.
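
To illustrate the new contract, here is a minimal sketch of a connector Table that exposes id(). The class and field names are illustrative and not part of this commit; only the minimum abstract methods are implemented.

import java.util.UUID

import org.apache.spark.sql.connector.catalog.{Table, TableCapability}
import org.apache.spark.sql.types.StructType

class MyTable(tableName: String, tableSchema: StructType) extends Table {
  // One UUID per metastore entity: dropping and recreating the table produces
  // a new MyTable instance and therefore a different id.
  private val tableId: String = UUID.randomUUID().toString

  override def name(): String = tableName
  override def id(): String = tableId
  override def schema(): StructType = tableSchema
  override def capabilities(): java.util.Set[TableCapability] =
    java.util.Collections.emptySet[TableCapability]()
}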

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RelationResolution.scala

Lines changed: 4 additions & 4 deletions

@@ -203,10 +203,10 @@ class RelationResolution(override val catalogManager: CatalogManager)
         )
       )
     } else {
-      SubqueryAlias(
-        catalog.name +: ident.asMultipartIdentifier,
-        DataSourceV2Relation.create(table, Some(catalog), Some(ident), options, timeTravelSpec)
-      )
+      val relation = DataSourceV2Relation.create(
+        table, Some(catalog), Some(ident), options, timeTravelSpec)
+      relation.setLoadTimeNanos(System.nanoTime())
+      SubqueryAlias(catalog.name +: ident.asMultipartIdentifier, relation)
     }
   }
 }
(New file defining object V2TableUtil in package org.apache.spark.sql.connector.catalog; the full file path is not shown in this view.)

Lines changed: 151 additions & 0 deletions

@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util.Locale
+
+import scala.collection.mutable
+
+import org.apache.spark.sql.catalyst.SQLConfHelper
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
+import org.apache.spark.sql.catalyst.util.quoteIfNeeded
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
+import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
+import org.apache.spark.sql.types.DataType
+import org.apache.spark.util.ArrayImplicits._
+
+private[sql] object V2TableUtil extends SQLConfHelper {
+
+  def toQualifiedName(catalog: CatalogPlugin, ident: Identifier): String = {
+    s"${quoteIfNeeded(catalog.name)}.${ident.quoted}"
+  }
+
+  /**
+   * Validates that captured data columns match the current table schema.
+   *
+   * @param table the current table
+   * @param relation the relation with captured columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedColumns(table: Table, relation: DataSourceV2Relation): Seq[String] = {
+    validateCapturedColumns(table, relation.table.columns.toImmutableArraySeq)
+  }
+
+  /**
+   * Validates that captured data columns match the current table schema.
+   *
+   * Checks for:
+   * - Column type or nullability changes
+   * - Removed columns (missing from the current table schema)
+   * - Added columns (new in the current table schema)
+   *
+   * @param table the current table
+   * @param originCols the originally captured columns
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedColumns(table: Table, originCols: Seq[Column]): Seq[String] = {
+    val errors = mutable.ArrayBuffer[String]()
+
+    val colsByNormalizedName = indexByNormalizedName(table.columns.toImmutableArraySeq)
+    val originColsByNormalizedName = indexByNormalizedName(originCols)
+
+    originColsByNormalizedName.foreach { case (normalizedName, originCol) =>
+      colsByNormalizedName.get(normalizedName) match {
+        case Some(col) =>
+          if (originCol.dataType != col.dataType || originCol.nullable != col.nullable) {
+            val oldType = formatType(originCol.dataType, originCol.nullable)
+            val newType = formatType(col.dataType, col.nullable)
+            errors += s"`${originCol.name}` type has changed from $oldType to $newType"
+          }
+        case None =>
+          errors += s"${formatColumn(originCol)} has been removed"
+      }
+    }
+
+    colsByNormalizedName.foreach { case (normalizedName, col) =>
+      if (!originColsByNormalizedName.contains(normalizedName)) {
+        errors += s"${formatColumn(col)} has been added"
+      }
+    }
+
+    errors.toSeq
+  }
+
+  /**
+   * Validates that captured metadata columns match the current table schema.
+   *
+   * Checks for:
+   * - Metadata column type or nullability changes
+   * - Removed metadata columns (missing from current table)
+   *
+   * @param table the current table
+   * @param metaAttrs the originally captured metadata column attributes
+   * @return validation errors, or empty sequence if valid
+   */
+  def validateCapturedMetadataColumns(
+      table: Table,
+      metaAttrs: Seq[AttributeReference]): Seq[String] = {
+    val errors = mutable.ArrayBuffer[String]()
+    val metaColsByNormalizedName = metadataColumnsByNormalizedName(table)
+
+    metaAttrs.foreach { metaAttr =>
+      val normalizedName = normalize(metaAttr.name)
+      metaColsByNormalizedName.get(normalizedName) match {
+        case Some(metaCol) =>
+          if (metaAttr.dataType != metaCol.dataType || metaAttr.nullable != metaCol.isNullable) {
+            val oldType = formatType(metaAttr.dataType, metaAttr.nullable)
+            val newType = formatType(metaCol.dataType, metaCol.isNullable)
+            errors += s"`${metaAttr.name}` type has changed from $oldType to $newType"
+          }
+        case None =>
+          errors += s"${formatAttr(metaAttr)} has been removed"
+      }
+    }
+
+    errors.toSeq
+  }
+
+  private def metadataColumnsByNormalizedName(table: Table): Map[String, MetadataColumn] = {
+    table match {
+      case hasMeta: SupportsMetadataColumns =>
+        hasMeta.metadataColumns.map(col => normalize(col.name) -> col).toMap
+      case _ =>
+        Map.empty
+    }
+  }
+
+  private def formatColumn(col: Column): String = {
+    s"`${col.name}` ${formatType(col.dataType, col.nullable)}"
+  }
+
+  private def formatAttr(attr: AttributeReference): String = {
+    s"`${attr.name}` ${formatType(attr.dataType, attr.nullable)}"
+  }
+
+  private def formatType(dataType: DataType, nullable: Boolean): String = {
+    if (nullable) dataType.sql else s"${dataType.sql} NOT NULL"
+  }
+
+  private def indexByNormalizedName(cols: Seq[Column]): Map[String, Column] = {
+    cols.map(col => normalize(col.name) -> col).toMap
+  }
+
+  private def normalize(name: String): String = {
+    if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
+  }
+}
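
A hypothetical usage sketch of the new helper from inside Spark's sql package (the object is private[sql]); `catalog`, `ident`, and `relation` are assumed to be in scope, and the actual refresh rule that drives this check is not part of this excerpt.

// Re-load the table and compare it against the columns captured at analysis time.
val freshTable = catalog.loadTable(ident)
val errors = V2TableUtil.validateCapturedColumns(freshTable, relation)
if (errors.nonEmpty) {
  throw QueryCompilationErrors.columnsChangedAfterAnalysis(
    V2TableUtil.toQualifiedName(catalog, ident), errors)
}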

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala

Lines changed: 32 additions & 0 deletions

@@ -2113,6 +2113,38 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
     }
   }
 
+  def tableIdChangedAfterAnalysis(
+      tableName: String,
+      capturedTableId: String,
+      detectedTableId: String): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.TABLE_ID_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "capturedTableId" -> capturedTableId,
+        "detectedTableId" -> detectedTableId))
+  }
+
+  def columnsChangedAfterAnalysis(
+      tableName: String,
+      errors: Seq[String]): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.COLUMNS_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "errors" -> errors.mkString("\n- ", "\n- ", "")))
+  }
+
+  def metadataColumnsChangedAfterAnalysis(
+      tableName: String,
+      errors: Seq[String]): Throwable = {
+    new AnalysisException(
+      errorClass = "INCOMPATIBLE_TABLE_CHANGE_AFTER_ANALYSIS.METADATA_COLUMNS_MISMATCH",
+      messageParameters = Map(
+        "tableName" -> toSQLId(tableName),
+        "errors" -> errors.mkString("\n- ", "\n- ", "")))
+  }
+
   def numberOfPartitionsNotAllowedWithUnspecifiedDistributionError(): Throwable = {
     new AnalysisException(
       errorClass = "INVALID_WRITE_DISTRIBUTION.PARTITION_NUM_WITH_UNSPECIFIED_DISTRIBUTION",

sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala

Lines changed: 34 additions & 6 deletions

@@ -22,9 +22,11 @@ import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelat
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Expression, SortOrder}
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, ExposesMetadataColumns, Histogram, HistogramBin, LeafNode, LogicalPlan, Statistics}
+import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
-import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, truncatedString, CharVarcharUtils}
-import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, Identifier, SupportsMetadataColumns, Table, TableCapability}
+import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils}
+import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, Identifier, SupportsMetadataColumns, Table, TableCapability, TableCatalog, V2TableUtil}
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.CatalogHelper
 import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics}
 import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream}
 import org.apache.spark.sql.util.CaseInsensitiveStringMap

@@ -57,9 +59,8 @@ abstract class DataSourceV2RelationBase(
   }
 
   override def name: String = {
-    import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
     (catalog, identifier) match {
-      case (Some(cat), Some(ident)) => s"${quoteIfNeeded(cat.name())}.${ident.quoted}"
+      case (Some(cat), Some(ident)) => V2TableUtil.toQualifiedName(cat, ident)
       case _ => table.name()
     }
   }

@@ -133,6 +134,27 @@ case class DataSourceV2Relation(
 
   def autoSchemaEvolution(): Boolean =
     table.capabilities().contains(TableCapability.AUTOMATIC_SCHEMA_EVOLUTION)
+
+  /**
+   * Sets the load time (in nanoseconds) for the table in this relation.
+   * This is used to track when the table metadata was loaded from the catalog,
+   * allowing refresh logic to determine if the table information is stale.
+   *
+   * @param nanos the load time in nanoseconds (typically from System.nanoTime())
+   */
+  def setLoadTimeNanos(nanos: Long): Unit = {
+    setTagValue(DataSourceV2Relation.TABLE_LOAD_TIME_TAG, nanos)
+  }
+
+  /**
+   * Returns the load time (in nanoseconds) for the table in this relation, if available.
+   * Returns None if the load time has not been set.
+   *
+   * @return load time if available, None otherwise
+   */
+  def loadTimeNanos: Option[Long] = {
+    getTagValue(DataSourceV2Relation.TABLE_LOAD_TIME_TAG)
+  }
 }
 
 /**

@@ -259,17 +281,23 @@ object ExtractV2Table {
 }
 
 object ExtractV2CatalogAndIdentifier {
-  def unapply(relation: DataSourceV2Relation): Option[(CatalogPlugin, Identifier)] = {
+  def unapply(relation: DataSourceV2Relation): Option[(TableCatalog, Identifier)] = {
     relation match {
      case DataSourceV2Relation(_, _, Some(catalog), Some(identifier), _, _) =>
-        Some((catalog, identifier))
+        Some((catalog.asTableCatalog, identifier))
      case _ =>
        None
    }
  }
 }
 
 object DataSourceV2Relation {
+  /**
+   * Tag for tracking when the table metadata was loaded from the catalog.
+   * Used by version refresh logic to determine if table information is stale.
+   */
+  private[sql] val TABLE_LOAD_TIME_TAG = TreeNodeTag[Long]("table_load_time")
+
   def create(
       table: Table,
       catalog: Option[CatalogPlugin],
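
The refresh rule that consumes this tag is not shown in this excerpt. A hypothetical staleness check, combining loadTimeNanos with the spark.sql.analyzer.tableMetadataMaxAge config added below, might look like the following; `relation` is assumed to be an analyzed DataSourceV2Relation whose load time was stamped during resolution.

import java.util.concurrent.TimeUnit

import org.apache.spark.sql.internal.SQLConf

// Treat a missing load time as "not stale"; otherwise compare the age of the
// captured metadata against the configured maximum age.
val maxAgeNanos = TimeUnit.MILLISECONDS.toNanos(SQLConf.get.tableMetadataMaxAge)
val isStale = relation.loadTimeNanos.exists { loadedAt =>
  System.nanoTime() - loadedAt > maxAgeNanos
}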

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 13 additions & 0 deletions

@@ -2060,6 +2060,17 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
+  val TABLE_METADATA_MAX_AGE =
+    buildConf("spark.sql.analyzer.tableMetadataMaxAge")
+      .doc("Maximum age in milliseconds for analyzed table metadata. Table metadata is " +
+        "considered valid for this duration after being loaded during analysis. If execution " +
+        "is delayed beyond this threshold, metadata will be refreshed to ensure consistency " +
+        "and detect schema changes. Note this config is only supported for versioned tables " +
+        "that expose their versions to Spark and is only checked during the first execution.")
+      .version("4.1.0")
+      .timeConf(TimeUnit.MILLISECONDS)
+      .createWithDefault(100L)
+
   val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets")
     .doc("The maximum number of buckets allowed.")
     .version("2.4.0")

@@ -7137,6 +7148,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
 
   def bucketingMaxBuckets: Int = getConf(SQLConf.BUCKETING_MAX_BUCKETS)
 
+  def tableMetadataMaxAge: Long = getConf(SQLConf.TABLE_METADATA_MAX_AGE)
+
   def autoBucketedScanEnabled: Boolean = getConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED)
 
   def v2BucketingEnabled: Boolean = getConf(SQLConf.V2_BUCKETING_ENABLED)
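
As a usage note (not part of the commit), the new threshold should be tunable per session like other non-static SQL configs; the value below is illustrative.

// Allow analyzed DSv2 table metadata to be reused for up to 5 seconds before
// the first execution triggers a refresh check.
spark.conf.set("spark.sql.analyzer.tableMetadataMaxAge", "5s")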
