@@ -23,6 +23,7 @@ import scala.collection.JavaConverters
 import scala.concurrent.{Future, Promise}
 import scala.concurrent.duration.Duration
 
+import org.apache.spark.sql.catalyst.plans.logical.CollectMetrics
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.util.QueryExecutionListener
 import org.apache.spark.util.SparkThreadUtils
@@ -61,6 +62,8 @@ class Observation(val name: String) {
 
   @volatile private var sparkSession: Option[SparkSession] = None
 
+  @volatile private var dataframeId: Option[Long] = None
+
   private val promise = Promise[Row]()
 
   /**
@@ -78,11 +81,11 @@ class Observation(val name: String) {
    * @return observed dataset
    * @throws IllegalArgumentException If this is a streaming Dataset (ds.isStreaming == true)
    */
-  private[spark] def on[T](ds: Dataset[T], expr: Column, exprs: Column*): Dataset[T] = {
+  private[spark] def on[T](ds: Dataset[T], dataframeId: Long, expr: Column, exprs: Column*): Dataset[T] = {
     if (ds.isStreaming) {
       throw new IllegalArgumentException("Observation does not support streaming Datasets")
     }
-    register(ds.sparkSession)
+    register(ds.sparkSession, dataframeId)
     ds.observe(name, expr, exprs: _*)
   }
 
@@ -91,20 +94,31 @@ class Observation(val name: String) {
    * its first action. Only the result of the first action is available. Subsequent actions do not
    * modify the result.
    *
+   * Note that if no metrics were recorded, an empty map may be returned.
+   * This can happen when the operators used for the observation are
+   * optimized away.
+   *
    * @return the observed metrics as a `Map[String, Any]`
    * @throws InterruptedException interrupted while waiting
    */
   @throws[InterruptedException]
   def get: Map[String, _] = {
-    val row = getRow
-    row.getValuesMap(row.schema.map(_.name))
+    val row = getRow
+    if (row == null || row.schema == null) {
+      Map.empty
+    } else {
+      row.getValuesMap(row.schema.map(_.name))
+    }
   }
 
   /**
    * (Java-specific) Get the observed metrics. This waits for the observed dataset to finish
    * its first action. Only the result of the first action is available. Subsequent actions do not
    * modify the result.
    *
+   * Note that if no metrics were recorded, an empty map may be returned. This can happen
+   * when the operators used for the observation are optimized away.
+   *
    * @return the observed metrics as a `java.util.Map[String, Object]`
    * @throws InterruptedException interrupted while waiting
    */
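
For context, a minimal sketch of how the `get` behavior documented above surfaces through the public API. The session, schema, and data below are illustrative and not part of this change:

```scala
import org.apache.spark.sql.{Observation, SparkSession}
import org.apache.spark.sql.functions.{count, sum}

// Illustrative local session and data; any SparkSession and DataFrame would do.
val spark = SparkSession.builder().master("local[*]").appName("observation-demo").getOrCreate()
import spark.implicits._
val df = Seq((1, "a"), (2, "b"), (3, "c")).toDF("id", "value")

// Attach named metrics to the DataFrame through the Observation API.
val observation = Observation("stats")
val observed = df.observe(observation, count($"id").as("rows"), sum($"id").as("id_sum"))

// `get` blocks until the first action on the observed Dataset finishes.
observed.collect()
val metrics: Map[String, _] = observation.get
// Normally something like Map(rows -> 3, id_sum -> 6); per the doc above, this may be
// an empty map if the observation operators were optimized away.
println(metrics)
```
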
@@ -115,7 +129,7 @@ class Observation(val name: String) {
     )
   }
 
-  private def register(sparkSession: SparkSession): Unit = {
+  private def register(sparkSession: SparkSession, dataframeId: Long): Unit = {
     // makes this class thread-safe:
     // only the first thread entering this block can set sparkSession
     // all other threads will see the exception, as it is only allowed to do this once
@@ -124,6 +138,7 @@ class Observation(val name: String) {
         throw new IllegalArgumentException("An Observation can be used with a Dataset only once")
       }
       this.sparkSession = Some(sparkSession)
+      this.dataframeId = Some(dataframeId)
     }
 
     sparkSession.listenerManager.register(this.listener)
@@ -134,9 +149,24 @@ class Observation(val name: String) {
   }
 
   private[spark] def onFinish(qe: QueryExecution): Unit = {
-    qe.observedMetrics.get(name).foreach { metrics =>
-      promise.trySuccess(metrics)
-      unregister()
+    if (qe.logical.exists {
+      case CollectMetrics(name, _, _, dataframeId) =>
+        name == this.name && dataframeId == this.dataframeId.get
+      case _ => false
+    }) {
+      val metrics = qe.observedMetrics.get(name)
+      if (metrics.isEmpty) {
+        // The plan contains the matching CollectMetrics node, but no metrics were collected,
+        // which can happen e.g. when the CollectMetricsExec node was optimized away. Complete
+        // the promise with an empty row so that `get` returns an empty map.
+        promise.trySuccess(Row.empty)
+        unregister()
+      } else {
+        metrics.foreach { metrics =>
+          promise.trySuccess(metrics)
+          unregister()
+        }
+      }
     }
   }
 
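
The `register` change keeps the existing single-use guarantee while additionally recording the `dataframeId`. A short sketch of that constraint, reusing the illustrative `spark` and `df` from the sketch above:

```scala
import org.apache.spark.sql.Observation
import org.apache.spark.sql.functions.{col, count}

val once = Observation("once")
val observedOnce = df.observe(once, count(col("id")).as("rows"))

// Attaching the same Observation to another Dataset fails fast: `register` only allows
// `sparkSession` (and now `dataframeId`) to be set a single time.
try {
  df.select(col("id")).observe(once, count(col("id")).as("rows"))
} catch {
  case e: IllegalArgumentException =>
    println(e.getMessage) // "An Observation can be used with a Dataset only once"
}
```
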