Skip to content

Commit 5b778ad

Browse files
committed
windows
1 parent 8fa8f1b commit 5b778ad

3 files changed

Lines changed: 36 additions & 15 deletions

File tree

native/core/src/execution/planner.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,13 +2464,13 @@ impl PhysicalPlanner {
24642464
.iter()
24652465
.map(|expr| self.create_expr(expr, Arc::clone(&input_schema)))
24662466
.collect::<Result<Vec<_>, ExecutionError>>()?;
2467-
window_func = self.find_df_window_function(&window_func_name).ok_or_else(
2468-
|| {
2469-
GeneralError(format!(
2470-
"{window_func_name} not supported for window function"
2471-
))
2472-
},
2473-
)?;
2467+
window_func =
2468+
self.find_df_window_function(&window_func_name)
2469+
.ok_or_else(|| {
2470+
GeneralError(format!(
2471+
"{window_func_name} not supported for window function"
2472+
))
2473+
})?;
24742474
}
24752475
other => {
24762476
return Err(GeneralError(format!(
@@ -2659,9 +2659,8 @@ impl PhysicalPlanner {
26592659
// Resolve a window-capable function by name via the session registry, returning
26602660
// a clean "X not supported for window function" error if missing.
26612661
let by_name = |name: &str| -> Result<WindowFunctionDefinition, ExecutionError> {
2662-
self.find_df_window_function(name).ok_or_else(|| {
2663-
GeneralError(format!("{name} not supported for window function"))
2664-
})
2662+
self.find_df_window_function(name)
2663+
.ok_or_else(|| GeneralError(format!("{name} not supported for window function")))
26652664
};
26662665

26672666
match &agg_func.expr_struct {

spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ package org.apache.spark.sql.comet
2121

2222
import scala.jdk.CollectionConverters._
2323

24-
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeSet, CumeDist, CurrentRow, DenseRank, Expression, Lag, Lead, Literal, MakeDecimal, NamedExpression, NTile, PercentRank, RangeFrame, Rank, RowFrame, RowNumber, SortOrder, SpecifiedWindowFrame, UnboundedFollowing, UnboundedPreceding, WindowExpression}
24+
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeSet, CumeDist, CurrentRow, DenseRank, Expression, Lag, Lead, Literal, MakeDecimal, NamedExpression, NthValue, NTile, PercentRank, RangeFrame, Rank, RowFrame, RowNumber, SortOrder, SpecifiedWindowFrame, UnboundedFollowing, UnboundedPreceding, WindowExpression}
2525
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Average, Complete, Count, First, Last, Max, Min, Sum}
2626
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
2727
import org.apache.spark.sql.execution.SparkPlan
2828
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
2929
import org.apache.spark.sql.execution.window.WindowExec
3030
import org.apache.spark.sql.internal.SQLConf
31+
import org.apache.spark.sql.types.{LongType, NumericType}
3132
import org.apache.spark.sql.types.Decimal
32-
import org.apache.spark.sql.types.NumericType
3333

3434
import com.google.common.base.Objects
3535

@@ -195,8 +195,31 @@ object CometWindowExec extends CometOperatorSerde[WindowExec] {
195195
case nt: NTile =>
196196
val bucketsExpr = exprToProto(nt.buckets, output)
197197
(None, scalarFunctionExprToProto("ntile", bucketsExpr), false)
198-
case _ =>
199-
(None, exprToProto(windowExpr.windowFunction, output), false)
198+
case nv: NthValue =>
199+
val inputExpr = exprToProto(nv.input, output)
200+
// DataFusion's nth_value (aggregate UDF path, picked first by
201+
// find_df_window_function) requires the position argument to be a
202+
// ScalarValue::Int64 literal. Spark's NthValue.offset is IntegerType,
203+
// which would serialize as Int32 and trigger
204+
// "nth_value not supported for n: <expr>" at plan time. Fold the
205+
// (foldable) offset to a Long literal so the native side sees Int64.
206+
val offsetExpr = nv.offset.eval() match {
207+
case n: Number =>
208+
exprToProto(Literal(n.longValue(), LongType), output)
209+
case _ =>
210+
withInfo(
211+
windowExpr,
212+
s"Unsupported NTH_VALUE offset: ${nv.offset} (${nv.offset.dataType})")
213+
None
214+
}
215+
val func = scalarFunctionExprToProto("nth_value", inputExpr, offsetExpr)
216+
(None, func, nv.ignoreNulls)
217+
case other =>
218+
withInfo(
219+
windowExpr,
220+
s"window function ${other.getClass.getSimpleName} is not supported",
221+
other)
222+
(None, None, false)
200223
}
201224
}
202225

spark/src/test/scala/org/apache/comet/exec/CometWindowExecSuite.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,6 @@ class CometWindowExecSuite extends CometTestBase {
788788
}
789789
}
790790

791-
// TODO: NTH_VALUE returns incorrect results - produces 0 instead of null for first row,
792791
test("window: NTH_VALUE with position 2") {
793792
withTempDir { dir =>
794793
(0 until 30)

0 commit comments

Comments (0)