/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.comet.rules

import scala.jdk.CollectionConverters._

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.{Divide, DoubleLiteral, EqualNullSafe, EqualTo, Expression, FloatLiteral, GreaterThan, GreaterThanOrEqual, KnownFloatingPointNormalized, LessThan, LessThanOrEqual, NamedExpression, Remainder}
import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.util.sideBySide
import org.apache.spark.sql.comet._
import org.apache.spark.sql.comet.execution.shuffle.{CometColumnarShuffle, CometNativeShuffle, CometShuffleExchangeExec, CometShuffleManager}
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AQEShuffleReadExec, BroadcastQueryStageExec, ShuffleQueryStageExec}
import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec}
import org.apache.spark.sql.execution.command.ExecutedCommandExec
import org.apache.spark.sql.execution.datasources.v2.V2CommandExec
import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec}
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, ShuffledHashJoinExec, SortMergeJoinExec}
import org.apache.spark.sql.execution.window.WindowExec
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

import org.apache.comet.{CometConf, ExtendedExplainInfo}
import org.apache.comet.CometConf.COMET_EXEC_SHUFFLE_ENABLED
import org.apache.comet.CometSparkSessionExtensions._
import org.apache.comet.rules.CometExecRule.allExecs
import org.apache.comet.serde.{CometOperatorSerde, Compatible, Incompatible, OperatorOuterClass, QueryPlanSerde, Unsupported}
import org.apache.comet.serde.OperatorOuterClass.Operator
import org.apache.comet.serde.QueryPlanSerde.{serializeDataType, supportedDataType}
import org.apache.comet.serde.operator._

object CometExecRule {

  /**
   * Serde handlers for Spark operators that Comet can run as fully native operators, keyed by
   * the Spark operator class.
   */
  val nativeExecs: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] =
    Map(
      // row-level operators
      classOf[ProjectExec] -> CometProjectExec,
      classOf[FilterExec] -> CometFilterExec,
      classOf[ExpandExec] -> CometExpandExec,
      classOf[LocalTableScanExec] -> CometLocalTableScanExec,
      // limits and ordering
      classOf[LocalLimitExec] -> CometLocalLimitExec,
      classOf[GlobalLimitExec] -> CometGlobalLimitExec,
      classOf[SortExec] -> CometSortExec,
      // aggregation and windowing
      classOf[HashAggregateExec] -> CometHashAggregateExec,
      classOf[ObjectHashAggregateExec] -> CometObjectHashAggregateExec,
      classOf[WindowExec] -> CometWindowExec,
      // joins
      classOf[BroadcastHashJoinExec] -> CometBroadcastHashJoinExec,
      classOf[ShuffledHashJoinExec] -> CometHashJoinExec,
      classOf[SortMergeJoinExec] -> CometSortMergeJoinExec)

  /**
   * Serde handlers for sink operators, i.e. operators that have a native plan of ScanExec,
   * keyed by the Spark operator class.
   */
  val sinks: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] =
    Map(
      classOf[UnionExec] -> CometUnionExec,
      classOf[CoalesceExec] -> CometCoalesceExec,
      classOf[CollectLimitExec] -> CometCollectLimitExec,
      classOf[TakeOrderedAndProjectExec] -> CometTakeOrderedAndProjectExec)

  /**
   * Every operator class that has a serde handler: the union of [[nativeExecs]] and [[sinks]].
   */
  val allExecs: Map[Class[_ <: SparkPlan], CometOperatorSerde[_]] = nativeExecs ++ sinks

}

/**
 * Spark physical optimizer rule for replacing Spark operators with Comet operators.
 */
case class CometExecRule(session: SparkSession) extends Rule[SparkPlan] {

  // Value of `CometConf.COMET_EXPLAIN_TRANSFORMATIONS`, read lazily on first use. When true,
  // `apply` logs a side-by-side view of the plan before and after this rule.
  private lazy val showTransformations = CometConf.COMET_EXPLAIN_TRANSFORMATIONS.get()

  /**
   * Rewrites Spark shuffle exchanges into Comet shuffle exchanges, bottom-up. Native shuffle is
   * tried first, then columnar shuffle; an exchange supported by neither is left untouched.
   */
  private def applyCometShuffle(plan: SparkPlan): SparkPlan = {
    plan.transformUp { case exchange: ShuffleExchangeExec =>
      if (nativeShuffleSupported(exchange)) {
        // Arrow native execution has no Decimal32/Decimal64 support yet, so Decimal128 is
        // forced regardless of precision.
        conf.setConfString(CometConf.COMET_USE_DECIMAL_128.key, "true")
        CometShuffleExchangeExec(exchange, shuffleType = CometNativeShuffle)
      } else if (columnarShuffleSupported(exchange)) {
        // Columnar shuffle serves regular Spark operators (not Comet) and, if configured,
        // Comet operators as well.
        CometShuffleExchangeExec(exchange, shuffleType = CometColumnarShuffle)
      } else {
        exchange
      }
    }
  }

  /** True when `op` is any kind of Comet plan node (an instance of `CometPlan`). */
  private def isCometPlan(op: SparkPlan): Boolean = op match {
    case _: CometPlan => true
    case _ => false
  }

  /** True when `op` is a native Comet operator (an instance of `CometNativeExec`). */
  private def isCometNative(op: SparkPlan): Boolean = op match {
    case _: CometNativeExec => true
    case _ => false
  }

  // spotless:off

  /**
   * Tries to transform a Spark physical plan into a Comet plan.
   *
   * This rule traverses bottom-up from the original Spark plan and for each plan node, there
   * are a few cases to consider:
   *
   * 1. The child(ren) of the current node `p` cannot be converted to native
   *   In this case, we'll simply return the original Spark plan, since Comet native
   *   execution cannot start from an arbitrary Spark operator (unless it is a special node:
   *   either a scan, or a sink such as a shuffle exchange, union etc., which are wrapped by
   *   `CometScanWrapper` and `CometSinkPlaceHolder` respectively).
   *
   * 2. The child(ren) of the current node `p` can be converted to native
   *   There are two sub-cases for this scenario: 1) This node `p` can also be converted to
   *   native. In this case, we'll create a new native Comet operator for `p` and connect it with
   *   its previously converted child(ren); 2) This node `p` cannot be converted to native. In
   *   this case, similar to 1) above, we simply return `p` as it is. Its child(ren) would still
   *   be native Comet operators.
   *
   * After this rule finishes, we'll do another pass on the final plan to convert all adjacent
   * Comet native operators into a single native execution block. Please see where
   * `convertBlock` is called below.
   *
   * Here are a few examples:
   *
   *     Scan                       ======>             CometScan
   *      |                                                |
   *     Filter                                         CometFilter
   *      |                                                |
   *     HashAggregate                                  CometHashAggregate
   *      |                                                |
   *     Exchange                                       CometExchange
   *      |                                                |
   *     HashAggregate                                  CometHashAggregate
   *      |                                                |
   *     UnsupportedOperator                            UnsupportedOperator
   *
   * Native execution doesn't necessarily have to start from `CometScan`:
   *
   *     Scan                       =======>            CometScan
   *      |                                                |
   *     UnsupportedOperator                            UnsupportedOperator
   *      |                                                |
   *     HashAggregate                                  HashAggregate
   *      |                                                |
   *     Exchange                                       CometExchange
   *      |                                                |
   *     HashAggregate                                  CometHashAggregate
   *      |                                                |
   *     UnsupportedOperator                            UnsupportedOperator
   *
   * A sink can also be Comet operators other than `CometExchange`, for instance `CometUnion`:
   *
   *     Scan   Scan                =======>          CometScan CometScan
   *      |      |                                       |         |
   *     Filter Filter                                CometFilter CometFilter
   *      |      |                                       |         |
   *        Union                                         CometUnion
   *          |                                               |
   *        Project                                       CometProject
   */
  // spotless:on
  private def transform(plan: SparkPlan): SparkPlan = {
    // Serializes `op` together with the native operators of its children. Yields `None` when at
    // least one child is not a `CometNativeExec`, or when `operator2Proto` itself yields `None`.
    def operator2ProtoIfAllChildrenAreNative(op: SparkPlan): Option[Operator] = {
      if (op.children.forall(_.isInstanceOf[CometNativeExec])) {
        operator2Proto(op, op.children.map(_.asInstanceOf[CometNativeExec].nativeOp): _*)
      } else {
        None
      }
    }

    /**
     * Convert operator to proto and then apply a transformation to wrap the proto in a new plan.
     * The operator is returned unchanged when it cannot be converted.
     */
    def newPlanWithProto(op: SparkPlan, fun: Operator => SparkPlan): SparkPlan = {
      operator2ProtoIfAllChildrenAreNative(op).map(fun).getOrElse(op)
    }

    // Handles an operator that was not converted by any of the cases in `convertNode`: records
    // a "not supported" info message unless the operator is one that is never replaced, is a
    // command, or already carries explain info.
    def tagUnsupported(op: SparkPlan): SparkPlan = op match {
      case _: CometPlan | _: AQEShuffleReadExec | _: BroadcastExchangeExec |
          _: BroadcastQueryStageExec | _: AdaptiveSparkPlanExec =>
        // Some execs should never be replaced. We include
        // these cases specially here so we do not add a misleading 'info' message
        op
      case _: ExecutedCommandExec | _: V2CommandExec =>
        // Some execs that comet will not accelerate, such as command execs.
        op
      case _ =>
        if (!hasExplainInfo(op)) {
          // An operator that is not supported by Comet
          withInfo(op, s"${op.nodeName} is not supported")
        } else {
          // Already has fallback reason, do not override it
          op
        }
    }

    // Converts a single node whose children have already been visited (see `transformUp` at the
    // bottom). The order of the cases matters: earlier cases take precedence.
    def convertNode(op: SparkPlan): SparkPlan = op match {
      // Fully native scan for V1
      case scan: CometScanExec if scan.scanImpl == CometConf.SCAN_NATIVE_DATAFUSION =>
        val nativeOp = operator2Proto(scan).get
        CometNativeScan.createExec(nativeOp, scan)

      // Fully native Iceberg scan for V2 (iceberg-rust path)
      // Only handle scans with native metadata; SupportsComet scans fall through to isCometScan
      // Config checks (COMET_ICEBERG_NATIVE_ENABLED, COMET_EXEC_ENABLED) are done in CometScanRule
      case scan: CometBatchScanExec if scan.nativeIcebergScanMetadata.isDefined =>
        // When serialization fails, fall back to the CometBatchScanExec itself
        operator2Proto(scan)
          .map(nativeOp => CometIcebergNativeScan.createExec(nativeOp, scan))
          .getOrElse(scan)

      // Comet JVM + native scan for V1 and V2
      case op if isCometScan(op) =>
        val nativeOp = operator2Proto(op)
        CometScanWrapper(nativeOp.get, op)

      case op if shouldApplySparkToColumnar(conf, op) =>
        val cometOp = CometSparkToColumnarExec(op)
        val nativeOp = operator2Proto(cometOp)
        CometScanWrapper(nativeOp.get, cometOp)

      // For AQE broadcast stage on a Comet broadcast exchange
      case s @ BroadcastQueryStageExec(_, _: CometBroadcastExchangeExec, _) =>
        newPlanWithProto(s, CometSinkPlaceHolder(_, s, s))

      case s @ BroadcastQueryStageExec(
            _,
            ReusedExchangeExec(_, _: CometBroadcastExchangeExec),
            _) =>
        newPlanWithProto(s, CometSinkPlaceHolder(_, s, s))

      // `CometBroadcastExchangeExec`'s broadcast output is not compatible with Spark's broadcast
      // exchange. It is only used for Comet native execution. We only transform Spark broadcast
      // exchange to Comet broadcast exchange if its downstream is a Comet native plan or if the
      // broadcast exchange is forced to be enabled by Comet config.
      case plan if plan.children.exists(_.isInstanceOf[BroadcastExchangeExec]) =>
        val newChildren = plan.children.map {
          case b: BroadcastExchangeExec
              if isCometNative(b.child) &&
                CometConf.COMET_EXEC_BROADCAST_EXCHANGE_ENABLED.get(conf) =>
            operator2Proto(b) match {
              case Some(nativeOp) =>
                val cometOp = CometBroadcastExchangeExec(b, b.output, b.mode, b.child)
                CometSinkPlaceHolder(nativeOp, b, cometOp)
              case None => b
            }
          case other => other
        }
        if (newChildren.exists(_.isInstanceOf[BroadcastExchangeExec])) {
          // At least one broadcast child could not be converted: keep the Spark plan.
          plan
        } else {
          val newPlan = convertNode(plan.withNewChildren(newChildren))
          if (isCometNative(newPlan) || isCometBroadCastForceEnabled(conf)) {
            newPlan
          } else {
            // The parent did not become native and broadcast is not force-enabled, so the
            // original plan (with Spark broadcast exchanges) is kept. No fallback reason is
            // recorded here: the previous code guarded its `withInfo` call with
            // `isCometNative(newPlan)`, which is always false in this branch, so it never ran.
            plan
          }
        }

      // For AQE shuffle stage on a Comet shuffle exchange
      case s @ ShuffleQueryStageExec(_, _: CometShuffleExchangeExec, _) =>
        newPlanWithProto(s, CometSinkPlaceHolder(_, s, s))

      // For AQE shuffle stage on a reused Comet shuffle exchange
      // Note that we don't need to handle `ReusedExchangeExec` for non-AQE case, because
      // the query plan won't be re-optimized/planned in non-AQE mode.
      case s @ ShuffleQueryStageExec(_, ReusedExchangeExec(_, _: CometShuffleExchangeExec), _) =>
        newPlanWithProto(s, CometSinkPlaceHolder(_, s, s))

      // Native shuffle for Comet operators
      case s: ShuffleExchangeExec =>
        val nativeShuffle: Option[SparkPlan] =
          if (nativeShuffleSupported(s)) {
            operator2ProtoIfAllChildrenAreNative(s).map { nativeOp =>
              // Switch to use Decimal128 regardless of precision, since Arrow native execution
              // doesn't support Decimal32 and Decimal64 yet.
              conf.setConfString(CometConf.COMET_USE_DECIMAL_128.key, "true")
              val cometOp = CometShuffleExchangeExec(s, shuffleType = CometNativeShuffle)
              CometSinkPlaceHolder(nativeOp, s, cometOp)
            }
          } else {
            None
          }

        nativeShuffle
          .orElse {
            // Columnar shuffle for regular Spark operators (not Comet) and Comet operators
            // (if configured). It is only evaluated when native shuffle was not applicable.
            if (columnarShuffleSupported(s)) {
              operator2Proto(s).flatMap { nativeOp =>
                val child = s.child
                // Only a native Comet child or a row-based child is accepted here.
                if (child.isInstanceOf[CometNativeExec] || !child.supportsColumnar) {
                  val cometOp =
                    CometShuffleExchangeExec(s, shuffleType = CometColumnarShuffle)
                  Some(CometSinkPlaceHolder(nativeOp, s, cometOp))
                } else {
                  None
                }
              }
            } else {
              None
            }
          }
          .getOrElse(s)

      case op =>
        val handler = allExecs
          .get(op.getClass)
          .map(_.asInstanceOf[CometOperatorSerde[SparkPlan]])

        handler match {
          case Some(_) if !op.children.forall(isCometNative) =>
            // A registered operator whose children are not all native cannot be converted;
            // it is returned as is, without an info message.
            op
          case Some(serde) if isOperatorEnabled(serde, op) =>
            val builder = OperatorOuterClass.Operator.newBuilder().setPlanId(op.id)
            val childOp = op.children.map(_.asInstanceOf[CometNativeExec].nativeOp)
            childOp.foreach(builder.addChildren)
            serde
              .convert(op, builder, childOp: _*)
              .map(serde.createExec(_, op))
              .getOrElse(op)
          case _ =>
            // Either no handler is registered, or the handler is not enabled for this operator.
            tagUnsupported(op)
        }
    }

    plan.transformUp { case op =>
      convertNode(op)
    }
  }

  /**
   * Rebuilds every `ProjectExec` and `FilterExec` in the plan with its expressions passed
   * through `normalize`. All other operators are left as they are.
   */
  private def normalizePlan(plan: SparkPlan): SparkPlan = {
    plan.transformUp {
      case project: ProjectExec =>
        val normalizedList = project.projectList.map { named =>
          normalize(named).asInstanceOf[NamedExpression]
        }
        ProjectExec(normalizedList, project.child)
      case filter: FilterExec =>
        FilterExec(normalize(filter.condition), filter.child)
    }
  }

  // Spark normalizes NaN and zero for floating point numbers in several cases (see the
  // `NormalizeFloatingNumbers` optimization rule in Spark), but deliberately not for comparison
  // operators, because Spark's own comparisons handle them well (e.g.,
  // `SQLOrderingUtil.compareFloats`). The comparison functions in arrow-rs do not normalize NaN
  // and zero, so Comet has to normalize the operands of comparison operators itself. The
  // divisor of `Divide` and `Remainder` is normalized as well.
  private def normalize(expr: Expression): Expression = {
    // Rebuilds a binary expression from its two operands after normalizing both of them.
    def rebuild(lhs: Expression, rhs: Expression)(
        ctor: (Expression, Expression) => Expression): Expression = {
      ctor(normalizeNaNAndZero(lhs), normalizeNaNAndZero(rhs))
    }

    expr.transformUp {
      case EqualTo(lhs, rhs) => rebuild(lhs, rhs)(EqualTo(_, _))
      case EqualNullSafe(lhs, rhs) => rebuild(lhs, rhs)(EqualNullSafe(_, _))
      case GreaterThan(lhs, rhs) => rebuild(lhs, rhs)(GreaterThan(_, _))
      case GreaterThanOrEqual(lhs, rhs) => rebuild(lhs, rhs)(GreaterThanOrEqual(_, _))
      case LessThan(lhs, rhs) => rebuild(lhs, rhs)(LessThan(_, _))
      case LessThanOrEqual(lhs, rhs) => rebuild(lhs, rhs)(LessThanOrEqual(_, _))
      // Only the divisor is normalized for division-like operators.
      case Divide(dividend, divisor, evalMode) =>
        Divide(dividend, normalizeNaNAndZero(divisor), evalMode)
      case Remainder(dividend, divisor, evalMode) =>
        Remainder(dividend, normalizeNaNAndZero(divisor), evalMode)
    }
  }

  /**
   * Wraps a float or double expression in `KnownFloatingPointNormalized(NormalizeNaNAndZero(_))`.
   *
   * The expression is returned unchanged when it is already marked as normalized, when it is a
   * float/double literal other than negative zero, or when its type is not float or double.
   */
  private def normalizeNaNAndZero(expr: Expression): Expression = {
    def isFloatingPoint(dt: DataType): Boolean = dt match {
      case _: FloatType | _: DoubleType => true
      case _ => false
    }

    expr match {
      case _: KnownFloatingPointNormalized => expr
      case FloatLiteral(f) if !f.equals(-0.0f) => expr
      case DoubleLiteral(d) if !d.equals(-0.0d) => expr
      case _ if isFloatingPoint(expr.dataType) =>
        KnownFloatingPointNormalized(NormalizeNaNAndZero(expr))
      case _ => expr
    }
  }

  /**
   * Rule entry point. Delegates to `_apply` and, when `showTransformations` is set and the plan
   * changed, logs the plans before and after side by side.
   */
  override def apply(plan: SparkPlan): SparkPlan = {
    val transformed = _apply(plan)
    // Lazy so that the comparison only runs when transformation logging is switched on.
    lazy val planChanged = !transformed.fastEquals(plan)
    if (showTransformations && planChanged) {
      logInfo(s"""
           |=== Applying Rule $ruleName ===
           |${sideBySide(plan.treeString, transformed.treeString).mkString("\n")}
           |""".stripMargin)
    }
    transformed
  }

  /**
   * Applies the rule without any logging of the transformation.
   *
   *   - When Comet is not loaded, the plan is returned untouched.
   *   - When Comet exec is disabled, only shuffle exchanges are replaced (and only if Comet
   *     shuffle is enabled).
   *   - Otherwise the plan is normalized, optionally has its joins rewritten, is converted
   *     node by node via `transform`, has its placeholders removed and logical links fixed, and
   *     finally has its native blocks converted.
   */
  private def _apply(plan: SparkPlan): SparkPlan = {
    // Aligns the logical-plan tags of the Comet operator `op` with those of the Spark operator
    // it replaced: the link is copied when the original has one, and unset otherwise.
    def syncLogicalLink(op: SparkPlan, original: SparkPlan): SparkPlan = {
      original.logicalLink match {
        case Some(link) =>
          op.setLogicalLink(link)
        case None =>
          op.unsetTagValue(SparkPlan.LOGICAL_PLAN_TAG)
          op.unsetTagValue(SparkPlan.LOGICAL_PLAN_INHERITED_TAG)
      }
      op
    }

    if (!isCometLoaded(conf)) {
      // We shouldn't transform Spark query plan if Comet is not loaded.
      plan
    } else if (!isCometExecEnabled(conf)) {
      // Comet exec is disabled, but for Spark shuffle, we still can use Comet columnar shuffle
      if (isCometShuffleEnabled(conf)) {
        applyCometShuffle(plan)
      } else {
        plan
      }
    } else {
      val normalizedPlan = normalizePlan(plan)

      val planWithJoinRewritten = if (CometConf.COMET_REPLACE_SMJ.get()) {
        normalizedPlan.transformUp { case p =>
          RewriteJoin.rewrite(p)
        }
      } else {
        normalizedPlan
      }

      val convertedPlan = transform(planWithJoinRewritten)

      // if the plan cannot be run fully natively then explain why (when appropriate
      // config is enabled)
      if (CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.get()) {
        val info = new ExtendedExplainInfo()
        if (info.extensionInfo(convertedPlan).nonEmpty) {
          logWarning(
            "Comet cannot execute some parts of this plan natively " +
              s"(set ${CometConf.COMET_EXPLAIN_FALLBACK_ENABLED.key}=false " +
              "to disable this logging):\n" +
              s"${info.generateExtendedInfo(convertedPlan)}")
        }
      }

      // Remove placeholders
      val withoutPlaceholders = convertedPlan.transform {
        case CometSinkPlaceHolder(_, _, s) => s
        case CometScanWrapper(_, s) => s
      }

      // Set up logical links
      val linkedPlan = withoutPlaceholders.transform {
        case op: CometExec =>
          syncLogicalLink(op, op.originalPlan)
        case op: CometShuffleExchangeExec =>
          // Original Spark shuffle exchange operator might have empty logical link.
          // But the `setLogicalLink` call above on downstream operator of
          // `CometShuffleExchangeExec` will set its logical link to the downstream
          // operators which cause AQE behavior to be incorrect. So we need to unset
          // the logical link here.
          syncLogicalLink(op, op.originalPlan)
        case op: CometBroadcastExchangeExec =>
          syncLogicalLink(op, op.originalPlan)
      }

      // Convert native execution block by linking consecutive native operators.
      // `firstNativeOp` is mutable state shared across the top-down traversal: it is true
      // whenever the next native operator encountered starts a new native block.
      var firstNativeOp = true
      linkedPlan.transformDown {
        case op: CometNativeExec =>
          val blockHead = if (firstNativeOp) {
            firstNativeOp = false
            op.convertBlock()
          } else {
            op
          }

          // If reaching leaf node, reset `firstNativeOp` to true
          // because it will start a new block in next iteration.
          if (op.children.isEmpty) {
            firstNativeOp = true
          }

          blockHead
        case op =>
          firstNativeOp = true
          op
      }
    }
  }

  /**
   * Tells whether the given Spark plan is a Comet shuffle operator: a
   * `CometShuffleExchangeExec`, an AQE shuffle query stage whose plan is one, or a
   * `CometSinkPlaceHolder` whose child is (recursively) one.
   */
  private def isShuffleOperator(op: SparkPlan): Boolean = op match {
    case _: CometShuffleExchangeExec => true
    case stage: ShuffleQueryStageExec => stage.plan.isInstanceOf[CometShuffleExchangeExec]
    case placeholder: CometSinkPlaceHolder => isShuffleOperator(placeholder.child)
    case _ => false
  }

  /**
   * Checks that Comet shuffle is enabled for `op`: the `COMET_EXEC_SHUFFLE_ENABLED` config must
   * be on and the Comet shuffle manager must be enabled. When either check fails, the reason is
   * attached to `op` via `withInfo`.
   *
   * @param op
   *   the operator whose conf is consulted and which receives the info message
   * @return
   *   true when both checks pass
   */
  def isCometShuffleEnabledWithInfo(op: SparkPlan): Boolean = {
    val shuffleConfEnabled = COMET_EXEC_SHUFFLE_ENABLED.get(op.conf)
    // The shuffle manager is only consulted when the config flag is on.
    if (shuffleConfEnabled && isCometShuffleManagerEnabled(op.conf)) {
      true
    } else {
      val reason = if (!shuffleConfEnabled) {
        s"Comet shuffle is not enabled: ${COMET_EXEC_SHUFFLE_ENABLED.key} is not enabled"
      } else {
        s"spark.shuffle.manager is not set to ${classOf[CometShuffleManager].getName}"
      }
      withInfo(op, reason)
      false
    }
  }

  /**
   * Whether the given Spark shuffle exchange is supported by Comet native shuffle.
   *
   * The checks run in this order and stop at the first failure: Comet shuffle enabled, native
   * shuffle mode enabled, child is a Comet plan, all input column types serializable, and the
   * output partitioning supported. Except for the "child is not a Comet plan" case, a failing
   * check attaches a fallback reason to `s` via `withInfo`.
   */
  private def nativeShuffleSupported(s: ShuffleExchangeExec): Boolean = {

    /** Non-nested types accepted both as shuffle data and as partitioning keys. */
    def supportedScalarDataType(dt: DataType): Boolean = dt match {
      case _: BooleanType | _: ByteType | _: ShortType | _: IntegerType | _: LongType |
          _: FloatType | _: DoubleType | _: StringType | _: BinaryType | _: TimestampType |
          _: TimestampNTZType | _: DecimalType | _: DateType =>
        true
      case _ =>
        false
    }

    /**
     * Determine which data types are supported as partition columns in native shuffle.
     *
     * For HashPartitioning this defines the key that determines how data should be collocated for
     * operations like `groupByKey`, `reduceByKey`, or `join`. Native code does not support
     * hashing complex types, see hash_funcs/utils.rs
     */
    def supportedHashPartitioningDataType(dt: DataType): Boolean = supportedScalarDataType(dt)

    /**
     * Determine which data types are supported as partition columns in native shuffle.
     *
     * For RangePartitioning this defines the key that determines how data should be collocated
     * for operations like `orderBy`, `repartitionByRange`. Native code does not support sorting
     * complex types.
     */
    def supportedRangePartitioningDataType(dt: DataType): Boolean = supportedScalarDataType(dt)

    /**
     * Determine which data types are supported as data columns in native shuffle.
     *
     * Native shuffle relies on the Arrow IPC writer to serialize batches to disk, so it should
     * support all types that Comet supports.
     */
    def supportedSerializableDataType(dt: DataType): Boolean = dt match {
      case StructType(fields) =>
        fields.nonEmpty && fields.forall(f => supportedSerializableDataType(f.dataType))
      case ArrayType(elementType, _) =>
        supportedSerializableDataType(elementType)
      case MapType(keyType, valueType, _) =>
        supportedSerializableDataType(keyType) && supportedSerializableDataType(valueType)
      case scalar =>
        supportedScalarDataType(scalar)
    }

    // Checks every item in order and reports each unsupported one. We don't short-circuit in
    // case there is more than one unsupported item to provide info for.
    def allSupported[T](items: Seq[T])(isSupported: T => Boolean)(report: T => Any): Boolean = {
      items.foldLeft(true) { (allOk, item) =>
        if (isSupported(item)) {
          allOk
        } else {
          report(item)
          false
        }
      }
    }

    // Evaluated at most once, and only after the cheaper checks below have passed.
    lazy val inputs = s.child.output

    def nativeModeEnabled: Boolean = {
      val enabled = isCometNativeShuffleMode(s.conf)
      if (!enabled) {
        withInfo(s, "Comet native shuffle not enabled")
      }
      enabled
    }

    // Stops at the first unsupported input column and reports only that one.
    def inputTypesSupported: Boolean = {
      inputs.find(input => !supportedSerializableDataType(input.dataType)) match {
        case Some(input) =>
          withInfo(s, s"unsupported shuffle data type ${input.dataType} for input $input")
          false
        case None =>
          true
      }
    }

    def partitioningSupported: Boolean = {
      val partitioning = s.outputPartitioning
      val sqlConf = SQLConf.get
      partitioning match {
        case HashPartitioning(expressions, _) =>
          val hashConf = CometConf.COMET_EXEC_SHUFFLE_WITH_HASH_PARTITIONING_ENABLED
          val enabled = hashConf.get(sqlConf)
          if (!enabled) {
            withInfo(s, s"${hashConf.key} is disabled")
          }
          // Expressions and types are still checked when the config is disabled, so that all
          // fallback reasons are reported together.
          val exprsOk = allSupported(expressions)(expr =>
            QueryPlanSerde.exprToProto(expr, inputs).nonEmpty)(expr =>
            withInfo(s, s"unsupported hash partitioning expression: $expr"))
          val typesOk =
            allSupported(expressions.map(_.dataType).distinct)(supportedHashPartitioningDataType)(
              dt => withInfo(s, s"unsupported hash partitioning data type for native shuffle: $dt"))
          enabled && exprsOk && typesOk
        case SinglePartition =>
          // we already checked that the input types are supported
          true
        case RangePartitioning(orderings, _) =>
          val rangeConf = CometConf.COMET_EXEC_SHUFFLE_WITH_RANGE_PARTITIONING_ENABLED
          if (!rangeConf.get(sqlConf)) {
            // Unlike the hash case, a disabled config short-circuits the remaining checks.
            withInfo(s, s"${rangeConf.key} is disabled")
            false
          } else {
            val ordersOk = allSupported(orderings)(o =>
              QueryPlanSerde.exprToProto(o, inputs).nonEmpty)(o =>
              withInfo(s, s"unsupported range partitioning sort order: $o", o))
            val typesOk = allSupported(orderings.map(_.dataType).distinct)(
              supportedRangePartitioningDataType)(dt =>
              withInfo(s, s"unsupported range partitioning data type for native shuffle: $dt"))
            ordersOk && typesOk
          }
        case _ =>
          withInfo(
            s,
            s"unsupported Spark partitioning for native shuffle: ${partitioning.getClass.getName}")
          false
      }
    }

    // we do not need to report a fallback reason if the child plan is not a Comet plan
    isCometShuffleEnabledWithInfo(s) &&
    nativeModeEnabled &&
    isCometPlan(s.child) &&
    inputTypesSupported &&
    partitioningSupported
  }

  /**
   * Whether the given Spark shuffle exchange is supported by Comet columnar shuffle, which
   * supports struct/array/map input types.
   *
   * The checks run in this order and stop at the first failure: Comet shuffle enabled, JVM
   * (columnar) shuffle mode enabled, child is not itself a Comet shuffle operator, child is
   * either row-based or a Comet plan, all input column types serializable, and the output
   * partitioning supported. A failing check attaches a fallback reason to `s` via `withInfo`.
   */
  private def columnarShuffleSupported(s: ShuffleExchangeExec): Boolean = {

    /**
     * Determine which data types are supported as data columns in columnar shuffle.
     *
     * Comet columnar shuffle uses native code to convert Spark unsafe rows to Arrow batches, see
     * shuffle/row.rs
     */
    def supportedSerializableDataType(dt: DataType): Boolean = dt match {
      case _: BooleanType | _: ByteType | _: ShortType | _: IntegerType | _: LongType |
          _: FloatType | _: DoubleType | _: StringType | _: BinaryType | _: TimestampType |
          _: TimestampNTZType | _: DecimalType | _: DateType =>
        true
      case StructType(fields) =>
        fields.nonEmpty && fields.forall(f => supportedSerializableDataType(f.dataType)) &&
        // Java Arrow stream reader cannot work on duplicate field name
        fields.map(f => f.name).distinct.length == fields.length
      case ArrayType(elementType, _) =>
        supportedSerializableDataType(elementType)
      case MapType(keyType, valueType, _) =>
        supportedSerializableDataType(keyType) && supportedSerializableDataType(valueType)
      case _ =>
        false
    }

    // Checks every item in order and reports each unsupported one. We don't short-circuit in
    // case there is more than one unsupported item to provide info for.
    def allSupported[T](items: Seq[T])(isSupported: T => Boolean)(report: T => Any): Boolean = {
      items.foldLeft(true) { (allOk, item) =>
        if (isSupported(item)) {
          allOk
        } else {
          report(item)
          false
        }
      }
    }

    // Evaluated at most once, and only after the cheaper checks below have passed.
    lazy val inputs = s.child.output

    def columnarModeEnabled: Boolean = {
      val enabled = isCometJVMShuffleMode(s.conf)
      if (!enabled) {
        withInfo(s, "Comet columnar shuffle not enabled")
      }
      enabled
    }

    def childIsNotShuffle: Boolean = {
      val childIsShuffle = isShuffleOperator(s.child)
      if (childIsShuffle) {
        withInfo(s, s"Child ${s.child.getClass.getName} is a shuffle operator")
      }
      !childIsShuffle
    }

    def childIsRowBasedOrComet: Boolean = {
      // A columnar child is only acceptable when it is a Comet plan.
      val unsupportedColumnarChild = s.child.supportsColumnar && !isCometPlan(s.child)
      if (unsupportedColumnarChild) {
        withInfo(
          s,
          s"Child ${s.child.getClass.getName} is a neither row-based or a Comet operator")
      }
      !unsupportedColumnarChild
    }

    // Stops at the first unsupported input column and reports only that one.
    def inputTypesSupported: Boolean = {
      inputs.find(input => !supportedSerializableDataType(input.dataType)) match {
        case Some(input) =>
          withInfo(s, s"unsupported shuffle data type ${input.dataType} for input $input")
          false
        case None =>
          true
      }
    }

    def partitioningSupported: Boolean = {
      val partitioning = s.outputPartitioning
      partitioning match {
        case HashPartitioning(expressions, _) =>
          allSupported(expressions)(expr =>
            QueryPlanSerde.exprToProto(expr, inputs).nonEmpty)(expr =>
            withInfo(s, s"unsupported hash partitioning expression: $expr"))
        case SinglePartition =>
          // we already checked that the input types are supported
          true
        case RoundRobinPartitioning(_) =>
          // we already checked that the input types are supported
          true
        case RangePartitioning(orderings, _) =>
          allSupported(orderings)(o => QueryPlanSerde.exprToProto(o, inputs).nonEmpty)(o =>
            withInfo(s, s"unsupported range partitioning sort order: $o"))
        case _ =>
          withInfo(
            s,
            s"unsupported Spark partitioning for columnar shuffle: ${partitioning.getClass.getName}")
          false
      }
    }

    isCometShuffleEnabledWithInfo(s) &&
    columnarModeEnabled &&
    childIsNotShuffle &&
    childIsRowBasedOrComet &&
    inputTypesSupported &&
    partitioningSupported
  }

  /**
   * Convert a Spark plan operator to a protobuf Comet operator.
   *
   * @param op
   *   Spark plan operator
   * @param childOp
   *   previously converted protobuf Comet operators, which will be consumed by the Spark plan
   *   operator as its children
   * @return
   *   The converted Comet native operator for the input `op`, or `None` if the `op` cannot be
   *   converted to a native operator.
   */
  private def operator2Proto(op: SparkPlan, childOp: Operator*): Option[Operator] = {
    // Seed the result builder with the plan id and the already-converted child operators.
    val protoBuilder = OperatorOuterClass.Operator.newBuilder().setPlanId(op.id)
    childOp.foreach(child => protoBuilder.addChildren(child))

    op match {

      // V1 scan that runs fully natively
      case nativeScan: CometScanExec
          if nativeScan.scanImpl == CometConf.SCAN_NATIVE_DATAFUSION =>
        CometNativeScan.convert(nativeScan, protoBuilder, childOp: _*)

      // V2 Iceberg scan that runs fully natively (iceberg-rust path)
      case icebergScan: CometBatchScanExec if icebergScan.nativeIcebergScanMetadata.isDefined =>
        CometIcebergNativeScan.convert(icebergScan, protoBuilder, childOp: _*)

      // Sinks are the sources of a Comet native execution chain; they are encoded as a Scan.
      case sink if isCometSink(sink) =>
        val outputAttrs = sink.output
        val allTypesSupported =
          outputAttrs.forall(attr => supportedDataType(attr.dataType, allowComplex = true))

        if (!allTypesSupported) {
          withInfo(sink, "Unsupported data type")
          None
        } else {
          val scanBuilder = OperatorOuterClass.Scan.newBuilder()

          // Label the scan with the node description, or the class name if that is empty.
          val nodeDescription = sink.simpleStringWithNodeId()
          scanBuilder.setSource(
            if (nodeDescription.isEmpty) sink.getClass.getSimpleName else nodeDescription)

          // Only exchange sinks are flagged as Arrow-FFI safe: broadcast exchange batches come
          // from ArrowStreamReader and shuffle exchange batches come from
          // NativeBatchDecoderIterator. Every other sink is flagged unsafe, including
          // native_comet scans (which reuse mutable buffers) and native_iceberg_compat scans
          // (which reuse mutable buffers for constant columns, see
          // https://github.com/apache/datafusion-comet/issues/2152).
          scanBuilder.setArrowFfiSafe(isExchangeSink(sink))

          // Types that cannot be serialized are dropped here, so a shorter result means at
          // least one output column is unsupported.
          val scanTypes = outputAttrs.flatMap(attr => serializeDataType(attr.dataType))

          if (scanTypes.length != outputAttrs.length) {
            withInfo(
              sink,
              s"unsupported Comet operator: ${sink.nodeName}, " +
                "due to unsupported data types above")
            None
          } else {
            scanBuilder.addAllFields(scanTypes.asJava)

            // Sink operators don't have children
            protoBuilder.clearChildren()

            Some(protoBuilder.setScan(scanBuilder).build())
          }
        }

      case _ =>
        // Record the fallback reason unless the node is:
        //  1. a Comet operator, or
        //  2. a Spark shuffle operator, which is handled separately
        val isCometNode = op.nodeName.contains("Comet")
        val isSparkShuffle = op.isInstanceOf[ShuffleExchangeExec]
        if (!(isCometNode || isSparkShuffle)) {
          withInfo(op, s"unsupported Spark operator: ${op.nodeName}")
        }
        None
    }
  }

  /**
   * Decide whether `op` may be converted to a native operator by `handler`.
   *
   * The handler's optional enabling config is consulted first: when it is defined and evaluates
   * to false for the operator's conf, the operator is rejected. Otherwise the support level that
   * the handler reports for `op` decides the outcome. Every rejection records a message on `op`
   * via `withInfo`; notes attached to accepted operators are logged as warnings.
   *
   * @param handler
   *   serde handler associated with the operator
   * @param op
   *   Spark plan operator being considered
   * @return
   *   `true` if the operator is enabled and its support level permits conversion
   */
  private def isOperatorEnabled(handler: CometOperatorSerde[_], op: SparkPlan): Boolean = {
    val opName = op.getClass.getSimpleName

    // Keep the enabling config only when it is defined AND currently switched off.
    handler.enabledConfig.filterNot(_.get(op.conf)) match {
      case Some(disabledEntry) =>
        withInfo(
          op,
          s"Native support for operator $opName is disabled. " +
            s"Set ${disabledEntry.key}=true to enable it.")
        false

      case None =>
        // The handler is typed with a wildcard, so cast it before asking for the support level.
        val typedHandler = handler.asInstanceOf[CometOperatorSerde[SparkPlan]]
        typedHandler.getSupportLevel(op) match {
          case Unsupported(notes) =>
            withInfo(op, notes.getOrElse(""))
            false

          case Incompatible(notes) =>
            val allowIncompat = CometConf.isOperatorAllowIncompat(opName)
            val incompatConf = CometConf.getOperatorAllowIncompatConfigKey(opName)
            if (!allowIncompat) {
              val optionalNotes = notes.fold("")(str => s" ($str)")
              withInfo(
                op,
                s"$opName is not fully compatible with Spark$optionalNotes. " +
                  s"To enable it anyway, set $incompatConf=true. " +
                  s"${CometConf.COMPAT_GUIDE}.")
              false
            } else {
              // Incompatible operators were explicitly allowed; surface any notes as a warning.
              notes.foreach { note =>
                logWarning(
                  s"Comet supports $opName when $incompatConf=true " +
                    s"but has notes: $note")
              }
              true
            }

          case Compatible(notes) =>
            notes.foreach(note => logWarning(s"Comet supports $opName but has notes: $note"))
            true
        }
    }
  }

  /**
   * Whether the input Spark operator `op` can be considered as a Comet sink, i.e., the start of
   * native execution. If it is true, we'll wrap `op` with `CometScanWrapper` or
   * `CometSinkPlaceHolder` later in `CometSparkSessionExtensions` after `operator2Proto` is
   * called.
   */
  private def isCometSink(op: SparkPlan): Boolean = {
    // Checked in order: exchange sinks, Comet scans, then the two dedicated sink node types.
    def isSinkNode: Boolean = op match {
      case _: CometSparkToColumnarExec | _: CometSinkPlaceHolder => true
      case _ => false
    }

    isExchangeSink(op) || isCometScan(op) || isSinkNode
  }

  /**
   * Whether `op` is an exchange that acts as a sink: a Spark shuffle or broadcast exchange, or a
   * shuffle/broadcast query stage whose plan is the matching Comet exchange, either directly or
   * wrapped in a `ReusedExchangeExec`.
   */
  private def isExchangeSink(op: SparkPlan): Boolean = {
    op match {
      case _: ShuffleExchangeExec | _: BroadcastExchangeExec => true

      case ShuffleQueryStageExec(_, stagePlan, _) =>
        stagePlan match {
          case _: CometShuffleExchangeExec => true
          case ReusedExchangeExec(_, _: CometShuffleExchangeExec) => true
          case _ => false
        }

      case BroadcastQueryStageExec(_, stagePlan, _) =>
        stagePlan match {
          case _: CometBroadcastExchangeExec => true
          case ReusedExchangeExec(_, _: CometBroadcastExchangeExec) => true
          case _ => false
        }

      case _ => false
    }
  }

}
