#!/bin/bash
#
# Copyright (c) 2021-2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Stop at the first command that fails outside of a condition
set -o errexit

# Extended globs are required by the ?(WithDatabricks) patterns used when the
# dist profile is matched further down in this script
shopt -s extglob

# Defaults, overridden by --clean, --debug and --scala213 respectively
SKIP_CLEAN=1
BUILD_ALL_DEBUG=0
SCALA213=0

# Joins all arguments after the first into one string and prints it followed
# by a newline.
# Arguments: $1   - separator; only its first character is used because the
#                   join is done through IFS and "$*"
#            $2.. - items to join
function join_by {
  local IFS="$1"
  shift
  # printf rather than echo so that an item such as -n or -e is printed
  # verbatim instead of being taken for an echo option
  printf '%s\n' "$*"
}


# Prints the command line help to stdout.
# The text is emitted verbatim from a quoted here-document, so nothing in it
# is expanded by the shell.
function print_usage() {
  cat <<'EOF'
Usage: buildall [OPTION]
Options:
   -h, --help
        print this help message
   --debug
        enable bash -x tracing right after this option is parsed
   --clean
        include Maven clean phase
   -gb, --generate-bloop
        generate projects for Bloop clients: IDE (Scala Metals, IntelliJ) or Bloop CLI
   -p=DIST_PROFILE, --profile=DIST_PROFILE
        use this profile for the dist module, default: noSnapshots, also supported: snapshots,
        snapshotsWithDatabricks, noSnapshotsWithDatabricks
        NOTE: the Databricks-related spark3XYdb shims are not built locally, the jars are fetched prebuilt from a
        remote Maven repo. You can also supply a comma-separated list of build versions. E.g., --profile=330,331 will
        build the distribution jar only for 3.3.0 and 3.3.1
   -m=MODULE, --module=MODULE
        after finishing parallel builds, resume from aggregator and build up to and including module MODULE.
        E.g., --module=integration_tests
   -P=N, --parallel=N
        Build in parallel, N (4 by default) is passed via -P to xargs
   --install
        Install the resulting jar instead of just building it
   -o=MVN_OPT, --option=MVN_OPT
        use this option to build project with maven. E.g., --option='-Dcudf.version=cuda12'
   --rebuild-dist-only
        repackage the dist module artifact using installed dependencies
   --scala213
        build 2.13 shims
EOF
}

# Generates Bloop project configurations, one per Spark shim version.
#
# For every version in SPARK_SHIM_VERSIONS a Maven build with -DbloopInstall
# writes the Bloop configuration into a scratch directory, which is then moved
# to $PWD/.bloop-spark<version>, replacing a previous one if present.
#
# Globals: MVN (read; intentionally word-split into command and options),
#          SPARK_SHIM_VERSIONS (read), BUILD_ALL_DEBUG (read)
# Outputs: progress messages on stdout, elapsed time through `time`
# Returns: non-zero when the scratch directory cannot be created
function bloopInstall() {

  [[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x

  # Declared separately from the assignment so that a mktemp failure is not
  # hidden behind the exit status of `local`
  local bloopTmpDir
  bloopTmpDir=$(mktemp -d /tmp/tmp.bloop.XXXXXX) || return

  time (
    # Remove the scratch directory whenever this subshell exits, whether the
    # loop completed or a command failed
    trap 'rm -rf -- "${bloopTmpDir:?}"' EXIT

    bloopDirsGenerated=()
    for bv in "${SPARK_SHIM_VERSIONS[@]}"; do
      bloopTmpConfigDir="$bloopTmpDir/.bloop$bv"
      mkdir -p -- "$bloopTmpConfigDir"
      $MVN -B clean install \
        -DbloopInstall \
        -Dbloop.configDirectory="$bloopTmpConfigDir" \
        -DdownloadSources=true \
        -Dbuildver="$bv"

      specifier="spark$bv"
      # Paths are quoted throughout so that a working directory containing
      # whitespace is handled correctly
      bloopDir="$PWD/.bloop-$specifier"
      rm -rf -- "$bloopDir"
      mv -- "$bloopTmpConfigDir" "$bloopDir"
      echo "generated bloop files under $bloopDir"
      bloopDirsGenerated+=("${bloopDir}")
    done
    echo "#### Created bloop projects ${bloopDirsGenerated[*]}"
    echo "Execute"
    echo "  ln -s .bloop-spark3XY .bloop"
    echo "to make it an active Bloop project in VS Code Scala Metals"
  )
}

# Prints the build versions held by the included_buildvers Maven property of
# the dist module for the given profile, separated by single spaces and
# without a trailing newline.
# Arguments: $1 - Maven profile to activate through -P
# Globals:   MVN (read; intentionally word-split into command and options),
#            BUILD_ALL_DEBUG (read)
# Returns:   non-zero when the Maven invocation fails, printing nothing
function versionsFromDistProfile() {
  [[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x
  local IFS=$' \t\n'
  local versionRawStr
  versionRawStr=$($MVN -B help:evaluate -q -pl dist -P"$1" -Dexpression=included_buildvers -DforceStdout) || return
  # Commas and newlines become blanks instead of being deleted, so a compact
  # list such as 330,331 still yields two versions; read then splits on
  # whitespace without subjecting the values to pathname expansion
  local -a versions=()
  read -r -a versions <<< "${versionRawStr//[$'\n',]/ }"
  printf '%s' "${versions[*]}"
}

# Prints the build versions reported by build/get_buildvers.py, separated by
# single spaces and without a trailing newline.
# Arguments: $1, $2 - passed through unchanged to build/get_buildvers.py
# Globals:   BUILD_ALL_DEBUG (read)
# Returns:   non-zero when the python invocation fails, printing nothing
function versionsFromReleaseProfiles() {
  [[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x
  local IFS=$' \t\n'
  local versionRawStr
  # Arguments are quoted so that a pom path containing whitespace stays one
  # argument
  versionRawStr=$(python build/get_buildvers.py "$1" "$2") || return
  # Commas and newlines become blanks instead of being deleted, so a compact
  # list such as 330,331 still yields two versions; read then splits on
  # whitespace without subjecting the values to pathname expansion
  local -a versions=()
  read -r -a versions <<< "${versionRawStr//[$'\n',]/ }"
  printf '%s' "${versions[*]}"
}

# Maven phase used for the final joined build; --install switches it to
# "install"
FINAL_OP="package"

# Parse the command line one argument at a time.
# The loop runs on the argument count rather than on "$1" being non-empty, so
# options that follow an empty argument are still processed instead of being
# silently dropped.
while (( $# > 0 )); do

  case "$1" in

    "")
      # an empty argument carries no option, skip it
      ;;

    --help|-h)
      print_usage
      exit 0
      ;;

    --generate-bloop|-gb)
      GEN_BLOOP=true
      ;;

    -p=*|--profile=*)
      # keep everything after the first '='
      DIST_PROFILE="${1#*=}"
      ;;

    -m=*|--module=*)
      MODULE="${1#*=}"
      ;;

    -P=*|--parallel=*)
      BUILD_PARALLEL="${1#*=}"
      ;;

    --debug)
      # access in called functions
      export BUILD_ALL_DEBUG=1
      set -x
      ;;

    --clean)
      SKIP_CLEAN="0"
      ;;

    --install)
      FINAL_OP="install"
      ;;

    --scala213)
      SCALA213=1
      ;;

    --rebuild-dist-only)
      SKIP_DIST_DEPS="1"
      MODULE="dist"
      ;;

    -o=*|--option=*)
      export MVN_OPT="${1#*=}"
      ;;

    *)
      echo >&2 "Unknown arg: $1"
      print_usage
      exit 1
      ;;

  esac

  # advance $1 to the next in the arg list
  shift

done

# A dist profile whose name ends in Scala213 selects the Scala 2.13 build
if [[ "$DIST_PROFILE" == *Scala213 ]]; then
  SCALA213=1
fi

MVN=${MVN:-"mvn"}
# include options to mvn command
export MVN="$MVN -Dmaven.wagon.http.retryHandler.count=3 ${MVN_OPT}"

if [[ "$SCALA213" == "1" ]]; then
  POM_FILE="scala2.13/pom.xml"
  export MVN="$MVN -f scala2.13/"
  # Quoted so that the helper next to this script is found even when the
  # script path contains whitespace
  "$(dirname "$0")/make-scala-version-build-files.sh" 2.13
else
  POM_FILE="pom.xml"
fi

DIST_PROFILE=${DIST_PROFILE:-"noSnapshots"}

# Restrict the final build to MODULE and the modules it depends on, when a
# module was requested
if [[ -n "$MODULE" ]]; then
  MODULE_OPT="--projects $MODULE --also-make"
else
  MODULE_OPT=""
fi

# Fill SPARK_SHIM_VERSIONS from the requested dist profile: either a named
# release profile or an explicit comma-separated list of build versions.
echo "Collecting Spark versions..."
case "$DIST_PROFILE" in

  snapshots?(WithDatabricks))
    # The command substitution is left unquoted on purpose: the helper prints
    # a space-separated list that word splitting turns into array elements
    SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "snap_and_no_snap" "$POM_FILE"))
    ;;

  noSnapshots?(WithDatabricks))
    SPARK_SHIM_VERSIONS=($(versionsFromReleaseProfiles "no_snapshots" "$POM_FILE"))
    ;;

  [34]*)
    # A value starting with 3 or 4 is taken as a comma-separated version list.
    # DIST_PROFILE is unset afterwards, so no Maven profile option is derived
    # from it later in the script
    IFS="," read -r -a SPARK_SHIM_VERSIONS <<< "$DIST_PROFILE"
    unset DIST_PROFILE
    ;;

  *)
    # Diagnostics go to stderr, like the "Unknown arg" message of the option
    # parser
    echo >&2 "ERROR unexpected value for profile: $DIST_PROFILE, see 'buildall --help'"
    exit 1
    ;;

esac

echo "Spark versions involved: ${SPARK_SHIM_VERSIONS[*]} ..."
# Assigned separately from `export` so that a failing Maven invocation aborts
# the script instead of being hidden behind the exit status of `export`,
# which would leave MVN_BASE_DIR empty or filled with error output
MVN_BASE_DIR=$($MVN help:evaluate -Dexpression=project.basedir -q -DforceStdout) || {
  echo >&2 "ERROR unable to determine the Maven project base directory"
  exit 1
}
export MVN_BASE_DIR

# With --generate-bloop only the Bloop projects are produced
if [[ "$GEN_BLOOP" == "true" ]]; then
  bloopInstall
  exit 0
fi

# DIST_PROFILE is empty when an explicit version list was given, in which case
# no profile option is passed to Maven
if [[ -n "$DIST_PROFILE" ]]; then
  MVN_PROFILE_OPT="-P$DIST_PROFILE"
else
  MVN_PROFILE_OPT=""
fi

# First element in SPARK_SHIM_VERSIONS to do most of the checks
export BASE_VER=${SPARK_SHIM_VERSIONS[0]}
export NUM_SHIMS=${#SPARK_SHIM_VERSIONS[@]}
export BUILD_PARALLEL=${BUILD_PARALLEL:-4}

if [[ "$SKIP_CLEAN" != "1" ]]; then
  echo Clean once across all modules
  $MVN -q clean
fi

echo "Building a combined dist jar with Shims for ${SPARK_SHIM_VERSIONS[*]} ..."

# Builds a single Spark shim with Maven. Exported below so that it can be
# called from the separate bash processes started by xargs.
#
# Arguments: $1 - buildver of the shim to build
# Globals:   MVN (read; intentionally word-split into command and options),
#            MVN_BASE_DIR, BASE_VER, BUILD_PARALLEL, NUM_SHIMS,
#            BUILD_ALL_DEBUG (read);
#            BUILD_VER, LOG_FILE, SKIP_CHECKS, MVN_PHASE (written)
# Outputs:   Maven output goes to /dev/tty when only one build runs at a
#            time, otherwise to target/mvn-build-<buildver>.log under
#            MVN_BASE_DIR; on failure the last 500 lines of that log file are
#            printed
# Exits:     with status 255 when Maven fails; xargs stops reading further
#            input once a command exits with 255
function build_single_shim() {
  if [[ "$BUILD_ALL_DEBUG" == "1" ]]; then
    set -x
  fi

  BUILD_VER=$1
  mkdir -p "$MVN_BASE_DIR/target"

  if (( BUILD_PARALLEL == 1 || NUM_SHIMS == 1 )); then
    LOG_FILE="/dev/tty"
  else
    LOG_FILE="$MVN_BASE_DIR/target/mvn-build-$BUILD_VER.log"
  fi

  case "$BUILD_VER" in
    "$BASE_VER")
      SKIP_CHECKS="false"
      # WORKAROUND:
      # the final maven build, which this script resumes from the aggregator
      # module, currently relies on the aggregator dependency, which will be
      # removed by
      # https://github.com/NVIDIA/spark-rapids/issues/3932
      #
      # if it were a single maven invocation Maven would register and give
      # precedence to package-phase artifacts
      #
      MVN_PHASE="install"
      ;;
    *)
      SKIP_CHECKS="true"
      MVN_PHASE="package"
      ;;
  esac

  # rat and scalastyle run only where SKIP_CHECKS is false; tests and
  # scaladoc are skipped for every shim
  local -a shimMvnArgs=(
    -U "$MVN_PHASE"
    -DskipTests
    -Dbuildver="$BUILD_VER"
    -Drat.skip="$SKIP_CHECKS"
    -Dmaven.scaladoc.skip
    -Dmaven.scalastyle.skip="$SKIP_CHECKS"
    -pl tools -am
  )

  echo "#### REDIRECTING mvn output to $LOG_FILE ####"
  if ! $MVN "${shimMvnArgs[@]}" > "$LOG_FILE" 2>&1; then
    if [[ "$LOG_FILE" != "/dev/tty" ]]; then
      # best effort: a problem showing the log must not change the exit code
      { echo "$LOG_FILE:" && tail -500 "$LOG_FILE"; } || true
    fi
    exit 255
  fi
}
export -f build_single_shim

# Build all the versions in SPARK_SHIM_VERSIONS, then produce the joined
# artifacts.
#
# Step 1: unless --rebuild-dist-only was given, run build_single_shim for each
# shim, up to BUILD_PARALLEL at a time. Those per-shim builds skip work that
# does not have to be repeated for every shim:
# - rat and scalastyle checks run only for the base shim, the first version in
#   SPARK_SHIM_VERSIONS, because the check is identical in all shim profiles
# - scaladoc generation is currently not required per shim
# - unit tests have a dedicated step that runs against a particular shim jar;
#   in the near future we will run unit tests against a combined multi-shim
#   jar to catch classloading regressions even before pytest-based
#   integration_tests
#
# Step 2: resume the maven build for the base shim now that the shims have
# been built.
time (
  if [[ "$SKIP_DIST_DEPS" != "1" ]]; then
    # Execute initialize to download a massive jar for spark-rapids-jni in a
    # single thread to avoid repeating this work in parallel
    # Initialize sql-plugin-api only to avoid dealing with missing submodule
    # dependencies
    #
    $MVN initialize -pl sql-plugin-api -am

    # printf emits a single buildver array element per line; xargs turns each
    # line into one build_single_shim call in its own bash process
    printf "%s\n" "${SPARK_SHIM_VERSIONS[@]}" |
      xargs -t -I% -P "$BUILD_PARALLEL" -n 1 \
        bash -c 'build_single_shim "$@"' _ %
  fi

  # This used to resume from dist. However, without including aggregator in the build
  # the build does not properly initialize spark.version property via buildver profiles
  # in the root pom, and we get a missing spark330 dependency even for --profile=330,331
  # where the build does not require it. Moving it to aggregator resolves this issue with
  # a negligible increase of the build time by ~2 seconds.
  resumeFromModule="aggregator"
  includedBuildvers=$(join_by , "${SPARK_SHIM_VERSIONS[@]}")
  echo "Resuming from $resumeFromModule build only using $BASE_VER"

  # MODULE_OPT holds several words and MVN_PROFILE_OPT may be empty, so the
  # option variables are expanded unquoted on purpose
  $MVN $FINAL_OP -rf $resumeFromModule \
    $MODULE_OPT $MVN_PROFILE_OPT -Dincluded_buildvers=$includedBuildvers \
    -Dbuildver="$BASE_VER" \
    -DskipTests -Dmaven.scaladoc.skip
)
