#!/bin/bash -e
# Configures or reconfigures DSS Spark integration

# Don't call this directly. Use ./bin/dssadmin update-spark-hadoop-jars

# Print the command-line synopsis on stderr, then terminate the script
# with exit status 1. Never returns to the caller.
Usage() {
	printf '%s\n' "Usage: $0 -sparkHome SPARK_HOME [-standalone FLAVOR]" >&2
	exit 1
}

# Refuse to run unless DKUINSTALLDIR names an existing directory. The variable
# is not assigned in this script, so it is expected to come from the caller's
# environment.
# [[ ... || ... ]] is used instead of the obsolescent `[ ... -o ... ]`, whose
# parsing is ambiguous when the value looks like a test operator.
if [[ -z "$DKUINSTALLDIR" || ! -d "$DKUINSTALLDIR" ]]; then
	echo >&2 "*** Error: DKUINSTALLDIR not found"
	exit 1
fi

# Parse the command line.
#   -sparkHome DIR       Spark installation whose jars/ directory is patched
#   -standalone FLAVOR   flavor of the hadoop libs package (default: generic)
#   -d                   sets DEV_MODE=1
#   -adjust-hive         sets ADJUST_HIVE=1
# Any other argument, or a value-taking option given without its value,
# prints the usage message and exits.
# DEV_MODE is not reset here, so a value already present in the environment
# is kept when -d is not passed.
# A `case` on "$1" replaces the former `[ "$1" = ... -a $# -gt 1 ]` tests:
# `-a` inside `[` is obsolescent and misparses operator-like arguments
# such as `!` or `(`.
sparkHome=
flavor=generic
ADJUST_HIVE=0
while [ $# -gt 0 ]; do
    case "$1" in
        -sparkHome)
            # The option needs a value after it
            [ $# -gt 1 ] || Usage
            sparkHome="$2"
            shift 2
            ;;
        -standalone)
            # The option needs a value after it
            [ $# -gt 1 ] || Usage
            flavor="$2"
            shift 2
            ;;
        -d)
            DEV_MODE="1"
            shift 1
            ;;
        -adjust-hive)
            ADJUST_HIVE="1"
            shift 1
            ;;
        *)
            Usage
            ;;
    esac
done


# Select the directory holding the standalone Hadoop jars, depending on DEV_MODE:
#   1          rebuild the $flavor package under packagers/resources-build, then use it
#   prebuilt   use the $flavor package already present under packagers/resources-build
#   otherwise  use $DKUINSTALLDIR/hadoop-standalone-libs/, which must exist
# All expansions are quoted so that paths containing spaces stay one argument.
HADOOP_LIBS=
case "$DEV_MODE" in
    1)
        echo "[+] Building $flavor hadoop libs package"
        cd "$DKUINSTALLDIR/packagers/resources-build/hadoop-standalone-libs/"
        make clean dist "_$flavor"
        HADOOP_LIBS=$DKUINSTALLDIR/packagers/resources-build/hadoop-standalone-libs/dist/$flavor
        # NOTE(review): MYDIR is not assigned anywhere in this script. When it is
        # empty or unset this expands to a bare `cd`, which changes to $HOME, as the
        # previous unquoted `cd $MYDIR` did. Confirm whether the caller exports it.
        cd ${MYDIR:+"$MYDIR"}
        ;;
    prebuilt)
        echo "[+] Using prebuilt $flavor hadoop libs package"
        HADOOP_LIBS=$DKUINSTALLDIR/packagers/resources-build/hadoop-standalone-libs/dist/$flavor
        # NOTE(review): same MYDIR caveat as in the build branch above.
        cd ${MYDIR:+"$MYDIR"}
        ;;
    *)
        if [ ! -d "$DKUINSTALLDIR/hadoop-standalone-libs/" ]; then
            echo >&2 "- Missing standalone Hadoop libs"
            exit 1
        fi
        HADOOP_LIBS=$DKUINSTALLDIR/hadoop-standalone-libs/
        ;;
esac

# When sparkHome is still empty, fall back to DKU_SPARK_HOME.
sparkHome=${sparkHome:-$DKU_SPARK_HOME}

# A Spark home is mandatory: give up if neither source provided one.
[ -n "$sparkHome" ] || {
    echo >&2 "- No Spark home argument"
    exit 1
}

echo "[+] Using Spark located at $sparkHome"

echo >&2 "+ Swap hadoop jars in Spark"
# see https://github.com/dataiku/dss-doc/blob/release/5.1/app-notes/src/spark-on-k8s-azure/index.md
#
# The directory part of every path is quoted so that a Spark home or install
# dir containing spaces is passed as a single argument; only the trailing
# file-name patterns are left unquoted so the shell still expands them.

# Drop the hadoop, httpc* and snappy-java jars currently in $sparkHome/jars;
# replacements are copied in below.
rm -f "$sparkHome"/jars/hadoop-*
rm -f "$sparkHome"/jars/httpc*
rm -f "$sparkHome"/jars/snappy-java-*.jar

# Hadoop 3.4 jars and the support libraries copied along with them
cp "$HADOOP_LIBS"/hadoop-*3.4* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/woodstox* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/stax2* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/re2j-* "$sparkHome/jars/"
# hadoop-shaded-guava-1.1.1.jar is a subset of hadoop-client-runtime-3.3.6-dku-20230904-100727.jar, so no need to include it
# NOTE(review): the version in the comment above (3.3.6) differs from the 3.4 jars copied here - confirm it still holds
cp "$HADOOP_LIBS"/wildfly-openssl-* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/commons-configuration2-* "$sparkHome/jars/"

# Replace jetty-util with the version from the Hadoop libs
rm -f "$sparkHome"/jars/jetty-util-*
cp "$HADOOP_LIBS"/jetty-util-* "$sparkHome/jars/"

# Remove too-old guava from Spark, replace with DSS version (+deps)
rm -f "$sparkHome"/jars/guava-*.jar "$sparkHome"/jars/jsr305-*.jar
for f in guava \
        checker-qual error_prone_annotations \
        failureaccess j2objc-annotations jsr305 listenablefuture; do
    cp "$DKUINSTALLDIR/lib/ivy/common-run/$f"-*.jar "$sparkHome/jars/"
done

# Cloud storage connectors (Azure, AWS, GCS), then the snappy-java and httpc*
# jars taken from the DSS ivy directories
#cp $HADOOP_LIBS/htrace-core4-*.jar $sparkHome/jars/
cp "$HADOOP_LIBS"/azure* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/*aws* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/bundle-* "$sparkHome/jars/"
cp "$HADOOP_LIBS"/gcs-connector-* "$sparkHome/jars/"
cp "$DKUINSTALLDIR"/lib/ivy/backend-run/snappy-java-*.jar "$sparkHome/jars/"
cp "$DKUINSTALLDIR"/lib/ivy/common-run/httpc* "$sparkHome/jars/"
# Per-user cache directory for jars fetched from Maven Central
mkdir -p "$HOME/.cache/dss-build"

# Fetch one file from Maven Central into the per-user cache, then copy it,
# attributes preserved, into the current directory.
# Arguments: $1 - path of the file below https://repo1.maven.org/maven2/
# Returns:   wget's status if the download fails, otherwise cp's status
get_maven_file() {
    local cache_dir="$HOME/.cache/dss-build"
    local file_name
    file_name=$(basename "$1")

    # -N makes wget use timestamping against the copy already in the cache
    wget -N -P "$cache_dir" "https://repo1.maven.org/maven2/$1" || return
    cp -p "$cache_dir/$file_name" ./
}

# Remove mesos because of vulnerabilities, we don't use it anyway
# (path quoted so a Spark home containing spaces is handled as one argument)
rm -f "$sparkHome/jars/mesos-1.4.3-shaded-protobuf.jar"


echo >&2 "+ Done"
