#!/bin/bash -e
# Starts / stops Dataiku Fleet Manager processes

# Usages:
#
# Control all child processes at once through supervisord
# $0 (start | stop | restart | status)
#
# Control individual child processes through supervisord
# $0 (start | stop | restart | status) PROCESS
#
# Low-level command used by supervisord to run individual child processes (in foreground)
# $0 run PROCESS
#
# where PROCESS is one of (fmmain | nginx | collectd)

# Enable errexit explicitly: the "-e" given on the shebang line is not applied
# when the script is started as "bash <script> ...".
set -e

# TODO: this does not work if this script is launched through a symlink or $PATH
# BINDIR: physical path of the directory holding this script
# me    : script name, used as a prefix in messages
BINDIR=$(cd "$(dirname "$0")" && pwd -P)
me=$(basename "$0")

# Refuse to go any further when invoked by the superuser
if [ "$EUID" -eq 0 ]; then
  echo >&2 "*** $me: running Fleet Manager under superuser account is not supported."
  exit 1
fi


########################################
#
# get args from command line
#
########################################

# Print the command-line synopsis on stderr and terminate with status 1.
#
# Only the "all processes" form is advertised. The argument handling in this
# script also accepts the following forms, which are left out of the message:
#   $me (start | stop | status | restart) PROCESS
#   $me run PROCESS
#   $me run
#   $me reload
#   PROCESS : (fmmain | nginx | collectd)
Usage() {
  printf 'Usage: %s (start | stop | status | restart | reopenlogs)\n' "$me" >&2
  exit 1
}

# The first argument is the requested action; anything else is a usage error.
case "$1" in
  start | \
  stop | \
  status | \
  restart | \
  run | \
  reopenlogs | \
  reload)
    action="$1"
    ;;
  *)
    Usage
    ;;
esac

# Resolve the Fleet Manager home directory: the parent of the directory that
# contains this script, as a physical (symlink-free) path.
# Assignment and export are kept separate so that a failing command
# substitution is not masked by the exit status of "export".
DIP_HOME=$(dirname "$BINDIR")
DIP_HOME=$(cd "$DIP_HOME" && pwd -P)
export DIP_HOME

# Installation sanity check, only if the action comes from the user
# (the supervisor.conf file calls the 'run' action).
# The flag file records the directory the installation was set up for; a
# different value means this directory was copied or moved without migration.
if [[ "$action" == "start" || "$action" == "restart" ]]; then
    if [ -f "$DIP_HOME"/install-support/expected-dip-home.txt ]; then
        EXPECTED_DIP_HOME=$(cat "$DIP_HOME"/install-support/expected-dip-home.txt)
        if [[ "$EXPECTED_DIP_HOME" != "$DIP_HOME" ]]; then
            # All diagnostics go to stderr, including the final "Aborting." line
            echo "[!] $DIP_HOME appears to be a copy of $EXPECTED_DIP_HOME" >&2
            echo "[!] Was the install properly migrated? (with installer.sh -u ...)" >&2
            echo "[!] Aborting." >&2
            exit 1
        fi
    else
        # Missing flag file: warn only, do not abort
        echo "[!] $DIP_HOME does not appear to be a validated Fleet Manager installation (missing installation flag file)" >&2
    fi
fi

# Load basic environment: env-default.sh is expected to define DKUINSTALLDIR
source "$BINDIR"/env-default.sh
[ -n "$DKUINSTALLDIR" ] || {
  echo >&2 "FATAL: DKUINSTALLDIR is not defined. Please check $BINDIR/env-default.sh"
  exit 1
}
# Shared startup helpers (the fm_* functions called below are not defined in this file)
source "$DKUINSTALLDIR/scripts/_startup.inc.sh"

fm_set_global_env
fm_set_java_env
fm_set_py_env
fm_load_env_files
fm_env_sanity_check

# Final environment setup
# Fall back to a placeholder when the distribution cannot be identified
DKUDISTRIB=$("$DKUINSTALLDIR"/scripts/_find-distrib.sh 2>/dev/null) || DKUDISTRIB="unknown unknown"
export DKUDISTRIB

# Prepend the bundled tools to PATH once; DKUPATHSET marks that it has been done
if [ -z "$DKUPATHSET" ]; then
  if [ -d "$DKUINSTALLDIR/tools/bin" ]; then
    PATH="$DKUINSTALLDIR/tools/bin:$PATH"
    export PATH
  fi
  DKUPATHSET=1
  export DKUPATHSET
fi

# Runtime directory
DKURUNDIR="$DIP_HOME"/run
export DKURUNDIR
mkdir -p "$DKURUNDIR"

########################################
# Child processes definition: FM Server
########################################

# Run the Fleet Manager server JVM in the foreground.
# The current shell is replaced by the Java process (exec), so this never
# returns on success.
# Reads: DIP_HOME, DKUJAVABIN, DKU_JAVA_OPTS, DKU_FMMAIN_JAVA_OPTS,
#        DKU_FMMAIN_JAVA_LIBRARY_PATH, and the classpath printed by fm_get_cp.
run_fmmain() {
  # Make sure tmp/ folder exists
  mkdir -p "$DIP_HOME/tmp"

  # DKU_JAVA_OPTS and DKU_FMMAIN_JAVA_OPTS are left unquoted on purpose: each
  # may hold several whitespace-separated JVM options.
  # Every path-bearing argument is quoted so that a DIP_HOME containing
  # whitespace stays a single argument.
  exec "$DKUJAVABIN" \
    -Ddku.fmmain \
    $DKU_JAVA_OPTS \
    $DKU_FMMAIN_JAVA_OPTS \
    -XX:+CrashOnOutOfMemoryError \
    -XX:ErrorFile="$DIP_HOME/run/hs_err_pid%p.log" \
    -Djava.library.path="$DKU_FMMAIN_JAVA_LIBRARY_PATH" \
    -cp "$(fm_get_cp)" \
    com.dataiku.fm.server.FMServerMain
}

# Run nginx in the foreground with the configuration generated under
# $DIP_HOME/install-support. The shell is replaced by nginx (exec).
# Sets the global NGINX_BINARY.
run_nginx() {
  # Nginx binary to use. WARNING: diagnosis must use the same method to locate nginx
  NGINX_BINARY=$("$DKUPYTHONBIN" "$DKUINSTALLDIR"/scripts/dkuinstall/install_config.py -get server nginx_binary)
  case "$NGINX_BINARY" in
    "" | "None")
      # Nothing configured: take the first executable found in the usual
      # locations, otherwise rely on a PATH lookup of "nginx"
      NGINX_BINARY="nginx"
      for sbin_dir in "$DKUINSTALLDIR"/tools/sbin /usr/sbin /opt/local/sbin /usr/local/sbin /usr/local/bin; do
        [ -x "$sbin_dir/nginx" ] || continue
        NGINX_BINARY="$sbin_dir/nginx"
        break
      done
      ;;
  esac

  command -v "$NGINX_BINARY" >/dev/null || {
    echo >&2 "nginx binary \"$NGINX_BINARY\" not found"
    exit 1
  }

  exec "$NGINX_BINARY" -c "$DIP_HOME"/install-support/nginx.conf
}

# Run the bundled collectd in the foreground (-f) with the configuration under
# $DIP_HOME/install-support. The shell is replaced by collectd (exec).
# Exits with status 1 when the bundled binary is absent or not executable.
run_collectd() {
  COLLECTD_BINARY="$DKUINSTALLDIR/tools/collectd/bin/collectd"
  # Check the file itself: the variable was just assigned and is never empty
  if [ ! -x "$COLLECTD_BINARY" ]; then
    echo "collectd binary is missing" >&2
    exit 1
  fi
  exec "$COLLECTD_BINARY" -C "$DIP_HOME"/install-support/collectd.conf -f
}

########################################
# Supervisord process manager
########################################

# Launch supervisord through the configured Python interpreter, using the
# configuration file under $DIP_HOME/install-support.
# Unlike run_supervisord, no "-n" flag is passed and the shell is not replaced.
start_supervisord() {
  local svd_conf="$DIP_HOME/install-support/supervisord.conf"
  "$DKUPYTHONBIN" -m supervisor.supervisord -c "$svd_conf"
}

# Replace the current shell with supervisord, passing "-n" in addition to the
# configuration file under $DIP_HOME/install-support.
run_supervisord() {
  local svd_conf="$DIP_HOME/install-support/supervisord.conf"
  exec "$DKUPYTHONBIN" -m supervisor.supervisord -c "$svd_conf" -n
}

# supervisorctl COMMAND [ARG ...]
#
# Forward COMMAND to the supervisor control client, after checking that the
# supervisor is reachable. Returns 1 with a message on stderr when it is not;
# otherwise returns the status of the forwarded command.
supervisorctl() {
  local -a ctl_cmd=(
    "$DKUPYTHONBIN" -m supervisor.supervisorctl
    -c "$DIP_HOME/install-support/supervisord.conf"
  )

  # Need to check supervisorctl answer as exit status is always 0
  # https://github.com/Supervisor/supervisor/issues/24
  # The supervisor counts as running only when its socket exists and the
  # "pid" command answers with a line made of digits only.
  if [ ! -S "$DIP_HOME/run/svd.sock" ] || ! "${ctl_cmd[@]}" pid | grep -qE '^[0-9]+$'; then
    echo "$me: FM supervisor is not running" >&2
    return 1
  fi

  "${ctl_cmd[@]}" "$@"
}


########################################
# Wait for process transitions
########################################

# Poll "supervisorctl status" up to 10 times, sleeping 2 seconds before each
# attempt. Returns 0 as soon as it succeeds (printing the pid recorded in
# $DKURUNDIR/supervisord.pid), or 1 with a pointer to the log file on stderr.
wait_supervisor_start() {
  echo "Waiting for Fleet Manager supervisor to start ..."
  for attempt in {1..10}; do
    sleep 2
    supervisorctl status || continue
    echo "FM started, pid=$(cat "$DKURUNDIR"/supervisord.pid)"
    return 0
  done
  # The last empty argument produces the trailing blank line of the message
  printf '%s\n' \
    "*** $me : Fleet Manager supervisor failed to start." \
    "You can look for startup error messages in log file:" \
    "    $DKURUNDIR/supervisord.log" \
    "" >&2
  return 1
}

# Poll up to 10 times, sleeping 2 seconds before each attempt, until the
# supervisor is gone. It counts as stopped when $DKURUNDIR/supervisord.pid
# cannot be read, is empty, or names a pid that "ps -p" does not find.
# Returns 0 when stopped, 1 (with a message on stderr) otherwise.
wait_supervisor_stop() {
  echo "Waiting for Fleet Manager to stop ..."
  for attempt in {1..10}; do
    sleep 2
    # Still running: pid file readable, non-empty, and the process exists
    if pid=$(cat "$DKURUNDIR"/supervisord.pid 2>/dev/null) && [ -n "$pid" ] && ps -p "$pid" -f; then
      continue
    fi
    echo "FM stopped"
    return 0
  done
  echo "*** $me : Fleet Manager failed to stop" >&2
  return 1
}

# Poll the server's /api/ping endpoint on localhost:$DKU_FMMAIN_PORT up to 60
# times, sleeping 2 seconds before each attempt and printing one dot per failed
# attempt. Returns 0 on the first successful ping, 1 with a warning on stderr
# once all attempts are used up.
wait_fmmain_start() {
  printf '%s' "Waiting for Fleet Manager server to start "
  for attempt in {1..60}; do
    sleep 2

    if curl -fs http://localhost:"$DKU_FMMAIN_PORT"/api/ping -o /dev/null; then
      echo
      return 0
    fi
    printf '%s' .
  done
  echo ""
  # The last empty argument produces the trailing blank line of the message
  printf '%s\n' \
    "*** $me : Warning: Fleet Manager server still not started after two minutes." \
    "It might still be initializing, and taking longer than expected, because the server is heavily loaded, or it might be a permanent failure." \
    "You can re-check its status using the following command:" \
    "    $BINDIR/$me status" \
    "You can look for startup error messages in log file:" \
    "    $DKURUNDIR/fmmain.log" \
    "" >&2
  return 1
}



#######################################################
# Check the runtime environment
#######################################################

# Run the port checker on the nginx and FM server ports, unless the install
# configuration answers "false" for the "server check_ports" setting.
# Sets the globals check_ports and ports.
check_server_ports() {
  local install_scripts="$DKUINSTALLDIR"/scripts/dkuinstall
  check_ports=$("$DKUPYTHONBIN" "$install_scripts"/install_config.py -get server check_ports)
  if [ "$check_ports" != "false" ]; then
    ports="$DKU_NGINX_PORT $DKU_FMMAIN_PORT"
    # $ports is intentionally unquoted: each port must be its own argument
    "$DKUPYTHONBIN" "$install_scripts"/check_server_port.py $ports
  fi
}

# Warn on stderr when the open-files (ulimit -n) or user-processes (ulimit -u)
# limit is below 4096. Nothing is checked when DKUDISTRIB starts with "osx".
# Warnings only: no limit is changed. Sets the globals nofile and nproc.
check_limits() {
  if [[ "$DKUDISTRIB" == osx* ]]; then
    return
  fi

  nofile=$(ulimit -n)
  if [ "$nofile" != "unlimited" ] && [ "$nofile" -lt 4096 ]; then
    echo >&2 "*** WARNING: maximum number of open files (ulimit -n) is $nofile, lower than recommended minimum 4096"
  fi

  nproc=$(ulimit -u)
  if [ "$nproc" != "unlimited" ] && [ "$nproc" -lt 4096 ]; then
    echo >&2 "*** WARNING: maximum number of user processes (ulimit -u) is $nproc, lower than recommended minimum 4096"
  fi
}

#######################################################
# Global actions
#######################################################

# Start the supervisor and wait for the Fleet Manager server to answer.
# Steps, in order: port check, ulimit warnings, supervisord launch, wait for
# the supervisor, wait for the server.
# Returns the status of the first failing step, or of the final wait.
# Failures are propagated explicitly so that the sequence stops at the first
# error even when errexit is not active in the calling context.
start_all() {
  check_server_ports || return
  check_limits
  start_supervisord || return
  wait_supervisor_start || return
  wait_fmmain_start
}

# Send the "shutdown" command through supervisorctl, then wait for the
# supervisor process to disappear.
# Returns the status of wait_supervisor_stop.
# NOTE: the restart path calls this as "stop_all || ...", a context in which
# bash does not apply errexit inside the function; wait_supervisor_stop
# therefore still runs there when the shutdown request fails.
stop_all() {
  supervisorctl shutdown
  wait_supervisor_stop
}

# Send the "reload" command through supervisorctl; when that fails, start the
# supervisor and wait for it instead. In both cases, finish by waiting for the
# Fleet Manager server to answer.
reload_all() {
  if ! supervisorctl reload; then
    start_supervisord
    wait_supervisor_start
  fi
  wait_fmmain_start
}

########################################
#
# Main entry point
#
########################################

# Work out what the action applies to, from the number of arguments:
#   1 argument : "run" targets supervisord itself; any other action becomes
#                its "<action>_all" variant
#   2 arguments: the second one names the child process
#   otherwise  : usage error
case $# in
  1)
    if [ "$action" = "run" ]; then
      process="supervisord"
    else
      action="${action}_all"
    fi
    ;;
  2)
    if [[ "$2" == "fmmain" || "$2" == "nginx" || "$2" == "collectd" ]]; then
      process="$2"
    else
      Usage
    fi
    ;;
  *)
    Usage
    ;;
esac

# Dispatch on the resolved action. "<action>_all" variants act on the whole
# installation through the supervisor; bare actions act on "$process".
case "$action" in
  start_all)
    start_all
    ;;
  stop_all)
    stop_all
    ;;
  status_all)
    supervisorctl status
    ;;
  restart_all)
    # A failed stop does not prevent the start that follows
    stop_all || echo "Stop failed (ignored)"
    start_all
    ;;
  reload_all)
    reload_all
    ;;
  reopenlogs_all)
    # USR2 goes to the supervisor pid, USR1 to nginx through the supervisor
    pid="$(supervisorctl pid)" &&
    echo "Reopening supervisor logs" &&
    kill -USR2 "$pid" &&
    supervisorctl signal USR1 nginx
    ;;
  start | stop | restart | status)
    supervisorctl "$action" "$process"
    ;;
  run)
    "run_$process"
    ;;
  *)
    # Combinations with no handler (e.g. "reload PROCESS", "reopenlogs PROCESS")
    # are rejected instead of silently succeeding without doing anything
    Usage
    ;;
esac
