#!/bin/sh

set -e
#set -x

# Print the one-line synopsis on stdout and terminate the script with
# exit status 1.
usage () {
	printf 'Usage: %s CLUSTER_NAME\n' "$0"
	exit 1
}

# Exactly one positional argument is accepted: the name of the cluster to
# upgrade. Anything else prints the synopsis and exits.
case $# in
	1) ;;
	*) usage ;;
esac

CLUSTER_NAME=$1

# Second CSV field of the "Domain:" row printed by cluster-show.
DOMAIN_NAME=$(ocicli -csv cluster-show "${CLUSTER_NAME}" | grep Domain: | cut -d, -f2)

# Fetch the machine list once and run every controller query against that
# single snapshot. As a plain assignment (not hidden in a pipeline) a failing
# ocicli also aborts the script through "set -e".
OCI_MACHINE_LIST=$(ocicli -csv machine-list -a)
OCI_CTRL_QUERY="SELECT Cur_ip,hostname FROM - WHERE role='controller' AND cluster='${CLUSTER_NAME}' ORDER BY hostname"

# Each *_CTRL value is one "ip,hostname" record. ALL_CTRL is the list of all
# such records, sorted by hostname and joined with spaces.
FIRST_CTRL=$(printf '%s\n' "${OCI_MACHINE_LIST}" | q -H -d, "${OCI_CTRL_QUERY} LIMIT 1")
SEC_CTRL=$(printf '%s\n' "${OCI_MACHINE_LIST}" | q -H -d, "${OCI_CTRL_QUERY} LIMIT 1,1")
LAST_CTRL=$(printf '%s\n' "${OCI_MACHINE_LIST}" | q -H -d, "${OCI_CTRL_QUERY} DESC LIMIT 1")
ALL_CTRL=$(printf '%s\n' "${OCI_MACHINE_LIST}" | q -H -d, "${OCI_CTRL_QUERY}" | tr '\n' ' ')

# Without any controller every later ssh call would run against an empty
# host name, so stop here with a clear message instead.
if [ -z "${FIRST_CTRL}" ] ; then
	echo "No controller found for cluster ${CLUSTER_NAME}" >&2
	exit 1
fi

# Split the two-column records: text before the first comma is the IP,
# text after it is the hostname.
FIRST_CTRL_IP=${FIRST_CTRL%%,*}
FIRST_CTRL_HOST=${FIRST_CTRL#*,}
SEC_CTRL_IP=${SEC_CTRL%%,*}
SEC_CTRL_HOST=${SEC_CTRL#*,}
LAST_CTRL_IP=${LAST_CTRL%%,*}
LAST_CTRL_HOST=${LAST_CTRL#*,}

# HAS_COMPUTE_NODE is "yes" when at least one machine with the compute role
# belongs to the cluster being upgraded (the query used to be hard-wired to
# a cluster called 's1'). The substitution is quoted so that an empty or
# multi-word result cannot break the test.
if [ -n "$(ocicli -csv machine-list -a | q -H -d, "SELECT Cur_ip,hostname FROM - WHERE role='compute' AND cluster='${CLUSTER_NAME}' ORDER BY hostname LIMIT 1")" ] ; then
	HAS_COMPUTE_NODE=yes
else
	HAS_COMPUTE_NODE=no
fi

# SELF_SIGNED_API_CERT is "yes" when the 7th whitespace-separated word of the
# "Self signed API cert:" line of cluster-show is "yes". The substitution is
# quoted so unexpected (empty or multi-word) output cannot break the test.
if [ "$(ocicli cluster-show "${CLUSTER_NAME}" | grep "Self signed API cert:" | awk '{print $7}')" = "yes" ] ; then
	SELF_SIGNED_API_CERT=yes
else
	SELF_SIGNED_API_CERT=no
fi


# ANSI colour sequences. They are stored with a literal backslash and only
# turned into real escape characters when printed through printf's %b.
RED="\033[1;31m"
NO_COL="\033[0m"
GREEN="\033[1;32m"

# Print $1 in bold green, followed by a newline.
# printf is used instead of an unquoted echo so that the message keeps its
# whitespace, is never glob-expanded, and the colours do not depend on the
# shell's echo interpreting backslash escapes.
green_echo () {
	printf '%b%s%b\n' "${GREEN}" "${1}" "${NO_COL}"
}

# Print $1 in bold red, followed by a newline (same rules as green_echo).
red_echo () {
	printf '%b%s%b\n' "${RED}" "${1}" "${NO_COL}"
}

# Run ssh with the installer's private key.
# Arguments: passed through to ssh unchanged (host, then the remote command).
# "$@" is quoted so each argument reaches ssh exactly as the caller wrote it;
# unquoted, a remote command was re-split locally, which collapsed runs of
# whitespace inside it and exposed it to local glob expansion.
sshi () {
	ssh -i /etc/openstack-cluster-installer/id_rsa "$@"
}

# Run scp with the installer's private key.
# Arguments: passed through to scp unchanged (sources, then destination).
# "$@" is quoted so paths containing whitespace or glob characters are not
# re-split or expanded locally.
scpi () {
	scp -i /etc/openstack-cluster-installer/id_rsa "$@"
}

# Block until an ssh login as root on host $1 succeeds.
# Arguments: $1 - host name or IP address to poll.
# Globals:   GREEN, RED, NO_COL (read) - colour sequences for the progress output.
# Outputs:   a progress line on stdout: one dot per failed attempt, then
#            "ok." or "timeout.".
# Exits the whole script with status 1 if the host never answers.
wait_for_ssh () {
	local COUNT OTCI_CAN_SSH SSH_HOST SYSUSERNAME
	SYSUSERNAME=root
	# Up to 900 attempts. Every failed attempt costs a 2 second sleep on top of
	# the ssh connect timeout of 2 seconds, so giving up takes at least 30 minutes.
	COUNT=900
	OTCI_CAN_SSH=no
	SSH_HOST=${1}
	printf '%b---> Waiting for ssh to be up for %s:%b' "${GREEN}" "${SSH_HOST}" "${NO_COL}"
	# Forget any previously recorded host key for this host; failure to do so
	# is not fatal.
	ssh-keygen -f ~/.ssh/known_hosts -R "${SSH_HOST}" 1>/dev/null 2>/dev/null || true
	while [ "${OTCI_CAN_SSH}" != "yes" ] && [ "${COUNT}" != 0 ] ; do
		if ssh -i /etc/openstack-cluster-installer/id_rsa -o "ConnectTimeout 2" "${SYSUSERNAME}@${SSH_HOST}" 'echo -n ""' 2>/dev/null ; then
			OTCI_CAN_SSH=yes
			printf '%bok.%b\n' "${GREEN}" "${NO_COL}"
		else
			COUNT=$(( ${COUNT} - 1 ))
			sleep 2
			printf '.'
		fi
	done
	if [ "${OTCI_CAN_SSH}" = "yes" ] ; then
		sleep 1
	else
		printf '%btimeout.%b\n' "${RED}" "${NO_COL}"
		exit 1
	fi
}

# Space-separated list of service names that oci_upgrade_shutdown_all_services
# (systemctl stop) and oci_upgrade_restart_all_services (systemctl restart)
# iterate over on the remote host.
OS_SERVICES="apache2 haproxy aodh-api aodh-evaluator aodh-listener aodh-notifier barbican-api glance-api heat-api heat-api-cfn heat-engine magnum-api magnum-conductor memcached neutron-api neutron-rpc-server nova-api nova-conductor nova-consoleauth nova-novncproxy nova-placement-api nova-scheduler octavia-api octavia-health-manager octavia-worker"

# Stop every service of OS_SERVICES that has an init script on the host, then
# install a /usr/sbin/policy-rc.d that exits 101 to prevent unexpected
# service starts.
# Arguments: $1 - "ip,hostname" record of the host.
# Globals:   OS_SERVICES (read).
oci_upgrade_shutdown_all_services () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	# \$i is escaped so that it is expanded by the remote shell.
	sshi "${HOST_IP}" "for i in ${OS_SERVICES} ; do echo -n \$i' ' ; if [ -x /etc/init.d/\$i ] ; then systemctl stop \$i ; fi ; done"
	echo ""
	green_echo "-> Adding a /usr/sbin/policy-rc.d on ${HOST_NAME} to prevent unexpected service start"
	sshi "${HOST_IP}" "echo '#!/bin/bash' >/usr/sbin/policy-rc.d"
	sshi "${HOST_IP}" "echo 'exit 101' >>/usr/sbin/policy-rc.d ; chmod +x /usr/sbin/policy-rc.d"
}

# Remove /usr/sbin/policy-rc.d from the host and restart every service listed
# in OS_SERVICES.
# Arguments: $1 - "ip,hostname" record of the host.
# Globals:   OS_SERVICES (read).
# NOTE(review): unlike oci_upgrade_shutdown_all_services there is no check
# that a service is installed, so the remote loop's status is the status of
# the last "systemctl restart" - confirm this is intended.
oci_upgrade_restart_all_services () {
	local HOST HOST_IP
	HOST=${1}
	# Only the IP (text before the first comma) is needed here; the unused
	# HOST_NAME, which was read from the wrong field, has been dropped.
	HOST_IP=${HOST%%,*}

	sshi "${HOST_IP}" "rm -f /usr/sbin/policy-rc.d"
	# \$i is escaped so that it is expanded by the remote shell.
	sshi "${HOST_IP}" "for i in ${OS_SERVICES} ; do systemctl restart \$i ; done"
}

# Preparation step, run once before any node is upgraded:
#  - disable the puppet agent on every controller,
#  - save the RabbitMQ credentials through oci-save-rabbitmq-access,
#  - move the API VIP to the last controller and let its corosync run with
#    expected_votes: 1,
#  - shut down corosync/pacemaker on the first controller.
# Globals: ALL_CTRL, FIRST_CTRL, FIRST_CTRL_IP, FIRST_CTRL_HOST,
#          LAST_CTRL_IP, LAST_CTRL_HOST (read).
oci_upgrade_prepare () {
	local PAIR NODE_IP NODE_NAME

	green_echo "===> Disable puppet on all nodes"
	for PAIR in ${ALL_CTRL} ; do
		# PAIR is an "ip,hostname" record: first field, then second field.
		NODE_IP=${PAIR%%,*}
		NODE_NAME=${PAIR#*,}
		NODE_NAME=${NODE_NAME%%,*}
		printf '%s\n' "---> Disabling puppet on ${NODE_NAME}"
		sshi ${NODE_IP} "puppet agent --disable"
	done

	green_echo "===> Save the cluster RabbitMQ credentials"
	oci-save-rabbitmq-access ${FIRST_CTRL_HOST}

	green_echo "===> Move the API VIP to node 3"
	sshi ${FIRST_CTRL_IP} "crm_resource --move --resource openstack-api-vip --node ${LAST_CTRL_HOST} || true"

	# With "only" 3 controllers the last one would otherwise drop the VIP
	# as soon as corosync goes down on node 2, hence expected_votes: 1.
	green_echo "===> Configure corosync on last node to work standalone for the VIP"
	sshi ${LAST_CTRL_IP} 'sed -i "s/  provider: corosync_votequorum/  provider: corosync_votequorum\n  expected_votes: 1/" /etc/corosync/corosync.conf'
	sshi ${LAST_CTRL_IP} "systemctl restart corosync"

	green_echo "===> Shutdown the VIP on first node before upgrade"
	oci_upgrade_shutdown_vip ${FIRST_CTRL}
}

# Stop the RabbitMQ application on the host, then stop, disable and mask
# rabbitmq-server.service.
# Arguments: $1 - "ip,hostname" record of the host.
oci_upgrade_shutdown_rabbit () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	green_echo "===> Disable the RabbitMQ service on ${HOST_NAME}"
	# stop_app is best effort: its failure must not abort the upgrade.
	sshi "${HOST_IP}" "rabbitmqctl stop_app" || true
	sshi "${HOST_IP}" "systemctl stop rabbitmq-server.service"
	sshi "${HOST_IP}" "systemctl disable rabbitmq-server.service"
	sshi "${HOST_IP}" "systemctl mask rabbitmq-server.service"
}


# Dist-upgrade one controller from stretch to buster and reboot it.
# Steps: (when the cluster has compute nodes) back up nova.conf and purge the
# bpo90 packages; switch the apt sources to buster; stop, disable and mask
# MySQL and HAProxy; stop all OpenStack services; dist-upgrade; re-enable
# MySQL and HAProxy; remove policy-rc.d; relax MinProtocol in openssl.cnf;
# reboot and wait for ssh to come back.
# Arguments: $1 - "ip,hostname" record of the host.
# Globals:   HAS_COMPUTE_NODE (read).
oci_upgrade_node () {
	local HOST HOST_IP HOST_NAME PKG_LIST
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	if [ "${HAS_COMPUTE_NODE}" = "yes" ] ; then
		green_echo "===> Backing-up nova.conf on ${HOST_NAME}"
		# Never overwrite a backup that already exists.
		sshi "${HOST_IP}" "if ! [ -e /root/nova.conf ] && [ -e /etc/nova/nova.conf ] ; then cp /etc/nova/nova.conf /root ; fi"

		green_echo "===> Remove all ceph packages from upstream on ${HOST_NAME}"
		# \$2 is escaped so that awk, not the local shell, sees it.
		PKG_LIST=$(sshi "${HOST_IP}" "dpkg -l | grep bpo90 | awk '{print \$2}' | tr '\n' ' '")
		sshi "${HOST_IP}" "DEBIAN_FRONTEND=noninteractive apt-get purge -y ${PKG_LIST}"
		sshi "${HOST_IP}" "rm -f /etc/apt/sources.list.d/ceph.list"
	fi

	green_echo "===> Fix-up repos of ${HOST_NAME}"
	sshi "${HOST_IP}" "sed -i s/stretch/buster/g /etc/apt/sources.list"
	# The rename fails harmlessly when it was already done by an earlier run.
	sshi "${HOST_IP}" "mv /etc/apt/sources.list.d/stretch-rocky.list /etc/apt/sources.list.d/buster-rocky.list" || true
	sshi "${HOST_IP}" "sed -i s/stretch/buster/g /etc/apt/sources.list.d/buster-rocky.list"
	sshi "${HOST_IP}" "apt-get update"

	green_echo "===> Disable MySQL service on ${HOST_NAME}"
	sshi "${HOST_IP}" "systemctl stop mysql.service"
	sshi "${HOST_IP}" "systemctl disable mysql.service"
	sshi "${HOST_IP}" "systemctl mask mysql.service"

	green_echo "===> Disable HA proxy on ${HOST_NAME}"
	sshi "${HOST_IP}" "systemctl stop haproxy.service"
	sshi "${HOST_IP}" "systemctl disable haproxy.service"
	sshi "${HOST_IP}" "systemctl mask haproxy.service"

	green_echo "===> Shutting down all services on ${HOST_NAME}"
	oci_upgrade_shutdown_all_services "${HOST}"

	green_echo "===> Dist-upgrade of ${HOST_NAME}"
	sshi "${HOST_IP}" "DEBIAN_FRONTEND=noninteractive apt-get dist-upgrade -y -o Dpkg::Options::=--force-confold"
	# -y like every other apt-get call here: nobody can answer a prompt.
	sshi "${HOST_IP}" "DEBIAN_FRONTEND=noninteractive apt-get install -y mariadb-backup"

	green_echo "===> Re-enable MySQL on ${HOST_NAME} so it comes back after reboot"
	sshi "${HOST_IP}" "systemctl unmask mysql.service"
	sshi "${HOST_IP}" "systemctl enable mysql.service"

	green_echo "===> Re-enable HAproxy's on ${HOST_NAME} so it comes back after reboot"
	sshi "${HOST_IP}" "systemctl unmask haproxy.service"
	sshi "${HOST_IP}" "systemctl enable haproxy.service"

	green_echo "===> Remove /usr/sbin/policy-rc.d on ${HOST_NAME}"
	sshi "${HOST_IP}" "rm -f /usr/sbin/policy-rc.d"

	green_echo "===> Fixing openssl.cnf"
	sshi "${HOST_IP}" "sed -i 's/MinProtocol = TLSv1.2/MinProtocol = TLSv1.0/' /etc/ssl/openssl.cnf"

	green_echo "===> Reboot of ${HOST_NAME} and wait until it comes back"
	# The ssh session is cut by the reboot itself, so its status is ignored.
	sshi "${HOST_IP}" "reboot" || true
	sleep 20
	wait_for_ssh "${HOST_IP}"
}

# Copy /root/nova.conf back into /etc/nova on the host and install the Nova
# controller packages.
# Arguments: $1 - "ip,hostname" record of the host.
oci_upgrade_reinstall_nova () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	green_echo "===> Install the missing Nova services on ${HOST_NAME}"
	sshi "${HOST_IP}" "mkdir -p /etc/nova ; chown nova:nova /etc/nova ; cp /root/nova.conf /etc/nova"
	sshi "${HOST_IP}" "DEBIAN_FRONTEND=noninteractive apt-get install -y nova-api nova-conductor nova-consoleauth nova-consoleproxy nova-placement-api nova-scheduler"
}

# Delete /var/lib/rabbitmq/mnesia/ on the host, then unmask, enable and start
# rabbitmq-server.service.
# Arguments: $1 - "ip,hostname" record of the host.
oci_upgrade_restart_rabbit () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	green_echo "===> Restart RabbitMQ on ${HOST_NAME} (standalone node for the moment)"
	sshi "${HOST_IP}" "rm -rf /var/lib/rabbitmq/mnesia/"
	sshi "${HOST_IP}" "systemctl unmask rabbitmq-server.service"
	sshi "${HOST_IP}" "systemctl enable rabbitmq-server.service"
	sshi "${HOST_IP}" "systemctl start rabbitmq-server.service"
}

# Stop, disable and mask corosync, then pacemaker, on the host.
# Arguments: $1 - "ip,hostname" record of the host.
oci_upgrade_shutdown_vip () {
	local HOST HOST_IP HOST_NAME UNIT
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	green_echo "-> Shutting down crm on ${HOST_NAME}"
	for UNIT in corosync pacemaker ; do
		sshi "${HOST_IP}" "systemctl stop ${UNIT}"
		sshi "${HOST_IP}" "systemctl disable ${UNIT}"
		sshi "${HOST_IP}" "systemctl mask ${UNIT}"
	done
}

# Unmask, enable and start corosync and pacemaker on the host, then ask crm
# to start the openstack-api-vip resource.
# Arguments: $1 - "ip,hostname" record of the host.
oci_upgrade_start_vip () {
	local HOST HOST_IP HOST_NAME
	HOST=${1}
	# Text before the first comma is the IP, text after it the hostname
	# (the hostname used to be read from field 1, i.e. the IP).
	HOST_IP=${HOST%%,*}
	HOST_NAME=${HOST#*,}

	# The message used to read "Shutting down crm", copied from
	# oci_upgrade_shutdown_vip, although this function starts it.
	green_echo "-> Starting crm on ${HOST_NAME}"
	sshi "${HOST_IP}" "systemctl unmask corosync"
	sshi "${HOST_IP}" "systemctl enable corosync"
	sshi "${HOST_IP}" "systemctl unmask pacemaker"
	sshi "${HOST_IP}" "systemctl enable pacemaker"
	sshi "${HOST_IP}" "systemctl start corosync"
	sshi "${HOST_IP}" "systemctl start pacemaker"
	# Best effort: a failure to start the resource must not abort the upgrade.
	sshi "${HOST_IP}" "crm resource start openstack-api-vip" || true
}

# Upgrade the first controller: shut RabbitMQ down, dist-upgrade the node,
# reinstall Nova when the cluster has compute nodes, bring RabbitMQ back and
# replay the saved RabbitMQ access grants on it.
# Globals: FIRST_CTRL, FIRST_CTRL_IP, FIRST_CTRL_HOST, HAS_COMPUTE_NODE (read).
oci_upgrade_node1 () {
	local GRANT_SCRIPT
	GRANT_SCRIPT=rabbitmq-access-grant

	green_echo "### UPGRADE ${FIRST_CTRL_HOST} ###"
	oci_upgrade_shutdown_rabbit ${FIRST_CTRL}
	oci_upgrade_node ${FIRST_CTRL}
	case "${HAS_COMPUTE_NODE}" in
		yes)
			oci_upgrade_reinstall_nova ${FIRST_CTRL}
			;;
	esac

	oci_upgrade_restart_rabbit ${FIRST_CTRL}
	green_echo "===> Populate RabbitMQ login / pass so services can access on node 1"
	# Copy the grant script into the remote home directory, then run it there.
	scpi /etc/openstack-cluster-installer/${GRANT_SCRIPT} ${FIRST_CTRL_IP}:
	sshi ${FIRST_CTRL_IP} /root/${GRANT_SCRIPT}
}

# Shut RabbitMQ down on every controller except the first one, then restart
# services on the first controller so that they re-declare their queues.
# Globals: ALL_CTRL, FIRST_CTRL, FIRST_CTRL_IP, FIRST_CTRL_HOST,
#          HAS_COMPUTE_NODE (read).
oci_upgrade_move_rabbit_to_node1 () {
	# Loop variables are local so they no longer leak into the global scope.
	local CTRL SERVICES servicename

	red_echo "===> DANGER ZONE: Moving RabbitMQ service"
	# Shutdown all rabbitmq except on first controller
	for CTRL in ${ALL_CTRL} ; do
		if [ "${CTRL}" != "${FIRST_CTRL}" ] ; then
			oci_upgrade_shutdown_rabbit "${CTRL}"
		fi
	done

	green_echo "===> Restart all services on ${FIRST_CTRL_HOST} to re-declare the RabbitMQ queues..."
	green_echo "===> ...we start by nova and neutron, which are the most important (ie: urgent)"
	# barbican-api and heat-engine are always restarted; the other services
	# go first, and only when the cluster has compute nodes.
	SERVICES="barbican-api heat-engine"
	if [ "${HAS_COMPUTE_NODE}" = "yes" ] ; then
		SERVICES="neutron-rpc-server nova-conductor aodh-notifier cinder-scheduler glance-api octavia-worker ${SERVICES}"
	fi
	for servicename in ${SERVICES} ; do
		# Only services that have an init script on the node are restarted.
		sshi "${FIRST_CTRL_IP}" "if [ -x /etc/init.d/$servicename ] ; then echo '===> Restarting '$servicename ; systemctl stop $servicename ; sleep 1 ; systemctl start $servicename ; fi"
	done

	red_echo "===> DANGER ZONE END: Moved RabbitMQ as standalone on ${FIRST_CTRL_HOST}"
}

# Upgrade the second controller.
# Corosync/pacemaker are shut down on it first, the VIP is started on the last
# controller, and the node is dist-upgraded. Then the last controller leaves
# standalone mode, corosync is shut down on every controller other than the
# first two, and it is started again on the first two. Finally RabbitMQ is
# restarted on the second controller and joined to the first one.
# Globals: ALL_CTRL, FIRST_CTRL, FIRST_CTRL_HOST, SEC_CTRL, SEC_CTRL_IP,
#          SEC_CTRL_HOST, LAST_CTRL_IP, HAS_COMPUTE_NODE (read).
oci_upgrade_node2 () {
	local i

	green_echo "===> ### UPGRADE ${SEC_CTRL_HOST} ###"
	red_echo "===> Shutting down VIP on ${SEC_CTRL_HOST}, please check it continues to work after this operation"
	red_echo "===> and fix manualy if it doesn't"
	oci_upgrade_shutdown_vip "${SEC_CTRL}"
	sleep 2
	sshi "${LAST_CTRL_IP}" "systemctl restart corosync"
	sleep 2
	sshi "${LAST_CTRL_IP}" "crm resource start openstack-api-vip"
	sleep 2
	oci_upgrade_node "${SEC_CTRL}"

	red_echo "===> DANGER ZONE: Shutting down Corosync on last remaining nodes"
	green_echo "===> Shutdown the VIP all remaning nodes"

	# Reconfigure last controller to NOT work if standalone anymore.
	green_echo "---> Last controller, not in standalone"
	sshi "${LAST_CTRL_IP}" 'sed -i "s/expected_votes: 1//" /etc/corosync/corosync.conf'
	sshi "${LAST_CTRL_IP}" "systemctl restart corosync"
	sleep 10

	for i in ${ALL_CTRL} ; do
		# Both sides are quoted so that an empty record cannot turn the test
		# into a syntax error.
		if [ "${i}" = "${FIRST_CTRL}" ] || [ "${i}" = "${SEC_CTRL}" ] ; then
			green_echo "-> Skiping shutting down on ${i}"
		else
			# Name the node being handled: the message used ${1}, which is
			# always empty because this function is called without arguments.
			green_echo "-> Shutting down VIP on ${i}"
			oci_upgrade_shutdown_vip "${i}"
		fi
	done
	green_echo "-> Starting VIP on ${FIRST_CTRL_HOST}"
	oci_upgrade_start_vip "${FIRST_CTRL}"

	green_echo "-> Starting VIP on ${SEC_CTRL_HOST}"
	oci_upgrade_start_vip "${SEC_CTRL}"

	red_echo "===> DANGER ZONE END: Corosync must be running on node 1 and 2 only"

	if [ "${HAS_COMPUTE_NODE}" = "yes" ] ; then
		oci_upgrade_reinstall_nova "${SEC_CTRL}"
	fi

	oci_upgrade_restart_rabbit "${SEC_CTRL}"
	sshi "${SEC_CTRL_IP}" "oci-auto-join-rabbitmq-cluster ${FIRST_CTRL_HOST}"
}

# Upgrade every controller other than the first two: dist-upgrade it,
# reinstall Nova when the cluster has compute nodes, restart RabbitMQ and join
# it to the first controller, start corosync/pacemaker and restart apache2.
# Globals: ALL_CTRL, FIRST_CTRL, FIRST_CTRL_HOST, SEC_CTRL,
#          HAS_COMPUTE_NODE (read).
oci_upgrade_remaining_nodes () {
	# Loop variables are local so they no longer leak into the global scope.
	local CTRL HOST_IP HOST_NAME

	green_echo "===> Upgrading all remaining nodes"
	for CTRL in ${ALL_CTRL} ; do
		# Text before the first comma is the IP, text after it the hostname
		# (the hostname used to be read from field 1, i.e. the IP).
		HOST_IP=${CTRL%%,*}
		HOST_NAME=${CTRL#*,}
		if [ "${CTRL}" = "${FIRST_CTRL}" ] || [ "${CTRL}" = "${SEC_CTRL}" ] ; then
			green_echo "-> Skipping upgrade of ${HOST_NAME} (already upgraded)"
		else
			green_echo "-> Upgrading of ${HOST_NAME}"
			oci_upgrade_node "${CTRL}"
			if [ "${HAS_COMPUTE_NODE}" = "yes" ] ; then
				oci_upgrade_reinstall_nova "${CTRL}"
			fi
			oci_upgrade_restart_rabbit "${CTRL}"
			sshi "${HOST_IP}" "oci-auto-join-rabbitmq-cluster ${FIRST_CTRL_HOST}"
			oci_upgrade_start_vip "${CTRL}"
			sshi "${HOST_IP}" "systemctl restart apache2"
		fi
	done
}

# Re-enable the puppet agent on every controller and run it once, with the
# cluster's "initial-cluster-setup" option set to yes for the duration of
# the runs.
# Globals: CLUSTER_NAME, ALL_CTRL, SELF_SIGNED_API_CERT (read).
oci_upgrade_restart_puppet_everywhere () {
	# Loop variables are local so they no longer leak into the global scope;
	# the unused HOST_NAME (read from the wrong field) has been dropped.
	local CTRL HOST_IP PUPPET_RUN

	# With a self-signed API certificate the puppet run gets OS_CACERT
	# pointing at the OCI CA chain.
	PUPPET_RUN="puppet agent -t --debug"
	if [ "${SELF_SIGNED_API_CERT}" = "yes" ] ; then
		PUPPET_RUN="OS_CACERT=/etc/ssl/certs/oci-pki-oci-ca-chain.pem ${PUPPET_RUN}"
	fi

	ocicli cluster-set "${CLUSTER_NAME}" --initial-cluster-setup yes
	for CTRL in ${ALL_CTRL} ; do
		HOST_IP=${CTRL%%,*}
		sshi "${HOST_IP}" "puppet agent --enable"
		# The status of the puppet run is deliberately ignored so that one
		# node cannot abort the loop.
		sshi "${HOST_IP}" "${PUPPET_RUN}" || true
	done
	ocicli cluster-set "${CLUSTER_NAME}" --initial-cluster-setup no
}

# Main sequence. The steps run in this order; because of "set -e" at the top
# of the script, a step that fails stops the script before the next one.
# 1. cluster-wide preparation (puppet disabled, VIP moved to the last node)
oci_upgrade_prepare
# 2. upgrade of the first controller
oci_upgrade_node1
# 3. RabbitMQ shut down everywhere except on the first controller
oci_upgrade_move_rabbit_to_node1
# 4. upgrade of the second controller
oci_upgrade_node2
# 5. upgrade of every other controller
oci_upgrade_remaining_nodes
# 6. puppet re-enabled and run on every controller
oci_upgrade_restart_puppet_everywhere
