#!/bin/sh
#
#
# Checkpoint_last OCF Resource Agent.
#
# Copyright (c) 2006 Andrew Beekhof
# Copyright (c) 2012. Synology, Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#

#######################################################################
# Initialization:

# Load the OCF shell helpers and the rc helper library. Helpers used later
# in this file but not defined in it (e.g. synoha_log) are expected to come
# from one of these sourced files.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
. "${prefix}/etc.defaults/rc.subr"

# NOTE: CRM_MASTER and DRBDADM hold a command *plus* arguments, and several
# of the option constants below hold more than one word. They are expanded
# unquoted on purpose at their call sites so the shell splits them.
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"

# Synology HA command-line tools.
SYNOHA_PROG="${prefix}/sbin/synoha"
SYNOHASTR_PROG="${prefix}/sbin/synohastr"
SYNODRBD_PROG="${prefix}/sbin/synodrbd"

# Options passed to the tools above.
NOTIFY_HA_SERVICE_UP="--notify service-up"
CHECK_HA_DATA_SYNC="--check-data-sync"
CLEAN_CRM_STATE="--crm-clean-state"
CHECK_IF_SWITCH_OVER="--check-if-switch-over"
CLEAN_SWITCH_OVER_STATE="--clean-switch-over-state"
CHECK_SYNC="--check-sync"
CHECK_PASSIVE_CRASH="--check-passive-crash"
CHECK_SERVICE_FAIL="--service-check-fail"

# Marker files used by the monitor action on the slave side:
#   NO_ACTIVE_COUNT     - holds the epoch second at which the remote node
#                         was first seen as not "Active".
#   NO_ACTIVE_UNBINDING - created once the local unbind has been triggered.
NO_ACTIVE_COUNT="/tmp/ha/no_active_cnt"
NO_ACTIVE_UNBINDING="/tmp/ha/no_active_unbinding"

CRM_PROG="${prefix}/sbin/crm"
CRM_DELETE_FAILCOUNT="resource failcount SERV delete"

# NOTE(review): if OCF_RESKEY_drbdconf_default is empty, "-c" ends up
# consuming the next word of the command line - confirm it is always set.
DRBDADM="${prefix}/sbin/drbdadm -c ${OCF_RESKEY_drbdconf_default}"

# Path printed by "synohastr --role-active-file"; promote touches this file
# and demote removes it.
FLAG_HA_ROLE_ACTIVE=$($SYNOHASTR_PROG --role-active-file)
#######################################################################

# Print the OCF resource-agent metadata (XML) on stdout and terminate.
#
# Globals:  HA_RSCTMP, OCF_RESOURCE_INSTANCE (read; interpolated into the
#           advertised default of the "state" parameter), OCF_SUCCESS (read)
# Outputs:  metadata XML on stdout
# Exits:    with $OCF_SUCCESS; this function never returns.
meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Checkpoint_last" version="1.0">
<version>1.0</version>

<longdesc lang="en">
This is an example resource agent that impliments two states
</longdesc>
<shortdesc lang="en">Example Checkpoint_last resource agent</shortdesc>

<parameters>

<parameter name="state" unique="1">
<longdesc lang="en">
Location to store the resource state in
</longdesc>
<shortdesc lang="en">State file</shortdesc>
<content type="string" default="${HA_RSCTMP}/Checkpoint_last-${OCF_RESOURCE_INSTANCE}.state" />
</parameter>

</parameters>

<actions>
<action name="start"   timeout="20s" />
<action name="stop"    timeout="20s" />
<action name="promote" timeout="20s" />
<action name="demote"  timeout="20s" />
<action name="monitor" timeout="20" interval="31" role="Slave" depth="0" />
<action name="monitor" timeout="20" interval="29" role="Master" depth="0" />
<action name="meta-data"  timeout="5" />
<action name="validate-all"  timeout="20s" />
</actions>
</resource-agent>
END
	exit $OCF_SUCCESS
}

#######################################################################

# Print the usage text on stdout and terminate the script.
#
# Arguments: $1 - exit status to terminate with (optional; defaults to
#                 $OCF_ERR_GENERIC, which was the previous fixed status)
# Outputs:   usage text on stdout
# Exits:     with $1 when given, else $OCF_ERR_GENERIC; never returns.
checkpoint_last_usage() {
	cat <<END
usage: $0 {start|stop|promote|demote|monitor|validate-all|meta-data}

Expects to have a fully populated OCF RA-compliant environment set.
END
	# Callers pass the status they want reported (e.g. success for "help",
	# "unimplemented" for an unknown action); honor it instead of always
	# failing with a generic error.
	exit ${1:-$OCF_ERR_GENERIC}
}

# Record the current role in the state file, replacing its contents.
#
# Globals:   OCF_RESKEY_state (read) - path of the state file
# Arguments: $1 - value to store (the callers in this file pass "master"
#                 or "slave")
# Returns:   status of the write
checkpoint_last_update() {
	# Quote the target so a path containing whitespace is still one word.
	printf '%s\n' "$1" > "${OCF_RESKEY_state}"
}

# Compare the recorded state against an expected value.
#
# Globals:   OCF_RESKEY_state (read) - path of the state file
# Arguments: $1 - expected state; empty or omitted means "no state recorded"
# Returns:   $OCF_SUCCESS when the state file exists and its contents equal
#            $1, or when the file does not exist and $1 is empty;
#            $OCF_ERR_GENERIC otherwise.
checkpoint_last_check_state() {
	# Keep the scratch variables out of the global namespace.
	local target state
	target=$1

	if [ -f "${OCF_RESKEY_state}" ]; then
		state=$(cat "${OCF_RESKEY_state}")
		if [ "x$target" = "x$state" ]; then
			return $OCF_SUCCESS
		fi
	elif [ -z "$target" ]; then
		# No state file: this only matches the "nothing recorded" query.
		return $OCF_SUCCESS
	fi

	return $OCF_ERR_GENERIC
}

# "start" action: bring the resource up in the slave role.
#
# Globals:  NO_ACTIVE_COUNT, NO_ACTIVE_UNBINDING (read; files removed),
#           CRM_MASTER (read; command plus arguments, split on purpose)
# Returns:  $OCF_RUNNING_MASTER if the state file already says "master",
#           otherwise $OCF_SUCCESS.
checkpoint_last_start() {
	if checkpoint_last_check_state master; then
		# CRM Error - Should never happen
		return $OCF_RUNNING_MASTER
	fi

	# Drop stale no-active markers. "&>" is not defined for /bin/sh, so
	# use the portable redirection; failures are deliberately ignored.
	rm -f "$NO_ACTIVE_COUNT" "$NO_ACTIVE_UNBINDING" >/dev/null 2>&1

	checkpoint_last_update slave
	# Set this node's master preference to 5.
	$CRM_MASTER -v 5
	return $OCF_SUCCESS
}

# "demote" action: drop from the master role back to slave.
#
# Globals:  NO_ACTIVE_COUNT, NO_ACTIVE_UNBINDING, FLAG_HA_ROLE_ACTIVE
#           (read; files removed), CRM_MASTER (read; command plus
#           arguments, split on purpose)
# Returns:  $OCF_NOT_RUNNING if no state is recorded, otherwise
#           $OCF_SUCCESS.
checkpoint_last_demote() {
	# Called without an argument, the check succeeds only when there is
	# no state file at all, i.e. the resource is not running.
	if checkpoint_last_check_state; then
		# CRM Error - Should never happen
		return $OCF_NOT_RUNNING
	fi

	# Drop stale no-active markers. "&>" is not defined for /bin/sh, so
	# use the portable redirection; failures are deliberately ignored.
	rm -f "$NO_ACTIVE_COUNT" "$NO_ACTIVE_UNBINDING" >/dev/null 2>&1

	checkpoint_last_update slave
	# Set this node's master preference to 5.
	$CRM_MASTER -v 5
	# Remove the active-role flag file (skipped if its path is unknown).
	if [ -n "$FLAG_HA_ROLE_ACTIVE" ]; then
		rm -f "$FLAG_HA_ROLE_ACTIVE"
	fi
	return $OCF_SUCCESS
}

# "promote" action: switch this node to the master role.
#
# Globals:  SYNOHA_PROG, CLEAN_SWITCH_OVER_STATE, NOTIFY_HA_SERVICE_UP,
#           CLEAN_CRM_STATE, CRM_MASTER, CRM_PROG, CRM_DELETE_FAILCOUNT
#           (read; several hold multiple words and are split on purpose),
#           FLAG_HA_ROLE_ACTIVE (read; file is created)
# Returns:  $OCF_NOT_RUNNING if no state is recorded, otherwise
#           $OCF_SUCCESS.
checkpoint_last_promote() {
	local host_name

	# "&>" is not defined for /bin/sh; use the portable form throughout.
	${SYNOHA_PROG} ${CLEAN_SWITCH_OVER_STATE} >/dev/null 2>&1

	# Called without an argument, the check succeeds only when there is
	# no state file at all, i.e. the resource is not running.
	if checkpoint_last_check_state; then
		return $OCF_NOT_RUNNING
	fi

	checkpoint_last_update master

	# Send the service-up notification and clear the CRM state in the
	# background so the promote action itself is not delayed.
	{
		${SYNOHA_PROG} ${NOTIFY_HA_SERVICE_UP} >/dev/null 2>&1
		${SYNOHA_PROG} ${CLEAN_CRM_STATE} >/dev/null 2>&1
	} &

	# TODO: delete the failcount after a successful recovery from a
	# monitor failure.
	# The deletion cannot live here: CRM may run Checkpoint_last:promote
	# and SERV:monitor concurrently, so the failcount could be cleared
	# before the failure has been counted. Kept disabled on purpose.
	if false; then
		host_name=$(${SYNOHA_PROG} --local-name | awk '{print $3}')
		${CRM_PROG} ${CRM_DELETE_FAILCOUNT} ${host_name}
	fi

	# Set this node's master preference to 10.
	$CRM_MASTER -v 10
	# Create the active-role flag file (skipped if its path is unknown).
	if [ -n "$FLAG_HA_ROLE_ACTIVE" ]; then
		touch "$FLAG_HA_ROLE_ACTIVE"
	fi
	return $OCF_SUCCESS
}

# "stop" action: remove the master preference and the state file.
#
# Globals:  CRM_MASTER (read; command plus arguments, split on purpose),
#           OCF_RESKEY_state (read; file removed)
# Returns:  $OCF_RUNNING_MASTER if the state file still says "master"
#           (the state file is then left in place), otherwise $OCF_SUCCESS.
checkpoint_last_stop() {
	# Delete this node's master preference first.
	$CRM_MASTER -D

	if checkpoint_last_check_state master; then
		# CRM Error - Should never happen
		return $OCF_RUNNING_MASTER
	fi

	# Quote the path so a state file whose path contains whitespace is
	# still detected and removed.
	if [ -f "${OCF_RESKEY_state}" ]; then
		rm "${OCF_RESKEY_state}"
	fi
	return $OCF_SUCCESS
}

# Helper: run the master-side periodic checks one after another, discarding
# their output. Intended to be run in the background by the monitor action.
_checkpoint_last_master_checks() {
	# "&>" is not defined for /bin/sh; use the portable redirection.
	${SYNODRBD_PROG} ${CHECK_HA_DATA_SYNC} >/dev/null 2>&1
	${SYNOHA_PROG} ${CHECK_IF_SWITCH_OVER} >/dev/null 2>&1
	${SYNODRBD_PROG} ${CHECK_SYNC} >/dev/null 2>&1
	${SYNOHA_PROG} ${CHECK_PASSIVE_CRASH} >/dev/null 2>&1
	${SYNOHA_PROG} ${CHECK_SERVICE_FAIL} >/dev/null 2>&1
}

# Helper: if "drbdadm sh-status all" reports _flags_user_isp=1, log it and
# issue "resume-sync all".
_checkpoint_last_resume_sync() {
	# Test the pipeline directly rather than executing the (empty) output
	# of a command substitution.
	if $DRBDADM sh-status all | grep -q "_flags_user_isp=1"; then
		synoha_log "resume-sync all"
		$DRBDADM resume-sync all
	fi
}

# Helper: slave-side watchdog for a missing active node. Records when the
# remote role was first seen as not "Active" and, once that has lasted for
# $MAX_HA_PASSIVE_ONLY seconds, unbinds the local node and force-reboots.
_checkpoint_last_watch_active() {
	local timeout cnt_start cnt_cur unbind_with_reboot no_active

	# An unbind is already in progress; nothing more to do.
	if [ -r "$NO_ACTIVE_UNBINDING" ]; then
		return
	fi

	if "${HA_SBIN_DIR}/synoha" --remote-role | grep -q "Active"; then
		# The remote node is active again: forget the countdown.
		rm -f "$NO_ACTIVE_COUNT"
		return
	fi

	if [ ! -r "$NO_ACTIVE_COUNT" ]; then
		# First time the remote node is seen as not active: start timing.
		date +%s > "$NO_ACTIVE_COUNT" 2>/dev/null
		return
	fi

	cnt_start=$(cat "$NO_ACTIVE_COUNT")
	case $cnt_start in
	'' | *[!0-9]*)
		# Empty or corrupt timestamp (e.g. a failed earlier write). Left
		# alone it would make every later comparison fail and disable the
		# watchdog for good, so restart the countdown instead.
		date +%s > "$NO_ACTIVE_COUNT" 2>/dev/null
		return
		;;
	esac

	timeout=$MAX_HA_PASSIVE_ONLY
	case $timeout in
	'' | *[!0-9]*)
		# No usable timeout configured: never trigger the unbind.
		return
		;;
	esac

	cnt_cur=$(date +%s)
	if [ $((cnt_cur - cnt_start)) -ge "$timeout" ]; then
		# Detached so this helper can finish while the sequence below
		# waits before rebooting.
		{
			unbind_with_reboot=$($SYNOHASTR_PROG --unbind-with-reboot)
			no_active=$($SYNOHASTR_PROG --no-active)
			synoha_log "Waiting active node timeout. Unbind local."
			touch "$NO_ACTIVE_UNBINDING"

			"${HA_SBIN_DIR}/synoha" --notify switch-fail >/dev/null 2>&1 &
			# $unbind_with_reboot is left unquoted as in the original so an
			# empty value contributes no argument.
			"${HA_SBIN_DIR}/synoha" --unbind-local $unbind_with_reboot "$no_active" >/dev/null 2>&1 &
			# wait reboot after unbind local
			sleep 300
			reboot -f
		} &
	fi
}

# "monitor" action: report the current role and start the role-specific
# background checks.
#
# Globals:  OCF_RESKEY_state, SYNODRBD_PROG, SYNOHA_PROG, SYNOHASTR_PROG,
#           DRBDADM, HA_SBIN_DIR, MAX_HA_PASSIVE_ONLY, NO_ACTIVE_COUNT,
#           NO_ACTIVE_UNBINDING and the CHECK_* option constants (read)
# Outputs:  a diagnostic plus the state file contents on stdout when the
#           state file holds an unexpected value
# Returns:  $OCF_RUNNING_MASTER when the state is "master",
#           $OCF_SUCCESS when the state is "slave",
#           $OCF_ERR_GENERIC when the state file exists with other contents,
#           $OCF_NOT_RUNNING when there is no state file.
checkpoint_last_monitor() {
	if checkpoint_last_check_state "master"; then
		_checkpoint_last_master_checks &
		return $OCF_RUNNING_MASTER
	fi

	if checkpoint_last_check_state "slave"; then
		# Both helpers run in the background so the monitor result is
		# returned immediately.
		_checkpoint_last_resume_sync &
		_checkpoint_last_watch_active &
		return $OCF_SUCCESS
	fi

	if [ -f "${OCF_RESKEY_state}" ]; then
		echo "File '${OCF_RESKEY_state}' exists but contains unexpected contents"
		cat "${OCF_RESKEY_state}"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_NOT_RUNNING
}

# "validate-all" action. No checks are performed: the script is terminated
# straight away with $OCF_SUCCESS, so this function never returns.
checkpoint_last_validate() {
	exit ${OCF_SUCCESS}
}

# Default location of the state file when the "state" parameter is not set.
: "${OCF_RESKEY_state=${HA_RSCTMP}/Checkpoint_last-${OCF_RESOURCE_INSTANCE}.state}"

# Dispatch on the requested OCF action. The role-changing actions are
# logged through synoha_log before they run; monitor is not logged.
case "$__OCF_ACTION" in
	meta-data)
		meta_data
		;;
	start)
		synoha_log "checkpoint_last start."
		checkpoint_last_start
		;;
	promote)
		synoha_log "checkpoint_last promote."
		checkpoint_last_promote
		;;
	demote)
		synoha_log "checkpoint_last demote"
		checkpoint_last_demote
		;;
	stop)
		synoha_log "checkpoint_last stop."
		checkpoint_last_stop
		;;
	monitor)
		checkpoint_last_monitor
		;;
	validate-all)
		checkpoint_last_validate
		;;
	usage|help)
		checkpoint_last_usage $OCF_SUCCESS
		;;
	*)
		checkpoint_last_usage $OCF_ERR_UNIMPLEMENTED
		;;
esac

# Propagate the status of whichever action ran.
exit $?

