#!/bin/ksh # # CDDL HEADER START # # The contents of this file are subject to the terms of the # Common Development and Distribution License (the License). # You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/CDDL.txt # or http://www.opensolaris.org/os/licensing. # See the License for the specific language governing permissions # and limitations under the License. # # When distributing Covered Code, include this CDDL HEADER in each # file and include the License file at usr/src/CDDL.txt. # If applicable, add the following below this CDDL HEADER, with the # fields enclosed by brackets [] replaced with your own identifying # information: Portions Copyright [yyyy] [name of copyright owner] # # CDDL HEADER END # # Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" # PKG=SUNWscxvm TASK_COMMAND="" RESOURCE_PROJECT_NAME="" CCR_TABLE=${VM}_"domain_config" TMP_DIR="/var/tmp" LOGFILE=${TMP_DIR}/${RESOURCE}_logfile # Commands definition SCLOGGER=/usr/cluster/lib/sc/scds_syslog LOGGER=/usr/bin/logger GREP=/usr/xpg4/bin/grep AWK=/usr/bin/awk PGREP=/usr/bin/pgrep SLEEP=/usr/bin/sleep TR=/usr/xpg4/bin/tr SCHA_RESOURCE_GET=/usr/cluster/bin/scha_resource_get SCHA_RESOURCEGROUP_GET=/usr/cluster/bin/scha_resourcegroup_get SCHA_CLUSTER_GET=/usr/cluster/bin/scha_cluster_get HATIMERUN=/usr/cluster/bin/hatimerun LDM=/opt/SUNWldm/bin/ldm VIRSH=/usr/bin/virsh XM=/usr/sbin/xm CCRADM=/usr/cluster/lib/sc/ccradm CL_EXEC_CLIENT=/usr/cluster/lib/sc/cl_exec_client syslog_tag() { ${SET_DEBUG} print "SC[${PKG:-??}.${METHOD:-??}]:${RESOURCEGROUP:-??}:${RESOURCE:-??}" } scds_syslog() { if [ -f "${SCLOGGER}" ] then ${SCLOGGER} "$@" & else while getopts 'p:t:m' opt do case "${opt}" in t) TAG=${OPTARG};; p) PRI=${OPTARG};; esac done shift $((${OPTIND} - 1)) LOG_STRING=$(/usr/bin/printf "$@") ${LOGGER} -p ${PRI} -t ${TAG} ${LOG_STRING} fi } i18n_message() { 
debug_message "Function: i18n_message - Begin" ${DEBUG} print -u2 $(/bin/printf "$@") debug_message "Function: i18n_message - End" return 0 } debug_message() { typeset DEBUG_TEXT= case ${DEBUG_LEVEL} in 0) # No debug msgs SET_DEBUG= ;; 1) # Begin and End msgs SET_DEBUG= DEBUG_TEXT=$(echo ${1} | ${GREP} -E 'Begin|End') ;; 2) # All debug msgs SET_DEBUG="set -x" DEBUG_TEXT=${1} ;; esac [[ -n "${DEBUG_TEXT}" ]] && \ scds_syslog -p daemon.debug -t $(syslog_tag) -m \ "%s" "${DEBUG_TEXT}" } log_message() { # # Output a message to syslog as required # debug_message "Function: log_message - Begin" ${SET_DEBUG} if [ -s "${LOGFILE}" ] then PRIORITY=${1} HEADER=${2} # # Ensure that the while loop only reads a closed file # strings ${LOGFILE} > ${LOGFILE}.copy while read MSG_TXT do scds_syslog -p daemon.${PRIORITY} -t $(syslog_tag) -m \ "%s - %s" "${HEADER}" "${MSG_TXT}" done < ${LOGFILE}.copy fi cat /dev/null > ${LOGFILE} > /dev/null cat /dev/null > ${LOGFILE}.copy debug_message "Function: log_message - End" } get_resource_property() { debug_message "Function: get_resource_property - Begin" ${SET_DEBUG} typeset RS=${1} typeset PROPERTY=${2} typeset rc # Retrieve the property value. 
OUTPUT=$(${SCHA_RESOURCE_GET} -O Extension -R ${RS} ${PROPERTY}) rc=${?} debug_message "get_resource_property - " \ "scha_resource_get of property ${PROPERTY} returned ${rc}" if (( ${rc} == 0 )) then # print the values echo ${OUTPUT} | ${AWK} '{ \ if (NF > 1) for (i = 2; i <= NF; i++) print $i; else print "" }' fi debug_message "Function: get_resource_property - End" return ${rc} } get_properties() { debug_message "Function: get_properties - Begin" ${SET_DEBUG} typeset -i rc typeset props=$* for prop in ${props} do # retrieve the property value typeset val=$(get_resource_property ${RESOURCE} ${prop}) rc=${?} if (( ${rc} == 0 )) then case ${prop} in Domain_name) [[ -z ${DOMAIN} ]] && DOMAIN=${val};; Migration_type) [[ -z ${MIGRATION_TYPE} ]] && MIGRATION_TYPE=${val};; Plugin_probe) [[ -z ${PLUGIN_PROBE} ]] && PLUGIN_PROBE=${val};; Password_file) [[ -z ${PASSWORD_FILE} ]] && PASSWORD_FILE=${val};; Debug_level) [[ -z ${DEBUG_LEVEL} ]] && DEBUG_LEVEL=${val};; esac else # SCMSGS # @explanation # The scha_resource_get call failed. # @user_action # Check the syslog for further messages. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Cannot get the property %s of resource %s." \ "${prop}" "${RESOURCE}" break fi done debug_message "Function: get_properties - End" return ${rc} } validate_xvm() { debug_message "Function: validate_xvm - Begin" ${SET_DEBUG} typeset rc=0 typeset msgtext if [ "$(/usr/bin/uname -i)" != "i86xpv" ] then # SCMSGS # @explanation # Solaris is not booted with xVM. # @user_action # Ensure that the default boot grub menu is set to boot # Solaris xVM. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Node is not booted with xVM." msgtext=$(gettext "Node is not booted with xVM.") i18n_message "${msgtext}" rc=1 fi debug_message "Function: validate_xvm - End" return ${rc} } validate_ldom() { debug_message "Function: validate_ldom - Begin" ${SET_DEBUG} typeset ncount=0 typeset msgtext # Make sure that the password file is readable. 
if [[ ${MIGRATION_TYPE} != "NORMAL" ]] then if [ -z "${PASSWORD_FILE}" ] then # SCMSGS # @explanation # Password file cannot be null. # @user_action # Ensure that a password file name is specified. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Password file cannot be (null)." msgtext=$(gettext "Password file cannot be (null).") i18n_message "${msgtext}" debug_message "Function: validate_ldom - End" return 1 fi if [[ ! -f "${PASSWORD_FILE}" ]] || [[ ! -r "${PASSWORD_FILE}" ]] then # SCMSGS # @explanation # Incorrect Password file specified. # @user_action # Ensure that a valid password file is specified. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Invalid password file specified %s." \ "${PASSWORD_FILE}" msgtext=$(gettext "Invalid password file specified %s.") i18n_message "${msgtext}" "${PASSWORD_FILE}" debug_message "Function: validate_ldom - End" return 1 fi fi # Ensure that the control domain is a cluster node. if ! ${LDM} ls > /dev/null 2>&1 then # SCMSGS # @explanation # Self explanatory. # @user_action # Ensure that the resource is configured in # control domain. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "The LDom Manager is running in configuration mode." msgtext=$(gettext "The LDom Manager is running in configuration mode.") i18n_message "${msgtext}" debug_message "Function: validate_ldom - End" return 1 fi # Ensure that the failure-policy setting is set to "reset". # If the control domain fails,this would allow the guest domains # to panic. policy=$(${LDM} list -o domain primary \ | ${AWK} -F"=" '$1~/failure-policy/ {print $2}') if [ "${policy}" != "reset" ] then # SCMSGS # @explanation # Incorrect failure-policy setting for the domain. # @user_action # Ensure that the failure-policy for the domain is # set to "reset" on the control domain. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Invalid failure policy \"%s\" for %s domain." 
\ "${policy}" "primary" msgtext=$(gettext "Invalid failure policy \"%s\" for %s domain.") i18n_message "${msgtext}" "${policy}" "primary" debug_message "Function: validate_ldom - End" return 1 fi # The CL_EXEC_CLIENT program executes a command on any of the # cluster nodes or a zone or in a zone cluster. It then generates # as output the exit status of command and the stdout and stderr # messages. The valid options are: # [ -z zoneclustername] The command is run on the zone cluster # represented by the zonename. # -C { TS | RT | FSS | FX } The scheduling class in which the # command is to be run. # -p pri Specifies the priority of the command in the given # scheduling class. # -n id[,id..] A comma seperated list of node ID's of a # zone cluster or a node to run the command. # -c cmd [Args] The command to be run along with its arguments. for nodename in $(${SCHA_RESOURCEGROUP_GET} -O NODELIST -G ${RESOURCEGROUP}) do if [[ "$(${SCHA_CLUSTER_GET} -O NodeState_Node ${nodename})" == "DOWN" ]] then continue fi nodeid=$(${SCHA_CLUSTER_GET} -O NODEID_NODENAME ${nodename}) output=$(${CL_EXEC_CLIENT} -n ${nodeid} -c "${LDM} list-domain ${DOMAIN}") result=${?} status=$(echo ${output} | ${AWK} '{print $6}') if (( ${result} == 0 )) && (( ${status} == 0 )) then domstate=$(echo $output | ${AWK} -F" " '{print $18}') if (( ${update} == 0)) && echo $domstate | ${GREP} -q -E "^active$|suspending|resuming|suspended|starting" > /dev/null 2>&1 then # SCMSGS # @explanation # The domain is in an invalid state. # @user_action # Ensure that the domain is in inactive or bound state. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Domain %s is in %s state on %s." 
\ "${DOMAIN}" "${domstate}" "${nodename}" msgtext=$(gettext "Domain %s is in %s state on %s.") i18n_message "${msgtext}" "${DOMAIN}" "${domstate}" "${nodename}" debug_message "Function: validate_ldom - End" return 1 fi ncount=$((ncount+1)) nlist=$(echo ${nodename} ${nlist}) # dump domain confguration to ccr if [[ "$(/usr/bin/hostname)" == "${nodename}" ]] then if ! dump_domain_config then debug_message "Function: validate_ldom - End" return 1 fi fi fi done if (( ${ncount} == 0 )) then if ! ${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} > /dev/null 2>&1 then scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Domain %s does not exist." \ "${DOMAIN}" msgtext=$(gettext "Domain %s does not exist.") i18n_message "${msgtext}" "${DOMAIN}" return 1 fi fi if [[ ${ncount} -gt 1 ]] then # SCMSGS # @explanation # The domain is configured on multiple # cluster nodes. # @user_action # Ensure that the domain is configured on one node # of the cluster. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Multiple domain %s configuration exists on %s." \ "${DOMAIN}" "${nlist}" msgtext=$(gettext "Multiple domain %s configuration exists on %s.") i18n_message "${msgtext}" "${DOMAIN}" "${nlist}" return 1 fi debug_message "Function: validate_ldom - End" return 0 } validate() { debug_message "Function: validate - Begin" ${SET_DEBUG} typeset rc # Make sure that the plugin probe specified is readable. if [[ -n "${PLUGIN_PROBE}" ]] then if [ -f "${PLUGIN_PROBE}" ] && [ ! -r "${PLUGIN_PROBE}" ] then # SCMSGS # @explanation # Incorrect user probe file specified. # @user_action # Ensure that a valid user probe file is specified. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Invalid user probe file %s." 
\ "${PLUGIN_PROBE}" msgtext=$(gettext "Invalid user probe file %s.") i18n_message "${msgtext}" "${PLUGIN_PROBE}" return 1 fi fi validate_${VM} rc=${?} debug_message "Function: validate - End" return ${rc} } # # get the domain status # get_xvm_status() { debug_message "Function: get_xvm_status - Begin" ${SET_DEBUG} typeset rc ${VIRSH} domstate ${DOMAIN} rc=${?} debug_message "Function: get_xvm_status - End" return ${rc} } get_ldom_status() { debug_message "Function: get_ldom_status - Begin" ${SET_DEBUG} typeset rc=1 OUTPUT=$(${LDM} list-domain ${DOMAIN}) if (( ${?} == 0 )) then echo ${OUTPUT} | ${AWK} '{print $10}' rc=${?} fi debug_message "Function: get_ldom_status - End" return ${rc} } # # Routines to create the domain on the current cluster node. # add_xvm_domain() { debug_message "Function: add_xvm_domain - Begin" ${SET_DEBUG} typeset rc=0 if ! ${VIRSH} define ${TMP_DIR}/${RESOURCE}.xml >> $LOGFILE 2>&1 then # SCMSGS # @explanation # Defining the domain using an XML file failed. # @user_action # The command /usr/bin/virsh define failed to define the domain. # Determine if you have specified the correct domain name while # registering the resource. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Failed to define %s using %s/%s.xml." \ "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}" rc=1 fi debug_message "Function: add_xvm_domain - End" return ${rc} } add_ldom_domain() { debug_message "Function: add_ldom_domain - Begin" ${SET_DEBUG} typeset rc=0 if ! ${LDM} add-domain -i ${TMP_DIR}/${RESOURCE}.xml ${DOMAIN} >> $LOGFILE 2>&1 then # SCMSGS # @explanation # Defining the domain using an XML file failed. # @user_action # The command /opt/SUNWldm/bin/ldm "add-domain" # failed to define the domain. Determine if you # have specified the correct domain name when # registering the resource. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Failed to add the domain %s using %s/%s.xml." 
\ "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}" rc=1 fi debug_message "Function: add_ldom_domain - End" return ${rc} } # # test if domain is active # is_xvm_up() { debug_message "Function: is_xvm_up - Begin" ${SET_DEBUG} typeset rc=0 echo $(${VIRSH} domstate ${DOMAIN}) | \ ${GREP} -q -E "running|blocked|paused|in shutdown" > /dev/null 2>&1 rc=${?} debug_message "Function: is_xvm_up - End" return ${rc} } is_ldom_up() { debug_message "Function: is_ldom_up - Begin" ${SET_DEBUG} typeset rc=0 get_ldom_status | ${GREP} -q -E "^active$|^starting$" > /dev/null 2>&1 rc=${?} debug_message "Function: is_ldom_up - End" return ${rc} } # # wrapper routines to start xvm or ldom domains # start_xvm() { debug_message "Function: start_xvm - Begin" ${SET_DEBUG} typeset rc=0 ${VIRSH} start ${DOMAIN} >> $LOGFILE 2>&1 rc=${?} debug_message "Function: start_xvm - End" return ${rc} } # # After a crash/reboot of the node, the domain # would be started and there would be multiple # instances of the same domain across cluster # nodes. Hence the domain is destroyed. # init_ldom() { debug_message "Function: init_ldom - Begin" ${SET_DEBUG} typeset rc MAX_STOP_TIMEOUT=$(${SCHA_RESOURCE_GET} -O INIT_TIMEOUT \ -R ${RESOURCE} -G ${RESOURCEGROUP} ) domain_shutdown rc=${?} debug_message "Function: init_ldom - End" return ${rc} } start_ldom() { debug_message "Function: start_ldom - Begin" ${SET_DEBUG} typeset rc=0 if get_${VM}_status | ${GREP} -q -E "^inactive$" > /dev/null 2>&1 then if ${LDM} bind-domain ${DOMAIN} >> $LOGFILE 2>&1 then # SCMSGS # @explanation # The domain was bound. # @user_action # None required. The domain has been bound on this node. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Domain %s is bound." \ "${DOMAIN}" rc=0 else # SCMSGS # @explanation # The /opt/SUNWldm/bin/ldm bind-domain command failed. # @user_action # Determine why it was not possible to bind the domain. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Failed to bind %s." 
\ "${DOMAIN}" rc=1 fi fi # # The domain is made to sit at the OBP prompt, so a reboot/crash # wouldn't boot the Guest domain OS. # if (( ${rc} == 0 )) && ${LDM} set-var auto-boot?=true ${DOMAIN} >> $LOGFILE 2>&1 then if ${LDM} start-domain ${DOMAIN} >> $LOGFILE 2>&1 then while [ 1 ] do flag=$(${LDM} list-domain -p ${DOMAIN} | ${GREP} ${DOMAIN} \ | ${AWK} -F"|" '{print $4}'| ${AWK} -F"=" '{print $2}') [[ "${flag}" == "-n----" ]] && break ${SLEEP} 1 done else rc=1 fi ${LDM} set-var auto-boot?=false ${DOMAIN} >> $LOGFILE 2>&1 || rc=1 else rc=1 fi debug_message "Function: start_ldom - End" return ${rc} } start_domain() { debug_message "Function: start_domain - Begin" ${SET_DEBUG} typeset rc=0 # Turn off PMF restart. Starting a domain does not leave # a running pid as in a classic Solaris Cluster agent. START_TIMEOUT=$(${SCHA_RESOURCE_GET} -O START_TIMEOUT \ -R ${RESOURCE} -G ${RESOURCEGROUP} ) ${SLEEP} ${START_TIMEOUT} & /usr/cluster/bin/pmfadm -s ${RESOURCEGROUP},${RESOURCE},0.svc # Check if the domain exists. # # If the domain does not exist, we maybe starting the domain # on a new cluster node following a failover. As such we will # define the domain using the previously dumped XML file # located within the agent's administrative file system. # # If the domain already exists, either the domain was manually # started or the domain was migrated or live migrated from # another cluster node. Therefore, we will use the already # defined domain. # # Note that when the domain is successfully stopped the domain # is deleted. We do this simply to avoid the domain from # being manually started on multiple cluster nodes. See # domain_delete() for more information. 
if get_${VM}_status > /dev/null 2>&1 then debug_message "Validate - domain ${DOMAIN} exists" else if ${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} > ${TMP_DIR}/${RESOURCE}.xml 2> /dev/null then # add the domain to the cluster node if add_${VM}_domain ${DOMAIN} ${TMP_DIR}/${RESOURCE}.xml then # SCMSGS # @explanation # The domain is being defined using a XML file. # @user_action # None, the domain is being defined using a previously defined # XML file when the domain was last successfully started. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Domain %s defined using %s/%s.xml." \ "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}" else # error already logged. debug_message "Function: start_domain - End" return 1 fi else # SCMSGS # @explanation # The domain does not exist. # @user_action # You must ensure that the domain exists. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Domain %s does not exist." \ "${DOMAIN}" debug_message "Function: start_domain - End" return 1 fi fi # Tolerate a manually started domain and a NO-OP start # otherwise start the domain. if ${CCRADM} showkey --key noop_${RESOURCE} ${CCR_TABLE} > /dev/null 2>&1 then # SCMSGS # @explanation # The domain was migrated or live migrated. # @user_action # None required. Informational message. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "NO-OP START being performed." if ! ${CCRADM} delkey --key noop_${RESOURCE} ${CCR_TABLE} >> $LOGFILE 2>&1 then # SCMSGS # @explanation # Failed to delete the NO-OP flag from CCR. # @user_action # Check the syslog for further messages. # Determine why the NO-OP flag was not added to the CCR. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Failed to delete NO-OP flag for %s domain." \ "${DOMAIN}" debug_message "Function: start_domain - End" return 1 else debug_message "start_domain - noop_${RESOURCE} deleted" fi elif is_${VM}_up then # SCMSGS # @explanation # The domain was manually started. # @user_action # None required. Informational message. 
scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Domain %s was manually started." \ "${DOMAIN}" else if start_${VM} then # SCMSGS # @explanation # The domain was started successfully. # @user_action # None required. Informational message. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Domain %s started." \ "${DOMAIN}" else # SCMSGS # @explanation # The domain failed to start. # @user_action # Check the syslog for further messages. If possible # the cluster will attempt to restart the domain. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Domain %s failed to start." \ "${DOMAIN}" rc=1 fi fi if (( ${rc} == 0 )) then # Dump the domain configuration into an XML file. This file is then # used on another cluster node to define the domain but only if the # domain does not exist. dump_domain_config rc=${?} fi debug_message "Function: start_domain - End" return ${rc} } # # dump the domain configuration # dump_xvm_xml() { debug_message "Function: dump_xvm_xml - Begin" ${SET_DEBUG} typeset rc=0 if ! ${VIRSH} dumpxml ${DOMAIN} 2>> $LOGFILE then # SCMSGS # @explanation # "/usr/bin/virsh dumpxml" for domain failed. # @user_action # Determine why the command to dump domain # configuration failed. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "%s dumpxml for domain %s failed." \ "${VIRSH}" "${DOMAIN}" rc=${?} fi debug_message "Function: dump_xvm_xml - End" return ${rc} } dump_ldom_xml() { debug_message "Function: dump_ldom_xml - Begin" ${SET_DEBUG} typeset rc=0 if ! ${LDM} list-constraints -x ${DOMAIN} 2>> $LOGFILE then # SCMSGS # @explanation # "/opt/SUNWldm/bin/ldm list-constraints -x" # for domain failed. # @user_action # Determine why the command to list the # domain constraints failed. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "%s list-constraints for domain %s failed." 
\ "${LDM}" "${DOMAIN}" rc=1 fi debug_message "Function: dump_ldom_xml - End" return ${rc} } # # save the domain configuration in the cluster # configuration repository # dump_domain_config() { debug_message "Function: dump_domain_config - Begin" ${SET_DEBUG} typeset rc=0 # Dump the domain configuration into an XML file. The domain configuration # can be changed, when under the the agent control. olddesc=$(${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} 2> /dev/null) if (( ${?} == 1 )) then # # The ccr table might not exist. # create the CCR table, if it doesn't exist. # if ${CCRADM} addtab ${CCR_TABLE} >> $LOGFILE 2>&1 then debug_message "created ccr table ${CCR_TABLE}" else # SCMSGS # @explanation # Failed to create the CCR table. # @user_action # Check the syslog for further messages. # Determine why the CCR create failed. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Failed to create CCR table %s." \ "${CCR_TABLE}" return 1 fi fi output=$(dump_${VM}_xml) if (( ${?} == 0 )) && [[ -n "${output}" ]] then newdesc=$(echo ${output} | ${TR} -s '\n' '[ ]') if [ "${olddesc}" != "${newdesc}" ] then if ! ${CCRADM} addkey --key=xml_${RESOURCE} --value "${newdesc}" ${CCR_TABLE} > /dev/null 2>&1 then if ! ${CCRADM} changekey --key=xml_${RESOURCE} --value "${newdesc}" ${CCR_TABLE} >> $LOGFILE 2>&1 then # SCMSGS # @explanation # Failed to update the XMl dump to the CCR. # @user_action # Check the syslog for further messages. # Determine why the ccr update failed. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Failed to update domain XML %s to ccr." \ "${DOMAIN}" rc=1 fi else debug_message "dump_domain_config - %s configuration added to CCR" "${DOMAIN}" fi fi else # error already logged. 
rc=1 fi debug_message "Function: dump_domain_config - End" return ${rc} } # # probe function for domain data service # check_domain() { debug_message "Function: check_domain - Begin" ${SET_DEBUG} typeset rc SECONDS=0 if ${PGREP} -f "control_xvm start -R ${RESOURCE} " >/dev/null 2>&1 then debug_message "Function: check_domain - start program is still running " rc=100 else domstate=$(get_${VM}_status 2>/dev/null) case "${domstate}" in # Acceptable run states "running"|"blocked"|"paused"|"in shutdown"| \ "active"|"suspending"|"resuming"|"suspended"|"starting") if [ "${#PLUGIN_PROBE}" -ne 0 ] then if [ -x "$(echo ${PLUGIN_PROBE} | ${AWK} '{print $1}')" ] then PROBE_TIMEOUT=$(${SCHA_RESOURCE_GET} -O Extension -R ${RESOURCE} -G ${RESOURCEGROUP} Probe_timeout|tail -1) # Run the supplied probe with only 90% of PROBE_TIMEOUT. Also note that this # is supplied as a parameter to the PLUGIN_PROBE. HATIMERUN_TIMEOUT=$((PROBE_TIMEOUT*90/100-${SECONDS})) output=$(${HATIMERUN} -t ${HATIMERUN_TIMEOUT} -k 9 ${PLUGIN_PROBE} ${HATIMERUN_TIMEOUT}) rc=${?} case ${rc} in 0) debug_message "check_domain - ${DOMAIN} ${output}" rc=0 ;; 99) # SCMSGS # @explanation # The domain probe timed out. # @user_action # Ensure that ${PLUGIN_PROBE} can complete within # 90% of PROBE_TIMEOUT. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "%s did not complete within %s seconds." \ "${PLUGIN_PROBE}" "${HATIMERUN_TIMEOUT}" rc=100 ;; 100) if ${PGREP} -f "gds_svc_start .*-R ${RESOURCE} " >/dev/null 2>&1 then debug_message "check_domain - ${DOMAIN} is still starting" rc=100 elif ${PGREP} -f "gds_svc_stop .*-R ${RESOURCE} " >/dev/null 2>&1 then debug_message "check_domain - ${DOMAIN} is stopping" rc=100 else # SCMSGS # @explanation # The domain probe has requested a domain restart. # @user_action # None. A domain restart will be attempted. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "% has requested a domain restart %s." 
\ "${PLUGIN_PROBE}" "${output}" rc=100 fi ;; 201) if ${PGREP} -f "gds_svc_start .*-R ${RESOURCE} " >/dev/null 2>&1 then debug_message "check_domain - ${DOMAIN} is still starting" rc=100 elif ${PGREP} -f "gds_svc_stop .*-R ${RESOURCE} " >/dev/null 2>&1 then debug_message "check_domain - ${DOMAIN} is stopping" rc=100 else # SCMSGS # @explanation # The domain has requested an immediate failover. # @user_action # None. The domain will be immediately failed over. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "%s has requested an immediate failover." \ "${PLUGIN_PROBE}" rc=201 fi ;; *) # SCMSGS # @explanation # ${PLUGIN_PROBE} did not return 0, 100 or 201. # @user_action # None. A domain restart will be attempted. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "%s did not return 0, 100 or 201, a domain restart will be attempted." \ "${PLUGIN_PROBE}" rc=100 ;; esac else # SCMSGS # @explanation # ${PLUGIN_PROBE} does not exist or is not executable. # @user_action # Check the pathname exists and that ${PLUGIN_PROBE} is executable. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "%s non-existent executable." \ "${PLUGIN_PROBE}" rc=0 fi else rc=0 fi ;; # Restartable run states "shut off"|"crashed"| \ "inactive"|"stopping") rc=100 ;; # Unknown run states *) rc=100 ;; esac debug_message "check_domain - ${DOMAIN} ${domstate}" fi debug_message "Function: check_domain - End" return ${rc} } stop_domain() { debug_message "Function: stop_domain - Begin" ${SET_DEBUG} typeset rc=0 STOP_TIMEOUT=$(${SCHA_RESOURCE_GET} -O STOP_TIMEOUT \ -R ${RESOURCE} -G ${RESOURCEGROUP} ) # Note that GDS will attempt to cleanup after 80% of STOP_TIMEOUT # has been consumed. In this regard, we only allocate a combined # 75% of STOP_TIMEOUT to MAX_MIGRATE_TIMEOUT and MAX_STOP_TIMEOUT. # # This leaves 5% for domain_destroy() which maybe called if # domain_shutdown() exeecds it's timeout and finally domain_delete(). 
MAX_MIGRATE_TIMEOUT=$((STOP_TIMEOUT*25/100)) MAX_STOP_TIMEOUT=$((STOP_TIMEOUT*50/100)) SECONDS=0 # Save the domain configuration changes. if ! dump_domain_config then debug_message "Function: stop_domain - End" return 1 fi # At resource creation, the administrator can determine the Migration_type. # Valid values for Migration_type are # # Migration_type="normal" # o Stop the resource (shutdown the domain) # o Failover the resource group from the source node to the target node # o Start the resource (start the domain) # # Migration_type="migrate" # o Suspend the domain on the source node # o Copy the domain's memory pages from the source node to the target node # o Resume the domain on the target node # # Migration_type="migrate_live" # o Iteratively copy the domain's memory pages from the source node to the taregt node # o When pre-copy is no longer benefical, suspend the domain on the source node # o Copy the domain's remaning "dirty" pages from the source node to the taregt node # o Resume the domain on the target node # # Note that migration or live migration is performed over the cluster interconnect. # # For migration or live migration to be attempted across Solaris Cluster xVM nodes # the following conditions must be met. # # - The target Solaris Cluster xVM node must be running the same xVM version. # # - The migration TCP port must be open and accepting connections from the source # Solaris Cluster xVM node. # # - There must be sufficient resources for the domain to run in. # # - If the conditions are met and migration or live migration is successful a NO-OP # STOP and START is performed. This will ensure a successful STOP and START to the # appropriate RGM callback methods. Furthermore, doing a NO-OP RGM failover will # ensure that RGM subsequently actions any dependencies and that Solaris Cluster # reflects the correct state and status of resource groups and resources. 
# # - If the conditions are met but migration or live migration is not successful a # normal failover will be performed. # # - If the conditions are not met, migration or live migration will fail and a normal # failover will be performed. # # However, before attempting a migration or live migration we need to determine if the # resource is being disabled. To distinguish if the resource is being disabled we # test the ON_OFF_SWITCH property of the resource. # # If the resource is being disabled the ON_OFF_SWITCH will be DISABLED before the STOP # method is called. So, conversely if the ON_OFF_SWITCH is ENABLED the resource is not # being disabled and instead the resource group is undergoing either a switch to # another node or is being evacuated from the node. # # - If the resource is being disabled we perform a normal shutdown, regardless of the # Migration_type setting. ON_OFF_SWITCH=$(${SCHA_RESOURCE_GET} -O ON_OFF_SWITCH -R ${RESOURCE} -G ${RESOURCEGROUP}) debug_message "stop_domain - ON_OFF_SWITCH=${ON_OFF_SWITCH}" debug_message "stop_domain - MIGRATION_TYPE=${MIGRATION_TYPE}" if [[ "${ON_OFF_SWITCH}" = "DISABLED" ]] then domain_shutdown else case "${MIGRATION_TYPE}" in NORMAL) domain_shutdown rc=${?} ;; MIGRATE*) if ! domain_migrate then domain_shutdown fi rc=${?} ;; *) # SCMSGS # @explanation # Invalid Migration_type specified. # @user_action # Delete and reregister the resource with # a valid Migration_type entry. scds_syslog -p daemon.error -t $(syslog_tag) -m \ "Invalid Migration_type=%s." \ "${MIGRATION_TYPE}" rc=1 ;; esac fi debug_message "Function: stop_domain - End" return ${rc} } get_target_host() { debug_message "Function: get_target_host - Begin" ${SET_DEBUG} typeset rc=1 # Here, we need to determine the target host as the resource group is either being # switched or the node, where the resoure group is online, is being evacuated. 
# # To determine the target host for a resource group switch we rely on the cluster # command log file /var/cluster/logs/commandlog to supply the target host. We need to # obtain the correct entry from the command log file and match against the following # # + ${RESOURCEGROUP} + "START" + "switch" # # after which we only save the nodename from a clrg or scswitch command. # # Sample /var/cluster/log/commandlog output is as follows, # # 02/07/2008 08:45:13 pelko1 10548 root START - scswitch -z -g "xvm2-rg" -h "pelko2" # 02/07/2008 08:45:38 pelko1 10548 root END 0 # 02/07/2008 09:01:35 pelko1 10874 root START - clrg "switch" -n "pelko1" "xvm2-rg" # 02/07/2008 09:01:36 pelko1 10874 root END -20827641 # # If we are unable to match an entry, as perhaps the entry was logged at # and we are checking at + 1 second, i.e. we are checking just as the second # entry is incrementing to the next second, we perform another check. In fact the # last 10 seconds are checked from the commandlog. # # Once we have matched an entry from /var/cluster/logs/commandlog, we verify that # the target host is a valid nodelist entry for the resource group. # # - If we have a valid nodelist entry we then determine that target host's cluster # interconnect hostname to perform the migration or live migration. # # - If we are unable to find a match for a switch, we need to consider that an evacuate # node is being performed. However, if the node is being evacuated we will rely on # RGM to dertermine the nodename regardless if a mirgation or live migration was # requested. Subsequently, we perform a normal failover. This ensures that we do not # migrate or live migrate the domain to a node that maybe different to the node # selected by RGM. # # So, suffice to say that if a "switch" match is not found, following the discovery # that the resource is not just being disabled, and that a migrate or live migrate # was defined, we will always perform a normal failover. 
# # Note that the target host match is performed within check_commandlog(). check_commandlog debug_message "get_target_host - ${TARGET_HOST} size=${#TARGET_HOST}" if [ "${#TARGET_HOST}" -eq 0 ] then # SCMSGS # @explanation # A target host was not found # @user_action # None required. The domain will not be migrated or live # migrated instead a normal failover will be performed. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Target host not found, normal failover will be performed." elif [ ${TARGET_HOST} = "$(/usr/bin/uname -n)" ] || [ $(echo ${TARGET_HOST} | /usr/bin/grep [0-9]:global) ] then # SCMSGS # @explanation # The node is being evecuated. # @user_action # None required. The domain will not be migrated or live # migrated. Instead, a normal failover will be performed. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Node is being evacuated, normal failover will be performed." else for i in $(${SCHA_RESOURCEGROUP_GET} -O NODELIST -G ${RESOURCEGROUP}) do [[ "${i}" != "$(uname -n)" || "${i}" = "${TARGET_HOST}" ]] && rc=0 && break done if [ "${rc}" -eq 0 ] then PRIVATELINK_TARGET_HOST=$(${SCHA_CLUSTER_GET} -O PRIVATELINK_HOSTNAME_NODE ${TARGET_HOST}) debug_message "get_target_host - PRIVATELINK_TARGET_HOST=${PRIVATELINK_TARGET_HOST}" else # SCMSGS # @explanation # The target host found in the command log file is not # a valid entry within the resource groups nodelist. # @user_action # None required. The domain will not be migrated or live # migrated instead a normal failover will be performed. scds_syslog -p daemon.notice -t $(syslog_tag) -m \ "Target host %s not matched with the resource group nodelist, normal failover will be performed." 
\ "${TARGET_HOST}" fi fi debug_message "Function: get_target_host - End" return ${rc} }

#
# check_commandlog
#
# Search /var/cluster/logs/commandlog for a START entry logged within the
# last 10 seconds that names a target node, and store that node name in the
# global TARGET_HOST. Three kinds of entry are tried, in this order, for each
# second examined:
#
#   1. a "switch" entry quoting ${RESOURCEGROUP} (clrg switch / scswitch -z)
#   2. an "evacuate" entry
#   3. a "scswitch ... -S" entry
#
# Globals read:    RESOURCEGROUP, AWK, TR
# Globals written: TARGET_HOST (empty if nothing matched), HHMMSS
# Returns:         status of the final debug_message call; callers inspect
#                  TARGET_HOST rather than the return code.
#
# NOTE(review): DATE is sampled once while the time of day is walked back
# second by second, so a search that straddles midnight would pair the new
# date with times from the previous day - confirm whether that matters.
#
check_commandlog()
{
    debug_message "Function: check_commandlog - Begin"

    # Get the current epoch time
    typeset ETIME=$(/usr/bin/perl -e 'print time;')
    typeset DATE=$(/usr/bin/date '+%m/%d/%Y')

    # Number of one-second steps still to examine. Kept local so that the
    # search does not clobber a caller's variable.
    typeset attempts=10

    while (( attempts > 0 ))
    do
        # Iteratively search the commandlog for a switch or evacuate, going back in time
        # by one second each time. If a match is found we break out of the loop.
        #
        # The following may help to understand the iterative loop.
        #
        # bash-3.2# ETIME=$(perl -e 'print time;')
        # bash-3.2# echo $ETIME
        # 1202814041
        # bash-3.2# HHMMSS=$(echo "0t${ETIME}=Y" | /usr/bin/mdb | awk '{print $4}')
        # bash-3.2# echo $HHMMSS
        # 03:00:41
        # bash-3.2# ETIME=$(expr ${ETIME} - 1)
        # bash-3.2# echo $ETIME
        # 1202814040
        # bash-3.2# HHMMSS=$(echo "0t${ETIME}=Y" | /usr/bin/mdb | awk '{print $4}')
        # bash-3.2# echo $HHMMSS
        # 03:00:40
        # bash-3.2#

        # Convert the epoch time into a readable format
        HHMMSS=$(echo "0t${ETIME}=Y" | /usr/bin/mdb | ${AWK} '{print $4}')

        debug_message "check_commandlog - performed for ${DATE} ${HHMMSS}"

        # Check for a clrg switch or scswitch
        TARGET_HOST=$(/usr/bin/grep "${DATE} ${HHMMSS}" /var/cluster/logs/commandlog |
            /usr/bin/grep -w START | /usr/bin/grep switch |
            /usr/bin/grep "\"${RESOURCEGROUP}\"" |
            /usr/bin/sed -e 's/^.*-h //' -e 's/^.*-n //' |
            ${AWK} '{print $1}' | ${TR} -d '" ')

        [[ -n "${TARGET_HOST}" ]] && break

        # Check for a clrg evacuate
        TARGET_HOST=$(/usr/bin/grep "${DATE} ${HHMMSS}" /var/cluster/logs/commandlog |
            /usr/bin/grep -w START | /usr/bin/grep evacuate |
            /usr/bin/sed -e 's/^.*-n //' |
            ${AWK} '{print $1}' | ${TR} -d '+" ')

        [[ -n "${TARGET_HOST}" ]] && break

        # Check for a scswitch -S
        TARGET_HOST=$(/usr/bin/grep "${DATE} ${HHMMSS}" /var/cluster/logs/commandlog |
            /usr/bin/grep -w START | /usr/bin/grep scswitch | /usr/bin/grep "\-S" |
            /usr/bin/sed -e 's/^.*-h //' |
            ${AWK} '{print $1}' | ${TR} -d '\-SK" ')

        [[ -n "${TARGET_HOST}" ]] && break

        # Step one second further back in time.
        attempts=$(( attempts - 1 ))
        ETIME=$(( ETIME - 1 ))
    done

    debug_message "check_commandlog - TARGET_HOST=${TARGET_HOST}"

    debug_message "Function: check_commandlog - End"
}

#
# routines to perform domain migration
#

#
# migrate_xvm
#
# Run "xm migrate" (or "xm migrate --live" when MIGRATION_TYPE is
# MIGRATE_LIVE) for ${DOMAIN} towards ${PRIVATELINK_TARGET_HOST}, under
# hatimerun with a limit of ${MAX_MIGRATE_TIMEOUT}. Output is discarded.
#
# Globals read:    MIGRATION_TYPE, DOMAIN, PRIVATELINK_TARGET_HOST,
#                  MAX_MIGRATE_TIMEOUT, HATIMERUN, XM
# Globals written: OPTION
# Returns:         exit status of the hatimerun command.
#
migrate_xvm()
{
    debug_message "Function: migrate_xvm - Begin"
    ${SET_DEBUG}

    typeset rc=0

    # Translate the migration type into the xm sub-command and its flags.
    case "${MIGRATION_TYPE}" in
    MIGRATE)
        OPTION="migrate"
        ;;
    MIGRATE_LIVE)
        OPTION="migrate --live"
        ;;
    esac

    debug_message "domain_migrate - Running /usr/sbin/xm ${OPTION} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}"

    # ${OPTION} is intentionally unquoted: "migrate --live" must be split
    # into two separate words for xm.
    ${HATIMERUN} -t ${MAX_MIGRATE_TIMEOUT} -k KILL \
        ${XM} ${OPTION} "${DOMAIN}" ${PRIVATELINK_TARGET_HOST} > /dev/null 2>&1

    rc=${?}

    debug_message "Function: migrate_xvm - End"
    return ${rc}
}

#
# migrate_ldom
#
# Run /opt/SUNWscxvm/bin/ldm_migrate for ${DOMAIN} towards
# ${PRIVATELINK_TARGET_HOST}, under hatimerun with a limit of
# ${MAX_MIGRATE_TIMEOUT}. Output is appended to ${LOGFILE}.
#
# Globals read:    MIGRATION_TYPE, DOMAIN, PRIVATELINK_TARGET_HOST,
#                  PASSWORD_FILE, MAX_MIGRATE_TIMEOUT, HATIMERUN, LOGFILE
# Globals written: OPTION (only when MIGRATION_TYPE is MIGRATE)
# Returns:         exit status of the hatimerun command.
#
migrate_ldom()
{
    debug_message "Function: migrate_ldom - Begin"
    ${SET_DEBUG}

    typeset rc=0

    [[ "${MIGRATION_TYPE}" = "MIGRATE" ]] && OPTION="migrate"

    debug_message "domain_migrate - Running /opt/SUNWscxvm/bin/ldm_migrate ${OPTION} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}"

    ${HATIMERUN} -t ${MAX_MIGRATE_TIMEOUT} -k KILL \
        /opt/SUNWscxvm/bin/ldm_migrate ${OPTION} "${DOMAIN}" ${PRIVATELINK_TARGET_HOST} ${PASSWORD_FILE} >> $LOGFILE 2>&1

    rc=${?}

    debug_message "Function: migrate_ldom - End"
    return ${rc}
}

#
# routines to cancel migration
#

#
# cancel_xvm_migration
#
# Nothing needs to be cancelled for an xvm domain; always returns 0.
#
cancel_xvm_migration()
{
    # NO OP for a xvm domain
    return 0
}

#
# cancel_ldom_migration
#
# Ask ldm to cancel the migration of ${DOMAIN}, then poll the domain status
# every 5 seconds while it is reported as suspending, resuming, suspended or
# starting, for as long as SECONDS is below ${MAX_STOP_TIMEOUT}.
#
# Globals read:    LDM, DOMAIN, VM, GREP, MAX_STOP_TIMEOUT, LOGFILE
# Globals written: SECONDS (set to MAX_STOP_TIMEOUT to leave the wait loop)
#
cancel_ldom_migration()
{
    debug_message "Function: cancel_ldom_migration - Begin"
    ${SET_DEBUG}

    # cancel domain migration for ldoms
    if ${LDM} cancel-operation migration ${DOMAIN} >> $LOGFILE 2>&1
    then
        # SCMSGS
        # @explanation
        # The domain migration operation was cancelled.
        # @user_action
        # None required. Informational message.
        scds_syslog -p daemon.notice -t $(syslog_tag) -m \
            "Migration of domain %s is cancelled, the domain state is now in active state." \
            "${DOMAIN}"
    fi

    while (( ${SECONDS} < ${MAX_STOP_TIMEOUT} ))
    do
        if get_${VM}_status | ${GREP} -q -E "^suspending|^resuming|^suspended|^starting" > /dev/null 2>&1
        then
            sleep 5
        else
            # The domain has left its transitional state: end the wait.
            SECONDS=${MAX_STOP_TIMEOUT}
        fi
    done

    debug_message "Function: cancel_ldom_migration - End"
}

#
# domain_migrate
#
# Migrate or live migrate ${DOMAIN} to the host found by get_target_host.
#
# On success a noop_${RESOURCE} key is added to ${CCR_TABLE}, and for an xvm
# domain the domain is then deleted on this node. If the migration command
# returns 99 (logged here as a timeout) or any other non-zero status, the
# matching cancel_${VM}_migration routine is called.
#
# Globals read:    MIGRATION_TYPE, VM, DOMAIN, TARGET_HOST,
#                  PRIVATELINK_TARGET_HOST, RESOURCE, CCRADM, CCR_TABLE,
#                  LOGFILE
# Globals written: MSG
# Returns:         0 if the domain was migrated and the NO-OP flag was
#                  stored; 1 otherwise.
#
domain_migrate()
{
    debug_message "Function: domain_migrate - Begin"
    ${SET_DEBUG}

    typeset rc

    [[ "${MIGRATION_TYPE}" = "MIGRATE" ]] && MSG="migrated"
    [[ "${MIGRATION_TYPE}" = "MIGRATE_LIVE" ]] && MSG="live migrated"

    if get_target_host
    then
        # SCMSGS
        # @explanation
        # The domain is being migrated or live migrated to the target host.
        # @user_action
        # None required.
        scds_syslog -p daemon.notice -t $(syslog_tag) -m \
            "Domain %s is being %s to %s." \
            "${DOMAIN}" "${MSG}" "${TARGET_HOST}"

        migrate_${VM} ${MIGRATION_TYPE} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}
        rc=${?}

        if (( ${rc} == 0 ))
        then
            # SCMSGS
            # @explanation
            # The domain was migrated or live migrated to the target host.
            # @user_action
            # None required. The domain successfully migrated or live migrated
            # from the source node to the target node.
            scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Domain %s successfully %s to %s." \
                "${DOMAIN}" "${MSG}" "${TARGET_HOST}"

            # As the domain has been successfully migrated or live migrated
            # we need to indicate a successful stop by performing a NO-OP stop
            # and subsequently a successful start by performing a NO-OP start.

            if ${CCRADM} addkey --key=noop_${RESOURCE} --value="1" ${CCR_TABLE} >> $LOGFILE 2>&1
            then
                debug_message "domain_migrate - .noop_${RESOURCE} flag added to CCR"

                # Only announce the NO-OP stop once the flag is
                # actually stored in the CCR.

                # SCMSGS
                # @explanation
                # The domain was migrated or live migrated.
                # @user_action
                # None required. Informational message.
                scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                    "NO-OP STOP being performed."
            else
                # SCMSGS
                # @explanation
                # Failed to update the XML configuration to the CCR.
                # @user_action
                # Check the syslog for further messages.
                # Determine why the ccr update failed.
                scds_syslog -p daemon.error -t $(syslog_tag) -m \
                    "Failed to add NO-OP flag for %s to ccr." \
                    "${DOMAIN}"

                rc=1
            fi
        elif (( ${rc} == 99 ))
        then
            # SCMSGS
            # @explanation
            # The domain migration or live migration timed out.
            # @user_action
            # None required. Informational message.
            scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Migration of domain %s timed out, the domain state is now shut off." \
                "${DOMAIN}"

            rc=1
            cancel_${VM}_migration
        else
            # SCMSGS
            # @explanation
            # The domain failed to migrate or live migrate to the target host.
            # @user_action
            # None required. The domain failed to migrate or live migrate
            # from the source node to the target node. A normal failover
            # will be performed.
            scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Domain %s failed to %s to %s, normal failover will be performed." \
                "${DOMAIN}" "${MSG}" "${TARGET_HOST}"

            rc=1
            cancel_${VM}_migration
        fi
    else
        rc=1
    fi

    # If the domain has successfully migrated, we will now delete the domain.
    #
    # Doing this ensures that the domain is only defined and able to be started
    # on one cluster node at a time. Domains can use shared storage between cluster
    # nodes so it is very important that we prevent any data corruption if a domain
    # gets manually started on multiple cluster nodes where shared storage is used.
    #
    # Of course using SUNW.HAStoragePlus somewhat protects against this, however we
    # simply want to avoid any manual administrative errors performed by mistake.
    #
    # Note, unless the domain was migrated or live migrated, the domain is defined
    # before startup using a previously dumped XML file for the administrative file
    # system.

    (( ${rc} == 0 )) && [[ "${VM}" == "xvm" ]] && domain_delete

    debug_message "Function: domain_migrate - End"
    return ${rc}
}

#
# routines to perform domain shutdown
#

#
# shutdown_xvm
#
# Request a shutdown of ${DOMAIN} with "virsh shutdown"; output is
# discarded.
#
# Returns: exit status of the virsh command.
#
shutdown_xvm()
{
    debug_message "Function: shutdown_xvm - Begin"
    ${SET_DEBUG}

    typeset rc=0

    # Note that the virsh shutdown command returns before the domain
    # has shutdown, as such we do not use hatimerun.

    ${VIRSH} shutdown ${DOMAIN} > /dev/null 2>&1
    rc=${?}

    debug_message "Function: shutdown_xvm - End"
    return ${rc}
}

#
# shutdown_ldom
#
# Stop ${DOMAIN} with "ldm stop-domain" (under hatimerun, limited to
# ${MAX_STOP_TIMEOUT}) when its status is active, suspending, resuming,
# suspended or starting.
#
# Globals written: status
# Returns:         2 if get_${VM}_status fails (treated as domain not
#                  present); 0 if the domain is already stopped; otherwise
#                  the exit status of the hatimerun command.
#
shutdown_ldom()
{
    debug_message "Function: shutdown_ldom - Begin"
    ${SET_DEBUG}

    typeset rc

    status=$(get_${VM}_status)
    if (( ${?} == 0 ))
    then
        if echo ${status} | ${GREP} -q -E "^active$|^suspending|^resuming|^suspended|^starting" > /dev/null 2>&1
        then
            ${HATIMERUN} -t ${MAX_STOP_TIMEOUT} -k KILL ${LDM} stop-domain ${DOMAIN} >> $LOGFILE 2>&1
            rc=${?}
        else
            # domain is already stopped
            rc=0
        fi
    else
        # domain is not present.
        rc=2
    fi

    debug_message "Function: shutdown_ldom - End"
    return ${rc}
}

#
# domain_shutdown
#
# Shut ${DOMAIN} down gracefully via shutdown_${VM}, polling is_${VM}_up
# every 5 seconds while SECONDS is below ${MAX_STOP_TIMEOUT}. If the domain
# is still up afterwards, or the shutdown request itself failed, fall back
# to destroy_${VM}. A domain that was stopped successfully is then deleted
# on this node.
#
# Globals written: SECONDS (set to MAX_STOP_TIMEOUT to leave the wait loop)
# Returns:         0 if shutdown_${VM} reported the domain as not present
#                  (status 2) or the domain was stopped; otherwise the
#                  status of destroy_${VM}.
#
domain_shutdown()
{
    debug_message "Function: domain_shutdown - Begin"
    ${SET_DEBUG}

    typeset rc

    # Coordinate with the domain OS to perform a graceful shutdown.
    # Note that the virsh shutdown command returns before the domain
    # has shutdown, as such we do not use hatimerun.

    shutdown_${VM}
    rc=${?}

    if (( ${rc} == 2 ))
    then
        debug_message "Function: domain_shutdown - End"
        return 0
    elif (( ${rc} == 0 ))
    then
        # Loop to test if the domain shuts down gracefully
        # or if the shutdown time is exceeded.

        while (( ${SECONDS} < ${MAX_STOP_TIMEOUT} ))
        do
            if is_${VM}_up
            then
                sleep 5
            else
                SECONDS=${MAX_STOP_TIMEOUT}
            fi
        done

        if is_${VM}_up
        then
            # SCMSGS
            # @explanation
            # The domain failed to shutdown gracefully.
            # @user_action
            # None required. The domain failed to shutdown
            # gracefully and will now be immediately terminated.
            scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Domain %s failed to shutdown gracefully, immediate shutdown will now be performed." \
                "${DOMAIN}"

            destroy_${VM}
            rc=${?}
        else
            # SCMSGS
            # @explanation
            # The domain was shutdown gracefully.
            # @user_action
            # None required. The domain has shutdown gracefully.
            scds_syslog -p daemon.info -t $(syslog_tag) -m \
                "Domain %s has been gracefully shutdown." \
                "${DOMAIN}"

            rc=0
        fi
    else
        # error already logged
        destroy_${VM}
        rc=${?}
    fi

    # If the domain has successfully shutdown, we will now delete the domain.
    #
    # Doing this ensures that the domain is only defined and able to be started
    # on one cluster node at a time. Domains can use shared storage between cluster
    # nodes so it is very important that we prevent any data corruption if a domain
    # gets manually started on multiple cluster nodes where shared storage is used.
    #
    # Of course using SUNW.HAStoragePlus somewhat protects against this, however we
    # simply want to avoid any manual administrative errors performed by mistake.
    #
    # Note, unless the domain was migrated or live migrated, the domain is defined
    # before startup using a previously dumped XML file for the administrative file
    # system.

    (( ${rc} == 0 )) && domain_delete

    debug_message "Function: domain_shutdown - End"
    return ${rc}
}

#
# routines to destroy domain
#

#
# destroy_xvm
#
# Immediately terminate ${DOMAIN} with "virsh destroy" and log the outcome.
#
# Returns: 0 if virsh succeeded, 1 otherwise.
#
destroy_xvm()
{
    debug_message "Function: destroy_xvm - Begin"
    ${SET_DEBUG}

    typeset rc

    if ${VIRSH} destroy ${DOMAIN} >> $LOGFILE 2>&1
    then
        # SCMSGS
        # @explanation
        # The domain was immediately terminated.
        # @user_action
        # None required. The domain had previously failed to shutdown
        # gracefully but has now been immediately terminated.
        scds_syslog -p daemon.notice -t $(syslog_tag) -m \
            "Domain %s has been immediately terminated." \
            "${DOMAIN}"

        rc=0
    else
        # SCMSGS
        # @explanation
        # The /usr/bin/virsh destroy command failed.
        # @user_action
        # Determine why it was not possible to immediately terminate
        # the domain.
        scds_syslog -p daemon.error -t $(syslog_tag) -m \
            "Domain %s failed to shutdown immediately." \
            "${DOMAIN}"

        rc=1
    fi

    debug_message "Function: destroy_xvm - End"
    return ${rc}
}

#
# destroy_ldom
#
# Forcefully stop ${DOMAIN} with "ldm stop-domain -f" and log the outcome.
#
# Returns: 0 if ldm succeeded, 1 otherwise.
#
destroy_ldom()
{
    debug_message "Function: destroy_ldom - Begin"
    ${SET_DEBUG}

    typeset rc

    if ${LDM} stop-domain -f ${DOMAIN} >> $LOGFILE 2>&1
    then
        # SCMSGS
        # @explanation
        # The domain was immediately terminated.
        # @user_action
        # None required. The domain had previously failed to shutdown
        # gracefully but has now been immediately terminated.
        scds_syslog -p daemon.notice -t $(syslog_tag) -m \
            "Domain %s has been forcefully terminated." \
            "${DOMAIN}"

        rc=0
    else
        # SCMSGS
        # @explanation
        # The /opt/SUNWldm/bin/ldm stop-domain "-f" command failed.
        # @user_action
        # Determine why it was not possible to forcefully stop
        # the domain.
        scds_syslog -p daemon.error -t $(syslog_tag) -m \
            "Domain %s failed to do a forceful shutdown." \
            "${DOMAIN}"

        rc=1
    fi

    debug_message "Function: destroy_ldom - End"
    return ${rc}
}

#
# routines to remove domains from the node
#

#
# domain_delete
#
# Delete ${DOMAIN} on this node through delete_${VM} and log a notice when
# that succeeds.
#
# Returns: 0 if delete_${VM} succeeded, 1 otherwise.
#
domain_delete()
{
    debug_message "Function: domain_delete - Begin"
    ${SET_DEBUG}

    # The purpose of deleting the domain after shutdown is to avoid the possibility of
    # someone manually starting the domain on a different node. Doing so would compromise
    # the domain if shared storage was used for the domain. The domain's configuration
    # is always dumped to the agent's administrative file system so that the domain can
    # be defined before startup.

    typeset rc

    if delete_${VM}
    then
        # SCMSGS
        # @explanation
        # The domain was deleted.
        # @user_action
        # None required. The domain has been deleted as it
        # will be defined on another node. Deleting the domain
        # on this node ensures that it can't be started on
        # more than one cluster node at a time.
        scds_syslog -p daemon.notice -t $(syslog_tag) -m \
            "Domain %s has been deleted on this node." \
            "${DOMAIN}"

        rc=0
    else
        # error already logged.
        rc=1
    fi

    debug_message "Function: domain_delete - End"
    return ${rc}
}

#
# delete_xvm
#
# Delete ${DOMAIN} with "xm delete"; log an error if the command fails.
#
# Returns: 0 if xm succeeded, 1 otherwise.
#
delete_xvm()
{
    debug_message "Function: delete_xvm - Begin"
    ${SET_DEBUG}

    typeset rc=0

    # Use the ${XM} command definition rather than a hard-coded path.
    if ! ${XM} delete ${DOMAIN} >> $LOGFILE 2>&1
    then
        # SCMSGS
        # @explanation
        # The /usr/sbin/xm delete command failed.
        # @user_action
        # Determine why it was not possible to delete the domain.
        scds_syslog -p daemon.error -t $(syslog_tag) -m \
            "Failed to delete domain %s on this node." \
            "${DOMAIN}"

        rc=1
    fi

    debug_message "Function: delete_xvm - End"
    return ${rc}
}

#
# delete_ldom
#
# Remove ${DOMAIN} with "ldm remove-domain", first unbinding it with
# "ldm unbind-domain" when get_${VM}_status reports it as bound.
#
# Returns: 0 if the domain was removed; 1 if the unbind or the remove
#          command failed.
#
delete_ldom()
{
    debug_message "Function: delete_ldom - Begin"
    ${SET_DEBUG}

    if get_${VM}_status | ${GREP} -q -E "^bound$" > /dev/null 2>&1
    then
        # if the domain is in bound state, unbind it.
        ${LDM} unbind-domain ${DOMAIN} >> $LOGFILE 2>&1
        if (( ${?} != 0 ))
        then
            # SCMSGS
            # @explanation
            # The /opt/SUNWldm/bin/ldm unbind-domain command failed.
            # @user_action
            # Determine why it was not possible to unbind the domain.
            scds_syslog -p daemon.error -t $(syslog_tag) -m \
                "Failed to unbind domain %s on this node." \
                "${DOMAIN}"

            debug_message "Function: delete_ldom - End"
            return 1
        fi
    fi

    if ! ${LDM} remove-domain ${DOMAIN} >> $LOGFILE 2>&1
    then
        # SCMSGS
        # @explanation
        # The /opt/SUNWldm/bin/ldm remove-domain command failed.
        # @user_action
        # Determine why it was not possible to remove the domain.
        scds_syslog -p daemon.error -t $(syslog_tag) -m \
            "Failed to remove domain %s on this node." \
            "${DOMAIN}"

        debug_message "Function: delete_ldom - End"
        return 1
    fi

    debug_message "Function: delete_ldom - End"
    return 0
}