1 #!/bin/ksh
   2 #
   3 # CDDL HEADER START
   4 #
   5 # The contents of this file are subject to the terms of the
   6 # Common Development and Distribution License (the License).
   7 # You may not use this file except in compliance with the License.
   8 #
   9 # You can obtain a copy of the license at usr/src/CDDL.txt
  10 # or http://www.opensolaris.org/os/licensing.
  11 # See the License for the specific language governing permissions
  12 # and limitations under the License.
  13 #
  14 # When distributing Covered Code, include this CDDL HEADER in each
  15 # file and include the License file at usr/src/CDDL.txt.
  16 # If applicable, add the following below this CDDL HEADER, with the
  17 # fields enclosed by brackets [] replaced with your own identifying
  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 # Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23 # Use is subject to license terms.
  24 #
  25 # ident "%Z%%M% %I%     %E% SMI"
  26 #
  27 
  28 PKG=SUNWscxvm
  29 TASK_COMMAND=""
  30 RESOURCE_PROJECT_NAME=""
  31 CCR_TABLE=${VM}_"domain_config"
  32 TMP_DIR="/var/tmp"
  33 LOGFILE=${TMP_DIR}/${RESOURCE}_logfile
  34 
  35 # Commands definition
  36 SCLOGGER=/usr/cluster/lib/sc/scds_syslog
  37 LOGGER=/usr/bin/logger
  38 GREP=/usr/xpg4/bin/grep
  39 AWK=/usr/bin/awk
  40 PGREP=/usr/bin/pgrep
  41 SLEEP=/usr/bin/sleep
  42 TR=/usr/xpg4/bin/tr
  43 SCHA_RESOURCE_GET=/usr/cluster/bin/scha_resource_get
  44 SCHA_RESOURCEGROUP_GET=/usr/cluster/bin/scha_resourcegroup_get
  45 SCHA_CLUSTER_GET=/usr/cluster/bin/scha_cluster_get
  46 HATIMERUN=/usr/cluster/bin/hatimerun
  47 LDM=/opt/SUNWldm/bin/ldm
  48 VIRSH=/usr/bin/virsh
  49 XM=/usr/sbin/xm
  50 CCRADM=/usr/cluster/lib/sc/ccradm
  51 CL_EXEC_CLIENT=/usr/cluster/lib/sc/cl_exec_client
  52 
  53 syslog_tag()
  54 {
  55         ${SET_DEBUG}
  56         print "SC[${PKG:-??}.${METHOD:-??}]:${RESOURCEGROUP:-??}:${RESOURCE:-??}"
  57 }
  58 
  59 scds_syslog()
  60 {
  61         if [ -f "${SCLOGGER}" ]
  62         then
  63            ${SCLOGGER} "$@" &
  64         else
  65            while getopts 'p:t:m' opt
  66            do
  67               case "${opt}" in
  68                  t) TAG=${OPTARG};;
  69                  p) PRI=${OPTARG};;
  70               esac
  71            done
  72       
  73            shift $((${OPTIND} - 1))
  74            LOG_STRING=$(/usr/bin/printf "$@")
  75            ${LOGGER} -p ${PRI} -t ${TAG} ${LOG_STRING}
  76         fi
  77 
  78         if [[ "${METHOD}" == "validate" ]]
  79         then
  80            shift 5
  81            /usr/bin/printf "$@"
  82         fi
  83 }
  84 
  85 debug_message()
  86 {
  87         typeset DEBUG_TEXT=
  88    
  89         case ${DEBUG_LEVEL} in
  90            0)   # No debug msgs
  91               SET_DEBUG=
  92               ;;
  93            1)  # Begin and End msgs
  94               SET_DEBUG=
  95               DEBUG_TEXT=$(echo ${1} | ${GREP} -E 'Begin|End')
  96               ;;
  97            2)  # All debug msgs
  98               SET_DEBUG="set -x"
  99               DEBUG_TEXT=${1}
 100               ;;
 101         esac
 102    
 103         [[ -n "${DEBUG_TEXT}" ]] && \
 104              scds_syslog -p daemon.debug -t $(syslog_tag) -m \
 105                   "%s" "${DEBUG_TEXT}"
 106 }
 107 
 108 log_message()
 109 {
 110         #
 111         # Output a message to syslog as required
 112         #
 113    
 114         debug_message "Function: log_message - Begin"
 115    
 116         ${SET_DEBUG}
 117    
 118         if [ -s "${LOGFILE}" ]
 119         then
 120            PRIORITY=${1}
 121            HEADER=${2}
 122       
 123            #
 124            # Ensure that the while loop only reads a closed file
 125            #
 126            strings ${LOGFILE} > ${LOGFILE}.copy
 127            while read MSG_TXT
 128            do
 129               scds_syslog -p daemon.${PRIORITY} -t $(syslog_tag) -m \
 130                    "%s - %s" "${HEADER}" "${MSG_TXT}"
 131            done < ${LOGFILE}.copy
 132         fi
 133    
 134         cat /dev/null > ${LOGFILE} > /dev/null
 135         cat /dev/null > ${LOGFILE}.copy
 136    
 137         debug_message "Function: log_message - End"
 138 }
 139 
 140 
 141 get_resource_property()
 142 {
 143         debug_message "Function: get_resource_property - Begin"
 144         ${SET_DEBUG}
 145    
 146         typeset RS=${1}
 147         typeset PROPERTY=${2}
 148         typeset rc
 149    
 150         # Retrieve the property value.
 151         OUTPUT=$(${SCHA_RESOURCE_GET} -O Extension -R ${RS} ${PROPERTY})
 152         rc=${?}
 153    
 154         debug_message "get_resource_property - " \
 155              "scha_resource_get of property ${PROPERTY} returned ${rc}"
 156    
 157         if (( ${rc} == 0 ))
 158         then
 159            # print the values
 160            echo ${OUTPUT} | ${AWK} '{ \
 161                 if (NF > 1) for (i = 2; i <= NF; i++) print $i; else print "" }'
 162         fi
 163    
 164         debug_message "Function: get_resource_property - End"
 165    
 166         return ${rc}
 167 }
 168 
 169 
 170 get_properties()
 171 {
 172         debug_message "Function: get_properties - Begin"
 173         ${SET_DEBUG}
 174    
 175         typeset -i rc
 176         typeset props=$*
 177    
 178         for prop in ${props}
 179         do
 180            # retrieve the property value
 181            typeset val=$(get_resource_property ${RESOURCE} ${prop})
 182            rc=${?}
 183       
 184            if (( ${rc} == 0 ))
 185            then
 186               case ${prop} in
 187                  Domain_name)    [[ -z ${DOMAIN} ]] && DOMAIN=${val};;
 188                  Migration_type) [[ -z ${MIGRATION_TYPE} ]] && MIGRATION_TYPE=${val};;
 189                  Plugin_probe)   [[ -z ${PLUGIN_PROBE} ]] && PLUGIN_PROBE=${val};;
 190                  Password_file)  [[ -z ${PASSWORD_FILE} ]] && PASSWORD_FILE=${val};;
 191                  Debug_level)    [[ -z ${DEBUG_LEVEL} ]] && DEBUG_LEVEL=${val};;
 192               esac
 193            else
 194               # SCMSGS
 195               # @explanation
 196               # The scha_resource_get call failed.
 197               # @user_action
 198               # Check the syslog for further messages.
 199               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 200                    "Cannot get the property %s of resource %s." \
 201                    "${prop}" "${RESOURCE}"
 202               break
 203            fi
 204         done
 205    
 206         debug_message "Function: get_properties - End"
 207    
 208         return ${rc}
 209 }
 210 
 211 validate_xvm()
 212 {
 213         debug_message "Function: validate_xvm - Begin"
 214         ${SET_DEBUG}
 215 
 216         typeset rc=0
 217    
 218         if [ "$(/usr/bin/uname -i)" != "i86xpv" ]
 219         then
 220            # SCMSGS
 221            # @explanation
 222            # Solaris is not booted with xVM.
 223            # @user_action
 224            # Ensure that the default boot grub menu is set to boot
 225            # Solaris xVM.
 226            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 227                 "Node is not booted with xVM."
 228 
 229            rc=1
 230         fi
 231 
 232         debug_message "Function: validate_xvm - End"
 233 
 234         return ${rc}
 235 }
 236 
 237 validate_ldom()
 238 {
 239         debug_message "Function: validate_ldom - Begin"
 240         ${SET_DEBUG}
 241 
 242         typeset ncount=0
 243 
 244         # Make sure that the password file is readable.
 245         if [ ! -r "${PASSWORD_FILE}" ]
 246         then
 247            # SCMSGS
 248            # @explanation
 249            # Incorrect Password file specified.
 250            # @user_action
 251            # Ensure that a valid password file is specified.
 252            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 253               "Invalid password file specified %s." \
 254               "${PASSWORD_FILE}"
 255 
 256            debug_message "Function: validate_ldom - End"
 257            return 1
 258         fi
 259         
 260         # Ensure that the control domain is a cluster node.
 261         if ! ${LDM} ls > /dev/null 2>&1
 262         then
 263            # SCMSGS
 264            # @explanation
 265            # Self explanatory.
 266            # @user_action
 267            # Ensure that the resource is configured in
 268            # control domain.
 269            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 270                "The LDom Manager is running in configuration mode."
 271 
 272               debug_message "Function: validate_ldom - End"
 273            return 1
 274         fi
 275 
 276         # Ensure that the failure-policy setting is set to "reset".
 277         # If the control domain fails,this would allow the guest domains
 278         # to panic. 
 279         policy=$(${LDM} list -o domain primary \
 280             | ${AWK} -F"=" '$1~/failure-policy/ {print $2}')
 281 
 282         if [ "${policy}" != "reset" ]
 283         then
 284            # SCMSGS
 285            # @explanation
 286            # Incorrect failure-policy setting for the domain.
 287            # @user_action
 288            # Ensure that the failure-policy for the domain is
 289            # set to "reset" on the control domain.
 290            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 291               "Invalid failure policy \"%s\" for %s domain." \
 292               "${policy}" "primary"
 293 
 294            debug_message "Function: validate_ldom - End"
 295            return 1
 296         fi
 297 
 298         # The CL_EXEC_CLIENT program executes a command on any of the 
 299         # cluster nodes or a zone or in a zone cluster. It then generates
 300         # as output the exit status of command and the stdout and stderr
 301         # messages. The valid options are:
 302         #     [ -z zoneclustername] The command is run on the zone cluster
 303         # represented by the zonename.
 304         #     -C { TS | RT | FSS | FX } The scheduling class in which the
 305         #  command is to be run.
 306         #     -p pri Specifies the priority of the command in the given
 307         # scheduling class.
 308         #     -n id[,id..] A comma seperated list of node ID's of a
 309         # zone cluster or a node to run the command.
 310         #     -c cmd [Args] The command to be run along with its arguments.
 311 
 312         for nodename in $(${SCHA_RESOURCEGROUP_GET} -O NODELIST -G ${RESOURCEGROUP})
 313         do
 314            if [[ "$(${SCHA_CLUSTER_GET} -O NodeState_Node ${nodename})" == "DOWN" ]]
 315            then
 316               continue
 317            fi
 318 
 319            nodeid=$(${SCHA_CLUSTER_GET} -O NODEID_NODENAME ${nodename})
 320            output=$(${CL_EXEC_CLIENT} -n ${nodeid} -c "${LDM} list-domain ${DOMAIN}")
 321            result=${?}
 322 
 323            status=$(echo ${output} | ${AWK} '{print $6}')
 324 
 325            if (( ${result} == 0 )) && (( ${status} == 0 ))
 326            then
 327               domstate=$(echo $output | ${AWK} -F" " '{print $18}')     
 328 
 329               if (( ${update} == 0)) && echo $domstate | ${GREP} -q -E "^active$|suspending|resuming|suspended|starting" > /dev/null 2>&1
 330               then
 331                  # SCMSGS
 332                  # @explanation
 333                  # The domain is in an invalid state.
 334                  # @user_action
 335                  # Ensure that the domain is in inactive or bound state.
 336                  scds_syslog -p daemon.error -t $(syslog_tag) -m \
 337                     "Domain %s is in %s state on %s." \
 338                     "${DOMAIN}" "${domstate}" "${nodename}"
 339 
 340                  debug_message "Function: validate_ldom - End"
 341                  return 1
 342               fi
 343 
 344               ncount=$((ncount+1))
 345               nlist=$(echo ${nodename} ${nlist})
 346 
 347               # dump domain confguration to ccr
 348               if [[ "$(/usr/bin/hostname)" == "${nodename}" ]]
 349               then
 350                  if ! dump_domain_config
 351                  then
 352                     debug_message "Function: validate_ldom - End"
 353                     return 1
 354                  fi
 355               fi
 356             fi
 357         done
 358 
 359         if (( ${ncount} == 0 ))
 360         then
 361            if ! ${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} > /dev/null 2>&1
 362            then
 363               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 364                  "Domain %s does not exist." \
 365                  "${DOMAIN}"
 366               return 1
 367            fi
 368         fi
 369 
 370         if [[ ${ncount} -gt 1 ]]
 371         then
 372            # SCMSGS
 373            # @explanation
 374            # The domain is configured on multiple 
 375            # cluster nodes.
 376            # @user_action
 377            # Ensure that the domain is configured on one node
 378            # of the cluster.
 379            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 380               "Multiple domain %s configuration exists on %s." \
 381               "${DOMAIN}" "${nlist}"
 382            return 1
 383         fi
 384 
 385         debug_message "Function: validate_ldom - End"
 386         return 0
 387 }
 388 
 389 validate()
 390 {
 391         debug_message "Function: validate - Begin"
 392         ${SET_DEBUG}
 393 
 394         typeset rc
 395 
 396         # Make sure that the plugin probe specified is readable.
 397         if [[ -n "${PLUGIN_PROBE}" ]]
 398         then
 399            if [ -f "${PLUGIN_PROBE}" ] && [ ! -r "${PLUGIN_PROBE}" ]
 400            then
 401                # SCMSGS
 402                # @explanation
 403                # Incorrect user probe file specified.
 404                # @user_action
 405                # Ensure that a valid user probe file is specified.
 406                scds_syslog -p daemon.error -t $(syslog_tag) -m \
 407                    "Invalid user probe file %s." \
 408                    "${PLUGIN_PROBE}"
 409 
 410                return 1
 411            fi
 412         fi
 413    
 414         validate_${VM}
 415         rc=${?}   
 416 
 417         debug_message "Function: validate - End"
 418         return ${rc}
 419 }
 420 
 421 #
 422 # get the domain status 
 423 #
 424 get_xvm_status()
 425 {
 426         debug_message "Function: get_xvm_status - Begin"
 427         ${SET_DEBUG}
 428 
 429         typeset rc
 430 
 431         ${VIRSH} domstate ${DOMAIN}
 432         rc=${?}   
 433 
 434         debug_message "Function: get_xvm_status - End"
 435         return ${rc}
 436 }
 437 
 438 get_ldom_status()
 439 {
 440         debug_message "Function: get_ldom_status - Begin"
 441         ${SET_DEBUG}
 442 
 443         typeset rc=1
 444 
 445         OUTPUT=$(${LDM} list-domain ${DOMAIN})
 446 
 447         if (( ${?} == 0 ))
 448         then
 449            echo ${OUTPUT} | ${AWK} '{print $10}'
 450            rc=${?}
 451         fi
 452    
 453         debug_message "Function: get_ldom_status - End"
 454         return ${rc}
 455 }
 456 
 457 #
 458 # Routines to create the domain on the current cluster node.
 459 #
 460 add_xvm_domain()
 461 {
 462         debug_message "Function: add_xvm_domain - Begin"
 463         ${SET_DEBUG}
 464 
 465         typeset rc=0
 466    
 467         if ! ${VIRSH} define ${TMP_DIR}/${RESOURCE}.xml >> $LOGFILE 2>&1
 468         then
 469            # SCMSGS
 470            # @explanation
 471            # Defining the domain using an XML file failed.
 472            # @user_action
 473            # The command /usr/bin/virsh define failed to define the domain.
 474            # Determine if you have specified the correct domain name while
 475            # registering the resource.
 476            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 477                 "Failed to define %s using %s/%s.xml." \
 478                 "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}"
 479            rc=1
 480         fi
 481    
 482         debug_message "Function: add_xvm_domain - End"
 483         return ${rc}
 484 }
 485 
 486 add_ldom_domain()
 487 {
 488         debug_message "Function: add_ldom_domain - Begin"
 489         ${SET_DEBUG}
 490 
 491         typeset rc=0
 492    
 493         if ! ${LDM} add-domain -i ${TMP_DIR}/${RESOURCE}.xml ${DOMAIN} >> $LOGFILE 2>&1
 494         then
 495            # SCMSGS
 496            # @explanation
 497            # Defining the domain using an XML file failed.
 498            # @user_action
 499            # The command /opt/SUNWldm/bin/ldm "add-domain"
 500            # failed to define the domain. Determine if you
 501            # have specified the correct domain name when
 502            # registering the resource.
 503            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 504                 "Failed to add the domain %s using %s/%s.xml." \
 505                 "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}"
 506            rc=1
 507         fi
 508    
 509         debug_message "Function: add_ldom_domain - End"
 510         return ${rc}
 511 }
 512 
 513 #
 514 # test if domain is active
 515 #
 516 is_xvm_up()
 517 {
 518         debug_message "Function: is_xvm_up - Begin"
 519         ${SET_DEBUG}
 520 
 521         typeset rc=0
 522 
 523         echo $(${VIRSH} domstate ${DOMAIN}) | \
 524              ${GREP} -q -E "running|blocked|paused|in shutdown" > /dev/null 2>&1
 525         rc=${?}
 526    
 527         debug_message "Function: is_xvm_up - End"
 528         return ${rc}
 529 }
 530 
 531 is_ldom_up()
 532 {
 533         debug_message "Function: is_ldom_up - Begin"
 534         ${SET_DEBUG}
 535 
 536         typeset rc=0
 537 
 538         get_ldom_status | ${GREP} -q -E "^active$|^starting$" > /dev/null 2>&1
 539         rc=${?}
 540    
 541         debug_message "Function: is_ldom_up - End"
 542         return ${rc}
 543 }
 544 
 545 #
 546 # wrapper routines to start xvm or ldom domains
 547 #
 548 start_xvm()
 549 {
 550         debug_message "Function: start_xvm - Begin"
 551         ${SET_DEBUG}
 552 
 553         typeset rc=0
 554 
 555         ${VIRSH} start ${DOMAIN} >> $LOGFILE 2>&1
 556         rc=${?}
 557    
 558         debug_message "Function: start_xvm - End"
 559         return ${rc}
 560 }
 561 
 562 #
 563 # After a crash/reboot of the node, the domain
 564 # would be started and there would be multiple
 565 # instances of the same domain across cluster
 566 # nodes. Hence the domain is destroyed.
 567 #
 568 init_ldom()
 569 {
 570         debug_message "Function: init_ldom - Begin"
 571         ${SET_DEBUG}
 572 
 573         typeset rc
 574 
 575         MAX_STOP_TIMEOUT=$(${SCHA_RESOURCE_GET} -O INIT_TIMEOUT \
 576            -R ${RESOURCE} -G ${RESOURCEGROUP} )
 577 
 578         domain_shutdown
 579         rc=${?}
 580 
 581         debug_message "Function: init_ldom - End"
 582         return ${rc}
 583 }
 584 
 585 start_ldom()
 586 {
 587         debug_message "Function: start_ldom - Begin"
 588         ${SET_DEBUG}
 589 
 590         typeset rc=0
 591 
 592         if get_${VM}_status | ${GREP} -q -E "^inactive$" > /dev/null 2>&1
 593         then
 594            if ${LDM} bind-domain ${DOMAIN} >> $LOGFILE 2>&1
 595            then
 596               # SCMSGS
 597               # @explanation
 598               # The domain was bound.
 599               # @user_action
 600               # None required. The domain has been bound on this node.
 601               scds_syslog -p daemon.notice -t $(syslog_tag) -m \
 602                    "Domain %s is bound." \
 603                    "${DOMAIN}"
 604               rc=0
 605            else
 606               # SCMSGS
 607               # @explanation
 608               # The /opt/SUNWldm/bin/ldm bind-domain command failed.
 609               # @user_action
 610               # Determine why it was not possible to bind the domain.
 611               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 612                    "Failed to bind %s." \
 613                    "${DOMAIN}"
 614               rc=1
 615            fi
 616       
 617         fi
 618         
 619         #
 620         # The domain is made to sit at the OBP prompt, so a reboot/crash
 621         # wouldn't boot the Guest domain OS.
 622         # 
 623         if (( ${rc} == 0 )) && ${LDM} set-var auto-boot?=true ${DOMAIN} >> $LOGFILE 2>&1
 624         then
 625            if ${LDM} start-domain ${DOMAIN} >> $LOGFILE 2>&1
 626            then
 627               while [ 1 ]
 628               do
 629                  flag=$(${LDM} list-domain -p ${DOMAIN} | ${GREP} ${DOMAIN} \
 630                     | ${AWK} -F"|" '{print $4}'| ${AWK} -F"=" '{print $2}')
 631                  [[ "${flag}" == "-n----" ]] && break
 632                  ${SLEEP} 1     
 633               done 
 634            else
 635               rc=1
 636            fi
 637            ${LDM} set-var auto-boot?=false ${DOMAIN} >> $LOGFILE 2>&1 || rc=1
 638         else
 639            rc=1
 640         fi
 641 
 642         debug_message "Function: start_ldom - End"
 643         return ${rc}
 644 }
 645 
 646 start_domain()
 647 {
 648         debug_message "Function: start_domain - Begin"
 649         ${SET_DEBUG}
 650    
 651         typeset rc=0
 652    
 653         # Turn off PMF restart. Starting a domain does not leave
 654         # a running pid as in a classic Solaris Cluster agent.
 655    
 656         START_TIMEOUT=$(${SCHA_RESOURCE_GET} -O START_TIMEOUT \
 657              -R ${RESOURCE} -G ${RESOURCEGROUP} )
 658    
 659         ${SLEEP} ${START_TIMEOUT} &
 660         /usr/cluster/bin/pmfadm -s ${RESOURCEGROUP},${RESOURCE},0.svc
 661    
 662         # Check if the domain exists.
 663         #
 664         # If the domain does not exist, we maybe starting the domain
 665         # on a new cluster node following a failover. As such we will
 666         # define the domain using the previously dumped XML file
 667         # located within the agent's administrative file system.
 668         #
 669         # If the domain already exists, either the domain was manually
 670         # started or the domain was migrated or live migrated from
 671         # another cluster node. Therefore, we will use the already
 672         # defined domain.
 673         #
 674         # Note that when the domain is successfully stopped the domain
 675         # is deleted. We do this simply to avoid the domain from
 676         # being manually started on multiple cluster nodes. See
 677         # domain_delete() for more information.
 678    
 679         if get_${VM}_status > /dev/null 2>&1
 680         then
 681            debug_message "Validate - domain ${DOMAIN} exists"
 682         else
 683            if ${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} > ${TMP_DIR}/${RESOURCE}.xml 2> /dev/null
 684            then
 685               # add the domain to the cluster node
 686               if add_${VM}_domain ${DOMAIN} ${TMP_DIR}/${RESOURCE}.xml
 687               then
 688                  # SCMSGS
 689                  # @explanation
 690                  # The domain is being defined using a XML file.
 691                  # @user_action
 692                  # None, the domain is being defined using a previously defined
 693                  # XML file when the domain was last successfully started.
 694                  scds_syslog -p daemon.notice -t $(syslog_tag) -m \
 695                       "Domain %s defined using %s/%s.xml." \
 696                       "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}"
 697               else
 698                  # error already logged.
 699                  debug_message "Function: start_domain - End"
 700                  return 1
 701               fi
 702            else
 703               # SCMSGS
 704               # @explanation
 705               # The domain does not exist.
 706               # @user_action
 707               # You must ensure that the domain exists.
 708               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 709                    "Domain %s does not exist." \
 710                    "${DOMAIN}"
 711               
 712               debug_message "Function: start_domain - End"
 713               return 1
 714            fi
 715         fi
 716         
 717         # Tolerate a manually started domain and a NO-OP start
 718         # otherwise start the domain.
 719         
 720         if ${CCRADM} showkey --key noop_${RESOURCE} ${CCR_TABLE} > /dev/null 2>&1
 721         then
 722            # SCMSGS
 723            # @explanation
 724            # The domain was migrated or live migrated.
 725            # @user_action
 726            # None required. Informational message.
 727            scds_syslog -p daemon.notice -t $(syslog_tag) -m \
 728                 "NO-OP START being performed."
 729            
 730            if ! ${CCRADM} delkey --key noop_${RESOURCE} ${CCR_TABLE} >> $LOGFILE 2>&1
 731            then
 732               # SCMSGS
 733               # @explanation
 734               # Failed to delete the NO-OP flag from CCR.
 735               # @user_action
 736               # Check the syslog for further messages.
 737               # Determine why the NO-OP flag was not added to the CCR.
 738               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 739                    "Failed to delete NO-OP flag for %s domain." \
 740                    "${DOMAIN}"
 741               
 742               debug_message "Function: start_domain - End"
 743               return 1
 744            else
 745               debug_message "start_domain - noop_${RESOURCE} deleted"
 746            fi
 747            
 748         elif is_${VM}_up
 749         then
 750            # SCMSGS
 751            # @explanation
 752            # The domain was manually started.
 753            # @user_action
 754            # None required. Informational message.
 755            scds_syslog -p daemon.notice -t $(syslog_tag) -m \
 756                 "Domain %s was manually started." \
 757                 "${DOMAIN}"
 758         else
 759            if start_${VM}
 760            then
 761               # SCMSGS
 762               # @explanation
 763               # The domain was started successfully.
 764               # @user_action
 765               # None required. Informational message.
 766               scds_syslog -p daemon.notice -t $(syslog_tag) -m \
 767                    "Domain %s started." \
 768                    "${DOMAIN}"
 769            else
 770               # SCMSGS
 771               # @explanation
 772               # The domain failed to start.
 773               # @user_action
 774               # Check the syslog for further messages. If possible
 775               # the cluster will attempt to restart the domain.
 776               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 777                    "Domain %s failed to start." \
 778                    "${DOMAIN}"
 779                    
 780               rc=1
 781            fi
 782         fi
 783         
 784         if (( ${rc} == 0 ))
 785         then
 786            # Dump the domain configuration into an XML file. This file is then
 787            # used on another cluster node to define the domain but only if the
 788            # domain does not exist.
 789            
 790            dump_domain_config
 791            rc=${?}
 792         fi
 793         
 794         debug_message "Function: start_domain - End"
 795         return ${rc}
 796 }
 797 
 798 #
 799 # dump the domain configuration
 800 #
 801 dump_xvm_xml()
 802 {
 803         debug_message "Function: dump_xvm_xml - Begin"
 804         ${SET_DEBUG}
 805         
 806         typeset rc=0
 807 
 808         if ! ${VIRSH} dumpxml ${DOMAIN} 2>> $LOGFILE
 809         then
 810            # SCMSGS
 811            # @explanation
 812            # "/usr/bin/virsh dumpxml" for domain failed.
 813            # @user_action
 814            # Determine why the command to dump domain
 815            # configuration failed.
 816            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 817                "%s dumpxml for domain %s failed." \
 818                "${VIRSH}" "${DOMAIN}"
 819             rc=${?}
 820         fi
 821 
 822         debug_message "Function: dump_xvm_xml - End"
 823         return ${rc}
 824 }
 825 
 826 dump_ldom_xml()
 827 {
 828         debug_message "Function: dump_ldom_xml - Begin"
 829         ${SET_DEBUG}
 830         
 831         typeset rc=0
 832 
 833         if ! ${LDM} list-constraints -x ${DOMAIN} 2>> $LOGFILE
 834         then
 835            # SCMSGS
 836            # @explanation
 837            # "/opt/SUNWldm/bin/ldm list-constraints -x"
 838            # for domain failed.
 839            # @user_action
 840            # Determine why the command to list the
 841            # domain constraints failed.
 842            scds_syslog -p daemon.error -t $(syslog_tag) -m \
 843                "%s list-constraints for domain %s failed." \
 844                "${LDM}" "${DOMAIN}"
 845             rc=1
 846         fi
 847    
 848         debug_message "Function: dump_ldom_xml - End"
 849         return ${rc}
 850 }
 851 
 852 #
 853 # save the domain configuration in the cluster
 854 # configuration repository
 855 #
 856 dump_domain_config()
 857 {
 858         debug_message "Function: dump_domain_config - Begin"
 859         ${SET_DEBUG}
 860         
 861         typeset rc=0
 862         
 863         # Dump the domain configuration into an XML file. The domain configuration
 864         # can be changed, when under the the agent control.
 865         
 866         olddesc=$(${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} 2> /dev/null)
 867         
 868         if (( ${?} == 1 ))
 869         then
 870            #
 871            # The ccr table might not exist.
 872            # create the CCR table, if it doesn't exist.
 873            #
 874            if ${CCRADM} addtab ${CCR_TABLE} >> $LOGFILE 2>&1
 875            then
 876               debug_message "created ccr table ${CCR_TABLE}"
 877            else
 878               # SCMSGS
 879               # @explanation
 880               # Failed to create the CCR table.
 881               # @user_action
 882               # Check the syslog for further messages.
 883               # Determine why the CCR create failed.
 884               scds_syslog -p daemon.error -t $(syslog_tag) -m \
 885                    "Failed to create CCR table %s." \
 886                    "${CCR_TABLE}"
 887               
 888               return 1
 889            fi
 890         fi
 891         
 892         output=$(dump_${VM}_xml)
 893         if (( ${?} == 0 )) && [[ -n "${output}" ]]
 894         then
 895            newdesc=$(echo ${output} | ${TR} -s '\n' '[ ]')
 896            if [ "${olddesc}" != "${newdesc}" ]
 897            then
 898               if ! ${CCRADM} addkey --key=xml_${RESOURCE} --value "${newdesc}" ${CCR_TABLE} > /dev/null 2>&1
 899               then
 900                  if ! ${CCRADM} changekey --key=xml_${RESOURCE} --value "${newdesc}" ${CCR_TABLE} >> $LOGFILE 2>&1
 901                  then
 902                     # SCMSGS
 903                     # @explanation
 904                     # Failed to update the XMl dump to the CCR.
 905                     # @user_action
 906                     # Check the syslog for further messages.
 907                     # Determine why the ccr update failed.
 908                     scds_syslog -p daemon.error -t $(syslog_tag) -m \
 909                          "Failed to update domain XML %s to ccr." \
 910                          "${DOMAIN}"
 911                     
 912                     rc=1
 913                  fi
 914               else
 915                  debug_message "dump_domain_config - %s configuration added to CCR" "${DOMAIN}"
 916               fi
 917            fi
 918         else
 919            # error already logged.
 920            rc=1
 921         fi
 922         
 923         debug_message "Function: dump_domain_config - End"
 924         
 925         return ${rc}
 926 }
 927 
 928 #
 929 # probe function for domain data service
 930 #
 931 check_domain()
 932 {
 933         debug_message "Function: check_domain - Begin"
 934         ${SET_DEBUG}
 935         
 936         typeset rc
 937         SECONDS=0
 938         
 939         if ${PGREP} -f "control_xvm start -R ${RESOURCE} " >/dev/null 2>&1
 940         then
 941            debug_message "Function: check_domain - start program is still running "
 942            rc=100
 943         else
 944            domstate=$(get_${VM}_status 2>/dev/null)
 945            
 946            case "${domstate}" in
 947               
 948               # Acceptable run states
 949               "running"|"blocked"|"paused"|"in shutdown"| \
 950               "active"|"suspending"|"resuming"|"suspended"|"starting")
 951                     
 952                     if [ "${#PLUGIN_PROBE}" -ne 0 ]
 953                     then
 954                        if [ -x "$(echo ${PLUGIN_PROBE} | ${AWK} '{print $1}')" ]
 955                        then
 956                           PROBE_TIMEOUT=$(${SCHA_RESOURCE_GET} -O Extension -R ${RESOURCE} -G ${RESOURCEGROUP} Probe_timeout|tail -1)
 957                           # Run the supplied probe with only 90% of PROBE_TIMEOUT. Also note that this
 958                           # is supplied as a parameter to the PLUGIN_PROBE.
 959                           
 960                           HATIMERUN_TIMEOUT=$((PROBE_TIMEOUT*90/100-${SECONDS}))
 961                           
 962                           output=$(${HATIMERUN} -t ${HATIMERUN_TIMEOUT} -k 9 ${PLUGIN_PROBE} ${HATIMERUN_TIMEOUT})
 963                           rc=${?}
 964                           
 965                           case ${rc} in
 966                              0) debug_message "check_domain - ${DOMAIN} ${output}"
 967                                 rc=0
 968                              ;;
 969                              99)
 970                                 # SCMSGS
 971                                 # @explanation
 972                                 # The domain probe timed out.
 973                                 # @user_action
 974                                 # Ensure that ${PLUGIN_PROBE} can complete within
 975                                 # 90% of PROBE_TIMEOUT.
 976                                 scds_syslog -p daemon.error -t $(syslog_tag) -m \
 977                                      "%s did not complete within %s seconds." \
 978                                      "${PLUGIN_PROBE}" "${HATIMERUN_TIMEOUT}"
 979                                 
 980                                 rc=100
 981                              ;;
 982                              100) if ${PGREP} -f "gds_svc_start .*-R ${RESOURCE} " >/dev/null 2>&1
 983                                 then
 984                                    debug_message "check_domain - ${DOMAIN} is still starting"
 985                                    rc=100
 986                                 elif ${PGREP} -f "gds_svc_stop .*-R ${RESOURCE} " >/dev/null 2>&1
 987                                 then
 988                                    debug_message "check_domain - ${DOMAIN} is stopping"
 989                                    rc=100
 990                                 else
 991                                    # SCMSGS
 992                                    # @explanation
 993                                    # The domain probe has requested a domain restart.
 994                                    # @user_action
 995                                    # None. A domain restart will be attempted.
 996                                    scds_syslog -p daemon.error -t $(syslog_tag) -m \
 997                                         "% has requested a domain restart %s." \
 998                                         "${PLUGIN_PROBE}" "${output}"
 999                                    
1000                                    rc=100
1001                                 fi
1002                              ;;
1003                              201) if ${PGREP} -f "gds_svc_start .*-R ${RESOURCE} " >/dev/null 2>&1
1004                                 then
1005                                    debug_message "check_domain - ${DOMAIN} is still starting"
1006                                    rc=100
1007                                 elif ${PGREP} -f "gds_svc_stop .*-R ${RESOURCE} " >/dev/null 2>&1
1008                                 then
1009                                    debug_message "check_domain - ${DOMAIN} is stopping"
1010                                    rc=100
1011                                 else
1012                                    # SCMSGS
1013                                    # @explanation
1014                                    # The domain has requested an immediate failover.
1015                                    # @user_action
1016                                    # None. The domain will be immediately failed over.
1017                                    scds_syslog -p daemon.error -t $(syslog_tag) -m \
1018                                         "%s has requested an immediate failover." \
1019                                         "${PLUGIN_PROBE}"
1020                                    
1021                                    rc=201
1022                                 fi
1023                              ;;
1024                              *)
1025                                 # SCMSGS
1026                                 # @explanation
1027                                 # ${PLUGIN_PROBE} did not return 0, 100 or 201.
1028                                 # @user_action
1029                                 # None. A domain restart will be attempted.
1030                                 scds_syslog -p daemon.error -t $(syslog_tag) -m \
1031                                      "%s did not return 0, 100 or 201, a domain restart will be attempted." \
1032                                      "${PLUGIN_PROBE}"
1033                                 rc=100
1034                              ;;
1035                           esac
1036                        else
1037                           # SCMSGS
1038                           # @explanation
1039                           # ${PLUGIN_PROBE} does not exist or is not executable.
1040                           # @user_action
1041                           # Check the pathname exists and that ${PLUGIN_PROBE} is executable.
1042                           scds_syslog -p daemon.error -t $(syslog_tag) -m \
1043                                "%s non-existent executable." \
1044                                "${PLUGIN_PROBE}"
1045                           
1046                           rc=0
1047                        fi
1048                     else
1049                        rc=0
1050                     fi
1051                     
1052                  ;;
1053                  
1054                  # Restartable run states
1055                  
1056                  "shut off"|"crashed"| \
1057                  "inactive"|"stopping")
1058                        
1059                        rc=100
1060                     ;;
1061                     
1062                     # Unknown run states
1063                     
1064                     *)
1065                        rc=100
1066                     ;;
1067            esac
1068            
1069            debug_message "check_domain - ${DOMAIN} ${domstate}"
1070            
1071         fi
1072         
1073         debug_message "Function: check_domain - End"
1074         return ${rc}
1075 }
1076 
#
# stop_domain - stop method: save the domain configuration, then either
# shut the domain down or migrate it, depending on ON_OFF_SWITCH and
# MIGRATION_TYPE.
#
# Globals read:    RESOURCE, RESOURCEGROUP, MIGRATION_TYPE
# Globals written: STOP_TIMEOUT, MAX_MIGRATE_TIMEOUT, MAX_STOP_TIMEOUT,
#                  SECONDS (reset to 0), ON_OFF_SWITCH
# Returns:
#   0 - the domain was shut down or migrated
#   1 - the configuration dump failed or Migration_type is invalid
#   otherwise the exit status of domain_shutdown
#
stop_domain()
{
        debug_message "Function: stop_domain - Begin"
        ${SET_DEBUG}
        
        typeset rc=0
        
        STOP_TIMEOUT=$(${SCHA_RESOURCE_GET} -O STOP_TIMEOUT \
        -R ${RESOURCE} -G ${RESOURCEGROUP} )
        
        # Note that GDS will attempt to cleanup after 80% of STOP_TIMEOUT
        # has been consumed.  In this regard, we only allocate a combined
        # 75% of STOP_TIMEOUT to MAX_MIGRATE_TIMEOUT and MAX_STOP_TIMEOUT.
        #
        # This leaves 5% for domain_destroy() which may be called if
        # domain_shutdown() exceeds its timeout and finally domain_delete().
        
        MAX_MIGRATE_TIMEOUT=$((STOP_TIMEOUT*25/100))
        MAX_STOP_TIMEOUT=$((STOP_TIMEOUT*50/100))
        SECONDS=0
        
        # Save the domain configuration changes.
        if ! dump_domain_config
        then
           debug_message "Function: stop_domain - End"
           return 1
        fi

        # At resource creation, the administrator can determine the Migration_type.
        # Valid values for Migration_type are
        #
        # Migration_type="normal"
        #   o Stop the resource (shutdown the domain)
        #   o Failover the resource group from the source node to the target node
        #   o Start the resource (start the domain)
        #
        # Migration_type="migrate"
        #   o Suspend the domain on the source node
        #   o Copy the domain's memory pages from the source node to the target node
        #   o Resume the domain on the target node
        #
        # Migration_type="migrate_live"
        #   o Iteratively copy the domain's memory pages from the source node to the target node
        #   o When pre-copy is no longer beneficial, suspend the domain on the source node
        #   o Copy the domain's remaining "dirty" pages from the source node to the target node
        #   o Resume the domain on the target node
        #
        # Note that migration or live migration is performed over the cluster interconnect.
        #
        # For migration or live migration to be attempted across Solaris Cluster xVM nodes
        # the following conditions must be met.
        #
        # - The target Solaris Cluster xVM node must be running the same xVM version.
        #
        # - The migration TCP port must be open and accepting connections from the source
        #    Solaris Cluster xVM node.
        #
        # - There must be sufficient resources for the domain to run in.
        #
        # - If the conditions are met and migration or live migration is successful a NO-OP
        # STOP and START is performed. This will ensure a successful STOP and START to the
        # appropriate RGM callback methods. Furthermore, doing a NO-OP RGM failover will
        # ensure that RGM subsequently actions any dependencies and that Solaris Cluster
        # reflects the correct state and status of resource groups and resources.
        #
        # - If the conditions are met but migration or live migration is not successful a
        # normal failover will be performed.
        #
        # - If the conditions are not met, migration or live migration will fail and a normal
        # failover will be performed.
        #
        # However, before attempting a migration or live migration we need to determine if the
        # resource is being disabled. To distinguish if the resource is being disabled we
        # test the ON_OFF_SWITCH property of the resource.
        #
        # If the resource is being disabled the ON_OFF_SWITCH will be DISABLED before the STOP
        # method is called. So, conversely if the ON_OFF_SWITCH is ENABLED the resource is not
        # being disabled and instead the resource group is undergoing either a switch to
        # another node or is being evacuated from the node.
        #
        # - If the resource is being disabled we perform a normal shutdown, regardless of the
        # Migration_type setting.
        
        ON_OFF_SWITCH=$(${SCHA_RESOURCE_GET} -O ON_OFF_SWITCH -R ${RESOURCE} -G ${RESOURCEGROUP})
        
        debug_message "stop_domain - ON_OFF_SWITCH=${ON_OFF_SWITCH}"
        debug_message "stop_domain - MIGRATION_TYPE=${MIGRATION_TYPE}"
        
        if [[ "${ON_OFF_SWITCH}" = "DISABLED" ]]
        then
           # Report the shutdown result to the caller, exactly as the
           # NORMAL branch below does, instead of always returning 0.
           domain_shutdown
           rc=${?}
        else
           case "${MIGRATION_TYPE}" in
              NORMAL)
                 domain_shutdown
                 rc=${?}
                 ;;
              MIGRATE*)
                 # A failed migration falls back to a normal shutdown,
                 # whose status then becomes the result of the stop.
                 if domain_migrate
                 then
                    rc=0
                 else
                    domain_shutdown
                    rc=${?}
                 fi
                 ;;
              *)
                 # SCMSGS
                 # @explanation
                 # Invalid Migration_type specified.
                 # @user_action
                 # Delete and reregister the resource with
                 # a valid Migration_type entry.
                 scds_syslog -p daemon.error -t $(syslog_tag) -m \
                      "Invalid Migration_type=%s." \
                      "${MIGRATION_TYPE}"
                 rc=1
                 ;;
           esac
        fi
        
        debug_message "Function: stop_domain - End"
        return ${rc}
}
1197      
#
# get_target_host - determine the node a resource group switch is heading
# for and resolve that node's cluster interconnect hostname.
#
# Globals read:    RESOURCEGROUP, TARGET_HOST (set by check_commandlog)
# Globals written: PRIVATELINK_TARGET_HOST (only on success)
# Returns:
#   0 - TARGET_HOST is another node from the resource group nodelist
#   1 - no target found, the node is being evacuated, or the target is
#       not in the nodelist; the caller performs a normal failover
#
get_target_host()
{
        debug_message "Function: get_target_host - Begin"
        ${SET_DEBUG}
        
        typeset rc=1
        typeset node
        typeset local_node
        
        # Here, we need to determine the target host as the resource group is either being
        # switched or the node, where the resource group is online, is being evacuated.
        #
        # To determine the target host for a resource group switch we rely on the cluster
        # command log file /var/cluster/logs/commandlog to supply the target host. We need to
        # obtain the correct entry from the command log file and match against the following
        #
        #       <date> + ${RESOURCEGROUP} + "START" + "switch"
        #
        # after which we only save the nodename from a clrg or scswitch command.
        #
        # Sample /var/cluster/log/commandlog output is as follows,
        #
        # 02/07/2008 08:45:13 pelko1 10548 root START - scswitch -z -g "xvm2-rg" -h "pelko2"
        # 02/07/2008 08:45:38 pelko1 10548 root END 0
        # 02/07/2008 09:01:35 pelko1 10874 root START - clrg "switch" -n "pelko1" "xvm2-rg"
        # 02/07/2008 09:01:36 pelko1 10874 root END -20827641
        #
        # If we are unable to match an entry, as perhaps the entry was logged at <date>
        # and we are checking at <date> + 1 second, i.e. we are checking just as the second
        # entry is incrementing to the next second, we perform another check. In fact the
        # last 10 seconds are checked from the commandlog.
        #
        # Once we have matched an entry from /var/cluster/logs/commandlog, we verify that
        # the target host is a valid nodelist entry for the resource group.
        #
        # - If we have a valid nodelist entry we then determine that target host's cluster
        # interconnect hostname to perform the migration or live migration.
        #
        # - If we are unable to find a match for a switch, we need to consider that an evacuate
        # node is being performed. However, if the node is being evacuated we will rely on
        # RGM to determine the nodename regardless if a migration or live migration was
        # requested. Subsequently, we perform a normal failover. This ensures that we do not
        # migrate or live migrate the domain to a node that may be different to the node
        # selected by RGM.
        #
        # So, suffice to say that if a "switch" match is not found, following the discovery
        # that the resource is not just being disabled, and that a migrate or live migrate
        # was defined, we will always perform a normal failover.
        #
        # Note that the target host match is performed within check_commandlog().
        
        check_commandlog
        
        debug_message "get_target_host - ${TARGET_HOST} size=${#TARGET_HOST}"
        
        local_node=$(/usr/bin/uname -n)
        
        if [ "${#TARGET_HOST}" -eq 0 ]
        then
           # SCMSGS
           # @explanation
           # A target host was not found
           # @user_action
           # None required. The domain will not be migrated or live
           # migrated instead a normal failover will be performed.
           scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Target host not found, normal failover will be performed."
           
        elif [[ "${TARGET_HOST}" = "${local_node}" || "${TARGET_HOST}" = *[0-9]:global* ]]
        then
           # The pattern match is done by the shell so that neither the
           # target host nor the pattern is exposed to word splitting or
           # pathname expansion.
           
           # SCMSGS
           # @explanation
           # The node is being evacuated.
           # @user_action
           # None required. The domain will not be migrated or live
           # migrated. Instead, a normal failover will be performed.
           scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Node is being evacuated, normal failover will be performed."
           
        else
           # The target host is already known to differ from the local node,
           # so it is valid only if it is itself listed in the nodelist.
           # Accepting any non-local nodelist entry would validate nothing.
           for node in $(${SCHA_RESOURCEGROUP_GET} -O NODELIST -G ${RESOURCEGROUP})
           do
              if [[ "${node}" = "${TARGET_HOST}" ]]
              then
                 rc=0
                 break
              fi
           done
           
           if [ "${rc}" -eq 0 ]
           then
              PRIVATELINK_TARGET_HOST=$(${SCHA_CLUSTER_GET} -O PRIVATELINK_HOSTNAME_NODE "${TARGET_HOST}")
              debug_message "get_target_host - PRIVATELINK_TARGET_HOST=${PRIVATELINK_TARGET_HOST}"
           else
              # SCMSGS
              # @explanation
              # The target host found in the command log file is not
              # a valid entry within the resource groups nodelist.
              # @user_action
              # None required. The domain will not be migrated or live
              # migrated instead a normal failover will be performed.
              scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                   "Target host %s not matched with the resource group nodelist, normal failover will be performed." \
                   "${TARGET_HOST}"
           fi
        fi
        
        debug_message "Function: get_target_host - End"
        return ${rc}
}
1300      
#
# check_commandlog - search /var/cluster/logs/commandlog for a switch or
# evacuate command logged within the last 10 seconds and extract the node
# name it was given.
#
# Globals read:    RESOURCEGROUP
# Globals written: TARGET_HOST (empty when nothing matched), HHMMSS
#
check_commandlog()
{
        debug_message "Function: check_commandlog - Begin"
        
        # Get the current epoch time
        typeset ETIME=$(/usr/bin/perl -e 'print time;')
        typeset DATE
        typeset STAMP
        typeset tries=10
        
        while (( tries > 0 ))
        do
           # Iteratively search the commandlog for a switch or evacuate, going back in time
           # by one second each time. If a match is found we break out of the loop.
           #
           # Both the date and the time are derived from the same epoch value, e.g.
           #
           #   ETIME=1202814041  ->  "02/12/2008 03:00:41"
           #   ETIME=1202814040  ->  "02/12/2008 03:00:40"
           #
           # so that a search which steps back across midnight looks for the
           # previous day's date rather than today's.
           
           # Convert the epoch time into the commandlog's "mm/dd/yyyy hh:mm:ss" format
           STAMP=$(/usr/bin/perl -e '@t = localtime($ARGV[0]); printf("%02d/%02d/%04d %02d:%02d:%02d", $t[4] + 1, $t[3], $t[5] + 1900, $t[2], $t[1], $t[0]);' "${ETIME}")
           DATE=${STAMP% *}
           HHMMSS=${STAMP#* }
           
           debug_message "check_commadlog - performed for ${DATE} ${HHMMSS}"
           
           # Check for a clrg switch or scswitch
           TARGET_HOST=$(/usr/bin/grep "${DATE} ${HHMMSS}" /var/cluster/logs/commandlog |\
              /usr/bin/grep -w START | /usr/bin/grep switch | /usr/bin/grep \"${RESOURCEGROUP}\" |\
           /usr/bin/sed -e 's/^.*-h //' -e 's/^.*-n //' | ${AWK} '{print $1}' | ${TR} -d '" ')
           
           [ "${#TARGET_HOST}" -ne 0 ] && break
           
           # Check for a clrg evacuate
           TARGET_HOST=$(/usr/bin/grep "${DATE} ${HHMMSS}" /var/cluster/logs/commandlog |\
              /usr/bin/grep -w START | /usr/bin/grep evacuate |\
           /usr/bin/sed -e 's/^.*-n //' | ${AWK} '{print $1}' | ${TR} -d '+" ' )
           
           [ "${#TARGET_HOST}" -ne 0 ] && break
           
           # Check for a scswitch -S
           TARGET_HOST=$(/usr/bin/grep "${DATE} ${HHMMSS}" /var/cluster/logs/commandlog |\
              /usr/bin/grep -w START | /usr/bin/grep scswitch | /usr/bin/grep "\-S" |\
           /usr/bin/sed -e 's/^.*-h //' | ${AWK} '{print $1}' | ${TR} -d '\-SK" ' )
           
           [ "${#TARGET_HOST}" -ne 0 ] && break
           
           tries=$((tries - 1))
           ETIME=$((ETIME - 1))
        done
        
        debug_message "check_commandlog - TARGET_HOST=${TARGET_HOST}"
   
        debug_message "Function: check_commandlog - End"
}
1365 
1366 #
1367 # routines to perform domain migration
1368 #
#
# migrate_xvm - migrate or live migrate an xVM domain with xm, bounded by
# MAX_MIGRATE_TIMEOUT.
#
# Globals read:    MIGRATION_TYPE, DOMAIN, PRIVATELINK_TARGET_HOST,
#                  MAX_MIGRATE_TIMEOUT
# Globals written: OPTION
# Returns: the exit status of hatimerun; the caller treats 99 as a timeout.
#
migrate_xvm()
{
        debug_message "Function: migrate_xvm - Begin"
        ${SET_DEBUG}
        
        typeset rc=0

        # Start from an empty value so that a stale OPTION left behind by an
        # earlier call is never reused for an unrecognised MIGRATION_TYPE.
        OPTION=""
        [[ "${MIGRATION_TYPE}" = "MIGRATE" ]] && OPTION="migrate"
        [[ "${MIGRATION_TYPE}" = "MIGRATE_LIVE" ]] && OPTION="migrate --live"
        
        debug_message "domain_migrate - Running /usr/sbin/xm ${OPTION} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}"
        
        # xm has to be given the subcommand held in OPTION, not the resource
        # property value in MIGRATION_TYPE. ${OPTION} is deliberately left
        # unquoted so that "migrate --live" is passed as two words.
        ${HATIMERUN} -t ${MAX_MIGRATE_TIMEOUT} -k KILL \
            ${XM} ${OPTION} "${DOMAIN}" ${PRIVATELINK_TARGET_HOST} > /dev/null 2>&1
        rc=${?}
        
        debug_message "Function: migrate_xvm - End"
        return ${rc}
}
1388 
#
# migrate_ldom - run /opt/SUNWscxvm/bin/ldm_migrate for the domain under
# hatimerun, bounded by MAX_MIGRATE_TIMEOUT, appending its output to LOGFILE.
#
# Globals read:    MIGRATION_TYPE, DOMAIN, PRIVATELINK_TARGET_HOST,
#                  PASSWORD_FILE, MAX_MIGRATE_TIMEOUT, LOGFILE
# Globals written: OPTION (only when MIGRATION_TYPE is MIGRATE)
# Returns: the exit status of hatimerun.
#
migrate_ldom()
{
        debug_message "Function: migrate_ldom - Begin"
        ${SET_DEBUG}
        
        typeset migrate_status=0

        # NOTE(review): OPTION is left untouched for any other
        # MIGRATION_TYPE - confirm what ldm_migrate expects in that case.
        if [[ "${MIGRATION_TYPE}" = "MIGRATE" ]]
        then
           OPTION="migrate"
        fi
        
        debug_message "domain_migrate - Running /opt/SUNWscxvm/bin/ldm_migrate ${OPTION} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}"
        
        ${HATIMERUN} -t ${MAX_MIGRATE_TIMEOUT} -k KILL \
            /opt/SUNWscxvm/bin/ldm_migrate ${OPTION} "${DOMAIN}" ${PRIVATELINK_TARGET_HOST} ${PASSWORD_FILE} >> $LOGFILE 2>&1
        migrate_status=${?}
        
        debug_message "Function: migrate_ldom - End"
        return ${migrate_status}
}
1407 
1408 #
1409 # routines to cancel migration
1410 #
#
# cancel_xvm_migration - nothing has to be cancelled for an xVM domain.
#
# Returns: 0 always.
#
cancel_xvm_migration()
{
        # Deliberate no-op
        true
}
1416 
#
# cancel_ldom_migration - cancel an in-flight LDom migration and wait for
# the domain to leave its transitional state.
#
# Globals read: DOMAIN, VM, LOGFILE, MAX_STOP_TIMEOUT, SECONDS
# Returns: the status of the final debug_message call.
#
cancel_ldom_migration()
{
        debug_message "Function: cancel_ldom_migration - Begin"
        ${SET_DEBUG}

        # cancel domain migration for ldoms
        if ${LDM} cancel-operation migration ${DOMAIN} >> "${LOGFILE}" 2>&1
        then
           # SCMSGS
           # @explanation
           # The domain migration operation was cancelled.
           # @user_action
           # None required. Informational message.
           scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Migration of domain %s is cancelled, the domain state is now in active state." \
                "${DOMAIN}"
        fi
        
        # Wait, within the stop budget, for the domain to settle. The loop is
        # left with break so that SECONDS keeps counting the time that has
        # really elapsed; overwriting it would make any later wait loop that
        # compares SECONDS against MAX_STOP_TIMEOUT end immediately.
        while (( SECONDS < MAX_STOP_TIMEOUT ))
        do
           if get_${VM}_status | ${GREP} -q -E "^suspending|^resuming|^suspended|^starting" > /dev/null 2>&1
           then
              sleep 5
           else
              break
           fi
        done
        
        debug_message "Function: cancel_ldom_migration - End"
}
1447 
#
# domain_migrate - migrate or live migrate the domain to the target host
# found by get_target_host and record a NO-OP flag in the CCR on success.
#
# Globals read:    MIGRATION_TYPE, VM, DOMAIN, RESOURCE, CCR_TABLE, LOGFILE,
#                  TARGET_HOST, PRIVATELINK_TARGET_HOST
# Globals written: MSG
# Returns:
#   0 - the domain was migrated and the NO-OP flag was stored
#   1 - no usable target host, the migration failed or timed out, or the
#       NO-OP flag could not be stored
#
domain_migrate()
{
        debug_message "Function: domain_migrate - Begin"
        ${SET_DEBUG}
        
        typeset rc
        
        case "${MIGRATION_TYPE}" in
           MIGRATE)      MSG="migrated" ;;
           MIGRATE_LIVE) MSG="live migrated" ;;
        esac
        
        # Without a usable target host there is nothing to migrate to.
        if ! get_target_host
        then
           debug_message "Function: domain_migrate - End"
           return 1
        fi
        
        # SCMSGS
        # @explanation
        # The domain is being migrated or live migrated to the target host.
        # @user_action
        # None required.
        scds_syslog -p daemon.notice -t $(syslog_tag) -m \
             "Domain %s is being %s to %s." \
             "${DOMAIN}" "${MSG}" "${TARGET_HOST}"
        
        migrate_${VM} ${MIGRATION_TYPE} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}
        rc=${?}
        
        case ${rc} in
           0)
              # SCMSGS
              # @explanation
              # The domain was migrated or live migrated to the target host.
              # @user_action
              # None required. The domain successfully migrated or live migrated
              # from the source node to the target node.
              scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                   "Domain %s successfully %s to %s." \
                   "${DOMAIN}" "${MSG}" "${TARGET_HOST}"
              
              # As the domain has been successfully migrated or live migrated
              # we need to indicate a successful stop by performing a NO-OP stop
              # and subsequently a successful start by performing a NO-OP start.
              
              if ${CCRADM} addkey --key=noop_${RESOURCE} --value="1" ${CCR_TABLE} >> $LOGFILE 2>&1
              then
                 debug_message "domain_migrate - .noop_${RESOURCE} flag added to CCR"
              else
                 # SCMSGS
                 # @explanation
                 # Failed to update the XMl configuration to the CCR.
                 # @user_action
                 # Check the syslog for further messages.
                 # Determine why the ccr update failed.
                 scds_syslog -p daemon.error -t $(syslog_tag) -m \
                      "Failed to add NO-OP flag for %s to ccr." \
                      "${DOMAIN}"
                 rc=1
              fi
              
              # SCMSGS
              # @explanation
              # The domain was migrated or live migrated.
              # @user_action
              # None required. Informational message.
              scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                   "NO-OP STOP being performed."
              
              # If the domain has successfully migrated, we will now delete the domain.
              #
              # Doing this ensures that the domain is only defined and able to be started
              # on one cluster node at a time. Domains can use shared storage between cluster
              # nodes so it is very important that we prevent any data corruption if a domain
              # gets manually started on multiple cluster nodes where shared storage is used.
              #
              # Of course using SUNW.HAStoragePlus somewhat protects against this, however we
              # simply want to avoid any manual administrative errors performed by mistake.
              #
              # Note, unless the domain was migrated or live migrated, the domain is defined
              # before startup using a previously dumped XML file for the administrative file
              # system.
              
              if (( ${rc} == 0 ))
              then
                 if [[ "${VM}" == "xvm" ]]
                 then
                    domain_delete
                 fi
              fi
              ;;
           99)
              # SCMSGS
              # @explanation
              # The domain migration or live migration timed out.
              # @user_action
              # None required. Informational message.
              scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                   "Migration of domain %s timed out, the domain state is now shut off." \
                   "${DOMAIN}"
              
              rc=1
              cancel_${VM}_migration
              ;;
           *)
              # SCMSGS
              # @explanation
              # The domain failed to migrate or live migrate to the target host.
              # @user_action
              # None required. The domain failed to migrate or live migrate
              # from the source node to the target node. A normal failover
              # will be performed.
              scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                   "Domain %s failed to %s to %s, normal failover will be performed." \
                   "${DOMAIN}" "${MSG}" "${TARGET_HOST}"
              
              rc=1
              cancel_${VM}_migration
              ;;
        esac
        
        debug_message "Function: domain_migrate - End"
        return ${rc}
}
1563 
1564 #
1565 # routines to perform domain shutdown
1566 #
#
# shutdown_xvm - ask virsh to shut the xVM domain down.
#
# Globals read: DOMAIN
# Returns: the exit status of virsh shutdown.
#
shutdown_xvm()
{
        debug_message "Function: shutdown_xvm - Begin"
        ${SET_DEBUG}
        
        typeset virsh_status=0

        # virsh shutdown hands control back before the domain has actually
        # shut down, which is why it is not wrapped in hatimerun.
        
        ${VIRSH} shutdown ${DOMAIN} > /dev/null 2>&1 || virsh_status=${?}
        
        debug_message "Function: shutdown_xvm - End"
        return ${virsh_status}
}
1583      
#
# shutdown_ldom - stop an LDom domain with ldm stop-domain, bounded by
# MAX_STOP_TIMEOUT.
#
# Globals read:    VM, DOMAIN, MAX_STOP_TIMEOUT, LOGFILE
# Globals written: status
# Returns:
#   0 - the domain is not in a state that needs stopping, or it was stopped
#   2 - the domain status could not be obtained (domain is not present)
#   otherwise the exit status of hatimerun
#
shutdown_ldom()
{
        debug_message "Function: shutdown_ldom - Begin"
        ${SET_DEBUG}

        typeset rc
       
        status=$(get_${VM}_status) 
        if (( ${?} == 0 ))
        then
           if echo ${status} | ${GREP} -q -E "^active$|^suspending|^resuming|^suspended|^starting" > /dev/null 2>&1
           then
              ${HATIMERUN} -t ${MAX_STOP_TIMEOUT} -k KILL ${LDM} stop-domain ${DOMAIN} >> "${LOGFILE}" 2>&1
              rc=${?}
           else
              # domain is already stopped
              rc=0
           fi
        else
           # domain is not present.
           rc=2
        fi
        
        # The closing trace has to say "End" so that it pairs with the
        # "Begin" trace above.
        debug_message "Function: shutdown_ldom - End"
        return ${rc}
}
1610 
domain_shutdown()
{
        # Shut down ${DOMAIN} gracefully, fall back to an immediate
        # termination if that fails or takes too long, and finally delete
        # the domain from this node.
        #
        # The work is dispatched on ${VM} to shutdown_${VM}, is_${VM}_up and
        # destroy_${VM}.
        #
        # Returns:
        #   0 - the domain is not present (shutdown_${VM} returned 2), or
        #       the domain was shut down gracefully or was terminated
        #   otherwise the exit status of destroy_${VM}
        #
        # The exit status of domain_delete is not propagated to the caller.

        debug_message "Function: domain_shutdown - Begin"
        ${SET_DEBUG}
        
        typeset rc
        
        # Coordinate with the domain OS to perform a graceful shutdown.
        # Note that the virsh shutdown command returns before the domain
        # has shutdown, as such we do not use hatimerun.
        
        shutdown_${VM}
        rc=${?}
        if (( ${rc} == 2 ))
        then
                # Nothing to stop and nothing to delete.
                debug_message "Function: domain_shutdown - End"
                return 0
        elif (( ${rc} == 0 ))
        then 
           # Loop to test if the domain shuts down gracefully
           # or if the shutdown time is exceeded.
           #
           # ${SECONDS} is the shell's own elapsed-time counter, so the
           # limit is measured from whenever that counter was last reset
           # (presumably the start of the calling method).  The loop is
           # left with break so that the counter is not overwritten.
           
           while (( ${SECONDS} < ${MAX_STOP_TIMEOUT} ))
           do
              is_${VM}_up || break
              ${SLEEP} 5
           done
           
           if is_${VM}_up
           then
              # SCMSGS
              # @explanation
              # The domain failed to shutdown gracefully.
              # @user_action
              # None required. The domain failed to shutdown
              # gracefully and will now be immediately terminated.
              scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                   "Domain %s failed to shutdown gracefully, immediate shutdown will now be performed." \
                   "${DOMAIN}"
              
              destroy_${VM}
              rc=${?}
           else
              # SCMSGS
              # @explanation
              # The domain was shutdown gracefully.
              # @user_action
              # None required. The domain has shutdown gracefully.
              scds_syslog -p daemon.info -t $(syslog_tag) -m \
                   "Domain %s has been gracefully shutdown." \
                   "${DOMAIN}"
              rc=0
           fi
           
        else
           # error already logged
           destroy_${VM}
           rc=${?}
        fi
        
        # If the domain has successfully shutdown, we will now delete the domain.
        #
        # Doing this ensures that the domain is only defined and able to be started
        # on one cluster node at a time. Domains can use shared storage between cluster
        # nodes so it is very important that we prevent any data corruption if a domain
        # gets manually started on multiple cluster nodes where shared storage is used.
        #
        # Of course using SUNW.HAStoragePlus somewhat protects against this, however we
        # simply want to avoid any manual administrative errors performed by mistake.
        #
        # Note, unless the domain was migrated or live migrated, the domain is defined
        # before startup using a previously dumped XML file for the administrative file
        # system.
        
        (( ${rc} == 0 )) && domain_delete
        
        debug_message "Function: domain_shutdown - End"
        return ${rc}
}
1694 
1695 #
1696 # routines to destroy domain
1697 #
destroy_xvm()
{
        # Immediately terminate the xVM domain ${DOMAIN} with "virsh destroy".
        # Command output is appended to ${LOGFILE} and the outcome is
        # reported through scds_syslog.
        #
        # Returns 0 when the destroy command succeeded, 1 otherwise.

        debug_message "Function: destroy_xvm - Begin"
        ${SET_DEBUG}

        # Assume failure until the destroy command says otherwise.
        typeset rc=1

        ${VIRSH} destroy ${DOMAIN} >> $LOGFILE 2>&1 && rc=0

        if (( ${rc} != 0 ))
        then
           # SCMSGS
           # @explanation
           # The /usr/bin/virsh destroy command failed.
           # @user_action
           # Determine why it was not possible to immediately terminate
           # the domain.
           scds_syslog -p daemon.error -t $(syslog_tag) -m \
                "Domain %s failed to shutdown immediately." \
                "${DOMAIN}"
        else
           # SCMSGS
           # @explanation
           # The domain was immediately terminated.
           # @user_action
           # None required. The domain had previously failed to shutdown
           # gracefully but has now been immediately terminated.
           scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Domain %s has been immediately terminated." \
                "${DOMAIN}"
        fi

        debug_message "Function: destroy_xvm - End"
        return ${rc}
}
1733      
destroy_ldom()
{
        # Forcefully stop the logical domain ${DOMAIN} with
        # "ldm stop-domain -f".  Command output is appended to ${LOGFILE}
        # and the outcome is reported through scds_syslog.
        #
        # Returns 0 when the forced stop succeeded, 1 otherwise.

        debug_message "Function: destroy_ldom - Begin"
        ${SET_DEBUG}

        typeset rc

        ${LDM} stop-domain -f ${DOMAIN} >> $LOGFILE 2>&1
        case ${?} in
        0)
           # SCMSGS
           # @explanation
           # The domain was immediately terminated.
           # @user_action
           # None required. The domain had previously failed to shutdown
           # gracefully but has now been immediately terminated.
           scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Domain %s has been forcefully terminated." \
                "${DOMAIN}"
           rc=0
           ;;
        *)
           # SCMSGS
           # @explanation
           # The /opt/SUNWldm/bin/ldm stop-domain "-f" command failed.
           # @user_action
           # Determine why it was not possible to forcefully stop
           # the domain.
           scds_syslog -p daemon.error -t $(syslog_tag) -m \
                "Domain %s failed to do a forceful shutdown." \
                "${DOMAIN}"
           rc=1
           ;;
        esac

        debug_message "Function: destroy_ldom - End"
        return ${rc}
}
1769 
1770 #
1771 # routines to remove domains from the node
1772 #
domain_delete()
{
        # Remove ${DOMAIN} from this node by dispatching to delete_${VM}.
        #
        # Removing the domain once it has stopped keeps anyone from manually
        # starting it on a different node, which would compromise a domain
        # that lives on shared storage.  The domain's configuration is always
        # dumped to the agent's administrative file system, so the domain can
        # be defined again before its next startup.
        #
        # Returns 0 when delete_${VM} succeeded, 1 otherwise.

        debug_message "Function: domain_delete - Begin"
        ${SET_DEBUG}

        typeset rc=0

        # On failure delete_${VM} has already logged the error.
        delete_${VM} || rc=1

        if (( ${rc} == 0 ))
        then
           # SCMSGS
           # @explanation
           # The domain was deleted.
           # @user_action
           # None required. The domain has been deleted as it
           # will be defined on another node. Deleting the domain
           # on this node ensures that it can't be started on
           # more than one cluster node at a time.
           scds_syslog -p daemon.notice -t $(syslog_tag) -m \
                "Domain %s has been deleted on this node." \
                "${DOMAIN}"
        fi

        debug_message "Function: domain_delete - End"
        return ${rc}
}
1808 
delete_xvm()
{
        # Delete the xVM domain ${DOMAIN} from this node with "xm delete".
        # The command is taken from ${XM}, like the other external commands
        # used by this file.  Command output is appended to ${LOGFILE}; a
        # failure is reported through scds_syslog.
        #
        # Returns 0 when the delete command succeeded, 1 otherwise.

        debug_message "Function: delete_xvm - Begin"
        ${SET_DEBUG}
        
        typeset rc=0

        if ! ${XM} delete "${DOMAIN}" >> "$LOGFILE" 2>&1
        then
           # SCMSGS
           # @explanation
           # The /usr/sbin/xm delete command failed.
           # @user_action
           # Determine why it was not possible to delete the domain.
           scds_syslog -p daemon.error -t $(syslog_tag) -m \
                "Failed to delete domain %s on this node." \
                "${DOMAIN}"
           rc=1
        fi
        
        debug_message "Function: delete_xvm - End"
        return ${rc}
}
1832 
delete_ldom()
{
        # Remove the logical domain ${DOMAIN} from this node.  A domain whose
        # status is "bound" is unbound first; the domain is then removed with
        # "ldm remove-domain".  Command output is appended to ${LOGFILE} and
        # failures are reported through scds_syslog.
        #
        # Returns 0 when the domain was removed, 1 when either the unbind
        # or the remove step failed.

        debug_message "Function: delete_ldom - Begin"
        ${SET_DEBUG}

        typeset rc=0

        if get_${VM}_status | ${GREP} -q -E "^bound$" > /dev/null 2>&1
        then
           # if the domain is in bound state, unbind it.
           if ! ${LDM} unbind-domain ${DOMAIN} >> $LOGFILE 2>&1
           then
              # SCMSGS
              # @explanation
              # The /opt/SUNWldm/bin/ldm unbind-domain command failed.
              # @user_action
              # Determine why it was not possible to unbind the domain.
              scds_syslog -p daemon.error -t $(syslog_tag) -m \
                   "Failed to unbind domain %s on this node." \
                   "${DOMAIN}"
              rc=1
           fi
        fi

        # The removal is only attempted when the unbind step did not fail.
        if (( ${rc} == 0 )) && ! ${LDM} remove-domain ${DOMAIN} >> $LOGFILE 2>&1
        then
           # SCMSGS
           # @explanation
           # The /opt/SUNWldm/bin/ldm remove-domain command failed.
           # @user_action
           # Determine why it was not possible to remove the domain.
           scds_syslog -p daemon.error -t $(syslog_tag) -m \
                "Failed to remove domain %s on this node." \
                "${DOMAIN}"
           rc=1
        fi

        debug_message "Function: delete_ldom - End"
        return ${rc}
}