Print this page
6865006 ldom validate shouldn't check for password file if migration type is set to normal
6864993 HA-xVM validate messages need to be wrapped by gettext
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/ha-services/gds-agents/xvm/functions.ksh
+++ new/usr/src/cmd/ha-services/gds-agents/xvm/functions.ksh
1 1 #!/bin/ksh
2 2 #
3 3 # CDDL HEADER START
4 4 #
5 5 # The contents of this file are subject to the terms of the
6 6 # Common Development and Distribution License (the License).
7 7 # You may not use this file except in compliance with the License.
8 8 #
9 9 # You can obtain a copy of the license at usr/src/CDDL.txt
10 10 # or http://www.opensolaris.org/os/licensing.
11 11 # See the License for the specific language governing permissions
12 12 # and limitations under the License.
13 13 #
14 14 # When distributing Covered Code, include this CDDL HEADER in each
15 15 # file and include the License file at usr/src/CDDL.txt.
16 16 # If applicable, add the following below this CDDL HEADER, with the
17 17 # fields enclosed by brackets [] replaced with your own identifying
18 18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 19 #
20 20 # CDDL HEADER END
21 21 #
22 22 # Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 # Use is subject to license terms.
24 24 #
25 25 # ident "%Z%%M% %I% %E% SMI"
26 26 #
27 27
28 28 PKG=SUNWscxvm
29 29 TASK_COMMAND=""
30 30 RESOURCE_PROJECT_NAME=""
31 31 CCR_TABLE=${VM}_"domain_config"
32 32 TMP_DIR="/var/tmp"
33 33 LOGFILE=${TMP_DIR}/${RESOURCE}_logfile
34 34
35 35 # Commands definition
36 36 SCLOGGER=/usr/cluster/lib/sc/scds_syslog
37 37 LOGGER=/usr/bin/logger
38 38 GREP=/usr/xpg4/bin/grep
39 39 AWK=/usr/bin/awk
40 40 PGREP=/usr/bin/pgrep
41 41 SLEEP=/usr/bin/sleep
42 42 TR=/usr/xpg4/bin/tr
43 43 SCHA_RESOURCE_GET=/usr/cluster/bin/scha_resource_get
44 44 SCHA_RESOURCEGROUP_GET=/usr/cluster/bin/scha_resourcegroup_get
45 45 SCHA_CLUSTER_GET=/usr/cluster/bin/scha_cluster_get
46 46 HATIMERUN=/usr/cluster/bin/hatimerun
47 47 LDM=/opt/SUNWldm/bin/ldm
48 48 VIRSH=/usr/bin/virsh
49 49 XM=/usr/sbin/xm
50 50 CCRADM=/usr/cluster/lib/sc/ccradm
51 51 CL_EXEC_CLIENT=/usr/cluster/lib/sc/cl_exec_client
52 52
53 53 syslog_tag()
54 54 {
55 55 ${SET_DEBUG}
56 56 print "SC[${PKG:-??}.${METHOD:-??}]:${RESOURCEGROUP:-??}:${RESOURCE:-??}"
57 57 }
58 58
59 59 scds_syslog()
60 60 {
61 61 if [ -f "${SCLOGGER}" ]
62 62 then
63 63 ${SCLOGGER} "$@" &
64 64 else
65 65 while getopts 'p:t:m' opt
66 66 do
|
↓ open down ↓ |
66 lines elided |
↑ open up ↑ |
67 67 case "${opt}" in
68 68 t) TAG=${OPTARG};;
69 69 p) PRI=${OPTARG};;
70 70 esac
71 71 done
72 72
73 73 shift $((${OPTIND} - 1))
74 74 LOG_STRING=$(/usr/bin/printf "$@")
75 75 ${LOGGER} -p ${PRI} -t ${TAG} ${LOG_STRING}
76 76 fi
77 +}
77 78
78 - if [[ "${METHOD}" == "validate" ]]
79 - then
80 - shift 5
81 - /usr/bin/printf "$@"
82 - fi
79 +i18n_message()
80 +{
81 + debug_message "Function: i18n_message - Begin"
82 + ${DEBUG}
83 +
84 + print -u2 $(/bin/printf "$@")
85 +
86 + debug_message "Function: i18n_message - End"
87 + return 0
83 88 }
84 89
85 90 debug_message()
86 91 {
87 92 typeset DEBUG_TEXT=
88 93
89 94 case ${DEBUG_LEVEL} in
90 95 0) # No debug msgs
91 96 SET_DEBUG=
92 97 ;;
93 98 1) # Begin and End msgs
94 99 SET_DEBUG=
95 100 DEBUG_TEXT=$(echo ${1} | ${GREP} -E 'Begin|End')
96 101 ;;
97 102 2) # All debug msgs
98 103 SET_DEBUG="set -x"
99 104 DEBUG_TEXT=${1}
100 105 ;;
101 106 esac
102 107
103 108 [[ -n "${DEBUG_TEXT}" ]] && \
104 109 scds_syslog -p daemon.debug -t $(syslog_tag) -m \
105 110 "%s" "${DEBUG_TEXT}"
106 111 }
107 112
108 113 log_message()
109 114 {
110 115 #
111 116 # Output a message to syslog as required
112 117 #
113 118
114 119 debug_message "Function: log_message - Begin"
115 120
116 121 ${SET_DEBUG}
117 122
118 123 if [ -s "${LOGFILE}" ]
119 124 then
120 125 PRIORITY=${1}
121 126 HEADER=${2}
122 127
123 128 #
124 129 # Ensure that the while loop only reads a closed file
125 130 #
126 131 strings ${LOGFILE} > ${LOGFILE}.copy
127 132 while read MSG_TXT
128 133 do
129 134 scds_syslog -p daemon.${PRIORITY} -t $(syslog_tag) -m \
130 135 "%s - %s" "${HEADER}" "${MSG_TXT}"
131 136 done < ${LOGFILE}.copy
132 137 fi
133 138
134 139 cat /dev/null > ${LOGFILE} > /dev/null
135 140 cat /dev/null > ${LOGFILE}.copy
136 141
137 142 debug_message "Function: log_message - End"
138 143 }
139 144
140 145
141 146 get_resource_property()
142 147 {
143 148 debug_message "Function: get_resource_property - Begin"
144 149 ${SET_DEBUG}
145 150
146 151 typeset RS=${1}
147 152 typeset PROPERTY=${2}
148 153 typeset rc
149 154
150 155 # Retrieve the property value.
151 156 OUTPUT=$(${SCHA_RESOURCE_GET} -O Extension -R ${RS} ${PROPERTY})
152 157 rc=${?}
153 158
154 159 debug_message "get_resource_property - " \
155 160 "scha_resource_get of property ${PROPERTY} returned ${rc}"
156 161
157 162 if (( ${rc} == 0 ))
158 163 then
159 164 # print the values
160 165 echo ${OUTPUT} | ${AWK} '{ \
161 166 if (NF > 1) for (i = 2; i <= NF; i++) print $i; else print "" }'
162 167 fi
163 168
164 169 debug_message "Function: get_resource_property - End"
165 170
166 171 return ${rc}
167 172 }
168 173
169 174
170 175 get_properties()
171 176 {
172 177 debug_message "Function: get_properties - Begin"
173 178 ${SET_DEBUG}
174 179
175 180 typeset -i rc
176 181 typeset props=$*
177 182
178 183 for prop in ${props}
179 184 do
180 185 # retrieve the property value
181 186 typeset val=$(get_resource_property ${RESOURCE} ${prop})
182 187 rc=${?}
183 188
184 189 if (( ${rc} == 0 ))
185 190 then
186 191 case ${prop} in
187 192 Domain_name) [[ -z ${DOMAIN} ]] && DOMAIN=${val};;
188 193 Migration_type) [[ -z ${MIGRATION_TYPE} ]] && MIGRATION_TYPE=${val};;
189 194 Plugin_probe) [[ -z ${PLUGIN_PROBE} ]] && PLUGIN_PROBE=${val};;
190 195 Password_file) [[ -z ${PASSWORD_FILE} ]] && PASSWORD_FILE=${val};;
191 196 Debug_level) [[ -z ${DEBUG_LEVEL} ]] && DEBUG_LEVEL=${val};;
192 197 esac
193 198 else
194 199 # SCMSGS
195 200 # @explanation
196 201 # The scha_resource_get call failed.
197 202 # @user_action
198 203 # Check the syslog for further messages.
199 204 scds_syslog -p daemon.error -t $(syslog_tag) -m \
200 205 "Cannot get the property %s of resource %s." \
201 206 "${prop}" "${RESOURCE}"
202 207 break
203 208 fi
204 209 done
205 210
206 211 debug_message "Function: get_properties - End"
|
↓ open down ↓ |
114 lines elided |
↑ open up ↑ |
207 212
208 213 return ${rc}
209 214 }
210 215
211 216 validate_xvm()
212 217 {
213 218 debug_message "Function: validate_xvm - Begin"
214 219 ${SET_DEBUG}
215 220
216 221 typeset rc=0
222 + typeset msgtext
217 223
218 224 if [ "$(/usr/bin/uname -i)" != "i86xpv" ]
219 225 then
220 226 # SCMSGS
221 227 # @explanation
222 228 # Solaris is not booted with xVM.
223 229 # @user_action
224 230 # Ensure that the default boot grub menu is set to boot
225 231 # Solaris xVM.
226 232 scds_syslog -p daemon.error -t $(syslog_tag) -m \
227 233 "Node is not booted with xVM."
228 234
235 + msgtext=$(gettext "Node is not booted with xVM.")
236 + i18n_message "${msgtext}"
237 +
229 238 rc=1
230 239 fi
231 240
232 241 debug_message "Function: validate_xvm - End"
233 242
234 243 return ${rc}
235 244 }
236 245
237 246 validate_ldom()
238 247 {
239 248 debug_message "Function: validate_ldom - Begin"
240 249 ${SET_DEBUG}
241 250
242 251 typeset ncount=0
252 + typeset msgtext
243 253
244 254 # Make sure that the password file is readable.
245 - if [ ! -r "${PASSWORD_FILE}" ]
255 + if [[ ${MIGRATION_TYPE} != "NORMAL" ]]
246 256 then
247 - # SCMSGS
248 - # @explanation
249 - # Incorrect Password file specified.
250 - # @user_action
251 - # Ensure that a valid password file is specified.
252 - scds_syslog -p daemon.error -t $(syslog_tag) -m \
253 - "Invalid password file specified %s." \
254 - "${PASSWORD_FILE}"
257 + if [ -z "${PASSWORD_FILE}" ]
258 + then
259 + # SCMSGS
260 + # @explanation
261 + # Password file cannot be null.
262 + # @user_action
263 + # Ensure that a password file name is specified.
264 + scds_syslog -p daemon.error -t $(syslog_tag) -m \
265 + "Password file cannot be (null)."
255 266
256 - debug_message "Function: validate_ldom - End"
257 - return 1
267 + msgtext=$(gettext "Password file cannot be (null).")
268 + i18n_message "${msgtext}"
269 +
270 + debug_message "Function: validate_ldom - End"
271 + return 1
272 + fi
273 +
274 + if [[ ! -f "${PASSWORD_FILE}" ]] || [[ ! -r "${PASSWORD_FILE}" ]]
275 + then
276 + # SCMSGS
277 + # @explanation
278 + # Incorrect Password file specified.
279 + # @user_action
280 + # Ensure that a valid password file is specified.
281 + scds_syslog -p daemon.error -t $(syslog_tag) -m \
282 + "Invalid password file specified %s." \
283 + "${PASSWORD_FILE}"
284 +
285 + msgtext=$(gettext "Invalid password file specified %s.")
286 + i18n_message "${msgtext}" "${PASSWORD_FILE}"
287 +
288 + debug_message "Function: validate_ldom - End"
289 + return 1
290 + fi
258 291 fi
259 292
260 293 # Ensure that the control domain is a cluster node.
261 294 if ! ${LDM} ls > /dev/null 2>&1
262 295 then
263 296 # SCMSGS
264 297 # @explanation
265 298 # Self explanatory.
266 299 # @user_action
267 300 # Ensure that the resource is configured in
268 301 # control domain.
269 302 scds_syslog -p daemon.error -t $(syslog_tag) -m \
270 303 "The LDom Manager is running in configuration mode."
271 304
272 - debug_message "Function: validate_ldom - End"
305 + msgtext=$(gettext "The LDom Manager is running in configuration mode.")
306 + i18n_message "${msgtext}"
307 +
308 + debug_message "Function: validate_ldom - End"
273 309 return 1
274 310 fi
275 311
276 312 # Ensure that the failure-policy setting is set to "reset".
277 313 # If the control domain fails, this would allow the guest domains
278 314 # to panic.
279 315 policy=$(${LDM} list -o domain primary \
280 316 | ${AWK} -F"=" '$1~/failure-policy/ {print $2}')
281 317
282 318 if [ "${policy}" != "reset" ]
283 319 then
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
284 320 # SCMSGS
285 321 # @explanation
286 322 # Incorrect failure-policy setting for the domain.
287 323 # @user_action
288 324 # Ensure that the failure-policy for the domain is
289 325 # set to "reset" on the control domain.
290 326 scds_syslog -p daemon.error -t $(syslog_tag) -m \
291 327 "Invalid failure policy \"%s\" for %s domain." \
292 328 "${policy}" "primary"
293 329
330 + msgtext=$(gettext "Invalid failure policy \"%s\" for %s domain.")
331 + i18n_message "${msgtext}" "${policy}" "primary"
332 +
294 333 debug_message "Function: validate_ldom - End"
295 334 return 1
296 335 fi
297 336
298 337 # The CL_EXEC_CLIENT program executes a command on any of the
299 338 # cluster nodes or a zone or in a zone cluster. It then generates
300 339 # as output the exit status of command and the stdout and stderr
301 340 # messages. The valid options are:
302 341 # [ -z zoneclustername] The command is run on the zone cluster
303 342 # represented by the zonename.
304 343 # -C { TS | RT | FSS | FX } The scheduling class in which the
305 344 # command is to be run.
306 345 # -p pri Specifies the priority of the command in the given
307 346 # scheduling class.
308 347 # -n id[,id..] A comma seperated list of node ID's of a
309 348 # zone cluster or a node to run the command.
310 349 # -c cmd [Args] The command to be run along with its arguments.
311 350
|
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
312 351 for nodename in $(${SCHA_RESOURCEGROUP_GET} -O NODELIST -G ${RESOURCEGROUP})
313 352 do
314 353 if [[ "$(${SCHA_CLUSTER_GET} -O NodeState_Node ${nodename})" == "DOWN" ]]
315 354 then
316 355 continue
317 356 fi
318 357
319 358 nodeid=$(${SCHA_CLUSTER_GET} -O NODEID_NODENAME ${nodename})
320 359 output=$(${CL_EXEC_CLIENT} -n ${nodeid} -c "${LDM} list-domain ${DOMAIN}")
321 360 result=${?}
322 -
323 361 status=$(echo ${output} | ${AWK} '{print $6}')
324 362
325 363 if (( ${result} == 0 )) && (( ${status} == 0 ))
326 364 then
327 365 domstate=$(echo $output | ${AWK} -F" " '{print $18}')
328 366
329 367 if (( ${update} == 0)) && echo $domstate | ${GREP} -q -E "^active$|suspending|resuming|suspended|starting" > /dev/null 2>&1
330 368 then
331 369 # SCMSGS
332 370 # @explanation
333 371 # The domain is in an invalid state.
334 372 # @user_action
335 373 # Ensure that the domain is in inactive or bound state.
336 374 scds_syslog -p daemon.error -t $(syslog_tag) -m \
337 375 "Domain %s is in %s state on %s." \
338 376 "${DOMAIN}" "${domstate}" "${nodename}"
339 377
378 + msgtext=$(gettext "Domain %s is in %s state on %s.")
379 + i18n_message "${msgtext}" "${DOMAIN}" "${domstate}" "${nodename}"
380 +
340 381 debug_message "Function: validate_ldom - End"
341 382 return 1
342 383 fi
343 384
344 385 ncount=$((ncount+1))
345 386 nlist=$(echo ${nodename} ${nlist})
346 387
347 388 # dump domain configuration to ccr
348 389 if [[ "$(/usr/bin/hostname)" == "${nodename}" ]]
349 390 then
350 391 if ! dump_domain_config
351 392 then
352 393 debug_message "Function: validate_ldom - End"
353 394 return 1
354 395 fi
355 396 fi
|
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
356 397 fi
357 398 done
358 399
359 400 if (( ${ncount} == 0 ))
360 401 then
361 402 if ! ${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} > /dev/null 2>&1
362 403 then
363 404 scds_syslog -p daemon.error -t $(syslog_tag) -m \
364 405 "Domain %s does not exist." \
365 406 "${DOMAIN}"
407 +
408 + msgtext=$(gettext "Domain %s does not exist.")
409 + i18n_message "${msgtext}" "${DOMAIN}"
410 +
366 411 return 1
367 412 fi
368 413 fi
369 414
370 415 if [[ ${ncount} -gt 1 ]]
371 416 then
372 417 # SCMSGS
373 418 # @explanation
374 419 # The domain is configured on multiple
375 420 # cluster nodes.
376 421 # @user_action
377 422 # Ensure that the domain is configured on one node
378 423 # of the cluster.
379 424 scds_syslog -p daemon.error -t $(syslog_tag) -m \
380 425 "Multiple domain %s configuration exists on %s." \
381 426 "${DOMAIN}" "${nlist}"
427 +
428 + msgtext=$(gettext "Multiple domain %s configuration exists on %s.")
429 + i18n_message "${msgtext}" "${DOMAIN}" "${nlist}"
430 +
382 431 return 1
383 432 fi
384 433
385 434 debug_message "Function: validate_ldom - End"
386 435 return 0
387 436 }
388 437
389 438 validate()
390 439 {
391 440 debug_message "Function: validate - Begin"
392 441 ${SET_DEBUG}
393 442
394 443 typeset rc
395 444
396 445 # Make sure that the plugin probe specified is readable.
397 446 if [[ -n "${PLUGIN_PROBE}" ]]
398 447 then
399 448 if [ -f "${PLUGIN_PROBE}" ] && [ ! -r "${PLUGIN_PROBE}" ]
|
↓ open down ↓ |
8 lines elided |
↑ open up ↑ |
400 449 then
401 450 # SCMSGS
402 451 # @explanation
403 452 # Incorrect user probe file specified.
404 453 # @user_action
405 454 # Ensure that a valid user probe file is specified.
406 455 scds_syslog -p daemon.error -t $(syslog_tag) -m \
407 456 "Invalid user probe file %s." \
408 457 "${PLUGIN_PROBE}"
409 458
459 + msgtext=$(gettext "Invalid user probe file %s.")
460 + i18n_message "${msgtext}" "${PLUGIN_PROBE}"
461 +
410 462 return 1
411 463 fi
412 464 fi
413 465
414 466 validate_${VM}
415 467 rc=${?}
416 468
417 469 debug_message "Function: validate - End"
418 470 return ${rc}
419 471 }
420 472
421 473 #
422 474 # get the domain status
423 475 #
424 476 get_xvm_status()
425 477 {
426 478 debug_message "Function: get_xvm_status - Begin"
427 479 ${SET_DEBUG}
428 480
429 481 typeset rc
430 482
431 483 ${VIRSH} domstate ${DOMAIN}
432 484 rc=${?}
433 485
434 486 debug_message "Function: get_xvm_status - End"
435 487 return ${rc}
436 488 }
437 489
438 490 get_ldom_status()
439 491 {
440 492 debug_message "Function: get_ldom_status - Begin"
441 493 ${SET_DEBUG}
442 494
443 495 typeset rc=1
444 496
445 497 OUTPUT=$(${LDM} list-domain ${DOMAIN})
446 498
447 499 if (( ${?} == 0 ))
448 500 then
449 501 echo ${OUTPUT} | ${AWK} '{print $10}'
450 502 rc=${?}
451 503 fi
452 504
453 505 debug_message "Function: get_ldom_status - End"
454 506 return ${rc}
455 507 }
456 508
457 509 #
458 510 # Routines to create the domain on the current cluster node.
459 511 #
460 512 add_xvm_domain()
461 513 {
462 514 debug_message "Function: add_xvm_domain - Begin"
463 515 ${SET_DEBUG}
464 516
465 517 typeset rc=0
466 518
467 519 if ! ${VIRSH} define ${TMP_DIR}/${RESOURCE}.xml >> $LOGFILE 2>&1
468 520 then
469 521 # SCMSGS
470 522 # @explanation
471 523 # Defining the domain using an XML file failed.
472 524 # @user_action
473 525 # The command /usr/bin/virsh define failed to define the domain.
474 526 # Determine if you have specified the correct domain name while
475 527 # registering the resource.
476 528 scds_syslog -p daemon.error -t $(syslog_tag) -m \
477 529 "Failed to define %s using %s/%s.xml." \
478 530 "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}"
479 531 rc=1
480 532 fi
481 533
482 534 debug_message "Function: add_xvm_domain - End"
483 535 return ${rc}
484 536 }
485 537
486 538 add_ldom_domain()
487 539 {
488 540 debug_message "Function: add_ldom_domain - Begin"
489 541 ${SET_DEBUG}
490 542
491 543 typeset rc=0
492 544
493 545 if ! ${LDM} add-domain -i ${TMP_DIR}/${RESOURCE}.xml ${DOMAIN} >> $LOGFILE 2>&1
494 546 then
495 547 # SCMSGS
496 548 # @explanation
497 549 # Defining the domain using an XML file failed.
498 550 # @user_action
499 551 # The command /opt/SUNWldm/bin/ldm "add-domain"
500 552 # failed to define the domain. Determine if you
501 553 # have specified the correct domain name when
502 554 # registering the resource.
503 555 scds_syslog -p daemon.error -t $(syslog_tag) -m \
504 556 "Failed to add the domain %s using %s/%s.xml." \
505 557 "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}"
506 558 rc=1
507 559 fi
508 560
509 561 debug_message "Function: add_ldom_domain - End"
510 562 return ${rc}
511 563 }
512 564
513 565 #
514 566 # test if domain is active
515 567 #
516 568 is_xvm_up()
517 569 {
518 570 debug_message "Function: is_xvm_up - Begin"
519 571 ${SET_DEBUG}
520 572
521 573 typeset rc=0
522 574
523 575 echo $(${VIRSH} domstate ${DOMAIN}) | \
524 576 ${GREP} -q -E "running|blocked|paused|in shutdown" > /dev/null 2>&1
525 577 rc=${?}
526 578
527 579 debug_message "Function: is_xvm_up - End"
528 580 return ${rc}
529 581 }
530 582
531 583 is_ldom_up()
532 584 {
533 585 debug_message "Function: is_ldom_up - Begin"
534 586 ${SET_DEBUG}
535 587
536 588 typeset rc=0
537 589
538 590 get_ldom_status | ${GREP} -q -E "^active$|^starting$" > /dev/null 2>&1
539 591 rc=${?}
540 592
541 593 debug_message "Function: is_ldom_up - End"
542 594 return ${rc}
543 595 }
544 596
545 597 #
546 598 # wrapper routines to start xvm or ldom domains
547 599 #
548 600 start_xvm()
549 601 {
550 602 debug_message "Function: start_xvm - Begin"
551 603 ${SET_DEBUG}
552 604
553 605 typeset rc=0
554 606
555 607 ${VIRSH} start ${DOMAIN} >> $LOGFILE 2>&1
556 608 rc=${?}
557 609
558 610 debug_message "Function: start_xvm - End"
559 611 return ${rc}
560 612 }
561 613
562 614 #
563 615 # After a crash/reboot of the node, the domain
564 616 # would be started and there would be multiple
565 617 # instances of the same domain across cluster
566 618 # nodes. Hence the domain is destroyed.
567 619 #
568 620 init_ldom()
569 621 {
570 622 debug_message "Function: init_ldom - Begin"
571 623 ${SET_DEBUG}
572 624
573 625 typeset rc
574 626
575 627 MAX_STOP_TIMEOUT=$(${SCHA_RESOURCE_GET} -O INIT_TIMEOUT \
576 628 -R ${RESOURCE} -G ${RESOURCEGROUP} )
577 629
578 630 domain_shutdown
579 631 rc=${?}
580 632
581 633 debug_message "Function: init_ldom - End"
582 634 return ${rc}
583 635 }
584 636
585 637 start_ldom()
586 638 {
587 639 debug_message "Function: start_ldom - Begin"
588 640 ${SET_DEBUG}
589 641
590 642 typeset rc=0
591 643
592 644 if get_${VM}_status | ${GREP} -q -E "^inactive$" > /dev/null 2>&1
593 645 then
594 646 if ${LDM} bind-domain ${DOMAIN} >> $LOGFILE 2>&1
595 647 then
596 648 # SCMSGS
597 649 # @explanation
598 650 # The domain was bound.
599 651 # @user_action
600 652 # None required. The domain has been bound on this node.
601 653 scds_syslog -p daemon.notice -t $(syslog_tag) -m \
602 654 "Domain %s is bound." \
603 655 "${DOMAIN}"
604 656 rc=0
605 657 else
606 658 # SCMSGS
607 659 # @explanation
608 660 # The /opt/SUNWldm/bin/ldm bind-domain command failed.
609 661 # @user_action
610 662 # Determine why it was not possible to bind the domain.
611 663 scds_syslog -p daemon.error -t $(syslog_tag) -m \
612 664 "Failed to bind %s." \
613 665 "${DOMAIN}"
614 666 rc=1
615 667 fi
616 668
617 669 fi
618 670
619 671 #
620 672 # The domain is made to sit at the OBP prompt, so a reboot/crash
621 673 # wouldn't boot the Guest domain OS.
622 674 #
623 675 if (( ${rc} == 0 )) && ${LDM} set-var auto-boot?=true ${DOMAIN} >> $LOGFILE 2>&1
624 676 then
625 677 if ${LDM} start-domain ${DOMAIN} >> $LOGFILE 2>&1
626 678 then
627 679 while [ 1 ]
628 680 do
629 681 flag=$(${LDM} list-domain -p ${DOMAIN} | ${GREP} ${DOMAIN} \
630 682 | ${AWK} -F"|" '{print $4}'| ${AWK} -F"=" '{print $2}')
631 683 [[ "${flag}" == "-n----" ]] && break
632 684 ${SLEEP} 1
633 685 done
634 686 else
635 687 rc=1
636 688 fi
637 689 ${LDM} set-var auto-boot?=false ${DOMAIN} >> $LOGFILE 2>&1 || rc=1
638 690 else
639 691 rc=1
640 692 fi
641 693
642 694 debug_message "Function: start_ldom - End"
643 695 return ${rc}
644 696 }
645 697
646 698 start_domain()
647 699 {
648 700 debug_message "Function: start_domain - Begin"
649 701 ${SET_DEBUG}
650 702
651 703 typeset rc=0
652 704
653 705 # Turn off PMF restart. Starting a domain does not leave
654 706 # a running pid as in a classic Solaris Cluster agent.
655 707
656 708 START_TIMEOUT=$(${SCHA_RESOURCE_GET} -O START_TIMEOUT \
657 709 -R ${RESOURCE} -G ${RESOURCEGROUP} )
658 710
659 711 ${SLEEP} ${START_TIMEOUT} &
660 712 /usr/cluster/bin/pmfadm -s ${RESOURCEGROUP},${RESOURCE},0.svc
661 713
662 714 # Check if the domain exists.
663 715 #
664 716 # If the domain does not exist, we may be starting the domain
665 717 # on a new cluster node following a failover. As such we will
666 718 # define the domain using the previously dumped XML file
667 719 # located within the agent's administrative file system.
668 720 #
669 721 # If the domain already exists, either the domain was manually
670 722 # started or the domain was migrated or live migrated from
671 723 # another cluster node. Therefore, we will use the already
672 724 # defined domain.
673 725 #
674 726 # Note that when the domain is successfully stopped the domain
675 727 # is deleted. We do this simply to avoid the domain from
676 728 # being manually started on multiple cluster nodes. See
677 729 # domain_delete() for more information.
678 730
679 731 if get_${VM}_status > /dev/null 2>&1
680 732 then
681 733 debug_message "Validate - domain ${DOMAIN} exists"
682 734 else
683 735 if ${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} > ${TMP_DIR}/${RESOURCE}.xml 2> /dev/null
684 736 then
685 737 # add the domain to the cluster node
686 738 if add_${VM}_domain ${DOMAIN} ${TMP_DIR}/${RESOURCE}.xml
687 739 then
688 740 # SCMSGS
689 741 # @explanation
690 742 # The domain is being defined using an XML file.
691 743 # @user_action
692 744 # None, the domain is being defined using a previously defined
693 745 # XML file when the domain was last successfully started.
694 746 scds_syslog -p daemon.notice -t $(syslog_tag) -m \
695 747 "Domain %s defined using %s/%s.xml." \
696 748 "${DOMAIN}" "${TMP_DIR}" "${RESOURCE}"
697 749 else
698 750 # error already logged.
699 751 debug_message "Function: start_domain - End"
700 752 return 1
701 753 fi
702 754 else
703 755 # SCMSGS
704 756 # @explanation
705 757 # The domain does not exist.
706 758 # @user_action
707 759 # You must ensure that the domain exists.
708 760 scds_syslog -p daemon.error -t $(syslog_tag) -m \
709 761 "Domain %s does not exist." \
710 762 "${DOMAIN}"
711 763
712 764 debug_message "Function: start_domain - End"
713 765 return 1
714 766 fi
715 767 fi
716 768
717 769 # Tolerate a manually started domain and a NO-OP start
718 770 # otherwise start the domain.
719 771
720 772 if ${CCRADM} showkey --key noop_${RESOURCE} ${CCR_TABLE} > /dev/null 2>&1
721 773 then
722 774 # SCMSGS
723 775 # @explanation
724 776 # The domain was migrated or live migrated.
725 777 # @user_action
726 778 # None required. Informational message.
727 779 scds_syslog -p daemon.notice -t $(syslog_tag) -m \
728 780 "NO-OP START being performed."
729 781
730 782 if ! ${CCRADM} delkey --key noop_${RESOURCE} ${CCR_TABLE} >> $LOGFILE 2>&1
731 783 then
732 784 # SCMSGS
733 785 # @explanation
734 786 # Failed to delete the NO-OP flag from CCR.
735 787 # @user_action
736 788 # Check the syslog for further messages.
737 789 # Determine why the NO-OP flag was not deleted from the CCR.
738 790 scds_syslog -p daemon.error -t $(syslog_tag) -m \
739 791 "Failed to delete NO-OP flag for %s domain." \
740 792 "${DOMAIN}"
741 793
742 794 debug_message "Function: start_domain - End"
743 795 return 1
744 796 else
745 797 debug_message "start_domain - noop_${RESOURCE} deleted"
746 798 fi
747 799
748 800 elif is_${VM}_up
749 801 then
750 802 # SCMSGS
751 803 # @explanation
752 804 # The domain was manually started.
753 805 # @user_action
754 806 # None required. Informational message.
755 807 scds_syslog -p daemon.notice -t $(syslog_tag) -m \
756 808 "Domain %s was manually started." \
757 809 "${DOMAIN}"
758 810 else
759 811 if start_${VM}
760 812 then
761 813 # SCMSGS
762 814 # @explanation
763 815 # The domain was started successfully.
764 816 # @user_action
765 817 # None required. Informational message.
766 818 scds_syslog -p daemon.notice -t $(syslog_tag) -m \
767 819 "Domain %s started." \
768 820 "${DOMAIN}"
769 821 else
770 822 # SCMSGS
771 823 # @explanation
772 824 # The domain failed to start.
773 825 # @user_action
774 826 # Check the syslog for further messages. If possible
775 827 # the cluster will attempt to restart the domain.
776 828 scds_syslog -p daemon.error -t $(syslog_tag) -m \
777 829 "Domain %s failed to start." \
778 830 "${DOMAIN}"
779 831
780 832 rc=1
781 833 fi
782 834 fi
783 835
784 836 if (( ${rc} == 0 ))
785 837 then
786 838 # Dump the domain configuration into an XML file. This file is then
787 839 # used on another cluster node to define the domain but only if the
788 840 # domain does not exist.
789 841
790 842 dump_domain_config
791 843 rc=${?}
792 844 fi
793 845
794 846 debug_message "Function: start_domain - End"
795 847 return ${rc}
796 848 }
797 849
798 850 #
799 851 # dump the domain configuration
800 852 #
801 853 dump_xvm_xml()
802 854 {
803 855 debug_message "Function: dump_xvm_xml - Begin"
804 856 ${SET_DEBUG}
805 857
806 858 typeset rc=0
807 859
808 860 if ! ${VIRSH} dumpxml ${DOMAIN} 2>> $LOGFILE
809 861 then
810 862 # SCMSGS
811 863 # @explanation
812 864 # "/usr/bin/virsh dumpxml" for domain failed.
813 865 # @user_action
814 866 # Determine why the command to dump domain
815 867 # configuration failed.
816 868 scds_syslog -p daemon.error -t $(syslog_tag) -m \
817 869 "%s dumpxml for domain %s failed." \
818 870 "${VIRSH}" "${DOMAIN}"
819 871 rc=${?}
820 872 fi
821 873
822 874 debug_message "Function: dump_xvm_xml - End"
823 875 return ${rc}
824 876 }
825 877
826 878 dump_ldom_xml()
827 879 {
828 880 debug_message "Function: dump_ldom_xml - Begin"
829 881 ${SET_DEBUG}
830 882
831 883 typeset rc=0
832 884
833 885 if ! ${LDM} list-constraints -x ${DOMAIN} 2>> $LOGFILE
834 886 then
835 887 # SCMSGS
836 888 # @explanation
837 889 # "/opt/SUNWldm/bin/ldm list-constraints -x"
838 890 # for domain failed.
839 891 # @user_action
840 892 # Determine why the command to list the
841 893 # domain constraints failed.
842 894 scds_syslog -p daemon.error -t $(syslog_tag) -m \
843 895 "%s list-constraints for domain %s failed." \
844 896 "${LDM}" "${DOMAIN}"
845 897 rc=1
846 898 fi
847 899
848 900 debug_message "Function: dump_ldom_xml - End"
849 901 return ${rc}
850 902 }
851 903
852 904 #
853 905 # save the domain configuration in the cluster
854 906 # configuration repository
855 907 #
856 908 dump_domain_config()
857 909 {
858 910 debug_message "Function: dump_domain_config - Begin"
859 911 ${SET_DEBUG}
860 912
861 913 typeset rc=0
862 914
863 915 # Dump the domain configuration into an XML file. The domain configuration
864 916 # can be changed when under the agent's control.
865 917
866 918 olddesc=$(${CCRADM} showkey --key xml_${RESOURCE} ${CCR_TABLE} 2> /dev/null)
867 919
868 920 if (( ${?} == 1 ))
869 921 then
870 922 #
871 923 # The ccr table might not exist.
872 924 # create the CCR table, if it doesn't exist.
873 925 #
874 926 if ${CCRADM} addtab ${CCR_TABLE} >> $LOGFILE 2>&1
875 927 then
876 928 debug_message "created ccr table ${CCR_TABLE}"
877 929 else
878 930 # SCMSGS
879 931 # @explanation
880 932 # Failed to create the CCR table.
881 933 # @user_action
882 934 # Check the syslog for further messages.
883 935 # Determine why the CCR create failed.
884 936 scds_syslog -p daemon.error -t $(syslog_tag) -m \
885 937 "Failed to create CCR table %s." \
886 938 "${CCR_TABLE}"
887 939
888 940 return 1
889 941 fi
890 942 fi
891 943
892 944 output=$(dump_${VM}_xml)
893 945 if (( ${?} == 0 )) && [[ -n "${output}" ]]
894 946 then
895 947 newdesc=$(echo ${output} | ${TR} -s '\n' '[ ]')
896 948 if [ "${olddesc}" != "${newdesc}" ]
897 949 then
898 950 if ! ${CCRADM} addkey --key=xml_${RESOURCE} --value "${newdesc}" ${CCR_TABLE} > /dev/null 2>&1
899 951 then
900 952 if ! ${CCRADM} changekey --key=xml_${RESOURCE} --value "${newdesc}" ${CCR_TABLE} >> $LOGFILE 2>&1
901 953 then
902 954 # SCMSGS
903 955 # @explanation
904 956 # Failed to update the XML dump to the CCR.
905 957 # @user_action
906 958 # Check the syslog for further messages.
907 959 # Determine why the ccr update failed.
908 960 scds_syslog -p daemon.error -t $(syslog_tag) -m \
909 961 "Failed to update domain XML %s to ccr." \
910 962 "${DOMAIN}"
911 963
912 964 rc=1
913 965 fi
914 966 else
915 967 debug_message "dump_domain_config - %s configuration added to CCR" "${DOMAIN}"
916 968 fi
917 969 fi
918 970 else
919 971 # error already logged.
920 972 rc=1
921 973 fi
922 974
923 975 debug_message "Function: dump_domain_config - End"
924 976
925 977 return ${rc}
926 978 }
927 979
#
# Probe function for the domain data service.
#
# Returns 100 immediately while a "control_xvm start" for this resource is
# still running.  Otherwise the state printed by get_${VM}_status decides:
#   - acceptable run states: 0, unless PLUGIN_PROBE is set, in which case
#     the plugin is run under ${HATIMERUN} and its exit status is mapped
#     as listed below;
#   - restartable or unknown run states: 100.
#
# Plugin exit status mapping:
#   0     -> 0
#   99    -> 100 (logged as "did not complete within" the timeout)
#   100   -> 100 (restart requested, or a GDS start/stop is in progress)
#   201   -> 201 (immediate failover), 100 while a GDS start/stop is running
#   other -> 100
# A PLUGIN_PROBE whose first word is not an executable file is logged and
# the probe returns 0.
#
# Globals read: RESOURCE, RESOURCEGROUP, VM, DOMAIN, PLUGIN_PROBE
# Globals set:  SECONDS (reset to 0), domstate, PROBE_TIMEOUT,
#               HATIMERUN_TIMEOUT, output
#
check_domain()
{
	debug_message "Function: check_domain - Begin"
	${SET_DEBUG}

	typeset rc
	SECONDS=0

	if ${PGREP} -f "control_xvm start -R ${RESOURCE} " >/dev/null 2>&1
	then
		debug_message "Function: check_domain - start program is still running "
		rc=100
	else
		domstate=$(get_${VM}_status 2>/dev/null)

		case "${domstate}" in

		# Acceptable run states
		"running"|"blocked"|"paused"|"in shutdown"| \
		"active"|"suspending"|"resuming"|"suspended"|"starting")

			if [ "${#PLUGIN_PROBE}" -ne 0 ]
			then
				# Only the first word of PLUGIN_PROBE is the program, the
				# remainder are its arguments.
				if [ -x "$(echo ${PLUGIN_PROBE} | ${AWK} '{print $1}')" ]
				then
					PROBE_TIMEOUT=$(${SCHA_RESOURCE_GET} -O Extension -R ${RESOURCE} -G ${RESOURCEGROUP} Probe_timeout|tail -1)

					# Run the supplied probe with only 90% of PROBE_TIMEOUT,
					# less the time already consumed. Also note that this
					# value is supplied as a parameter to the PLUGIN_PROBE.
					HATIMERUN_TIMEOUT=$((PROBE_TIMEOUT*90/100-${SECONDS}))

					output=$(${HATIMERUN} -t ${HATIMERUN_TIMEOUT} -k 9 ${PLUGIN_PROBE} ${HATIMERUN_TIMEOUT})
					rc=${?}

					case ${rc} in
					0)	debug_message "check_domain - ${DOMAIN} ${output}"
						rc=0
						;;
					99)
						# SCMSGS
						# @explanation
						# The domain probe timed out.
						# @user_action
						# Ensure that ${PLUGIN_PROBE} can complete within
						# 90% of PROBE_TIMEOUT.
						scds_syslog -p daemon.error -t $(syslog_tag) -m \
						    "%s did not complete within %s seconds." \
						    "${PLUGIN_PROBE}" "${HATIMERUN_TIMEOUT}"

						rc=100
						;;
					100)	if ${PGREP} -f "gds_svc_start .*-R ${RESOURCE} " >/dev/null 2>&1
						then
							debug_message "check_domain - ${DOMAIN} is still starting"
							rc=100
						elif ${PGREP} -f "gds_svc_stop .*-R ${RESOURCE} " >/dev/null 2>&1
						then
							debug_message "check_domain - ${DOMAIN} is stopping"
							rc=100
						else
							# SCMSGS
							# @explanation
							# The domain probe has requested a domain restart.
							# @user_action
							# None. A domain restart will be attempted.
							#
							# Two arguments are passed, so both conversions
							# must be "%s".
							scds_syslog -p daemon.error -t $(syslog_tag) -m \
							    "%s has requested a domain restart %s." \
							    "${PLUGIN_PROBE}" "${output}"

							rc=100
						fi
						;;
					201)	if ${PGREP} -f "gds_svc_start .*-R ${RESOURCE} " >/dev/null 2>&1
						then
							debug_message "check_domain - ${DOMAIN} is still starting"
							rc=100
						elif ${PGREP} -f "gds_svc_stop .*-R ${RESOURCE} " >/dev/null 2>&1
						then
							debug_message "check_domain - ${DOMAIN} is stopping"
							rc=100
						else
							# SCMSGS
							# @explanation
							# The domain has requested an immediate failover.
							# @user_action
							# None. The domain will be immediately failed over.
							scds_syslog -p daemon.error -t $(syslog_tag) -m \
							    "%s has requested an immediate failover." \
							    "${PLUGIN_PROBE}"

							rc=201
						fi
						;;
					*)
						# SCMSGS
						# @explanation
						# ${PLUGIN_PROBE} did not return 0, 100 or 201.
						# @user_action
						# None. A domain restart will be attempted.
						scds_syslog -p daemon.error -t $(syslog_tag) -m \
						    "%s did not return 0, 100 or 201, a domain restart will be attempted." \
						    "${PLUGIN_PROBE}"
						rc=100
						;;
					esac
				else
					# SCMSGS
					# @explanation
					# ${PLUGIN_PROBE} does not exist or is not executable.
					# @user_action
					# Check the pathname exists and that ${PLUGIN_PROBE} is executable.
					scds_syslog -p daemon.error -t $(syslog_tag) -m \
					    "%s non-existent executable." \
					    "${PLUGIN_PROBE}"

					rc=0
				fi
			else
				rc=0
			fi

			;;

		# Restartable run states

		"shut off"|"crashed"| \
		"inactive"|"stopping")

			rc=100
			;;

		# Unknown run states

		*)
			rc=100
			;;
		esac

		debug_message "check_domain - ${DOMAIN} ${domstate}"

	fi

	debug_message "Function: check_domain - End"
	return ${rc}
}
1076 1128
#
# stop_domain - STOP processing for the domain resource.
#
# Saves the domain configuration with dump_domain_config (returns 1 if that
# fails), then either shuts the domain down or attempts a migration
# depending on ON_OFF_SWITCH and MIGRATION_TYPE.
#
# Globals read: RESOURCE, RESOURCEGROUP, MIGRATION_TYPE
# Globals set:  STOP_TIMEOUT, MAX_MIGRATE_TIMEOUT, MAX_STOP_TIMEOUT,
#               SECONDS (reset to 0), ON_OFF_SWITCH
# Returns: 0 on success, otherwise the failing step's status (1 for an
#          invalid MIGRATION_TYPE).
#
stop_domain()
{
	debug_message "Function: stop_domain - Begin"
	${SET_DEBUG}

	typeset rc=0

	STOP_TIMEOUT=$(${SCHA_RESOURCE_GET} -O STOP_TIMEOUT \
	    -R ${RESOURCE} -G ${RESOURCEGROUP} )

	# Note that GDS will attempt to cleanup after 80% of STOP_TIMEOUT
	# has been consumed. In this regard, we only allocate a combined
	# 75% of STOP_TIMEOUT to MAX_MIGRATE_TIMEOUT and MAX_STOP_TIMEOUT.
	#
	# This leaves 5% for domain_destroy() which may be called if
	# domain_shutdown() exceeds its timeout and finally domain_delete().

	MAX_MIGRATE_TIMEOUT=$((STOP_TIMEOUT*25/100))
	MAX_STOP_TIMEOUT=$((STOP_TIMEOUT*50/100))
	SECONDS=0

	# Save the domain configuration changes.
	if ! dump_domain_config
	then
		debug_message "Function: stop_domain - End"
		return 1
	fi

	# At resource creation, the administrator can determine the Migration_type.
	# Valid values for Migration_type are
	#
	# Migration_type="normal"
	#	o Stop the resource (shutdown the domain)
	#	o Failover the resource group from the source node to the target node
	#	o Start the resource (start the domain)
	#
	# Migration_type="migrate"
	#	o Suspend the domain on the source node
	#	o Copy the domain's memory pages from the source node to the target node
	#	o Resume the domain on the target node
	#
	# Migration_type="migrate_live"
	#	o Iteratively copy the domain's memory pages from the source node to the target node
	#	o When pre-copy is no longer beneficial, suspend the domain on the source node
	#	o Copy the domain's remaining "dirty" pages from the source node to the target node
	#	o Resume the domain on the target node
	#
	# Note that migration or live migration is performed over the cluster interconnect.
	#
	# For migration or live migration to be attempted across Solaris Cluster xVM nodes
	# the following conditions must be met.
	#
	# - The target Solaris Cluster xVM node must be running the same xVM version.
	#
	# - The migration TCP port must be open and accepting connections from the source
	#   Solaris Cluster xVM node.
	#
	# - There must be sufficient resources for the domain to run in.
	#
	# - If the conditions are met and migration or live migration is successful a NO-OP
	#   STOP and START is performed. This will ensure a successful STOP and START to the
	#   appropriate RGM callback methods. Furthermore, doing a NO-OP RGM failover will
	#   ensure that RGM subsequently actions any dependencies and that Solaris Cluster
	#   reflects the correct state and status of resource groups and resources.
	#
	# - If the conditions are met but migration or live migration is not successful a
	#   normal failover will be performed.
	#
	# - If the conditions are not met, migration or live migration will fail and a normal
	#   failover will be performed.
	#
	# However, before attempting a migration or live migration we need to determine if the
	# resource is being disabled. To distinguish if the resource is being disabled we
	# test the ON_OFF_SWITCH property of the resource.
	#
	# If the resource is being disabled the ON_OFF_SWITCH will be DISABLED before the STOP
	# method is called. So, conversely if the ON_OFF_SWITCH is ENABLED the resource is not
	# being disabled and instead the resource group is undergoing either a switch to
	# another node or is being evacuated from the node.
	#
	# - If the resource is being disabled we perform a normal shutdown, regardless of the
	#   Migration_type setting.

	ON_OFF_SWITCH=$(${SCHA_RESOURCE_GET} -O ON_OFF_SWITCH -R ${RESOURCE} -G ${RESOURCEGROUP})

	debug_message "stop_domain - ON_OFF_SWITCH=${ON_OFF_SWITCH}"
	debug_message "stop_domain - MIGRATION_TYPE=${MIGRATION_TYPE}"

	if [[ "${ON_OFF_SWITCH}" = "DISABLED" ]]
	then
		# Propagate a failed shutdown instead of reporting success.
		domain_shutdown
		rc=${?}
	else
		case "${MIGRATION_TYPE}" in
		NORMAL)
			domain_shutdown
			rc=${?}
			;;
		MIGRATE*)
			# A failed migration falls back to a normal shutdown, whose
			# status then becomes the result of the stop.
			if domain_migrate
			then
				rc=0
			else
				domain_shutdown
				rc=${?}
			fi
			;;
		*)
			# SCMSGS
			# @explanation
			# Invalid Migration_type specified.
			# @user_action
			# Delete and reregister the resource with
			# a valid Migration_type entry.
			scds_syslog -p daemon.error -t $(syslog_tag) -m \
			    "Invalid Migration_type=%s." \
			    "${MIGRATION_TYPE}"
			rc=1
			;;
		esac
	fi

	debug_message "Function: stop_domain - End"
	return ${rc}
}
1197 1249
#
# get_target_host - determine the node a migration should go to.
#
# Calls check_commandlog to obtain TARGET_HOST, rejects an empty target and a
# target that is the local node (or of the form <digit>:global), and then
# requires TARGET_HOST to be listed in the resource group's NODELIST.
#
# Globals read: RESOURCEGROUP, TARGET_HOST (set by check_commandlog)
# Globals set:  PRIVATELINK_TARGET_HOST (only when a valid target was found)
# Returns: 0 if a valid target host was found, 1 otherwise (the caller then
#          performs a normal failover).
#
get_target_host()
{
	debug_message "Function: get_target_host - Begin"
	${SET_DEBUG}

	typeset rc=1
	typeset node

	# Here, we need to determine the target host as the resource group is either being
	# switched or the node, where the resource group is online, is being evacuated.
	#
	# To determine the target host for a resource group switch we rely on the cluster
	# command log file /var/cluster/logs/commandlog to supply the target host. We need to
	# obtain the correct entry from the command log file and match against the following
	#
	#	<date> + ${RESOURCEGROUP} + "START" + "switch"
	#
	# after which we only save the nodename from a clrg or scswitch command.
	#
	# Sample /var/cluster/logs/commandlog output is as follows,
	#
	# 02/07/2008 08:45:13 pelko1 10548 root START - scswitch -z -g "xvm2-rg" -h "pelko2"
	# 02/07/2008 08:45:38 pelko1 10548 root END 0
	# 02/07/2008 09:01:35 pelko1 10874 root START - clrg "switch" -n "pelko1" "xvm2-rg"
	# 02/07/2008 09:01:36 pelko1 10874 root END -20827641
	#
	# If we are unable to match an entry, as perhaps the entry was logged at <date>
	# and we are checking at <date> + 1 second, i.e. we are checking just as the second
	# entry is incrementing to the next second, we perform another check. In fact the
	# last 10 seconds are checked from the commandlog.
	#
	# Once we have matched an entry from /var/cluster/logs/commandlog, we verify that
	# the target host is a valid nodelist entry for the resource group.
	#
	# - If we have a valid nodelist entry we then determine that target host's cluster
	#   interconnect hostname to perform the migration or live migration.
	#
	# - If we are unable to find a match for a switch, we need to consider that an evacuate
	#   node is being performed. However, if the node is being evacuated we will rely on
	#   RGM to determine the nodename regardless if a migration or live migration was
	#   requested. Subsequently, we perform a normal failover. This ensures that we do not
	#   migrate or live migrate the domain to a node that may be different to the node
	#   selected by RGM.
	#
	# So, suffice to say that if a "switch" match is not found, following the discovery
	# that the resource is not just being disabled, and that a migrate or live migrate
	# was defined, we will always perform a normal failover.
	#
	# Note that the target host match is performed within check_commandlog().

	check_commandlog

	debug_message "get_target_host - ${TARGET_HOST} size=${#TARGET_HOST}"

	if [ "${#TARGET_HOST}" -eq 0 ]
	then
		# SCMSGS
		# @explanation
		# A target host was not found
		# @user_action
		# None required. The domain will not be migrated or live
		# migrated instead a normal failover will be performed.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Target host not found, normal failover will be performed."

	# TARGET_HOST and the pattern are quoted so that an unexpected
	# multi-word value or a matching file name in the current directory
	# cannot break the tests.
	elif [ "${TARGET_HOST}" = "$(/usr/bin/uname -n)" ] || \
	    echo "${TARGET_HOST}" | /usr/bin/grep '[0-9]:global' >/dev/null 2>&1
	then
		# SCMSGS
		# @explanation
		# The node is being evacuated.
		# @user_action
		# None required. The domain will not be migrated or live
		# migrated. Instead, a normal failover will be performed.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Node is being evacuated, normal failover will be performed."

	else
		# TARGET_HOST is known not to be the local node at this point, so
		# it is valid only if it appears in the nodelist itself.
		for node in $(${SCHA_RESOURCEGROUP_GET} -O NODELIST -G ${RESOURCEGROUP})
		do
			if [[ "${node}" = "${TARGET_HOST}" ]]
			then
				rc=0
				break
			fi
		done

		if [ "${rc}" -eq 0 ]
		then
			PRIVATELINK_TARGET_HOST=$(${SCHA_CLUSTER_GET} -O PRIVATELINK_HOSTNAME_NODE ${TARGET_HOST})
			debug_message "get_target_host - PRIVATELINK_TARGET_HOST=${PRIVATELINK_TARGET_HOST}"
		else
			# SCMSGS
			# @explanation
			# The target host found in the command log file is not
			# a valid entry within the resource groups nodelist.
			# @user_action
			# None required. The domain will not be migrated or live
			# migrated instead a normal failover will be performed.
			scds_syslog -p daemon.notice -t $(syslog_tag) -m \
			    "Target host %s not matched with the resource group nodelist, normal failover will be performed." \
			    "${TARGET_HOST}"
		fi
	fi

	debug_message "Function: get_target_host - End"
	return ${rc}
}
1300 1352
#
# check_commandlog - look up the target host of a recent switch/evacuate.
#
# Searches /var/cluster/logs/commandlog for a START entry logged at the
# current second and, failing that, at each of the preceding seconds (10
# timestamps in total).  For each timestamp it looks, in order, for a
# "switch" entry naming ${RESOURCEGROUP}, an "evacuate" entry and a
# "scswitch -S" entry.
#
# Globals read: RESOURCEGROUP
# Globals set:  TARGET_HOST (empty when nothing matched)
#
check_commandlog()
{
	debug_message "Function: check_commandlog - Begin"

	# Get the current epoch time
	typeset ETIME=$(/usr/bin/perl -e 'print time;')
	typeset STAMP
	typeset tries=10

	while (( ${tries} > 0 ))
	do
		# Iteratively search the commandlog for a switch or evacuate, going back in time
		# by one second each time. If a match is found we break out of the loop.
		#
		# Both the date and the time of day are derived from the same epoch
		# value on every pass, so that stepping back across midnight also
		# steps the date back. For example,
		#
		# ETIME=1202814041  ->  STAMP="02/12/2008 03:00:41"
		# ETIME=1202814040  ->  STAMP="02/12/2008 03:00:40"
		#
		# (the rendered time is local time, matching the commandlog entries).

		# Convert the epoch time into the commandlog's "<date> <time>" format
		STAMP=$(/usr/bin/perl -MPOSIX -e \
		    'print strftime("%m/%d/%Y %H:%M:%S", localtime($ARGV[0]));' ${ETIME})

		debug_message "check_commandlog - performed for ${STAMP}"

		# Check for a clrg switch or scswitch
		TARGET_HOST=$(/usr/bin/grep "${STAMP}" /var/cluster/logs/commandlog |\
		    /usr/bin/grep -w START | /usr/bin/grep switch | /usr/bin/grep \"${RESOURCEGROUP}\" |\
		    /usr/bin/sed -e 's/^.*-h //' -e 's/^.*-n //' | ${AWK} '{print $1}' | ${TR} -d '" ')

		[ "${#TARGET_HOST}" -ne 0 ] && break

		# Check for a clrg evacuate
		TARGET_HOST=$(/usr/bin/grep "${STAMP}" /var/cluster/logs/commandlog |\
		    /usr/bin/grep -w START | /usr/bin/grep evacuate |\
		    /usr/bin/sed -e 's/^.*-n //' | ${AWK} '{print $1}' | ${TR} -d '+" ' )

		[ "${#TARGET_HOST}" -ne 0 ] && break

		# Check for a scswitch -S
		TARGET_HOST=$(/usr/bin/grep "${STAMP}" /var/cluster/logs/commandlog |\
		    /usr/bin/grep -w START | /usr/bin/grep scswitch | /usr/bin/grep "\-S" |\
		    /usr/bin/sed -e 's/^.*-h //' | ${AWK} '{print $1}' | ${TR} -d '\-SK" ' )

		[ "${#TARGET_HOST}" -ne 0 ] && break

		tries=$((tries - 1))
		ETIME=$((ETIME - 1))
	done

	debug_message "check_commandlog - TARGET_HOST=${TARGET_HOST}"

	debug_message "Function: check_commandlog - End"
}
1365 1417
1366 1418 #
1367 1419 # routines to perform domain migration
1368 1420 #
#
# migrate_xvm - migrate an xVM domain to ${PRIVATELINK_TARGET_HOST}.
#
# Maps MIGRATION_TYPE to the xm sub-command ("migrate" or "migrate --live")
# and runs it under ${HATIMERUN} limited to MAX_MIGRATE_TIMEOUT seconds.
# Positional arguments passed by the caller are not used.
#
# Globals read: MIGRATION_TYPE, DOMAIN, PRIVATELINK_TARGET_HOST,
#               MAX_MIGRATE_TIMEOUT, HATIMERUN, XM
# Returns: the exit status of the ${HATIMERUN} invocation, or 1 when
#          MIGRATION_TYPE is neither MIGRATE nor MIGRATE_LIVE.
#
migrate_xvm()
{
	debug_message "Function: migrate_xvm - Begin"
	${SET_DEBUG}

	typeset rc=0
	typeset OPTION

	case "${MIGRATION_TYPE}" in
	MIGRATE)	OPTION="migrate" ;;
	MIGRATE_LIVE)	OPTION="migrate --live" ;;
	*)
		debug_message "migrate_xvm - unsupported MIGRATION_TYPE=${MIGRATION_TYPE}"
		debug_message "Function: migrate_xvm - End"
		return 1
		;;
	esac

	debug_message "migrate_xvm - Running ${XM} ${OPTION} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}"

	# ${OPTION} is deliberately unquoted: "migrate --live" must be split
	# into the sub-command and its flag. The upper-case MIGRATION_TYPE
	# value itself is not an xm sub-command.
	${HATIMERUN} -t ${MAX_MIGRATE_TIMEOUT} -k KILL \
	    ${XM} ${OPTION} "${DOMAIN}" ${PRIVATELINK_TARGET_HOST} > /dev/null 2>&1
	rc=${?}

	debug_message "Function: migrate_xvm - End"
	return ${rc}
}
1388 1440
#
# migrate_ldom - migrate a logical domain to ${PRIVATELINK_TARGET_HOST}.
#
# Runs /opt/SUNWscxvm/bin/ldm_migrate under ${HATIMERUN}, limited to
# MAX_MIGRATE_TIMEOUT seconds, appending its output to ${LOGFILE}.
# OPTION is only assigned here when MIGRATION_TYPE is MIGRATE.
#
# Returns: the exit status of the ${HATIMERUN} invocation.
#
migrate_ldom()
{
	debug_message "Function: migrate_ldom - Begin"
	${SET_DEBUG}

	typeset rc=0

	if [[ "${MIGRATION_TYPE}" = "MIGRATE" ]]
	then
		OPTION="migrate"
	fi

	debug_message "domain_migrate - Running /opt/SUNWscxvm/bin/ldm_migrate ${OPTION} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}"

	${HATIMERUN} -t ${MAX_MIGRATE_TIMEOUT} -k KILL \
	    /opt/SUNWscxvm/bin/ldm_migrate \
	    ${OPTION} "${DOMAIN}" ${PRIVATELINK_TARGET_HOST} ${PASSWORD_FILE} \
	    >> $LOGFILE 2>&1
	rc=${?}

	debug_message "Function: migrate_ldom - End"
	return ${rc}
}
1407 1459
1408 1460 #
1409 1461 # routines to cancel migration
1410 1462 #
#
# cancel_xvm_migration - nothing needs cancelling for an xVM domain, so this
# always reports success.
#
cancel_xvm_migration()
{
	return 0
}
1416 1468
#
# cancel_ldom_migration - cancel an in-flight logical domain migration.
#
# Issues "ldm cancel-operation migration" for ${DOMAIN} (logging a notice
# when that succeeds) and then waits, in 5 second steps and no longer than
# until SECONDS reaches MAX_STOP_TIMEOUT, for the domain to leave the
# suspending/resuming/suspended/starting states.
#
# SECONDS is left running: it is the elapsed-time counter that the
# subsequent domain_shutdown wait loop compares against MAX_STOP_TIMEOUT,
# so it must not be overwritten just to leave the loop.
#
# Globals read: LDM, DOMAIN, VM, GREP, LOGFILE, MAX_STOP_TIMEOUT, SECONDS
# Returns: 0
#
cancel_ldom_migration()
{
	debug_message "Function: cancel_ldom_migration - Begin"
	${SET_DEBUG}

	# cancel domain migration for ldoms
	if ${LDM} cancel-operation migration ${DOMAIN} >> $LOGFILE 2>&1
	then
		# SCMSGS
		# @explanation
		# The domain migration operation was cancelled.
		# @user_action
		# None required. Informational message.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Migration of domain %s is cancelled, the domain state is now in active state." \
		    "${DOMAIN}"
	fi

	while (( ${SECONDS} < ${MAX_STOP_TIMEOUT} ))
	do
		if get_${VM}_status | ${GREP} -q -E "^suspending|^resuming|^suspended|^starting" > /dev/null 2>&1
		then
			sleep 5
		else
			# The domain has settled; stop waiting without touching
			# the elapsed-time counter.
			break
		fi
	done

	debug_message "Function: cancel_ldom_migration - End"
	return 0
}
1447 1499
#
# domain_migrate - migrate or live migrate the domain to the target host.
#
# Obtains the target with get_target_host, runs migrate_${VM} and then:
#   - on success, adds the noop_${RESOURCE} key to ${CCR_TABLE} so that the
#     STOP (and later START) is treated as a NO-OP; if the key cannot be
#     added the migration is reported as failed (rc=1);
#   - on status 99 (logged as a timeout) or any other failure, calls
#     cancel_${VM}_migration and reports failure.
# After a successful xvm migration the domain is removed from this node with
# domain_delete.
#
# Globals read: MIGRATION_TYPE, VM, DOMAIN, RESOURCE, CCR_TABLE, CCRADM,
#               LOGFILE, TARGET_HOST, PRIVATELINK_TARGET_HOST
# Globals set:  MSG
# Returns: 0 when the domain was migrated and flagged for a NO-OP stop,
#          1 otherwise (the caller then performs a normal shutdown).
#
domain_migrate()
{
	debug_message "Function: domain_migrate - Begin"
	${SET_DEBUG}

	typeset rc

	[[ "${MIGRATION_TYPE}" = "MIGRATE" ]] && MSG="migrated"
	[[ "${MIGRATION_TYPE}" = "MIGRATE_LIVE" ]] && MSG="live migrated"

	if get_target_host
	then
		# SCMSGS
		# @explanation
		# The domain is being migrated or live migrated to the target host.
		# @user_action
		# None required.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Domain %s is being %s to %s." \
		    "${DOMAIN}" "${MSG}" "${TARGET_HOST}"

		migrate_${VM} ${MIGRATION_TYPE} ${DOMAIN} ${PRIVATELINK_TARGET_HOST}
		rc=${?}

		if (( ${rc} == 0 ))
		then
			# SCMSGS
			# @explanation
			# The domain was migrated or live migrated to the target host.
			# @user_action
			# None required. The domain successfully migrated or live migrated
			# from the source node to the target node.
			scds_syslog -p daemon.notice -t $(syslog_tag) -m \
			    "Domain %s successfully %s to %s." \
			    "${DOMAIN}" "${MSG}" "${TARGET_HOST}"

			# As the domain has been successfully migrated or live migrated
			# we need to indicate a successful stop by performing a NO-OP stop
			# and subsequently a successful start by performing a NO-OP start.

			if ${CCRADM} addkey --key=noop_${RESOURCE} --value="1" ${CCR_TABLE} >> $LOGFILE 2>&1
			then
				debug_message "domain_migrate - .noop_${RESOURCE} flag added to CCR"

				# Only announce the NO-OP stop once the flag is
				# really in place; without it rc=1 is returned
				# and the caller performs a real shutdown.

				# SCMSGS
				# @explanation
				# The domain was migrated or live migrated.
				# @user_action
				# None required. Informational message.
				scds_syslog -p daemon.notice -t $(syslog_tag) -m \
				    "NO-OP STOP being performed."
			else
				# SCMSGS
				# @explanation
				# Failed to update the XML configuration to the CCR.
				# @user_action
				# Check the syslog for further messages.
				# Determine why the ccr update failed.
				scds_syslog -p daemon.error -t $(syslog_tag) -m \
				    "Failed to add NO-OP flag for %s to ccr." \
				    "${DOMAIN}"
				rc=1
			fi

		elif (( ${rc} == 99 ))
		then
			# SCMSGS
			# @explanation
			# The domain migration or live migration timed out.
			# @user_action
			# None required. Informational message.
			scds_syslog -p daemon.notice -t $(syslog_tag) -m \
			    "Migration of domain %s timed out, the domain state is now shut off." \
			    "${DOMAIN}"

			rc=1
			cancel_${VM}_migration
		else
			# SCMSGS
			# @explanation
			# The domain failed to migrate or live migrate to the target host.
			# @user_action
			# None required. The domain failed to migrate or live migrate
			# from the source node to the target node. A normal failover
			# will be performed.
			scds_syslog -p daemon.notice -t $(syslog_tag) -m \
			    "Domain %s failed to %s to %s, normal failover will be performed." \
			    "${DOMAIN}" "${MSG}" "${TARGET_HOST}"

			rc=1
			cancel_${VM}_migration
		fi
	else
		# get_target_host has already logged why no target is available.
		rc=1
	fi

	# If the domain has successfully migrated, we will now delete the domain.
	#
	# Doing this ensures that the domain is only defined and able to be started
	# on one cluster node at a time. Domains can use shared storage between cluster
	# nodes so it is very important that we prevent any data corruption if a domain
	# gets manually started on multiple cluster nodes where shared storage is used.
	#
	# Of course using SUNW.HAStoragePlus somewhat protects against this, however we
	# simply want to avoid any manual administrative errors performed by mistake.
	#
	# Note, unless the domain was migrated or live migrated, the domain is defined
	# before startup using a previously dumped XML file for the administrative file
	# system.

	(( ${rc} == 0 )) && [[ "${VM}" == "xvm" ]] && domain_delete

	debug_message "Function: domain_migrate - End"
	return ${rc}
}
1563 1615
1564 1616 #
1565 1617 # routines to perform domain shutdown
1566 1618 #
#
# shutdown_xvm - ask the xVM domain to shut down.
#
# Returns the exit status of "${VIRSH} shutdown ${DOMAIN}"; the command's
# output is discarded.
#
shutdown_xvm()
{
	debug_message "Function: shutdown_xvm - Begin"
	${SET_DEBUG}

	typeset rc=0

	# hatimerun is not used here because virsh shutdown returns before
	# the domain has actually shut down.
	${VIRSH} shutdown ${DOMAIN} > /dev/null 2>&1
	rc=${?}

	debug_message "Function: shutdown_xvm - End"
	return ${rc}
}
1583 1635
#
# shutdown_ldom - stop a logical domain.
#
# When get_${VM}_status succeeds and reports active, suspending, resuming,
# suspended or starting, "ldm stop-domain" is run under ${HATIMERUN} limited
# to MAX_STOP_TIMEOUT seconds.
#
# Globals read: VM, DOMAIN, LDM, HATIMERUN, GREP, MAX_STOP_TIMEOUT, LOGFILE
# Globals set:  status
# Returns: 0 if the domain was stopped or is already stopped,
#          2 if the domain status could not be obtained (domain not present),
#          otherwise the exit status of the ${HATIMERUN} invocation.
#
shutdown_ldom()
{
	debug_message "Function: shutdown_ldom - Begin"
	${SET_DEBUG}

	typeset rc

	status=$(get_${VM}_status)
	if (( ${?} == 0 ))
	then
		if echo "${status}" | ${GREP} -q -E "^active$|^suspending|^resuming|^suspended|^starting" > /dev/null 2>&1
		then
			${HATIMERUN} -t ${MAX_STOP_TIMEOUT} -k KILL ${LDM} stop-domain ${DOMAIN} >> $LOGFILE 2>&1
			rc=${?}
		else
			# domain is already stopped
			rc=0
		fi
	else
		# domain is not present.
		rc=2
	fi

	debug_message "Function: shutdown_ldom - End"
	return ${rc}
}
1610 1662
#
# domain_shutdown - shut the domain down gracefully, forcing it if needed.
#
# Calls shutdown_${VM} and acts on its status:
#   2     - returns 0 at once, without deleting the domain;
#   0     - waits in 5 second steps, until SECONDS reaches MAX_STOP_TIMEOUT,
#           for is_${VM}_up to report the domain down, and calls
#           destroy_${VM} if it is still up afterwards;
#   other - calls destroy_${VM} straight away.
# When the resulting status is 0 the domain is removed from this node with
# domain_delete.
#
# Returns: 0 on success, otherwise the status of destroy_${VM}.
#
domain_shutdown()
{
	debug_message "Function: domain_shutdown - Begin"
	${SET_DEBUG}

	typeset rc

	# Coordinate with the domain OS to perform a graceful shutdown.
	# Note that the virsh shutdown command returns before the domain
	# has shutdown, as such we do not use hatimerun.

	shutdown_${VM}
	rc=${?}

	case ${rc} in
	2)
		debug_message "Function: domain_shutdown - End"
		return 0
		;;
	0)
		# Poll until the domain is down or the shutdown time is used up.
		# Assigning SECONDS ends the loop once the domain is down.
		while (( ${SECONDS} < ${MAX_STOP_TIMEOUT} ))
		do
			if ! is_${VM}_up
			then
				SECONDS=${MAX_STOP_TIMEOUT}
			else
				sleep 5
			fi
		done

		if ! is_${VM}_up
		then
			# SCMSGS
			# @explanation
			# The domain was shutdown gracefully.
			# @user_action
			# None required. The domain has shutdown gracefully.
			scds_syslog -p daemon.info -t $(syslog_tag) -m \
			    "Domain %s has been gracefully shutdown." \
			    "${DOMAIN}"
			rc=0
		else
			# SCMSGS
			# @explanation
			# The domain failed to shutdown gracefully.
			# @user_action
			# None required. The domain failed to shutdown
			# gracefully and will now be immediately terminated.
			scds_syslog -p daemon.notice -t $(syslog_tag) -m \
			    "Domain %s failed to shutdown gracefully, immediate shutdown will now be performed." \
			    "${DOMAIN}"

			destroy_${VM}
			rc=${?}
		fi
		;;
	*)
		# error already logged
		destroy_${VM}
		rc=${?}
		;;
	esac

	# A domain that is down is also deleted from this node.
	#
	# That way the domain is defined, and can be started, on only one
	# cluster node at a time. Domains may sit on storage shared between
	# cluster nodes, so a domain that is manually started on several nodes
	# could corrupt its data.
	#
	# SUNW.HAStoragePlus gives some protection against this, but the aim
	# here is to rule out manual administrative mistakes.
	#
	# Unless the domain was migrated or live migrated, it is defined again
	# before startup from a previously dumped XML file for the
	# administrative file system.

	if (( ${rc} == 0 ))
	then
		domain_delete
	fi

	debug_message "Function: domain_shutdown - End"
	return ${rc}
}
1694 1746
1695 1747 #
1696 1748 # routines to destroy domain
1697 1749 #
#
# destroy_xvm - immediately terminate the xVM domain with "virsh destroy".
#
# The command's output is appended to ${LOGFILE} and the outcome is logged
# through scds_syslog.
#
# Returns: 0 if the domain was terminated, 1 if virsh destroy failed.
#
destroy_xvm()
{
	debug_message "Function: destroy_xvm - Begin"
	${SET_DEBUG}

	typeset rc

	if ! ${VIRSH} destroy ${DOMAIN} >> $LOGFILE 2>&1
	then
		# SCMSGS
		# @explanation
		# The /usr/bin/virsh destroy command failed.
		# @user_action
		# Determine why it was not possible to immediately terminate
		# the domain.
		scds_syslog -p daemon.error -t $(syslog_tag) -m \
		    "Domain %s failed to shutdown immediately." \
		    "${DOMAIN}"
		rc=1
	else
		# SCMSGS
		# @explanation
		# The domain was immediately terminated.
		# @user_action
		# None required. The domain had previously failed to shutdown
		# gracefully but has now been immediately terminated.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Domain %s has been immediately terminated." \
		    "${DOMAIN}"
		rc=0
	fi

	debug_message "Function: destroy_xvm - End"
	return ${rc}
}
1733 1785
#
# destroy_ldom - forcefully stop the logical domain with "ldm stop-domain -f".
#
# The command's output is appended to ${LOGFILE} and the outcome is logged
# through scds_syslog.
#
# Returns: 0 if the domain was stopped, 1 if the forced stop failed.
#
destroy_ldom()
{
	debug_message "Function: destroy_ldom - Begin"
	${SET_DEBUG}

	typeset rc

	if ! ${LDM} stop-domain -f ${DOMAIN} >> $LOGFILE 2>&1
	then
		# SCMSGS
		# @explanation
		# The /opt/SUNWldm/bin/ldm stop-domain "-f" command failed.
		# @user_action
		# Determine why it was not possible to forcefully stop
		# the domain.
		scds_syslog -p daemon.error -t $(syslog_tag) -m \
		    "Domain %s failed to do a forceful shutdown." \
		    "${DOMAIN}"
		rc=1
	else
		# SCMSGS
		# @explanation
		# The domain was immediately terminated.
		# @user_action
		# None required. The domain had previously failed to shutdown
		# gracefully but has now been immediately terminated.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Domain %s has been forcefully terminated." \
		    "${DOMAIN}"
		rc=0
	fi

	debug_message "Function: destroy_ldom - End"
	return ${rc}
}
1769 1821
1770 1822 #
1771 1823 # routines to remove domains from the node
1772 1824 #
#
# domain_delete - remove the domain definition from this node.
#
# Delegates to delete_${VM} and logs a notice when the domain was removed.
#
# Returns: 0 if delete_${VM} succeeded, 1 otherwise.
#
domain_delete()
{
	debug_message "Function: domain_delete - Begin"
	${SET_DEBUG}

	# The domain is deleted after shutdown so that nobody can manually
	# start it on a different node, which would compromise the domain if
	# it uses shared storage. Its configuration is always dumped to the
	# agent's administrative file system, so the domain can be defined
	# again before startup.

	typeset rc

	if ! delete_${VM}
	then
		# error already logged.
		rc=1
	else
		# SCMSGS
		# @explanation
		# The domain was deleted.
		# @user_action
		# None required. The domain has been deleted as it
		# will be defined on another node. Deleting the domain
		# on this node ensures that it can't be started on
		# more than one cluster node at a time.
		scds_syslog -p daemon.notice -t $(syslog_tag) -m \
		    "Domain %s has been deleted on this node." \
		    "${DOMAIN}"
		rc=0
	fi

	debug_message "Function: domain_delete - End"
	return ${rc}
}
1808 1860
#
# delete_xvm - remove the xVM domain definition with "/usr/sbin/xm delete".
#
# The command's output is appended to ${LOGFILE}; a failure is logged
# through scds_syslog.
#
# Returns: 0 if the domain was deleted, 1 otherwise.
#
delete_xvm()
{
	debug_message "Function: delete_xvm - Begin"
	${SET_DEBUG}

	typeset rc

	if /usr/sbin/xm delete ${DOMAIN} >> $LOGFILE 2>&1
	then
		rc=0
	else
		# SCMSGS
		# @explanation
		# The /usr/sbin/xm delete command failed.
		# @user_action
		# Determine why it was not possible to delete the domain.
		scds_syslog -p daemon.error -t $(syslog_tag) -m \
		    "Failed to delete domain %s on this node." \
		    "${DOMAIN}"
		rc=1
	fi

	debug_message "Function: delete_xvm - End"
	return ${rc}
}
1832 1884
#
# delete_ldom - remove the logical domain from this node.
#
# A domain that get_${VM}_status reports as "bound" is unbound first; the
# domain is then removed with "ldm remove-domain".  If the unbind fails the
# remove is not attempted.  Command output is appended to ${LOGFILE} and
# failures are logged through scds_syslog.
#
# Returns: 0 if the domain was removed, 1 if the unbind or remove failed.
#
delete_ldom()
{
	debug_message "Function: delete_ldom - Begin"
	${SET_DEBUG}

	typeset rc=0

	if get_${VM}_status | ${GREP} -q -E "^bound$" > /dev/null 2>&1
	then
		# if the domain is in bound state, unbind it.
		if ! ${LDM} unbind-domain ${DOMAIN} >> $LOGFILE 2>&1
		then
			# SCMSGS
			# @explanation
			# The /opt/SUNWldm/bin/ldm unbind-domain command failed.
			# @user_action
			# Determine why it was not possible to unbind the domain.
			scds_syslog -p daemon.error -t $(syslog_tag) -m \
			    "Failed to unbind domain %s on this node." \
			    "${DOMAIN}"
			rc=1
		fi
	fi

	# Only try to remove the domain when the unbind step did not fail.
	if (( ${rc} == 0 ))
	then
		if ! ${LDM} remove-domain ${DOMAIN} >> $LOGFILE 2>&1
		then
			# SCMSGS
			# @explanation
			# The /opt/SUNWldm/bin/ldm remove-domain command failed.
			# @user_action
			# Determine why it was not possible to remove the domain.
			scds_syslog -p daemon.error -t $(syslog_tag) -m \
			    "Failed to remove domain %s on this node." \
			    "${DOMAIN}"
			rc=1
		fi
	fi

	debug_message "Function: delete_ldom - End"
	return ${rc}
}
|
↓ open down ↓ |
1458 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX