Print this page
6805730 some simple changes would make 'init 5' much faster
6809492 startd shouldn't let hung subprocesses impede shutdown

*** 176,185 **** --- 176,190 ---- static int halting = -1; static boolean_t go_single_user_mode = B_FALSE; static boolean_t go_to_level1 = B_FALSE; /* + * Tracks when we started halting. + */ + static time_t halting_time = 0; + + /* * This tracks the legacy runlevel to ensure we signal init and manage * utmpx entries correctly. */ static char current_runlevel = '\0';
*** 3439,3453 **** graph_enable_by_vertex(v, enabled, 0); return (0); } static void do_uadmin(void) { ! int fd, left; struct statvfs vfs; const char * const resetting = "/etc/svc/volatile/resetting"; fd = creat(resetting, 0777); if (fd >= 0) --- 3444,3491 ---- graph_enable_by_vertex(v, enabled, 0); return (0); } + static void + kill_user_procs(void) + { + (void) fputs("svc.startd: Killing user processes.\n", stdout); + + /* + * Despite its name, killall's role is to get select user processes-- + * basically those representing terminal-based logins-- to die. Victims + * are located by killall in the utmp database. Since these are most + * often shell based logins, and many shells mask SIGTERM (but are + * responsive to SIGHUP) we first HUP and then shortly thereafter + * kill -9. + */ + (void) fork_with_timeout("/usr/sbin/killall HUP", 1, 5); + (void) fork_with_timeout("/usr/sbin/killall KILL", 1, 5); + + /* + * Note the selection of user id's 0, 1 and 15, subsequently + * inverted by -v. 15 is reserved for dladmd. Yes, this is a + * kludge-- a better policy is needed. + * + * Note that fork_with_timeout will only wait out the 1 second + * "grace time" if pkill actually returns 0. So if there are + * no matches, this will run to completion much more quickly. + */ + (void) fork_with_timeout("/usr/bin/pkill -TERM -v -u 0,1,15", 1, 5); + (void) fork_with_timeout("/usr/bin/pkill -KILL -v -u 0,1,15", 1, 5); + } + + static void do_uadmin(void) { ! int fd; struct statvfs vfs; + time_t now; + struct tm nowtm; + char down_buf[256], time_buf[256]; const char * const resetting = "/etc/svc/volatile/resetting"; fd = creat(resetting, 0777); if (fd >= 0)
*** 3456,3490 **** uu_warn("Could not create \"%s\"", resetting); /* Kill dhcpagent if we're not using nfs for root */ if ((statvfs("/", &vfs) == 0) && (strncmp(vfs.f_basetype, "nfs", sizeof ("nfs") - 1) != 0)) ! (void) system("/usr/bin/pkill -x -u 0 dhcpagent"); ! (void) system("/usr/sbin/killall"); ! left = 5; ! while (left > 0) ! left = sleep(left); ! (void) system("/usr/sbin/killall 9"); ! left = 10; ! while (left > 0) ! left = sleep(left); ! sync(); ! sync(); ! sync(); ! (void) system("/sbin/umountall -l"); ! (void) system("/sbin/umount /tmp >/dev/null 2>&1"); ! (void) system("/sbin/umount /var/adm >/dev/null 2>&1"); ! (void) system("/sbin/umount /var/run >/dev/null 2>&1"); ! (void) system("/sbin/umount /var >/dev/null 2>&1"); ! (void) system("/sbin/umount /usr >/dev/null 2>&1"); ! uu_warn("The system is down.\n"); (void) uadmin(A_SHUTDOWN, halting, NULL); uu_warn("uadmin() failed"); if (remove(resetting) != 0 && errno != ENOENT) uu_warn("Could not remove \"%s\"", resetting); --- 3494,3585 ---- uu_warn("Could not create \"%s\"", resetting); /* Kill dhcpagent if we're not using nfs for root */ if ((statvfs("/", &vfs) == 0) && (strncmp(vfs.f_basetype, "nfs", sizeof ("nfs") - 1) != 0)) ! fork_with_timeout("/usr/bin/pkill -x -u 0 dhcpagent", 0, 5); ! /* ! * Call sync(2) now, before we kill off user processes. This takes ! * advantage of the several seconds of pause we have before the ! * killalls are done. Time we can make good use of to get pages ! * moving out to disk. ! * ! * Inside non-global zones, we don't bother, and it's better not to ! * anyway, since sync(2) can have system-wide impact. ! */ ! if (getzoneid() == 0) ! sync(); ! kill_user_procs(); ! /* ! * Note that this must come after the killing of user procs, since ! * killall relies on utmpx, and this command affects the contents of ! * said file. ! */ ! if (access("/usr/lib/acct/closewtmp", X_OK) == 0) ! fork_with_timeout("/usr/lib/acct/closewtmp", 0, 5); ! /* ! * For patches which may be installed as the system is shutting ! * down, we need to ensure, one more time, that the boot archive ! * really is up to date. ! */ ! if (getzoneid() == 0 && access("/usr/sbin/bootadm", X_OK) == 0) ! fork_with_timeout("/usr/sbin/bootadm -ea update_all", 0, 3600); ! fork_with_timeout("/sbin/umountall -l", 0, 5); ! fork_with_timeout("/sbin/umount /tmp /var/adm /var/run /var " ! ">/dev/null 2>&1", 0, 5); + /* + * Try to get to consistency for whatever UFS filesystems are left. + * This is pretty expensive, so we save it for the end in the hopes of + * minimizing what it must do. The other option would be to start in + * parallel with the killall's, but lockfs tends to throw out much more + * than is needed, and so subsequent commands (like umountall) take a + * long time to get going again. + * + * Inside of zones, we don't bother, since we're not about to terminate + * the whole OS instance. + * + * On systems using only ZFS, this call to lockfs -fa is a no-op. + */ + if (getzoneid() == 0) { + if (access("/usr/sbin/lockfs", X_OK) == 0) + fork_with_timeout("/usr/sbin/lockfs -fa", 0, 30); + + sync(); /* once more, with feeling */ + } + + fork_with_timeout("/sbin/umount /usr >/dev/null 2>&1", 0, 5); + + /* + * Construct and emit the last words from userland: + * "<timestamp> The system is down. Shutdown took <N> seconds." + * + * Normally we'd use syslog, but with /var and other things + * potentially gone, try to minimize the external dependencies. + */ + now = time(NULL); + (void) localtime_r(&now, &nowtm); + + if (strftime(down_buf, sizeof (down_buf), + "%b %e %T The system is down.", &nowtm) == 0) { + (void) strlcpy(down_buf, "The system is down.", + sizeof (down_buf)); + } + + if (halting_time != 0 && halting_time <= now) { + (void) snprintf(time_buf, sizeof (time_buf), + " Shutdown took %lu seconds.", now - halting_time); + } else { + time_buf[0] = '\0'; + } + (void) printf("%s%s\n", down_buf, time_buf); + (void) uadmin(A_SHUTDOWN, halting, NULL); uu_warn("uadmin() failed"); if (remove(resetting) != 0 && errno != ENOENT) uu_warn("Could not remove \"%s\"", resetting);
*** 3679,3706 **** int r; MUTEX_LOCK(&single_user_thread_lock); single_user_thread_count++; ! if (!booting_to_single_user) { ! /* ! * From rcS.sh: Look for ttymon, in.telnetd, in.rlogind and ! * processes in their process groups so they can be terminated. ! */ ! (void) fputs("svc.startd: Killing user processes: ", stdout); ! (void) system("/usr/sbin/killall"); ! (void) system("/usr/sbin/killall 9"); ! (void) system("/usr/bin/pkill -TERM -v -u 0,1"); - left = 5; - while (left > 0) - left = sleep(left); - - (void) system("/usr/bin/pkill -KILL -v -u 0,1"); - (void) puts("done."); - } - if (go_single_user_mode || booting_to_single_user) { msg = "SINGLE USER MODE\n"; } else { assert(go_to_level1); --- 3774,3786 ---- int r; MUTEX_LOCK(&single_user_thread_lock); single_user_thread_count++; ! if (!booting_to_single_user) ! kill_user_procs(); if (go_single_user_mode || booting_to_single_user) { msg = "SINGLE USER MODE\n"; } else { assert(go_to_level1);
*** 4970,4989 **** --- 5050,5072 ---- ms = single_user_fmri; go_single_user_mode = B_TRUE; break; case '0': + halting_time = time(NULL); fork_rc_script(rl, stop, B_TRUE); halting = AD_HALT; goto uadmin; case '5': + halting_time = time(NULL); fork_rc_script(rl, stop, B_TRUE); halting = AD_POWEROFF; goto uadmin; case '6': + halting_time = time(NULL); fork_rc_script(rl, stop, B_TRUE); halting = AD_BOOT; goto uadmin; uadmin: