Print this page
6805730 some simple changes would make 'init 5' much faster
6809492 startd shouldn't let hung subprocesses impede shutdown


 161 static pthread_cond_t initial_milestone_cv = PTHREAD_COND_INITIALIZER;
 162 
 163 /* protected by dgraph_lock */
 164 static boolean_t sulogin_thread_running = B_FALSE;
 165 static boolean_t sulogin_running = B_FALSE;
 166 static boolean_t console_login_ready = B_FALSE;
 167 
 168 /* Number of services to come down to complete milestone transition. */
 169 static uint_t non_subgraph_svcs;
 170 
 171 /*
 172  * These variables indicate what should be done when we reach the target
 173  * milestone, i.e., when non_subgraph_svcs == 0.  They are acted upon in
 174  * dgraph_set_instance_state().
      *
      * halting is initialised to -1; the runlevel code sets it to the value
      * passed as the second argument of uadmin() (AD_HALT, AD_POWEROFF or
      * AD_BOOT) when run level 0, 5 or 6 is requested.
 175  */
 176 static int halting = -1;
 177 static boolean_t go_single_user_mode = B_FALSE;
 178 static boolean_t go_to_level1 = B_FALSE;
 179 
 180 /*





 181  * This tracks the legacy runlevel to ensure we signal init and manage
 182  * utmpx entries correctly.
 183  */
 184 static char current_runlevel = '\0';
 185 
 186 /* Number of single user threads currently running */
     /* single_user_thread() takes this lock before incrementing the count. */
 187 static pthread_mutex_t single_user_thread_lock;
 188 static int single_user_thread_count = 0;
 189 
 190 /* Statistics for dependency cycle-checking */
 191 static u_longlong_t dep_inserts = 0;
 192 static u_longlong_t dep_cycle_ns = 0;
 193 static u_longlong_t dep_insert_ns = 0;
 194 
 195 
 196 static const char * const emsg_invalid_restarter =
 197         "Transitioning %s to maintenance, restarter FMRI %s is invalid "
 198         "(see 'svcs -xv' for details).\n";
 199 static const char * const console_login_fmri = CONSOLE_LOGIN_FMRI;
 200 static const char * const single_user_fmri = SCF_MILESTONE_SINGLE_USER;


3424 
3425                 goto out;
3426         }
3427         startd_free(restarter_fmri, max_scf_value_size);
3428 
3429         /* Add all the other dependencies. */
3430         err = refresh_vertex(v, inst);
3431         if (err != 0) {
3432                 assert(err == ECONNABORTED);
3433                 return (err);
3434         }
3435 
3436 out:
3437         v->gv_flags |= GV_CONFIGURED;
3438 
3439         graph_enable_by_vertex(v, enabled, 0);
3440 
3441         return (0);
3442 }
3443 

 3444 static void





























 3445 do_uadmin(void)
 3446 {
              /*
               * Final shutdown sequence.  Creates the "resetting" marker file,
               * kills dhcpagent (unless root is on nfs) and user processes,
               * syncs and unmounts filesystems, then calls
               * uadmin(A_SHUTDOWN, halting, NULL).  The statements after the
               * uadmin() call are reached only if uadmin() returns, which is
               * reported as a failure.
               *
               * NOTE(review): every external command below is run through
               * system(), which waits for the command to exit, so a hung
               * command would stall shutdown (cf. 6809492 in the page
               * header) -- confirm.
               */
 3447         int fd, left;
 3448         struct statvfs vfs;



 3449 
 3450         const char * const resetting = "/etc/svc/volatile/resetting";
 3451 
              /* Best effort: failure to create the marker is only warned about. */
 3452         fd = creat(resetting, 0777);
 3453         if (fd >= 0)
 3454                 startd_close(fd);
 3455         else
 3456                 uu_warn("Could not create \"%s\"", resetting);
 3457 
 3458         /* Kill dhcpagent if we're not using nfs for root */
 3459         if ((statvfs("/", &vfs) == 0) &&
 3460             (strncmp(vfs.f_basetype, "nfs", sizeof ("nfs") - 1) != 0))
 3461                 (void) system("/usr/bin/pkill -x -u 0 dhcpagent");
 3462 
 3463         (void) system("/usr/sbin/killall");
              /*
               * sleep() returns the unslept time when it is interrupted, so
               * loop until the whole 5 seconds have passed.
               */
 3464         left = 5;
 3465         while (left > 0)
 3466                 left = sleep(left);







 3467 
              /* Second pass with signal 9, followed by a full 10 second wait. */
 3468         (void) system("/usr/sbin/killall 9");
 3469         left = 10;
 3470         while (left > 0)
 3471                 left = sleep(left);
 3472 
 3473         sync();
 3474         sync();
 3475         sync();




 3476 
              /* Output of the individual umount commands is discarded. */
 3477         (void) system("/sbin/umountall -l");
 3478         (void) system("/sbin/umount /tmp >/dev/null 2>&1");
 3479         (void) system("/sbin/umount /var/adm >/dev/null 2>&1");
 3480         (void) system("/sbin/umount /var/run >/dev/null 2>&1");
 3481         (void) system("/sbin/umount /var >/dev/null 2>&1");
 3482         (void) system("/sbin/umount /usr >/dev/null 2>&1");

 3483 
 3484         uu_warn("The system is down.\n");


 3485 














































              /* halting is the uadmin() argument chosen by the runlevel code. */
 3486         (void) uadmin(A_SHUTDOWN, halting, NULL);
 3487         uu_warn("uadmin() failed");
 3488 
              /* uadmin() returned, so remove the marker created above. */
 3489         if (remove(resetting) != 0 && errno != ENOENT)
 3490                 uu_warn("Could not remove \"%s\"", resetting);
 3491 }
3492 
3493 /*
3494  * If any of the up_svcs[] are online or satisfiable, return true.  If they are
3495  * all missing, disabled, in maintenance, or unsatisfiable, return false.
3496  */
3497 boolean_t
3498 can_come_up(void)
3499 {
3500         int i;
3501 
3502         assert(PTHREAD_MUTEX_HELD(&dgraph_lock));
3503 
3504         /*
3505          * If we are booting to single user (boot -s),


3664 
3665         return (NULL);
3666 }
3667 
3668 /* ARGSUSED */
3669 void *
3670 single_user_thread(void *unused)
3671 {
3672         uint_t left;
3673         scf_handle_t *h;
3674         scf_instance_t *inst;
3675         scf_property_t *prop;
3676         scf_value_t *val;
3677         const char *msg;
3678         char *buf;
3679         int r;
3680 
3681         MUTEX_LOCK(&single_user_thread_lock);
3682         single_user_thread_count++;
3683 
3684         if (!booting_to_single_user) {
3685                 /*
3686                  * From rcS.sh: Look for ttymon, in.telnetd, in.rlogind and
3687                  * processes in their process groups so they can be terminated.
3688                  */
3689                 (void) fputs("svc.startd: Killing user processes: ", stdout);
3690                 (void) system("/usr/sbin/killall");
3691                 (void) system("/usr/sbin/killall 9");
3692                 (void) system("/usr/bin/pkill -TERM -v -u 0,1");
3693 
3694                 left = 5;
3695                 while (left > 0)
3696                         left = sleep(left);
3697 
3698                 (void) system("/usr/bin/pkill -KILL -v -u 0,1");
3699                 (void) puts("done.");
3700         }
3701 
3702         if (go_single_user_mode || booting_to_single_user) {
3703                 msg = "SINGLE USER MODE\n";
3704         } else {
3705                 assert(go_to_level1);
3706 
3707                 fork_rc_script('1', "start", B_TRUE);
3708 
3709                 uu_warn("The system is ready for administration.\n");
3710 
3711                 msg = "";
3712         }
3713 
3714         MUTEX_UNLOCK(&single_user_thread_lock);
3715 
3716         for (;;) {
3717                 MUTEX_LOCK(&dgraph_lock);
3718                 r = run_sulogin(msg);
3719                 MUTEX_UNLOCK(&dgraph_lock);
3720                 if (r == 0)
3721                         break;


4955 
4956         /*
4957          * Some run levels don't have a direct correspondence to any
4958          * milestones, so we have to signal init directly.
4959          */
4960         if (mark_rl) {
4961                 current_runlevel = rl;
4962                 signal_init(rl);
4963         }
4964 
4965         switch (rl) {
4966         case 'S':
4967                 uu_warn("The system is coming down for administration.  "
4968                     "Please wait.\n");
4969                 fork_rc_script(rl, stop, B_FALSE);
4970                 ms = single_user_fmri;
4971                 go_single_user_mode = B_TRUE;
4972                 break;
4973 
4974         case '0':

4975                 fork_rc_script(rl, stop, B_TRUE);
4976                 halting = AD_HALT;
4977                 goto uadmin;
4978 
4979         case '5':

4980                 fork_rc_script(rl, stop, B_TRUE);
4981                 halting = AD_POWEROFF;
4982                 goto uadmin;
4983 
4984         case '6':

4985                 fork_rc_script(rl, stop, B_TRUE);
4986                 halting = AD_BOOT;
4987                 goto uadmin;
4988 
4989 uadmin:
4990                 uu_warn("The system is coming down.  Please wait.\n");
4991                 ms = "none";
4992 
4993                 /*
4994                  * We can't wait until all services are offline since this
4995                  * thread is responsible for taking them offline.  Instead we
4996                  * set halting to the second argument for uadmin() and call
4997                  * do_uadmin() from dgraph_set_instance_state() when
4998                  * appropriate.
4999                  */
5000                 break;
5001 
5002         case '1':
5003                 if (current_runlevel != 'S') {
5004                         uu_warn("Changing to state 1.\n");




 161 static pthread_cond_t initial_milestone_cv = PTHREAD_COND_INITIALIZER;
 162 
 163 /* protected by dgraph_lock */
 164 static boolean_t sulogin_thread_running = B_FALSE;
 165 static boolean_t sulogin_running = B_FALSE;
 166 static boolean_t console_login_ready = B_FALSE;
 167 
 168 /* Number of services to come down to complete milestone transition. */
 169 static uint_t non_subgraph_svcs;
 170 
 171 /*
 172  * These variables indicate what should be done when we reach the target
 173  * milestone, i.e., when non_subgraph_svcs == 0.  They are acted upon in
 174  * dgraph_set_instance_state().
      *
      * halting is initialised to -1; the runlevel code sets it to the value
      * passed as the second argument of uadmin() (AD_HALT, AD_POWEROFF or
      * AD_BOOT) when run level 0, 5 or 6 is requested.
 175  */
 176 static int halting = -1;
 177 static boolean_t go_single_user_mode = B_FALSE;
 178 static boolean_t go_to_level1 = B_FALSE;
 179 
 180 /*
 181  * Tracks when we started halting.
      *
      * Set from time(NULL) when run level 0, 5 or 6 is requested.  do_uadmin()
      * uses it to report how long shutdown took and skips that report while
      * the value is still 0.
 182  */
 183 static time_t halting_time = 0;
 184 
 185 /*
 186  * This tracks the legacy runlevel to ensure we signal init and manage
 187  * utmpx entries correctly.
 188  */
 189 static char current_runlevel = '\0';
 190 
 191 /* Number of single user threads currently running */
     /* single_user_thread() takes this lock before incrementing the count. */
 192 static pthread_mutex_t single_user_thread_lock;
 193 static int single_user_thread_count = 0;
 194 
 195 /* Statistics for dependency cycle-checking */
 196 static u_longlong_t dep_inserts = 0;
 197 static u_longlong_t dep_cycle_ns = 0;
 198 static u_longlong_t dep_insert_ns = 0;
 199 
 200 
 201 static const char * const emsg_invalid_restarter =
 202         "Transitioning %s to maintenance, restarter FMRI %s is invalid "
 203         "(see 'svcs -xv' for details).\n";
 204 static const char * const console_login_fmri = CONSOLE_LOGIN_FMRI;
 205 static const char * const single_user_fmri = SCF_MILESTONE_SINGLE_USER;


3429 
3430                 goto out;
3431         }
3432         startd_free(restarter_fmri, max_scf_value_size);
3433 
3434         /* Add all the other dependencies. */
3435         err = refresh_vertex(v, inst);
3436         if (err != 0) {
3437                 assert(err == ECONNABORTED);
3438                 return (err);
3439         }
3440 
3441 out:
3442         v->gv_flags |= GV_CONFIGURED;
3443 
3444         graph_enable_by_vertex(v, enabled, 0);
3445 
3446         return (0);
3447 }
3448 
3449 
 3450 static void
 3451 kill_user_procs(void)
 3452 {
              /*
               * Announces on stdout that user processes are being killed, then
               * runs four commands in order: killall HUP, killall KILL,
               * pkill -TERM and pkill -KILL.  The result of each command is
               * deliberately ignored.
               *
               * NOTE(review): fork_with_timeout() is not visible here; going by
               * the comment below, its second argument looks like a grace
               * delay in seconds and the third presumably a timeout -- confirm
               * against its definition.
               */
 3453         (void) fputs("svc.startd: Killing user processes.\n", stdout);
 3454 
 3455         /*
 3456          * Despite its name, killall's role is to get select user processes--
 3457          * basically those representing terminal-based logins-- to die.  Victims
 3458          * are located by killall in the utmp database.  Since these are most
 3459          * often shell based logins, and many shells mask SIGTERM (but are
 3460          * responsive to SIGHUP) we first HUP and then shortly thereafter
 3461          * kill -9.
 3462          */
 3463         (void) fork_with_timeout("/usr/sbin/killall HUP", 1, 5);
 3464         (void) fork_with_timeout("/usr/sbin/killall KILL", 1, 5);
 3465 
 3466         /*
 3467          * Note the selection of user id's 0, 1 and 15, subsequently
 3468          * inverted by -v.  15 is reserved for dladmd.  Yes, this is a
 3469          * kludge-- a better policy is needed.
 3470          *
 3471          * Note that fork_with_timeout will only wait out the 1 second
 3472          * "grace time" if pkill actually returns 0.  So if there are
 3473          * no matches, this will run to completion much more quickly.
 3474          */
 3475         (void) fork_with_timeout("/usr/bin/pkill -TERM -v -u 0,1,15", 1, 5);
 3476         (void) fork_with_timeout("/usr/bin/pkill -KILL -v -u 0,1,15", 1, 5);
 3477 }
3478 
 3479 static void
 3480 do_uadmin(void)
 3481 {
              /*
               * Final shutdown sequence.  Creates the "resetting" marker file,
               * kills dhcpagent (unless root is on nfs) and user processes,
               * closes out wtmp accounting, refreshes the boot archive,
               * unmounts filesystems, prints the "system is down" message and
               * calls uadmin(A_SHUTDOWN, halting, NULL).  The statements after
               * the uadmin() call are reached only if uadmin() returns, which
               * is reported as a failure.
               *
               * External commands are run through fork_with_timeout(); its
               * last argument is presumably a timeout in seconds (TODO confirm
               * against its definition, which is not visible here).
               */
 3482         int fd;
 3483         struct statvfs vfs;
 3484         time_t now;
 3485         struct tm nowtm;
 3486         char down_buf[256], time_buf[256];
 3487 
 3488         const char * const resetting = "/etc/svc/volatile/resetting";
 3489 
              /* Best effort: failure to create the marker is only warned about. */
 3490         fd = creat(resetting, 0777);
 3491         if (fd >= 0)
 3492                 startd_close(fd);
 3493         else
 3494                 uu_warn("Could not create \"%s\"", resetting);
 3495 
 3496         /* Kill dhcpagent if we're not using nfs for root */
 3497         if ((statvfs("/", &vfs) == 0) &&
 3498             (strncmp(vfs.f_basetype, "nfs", sizeof ("nfs") - 1) != 0))
 3499                 fork_with_timeout("/usr/bin/pkill -x -u 0 dhcpagent", 0, 5);
 3500 
 3501         /*
 3502          * Call sync(2) now, before we kill off user processes.  This takes
 3503          * advantage of the several seconds of pause we have before the
 3504          * killalls are done.  Time we can make good use of to get pages
 3505          * moving out to disk.
 3506          *
 3507          * Inside non-global zones, we don't bother, and it's better not to
 3508          * anyway, since sync(2) can have system-wide impact.
 3509          */
 3510         if (getzoneid() == 0)
 3511                 sync();
 3512 
 3513         kill_user_procs();



 3514 
 3515         /*
 3516          * Note that this must come after the killing of user procs, since
 3517          * killall relies on utmpx, and this command affects the contents of
 3518          * said file.
 3519          */
 3520         if (access("/usr/lib/acct/closewtmp", X_OK) == 0)
 3521                 fork_with_timeout("/usr/lib/acct/closewtmp", 0, 5);
 3522 
 3523         /*
 3524          * For patches which may be installed as the system is shutting
 3525          * down, we need to ensure, one more time, that the boot archive
 3526          * really is up to date.
 3527          */
 3528         if (getzoneid() == 0 && access("/usr/sbin/bootadm", X_OK) == 0)
 3529                 fork_with_timeout("/usr/sbin/bootadm -ea update_all", 0, 3600);
 3530 
              /* /usr is left mounted here; it is unmounted after lockfs below. */
 3531         fork_with_timeout("/sbin/umountall -l", 0, 5);
 3532         fork_with_timeout("/sbin/umount /tmp /var/adm /var/run /var "
 3533             ">/dev/null 2>&1", 0, 5);
 3534 
 3535         /*
 3536          * Try to get to consistency for whatever UFS filesystems are left.
 3537          * This is pretty expensive, so we save it for the end in the hopes of
 3538          * minimizing what it must do.  The other option would be to start in
 3539          * parallel with the killall's, but lockfs tends to throw out much more
 3540          * than is needed, and so subsequent commands (like umountall) take a
 3541          * long time to get going again.
 3542          *
 3543          * Inside of zones, we don't bother, since we're not about to terminate
 3544          * the whole OS instance.
 3545          *
 3546          * On systems using only ZFS, this call to lockfs -fa is a no-op.
 3547          */
 3548         if (getzoneid() == 0) {
 3549                 if (access("/usr/sbin/lockfs", X_OK) == 0)
 3550                         fork_with_timeout("/usr/sbin/lockfs -fa", 0, 30);
 3551 
 3552                 sync(); /* once more, with feeling */
 3553         }
 3554 
 3555         fork_with_timeout("/sbin/umount /usr >/dev/null 2>&1", 0, 5);
 3556 
 3557         /*
 3558          * Construct and emit the last words from userland:
 3559          * "<timestamp> The system is down.  Shutdown took <N> seconds."
 3560          *
 3561          * Normally we'd use syslog, but with /var and other things
 3562          * potentially gone, try to minimize the external dependencies.
 3563          */
 3564         now = time(NULL);
              /*
               * NOTE(review): the localtime_r() result is ignored; if it failed,
               * nowtm would be passed to strftime() uninitialised -- confirm
               * whether that can happen here.
               */
 3565         (void) localtime_r(&now, &nowtm);
 3566 
              /* Fall back to the bare message if strftime() produces nothing. */
 3567         if (strftime(down_buf, sizeof (down_buf),
 3568             "%b %e %T The system is down.", &nowtm) == 0) {
 3569                 (void) strlcpy(down_buf, "The system is down.",
 3570                     sizeof (down_buf));
 3571         }
 3572 
              /*
               * Report the elapsed time only when a start time was recorded
               * and the clock has not gone backwards since.
               *
               * NOTE(review): %lu is given a time_t difference; this assumes
               * time_t is a long on this platform -- confirm.
               */
 3573         if (halting_time != 0 && halting_time <= now) {
 3574                 (void) snprintf(time_buf, sizeof (time_buf),
 3575                     "  Shutdown took %lu seconds.", now - halting_time);
 3576         } else {
 3577                 time_buf[0] = '\0';
 3578         }
 3579         (void) printf("%s%s\n", down_buf, time_buf);
 3580 
              /* halting is the uadmin() argument chosen by the runlevel code. */
 3581         (void) uadmin(A_SHUTDOWN, halting, NULL);
 3582         uu_warn("uadmin() failed");
 3583 
              /* uadmin() returned, so remove the marker created above. */
 3584         if (remove(resetting) != 0 && errno != ENOENT)
 3585                 uu_warn("Could not remove \"%s\"", resetting);
 3586 }
3587 
3588 /*
3589  * If any of the up_svcs[] are online or satisfiable, return true.  If they are
3590  * all missing, disabled, in maintenance, or unsatisfiable, return false.
3591  */
3592 boolean_t
3593 can_come_up(void)
3594 {
3595         int i;
3596 
3597         assert(PTHREAD_MUTEX_HELD(&dgraph_lock));
3598 
3599         /*
3600          * If we are booting to single user (boot -s),


3759 
3760         return (NULL);
3761 }
3762 
3763 /* ARGSUSED */
3764 void *
3765 single_user_thread(void *unused)
3766 {
3767         uint_t left;
3768         scf_handle_t *h;
3769         scf_instance_t *inst;
3770         scf_property_t *prop;
3771         scf_value_t *val;
3772         const char *msg;
3773         char *buf;
3774         int r;
3775 
3776         MUTEX_LOCK(&single_user_thread_lock);
3777         single_user_thread_count++;
3778 
3779         if (!booting_to_single_user)
3780                 kill_user_procs();







3781 








3782         if (go_single_user_mode || booting_to_single_user) {
3783                 msg = "SINGLE USER MODE\n";
3784         } else {
3785                 assert(go_to_level1);
3786 
3787                 fork_rc_script('1', "start", B_TRUE);
3788 
3789                 uu_warn("The system is ready for administration.\n");
3790 
3791                 msg = "";
3792         }
3793 
3794         MUTEX_UNLOCK(&single_user_thread_lock);
3795 
3796         for (;;) {
3797                 MUTEX_LOCK(&dgraph_lock);
3798                 r = run_sulogin(msg);
3799                 MUTEX_UNLOCK(&dgraph_lock);
3800                 if (r == 0)
3801                         break;


5035 
5036         /*
5037          * Some run levels don't have a direct correspondence to any
5038          * milestones, so we have to signal init directly.
5039          */
5040         if (mark_rl) {
5041                 current_runlevel = rl;
5042                 signal_init(rl);
5043         }
5044 
5045         switch (rl) {
5046         case 'S':
5047                 uu_warn("The system is coming down for administration.  "
5048                     "Please wait.\n");
5049                 fork_rc_script(rl, stop, B_FALSE);
5050                 ms = single_user_fmri;
5051                 go_single_user_mode = B_TRUE;
5052                 break;
5053 
5054         case '0':
5055                 halting_time = time(NULL);
5056                 fork_rc_script(rl, stop, B_TRUE);
5057                 halting = AD_HALT;
5058                 goto uadmin;
5059 
5060         case '5':
5061                 halting_time = time(NULL);
5062                 fork_rc_script(rl, stop, B_TRUE);
5063                 halting = AD_POWEROFF;
5064                 goto uadmin;
5065 
5066         case '6':
5067                 halting_time = time(NULL);
5068                 fork_rc_script(rl, stop, B_TRUE);
5069                 halting = AD_BOOT;
5070                 goto uadmin;
5071 
5072 uadmin:
5073                 uu_warn("The system is coming down.  Please wait.\n");
5074                 ms = "none";
5075 
5076                 /*
5077                  * We can't wait until all services are offline since this
5078                  * thread is responsible for taking them offline.  Instead we
5079                  * set halting to the second argument for uadmin() and call
5080                  * do_uadmin() from dgraph_set_instance_state() when
5081                  * appropriate.
5082                  */
5083                 break;
5084 
5085         case '1':
5086                 if (current_runlevel != 'S') {
5087                         uu_warn("Changing to state 1.\n");