Print this page
6805730 some simple changes would make 'init 5' much faster
6809492 startd shouldn't let hung subprocesses impede shutdown

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/svc/startd/graph.c
          +++ new/usr/src/cmd/svc/startd/graph.c
↓ open down ↓ 170 lines elided ↑ open up ↑
 171  171  /*
 172  172   * These variables indicate what should be done when we reach the target
 173  173   * milestone, i.e., when non_subgraph_svcs == 0.  They are acted upon in
 174  174   * dgraph_set_instance_state().
 175  175   */
 176  176  static int halting = -1;
 177  177  static boolean_t go_single_user_mode = B_FALSE;
 178  178  static boolean_t go_to_level1 = B_FALSE;
 179  179  
 180  180  /*
      181 + * Tracks when we started halting.
      182 + */
      183 +static time_t halting_time = 0;
      184 +
      185 +/*
 181  186   * This tracks the legacy runlevel to ensure we signal init and manage
 182  187   * utmpx entries correctly.
 183  188   */
 184  189  static char current_runlevel = '\0';
 185  190  
 186  191  /* Number of single user threads currently running */
 187  192  static pthread_mutex_t single_user_thread_lock;
 188  193  static int single_user_thread_count = 0;
 189  194  
 190  195  /* Statistics for dependency cycle-checking */
↓ open down ↓ 3243 lines elided ↑ open up ↑
3434 3439          }
3435 3440  
3436 3441  out:
3437 3442          v->gv_flags |= GV_CONFIGURED;
3438 3443  
3439 3444          graph_enable_by_vertex(v, enabled, 0);
3440 3445  
3441 3446          return (0);
3442 3447  }
3443 3448  
     3449 +
3444 3450  static void
     3451 +kill_user_procs(void)
     3452 +{
     3453 +        (void) fputs("svc.startd: Killing user processes.\n", stdout);
     3454 +
     3455 +        /*
     3456 +         * Despite its name, killall's role is to get select user processes--
     3457 +         * basically those representing terminal-based logins-- to die.  Victims
     3458 +         * are located by killall in the utmp database.  Since these are most
     3459 +         * often shell based logins, and many shells mask SIGTERM (but are
     3460 +         * responsive to SIGHUP) we first HUP and then shortly thereafter
     3461 +         * kill -9.
     3462 +         */
     3463 +        (void) fork_with_timeout("/usr/sbin/killall HUP", 1, 5);
     3464 +        (void) fork_with_timeout("/usr/sbin/killall KILL", 1, 5);
     3465 +
     3466 +        /*
     3467 +         * Note the selection of user IDs 0, 1 and 15, subsequently
     3468 +         * inverted by -v.  15 is reserved for dladmd.  Yes, this is a
     3469 +         * kludge-- a better policy is needed.
     3470 +         *
     3471 +         * Note that fork_with_timeout will only wait out the 1 second
     3472 +         * "grace time" if pkill actually returns 0.  So if there are
     3473 +         * no matches, this will run to completion much more quickly.
     3474 +         */
     3475 +        (void) fork_with_timeout("/usr/bin/pkill -TERM -v -u 0,1,15", 1, 5);
     3476 +        (void) fork_with_timeout("/usr/bin/pkill -KILL -v -u 0,1,15", 1, 5);
     3477 +}
     3478 +
     3479 +static void
3445 3480  do_uadmin(void)
3446 3481  {
3447      -        int fd, left;
     3482 +        int fd;
3448 3483          struct statvfs vfs;
     3484 +        time_t now;
     3485 +        struct tm nowtm;
     3486 +        char down_buf[256], time_buf[256];
3449 3487  
3450 3488          const char * const resetting = "/etc/svc/volatile/resetting";
3451 3489  
3452 3490          fd = creat(resetting, 0777);
3453 3491          if (fd >= 0)
3454 3492                  startd_close(fd);
3455 3493          else
3456 3494                  uu_warn("Could not create \"%s\"", resetting);
3457 3495  
3458 3496          /* Kill dhcpagent if we're not using nfs for root */
3459 3497          if ((statvfs("/", &vfs) == 0) &&
3460 3498              (strncmp(vfs.f_basetype, "nfs", sizeof ("nfs") - 1) != 0))
3461      -                (void) system("/usr/bin/pkill -x -u 0 dhcpagent");
     3499 +                fork_with_timeout("/usr/bin/pkill -x -u 0 dhcpagent", 0, 5);
3462 3500  
3463      -        (void) system("/usr/sbin/killall");
3464      -        left = 5;
3465      -        while (left > 0)
3466      -                left = sleep(left);
     3501 +        /*
     3502 +         * Call sync(2) now, before we kill off user processes.  This takes
     3503 +         * advantage of the several seconds of pause we have before the
     3504 +         * killalls are done, time we can make good use of to get pages
     3505 +         * moving out to disk.
     3506 +         *
     3507 +         * Inside non-global zones, we don't bother, and it's better not to
     3508 +         * anyway, since sync(2) can have system-wide impact.
     3509 +         */
     3510 +        if (getzoneid() == 0)
     3511 +                sync();
3467 3512  
3468      -        (void) system("/usr/sbin/killall 9");
3469      -        left = 10;
3470      -        while (left > 0)
3471      -                left = sleep(left);
     3513 +        kill_user_procs();
3472 3514  
3473      -        sync();
3474      -        sync();
3475      -        sync();
     3515 +        /*
     3516 +         * Note that this must come after the killing of user procs, since
     3517 +         * killall relies on utmpx, and this command affects the contents of
     3518 +         * said file.
     3519 +         */
     3520 +        if (access("/usr/lib/acct/closewtmp", X_OK) == 0)
     3521 +                fork_with_timeout("/usr/lib/acct/closewtmp", 0, 5);
3476 3522  
3477      -        (void) system("/sbin/umountall -l");
3478      -        (void) system("/sbin/umount /tmp >/dev/null 2>&1");
3479      -        (void) system("/sbin/umount /var/adm >/dev/null 2>&1");
3480      -        (void) system("/sbin/umount /var/run >/dev/null 2>&1");
3481      -        (void) system("/sbin/umount /var >/dev/null 2>&1");
3482      -        (void) system("/sbin/umount /usr >/dev/null 2>&1");
     3523 +        /*
     3524 +         * For patches which may be installed as the system is shutting
     3525 +         * down, we need to ensure, one more time, that the boot archive
     3526 +         * really is up to date.
     3527 +         */
     3528 +        if (getzoneid() == 0 && access("/usr/sbin/bootadm", X_OK) == 0)
     3529 +                fork_with_timeout("/usr/sbin/bootadm -ea update_all", 0, 3600);
3483 3530  
3484      -        uu_warn("The system is down.\n");
     3531 +        fork_with_timeout("/sbin/umountall -l", 0, 5);
     3532 +        fork_with_timeout("/sbin/umount /tmp /var/adm /var/run /var "
     3533 +            ">/dev/null 2>&1", 0, 5);
3485 3534  
     3535 +        /*
     3536 +         * Try to get to consistency for whatever UFS filesystems are left.
     3537 +         * This is pretty expensive, so we save it for the end in the hopes of
     3538 +         * minimizing what it must do.  The other option would be to start in
     3539 +         * parallel with the killall's, but lockfs tends to throw out much more
     3540 +         * than is needed, and so subsequent commands (like umountall) take a
     3541 +         * long time to get going again.
     3542 +         *
     3543 +         * Inside of zones, we don't bother, since we're not about to terminate
     3544 +         * the whole OS instance.
     3545 +         *
     3546 +         * On systems using only ZFS, this call to lockfs -fa is a no-op.
     3547 +         */
     3548 +        if (getzoneid() == 0) {
     3549 +                if (access("/usr/sbin/lockfs", X_OK) == 0)
     3550 +                        fork_with_timeout("/usr/sbin/lockfs -fa", 0, 30);
     3551 +
     3552 +                sync(); /* once more, with feeling */
     3553 +        }
     3554 +
     3555 +        fork_with_timeout("/sbin/umount /usr >/dev/null 2>&1", 0, 5);
     3556 +
     3557 +        /*
     3558 +         * Construct and emit the last words from userland:
     3559 +         * "<timestamp> The system is down.  Shutdown took <N> seconds."
     3560 +         *
     3561 +         * Normally we'd use syslog, but with /var and other things
     3562 +         * potentially gone, try to minimize the external dependencies.
     3563 +         */
     3564 +        now = time(NULL);
     3565 +        (void) localtime_r(&now, &nowtm);
     3566 +
     3567 +        if (strftime(down_buf, sizeof (down_buf),
     3568 +            "%b %e %T The system is down.", &nowtm) == 0) {
     3569 +                (void) strlcpy(down_buf, "The system is down.",
     3570 +                    sizeof (down_buf));
     3571 +        }
     3572 +
     3573 +        if (halting_time != 0 && halting_time <= now) {
     3574 +                (void) snprintf(time_buf, sizeof (time_buf),
     3575 +                    "  Shutdown took %lu seconds.", now - halting_time);
     3576 +        } else {
     3577 +                time_buf[0] = '\0';
     3578 +        }
     3579 +        (void) printf("%s%s\n", down_buf, time_buf);
     3580 +
3486 3581          (void) uadmin(A_SHUTDOWN, halting, NULL);
3487 3582          uu_warn("uadmin() failed");
3488 3583  
3489 3584          if (remove(resetting) != 0 && errno != ENOENT)
3490 3585                  uu_warn("Could not remove \"%s\"", resetting);
3491 3586  }
3492 3587  
3493 3588  /*
3494 3589   * If any of the up_svcs[] are online or satisfiable, return true.  If they are
3495 3590   * all missing, disabled, in maintenance, or unsatisfiable, return false.
↓ open down ↓ 178 lines elided ↑ open up ↑
3674 3769          scf_instance_t *inst;
3675 3770          scf_property_t *prop;
3676 3771          scf_value_t *val;
3677 3772          const char *msg;
3678 3773          char *buf;
3679 3774          int r;
3680 3775  
3681 3776          MUTEX_LOCK(&single_user_thread_lock);
3682 3777          single_user_thread_count++;
3683 3778  
3684      -        if (!booting_to_single_user) {
3685      -                /*
3686      -                 * From rcS.sh: Look for ttymon, in.telnetd, in.rlogind and
3687      -                 * processes in their process groups so they can be terminated.
3688      -                 */
3689      -                (void) fputs("svc.startd: Killing user processes: ", stdout);
3690      -                (void) system("/usr/sbin/killall");
3691      -                (void) system("/usr/sbin/killall 9");
3692      -                (void) system("/usr/bin/pkill -TERM -v -u 0,1");
     3779 +        if (!booting_to_single_user)
     3780 +                kill_user_procs();
3693 3781  
3694      -                left = 5;
3695      -                while (left > 0)
3696      -                        left = sleep(left);
3697      -
3698      -                (void) system("/usr/bin/pkill -KILL -v -u 0,1");
3699      -                (void) puts("done.");
3700      -        }
3701      -
3702 3782          if (go_single_user_mode || booting_to_single_user) {
3703 3783                  msg = "SINGLE USER MODE\n";
3704 3784          } else {
3705 3785                  assert(go_to_level1);
3706 3786  
3707 3787                  fork_rc_script('1', "start", B_TRUE);
3708 3788  
3709 3789                  uu_warn("The system is ready for administration.\n");
3710 3790  
3711 3791                  msg = "";
↓ open down ↓ 1253 lines elided ↑ open up ↑
4965 5045          switch (rl) {
4966 5046          case 'S':
4967 5047                  uu_warn("The system is coming down for administration.  "
4968 5048                      "Please wait.\n");
4969 5049                  fork_rc_script(rl, stop, B_FALSE);
4970 5050                  ms = single_user_fmri;
4971 5051                  go_single_user_mode = B_TRUE;
4972 5052                  break;
4973 5053  
4974 5054          case '0':
     5055 +                halting_time = time(NULL);
4975 5056                  fork_rc_script(rl, stop, B_TRUE);
4976 5057                  halting = AD_HALT;
4977 5058                  goto uadmin;
4978 5059  
4979 5060          case '5':
     5061 +                halting_time = time(NULL);
4980 5062                  fork_rc_script(rl, stop, B_TRUE);
4981 5063                  halting = AD_POWEROFF;
4982 5064                  goto uadmin;
4983 5065  
4984 5066          case '6':
     5067 +                halting_time = time(NULL);
4985 5068                  fork_rc_script(rl, stop, B_TRUE);
4986 5069                  halting = AD_BOOT;
4987 5070                  goto uadmin;
4988 5071  
4989 5072  uadmin:
4990 5073                  uu_warn("The system is coming down.  Please wait.\n");
4991 5074                  ms = "none";
4992 5075  
4993 5076                  /*
4994 5077                   * We can't wait until all services are offline since this
↓ open down ↓ 1562 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX