1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <mdb/mdb_ctf.h>
  27 #include <sys/zfs_context.h>
  28 #include <sys/mdb_modapi.h>
  29 #include <sys/dbuf.h>
  30 #include <sys/dmu_objset.h>
  31 #include <sys/dsl_dir.h>
  32 #include <sys/dsl_pool.h>
  33 #include <sys/metaslab_impl.h>
  34 #include <sys/space_map.h>
  35 #include <sys/list.h>
  36 #include <sys/spa_impl.h>
  37 #include <sys/vdev_impl.h>
  38 #include <sys/zio_compress.h>
  39 
  40 #ifndef _KERNEL
  41 #include "../genunix/list.h"
  42 #endif
  43 
  44 #ifdef _KERNEL
  45 #define ZFS_OBJ_NAME    "zfs"
  46 #else
  47 #define ZFS_OBJ_NAME    "libzpool.so.1"
  48 #endif
  49 
  50 static char *
  51 local_strdup(const char *s)
  52 {
  53         char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
  54 
  55         (void) strcpy(s1, s);
  56         return (s1);
  57 }
  58 
  59 static int
  60 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
  61     const char *member, int len, void *buf)
  62 {
  63         mdb_ctf_id_t id;
  64         ulong_t off;
  65         char name[64];
  66 
  67         if (idp == NULL) {
  68                 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
  69                         mdb_warn("couldn't find type %s", type);
  70                         return (DCMD_ERR);
  71                 }
  72                 idp = &id;
  73         } else {
  74                 type = name;
  75                 mdb_ctf_type_name(*idp, name, sizeof (name));
  76         }
  77 
  78         if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
  79                 mdb_warn("couldn't find member %s of type %s\n", member, type);
  80                 return (DCMD_ERR);
  81         }
  82         if (off % 8 != 0) {
  83                 mdb_warn("member %s of type %s is unsupported bitfield",
  84                     member, type);
  85                 return (DCMD_ERR);
  86         }
  87         off /= 8;
  88 
  89         if (mdb_vread(buf, len, addr + off) == -1) {
  90                 mdb_warn("failed to read %s from %s at %p",
  91                     member, type, addr + off);
  92                 return (DCMD_ERR);
  93         }
  94         /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
  95 
  96         return (0);
  97 }
  98 
/*
 * Read `member' of `type' from the structure at addr into dest.  Both the
 * type and member arguments are stringified, so they must be spelled exactly
 * as they should be looked up; the CTF type is resolved by name on each use.
 * Evaluates to getmember()'s return value.
 */
#define GETMEMB(addr, type, member, dest) \
        getmember(addr, #type, NULL, #member, sizeof (dest), &(dest))

/*
 * As GETMEMB, but takes a pointer to an already-resolved CTF id (ctfid)
 * instead of a type name.
 */
#define GETMEMBID(addr, ctfid, member, dest) \
        getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
 104 
 105 static int
 106 getrefcount(uintptr_t addr, mdb_ctf_id_t *id,
 107     const char *member, uint64_t *rc)
 108 {
 109         static int gotid;
 110         static mdb_ctf_id_t rc_id;
 111         ulong_t off;
 112 
 113         if (!gotid) {
 114                 if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
 115                         mdb_warn("couldn't find struct refcount");
 116                         return (DCMD_ERR);
 117                 }
 118                 gotid = TRUE;
 119         }
 120 
 121         if (mdb_ctf_offsetof(*id, member, &off) == -1) {
 122                 char name[64];
 123                 mdb_ctf_type_name(*id, name, sizeof (name));
 124                 mdb_warn("couldn't find member %s of type %s\n", member, name);
 125                 return (DCMD_ERR);
 126         }
 127         off /= 8;
 128 
 129         return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
 130 }
 131 
 132 static int
 133 read_symbol(char *sym_name, void **bufp)
 134 {
 135         GElf_Sym sym;
 136 
 137         if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
 138                 mdb_warn("can't find symbol %s", sym_name);
 139                 return (DCMD_ERR);
 140         }
 141 
 142         *bufp = mdb_alloc(sym.st_size, UM_SLEEP);
 143 
 144         if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
 145                 mdb_warn("can't read data for symbol %s", sym_name);
 146                 mdb_free(*bufp, sym.st_size);
 147                 return (DCMD_ERR);
 148         }
 149 
 150         return (DCMD_OK);
 151 }
 152 
/* When non-zero, freelist_walk_step() also prints each entry's raw value. */
static int verbose;
 154 
 155 static int
 156 freelist_walk_init(mdb_walk_state_t *wsp)
 157 {
 158         if (wsp->walk_addr == NULL) {
 159                 mdb_warn("must supply starting address\n");
 160                 return (WALK_ERR);
 161         }
 162 
 163         wsp->walk_data = 0;  /* Index into the freelist */
 164         return (WALK_NEXT);
 165 }
 166 
 167 static int
 168 freelist_walk_step(mdb_walk_state_t *wsp)
 169 {
 170         uint64_t entry;
 171         uintptr_t number = (uintptr_t)wsp->walk_data;
 172         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 173                             "INVALID", "INVALID", "INVALID", "INVALID" };
 174         int mapshift = SPA_MINBLOCKSHIFT;
 175 
 176         if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
 177                 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
 178                 return (WALK_DONE);
 179         }
 180         wsp->walk_addr += sizeof (entry);
 181         wsp->walk_data = (void *)(number + 1);
 182 
 183         if (SM_DEBUG_DECODE(entry)) {
 184                 mdb_printf("DEBUG: %3u  %10s: txg=%llu  pass=%llu\n",
 185                     number,
 186                     ddata[SM_DEBUG_ACTION_DECODE(entry)],
 187                     SM_DEBUG_TXG_DECODE(entry),
 188                     SM_DEBUG_SYNCPASS_DECODE(entry));
 189         } else {
 190                 mdb_printf("Entry: %3u  offsets=%08llx-%08llx  type=%c  "
 191                     "size=%06llx", number,
 192                     SM_OFFSET_DECODE(entry) << mapshift,
 193                     (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
 194                     mapshift,
 195                     SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 196                     SM_RUN_DECODE(entry) << mapshift);
 197                 if (verbose)
 198                         mdb_printf("      (raw=%012llx)\n", entry);
 199                 mdb_printf("\n");
 200         }
 201         return (WALK_NEXT);
 202 }
 203 
 204 
 205 static int
 206 dataset_name(uintptr_t addr, char *buf)
 207 {
 208         static int gotid;
 209         static mdb_ctf_id_t dd_id;
 210         uintptr_t dd_parent;
 211         char dd_myname[MAXNAMELEN];
 212 
 213         if (!gotid) {
 214                 if (mdb_ctf_lookup_by_name("struct dsl_dir",
 215                     &dd_id) == -1) {
 216                         mdb_warn("couldn't find struct dsl_dir");
 217                         return (DCMD_ERR);
 218                 }
 219                 gotid = TRUE;
 220         }
 221         if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
 222             GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
 223                 return (DCMD_ERR);
 224         }
 225 
 226         if (dd_parent) {
 227                 if (dataset_name(dd_parent, buf))
 228                         return (DCMD_ERR);
 229                 strcat(buf, "/");
 230         }
 231 
 232         if (dd_myname[0])
 233                 strcat(buf, dd_myname);
 234         else
 235                 strcat(buf, "???");
 236 
 237         return (0);
 238 }
 239 
 240 static int
 241 objset_name(uintptr_t addr, char *buf)
 242 {
 243         static int gotid;
 244         static mdb_ctf_id_t osi_id, ds_id;
 245         uintptr_t os_dsl_dataset;
 246         char ds_snapname[MAXNAMELEN];
 247         uintptr_t ds_dir;
 248 
 249         buf[0] = '\0';
 250 
 251         if (!gotid) {
 252                 if (mdb_ctf_lookup_by_name("struct objset_impl",
 253                     &osi_id) == -1) {
 254                         mdb_warn("couldn't find struct objset_impl");
 255                         return (DCMD_ERR);
 256                 }
 257                 if (mdb_ctf_lookup_by_name("struct dsl_dataset",
 258                     &ds_id) == -1) {
 259                         mdb_warn("couldn't find struct dsl_dataset");
 260                         return (DCMD_ERR);
 261                 }
 262 
 263                 gotid = TRUE;
 264         }
 265 
 266         if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
 267                 return (DCMD_ERR);
 268 
 269         if (os_dsl_dataset == 0) {
 270                 strcat(buf, "mos");
 271                 return (0);
 272         }
 273 
 274         if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
 275             GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
 276                 return (DCMD_ERR);
 277         }
 278 
 279         if (ds_dir && dataset_name(ds_dir, buf))
 280                 return (DCMD_ERR);
 281 
 282         if (ds_snapname[0]) {
 283                 strcat(buf, "@");
 284                 strcat(buf, ds_snapname);
 285         }
 286         return (0);
 287 }
 288 
 289 static void
 290 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
 291     const char *prefix)
 292 {
 293         const char *cp;
 294         size_t len = strlen(prefix);
 295 
 296         if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
 297                 if (strncmp(cp, prefix, len) == 0)
 298                         cp += len;
 299                 (void) strncpy(out, cp, size);
 300         } else {
 301                 mdb_snprintf(out, size, "? (%d)", val);
 302         }
 303 }
 304 
 305 /* ARGSUSED */
 306 static int
 307 zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 308 {
 309         mdb_ctf_id_t pipe_enum;
 310         int i;
 311         char stage[1024];
 312 
 313         if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
 314                 mdb_warn("Could not find enum zio_stage");
 315                 return (DCMD_ERR);
 316         }
 317 
 318         for (i = 0; i < 32; i++) {
 319                 if (addr & (1U << i)) {
 320                         enum_lookup(stage, sizeof (stage), pipe_enum, i,
 321                             "ZIO_STAGE_");
 322                         mdb_printf("    %s\n", stage);
 323                 }
 324         }
 325 
 326         return (DCMD_OK);
 327 }
 328 
 329 /* ARGSUSED */
 330 static int
 331 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 332 {
 333         /*
 334          * This table can be approximately generated by running:
 335          * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
 336          */
 337         static const char *params[] = {
 338                 "arc_reduce_dnlc_percent",
 339                 "zfs_arc_max",
 340                 "zfs_arc_min",
 341                 "arc_shrink_shift",
 342                 "zfs_mdcomp_disable",
 343                 "zfs_prefetch_disable",
 344                 "zfetch_max_streams",
 345                 "zfetch_min_sec_reap",
 346                 "zfetch_block_cap",
 347                 "zfetch_array_rd_sz",
 348                 "zfs_default_bs",
 349                 "zfs_default_ibs",
 350                 "metaslab_aliquot",
 351                 "reference_tracking_enable",
 352                 "reference_history",
 353                 "zio_taskq_threads",
 354                 "spa_max_replication_override",
 355                 "spa_mode",
 356                 "zfs_flags",
 357                 "zfs_txg_synctime",
 358                 "zfs_txg_timeout",
 359                 "zfs_write_limit_min",
 360                 "zfs_write_limit_max",
 361                 "zfs_write_limit_shift",
 362                 "zfs_write_limit_override",
 363                 "zfs_no_write_throttle",
 364                 "zfs_vdev_cache_max",
 365                 "zfs_vdev_cache_size",
 366                 "zfs_vdev_cache_bshift",
 367                 "vdev_mirror_shift",
 368                 "zfs_vdev_max_pending",
 369                 "zfs_vdev_min_pending",
 370                 "zfs_scrub_limit",
 371                 "zfs_vdev_time_shift",
 372                 "zfs_vdev_ramp_rate",
 373                 "zfs_vdev_aggregation_limit",
 374                 "fzap_default_block_shift",
 375                 "zfs_immediate_write_sz",
 376                 "zfs_read_chunk_size",
 377                 "zil_disable",
 378                 "zfs_nocacheflush",
 379                 "metaslab_gang_bang",
 380                 "zio_injection_enabled",
 381                 "zvol_immediate_write_sz",
 382         };
 383         int i;
 384 
 385         for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
 386                 int sz;
 387                 uint64_t val64;
 388                 uint32_t *val32p = (uint32_t *)&val64;
 389 
 390                 sz = mdb_readvar(&val64, params[i]);
 391                 if (sz == 4) {
 392                         mdb_printf("%s = 0x%x\n", params[i], *val32p);
 393                 } else if (sz == 8) {
 394                         mdb_printf("%s = 0x%llx\n", params[i], val64);
 395                 } else {
 396                         mdb_warn("variable %s not found", params[i]);
 397                 }
 398         }
 399 
 400         return (DCMD_OK);
 401 }
 402 
 403 /* ARGSUSED */
 404 static int
 405 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 406 {
 407         blkptr_t bp;
 408         dmu_object_type_info_t *doti;
 409         zio_compress_info_t *zct;
 410         zio_checksum_info_t *zci;
 411         zio_crypt_info_t *zcc;
 412         int i;
 413         char buf[MAXPATHLEN];
 414 
 415         if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
 416                 mdb_warn("failed to read blkptr_t");
 417                 return (DCMD_ERR);
 418         }
 419 
 420         if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
 421                 return (DCMD_ERR);
 422         for (i = 0; i < DMU_OT_NUMTYPES; i++) {
 423                 mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
 424                 doti[i].ot_name = local_strdup(buf);
 425         }
 426 
 427         if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
 428                 return (DCMD_ERR);
 429         for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
 430                 mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
 431                 zci[i].ci_name = local_strdup(buf);
 432         }
 433 
 434         if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
 435                 return (DCMD_ERR);
 436         for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
 437                 mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
 438                 zct[i].ci_name = local_strdup(buf);
 439         }
 440 
 441         if (read_symbol("zio_crypt_table", (void **)&zcc) != DCMD_OK)
 442                 return (DCMD_ERR);
 443         for (i = 0; i < ZIO_CRYPT_FUNCTIONS; i++) {
 444                 mdb_readstr(buf, sizeof (buf), (uintptr_t)zcc[i].ci_name);
 445                 zcc[i].ci_name = local_strdup(buf);
 446         }
 447 
 448         /*
 449          * Super-ick warning:  This code is also duplicated in
 450          * cmd/zdb.c .   Yeah, I hate code replication, too.
 451          */
 452         for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
 453                 dva_t *dva = &bp.blk_dva[i];
 454 
 455                 mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
 456                     DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
 457                 mdb_printf("DVA[%d]:       GANG: %-5s  GRID:  %04x\t"
 458                     "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
 459                     DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
 460                 mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
 461                     DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
 462                     BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
 463                     !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
 464                     DVA_GET_GANG(dva) ? "g" : "",
 465                     BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
 466         }
 467         mdb_printf("LSIZE:  %-16llx\t\tPSIZE: %llx\n",
 468             BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
 469         mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
 470             BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
 471             doti[BP_GET_TYPE(&bp)].ot_name);
 472         mdb_printf("BIRTH:  %-16llx   LEVEL: %-2d\tFILL:  %llx\n",
 473             bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
 474         mdb_printf("CKFUNC: %-16s\tCOMP: %s\tCRYPT: %s\n",
 475             zci[BP_GET_CHECKSUM(&bp)].ci_name,
 476             zct[BP_GET_COMPRESS(&bp)].ci_name,
 477             zcc[BP_GET_CRYPT(&bp)].ci_name);
 478         mdb_printf("CKSUM:  %llx:%llx:%llx:%llx\n",
 479             bp.blk_cksum.zc_word[0],
 480             bp.blk_cksum.zc_word[1],
 481             bp.blk_cksum.zc_word[2],
 482             bp.blk_cksum.zc_word[3]);
 483 
 484         return (DCMD_OK);
 485 }
 486 
 487 /* ARGSUSED */
 488 static int
 489 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 490 {
 491         mdb_ctf_id_t id;
 492         dmu_buf_t db;
 493         uintptr_t objset;
 494         uint8_t level;
 495         uint64_t blkid;
 496         uint64_t holds;
 497         char objectname[32];
 498         char blkidname[32];
 499         char path[MAXNAMELEN];
 500 
 501         if (DCMD_HDRSPEC(flags)) {
 502                 mdb_printf("        addr object lvl blkid holds os\n");
 503         }
 504 
 505         if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) {
 506                 mdb_warn("couldn't find struct dmu_buf_impl_t");
 507                 return (DCMD_ERR);
 508         }
 509 
 510         if (GETMEMBID(addr, &id, db_objset, objset) ||
 511             GETMEMBID(addr, &id, db, db) ||
 512             GETMEMBID(addr, &id, db_level, level) ||
 513             GETMEMBID(addr, &id, db_blkid, blkid)) {
 514                 return (WALK_ERR);
 515         }
 516 
 517         if (getrefcount(addr, &id, "db_holds", &holds)) {
 518                 return (WALK_ERR);
 519         }
 520 
 521         if (db.db_object == DMU_META_DNODE_OBJECT)
 522                 (void) strcpy(objectname, "mdn");
 523         else
 524                 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
 525                     (u_longlong_t)db.db_object);
 526 
 527         if (blkid == DB_BONUS_BLKID)
 528                 (void) strcpy(blkidname, "bonus");
 529         else
 530                 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
 531                     (u_longlong_t)blkid);
 532 
 533         if (objset_name(objset, path)) {
 534                 return (WALK_ERR);
 535         }
 536 
 537         mdb_printf("%p %8s %1u %9s %2llu %s\n",
 538             addr, objectname, level, blkidname, holds, path);
 539 
 540         return (DCMD_OK);
 541 }
 542 
 543 /* ARGSUSED */
 544 static int
 545 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 546 {
 547 #define HISTOSZ 32
 548         uintptr_t dbp;
 549         dmu_buf_impl_t db;
 550         dbuf_hash_table_t ht;
 551         uint64_t bucket, ndbufs;
 552         uint64_t histo[HISTOSZ];
 553         uint64_t histo2[HISTOSZ];
 554         int i, maxidx;
 555 
 556         if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
 557                 mdb_warn("failed to read 'dbuf_hash_table'");
 558                 return (DCMD_ERR);
 559         }
 560 
 561         for (i = 0; i < HISTOSZ; i++) {
 562                 histo[i] = 0;
 563                 histo2[i] = 0;
 564         }
 565 
 566         ndbufs = 0;
 567         for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
 568                 int len;
 569 
 570                 if (mdb_vread(&dbp, sizeof (void *),
 571                     (uintptr_t)(ht.hash_table+bucket)) == -1) {
 572                         mdb_warn("failed to read hash bucket %u at %p",
 573                             bucket, ht.hash_table+bucket);
 574                         return (DCMD_ERR);
 575                 }
 576 
 577                 len = 0;
 578                 while (dbp != 0) {
 579                         if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
 580                             dbp) == -1) {
 581                                 mdb_warn("failed to read dbuf at %p", dbp);
 582                                 return (DCMD_ERR);
 583                         }
 584                         dbp = (uintptr_t)db.db_hash_next;
 585                         for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
 586                                 histo2[i]++;
 587                         len++;
 588                         ndbufs++;
 589                 }
 590 
 591                 if (len >= HISTOSZ)
 592                         len = HISTOSZ-1;
 593                 histo[len]++;
 594         }
 595 
 596         mdb_printf("hash table has %llu buckets, %llu dbufs "
 597             "(avg %llu buckets/dbuf)\n",
 598             ht.hash_table_mask+1, ndbufs,
 599             (ht.hash_table_mask+1)/ndbufs);
 600 
 601         mdb_printf("\n");
 602         maxidx = 0;
 603         for (i = 0; i < HISTOSZ; i++)
 604                 if (histo[i] > 0)
 605                         maxidx = i;
 606         mdb_printf("hash chain length   number of buckets\n");
 607         for (i = 0; i <= maxidx; i++)
 608                 mdb_printf("%u                  %llu\n", i, histo[i]);
 609 
 610         mdb_printf("\n");
 611         maxidx = 0;
 612         for (i = 0; i < HISTOSZ; i++)
 613                 if (histo2[i] > 0)
 614                         maxidx = i;
 615         mdb_printf("hash chain depth    number of dbufs\n");
 616         for (i = 0; i <= maxidx; i++)
 617                 mdb_printf("%u or more          %llu    %llu%%\n",
 618                     i, histo2[i], histo2[i]*100/ndbufs);
 619 
 620 
 621         return (DCMD_OK);
 622 }
 623 
/*
 * Filter criteria handed to dbufs_cb() for every dbuf walked.  A numeric
 * field left at DBUFS_UNSET, or an osname of NULL, is not used to filter.
 */
typedef struct dbufs_data {
        mdb_ctf_id_t id;        /* CTF id used to read the dbuf's members */
        uint64_t objset;        /* compared with the dbuf's db_objset */
        uint64_t object;        /* compared with the dbuf's db.db_object */
        uint64_t level;         /* compared with the dbuf's db_level */
        uint64_t blkid;         /* compared with the dbuf's db_blkid */
        char *osname;           /* compared with objset_name() output */
} dbufs_data_t;

/* Sentinel marking a numeric dbufs_data_t filter as "not specified". */
#define DBUFS_UNSET     (0xbaddcafedeadbeefULL)
 634 
 635 /* ARGSUSED */
 636 static int
 637 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
 638 {
 639         dbufs_data_t *data = arg;
 640         uintptr_t objset;
 641         dmu_buf_t db;
 642         uint8_t level;
 643         uint64_t blkid;
 644         char osname[MAXNAMELEN];
 645 
 646         if (GETMEMBID(addr, &data->id, db_objset, objset) ||
 647             GETMEMBID(addr, &data->id, db, db) ||
 648             GETMEMBID(addr, &data->id, db_level, level) ||
 649             GETMEMBID(addr, &data->id, db_blkid, blkid)) {
 650                 return (WALK_ERR);
 651         }
 652 
 653         if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
 654             (data->osname == NULL || (objset_name(objset, osname) == 0 &&
 655             strcmp(data->osname, osname) == 0)) &&
 656             (data->object == DBUFS_UNSET || data->object == db.db_object) &&
 657             (data->level == DBUFS_UNSET || data->level == level) &&
 658             (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
 659                 mdb_printf("%#lr\n", addr);
 660         }
 661         return (WALK_NEXT);
 662 }
 663 
 664 /* ARGSUSED */
 665 static int
 666 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 667 {
 668         dbufs_data_t data;
 669         char *object = NULL;
 670         char *blkid = NULL;
 671 
 672         data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
 673         data.osname = NULL;
 674 
 675         if (mdb_getopts(argc, argv,
 676             'O', MDB_OPT_UINT64, &data.objset,
 677             'n', MDB_OPT_STR, &data.osname,
 678             'o', MDB_OPT_STR, &object,
 679             'l', MDB_OPT_UINT64, &data.level,
 680             'b', MDB_OPT_STR, &blkid) != argc) {
 681                 return (DCMD_USAGE);
 682         }
 683 
 684         if (object) {
 685                 if (strcmp(object, "mdn") == 0) {
 686                         data.object = DMU_META_DNODE_OBJECT;
 687                 } else {
 688                         data.object = mdb_strtoull(object);
 689                 }
 690         }
 691 
 692         if (blkid) {
 693                 if (strcmp(blkid, "bonus") == 0) {
 694                         data.blkid = DB_BONUS_BLKID;
 695                 } else {
 696                         data.blkid = mdb_strtoull(blkid);
 697                 }
 698         }
 699 
 700         if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) {
 701                 mdb_warn("couldn't find struct dmu_buf_impl_t");
 702                 return (DCMD_ERR);
 703         }
 704 
 705         if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
 706                 mdb_warn("can't walk dbufs");
 707                 return (DCMD_ERR);
 708         }
 709 
 710         return (DCMD_OK);
 711 }
 712 
/* Search state shared between abuf_find() and abuf_find_cb(). */
typedef struct abuf_find_data {
        dva_t dva;              /* both words must equal the header's b_dva */
        mdb_ctf_id_t id;        /* CTF id used to read b_dva from a header */
} abuf_find_data_t;
 717 
 718 /* ARGSUSED */
 719 static int
 720 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
 721 {
 722         abuf_find_data_t *data = arg;
 723         dva_t dva;
 724 
 725         if (GETMEMBID(addr, &data->id, b_dva, dva)) {
 726                 return (WALK_ERR);
 727         }
 728 
 729         if (dva.dva_word[0] == data->dva.dva_word[0] &&
 730             dva.dva_word[1] == data->dva.dva_word[1]) {
 731                 mdb_printf("%#lr\n", addr);
 732         }
 733         return (WALK_NEXT);
 734 }
 735 
 736 /* ARGSUSED */
 737 static int
 738 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 739 {
 740         abuf_find_data_t data;
 741         GElf_Sym sym;
 742         int i;
 743         const char *syms[] = {
 744                 "ARC_mru",
 745                 "ARC_mru_ghost",
 746                 "ARC_mfu",
 747                 "ARC_mfu_ghost",
 748         };
 749 
 750         if (argc != 2)
 751                 return (DCMD_USAGE);
 752 
 753         for (i = 0; i < 2; i ++) {
 754                 switch (argv[i].a_type) {
 755                 case MDB_TYPE_STRING:
 756                         data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
 757                         break;
 758                 case MDB_TYPE_IMMEDIATE:
 759                         data.dva.dva_word[i] = argv[i].a_un.a_val;
 760                         break;
 761                 default:
 762                         return (DCMD_USAGE);
 763                 }
 764         }
 765 
 766         if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) {
 767                 mdb_warn("couldn't find struct arc_buf_hdr");
 768                 return (DCMD_ERR);
 769         }
 770 
 771         for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
 772                 if (mdb_lookup_by_name(syms[i], &sym)) {
 773                         mdb_warn("can't find symbol %s", syms[i]);
 774                         return (DCMD_ERR);
 775                 }
 776 
 777                 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
 778                         mdb_warn("can't walk %s", syms[i]);
 779                         return (DCMD_ERR);
 780                 }
 781         }
 782 
 783         return (DCMD_OK);
 784 }
 785 
 786 /*ARGSUSED*/
 787 static int
 788 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 789 {
 790         kstat_named_t *stats;
 791         GElf_Sym sym;
 792         int nstats, i;
 793         uint_t opt_a = FALSE;
 794         uint_t opt_b = FALSE;
 795         uint_t shift = 0;
 796         const char *suffix;
 797 
 798         static const char *bytestats[] = {
 799                 "p", "c", "c_min", "c_max", "size", NULL
 800         };
 801 
 802         static const char *extras[] = {
 803                 "arc_no_grow", "arc_tempreserve",
 804                 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
 805                 NULL
 806         };
 807 
 808         if (mdb_lookup_by_name("arc_stats", &sym) == -1) {
 809                 mdb_warn("failed to find 'arc_stats'");
 810                 return (DCMD_ERR);
 811         }
 812 
 813         stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
 814 
 815         if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
 816                 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
 817                 return (DCMD_ERR);
 818         }
 819 
 820         nstats = sym.st_size / sizeof (kstat_named_t);
 821 
 822         /* NB: -a / opt_a are ignored for backwards compatability */
 823         if (mdb_getopts(argc, argv,
 824             'a', MDB_OPT_SETBITS, TRUE, &opt_a,
 825             'b', MDB_OPT_SETBITS, TRUE, &opt_b,
 826             'k', MDB_OPT_SETBITS, 10, &shift,
 827             'm', MDB_OPT_SETBITS, 20, &shift,
 828             'g', MDB_OPT_SETBITS, 30, &shift,
 829             NULL) != argc)
 830                 return (DCMD_USAGE);
 831 
 832         if (!opt_b && !shift)
 833                 shift = 20;
 834 
 835         switch (shift) {
 836         case 0:
 837                 suffix = "B";
 838                 break;
 839         case 10:
 840                 suffix = "KB";
 841                 break;
 842         case 20:
 843                 suffix = "MB";
 844                 break;
 845         case 30:
 846                 suffix = "GB";
 847                 break;
 848         default:
 849                 suffix = "XX";
 850         }
 851 
 852         for (i = 0; i < nstats; i++) {
 853                 int j;
 854                 boolean_t bytes = B_FALSE;
 855 
 856                 for (j = 0; bytestats[j]; j++) {
 857                         if (strcmp(stats[i].name, bytestats[j]) == 0) {
 858                                 bytes = B_TRUE;
 859                                 break;
 860                         }
 861                 }
 862 
 863                 if (bytes) {
 864                         mdb_printf("%-25s = %9llu %s\n", stats[i].name,
 865                             stats[i].value.ui64 >> shift, suffix);
 866                 } else {
 867                         mdb_printf("%-25s = %9llu\n", stats[i].name,
 868                             stats[i].value.ui64);
 869                 }
 870         }
 871 
 872         for (i = 0; extras[i]; i++) {
 873                 uint64_t buf;
 874 
 875                 if (mdb_lookup_by_name(extras[i], &sym) == -1) {
 876                         mdb_warn("failed to find '%s'", extras[i]);
 877                         return (DCMD_ERR);
 878                 }
 879 
 880                 if (sym.st_size != sizeof (uint64_t) &&
 881                     sym.st_size != sizeof (uint32_t)) {
 882                         mdb_warn("expected scalar for variable '%s'\n",
 883                             extras[i]);
 884                         return (DCMD_ERR);
 885                 }
 886 
 887                 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
 888                         mdb_warn("couldn't read '%s'", extras[i]);
 889                         return (DCMD_ERR);
 890                 }
 891 
 892                 mdb_printf("%-25s = ", extras[i]);
 893 
 894                 /* NB: all the 64-bit extras happen to be byte counts */
 895                 if (sym.st_size == sizeof (uint64_t))
 896                         mdb_printf("%9llu %s\n", buf >> shift, suffix);
 897 
 898                 if (sym.st_size == sizeof (uint32_t))
 899                         mdb_printf("%9d\n", *((uint32_t *)&buf));
 900         }
 901         return (DCMD_OK);
 902 }
 903 
 904 /*
 905  * ::spa
 906  *
 907  *      -c      Print configuration information as well
 908  *      -v      Print vdev state
 909  *      -e      Print vdev error stats
 910  *
 911  * Print a summarized spa_t.  When given no arguments, prints out a table of all
 912  * active pools on the system.
 913  */
 914 /* ARGSUSED */
 915 static int
 916 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 917 {
 918         spa_t spa;
 919         const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
 920                 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
 921         const char *state;
 922         int config = FALSE;
 923         int vdevs = FALSE;
 924         int errors = FALSE;
 925 
 926         if (mdb_getopts(argc, argv,
 927             'c', MDB_OPT_SETBITS, TRUE, &config,
 928             'v', MDB_OPT_SETBITS, TRUE, &vdevs,
 929             'e', MDB_OPT_SETBITS, TRUE, &errors,
 930             NULL) != argc)
 931                 return (DCMD_USAGE);
 932 
 933         if (!(flags & DCMD_ADDRSPEC)) {
 934                 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
 935                         mdb_warn("can't walk spa");
 936                         return (DCMD_ERR);
 937                 }
 938 
 939                 return (DCMD_OK);
 940         }
 941 
 942         if (flags & DCMD_PIPE_OUT) {
 943                 mdb_printf("%#lr\n", addr);
 944                 return (DCMD_OK);
 945         }
 946 
 947         if (DCMD_HDRSPEC(flags))
 948                 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
 949                     sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
 950 
 951         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
 952                 mdb_warn("failed to read spa_t at %p", addr);
 953                 return (DCMD_ERR);
 954         }
 955 
 956         if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
 957                 state = "UNKNOWN";
 958         else
 959                 state = statetab[spa.spa_state];
 960 
 961         mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
 962 
 963         if (config) {
 964                 mdb_printf("\n");
 965                 mdb_inc_indent(4);
 966                 if (mdb_call_dcmd("spa_config", addr, flags, 0,
 967                     NULL) != DCMD_OK)
 968                         return (DCMD_ERR);
 969                 mdb_dec_indent(4);
 970         }
 971 
 972         if (vdevs || errors) {
 973                 mdb_arg_t v;
 974 
 975                 v.a_type = MDB_TYPE_STRING;
 976                 v.a_un.a_str = "-e";
 977 
 978                 mdb_printf("\n");
 979                 mdb_inc_indent(4);
 980                 if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0,
 981                     &v) != DCMD_OK)
 982                         return (DCMD_ERR);
 983                 mdb_dec_indent(4);
 984         }
 985 
 986         return (DCMD_OK);
 987 }
 988 
 989 /*
 990  * ::spa_config
 991  *
 992  * Given a spa_t, print the configuration information stored in spa_config.
 993  * Since it's just an nvlist, format it as an indented list of name=value pairs.
 994  * We simply read the value of spa_config and pass off to ::nvlist.
 995  */
 996 /* ARGSUSED */
 997 static int
 998 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 999 {
1000         spa_t spa;
1001 
1002         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1003                 return (DCMD_USAGE);
1004 
1005         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1006                 mdb_warn("failed to read spa_t at %p", addr);
1007                 return (DCMD_ERR);
1008         }
1009 
1010         if (spa.spa_config == NULL) {
1011                 mdb_printf("(none)\n");
1012                 return (DCMD_OK);
1013         }
1014 
1015         return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags,
1016             0, NULL));
1017 }
1018 
1019 /*
1020  * ::vdev
1021  *
1022  * Print out a summarized vdev_t, in the following form:
1023  *
1024  * ADDR             STATE       AUX            DESC
1025  * fffffffbcde23df0 HEALTHY     -              /dev/dsk/c0t0d0
1026  *
1027  * If '-r' is specified, recursively visit all children.
1028  *
1029  * With '-e', the statistics associated with the vdev are printed as well.
1030  */
1031 static int
1032 do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
1033     int recursive)
1034 {
1035         vdev_t vdev;
1036         char desc[MAXNAMELEN];
1037         int c, children;
1038         uintptr_t *child;
1039         const char *state, *aux;
1040 
1041         if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1042                 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1043                 return (DCMD_ERR);
1044         }
1045 
1046         if (flags & DCMD_PIPE_OUT) {
1047                 mdb_printf("%#lr", addr);
1048         } else {
1049                 if (vdev.vdev_path != NULL) {
1050                         if (mdb_readstr(desc, sizeof (desc),
1051                             (uintptr_t)vdev.vdev_path) == -1) {
1052                                 mdb_warn("failed to read vdev_path at %p\n",
1053                                     vdev.vdev_path);
1054                                 return (DCMD_ERR);
1055                         }
1056                 } else if (vdev.vdev_ops != NULL) {
1057                         vdev_ops_t ops;
1058                         if (mdb_vread(&ops, sizeof (ops),
1059                             (uintptr_t)vdev.vdev_ops) == -1) {
1060                                 mdb_warn("failed to read vdev_ops at %p\n",
1061                                     vdev.vdev_ops);
1062                                 return (DCMD_ERR);
1063                         }
1064                         (void) strcpy(desc, ops.vdev_op_type);
1065                 } else {
1066                         (void) strcpy(desc, "<unknown>");
1067                 }
1068 
1069                 if (depth == 0 && DCMD_HDRSPEC(flags))
1070                         mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1071                             "ADDR", "STATE", "AUX",
1072                             sizeof (uintptr_t) == 4 ? 43 : 35,
1073                             "DESCRIPTION");
1074 
1075                 mdb_printf("%0?p ", addr);
1076 
1077                 switch (vdev.vdev_state) {
1078                 case VDEV_STATE_CLOSED:
1079                         state = "CLOSED";
1080                         break;
1081                 case VDEV_STATE_OFFLINE:
1082                         state = "OFFLINE";
1083                         break;
1084                 case VDEV_STATE_CANT_OPEN:
1085                         state = "CANT_OPEN";
1086                         break;
1087                 case VDEV_STATE_DEGRADED:
1088                         state = "DEGRADED";
1089                         break;
1090                 case VDEV_STATE_HEALTHY:
1091                         state = "HEALTHY";
1092                         break;
1093                 case VDEV_STATE_REMOVED:
1094                         state = "REMOVED";
1095                         break;
1096                 case VDEV_STATE_FAULTED:
1097                         state = "FAULTED";
1098                         break;
1099                 default:
1100                         state = "UNKNOWN";
1101                         break;
1102                 }
1103 
1104                 switch (vdev.vdev_stat.vs_aux) {
1105                 case VDEV_AUX_NONE:
1106                         aux = "-";
1107                         break;
1108                 case VDEV_AUX_OPEN_FAILED:
1109                         aux = "OPEN_FAILED";
1110                         break;
1111                 case VDEV_AUX_CORRUPT_DATA:
1112                         aux = "CORRUPT_DATA";
1113                         break;
1114                 case VDEV_AUX_NO_REPLICAS:
1115                         aux = "NO_REPLICAS";
1116                         break;
1117                 case VDEV_AUX_BAD_GUID_SUM:
1118                         aux = "BAD_GUID_SUM";
1119                         break;
1120                 case VDEV_AUX_TOO_SMALL:
1121                         aux = "TOO_SMALL";
1122                         break;
1123                 case VDEV_AUX_BAD_LABEL:
1124                         aux = "BAD_LABEL";
1125                         break;
1126                 case VDEV_AUX_VERSION_NEWER:
1127                         aux = "VERS_NEWER";
1128                         break;
1129                 case VDEV_AUX_VERSION_OLDER:
1130                         aux = "VERS_OLDER";
1131                         break;
1132                 case VDEV_AUX_SPARED:
1133                         aux = "SPARED";
1134                         break;
1135                 case VDEV_AUX_ERR_EXCEEDED:
1136                         aux = "ERR_EXCEEDED";
1137                         break;
1138                 case VDEV_AUX_IO_FAILURE:
1139                         aux = "IO_FAILURE";
1140                         break;
1141                 case VDEV_AUX_BAD_LOG:
1142                         aux = "BAD_LOG";
1143                         break;
1144                 default:
1145                         aux = "UNKNOWN";
1146                         break;
1147                 }
1148 
1149                 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1150 
1151                 if (stats) {
1152                         vdev_stat_t *vs = &vdev.vdev_stat;
1153                         int i;
1154 
1155                         mdb_inc_indent(4);
1156                         mdb_printf("\n");
1157                         mdb_printf("%<u>       %12s %12s %12s %12s "
1158                             "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1159                             "IOCTL");
1160                         mdb_printf("OPS     ");
1161                         for (i = 1; i < ZIO_TYPES; i++)
1162                                 mdb_printf("%11#llx%s", vs->vs_ops[i],
1163                                     i == ZIO_TYPES - 1 ? "" : "  ");
1164                         mdb_printf("\n");
1165                         mdb_printf("BYTES   ");
1166                         for (i = 1; i < ZIO_TYPES; i++)
1167                                 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1168                                     i == ZIO_TYPES - 1 ? "" : "  ");
1169 
1170 
1171                         mdb_printf("\n");
1172                         mdb_printf("EREAD    %10#llx\n", vs->vs_read_errors);
1173                         mdb_printf("EWRITE   %10#llx\n", vs->vs_write_errors);
1174                         mdb_printf("ECKSUM   %10#llx\n",
1175                             vs->vs_checksum_errors);
1176                         mdb_dec_indent(4);
1177                 }
1178 
1179                 if (stats)
1180                         mdb_printf("\n");
1181         }
1182 
1183         children = vdev.vdev_children;
1184 
1185         if (children == 0 || !recursive)
1186                 return (DCMD_OK);
1187 
1188         child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1189         if (mdb_vread(child, children * sizeof (void *),
1190             (uintptr_t)vdev.vdev_child) == -1) {
1191                 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1192                 return (DCMD_ERR);
1193         }
1194 
1195         for (c = 0; c < children; c++) {
1196                 if (do_print_vdev(child[c], flags, depth + 2, stats,
1197                     recursive))
1198                         return (DCMD_ERR);
1199         }
1200 
1201         return (DCMD_OK);
1202 }
1203 
1204 static int
1205 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1206 {
1207         int recursive = FALSE;
1208         int stats = FALSE;
1209         uint64_t depth = 0;
1210 
1211         if (mdb_getopts(argc, argv,
1212             'r', MDB_OPT_SETBITS, TRUE, &recursive,
1213             'e', MDB_OPT_SETBITS, TRUE, &stats,
1214             'd', MDB_OPT_UINT64, &depth,
1215             NULL) != argc)
1216                 return (DCMD_USAGE);
1217 
1218         if (!(flags & DCMD_ADDRSPEC)) {
1219                 mdb_warn("no vdev_t address given\n");
1220                 return (DCMD_ERR);
1221         }
1222 
1223         return (do_print_vdev(addr, flags, (int)depth, stats, recursive));
1224 }
1225 
/*
 * Private state of the metaslab walker.  The walker visits, for each child
 * of a pool's root vdev in turn, every metaslab pointer in that vdev's
 * vdev_ms array.
 */
typedef struct metaslab_walk_data {
        uint64_t mw_numvdevs;   /* root vdev's vdev_children */
        uintptr_t *mw_vdevs;    /* local copy of root vdev's vdev_child[] */
        int mw_curvdev;         /* index into mw_vdevs being walked */
        uint64_t mw_nummss;     /* current vdev's vdev_ms_count */
        uintptr_t *mw_mss;      /* local copy of vdev_ms[]; NULL = not loaded */
        int mw_curms;           /* index into mw_mss of next metaslab */
} metaslab_walk_data_t;
1234 
1235 static int
1236 metaslab_walk_step(mdb_walk_state_t *wsp)
1237 {
1238         metaslab_walk_data_t *mw = wsp->walk_data;
1239         metaslab_t ms;
1240         uintptr_t msp;
1241 
1242         if (mw->mw_curvdev >= mw->mw_numvdevs)
1243                 return (WALK_DONE);
1244 
1245         if (mw->mw_mss == NULL) {
1246                 uintptr_t mssp;
1247                 uintptr_t vdevp;
1248 
1249                 ASSERT(mw->mw_curms == 0);
1250                 ASSERT(mw->mw_nummss == 0);
1251 
1252                 vdevp = mw->mw_vdevs[mw->mw_curvdev];
1253                 if (GETMEMB(vdevp, struct vdev, vdev_ms, mssp) ||
1254                     GETMEMB(vdevp, struct vdev, vdev_ms_count, mw->mw_nummss)) {
1255                         return (WALK_ERR);
1256                 }
1257 
1258                 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
1259                     UM_SLEEP | UM_GC);
1260                 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
1261                     mssp) == -1) {
1262                         mdb_warn("failed to read vdev_ms at %p", mssp);
1263                         return (WALK_ERR);
1264                 }
1265         }
1266 
1267         if (mw->mw_curms >= mw->mw_nummss) {
1268                 mw->mw_mss = NULL;
1269                 mw->mw_curms = 0;
1270                 mw->mw_nummss = 0;
1271                 mw->mw_curvdev++;
1272                 return (WALK_NEXT);
1273         }
1274 
1275         msp = mw->mw_mss[mw->mw_curms];
1276         if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
1277                 mdb_warn("failed to read metaslab_t at %p", msp);
1278                 return (WALK_ERR);
1279         }
1280 
1281         mw->mw_curms++;
1282 
1283         return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
1284 }
1285 
1286 /* ARGSUSED */
1287 static int
1288 metaslab_walk_init(mdb_walk_state_t *wsp)
1289 {
1290         metaslab_walk_data_t *mw;
1291         uintptr_t root_vdevp;
1292         uintptr_t childp;
1293 
1294         if (wsp->walk_addr == NULL) {
1295                 mdb_warn("must supply address of spa_t\n");
1296                 return (WALK_ERR);
1297         }
1298 
1299         mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
1300 
1301         if (GETMEMB(wsp->walk_addr, struct spa, spa_root_vdev, root_vdevp) ||
1302             GETMEMB(root_vdevp, struct vdev, vdev_children, mw->mw_numvdevs) ||
1303             GETMEMB(root_vdevp, struct vdev, vdev_child, childp)) {
1304                 return (DCMD_ERR);
1305         }
1306 
1307         mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
1308             UM_SLEEP | UM_GC);
1309         if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
1310             childp) == -1) {
1311                 mdb_warn("failed to read root vdev children at %p", childp);
1312                 return (DCMD_ERR);
1313         }
1314 
1315         wsp->walk_data = mw;
1316 
1317         return (WALK_NEXT);
1318 }
1319 
/*
 * Debugger-side subset of struct spa; ::spa_space fills the members one at a
 * time with GETMEMB instead of reading a whole spa_t.
 */
typedef struct mdb_spa {
        uintptr_t spa_dsl_pool;         /* target address of the dsl_pool */
        uintptr_t spa_root_vdev;        /* target address of the root vdev */
} mdb_spa_t;
1324 
/*
 * Debugger-side subset of struct dsl_dir, filled member by member with
 * GETMEMB in ::spa_space.
 */
typedef struct mdb_dsl_dir {
        uintptr_t dd_phys;                      /* target address of dd_phys */
        int64_t dd_space_towrite[TXG_SIZE];     /* one entry per txg slot */
} mdb_dsl_dir_t;
1329 
/*
 * Debugger-side subset of struct dsl_dir_phys; these are the three byte
 * counts that ::spa_space prints.
 */
typedef struct mdb_dsl_dir_phys {
        uint64_t dd_used_bytes;
        uint64_t dd_compressed_bytes;
        uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
1335 
/*
 * Debugger-side subset of vdev fields.
 * NOTE(review): presumably filled via GETMEMB like the other mdb_* shadow
 * structures -- confirm against its users.
 */
typedef struct mdb_vdev {
        uintptr_t vdev_parent;
        uintptr_t vdev_ms;
        uint64_t vdev_ms_count;
        vdev_stat_t vdev_stat;
} mdb_vdev_t;
1342 
/*
 * Debugger-side subset of struct metaslab, filled member by member with
 * GETMEMB in space_cb().
 */
typedef struct mdb_metaslab {
        space_map_t ms_allocmap[TXG_SIZE];      /* one map per txg slot */
        space_map_t ms_freemap[TXG_SIZE];       /* one map per txg slot */
        space_map_t ms_map;
        space_map_obj_t ms_smo;
        space_map_obj_t ms_smo_syncing;
} mdb_metaslab_t;
1350 
/*
 * Running totals accumulated by space_cb() over every metaslab that the
 * metaslab walker visits; printed by ::spa_space.
 */
typedef struct space_data {
        uint64_t ms_allocmap[TXG_SIZE]; /* sum of ms_allocmap[i].sm_space */
        uint64_t ms_freemap[TXG_SIZE];  /* sum of ms_freemap[i].sm_space */
        uint64_t ms_map;                /* sum of ms_map.sm_space */
        uint64_t avail;         /* sum of ms_map.sm_size - ms_smo.smo_alloc */
        uint64_t nowavail;      /* same, using ms_smo_syncing.smo_alloc */
} space_data_t;
1358 
1359 /* ARGSUSED */
1360 static int
1361 space_cb(uintptr_t addr, const void *unknown, void *arg)
1362 {
1363         space_data_t *sd = arg;
1364         mdb_metaslab_t ms;
1365 
1366         if (GETMEMB(addr, struct metaslab, ms_allocmap, ms.ms_allocmap) ||
1367             GETMEMB(addr, struct metaslab, ms_freemap, ms.ms_freemap) ||
1368             GETMEMB(addr, struct metaslab, ms_map, ms.ms_map) ||
1369             GETMEMB(addr, struct metaslab, ms_smo, ms.ms_smo) ||
1370             GETMEMB(addr, struct metaslab, ms_smo_syncing, ms.ms_smo_syncing)) {
1371                 return (WALK_ERR);
1372         }
1373 
1374         sd->ms_allocmap[0] += ms.ms_allocmap[0].sm_space;
1375         sd->ms_allocmap[1] += ms.ms_allocmap[1].sm_space;
1376         sd->ms_allocmap[2] += ms.ms_allocmap[2].sm_space;
1377         sd->ms_allocmap[3] += ms.ms_allocmap[3].sm_space;
1378         sd->ms_freemap[0] += ms.ms_freemap[0].sm_space;
1379         sd->ms_freemap[1] += ms.ms_freemap[1].sm_space;
1380         sd->ms_freemap[2] += ms.ms_freemap[2].sm_space;
1381         sd->ms_freemap[3] += ms.ms_freemap[3].sm_space;
1382         sd->ms_map += ms.ms_map.sm_space;
1383         sd->avail += ms.ms_map.sm_size - ms.ms_smo.smo_alloc;
1384         sd->nowavail += ms.ms_map.sm_size - ms.ms_smo_syncing.smo_alloc;
1385 
1386         return (WALK_NEXT);
1387 }
1388 
1389 /*
1390  * ::spa_space [-b]
1391  *
1392  * Given a spa_t, print out it's on-disk space usage and in-core
1393  * estimates of future usage.  If -b is given, print space in bytes.
1394  * Otherwise print in megabytes.
1395  */
1396 /* ARGSUSED */
1397 static int
1398 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1399 {
1400         mdb_spa_t spa;
1401         uintptr_t dp_root_dir;
1402         mdb_dsl_dir_t dd;
1403         mdb_dsl_dir_phys_t dsp;
1404         uint64_t children;
1405         uintptr_t childaddr;
1406         space_data_t sd;
1407         int shift = 20;
1408         char *suffix = "M";
1409         int bits = FALSE;
1410 
1411         if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) !=
1412             argc)
1413                 return (DCMD_USAGE);
1414         if (!(flags & DCMD_ADDRSPEC))
1415                 return (DCMD_USAGE);
1416 
1417         if (bits) {
1418                 shift = 0;
1419                 suffix = "";
1420         }
1421 
1422         if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) ||
1423             GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) ||
1424             GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) ||
1425             GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) ||
1426             GETMEMB(spa.spa_dsl_pool, struct dsl_pool,
1427             dp_root_dir, dp_root_dir) ||
1428             GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) ||
1429             GETMEMB(dp_root_dir, struct dsl_dir,
1430             dd_space_towrite, dd.dd_space_towrite) ||
1431             GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1432             dd_used_bytes, dsp.dd_used_bytes) ||
1433             GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1434             dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1435             GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1436             dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1437                 return (DCMD_ERR);
1438         }
1439 
1440         mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1441             dd.dd_space_towrite[0] >> shift, suffix,
1442             dd.dd_space_towrite[1] >> shift, suffix,
1443             dd.dd_space_towrite[2] >> shift, suffix,
1444             dd.dd_space_towrite[3] >> shift, suffix);
1445 
1446         mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1447             dsp.dd_used_bytes >> shift, suffix);
1448         mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1449             dsp.dd_compressed_bytes >> shift, suffix);
1450         mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1451             dsp.dd_uncompressed_bytes >> shift, suffix);
1452 
1453         bzero(&sd, sizeof (sd));
1454         if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
1455                 mdb_warn("can't walk metaslabs");
1456                 return (DCMD_ERR);
1457         }
1458 
1459         mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1460             sd.ms_allocmap[0] >> shift, suffix,
1461             sd.ms_allocmap[1] >> shift, suffix,
1462             sd.ms_allocmap[2] >> shift, suffix,
1463             sd.ms_allocmap[3] >> shift, suffix);
1464         mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1465             sd.ms_freemap[0] >> shift, suffix,
1466             sd.ms_freemap[1] >> shift, suffix,
1467             sd.ms_freemap[2] >> shift, suffix,
1468             sd.ms_freemap[3] >> shift, suffix);
1469         mdb_printf("ms_map = %llu%s\n", sd.ms_map >> shift, suffix);
1470         mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
1471         mdb_printf("current syncing avail = %llu%s\n",
1472             sd.nowavail >> shift, suffix);
1473 
1474         return (DCMD_OK);
1475 }
1476 
1477 /*
1478  * ::spa_verify
1479  *
1480  * Given a spa_t, verify that that the pool is self-consistent.
1481  * Currently, it only checks to make sure that the vdev tree exists.
1482  */
1483 /* ARGSUSED */
1484 static int
1485 spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1486 {
1487         spa_t spa;
1488 
1489         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1490                 return (DCMD_USAGE);
1491 
1492         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1493                 mdb_warn("failed to read spa_t at %p", addr);
1494                 return (DCMD_ERR);
1495         }
1496 
1497         if (spa.spa_root_vdev == NULL) {
1498                 mdb_printf("no vdev tree present\n");
1499                 return (DCMD_OK);
1500         }
1501 
1502         return (DCMD_OK);
1503 }
1504 
1505 static int
1506 spa_print_aux(spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
1507     const char *name)
1508 {
1509         uintptr_t *aux;
1510         size_t len;
1511         int ret, i;
1512 
1513         /*
1514          * Iterate over aux vdevs and print those out as well.  This is a
1515          * little annoying because we don't have a root vdev to pass to ::vdev.
1516          * Instead, we print a single line and then call it for each child
1517          * vdev.
1518          */
1519         if (sav->sav_count != 0) {
1520                 v[1].a_type = MDB_TYPE_STRING;
1521                 v[1].a_un.a_str = "-d";
1522                 v[2].a_type = MDB_TYPE_IMMEDIATE;
1523                 v[2].a_un.a_val = 2;
1524 
1525                 len = sav->sav_count * sizeof (uintptr_t);
1526                 aux = mdb_alloc(len, UM_SLEEP);
1527                 if (mdb_vread(aux, len,
1528                     (uintptr_t)sav->sav_vdevs) == -1) {
1529                         mdb_free(aux, len);
1530                         mdb_warn("failed to read l2cache vdevs at %p",
1531                             sav->sav_vdevs);
1532                         return (DCMD_ERR);
1533                 }
1534 
1535                 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
1536 
1537                 for (i = 0; i < sav->sav_count; i++) {
1538                         ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
1539                         if (ret != DCMD_OK) {
1540                                 mdb_free(aux, len);
1541                                 return (ret);
1542                         }
1543                 }
1544 
1545                 mdb_free(aux, len);
1546         }
1547 
1548         return (0);
1549 }
1550 
1551 /*
1552  * ::spa_vdevs
1553  *
1554  *      -e      Include error stats
1555  *
1556  * Print out a summarized list of vdevs for the given spa_t.
1557  * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
1558  * iterating over the cache devices.
1559  */
1560 /* ARGSUSED */
1561 static int
1562 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1563 {
1564         spa_t spa;
1565         mdb_arg_t v[3];
1566         int errors = FALSE;
1567         int ret;
1568 
1569         if (mdb_getopts(argc, argv,
1570             'e', MDB_OPT_SETBITS, TRUE, &errors,
1571             NULL) != argc)
1572                 return (DCMD_USAGE);
1573 
1574         if (!(flags & DCMD_ADDRSPEC))
1575                 return (DCMD_USAGE);
1576 
1577         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1578                 mdb_warn("failed to read spa_t at %p", addr);
1579                 return (DCMD_ERR);
1580         }
1581 
1582         /*
1583          * Unitialized spa_t structures can have a NULL root vdev.
1584          */
1585         if (spa.spa_root_vdev == NULL) {
1586                 mdb_printf("no associated vdevs\n");
1587                 return (DCMD_OK);
1588         }
1589 
1590         v[0].a_type = MDB_TYPE_STRING;
1591         v[0].a_un.a_str = errors ? "-re" : "-r";
1592 
1593         ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1594             flags, 1, v);
1595         if (ret != DCMD_OK)
1596                 return (ret);
1597 
1598         if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
1599             spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
1600                 return (DCMD_ERR);
1601 
1602         return (DCMD_OK);
1603 }
1604 
1605 /*
1606  * ::zio
1607  *
1608  * Print a summary of zio_t and all its children.  This is intended to display a
1609  * zio tree, and hence we only pick the most important pieces of information for
1610  * the main summary.  More detailed information can always be found by doing a
1611  * '::print zio' on the underlying zio_t.  The columns we display are:
1612  *
1613  *      ADDRESS         TYPE    STAGE           WAITER
1614  *
1615  * The 'address' column is indented by one space for each depth level as we
1616  * descend down the tree.
1617  */
1618 
/* Deepest indentation applied to the address column by zio_print_cb(). */
#define ZIO_MAXINDENT   24
/* Address column width: two hex digits per pointer byte plus the indent. */
#define ZIO_MAXWIDTH    (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
/* Values of zio_print_args_t.zpa_type: which links of a zio to follow. */
#define ZIO_WALK_SELF   0
#define ZIO_WALK_CHILD  1
#define ZIO_WALK_PARENT 2
1624 
/*
 * State shared between ::zio and the callbacks that recurse through a zio's
 * child or parent list.
 */
typedef struct zio_print_args {
        int     zpa_current_depth;      /* depth of the zio being visited */
        int     zpa_min_depth;          /* shallowest depth that is printed */
        int     zpa_max_depth;          /* recursion stops at this depth */
        int     zpa_type;               /* one of the ZIO_WALK_* values */
        uint_t  zpa_flags;              /* dcmd flags of the ::zio invocation */
} zio_print_args_t;
1632 
1633 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
1634 
1635 static int
1636 zio_print_cb(uintptr_t addr, const void *data, void *priv)
1637 {
1638         const zio_t *zio = data;
1639         zio_print_args_t *zpa = priv;
1640         mdb_ctf_id_t type_enum, stage_enum;
1641         int indent = zpa->zpa_current_depth;
1642         const char *type, *stage;
1643         uintptr_t laddr;
1644 
1645         if (indent > ZIO_MAXINDENT)
1646                 indent = ZIO_MAXINDENT;
1647 
1648         if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
1649             mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
1650                 mdb_warn("failed to lookup zio enums");
1651                 return (WALK_ERR);
1652         }
1653 
1654         if ((type = mdb_ctf_enum_name(type_enum, zio->io_type)) != NULL)
1655                 type += sizeof ("ZIO_TYPE_") - 1;
1656         else
1657                 type = "?";
1658 
1659         if ((stage = mdb_ctf_enum_name(stage_enum, zio->io_stage)) != NULL)
1660                 stage += sizeof ("ZIO_STAGE_") - 1;
1661         else
1662                 stage = "?";
1663 
1664         if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
1665                 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
1666                         mdb_printf("%?p\n", addr);
1667                 } else {
1668                         mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
1669                             ZIO_MAXWIDTH - indent, addr, type, stage);
1670                         if (zio->io_waiter)
1671                                 mdb_printf("%?p\n", zio->io_waiter);
1672                         else
1673                                 mdb_printf("-\n");
1674                 }
1675         }
1676 
1677         if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
1678                 return (WALK_NEXT);
1679 
1680         if (zpa->zpa_type == ZIO_WALK_PARENT)
1681                 laddr = addr + OFFSETOF(zio_t, io_parent_list);
1682         else
1683                 laddr = addr + OFFSETOF(zio_t, io_child_list);
1684 
1685         zpa->zpa_current_depth++;
1686         if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
1687                 mdb_warn("failed to walk zio_t children at %p\n", laddr);
1688                 return (WALK_ERR);
1689         }
1690         zpa->zpa_current_depth--;
1691 
1692         return (WALK_NEXT);
1693 }
1694 
1695 /* ARGSUSED */
1696 static int
1697 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
1698 {
1699         zio_link_t zl;
1700         zio_t zio;
1701         uintptr_t ziop;
1702         zio_print_args_t *zpa = arg;
1703 
1704         if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
1705                 mdb_warn("failed to read zio_link_t at %p", addr);
1706                 return (WALK_ERR);
1707         }
1708 
1709         if (zpa->zpa_type == ZIO_WALK_PARENT)
1710                 ziop = (uintptr_t)zl.zl_parent;
1711         else
1712                 ziop = (uintptr_t)zl.zl_child;
1713 
1714         if (mdb_vread(&zio, sizeof (zio_t), ziop) == -1) {
1715                 mdb_warn("failed to read zio_t at %p", ziop);
1716                 return (WALK_ERR);
1717         }
1718 
1719         return (zio_print_cb(ziop, &zio, arg));
1720 }
1721 
1722 /* ARGSUSED */
1723 static int
1724 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1725 {
1726         zio_t zio;
1727         zio_print_args_t zpa = { 0 };
1728 
1729         if (!(flags & DCMD_ADDRSPEC))
1730                 return (DCMD_USAGE);
1731 
1732         if (mdb_getopts(argc, argv,
1733             'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
1734             'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
1735             'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
1736             NULL) != argc)
1737                 return (DCMD_USAGE);
1738 
1739         zpa.zpa_flags = flags;
1740         if (zpa.zpa_max_depth != 0) {
1741                 if (zpa.zpa_type == ZIO_WALK_SELF)
1742                         zpa.zpa_type = ZIO_WALK_CHILD;
1743         } else if (zpa.zpa_type != ZIO_WALK_SELF) {
1744                 zpa.zpa_min_depth = 1;
1745                 zpa.zpa_max_depth = 1;
1746         }
1747 
1748         if (mdb_vread(&zio, sizeof (zio_t), addr) == -1) {
1749                 mdb_warn("failed to read zio_t at %p", addr);
1750                 return (DCMD_ERR);
1751         }
1752 
1753         if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags))
1754                 mdb_printf("%<u>%-*s %-5s %-16s %-?s%</u>\n", ZIO_MAXWIDTH,
1755                     "ADDRESS", "TYPE", "STAGE", "WAITER");
1756 
1757         if (zio_print_cb(addr, &zio, &zpa) != WALK_NEXT)
1758                 return (DCMD_ERR);
1759 
1760         return (DCMD_OK);
1761 }
1762 
1763 /*
1764  * [addr]::zio_state
1765  *
1766  * Print a summary of all zio_t structures on the system, or for a particular
1767  * pool.  This is equivalent to '::walk zio_root | ::zio'.
1768  */
1769 /*ARGSUSED*/
1770 static int
1771 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1772 {
1773         /*
1774          * MDB will remember the last address of the pipeline, so if we don't
1775          * zero this we'll end up trying to walk zio structures for a
1776          * non-existent spa_t.
1777          */
1778         if (!(flags & DCMD_ADDRSPEC))
1779                 addr = 0;
1780 
1781         return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
1782 }
1783 
1784 typedef struct txg_list_walk_data {
1785         uintptr_t lw_head[TXG_SIZE];
1786         int     lw_txgoff;
1787         int     lw_maxoff;
1788         size_t  lw_offset;
1789         void    *lw_obj;
1790 } txg_list_walk_data_t;
1791 
1792 static int
1793 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
1794 {
1795         txg_list_walk_data_t *lwd;
1796         txg_list_t list;
1797         int i;
1798 
1799         lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
1800         if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
1801                 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
1802                 return (WALK_ERR);
1803         }
1804 
1805         for (i = 0; i < TXG_SIZE; i++)
1806                 lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
1807         lwd->lw_offset = list.tl_offset;
1808         lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
1809             UM_SLEEP | UM_GC);
1810         lwd->lw_txgoff = txg;
1811         lwd->lw_maxoff = maxoff;
1812 
1813         wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1814         wsp->walk_data = lwd;
1815 
1816         return (WALK_NEXT);
1817 }
1818 
1819 static int
1820 txg_list_walk_init(mdb_walk_state_t *wsp)
1821 {
1822         return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
1823 }
1824 
1825 static int
1826 txg_list0_walk_init(mdb_walk_state_t *wsp)
1827 {
1828         return (txg_list_walk_init_common(wsp, 0, 0));
1829 }
1830 
1831 static int
1832 txg_list1_walk_init(mdb_walk_state_t *wsp)
1833 {
1834         return (txg_list_walk_init_common(wsp, 1, 1));
1835 }
1836 
1837 static int
1838 txg_list2_walk_init(mdb_walk_state_t *wsp)
1839 {
1840         return (txg_list_walk_init_common(wsp, 2, 2));
1841 }
1842 
1843 static int
1844 txg_list3_walk_init(mdb_walk_state_t *wsp)
1845 {
1846         return (txg_list_walk_init_common(wsp, 3, 3));
1847 }
1848 
1849 static int
1850 txg_list_walk_step(mdb_walk_state_t *wsp)
1851 {
1852         txg_list_walk_data_t *lwd = wsp->walk_data;
1853         uintptr_t addr;
1854         txg_node_t *node;
1855         int status;
1856 
1857         while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
1858                 lwd->lw_txgoff++;
1859                 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1860         }
1861 
1862         if (wsp->walk_addr == NULL)
1863                 return (WALK_DONE);
1864 
1865         addr = wsp->walk_addr - lwd->lw_offset;
1866 
1867         if (mdb_vread(lwd->lw_obj,
1868             lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
1869                 mdb_warn("failed to read list element at %#lx", addr);
1870                 return (WALK_ERR);
1871         }
1872 
1873         status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
1874         node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
1875         wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
1876 
1877         return (status);
1878 }
1879 
1880 /*
1881  * ::walk spa
1882  *
1883  * Walk all named spa_t structures in the namespace.  This is nothing more than
1884  * a layered avl walk.
1885  */
1886 static int
1887 spa_walk_init(mdb_walk_state_t *wsp)
1888 {
1889         GElf_Sym sym;
1890 
1891         if (wsp->walk_addr != NULL) {
1892                 mdb_warn("spa walk only supports global walks\n");
1893                 return (WALK_ERR);
1894         }
1895 
1896         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
1897                 mdb_warn("failed to find symbol 'spa_namespace_avl'");
1898                 return (WALK_ERR);
1899         }
1900 
1901         wsp->walk_addr = (uintptr_t)sym.st_value;
1902 
1903         if (mdb_layered_walk("avl", wsp) == -1) {
1904                 mdb_warn("failed to walk 'avl'\n");
1905                 return (WALK_ERR);
1906         }
1907 
1908         return (WALK_NEXT);
1909 }
1910 
1911 static int
1912 spa_walk_step(mdb_walk_state_t *wsp)
1913 {
1914         spa_t   spa;
1915 
1916         if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) {
1917                 mdb_warn("failed to read spa_t at %p", wsp->walk_addr);
1918                 return (WALK_ERR);
1919         }
1920 
1921         return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata));
1922 }
1923 
1924 /*
1925  * [addr]::walk zio
1926  *
1927  * Walk all active zio_t structures on the system.  This is simply a layered
1928  * walk on top of ::walk zio_cache, with the optional ability to limit the
1929  * structures to a particular pool.
1930  */
1931 static int
1932 zio_walk_init(mdb_walk_state_t *wsp)
1933 {
1934         wsp->walk_data = (void *)wsp->walk_addr;
1935 
1936         if (mdb_layered_walk("zio_cache", wsp) == -1) {
1937                 mdb_warn("failed to walk 'zio_cache'\n");
1938                 return (WALK_ERR);
1939         }
1940 
1941         return (WALK_NEXT);
1942 }
1943 
1944 static int
1945 zio_walk_step(mdb_walk_state_t *wsp)
1946 {
1947         zio_t zio;
1948 
1949         if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1950                 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1951                 return (WALK_ERR);
1952         }
1953 
1954         if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1955                 return (WALK_NEXT);
1956 
1957         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1958 }
1959 
1960 /*
1961  * [addr]::walk zio_root
1962  *
1963  * Walk only root zio_t structures, optionally for a particular spa_t.
1964  */
1965 static int
1966 zio_walk_root_step(mdb_walk_state_t *wsp)
1967 {
1968         zio_t zio;
1969 
1970         if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1971                 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1972                 return (WALK_ERR);
1973         }
1974 
1975         if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1976                 return (WALK_NEXT);
1977 
1978         /* If the parent list is not empty, ignore */
1979         if (zio.io_parent_list.list_head.list_next !=
1980             &((zio_t *)wsp->walk_addr)->io_parent_list.list_head)
1981                 return (WALK_NEXT);
1982 
1983         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1984 }
1985 
1986 #define NICENUM_BUFLEN 6
1987 
1988 static int
1989 snprintfrac(char *buf, int len,
1990     uint64_t numerator, uint64_t denom, int frac_digits)
1991 {
1992         int mul = 1;
1993         int whole, frac, i;
1994 
1995         for (i = frac_digits; i; i--)
1996                 mul *= 10;
1997         whole = numerator / denom;
1998         frac = mul * numerator / denom - mul * whole;
1999         return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
2000 }
2001 
2002 static void
2003 mdb_nicenum(uint64_t num, char *buf)
2004 {
2005         uint64_t n = num;
2006         int index = 0;
2007         char *u;
2008 
2009         while (n >= 1024) {
2010                 n = (n + (1024 / 2)) / 1024; /* Round up or down */
2011                 index++;
2012         }
2013 
2014         u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
2015 
2016         if (index == 0) {
2017                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
2018                     (u_longlong_t)n);
2019         } else if (n < 10 && (num & (num - 1)) != 0) {
2020                 (void) snprintfrac(buf, NICENUM_BUFLEN,
2021                     num, 1ULL << 10 * index, 2);
2022                 strcat(buf, u);
2023         } else if (n < 100 && (num & (num - 1)) != 0) {
2024                 (void) snprintfrac(buf, NICENUM_BUFLEN,
2025                     num, 1ULL << 10 * index, 1);
2026                 strcat(buf, u);
2027         } else {
2028                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
2029                     (u_longlong_t)n, u);
2030         }
2031 }
2032 
2033 /*
2034  * ::zfs_blkstats
2035  *
2036  *      -v      print verbose per-level information
2037  *
2038  */
2039 static int
2040 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2041 {
2042         boolean_t verbose = B_FALSE;
2043         zfs_all_blkstats_t stats;
2044         dmu_object_type_t t;
2045         zfs_blkstat_t *tzb;
2046         uint64_t ditto;
2047         dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2048         /* +10 in case it grew */
2049 
2050         if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2051                 mdb_warn("failed to read 'dmu_ot'");
2052                 return (DCMD_ERR);
2053         }
2054 
2055         if (mdb_getopts(argc, argv,
2056             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2057             NULL) != argc)
2058                 return (DCMD_USAGE);
2059 
2060         if (!(flags & DCMD_ADDRSPEC))
2061                 return (DCMD_USAGE);
2062 
2063         if (GETMEMB(addr, struct spa, spa_dsl_pool, addr) ||
2064             GETMEMB(addr, struct dsl_pool, dp_blkstats, addr) ||
2065             mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2066                 mdb_warn("failed to read data at %p;", addr);
2067                 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2068                 return (DCMD_ERR);
2069         }
2070 
2071         tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_NUMTYPES];
2072         if (tzb->zb_gangs != 0) {
2073                 mdb_printf("Ganged blocks: %llu\n",
2074                     (longlong_t)tzb->zb_gangs);
2075         }
2076 
2077         ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2078             tzb->zb_ditto_3_of_3_samevdev;
2079         if (ditto != 0) {
2080                 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2081                     (longlong_t)ditto);
2082         }
2083 
2084         mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2085             "\t  avg\t comp\t%%Total\tType\n");
2086 
2087         for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
2088                 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2089                 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2090                 char avg[NICENUM_BUFLEN];
2091                 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2092                 char typename[64];
2093                 int l;
2094 
2095 
2096                 if (t == DMU_OT_DEFERRED)
2097                         strcpy(typename, "deferred free");
2098                 else if (t == DMU_OT_TOTAL)
2099                         strcpy(typename, "Total");
2100                 else if (mdb_readstr(typename, sizeof (typename),
2101                     (uintptr_t)dmu_ot[t].ot_name) == -1) {
2102                         mdb_warn("failed to read type name");
2103                         return (DCMD_ERR);
2104                 }
2105 
2106                 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2107                         continue;
2108 
2109                 for (l = -1; l < DN_MAX_LEVELS; l++) {
2110                         int level = (l == -1 ? DN_MAX_LEVELS : l);
2111                         zfs_blkstat_t *zb = &stats.zab_type[level][t];
2112 
2113                         if (zb->zb_asize == 0)
2114                                 continue;
2115 
2116                         /*
2117                          * Don't print each level unless requested.
2118                          */
2119                         if (!verbose && level != DN_MAX_LEVELS)
2120                                 continue;
2121 
2122                         /*
2123                          * If all the space is level 0, don't print the
2124                          * level 0 separately.
2125                          */
2126                         if (level == 0 && zb->zb_asize ==
2127                             stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2128                                 continue;
2129 
2130                         mdb_nicenum(zb->zb_count, csize);
2131                         mdb_nicenum(zb->zb_lsize, lsize);
2132                         mdb_nicenum(zb->zb_psize, psize);
2133                         mdb_nicenum(zb->zb_asize, asize);
2134                         mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2135                         (void) snprintfrac(comp, NICENUM_BUFLEN,
2136                             zb->zb_lsize, zb->zb_psize, 2);
2137                         (void) snprintfrac(pct, NICENUM_BUFLEN,
2138                             100 * zb->zb_asize, tzb->zb_asize, 2);
2139 
2140                         mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2141                             "\t%5s\t%6s\t",
2142                             csize, lsize, psize, asize, avg, comp, pct);
2143 
2144                         if (level == DN_MAX_LEVELS)
2145                                 mdb_printf("%s\n", typename);
2146                         else
2147                                 mdb_printf("  L%d %s\n",
2148                                     level, typename);
2149                 }
2150         }
2151 
2152         return (DCMD_OK);
2153 }
2154 
2155 /*
2156  * MDB module linkage information:
2157  *
2158  * We declare a list of structures describing our dcmds, and a function
2159  * named _mdb_init to return a pointer to our module information.
2160  */
2161 
2162 static const mdb_dcmd_t dcmds[] = {
2163         { "arc", "[-bkmg]", "print ARC variables", arc_print },
2164         { "blkptr", ":", "print blkptr_t", blkptr },
2165         { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
2166         { "dbuf_stats", ":", "dbuf stats", dbuf_stats },
2167         { "dbufs",
2168             "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
2169             "[-o object | \"mdn\"] \n"
2170             "\t[-l level] [-b blkid | \"bonus\"]",
2171             "find dmu_buf_impl_t's that match specified criteria", dbufs },
2172         { "abuf_find", "dva_word[0] dva_word[1]",
2173             "find arc_buf_hdr_t of a specified DVA",
2174             abuf_find },
2175         { "spa", "?[-cv]", "spa_t summary", spa_print },
2176         { "spa_config", ":", "print spa_t configuration", spa_print_config },
2177         { "spa_verify", ":", "verify spa_t consistency", spa_verify },
2178         { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
2179         { "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
2180         { "vdev", ":[-re]\n"
2181             "\t-r display recursively\n"
2182             "\t-e print statistics",
2183             "vdev_t summary", vdev_print },
2184         { "zio", ":[cpr]\n"
2185             "\t-c display children\n"
2186             "\t-p display parents\n"
2187             "\t-r display recursively",
2188             "zio_t summary", zio_print },
2189         { "zio_state", "?", "print out all zio_t structures on system or "
2190             "for a particular pool", zio_state },
2191         { "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
2192         { "zfs_blkstats", ":[-v]",
2193             "given a spa_t, print block type stats from last scrub",
2194             zfs_blkstats },
2195         { "zfs_params", "", "print zfs tunable parameters", zfs_params },
2196         { NULL }
2197 };
2198 
2199 static const mdb_walker_t walkers[] = {
2200         /*
2201          * In userland, there is no generic provider of list_t walkers, so we
2202          * need to add it.
2203          */
2204 #ifndef _KERNEL
2205         { LIST_WALK_NAME, LIST_WALK_DESC,
2206                 list_walk_init, list_walk_step, list_walk_fini },
2207 #endif
2208         { "zms_freelist", "walk ZFS metaslab freelist",
2209                 freelist_walk_init, freelist_walk_step, NULL },
2210         { "txg_list", "given any txg_list_t *, walk all entries in all txgs",
2211                 txg_list_walk_init, txg_list_walk_step, NULL },
2212         { "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
2213                 txg_list0_walk_init, txg_list_walk_step, NULL },
2214         { "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
2215                 txg_list1_walk_init, txg_list_walk_step, NULL },
2216         { "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
2217                 txg_list2_walk_init, txg_list_walk_step, NULL },
2218         { "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
2219                 txg_list3_walk_init, txg_list_walk_step, NULL },
2220         { "zio", "walk all zio structures, optionally for a particular spa_t",
2221                 zio_walk_init, zio_walk_step, NULL },
2222         { "zio_root", "walk all root zio_t structures, optionally for a "
2223             "particular spa_t",
2224                 zio_walk_init, zio_walk_root_step, NULL },
2225         { "spa", "walk all spa_t entries in the namespace",
2226                 spa_walk_init, spa_walk_step, NULL },
2227         { "metaslab", "given a spa_t *, walk all metaslab_t structures",
2228                 metaslab_walk_init, metaslab_walk_step, NULL },
2229         { NULL }
2230 };
2231 
2232 static const mdb_modinfo_t modinfo = {
2233         MDB_API_VERSION, dcmds, walkers
2234 };
2235 
2236 const mdb_modinfo_t *
2237 _mdb_init(void)
2238 {
2239         return (&modinfo);
2240 }