1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <mdb/mdb_ctf.h>
  27 #include <sys/zfs_context.h>
  28 #include <sys/mdb_modapi.h>
  29 #include <sys/dbuf.h>
  30 #include <sys/dmu_objset.h>
  31 #include <sys/dsl_dir.h>
  32 #include <sys/dsl_pool.h>
  33 #include <sys/metaslab_impl.h>
  34 #include <sys/space_map.h>
  35 #include <sys/list.h>
  36 #include <sys/spa_impl.h>
  37 #include <sys/vdev_impl.h>
  38 #include <sys/zio_compress.h>
  39 
  40 #ifndef _KERNEL
  41 #include "../genunix/list.h"
  42 #endif
  43 
  44 #ifdef _KERNEL
  45 #define ZFS_OBJ_NAME    "zfs"
  46 #else
  47 #define ZFS_OBJ_NAME    "libzpool.so.1"
  48 #endif
  49 
  50 static char *
  51 local_strdup(const char *s)
  52 {
  53         char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
  54 
  55         (void) strcpy(s1, s);
  56         return (s1);
  57 }
  58 
  59 static int
  60 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
  61     const char *member, int len, void *buf)
  62 {
  63         mdb_ctf_id_t id;
  64         ulong_t off;
  65         char name[64];
  66 
  67         if (idp == NULL) {
  68                 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
  69                         mdb_warn("couldn't find type %s", type);
  70                         return (DCMD_ERR);
  71                 }
  72                 idp = &id;
  73         } else {
  74                 type = name;
  75                 mdb_ctf_type_name(*idp, name, sizeof (name));
  76         }
  77 
  78         if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
  79                 mdb_warn("couldn't find member %s of type %s\n", member, type);
  80                 return (DCMD_ERR);
  81         }
  82         if (off % 8 != 0) {
  83                 mdb_warn("member %s of type %s is unsupported bitfield",
  84                     member, type);
  85                 return (DCMD_ERR);
  86         }
  87         off /= 8;
  88 
  89         if (mdb_vread(buf, len, addr + off) == -1) {
  90                 mdb_warn("failed to read %s from %s at %p",
  91                     member, type, addr + off);
  92                 return (DCMD_ERR);
  93         }
  94         /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
  95 
  96         return (0);
  97 }
  98 
/*
 * Read member 'member' of the structure of type 'type' at target address
 * 'addr' into the lvalue 'dest'.  'type' and 'member' are stringified and
 * sizeof (dest) bytes are read.  Evaluates to getmember()'s return value.
 */
#define GETMEMB(addr, type, member, dest) \
        getmember(addr, #type, NULL, #member, sizeof (dest), &(dest))

/*
 * Same as GETMEMB(), except that the structure type is identified by the
 * mdb_ctf_id_t pointer 'ctfid' rather than by a type name.
 */
#define GETMEMBID(addr, ctfid, member, dest) \
        getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
 104 
 105 static int
 106 getrefcount(uintptr_t addr, mdb_ctf_id_t *id,
 107     const char *member, uint64_t *rc)
 108 {
 109         static int gotid;
 110         static mdb_ctf_id_t rc_id;
 111         ulong_t off;
 112 
 113         if (!gotid) {
 114                 if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
 115                         mdb_warn("couldn't find struct refcount");
 116                         return (DCMD_ERR);
 117                 }
 118                 gotid = TRUE;
 119         }
 120 
 121         if (mdb_ctf_offsetof(*id, member, &off) == -1) {
 122                 char name[64];
 123                 mdb_ctf_type_name(*id, name, sizeof (name));
 124                 mdb_warn("couldn't find member %s of type %s\n", member, name);
 125                 return (DCMD_ERR);
 126         }
 127         off /= 8;
 128 
 129         return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
 130 }
 131 
 132 static int
 133 read_symbol(char *sym_name, void **bufp)
 134 {
 135         GElf_Sym sym;
 136 
 137         if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
 138                 mdb_warn("can't find symbol %s", sym_name);
 139                 return (DCMD_ERR);
 140         }
 141 
 142         *bufp = mdb_alloc(sym.st_size, UM_SLEEP);
 143 
 144         if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
 145                 mdb_warn("can't read data for symbol %s", sym_name);
 146                 mdb_free(*bufp, sym.st_size);
 147                 return (DCMD_ERR);
 148         }
 149 
 150         return (DCMD_OK);
 151 }
 152 
/* When nonzero, freelist_walk_step() also prints each entry's raw value. */
static int verbose;
 154 
 155 static int
 156 freelist_walk_init(mdb_walk_state_t *wsp)
 157 {
 158         if (wsp->walk_addr == NULL) {
 159                 mdb_warn("must supply starting address\n");
 160                 return (WALK_ERR);
 161         }
 162 
 163         wsp->walk_data = 0;  /* Index into the freelist */
 164         return (WALK_NEXT);
 165 }
 166 
 167 static int
 168 freelist_walk_step(mdb_walk_state_t *wsp)
 169 {
 170         uint64_t entry;
 171         uintptr_t number = (uintptr_t)wsp->walk_data;
 172         char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 173                             "INVALID", "INVALID", "INVALID", "INVALID" };
 174         int mapshift = SPA_MINBLOCKSHIFT;
 175 
 176         if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
 177                 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
 178                 return (WALK_DONE);
 179         }
 180         wsp->walk_addr += sizeof (entry);
 181         wsp->walk_data = (void *)(number + 1);
 182 
 183         if (SM_DEBUG_DECODE(entry)) {
 184                 mdb_printf("DEBUG: %3u  %10s: txg=%llu  pass=%llu\n",
 185                     number,
 186                     ddata[SM_DEBUG_ACTION_DECODE(entry)],
 187                     SM_DEBUG_TXG_DECODE(entry),
 188                     SM_DEBUG_SYNCPASS_DECODE(entry));
 189         } else {
 190                 mdb_printf("Entry: %3u  offsets=%08llx-%08llx  type=%c  "
 191                     "size=%06llx", number,
 192                     SM_OFFSET_DECODE(entry) << mapshift,
 193                     (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
 194                     mapshift,
 195                     SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 196                     SM_RUN_DECODE(entry) << mapshift);
 197                 if (verbose)
 198                         mdb_printf("      (raw=%012llx)\n", entry);
 199                 mdb_printf("\n");
 200         }
 201         return (WALK_NEXT);
 202 }
 203 
 204 
 205 static int
 206 dataset_name(uintptr_t addr, char *buf)
 207 {
 208         static int gotid;
 209         static mdb_ctf_id_t dd_id;
 210         uintptr_t dd_parent;
 211         char dd_myname[MAXNAMELEN];
 212 
 213         if (!gotid) {
 214                 if (mdb_ctf_lookup_by_name("struct dsl_dir",
 215                     &dd_id) == -1) {
 216                         mdb_warn("couldn't find struct dsl_dir");
 217                         return (DCMD_ERR);
 218                 }
 219                 gotid = TRUE;
 220         }
 221         if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
 222             GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
 223                 return (DCMD_ERR);
 224         }
 225 
 226         if (dd_parent) {
 227                 if (dataset_name(dd_parent, buf))
 228                         return (DCMD_ERR);
 229                 strcat(buf, "/");
 230         }
 231 
 232         if (dd_myname[0])
 233                 strcat(buf, dd_myname);
 234         else
 235                 strcat(buf, "???");
 236 
 237         return (0);
 238 }
 239 
 240 static int
 241 objset_name(uintptr_t addr, char *buf)
 242 {
 243         static int gotid;
 244         static mdb_ctf_id_t osi_id, ds_id;
 245         uintptr_t os_dsl_dataset;
 246         char ds_snapname[MAXNAMELEN];
 247         uintptr_t ds_dir;
 248 
 249         buf[0] = '\0';
 250 
 251         if (!gotid) {
 252                 if (mdb_ctf_lookup_by_name("struct objset_impl",
 253                     &osi_id) == -1) {
 254                         mdb_warn("couldn't find struct objset_impl");
 255                         return (DCMD_ERR);
 256                 }
 257                 if (mdb_ctf_lookup_by_name("struct dsl_dataset",
 258                     &ds_id) == -1) {
 259                         mdb_warn("couldn't find struct dsl_dataset");
 260                         return (DCMD_ERR);
 261                 }
 262 
 263                 gotid = TRUE;
 264         }
 265 
 266         if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
 267                 return (DCMD_ERR);
 268 
 269         if (os_dsl_dataset == 0) {
 270                 strcat(buf, "mos");
 271                 return (0);
 272         }
 273 
 274         if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
 275             GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
 276                 return (DCMD_ERR);
 277         }
 278 
 279         if (ds_dir && dataset_name(ds_dir, buf))
 280                 return (DCMD_ERR);
 281 
 282         if (ds_snapname[0]) {
 283                 strcat(buf, "@");
 284                 strcat(buf, ds_snapname);
 285         }
 286         return (0);
 287 }
 288 
 289 static void
 290 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
 291     const char *prefix)
 292 {
 293         const char *cp;
 294         size_t len = strlen(prefix);
 295 
 296         if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
 297                 if (strncmp(cp, prefix, len) == 0)
 298                         cp += len;
 299                 (void) strncpy(out, cp, size);
 300         } else {
 301                 mdb_snprintf(out, size, "? (%d)", val);
 302         }
 303 }
 304 
 305 /* ARGSUSED */
 306 static int
 307 zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 308 {
 309         mdb_ctf_id_t pipe_enum;
 310         int i;
 311         char stage[1024];
 312 
 313         if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
 314                 mdb_warn("Could not find enum zio_stage");
 315                 return (DCMD_ERR);
 316         }
 317 
 318         for (i = 0; i < 32; i++) {
 319                 if (addr & (1U << i)) {
 320                         enum_lookup(stage, sizeof (stage), pipe_enum, i,
 321                             "ZIO_STAGE_");
 322                         mdb_printf("    %s\n", stage);
 323                 }
 324         }
 325 
 326         return (DCMD_OK);
 327 }
 328 
 329 /* ARGSUSED */
 330 static int
 331 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 332 {
 333         /*
 334          * This table can be approximately generated by running:
 335          * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
 336          */
 337         static const char *params[] = {
 338                 "arc_reduce_dnlc_percent",
 339                 "zfs_arc_max",
 340                 "zfs_arc_min",
 341                 "arc_shrink_shift",
 342                 "zfs_mdcomp_disable",
 343                 "zfs_prefetch_disable",
 344                 "zfetch_max_streams",
 345                 "zfetch_min_sec_reap",
 346                 "zfetch_block_cap",
 347                 "zfetch_array_rd_sz",
 348                 "zfs_default_bs",
 349                 "zfs_default_ibs",
 350                 "metaslab_aliquot",
 351                 "reference_tracking_enable",
 352                 "reference_history",
 353                 "zio_taskq_threads",
 354                 "spa_max_replication_override",
 355                 "spa_mode",
 356                 "zfs_flags",
 357                 "zfs_txg_synctime",
 358                 "zfs_txg_timeout",
 359                 "zfs_write_limit_min",
 360                 "zfs_write_limit_max",
 361                 "zfs_write_limit_shift",
 362                 "zfs_write_limit_override",
 363                 "zfs_no_write_throttle",
 364                 "zfs_vdev_cache_max",
 365                 "zfs_vdev_cache_size",
 366                 "zfs_vdev_cache_bshift",
 367                 "vdev_mirror_shift",
 368                 "zfs_vdev_max_pending",
 369                 "zfs_vdev_min_pending",
 370                 "zfs_scrub_limit",
 371                 "zfs_vdev_time_shift",
 372                 "zfs_vdev_ramp_rate",
 373                 "zfs_vdev_aggregation_limit",
 374                 "fzap_default_block_shift",
 375                 "zfs_immediate_write_sz",
 376                 "zfs_read_chunk_size",
 377                 "zil_disable",
 378                 "zfs_nocacheflush",
 379                 "metaslab_gang_bang",
 380                 "zio_injection_enabled",
 381                 "zvol_immediate_write_sz",
 382         };
 383         int i;
 384 
 385         for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
 386                 int sz;
 387                 uint64_t val64;
 388                 uint32_t *val32p = (uint32_t *)&val64;
 389 
 390                 sz = mdb_readvar(&val64, params[i]);
 391                 if (sz == 4) {
 392                         mdb_printf("%s = 0x%x\n", params[i], *val32p);
 393                 } else if (sz == 8) {
 394                         mdb_printf("%s = 0x%llx\n", params[i], val64);
 395                 } else {
 396                         mdb_warn("variable %s not found", params[i]);
 397                 }
 398         }
 399 
 400         return (DCMD_OK);
 401 }
 402 
 403 /* ARGSUSED */
 404 static int
 405 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 406 {
 407         blkptr_t bp;
 408         dmu_object_type_info_t *doti;
 409         zio_compress_info_t *zct;
 410         zio_checksum_info_t *zci;
 411         int i;
 412         char buf[MAXPATHLEN];
 413 
 414         if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
 415                 mdb_warn("failed to read blkptr_t");
 416                 return (DCMD_ERR);
 417         }
 418 
 419         if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
 420                 return (DCMD_ERR);
 421         for (i = 0; i < DMU_OT_NUMTYPES; i++) {
 422                 mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
 423                 doti[i].ot_name = local_strdup(buf);
 424         }
 425 
 426         if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
 427                 return (DCMD_ERR);
 428         for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
 429                 mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
 430                 zci[i].ci_name = local_strdup(buf);
 431         }
 432 
 433         if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
 434                 return (DCMD_ERR);
 435         for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
 436                 mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
 437                 zct[i].ci_name = local_strdup(buf);
 438         }
 439 
 440         /*
 441          * Super-ick warning:  This code is also duplicated in
 442          * cmd/zdb.c .   Yeah, I hate code replication, too.
 443          */
 444         for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
 445                 dva_t *dva = &bp.blk_dva[i];
 446 
 447                 mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
 448                     DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
 449                 mdb_printf("DVA[%d]:       GANG: %-5s  GRID:  %04x\t"
 450                     "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
 451                     DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
 452                 mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
 453                     DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
 454                     BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
 455                     !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
 456                     DVA_GET_GANG(dva) ? "g" : "",
 457                     BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
 458         }
 459         mdb_printf("LSIZE:  %-16llx\t\tPSIZE: %llx\n",
 460             BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
 461         mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE:  %s\n",
 462             BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
 463             doti[BP_GET_TYPE(&bp)].ot_name);
 464         mdb_printf("BIRTH:  %-16llx   LEVEL: %-2d\tFILL:  %llx\n",
 465             bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
 466         mdb_printf("CKFUNC: %-16s\t\tCOMP:  %s\n",
 467             zci[BP_GET_CHECKSUM(&bp)].ci_name,
 468             zct[BP_GET_COMPRESS(&bp)].ci_name);
 469         mdb_printf("CKSUM:  %llx:%llx:%llx:%llx\n",
 470             bp.blk_cksum.zc_word[0],
 471             bp.blk_cksum.zc_word[1],
 472             bp.blk_cksum.zc_word[2],
 473             bp.blk_cksum.zc_word[3]);
 474 
 475         return (DCMD_OK);
 476 }
 477 
 478 /* ARGSUSED */
 479 static int
 480 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 481 {
 482         mdb_ctf_id_t id;
 483         dmu_buf_t db;
 484         uintptr_t objset;
 485         uint8_t level;
 486         uint64_t blkid;
 487         uint64_t holds;
 488         char objectname[32];
 489         char blkidname[32];
 490         char path[MAXNAMELEN];
 491 
 492         if (DCMD_HDRSPEC(flags)) {
 493                 mdb_printf("        addr object lvl blkid holds os\n");
 494         }
 495 
 496         if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) {
 497                 mdb_warn("couldn't find struct dmu_buf_impl_t");
 498                 return (DCMD_ERR);
 499         }
 500 
 501         if (GETMEMBID(addr, &id, db_objset, objset) ||
 502             GETMEMBID(addr, &id, db, db) ||
 503             GETMEMBID(addr, &id, db_level, level) ||
 504             GETMEMBID(addr, &id, db_blkid, blkid)) {
 505                 return (WALK_ERR);
 506         }
 507 
 508         if (getrefcount(addr, &id, "db_holds", &holds)) {
 509                 return (WALK_ERR);
 510         }
 511 
 512         if (db.db_object == DMU_META_DNODE_OBJECT)
 513                 (void) strcpy(objectname, "mdn");
 514         else
 515                 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
 516                     (u_longlong_t)db.db_object);
 517 
 518         if (blkid == DB_BONUS_BLKID)
 519                 (void) strcpy(blkidname, "bonus");
 520         else
 521                 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
 522                     (u_longlong_t)blkid);
 523 
 524         if (objset_name(objset, path)) {
 525                 return (WALK_ERR);
 526         }
 527 
 528         mdb_printf("%p %8s %1u %9s %2llu %s\n",
 529             addr, objectname, level, blkidname, holds, path);
 530 
 531         return (DCMD_OK);
 532 }
 533 
 534 /* ARGSUSED */
 535 static int
 536 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 537 {
 538 #define HISTOSZ 32
 539         uintptr_t dbp;
 540         dmu_buf_impl_t db;
 541         dbuf_hash_table_t ht;
 542         uint64_t bucket, ndbufs;
 543         uint64_t histo[HISTOSZ];
 544         uint64_t histo2[HISTOSZ];
 545         int i, maxidx;
 546 
 547         if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
 548                 mdb_warn("failed to read 'dbuf_hash_table'");
 549                 return (DCMD_ERR);
 550         }
 551 
 552         for (i = 0; i < HISTOSZ; i++) {
 553                 histo[i] = 0;
 554                 histo2[i] = 0;
 555         }
 556 
 557         ndbufs = 0;
 558         for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
 559                 int len;
 560 
 561                 if (mdb_vread(&dbp, sizeof (void *),
 562                     (uintptr_t)(ht.hash_table+bucket)) == -1) {
 563                         mdb_warn("failed to read hash bucket %u at %p",
 564                             bucket, ht.hash_table+bucket);
 565                         return (DCMD_ERR);
 566                 }
 567 
 568                 len = 0;
 569                 while (dbp != 0) {
 570                         if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
 571                             dbp) == -1) {
 572                                 mdb_warn("failed to read dbuf at %p", dbp);
 573                                 return (DCMD_ERR);
 574                         }
 575                         dbp = (uintptr_t)db.db_hash_next;
 576                         for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
 577                                 histo2[i]++;
 578                         len++;
 579                         ndbufs++;
 580                 }
 581 
 582                 if (len >= HISTOSZ)
 583                         len = HISTOSZ-1;
 584                 histo[len]++;
 585         }
 586 
 587         mdb_printf("hash table has %llu buckets, %llu dbufs "
 588             "(avg %llu buckets/dbuf)\n",
 589             ht.hash_table_mask+1, ndbufs,
 590             (ht.hash_table_mask+1)/ndbufs);
 591 
 592         mdb_printf("\n");
 593         maxidx = 0;
 594         for (i = 0; i < HISTOSZ; i++)
 595                 if (histo[i] > 0)
 596                         maxidx = i;
 597         mdb_printf("hash chain length   number of buckets\n");
 598         for (i = 0; i <= maxidx; i++)
 599                 mdb_printf("%u                  %llu\n", i, histo[i]);
 600 
 601         mdb_printf("\n");
 602         maxidx = 0;
 603         for (i = 0; i < HISTOSZ; i++)
 604                 if (histo2[i] > 0)
 605                         maxidx = i;
 606         mdb_printf("hash chain depth    number of dbufs\n");
 607         for (i = 0; i <= maxidx; i++)
 608                 mdb_printf("%u or more          %llu    %llu%%\n",
 609                     i, histo2[i], histo2[i]*100/ndbufs);
 610 
 611 
 612         return (DCMD_OK);
 613 }
 614 
/*
 * Filter state handed from dbufs() to dbufs_cb().  A numeric field equal to
 * DBUFS_UNSET, or a NULL osname, means "do not filter on this field".
 */
typedef struct dbufs_data {
        mdb_ctf_id_t id;        /* CTF id of struct dmu_buf_impl */
        uint64_t objset;        /* -O: objset to match */
        uint64_t object;        /* -o: object to match ("mdn" accepted) */
        uint64_t level;         /* -l: level to match */
        uint64_t blkid;         /* -b: block id to match ("bonus" accepted) */
        char *osname;           /* -n: objset name to match */
} dbufs_data_t;

/* Sentinel marking a numeric dbufs_data_t filter field as not specified. */
#define DBUFS_UNSET     (0xbaddcafedeadbeefULL)
 625 
 626 /* ARGSUSED */
 627 static int
 628 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
 629 {
 630         dbufs_data_t *data = arg;
 631         uintptr_t objset;
 632         dmu_buf_t db;
 633         uint8_t level;
 634         uint64_t blkid;
 635         char osname[MAXNAMELEN];
 636 
 637         if (GETMEMBID(addr, &data->id, db_objset, objset) ||
 638             GETMEMBID(addr, &data->id, db, db) ||
 639             GETMEMBID(addr, &data->id, db_level, level) ||
 640             GETMEMBID(addr, &data->id, db_blkid, blkid)) {
 641                 return (WALK_ERR);
 642         }
 643 
 644         if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
 645             (data->osname == NULL || (objset_name(objset, osname) == 0 &&
 646             strcmp(data->osname, osname) == 0)) &&
 647             (data->object == DBUFS_UNSET || data->object == db.db_object) &&
 648             (data->level == DBUFS_UNSET || data->level == level) &&
 649             (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
 650                 mdb_printf("%#lr\n", addr);
 651         }
 652         return (WALK_NEXT);
 653 }
 654 
 655 /* ARGSUSED */
 656 static int
 657 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 658 {
 659         dbufs_data_t data;
 660         char *object = NULL;
 661         char *blkid = NULL;
 662 
 663         data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
 664         data.osname = NULL;
 665 
 666         if (mdb_getopts(argc, argv,
 667             'O', MDB_OPT_UINT64, &data.objset,
 668             'n', MDB_OPT_STR, &data.osname,
 669             'o', MDB_OPT_STR, &object,
 670             'l', MDB_OPT_UINT64, &data.level,
 671             'b', MDB_OPT_STR, &blkid) != argc) {
 672                 return (DCMD_USAGE);
 673         }
 674 
 675         if (object) {
 676                 if (strcmp(object, "mdn") == 0) {
 677                         data.object = DMU_META_DNODE_OBJECT;
 678                 } else {
 679                         data.object = mdb_strtoull(object);
 680                 }
 681         }
 682 
 683         if (blkid) {
 684                 if (strcmp(blkid, "bonus") == 0) {
 685                         data.blkid = DB_BONUS_BLKID;
 686                 } else {
 687                         data.blkid = mdb_strtoull(blkid);
 688                 }
 689         }
 690 
 691         if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) {
 692                 mdb_warn("couldn't find struct dmu_buf_impl_t");
 693                 return (DCMD_ERR);
 694         }
 695 
 696         if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
 697                 mdb_warn("can't walk dbufs");
 698                 return (DCMD_ERR);
 699         }
 700 
 701         return (DCMD_OK);
 702 }
 703 
/* Search state handed from abuf_find() to abuf_find_cb(). */
typedef struct abuf_find_data {
        dva_t dva;              /* the two DVA words being searched for */
        mdb_ctf_id_t id;        /* CTF id of struct arc_buf_hdr */
} abuf_find_data_t;
 708 
 709 /* ARGSUSED */
 710 static int
 711 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
 712 {
 713         abuf_find_data_t *data = arg;
 714         dva_t dva;
 715 
 716         if (GETMEMBID(addr, &data->id, b_dva, dva)) {
 717                 return (WALK_ERR);
 718         }
 719 
 720         if (dva.dva_word[0] == data->dva.dva_word[0] &&
 721             dva.dva_word[1] == data->dva.dva_word[1]) {
 722                 mdb_printf("%#lr\n", addr);
 723         }
 724         return (WALK_NEXT);
 725 }
 726 
 727 /* ARGSUSED */
 728 static int
 729 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 730 {
 731         abuf_find_data_t data;
 732         GElf_Sym sym;
 733         int i;
 734         const char *syms[] = {
 735                 "ARC_mru",
 736                 "ARC_mru_ghost",
 737                 "ARC_mfu",
 738                 "ARC_mfu_ghost",
 739         };
 740 
 741         if (argc != 2)
 742                 return (DCMD_USAGE);
 743 
 744         for (i = 0; i < 2; i ++) {
 745                 switch (argv[i].a_type) {
 746                 case MDB_TYPE_STRING:
 747                         data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
 748                         break;
 749                 case MDB_TYPE_IMMEDIATE:
 750                         data.dva.dva_word[i] = argv[i].a_un.a_val;
 751                         break;
 752                 default:
 753                         return (DCMD_USAGE);
 754                 }
 755         }
 756 
 757         if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) {
 758                 mdb_warn("couldn't find struct arc_buf_hdr");
 759                 return (DCMD_ERR);
 760         }
 761 
 762         for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
 763                 if (mdb_lookup_by_name(syms[i], &sym)) {
 764                         mdb_warn("can't find symbol %s", syms[i]);
 765                         return (DCMD_ERR);
 766                 }
 767 
 768                 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
 769                         mdb_warn("can't walk %s", syms[i]);
 770                         return (DCMD_ERR);
 771                 }
 772         }
 773 
 774         return (DCMD_OK);
 775 }
 776 
 777 /*ARGSUSED*/
 778 static int
 779 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 780 {
 781         kstat_named_t *stats;
 782         GElf_Sym sym;
 783         int nstats, i;
 784         uint_t opt_a = FALSE;
 785         uint_t opt_b = FALSE;
 786         uint_t shift = 0;
 787         const char *suffix;
 788 
 789         static const char *bytestats[] = {
 790                 "p", "c", "c_min", "c_max", "size", NULL
 791         };
 792 
 793         static const char *extras[] = {
 794                 "arc_no_grow", "arc_tempreserve",
 795                 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
 796                 NULL
 797         };
 798 
 799         if (mdb_lookup_by_name("arc_stats", &sym) == -1) {
 800                 mdb_warn("failed to find 'arc_stats'");
 801                 return (DCMD_ERR);
 802         }
 803 
 804         stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
 805 
 806         if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
 807                 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
 808                 return (DCMD_ERR);
 809         }
 810 
 811         nstats = sym.st_size / sizeof (kstat_named_t);
 812 
 813         /* NB: -a / opt_a are ignored for backwards compatability */
 814         if (mdb_getopts(argc, argv,
 815             'a', MDB_OPT_SETBITS, TRUE, &opt_a,
 816             'b', MDB_OPT_SETBITS, TRUE, &opt_b,
 817             'k', MDB_OPT_SETBITS, 10, &shift,
 818             'm', MDB_OPT_SETBITS, 20, &shift,
 819             'g', MDB_OPT_SETBITS, 30, &shift,
 820             NULL) != argc)
 821                 return (DCMD_USAGE);
 822 
 823         if (!opt_b && !shift)
 824                 shift = 20;
 825 
 826         switch (shift) {
 827         case 0:
 828                 suffix = "B";
 829                 break;
 830         case 10:
 831                 suffix = "KB";
 832                 break;
 833         case 20:
 834                 suffix = "MB";
 835                 break;
 836         case 30:
 837                 suffix = "GB";
 838                 break;
 839         default:
 840                 suffix = "XX";
 841         }
 842 
 843         for (i = 0; i < nstats; i++) {
 844                 int j;
 845                 boolean_t bytes = B_FALSE;
 846 
 847                 for (j = 0; bytestats[j]; j++) {
 848                         if (strcmp(stats[i].name, bytestats[j]) == 0) {
 849                                 bytes = B_TRUE;
 850                                 break;
 851                         }
 852                 }
 853 
 854                 if (bytes) {
 855                         mdb_printf("%-25s = %9llu %s\n", stats[i].name,
 856                             stats[i].value.ui64 >> shift, suffix);
 857                 } else {
 858                         mdb_printf("%-25s = %9llu\n", stats[i].name,
 859                             stats[i].value.ui64);
 860                 }
 861         }
 862 
 863         for (i = 0; extras[i]; i++) {
 864                 uint64_t buf;
 865 
 866                 if (mdb_lookup_by_name(extras[i], &sym) == -1) {
 867                         mdb_warn("failed to find '%s'", extras[i]);
 868                         return (DCMD_ERR);
 869                 }
 870 
 871                 if (sym.st_size != sizeof (uint64_t) &&
 872                     sym.st_size != sizeof (uint32_t)) {
 873                         mdb_warn("expected scalar for variable '%s'\n",
 874                             extras[i]);
 875                         return (DCMD_ERR);
 876                 }
 877 
 878                 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
 879                         mdb_warn("couldn't read '%s'", extras[i]);
 880                         return (DCMD_ERR);
 881                 }
 882 
 883                 mdb_printf("%-25s = ", extras[i]);
 884 
 885                 /* NB: all the 64-bit extras happen to be byte counts */
 886                 if (sym.st_size == sizeof (uint64_t))
 887                         mdb_printf("%9llu %s\n", buf >> shift, suffix);
 888 
 889                 if (sym.st_size == sizeof (uint32_t))
 890                         mdb_printf("%9d\n", *((uint32_t *)&buf));
 891         }
 892         return (DCMD_OK);
 893 }
 894 
 895 /*
 896  * ::spa
 897  *
 898  *      -c      Print configuration information as well
 899  *      -v      Print vdev state
 900  *      -e      Print vdev error stats
 901  *
 902  * Print a summarized spa_t.  When given no arguments, prints out a table of all
 903  * active pools on the system.
 904  */
 905 /* ARGSUSED */
 906 static int
 907 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 908 {
 909         spa_t spa;
 910         const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
 911                 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
 912         const char *state;
 913         int config = FALSE;
 914         int vdevs = FALSE;
 915         int errors = FALSE;
 916 
 917         if (mdb_getopts(argc, argv,
 918             'c', MDB_OPT_SETBITS, TRUE, &config,
 919             'v', MDB_OPT_SETBITS, TRUE, &vdevs,
 920             'e', MDB_OPT_SETBITS, TRUE, &errors,
 921             NULL) != argc)
 922                 return (DCMD_USAGE);
 923 
 924         if (!(flags & DCMD_ADDRSPEC)) {
 925                 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
 926                         mdb_warn("can't walk spa");
 927                         return (DCMD_ERR);
 928                 }
 929 
 930                 return (DCMD_OK);
 931         }
 932 
 933         if (flags & DCMD_PIPE_OUT) {
 934                 mdb_printf("%#lr\n", addr);
 935                 return (DCMD_OK);
 936         }
 937 
 938         if (DCMD_HDRSPEC(flags))
 939                 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
 940                     sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
 941 
 942         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
 943                 mdb_warn("failed to read spa_t at %p", addr);
 944                 return (DCMD_ERR);
 945         }
 946 
 947         if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
 948                 state = "UNKNOWN";
 949         else
 950                 state = statetab[spa.spa_state];
 951 
 952         mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
 953 
 954         if (config) {
 955                 mdb_printf("\n");
 956                 mdb_inc_indent(4);
 957                 if (mdb_call_dcmd("spa_config", addr, flags, 0,
 958                     NULL) != DCMD_OK)
 959                         return (DCMD_ERR);
 960                 mdb_dec_indent(4);
 961         }
 962 
 963         if (vdevs || errors) {
 964                 mdb_arg_t v;
 965 
 966                 v.a_type = MDB_TYPE_STRING;
 967                 v.a_un.a_str = "-e";
 968 
 969                 mdb_printf("\n");
 970                 mdb_inc_indent(4);
 971                 if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0,
 972                     &v) != DCMD_OK)
 973                         return (DCMD_ERR);
 974                 mdb_dec_indent(4);
 975         }
 976 
 977         return (DCMD_OK);
 978 }
 979 
 980 /*
 981  * ::spa_config
 982  *
 983  * Given a spa_t, print the configuration information stored in spa_config.
 984  * Since it's just an nvlist, format it as an indented list of name=value pairs.
 985  * We simply read the value of spa_config and pass off to ::nvlist.
 986  */
 987 /* ARGSUSED */
 988 static int
 989 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
 990 {
 991         spa_t spa;
 992 
 993         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
 994                 return (DCMD_USAGE);
 995 
 996         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
 997                 mdb_warn("failed to read spa_t at %p", addr);
 998                 return (DCMD_ERR);
 999         }
1000 
1001         if (spa.spa_config == NULL) {
1002                 mdb_printf("(none)\n");
1003                 return (DCMD_OK);
1004         }
1005 
1006         return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags,
1007             0, NULL));
1008 }
1009 
1010 /*
1011  * ::vdev
1012  *
1013  * Print out a summarized vdev_t, in the following form:
1014  *
1015  * ADDR             STATE       AUX            DESC
1016  * fffffffbcde23df0 HEALTHY     -              /dev/dsk/c0t0d0
1017  *
1018  * If '-r' is specified, recursively visit all children.
1019  *
 * With '-e', the statistics associated with the vdev are printed as well.
 *
 * With '-d depth', the description is indented as if the vdev were at the
 * given depth in the tree.
1021  */
1022 static int
1023 do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
1024     int recursive)
1025 {
1026         vdev_t vdev;
1027         char desc[MAXNAMELEN];
1028         int c, children;
1029         uintptr_t *child;
1030         const char *state, *aux;
1031 
1032         if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1033                 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1034                 return (DCMD_ERR);
1035         }
1036 
1037         if (flags & DCMD_PIPE_OUT) {
1038                 mdb_printf("%#lr", addr);
1039         } else {
1040                 if (vdev.vdev_path != NULL) {
1041                         if (mdb_readstr(desc, sizeof (desc),
1042                             (uintptr_t)vdev.vdev_path) == -1) {
1043                                 mdb_warn("failed to read vdev_path at %p\n",
1044                                     vdev.vdev_path);
1045                                 return (DCMD_ERR);
1046                         }
1047                 } else if (vdev.vdev_ops != NULL) {
1048                         vdev_ops_t ops;
1049                         if (mdb_vread(&ops, sizeof (ops),
1050                             (uintptr_t)vdev.vdev_ops) == -1) {
1051                                 mdb_warn("failed to read vdev_ops at %p\n",
1052                                     vdev.vdev_ops);
1053                                 return (DCMD_ERR);
1054                         }
1055                         (void) strcpy(desc, ops.vdev_op_type);
1056                 } else {
1057                         (void) strcpy(desc, "<unknown>");
1058                 }
1059 
1060                 if (depth == 0 && DCMD_HDRSPEC(flags))
1061                         mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1062                             "ADDR", "STATE", "AUX",
1063                             sizeof (uintptr_t) == 4 ? 43 : 35,
1064                             "DESCRIPTION");
1065 
1066                 mdb_printf("%0?p ", addr);
1067 
1068                 switch (vdev.vdev_state) {
1069                 case VDEV_STATE_CLOSED:
1070                         state = "CLOSED";
1071                         break;
1072                 case VDEV_STATE_OFFLINE:
1073                         state = "OFFLINE";
1074                         break;
1075                 case VDEV_STATE_CANT_OPEN:
1076                         state = "CANT_OPEN";
1077                         break;
1078                 case VDEV_STATE_DEGRADED:
1079                         state = "DEGRADED";
1080                         break;
1081                 case VDEV_STATE_HEALTHY:
1082                         state = "HEALTHY";
1083                         break;
1084                 case VDEV_STATE_REMOVED:
1085                         state = "REMOVED";
1086                         break;
1087                 case VDEV_STATE_FAULTED:
1088                         state = "FAULTED";
1089                         break;
1090                 default:
1091                         state = "UNKNOWN";
1092                         break;
1093                 }
1094 
1095                 switch (vdev.vdev_stat.vs_aux) {
1096                 case VDEV_AUX_NONE:
1097                         aux = "-";
1098                         break;
1099                 case VDEV_AUX_OPEN_FAILED:
1100                         aux = "OPEN_FAILED";
1101                         break;
1102                 case VDEV_AUX_CORRUPT_DATA:
1103                         aux = "CORRUPT_DATA";
1104                         break;
1105                 case VDEV_AUX_NO_REPLICAS:
1106                         aux = "NO_REPLICAS";
1107                         break;
1108                 case VDEV_AUX_BAD_GUID_SUM:
1109                         aux = "BAD_GUID_SUM";
1110                         break;
1111                 case VDEV_AUX_TOO_SMALL:
1112                         aux = "TOO_SMALL";
1113                         break;
1114                 case VDEV_AUX_BAD_LABEL:
1115                         aux = "BAD_LABEL";
1116                         break;
1117                 case VDEV_AUX_VERSION_NEWER:
1118                         aux = "VERS_NEWER";
1119                         break;
1120                 case VDEV_AUX_VERSION_OLDER:
1121                         aux = "VERS_OLDER";
1122                         break;
1123                 case VDEV_AUX_SPARED:
1124                         aux = "SPARED";
1125                         break;
1126                 case VDEV_AUX_ERR_EXCEEDED:
1127                         aux = "ERR_EXCEEDED";
1128                         break;
1129                 case VDEV_AUX_IO_FAILURE:
1130                         aux = "IO_FAILURE";
1131                         break;
1132                 case VDEV_AUX_BAD_LOG:
1133                         aux = "BAD_LOG";
1134                         break;
1135                 default:
1136                         aux = "UNKNOWN";
1137                         break;
1138                 }
1139 
1140                 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1141 
1142                 if (stats) {
1143                         vdev_stat_t *vs = &vdev.vdev_stat;
1144                         int i;
1145 
1146                         mdb_inc_indent(4);
1147                         mdb_printf("\n");
1148                         mdb_printf("%<u>       %12s %12s %12s %12s "
1149                             "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1150                             "IOCTL");
1151                         mdb_printf("OPS     ");
1152                         for (i = 1; i < ZIO_TYPES; i++)
1153                                 mdb_printf("%11#llx%s", vs->vs_ops[i],
1154                                     i == ZIO_TYPES - 1 ? "" : "  ");
1155                         mdb_printf("\n");
1156                         mdb_printf("BYTES   ");
1157                         for (i = 1; i < ZIO_TYPES; i++)
1158                                 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1159                                     i == ZIO_TYPES - 1 ? "" : "  ");
1160 
1161 
1162                         mdb_printf("\n");
1163                         mdb_printf("EREAD    %10#llx\n", vs->vs_read_errors);
1164                         mdb_printf("EWRITE   %10#llx\n", vs->vs_write_errors);
1165                         mdb_printf("ECKSUM   %10#llx\n",
1166                             vs->vs_checksum_errors);
1167                         mdb_dec_indent(4);
1168                 }
1169 
1170                 if (stats)
1171                         mdb_printf("\n");
1172         }
1173 
1174         children = vdev.vdev_children;
1175 
1176         if (children == 0 || !recursive)
1177                 return (DCMD_OK);
1178 
1179         child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1180         if (mdb_vread(child, children * sizeof (void *),
1181             (uintptr_t)vdev.vdev_child) == -1) {
1182                 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1183                 return (DCMD_ERR);
1184         }
1185 
1186         for (c = 0; c < children; c++) {
1187                 if (do_print_vdev(child[c], flags, depth + 2, stats,
1188                     recursive))
1189                         return (DCMD_ERR);
1190         }
1191 
1192         return (DCMD_OK);
1193 }
1194 
1195 static int
1196 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1197 {
1198         int recursive = FALSE;
1199         int stats = FALSE;
1200         uint64_t depth = 0;
1201 
1202         if (mdb_getopts(argc, argv,
1203             'r', MDB_OPT_SETBITS, TRUE, &recursive,
1204             'e', MDB_OPT_SETBITS, TRUE, &stats,
1205             'd', MDB_OPT_UINT64, &depth,
1206             NULL) != argc)
1207                 return (DCMD_USAGE);
1208 
1209         if (!(flags & DCMD_ADDRSPEC)) {
1210                 mdb_warn("no vdev_t address given\n");
1211                 return (DCMD_ERR);
1212         }
1213 
1214         return (do_print_vdev(addr, flags, (int)depth, stats, recursive));
1215 }
1216 
/*
 * State for the metaslab walker.  The walk visits every metaslab of every
 * child of the pool's root vdev, one vdev at a time.
 */
typedef struct metaslab_walk_data {
        uint64_t mw_numvdevs;   /* root vdev's vdev_children count */
        uintptr_t *mw_vdevs;    /* local copy of the root's vdev_child[] */
        int mw_curvdev;         /* index into mw_vdevs being walked */
        uint64_t mw_nummss;     /* current vdev's vdev_ms_count */
        uintptr_t *mw_mss;      /* local copy of current vdev's vdev_ms[] */
                                /* (NULL until loaded for this vdev) */
        int mw_curms;           /* index into mw_mss of next metaslab */
} metaslab_walk_data_t;
1225 
1226 static int
1227 metaslab_walk_step(mdb_walk_state_t *wsp)
1228 {
1229         metaslab_walk_data_t *mw = wsp->walk_data;
1230         metaslab_t ms;
1231         uintptr_t msp;
1232 
1233         if (mw->mw_curvdev >= mw->mw_numvdevs)
1234                 return (WALK_DONE);
1235 
1236         if (mw->mw_mss == NULL) {
1237                 uintptr_t mssp;
1238                 uintptr_t vdevp;
1239 
1240                 ASSERT(mw->mw_curms == 0);
1241                 ASSERT(mw->mw_nummss == 0);
1242 
1243                 vdevp = mw->mw_vdevs[mw->mw_curvdev];
1244                 if (GETMEMB(vdevp, struct vdev, vdev_ms, mssp) ||
1245                     GETMEMB(vdevp, struct vdev, vdev_ms_count, mw->mw_nummss)) {
1246                         return (WALK_ERR);
1247                 }
1248 
1249                 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
1250                     UM_SLEEP | UM_GC);
1251                 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
1252                     mssp) == -1) {
1253                         mdb_warn("failed to read vdev_ms at %p", mssp);
1254                         return (WALK_ERR);
1255                 }
1256         }
1257 
1258         if (mw->mw_curms >= mw->mw_nummss) {
1259                 mw->mw_mss = NULL;
1260                 mw->mw_curms = 0;
1261                 mw->mw_nummss = 0;
1262                 mw->mw_curvdev++;
1263                 return (WALK_NEXT);
1264         }
1265 
1266         msp = mw->mw_mss[mw->mw_curms];
1267         if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
1268                 mdb_warn("failed to read metaslab_t at %p", msp);
1269                 return (WALK_ERR);
1270         }
1271 
1272         mw->mw_curms++;
1273 
1274         return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
1275 }
1276 
1277 /* ARGSUSED */
1278 static int
1279 metaslab_walk_init(mdb_walk_state_t *wsp)
1280 {
1281         metaslab_walk_data_t *mw;
1282         uintptr_t root_vdevp;
1283         uintptr_t childp;
1284 
1285         if (wsp->walk_addr == NULL) {
1286                 mdb_warn("must supply address of spa_t\n");
1287                 return (WALK_ERR);
1288         }
1289 
1290         mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
1291 
1292         if (GETMEMB(wsp->walk_addr, struct spa, spa_root_vdev, root_vdevp) ||
1293             GETMEMB(root_vdevp, struct vdev, vdev_children, mw->mw_numvdevs) ||
1294             GETMEMB(root_vdevp, struct vdev, vdev_child, childp)) {
1295                 return (DCMD_ERR);
1296         }
1297 
1298         mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
1299             UM_SLEEP | UM_GC);
1300         if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
1301             childp) == -1) {
1302                 mdb_warn("failed to read root vdev children at %p", childp);
1303                 return (DCMD_ERR);
1304         }
1305 
1306         wsp->walk_data = mw;
1307 
1308         return (WALK_NEXT);
1309 }
1310 
/*
 * Local holder for the spa_t members that ::spa_space fetches one at a
 * time with GETMEMB instead of reading the whole spa_t.
 */
typedef struct mdb_spa {
        uintptr_t spa_dsl_pool;         /* target address of the dsl_pool */
        uintptr_t spa_root_vdev;        /* target address of the root vdev */
} mdb_spa_t;
1315 
/*
 * Local holder for the dsl_dir members that ::spa_space fetches with
 * GETMEMB.
 */
typedef struct mdb_dsl_dir {
        uintptr_t dd_phys;                      /* target addr of phys data */
        int64_t dd_space_towrite[TXG_SIZE];     /* one slot per TXG_SIZE */
} mdb_dsl_dir_t;
1320 
/*
 * Local holder for the dsl_dir_phys members that ::spa_space fetches with
 * GETMEMB and prints.
 */
typedef struct mdb_dsl_dir_phys {
        uint64_t dd_used_bytes;
        uint64_t dd_compressed_bytes;
        uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
1326 
/*
 * Local holder for a subset of vdev members.
 * NOTE(review): no user of this type is visible in this part of the file;
 * presumably it mirrors the same-named vdev_t members -- confirm.
 */
typedef struct mdb_vdev {
        uintptr_t vdev_parent;
        uintptr_t vdev_ms;
        uint64_t vdev_ms_count;
        vdev_stat_t vdev_stat;
} mdb_vdev_t;
1333 
/*
 * Local holder for the metaslab members that space_cb() fetches with
 * GETMEMB for each metaslab visited.
 */
typedef struct mdb_metaslab {
        space_map_t ms_allocmap[TXG_SIZE];
        space_map_t ms_freemap[TXG_SIZE];
        space_map_t ms_map;
        space_map_obj_t ms_smo;
        space_map_obj_t ms_smo_syncing;
} mdb_metaslab_t;
1341 
/*
 * Running totals accumulated by space_cb() across all metaslabs of a pool
 * and printed by ::spa_space.
 */
typedef struct space_data {
        uint64_t ms_allocmap[TXG_SIZE]; /* sum of ms_allocmap[i].sm_space */
        uint64_t ms_freemap[TXG_SIZE];  /* sum of ms_freemap[i].sm_space */
        uint64_t ms_map;                /* sum of ms_map.sm_space */
        uint64_t avail;         /* sum of sm_size - ms_smo.smo_alloc */
        uint64_t nowavail;      /* sum of sm_size - ms_smo_syncing.smo_alloc */
} space_data_t;
1349 
1350 /* ARGSUSED */
1351 static int
1352 space_cb(uintptr_t addr, const void *unknown, void *arg)
1353 {
1354         space_data_t *sd = arg;
1355         mdb_metaslab_t ms;
1356 
1357         if (GETMEMB(addr, struct metaslab, ms_allocmap, ms.ms_allocmap) ||
1358             GETMEMB(addr, struct metaslab, ms_freemap, ms.ms_freemap) ||
1359             GETMEMB(addr, struct metaslab, ms_map, ms.ms_map) ||
1360             GETMEMB(addr, struct metaslab, ms_smo, ms.ms_smo) ||
1361             GETMEMB(addr, struct metaslab, ms_smo_syncing, ms.ms_smo_syncing)) {
1362                 return (WALK_ERR);
1363         }
1364 
1365         sd->ms_allocmap[0] += ms.ms_allocmap[0].sm_space;
1366         sd->ms_allocmap[1] += ms.ms_allocmap[1].sm_space;
1367         sd->ms_allocmap[2] += ms.ms_allocmap[2].sm_space;
1368         sd->ms_allocmap[3] += ms.ms_allocmap[3].sm_space;
1369         sd->ms_freemap[0] += ms.ms_freemap[0].sm_space;
1370         sd->ms_freemap[1] += ms.ms_freemap[1].sm_space;
1371         sd->ms_freemap[2] += ms.ms_freemap[2].sm_space;
1372         sd->ms_freemap[3] += ms.ms_freemap[3].sm_space;
1373         sd->ms_map += ms.ms_map.sm_space;
1374         sd->avail += ms.ms_map.sm_size - ms.ms_smo.smo_alloc;
1375         sd->nowavail += ms.ms_map.sm_size - ms.ms_smo_syncing.smo_alloc;
1376 
1377         return (WALK_NEXT);
1378 }
1379 
1380 /*
1381  * ::spa_space [-b]
1382  *
 * Given a spa_t, print out its on-disk space usage and in-core
1384  * estimates of future usage.  If -b is given, print space in bytes.
1385  * Otherwise print in megabytes.
1386  */
1387 /* ARGSUSED */
1388 static int
1389 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1390 {
1391         mdb_spa_t spa;
1392         uintptr_t dp_root_dir;
1393         mdb_dsl_dir_t dd;
1394         mdb_dsl_dir_phys_t dsp;
1395         uint64_t children;
1396         uintptr_t childaddr;
1397         space_data_t sd;
1398         int shift = 20;
1399         char *suffix = "M";
1400         int bits = FALSE;
1401 
1402         if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) !=
1403             argc)
1404                 return (DCMD_USAGE);
1405         if (!(flags & DCMD_ADDRSPEC))
1406                 return (DCMD_USAGE);
1407 
1408         if (bits) {
1409                 shift = 0;
1410                 suffix = "";
1411         }
1412 
1413         if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) ||
1414             GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) ||
1415             GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) ||
1416             GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) ||
1417             GETMEMB(spa.spa_dsl_pool, struct dsl_pool,
1418             dp_root_dir, dp_root_dir) ||
1419             GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) ||
1420             GETMEMB(dp_root_dir, struct dsl_dir,
1421             dd_space_towrite, dd.dd_space_towrite) ||
1422             GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1423             dd_used_bytes, dsp.dd_used_bytes) ||
1424             GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1425             dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1426             GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1427             dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1428                 return (DCMD_ERR);
1429         }
1430 
1431         mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1432             dd.dd_space_towrite[0] >> shift, suffix,
1433             dd.dd_space_towrite[1] >> shift, suffix,
1434             dd.dd_space_towrite[2] >> shift, suffix,
1435             dd.dd_space_towrite[3] >> shift, suffix);
1436 
1437         mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1438             dsp.dd_used_bytes >> shift, suffix);
1439         mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1440             dsp.dd_compressed_bytes >> shift, suffix);
1441         mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1442             dsp.dd_uncompressed_bytes >> shift, suffix);
1443 
1444         bzero(&sd, sizeof (sd));
1445         if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
1446                 mdb_warn("can't walk metaslabs");
1447                 return (DCMD_ERR);
1448         }
1449 
1450         mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1451             sd.ms_allocmap[0] >> shift, suffix,
1452             sd.ms_allocmap[1] >> shift, suffix,
1453             sd.ms_allocmap[2] >> shift, suffix,
1454             sd.ms_allocmap[3] >> shift, suffix);
1455         mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1456             sd.ms_freemap[0] >> shift, suffix,
1457             sd.ms_freemap[1] >> shift, suffix,
1458             sd.ms_freemap[2] >> shift, suffix,
1459             sd.ms_freemap[3] >> shift, suffix);
1460         mdb_printf("ms_map = %llu%s\n", sd.ms_map >> shift, suffix);
1461         mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
1462         mdb_printf("current syncing avail = %llu%s\n",
1463             sd.nowavail >> shift, suffix);
1464 
1465         return (DCMD_OK);
1466 }
1467 
1468 /*
1469  * ::spa_verify
1470  *
 * Given a spa_t, verify that the pool is self-consistent.
1472  * Currently, it only checks to make sure that the vdev tree exists.
1473  */
1474 /* ARGSUSED */
1475 static int
1476 spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1477 {
1478         spa_t spa;
1479 
1480         if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1481                 return (DCMD_USAGE);
1482 
1483         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1484                 mdb_warn("failed to read spa_t at %p", addr);
1485                 return (DCMD_ERR);
1486         }
1487 
1488         if (spa.spa_root_vdev == NULL) {
1489                 mdb_printf("no vdev tree present\n");
1490                 return (DCMD_OK);
1491         }
1492 
1493         return (DCMD_OK);
1494 }
1495 
1496 static int
1497 spa_print_aux(spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
1498     const char *name)
1499 {
1500         uintptr_t *aux;
1501         size_t len;
1502         int ret, i;
1503 
1504         /*
1505          * Iterate over aux vdevs and print those out as well.  This is a
1506          * little annoying because we don't have a root vdev to pass to ::vdev.
1507          * Instead, we print a single line and then call it for each child
1508          * vdev.
1509          */
1510         if (sav->sav_count != 0) {
1511                 v[1].a_type = MDB_TYPE_STRING;
1512                 v[1].a_un.a_str = "-d";
1513                 v[2].a_type = MDB_TYPE_IMMEDIATE;
1514                 v[2].a_un.a_val = 2;
1515 
1516                 len = sav->sav_count * sizeof (uintptr_t);
1517                 aux = mdb_alloc(len, UM_SLEEP);
1518                 if (mdb_vread(aux, len,
1519                     (uintptr_t)sav->sav_vdevs) == -1) {
1520                         mdb_free(aux, len);
1521                         mdb_warn("failed to read l2cache vdevs at %p",
1522                             sav->sav_vdevs);
1523                         return (DCMD_ERR);
1524                 }
1525 
1526                 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
1527 
1528                 for (i = 0; i < sav->sav_count; i++) {
1529                         ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
1530                         if (ret != DCMD_OK) {
1531                                 mdb_free(aux, len);
1532                                 return (ret);
1533                         }
1534                 }
1535 
1536                 mdb_free(aux, len);
1537         }
1538 
1539         return (0);
1540 }
1541 
1542 /*
1543  * ::spa_vdevs
1544  *
1545  *      -e      Include error stats
1546  *
 * Print out a summarized list of vdevs for the given spa_t.
 * This is accomplished by invoking "::vdev -r" (or "::vdev -re" when -e is
 * given) on the root vdev, as well as iterating over the cache and spare
 * devices.
1550  */
1551 /* ARGSUSED */
1552 static int
1553 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1554 {
1555         spa_t spa;
1556         mdb_arg_t v[3];
1557         int errors = FALSE;
1558         int ret;
1559 
1560         if (mdb_getopts(argc, argv,
1561             'e', MDB_OPT_SETBITS, TRUE, &errors,
1562             NULL) != argc)
1563                 return (DCMD_USAGE);
1564 
1565         if (!(flags & DCMD_ADDRSPEC))
1566                 return (DCMD_USAGE);
1567 
1568         if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1569                 mdb_warn("failed to read spa_t at %p", addr);
1570                 return (DCMD_ERR);
1571         }
1572 
1573         /*
1574          * Unitialized spa_t structures can have a NULL root vdev.
1575          */
1576         if (spa.spa_root_vdev == NULL) {
1577                 mdb_printf("no associated vdevs\n");
1578                 return (DCMD_OK);
1579         }
1580 
1581         v[0].a_type = MDB_TYPE_STRING;
1582         v[0].a_un.a_str = errors ? "-re" : "-r";
1583 
1584         ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1585             flags, 1, v);
1586         if (ret != DCMD_OK)
1587                 return (ret);
1588 
1589         if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
1590             spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
1591                 return (DCMD_ERR);
1592 
1593         return (DCMD_OK);
1594 }
1595 
1596 /*
1597  * ::zio
1598  *
1599  * Print a summary of zio_t and all its children.  This is intended to display a
1600  * zio tree, and hence we only pick the most important pieces of information for
1601  * the main summary.  More detailed information can always be found by doing a
1602  * '::print zio' on the underlying zio_t.  The columns we display are:
1603  *
1604  *      ADDRESS         TYPE    STAGE           WAITER
1605  *
1606  * The 'address' column is indented by one space for each depth level as we
1607  * descend down the tree.
1608  */
1609 
/* Deepest indentation applied to the address column; deeper levels clamp */
#define ZIO_MAXINDENT   24
/* Width of the address column: a full-width hex pointer plus max indent */
#define ZIO_MAXWIDTH    (sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
/* Values for zpa_type: which list, if any, is followed from each zio */
#define ZIO_WALK_SELF   0
#define ZIO_WALK_CHILD  1
#define ZIO_WALK_PARENT 2
1615 
/* Traversal state shared by zio_print_cb() and zio_child_cb() */
typedef struct zio_print_args {
        int     zpa_current_depth;      /* depth of the zio being printed */
        int     zpa_min_depth;          /* shallower zios are not printed */
        int     zpa_max_depth;          /* no descent at or below this depth */
        int     zpa_type;               /* one of the ZIO_WALK_* values */
        uint_t  zpa_flags;              /* dcmd flags (for DCMD_PIPE_OUT) */
} zio_print_args_t;

/* Declared early because zio_print_cb() and zio_child_cb() call each other */
static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
1625 
1626 static int
1627 zio_print_cb(uintptr_t addr, const void *data, void *priv)
1628 {
1629         const zio_t *zio = data;
1630         zio_print_args_t *zpa = priv;
1631         mdb_ctf_id_t type_enum, stage_enum;
1632         int indent = zpa->zpa_current_depth;
1633         const char *type, *stage;
1634         uintptr_t laddr;
1635 
1636         if (indent > ZIO_MAXINDENT)
1637                 indent = ZIO_MAXINDENT;
1638 
1639         if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
1640             mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
1641                 mdb_warn("failed to lookup zio enums");
1642                 return (WALK_ERR);
1643         }
1644 
1645         if ((type = mdb_ctf_enum_name(type_enum, zio->io_type)) != NULL)
1646                 type += sizeof ("ZIO_TYPE_") - 1;
1647         else
1648                 type = "?";
1649 
1650         if ((stage = mdb_ctf_enum_name(stage_enum, zio->io_stage)) != NULL)
1651                 stage += sizeof ("ZIO_STAGE_") - 1;
1652         else
1653                 stage = "?";
1654 
1655         if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
1656                 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
1657                         mdb_printf("%?p\n", addr);
1658                 } else {
1659                         mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
1660                             ZIO_MAXWIDTH - indent, addr, type, stage);
1661                         if (zio->io_waiter)
1662                                 mdb_printf("%?p\n", zio->io_waiter);
1663                         else
1664                                 mdb_printf("-\n");
1665                 }
1666         }
1667 
1668         if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
1669                 return (WALK_NEXT);
1670 
1671         if (zpa->zpa_type == ZIO_WALK_PARENT)
1672                 laddr = addr + OFFSETOF(zio_t, io_parent_list);
1673         else
1674                 laddr = addr + OFFSETOF(zio_t, io_child_list);
1675 
1676         zpa->zpa_current_depth++;
1677         if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
1678                 mdb_warn("failed to walk zio_t children at %p\n", laddr);
1679                 return (WALK_ERR);
1680         }
1681         zpa->zpa_current_depth--;
1682 
1683         return (WALK_NEXT);
1684 }
1685 
1686 /* ARGSUSED */
1687 static int
1688 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
1689 {
1690         zio_link_t zl;
1691         zio_t zio;
1692         uintptr_t ziop;
1693         zio_print_args_t *zpa = arg;
1694 
1695         if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
1696                 mdb_warn("failed to read zio_link_t at %p", addr);
1697                 return (WALK_ERR);
1698         }
1699 
1700         if (zpa->zpa_type == ZIO_WALK_PARENT)
1701                 ziop = (uintptr_t)zl.zl_parent;
1702         else
1703                 ziop = (uintptr_t)zl.zl_child;
1704 
1705         if (mdb_vread(&zio, sizeof (zio_t), ziop) == -1) {
1706                 mdb_warn("failed to read zio_t at %p", ziop);
1707                 return (WALK_ERR);
1708         }
1709 
1710         return (zio_print_cb(ziop, &zio, arg));
1711 }
1712 
1713 /* ARGSUSED */
1714 static int
1715 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1716 {
1717         zio_t zio;
1718         zio_print_args_t zpa = { 0 };
1719 
1720         if (!(flags & DCMD_ADDRSPEC))
1721                 return (DCMD_USAGE);
1722 
1723         if (mdb_getopts(argc, argv,
1724             'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
1725             'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
1726             'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
1727             NULL) != argc)
1728                 return (DCMD_USAGE);
1729 
1730         zpa.zpa_flags = flags;
1731         if (zpa.zpa_max_depth != 0) {
1732                 if (zpa.zpa_type == ZIO_WALK_SELF)
1733                         zpa.zpa_type = ZIO_WALK_CHILD;
1734         } else if (zpa.zpa_type != ZIO_WALK_SELF) {
1735                 zpa.zpa_min_depth = 1;
1736                 zpa.zpa_max_depth = 1;
1737         }
1738 
1739         if (mdb_vread(&zio, sizeof (zio_t), addr) == -1) {
1740                 mdb_warn("failed to read zio_t at %p", addr);
1741                 return (DCMD_ERR);
1742         }
1743 
1744         if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags))
1745                 mdb_printf("%<u>%-*s %-5s %-16s %-?s%</u>\n", ZIO_MAXWIDTH,
1746                     "ADDRESS", "TYPE", "STAGE", "WAITER");
1747 
1748         if (zio_print_cb(addr, &zio, &zpa) != WALK_NEXT)
1749                 return (DCMD_ERR);
1750 
1751         return (DCMD_OK);
1752 }
1753 
1754 /*
1755  * [addr]::zio_state
1756  *
1757  * Print a summary of all zio_t structures on the system, or for a particular
1758  * pool.  This is equivalent to '::walk zio_root | ::zio'.
1759  */
1760 /*ARGSUSED*/
1761 static int
1762 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1763 {
1764         /*
1765          * MDB will remember the last address of the pipeline, so if we don't
1766          * zero this we'll end up trying to walk zio structures for a
1767          * non-existent spa_t.
1768          */
1769         if (!(flags & DCMD_ADDRSPEC))
1770                 addr = 0;
1771 
1772         return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
1773 }
1774 
/*
 * Private state shared by the txg_list walkers; filled in by
 * txg_list_walk_init_common() and consumed by txg_list_walk_step().
 */
typedef struct txg_list_walk_data {
        /* target address of the list head for each txg slot */
        uintptr_t lw_head[TXG_SIZE];
        /* txg slot currently being walked */
        int     lw_txgoff;
        /* last txg slot to walk (inclusive) */
        int     lw_maxoff;
        /* tl_offset copied from the target's txg_list_t */
        size_t  lw_offset;
        /* local buffer of lw_offset + sizeof (txg_node_t) bytes */
        void    *lw_obj;
} txg_list_walk_data_t;
1782 
1783 static int
1784 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
1785 {
1786         txg_list_walk_data_t *lwd;
1787         txg_list_t list;
1788         int i;
1789 
1790         lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
1791         if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
1792                 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
1793                 return (WALK_ERR);
1794         }
1795 
1796         for (i = 0; i < TXG_SIZE; i++)
1797                 lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
1798         lwd->lw_offset = list.tl_offset;
1799         lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
1800             UM_SLEEP | UM_GC);
1801         lwd->lw_txgoff = txg;
1802         lwd->lw_maxoff = maxoff;
1803 
1804         wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1805         wsp->walk_data = lwd;
1806 
1807         return (WALK_NEXT);
1808 }
1809 
1810 static int
1811 txg_list_walk_init(mdb_walk_state_t *wsp)
1812 {
1813         return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
1814 }
1815 
1816 static int
1817 txg_list0_walk_init(mdb_walk_state_t *wsp)
1818 {
1819         return (txg_list_walk_init_common(wsp, 0, 0));
1820 }
1821 
1822 static int
1823 txg_list1_walk_init(mdb_walk_state_t *wsp)
1824 {
1825         return (txg_list_walk_init_common(wsp, 1, 1));
1826 }
1827 
1828 static int
1829 txg_list2_walk_init(mdb_walk_state_t *wsp)
1830 {
1831         return (txg_list_walk_init_common(wsp, 2, 2));
1832 }
1833 
1834 static int
1835 txg_list3_walk_init(mdb_walk_state_t *wsp)
1836 {
1837         return (txg_list_walk_init_common(wsp, 3, 3));
1838 }
1839 
1840 static int
1841 txg_list_walk_step(mdb_walk_state_t *wsp)
1842 {
1843         txg_list_walk_data_t *lwd = wsp->walk_data;
1844         uintptr_t addr;
1845         txg_node_t *node;
1846         int status;
1847 
1848         while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
1849                 lwd->lw_txgoff++;
1850                 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1851         }
1852 
1853         if (wsp->walk_addr == NULL)
1854                 return (WALK_DONE);
1855 
1856         addr = wsp->walk_addr - lwd->lw_offset;
1857 
1858         if (mdb_vread(lwd->lw_obj,
1859             lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
1860                 mdb_warn("failed to read list element at %#lx", addr);
1861                 return (WALK_ERR);
1862         }
1863 
1864         status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
1865         node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
1866         wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
1867 
1868         return (status);
1869 }
1870 
1871 /*
1872  * ::walk spa
1873  *
1874  * Walk all named spa_t structures in the namespace.  This is nothing more than
1875  * a layered avl walk.
1876  */
1877 static int
1878 spa_walk_init(mdb_walk_state_t *wsp)
1879 {
1880         GElf_Sym sym;
1881 
1882         if (wsp->walk_addr != NULL) {
1883                 mdb_warn("spa walk only supports global walks\n");
1884                 return (WALK_ERR);
1885         }
1886 
1887         if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
1888                 mdb_warn("failed to find symbol 'spa_namespace_avl'");
1889                 return (WALK_ERR);
1890         }
1891 
1892         wsp->walk_addr = (uintptr_t)sym.st_value;
1893 
1894         if (mdb_layered_walk("avl", wsp) == -1) {
1895                 mdb_warn("failed to walk 'avl'\n");
1896                 return (WALK_ERR);
1897         }
1898 
1899         return (WALK_NEXT);
1900 }
1901 
1902 static int
1903 spa_walk_step(mdb_walk_state_t *wsp)
1904 {
1905         spa_t   spa;
1906 
1907         if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) {
1908                 mdb_warn("failed to read spa_t at %p", wsp->walk_addr);
1909                 return (WALK_ERR);
1910         }
1911 
1912         return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata));
1913 }
1914 
1915 /*
1916  * [addr]::walk zio
1917  *
1918  * Walk all active zio_t structures on the system.  This is simply a layered
1919  * walk on top of ::walk zio_cache, with the optional ability to limit the
1920  * structures to a particular pool.
1921  */
1922 static int
1923 zio_walk_init(mdb_walk_state_t *wsp)
1924 {
1925         wsp->walk_data = (void *)wsp->walk_addr;
1926 
1927         if (mdb_layered_walk("zio_cache", wsp) == -1) {
1928                 mdb_warn("failed to walk 'zio_cache'\n");
1929                 return (WALK_ERR);
1930         }
1931 
1932         return (WALK_NEXT);
1933 }
1934 
1935 static int
1936 zio_walk_step(mdb_walk_state_t *wsp)
1937 {
1938         zio_t zio;
1939 
1940         if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1941                 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1942                 return (WALK_ERR);
1943         }
1944 
1945         if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1946                 return (WALK_NEXT);
1947 
1948         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1949 }
1950 
1951 /*
1952  * [addr]::walk zio_root
1953  *
1954  * Walk only root zio_t structures, optionally for a particular spa_t.
1955  */
1956 static int
1957 zio_walk_root_step(mdb_walk_state_t *wsp)
1958 {
1959         zio_t zio;
1960 
1961         if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1962                 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1963                 return (WALK_ERR);
1964         }
1965 
1966         if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1967                 return (WALK_NEXT);
1968 
1969         /* If the parent list is not empty, ignore */
1970         if (zio.io_parent_list.list_head.list_next !=
1971             &((zio_t *)wsp->walk_addr)->io_parent_list.list_head)
1972                 return (WALK_NEXT);
1973 
1974         return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1975 }
1976 
1977 #define NICENUM_BUFLEN 6
1978 
1979 static int
1980 snprintfrac(char *buf, int len,
1981     uint64_t numerator, uint64_t denom, int frac_digits)
1982 {
1983         int mul = 1;
1984         int whole, frac, i;
1985 
1986         for (i = frac_digits; i; i--)
1987                 mul *= 10;
1988         whole = numerator / denom;
1989         frac = mul * numerator / denom - mul * whole;
1990         return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
1991 }
1992 
1993 static void
1994 mdb_nicenum(uint64_t num, char *buf)
1995 {
1996         uint64_t n = num;
1997         int index = 0;
1998         char *u;
1999 
2000         while (n >= 1024) {
2001                 n = (n + (1024 / 2)) / 1024; /* Round up or down */
2002                 index++;
2003         }
2004 
2005         u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
2006 
2007         if (index == 0) {
2008                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
2009                     (u_longlong_t)n);
2010         } else if (n < 10 && (num & (num - 1)) != 0) {
2011                 (void) snprintfrac(buf, NICENUM_BUFLEN,
2012                     num, 1ULL << 10 * index, 2);
2013                 strcat(buf, u);
2014         } else if (n < 100 && (num & (num - 1)) != 0) {
2015                 (void) snprintfrac(buf, NICENUM_BUFLEN,
2016                     num, 1ULL << 10 * index, 1);
2017                 strcat(buf, u);
2018         } else {
2019                 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
2020                     (u_longlong_t)n, u);
2021         }
2022 }
2023 
2024 /*
2025  * ::zfs_blkstats
2026  *
2027  *      -v      print verbose per-level information
2028  *
2029  */
2030 static int
2031 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2032 {
2033         boolean_t verbose = B_FALSE;
2034         zfs_all_blkstats_t stats;
2035         dmu_object_type_t t;
2036         zfs_blkstat_t *tzb;
2037         uint64_t ditto;
2038         dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2039         /* +10 in case it grew */
2040 
2041         if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2042                 mdb_warn("failed to read 'dmu_ot'");
2043                 return (DCMD_ERR);
2044         }
2045 
2046         if (mdb_getopts(argc, argv,
2047             'v', MDB_OPT_SETBITS, TRUE, &verbose,
2048             NULL) != argc)
2049                 return (DCMD_USAGE);
2050 
2051         if (!(flags & DCMD_ADDRSPEC))
2052                 return (DCMD_USAGE);
2053 
2054         if (GETMEMB(addr, struct spa, spa_dsl_pool, addr) ||
2055             GETMEMB(addr, struct dsl_pool, dp_blkstats, addr) ||
2056             mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2057                 mdb_warn("failed to read data at %p;", addr);
2058                 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2059                 return (DCMD_ERR);
2060         }
2061 
2062         tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_NUMTYPES];
2063         if (tzb->zb_gangs != 0) {
2064                 mdb_printf("Ganged blocks: %llu\n",
2065                     (longlong_t)tzb->zb_gangs);
2066         }
2067 
2068         ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2069             tzb->zb_ditto_3_of_3_samevdev;
2070         if (ditto != 0) {
2071                 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2072                     (longlong_t)ditto);
2073         }
2074 
2075         mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2076             "\t  avg\t comp\t%%Total\tType\n");
2077 
2078         for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
2079                 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2080                 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2081                 char avg[NICENUM_BUFLEN];
2082                 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2083                 char typename[64];
2084                 int l;
2085 
2086 
2087                 if (t == DMU_OT_DEFERRED)
2088                         strcpy(typename, "deferred free");
2089                 else if (t == DMU_OT_TOTAL)
2090                         strcpy(typename, "Total");
2091                 else if (mdb_readstr(typename, sizeof (typename),
2092                     (uintptr_t)dmu_ot[t].ot_name) == -1) {
2093                         mdb_warn("failed to read type name");
2094                         return (DCMD_ERR);
2095                 }
2096 
2097                 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2098                         continue;
2099 
2100                 for (l = -1; l < DN_MAX_LEVELS; l++) {
2101                         int level = (l == -1 ? DN_MAX_LEVELS : l);
2102                         zfs_blkstat_t *zb = &stats.zab_type[level][t];
2103 
2104                         if (zb->zb_asize == 0)
2105                                 continue;
2106 
2107                         /*
2108                          * Don't print each level unless requested.
2109                          */
2110                         if (!verbose && level != DN_MAX_LEVELS)
2111                                 continue;
2112 
2113                         /*
2114                          * If all the space is level 0, don't print the
2115                          * level 0 separately.
2116                          */
2117                         if (level == 0 && zb->zb_asize ==
2118                             stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2119                                 continue;
2120 
2121                         mdb_nicenum(zb->zb_count, csize);
2122                         mdb_nicenum(zb->zb_lsize, lsize);
2123                         mdb_nicenum(zb->zb_psize, psize);
2124                         mdb_nicenum(zb->zb_asize, asize);
2125                         mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2126                         (void) snprintfrac(comp, NICENUM_BUFLEN,
2127                             zb->zb_lsize, zb->zb_psize, 2);
2128                         (void) snprintfrac(pct, NICENUM_BUFLEN,
2129                             100 * zb->zb_asize, tzb->zb_asize, 2);
2130 
2131                         mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2132                             "\t%5s\t%6s\t",
2133                             csize, lsize, psize, asize, avg, comp, pct);
2134 
2135                         if (level == DN_MAX_LEVELS)
2136                                 mdb_printf("%s\n", typename);
2137                         else
2138                                 mdb_printf("  L%d %s\n",
2139                                     level, typename);
2140                 }
2141         }
2142 
2143         return (DCMD_OK);
2144 }
2145 
2146 /*
2147  * MDB module linkage information:
2148  *
2149  * We declare a list of structures describing our dcmds, and a function
2150  * named _mdb_init to return a pointer to our module information.
2151  */
2152 
2153 static const mdb_dcmd_t dcmds[] = {
2154         { "arc", "[-bkmg]", "print ARC variables", arc_print },
2155         { "blkptr", ":", "print blkptr_t", blkptr },
2156         { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
2157         { "dbuf_stats", ":", "dbuf stats", dbuf_stats },
2158         { "dbufs",
2159             "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
2160             "[-o object | \"mdn\"] \n"
2161             "\t[-l level] [-b blkid | \"bonus\"]",
2162             "find dmu_buf_impl_t's that match specified criteria", dbufs },
2163         { "abuf_find", "dva_word[0] dva_word[1]",
2164             "find arc_buf_hdr_t of a specified DVA",
2165             abuf_find },
2166         { "spa", "?[-cv]", "spa_t summary", spa_print },
2167         { "spa_config", ":", "print spa_t configuration", spa_print_config },
2168         { "spa_verify", ":", "verify spa_t consistency", spa_verify },
2169         { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
2170         { "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
2171         { "vdev", ":[-re]\n"
2172             "\t-r display recursively\n"
2173             "\t-e print statistics",
2174             "vdev_t summary", vdev_print },
2175         { "zio", ":[cpr]\n"
2176             "\t-c display children\n"
2177             "\t-p display parents\n"
2178             "\t-r display recursively",
2179             "zio_t summary", zio_print },
2180         { "zio_state", "?", "print out all zio_t structures on system or "
2181             "for a particular pool", zio_state },
2182         { "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
2183         { "zfs_blkstats", ":[-v]",
2184             "given a spa_t, print block type stats from last scrub",
2185             zfs_blkstats },
2186         { "zfs_params", "", "print zfs tunable parameters", zfs_params },
2187         { NULL }
2188 };
2189 
/*
 * Walkers exported by this module.  Each entry gives the walker name, a
 * one-line description, and its init, step and fini routines (fini is NULL
 * for walkers that have nothing to tear down); the table ends with a NULL
 * entry.  The "zio" and "zio_root" walkers share zio_walk_init, and all
 * txg_list walkers share txg_list_walk_step.
 */
static const mdb_walker_t walkers[] = {
        /*
         * In userland, there is no generic provider of list_t walkers, so we
         * need to add it.
         */
#ifndef _KERNEL
        { LIST_WALK_NAME, LIST_WALK_DESC,
                list_walk_init, list_walk_step, list_walk_fini },
#endif
        { "zms_freelist", "walk ZFS metaslab freelist",
                freelist_walk_init, freelist_walk_step, NULL },
        { "txg_list", "given any txg_list_t *, walk all entries in all txgs",
                txg_list_walk_init, txg_list_walk_step, NULL },
        { "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
                txg_list0_walk_init, txg_list_walk_step, NULL },
        { "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
                txg_list1_walk_init, txg_list_walk_step, NULL },
        { "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
                txg_list2_walk_init, txg_list_walk_step, NULL },
        { "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
                txg_list3_walk_init, txg_list_walk_step, NULL },
        { "zio", "walk all zio structures, optionally for a particular spa_t",
                zio_walk_init, zio_walk_step, NULL },
        { "zio_root", "walk all root zio_t structures, optionally for a "
            "particular spa_t",
                zio_walk_init, zio_walk_root_step, NULL },
        { "spa", "walk all spa_t entries in the namespace",
                spa_walk_init, spa_walk_step, NULL },
        { "metaslab", "given a spa_t *, walk all metaslab_t structures",
                metaslab_walk_init, metaslab_walk_step, NULL },
        { NULL }
};
2222 
/*
 * Module description handed to MDB by _mdb_init(): the MDB_API_VERSION
 * constant followed by the dcmd and walker tables defined in this file.
 */
static const mdb_modinfo_t modinfo = {
        MDB_API_VERSION, dcmds, walkers
};
2226 
/*
 * Module entry point: returns a pointer to this module's static
 * mdb_modinfo_t, which lists its dcmds and walkers.
 */
const mdb_modinfo_t *
_mdb_init(void)
{
        return (&modinfo);
}