1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <mdb/mdb_ctf.h>
27 #include <sys/zfs_context.h>
28 #include <sys/mdb_modapi.h>
29 #include <sys/dbuf.h>
30 #include <sys/dmu_objset.h>
31 #include <sys/dsl_dir.h>
32 #include <sys/dsl_pool.h>
33 #include <sys/metaslab_impl.h>
34 #include <sys/space_map.h>
35 #include <sys/list.h>
36 #include <sys/spa_impl.h>
37 #include <sys/vdev_impl.h>
38 #include <sys/zio_compress.h>
39
40 #ifndef _KERNEL
41 #include "../genunix/list.h"
42 #endif
43
44 #ifdef _KERNEL
45 #define ZFS_OBJ_NAME "zfs"
46 #else
47 #define ZFS_OBJ_NAME "libzpool.so.1"
48 #endif
49
50 static char *
51 local_strdup(const char *s)
52 {
53 char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
54
55 (void) strcpy(s1, s);
56 return (s1);
57 }
58
59 static int
60 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
61 const char *member, int len, void *buf)
62 {
63 mdb_ctf_id_t id;
64 ulong_t off;
65 char name[64];
66
67 if (idp == NULL) {
68 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
69 mdb_warn("couldn't find type %s", type);
70 return (DCMD_ERR);
71 }
72 idp = &id;
73 } else {
74 type = name;
75 mdb_ctf_type_name(*idp, name, sizeof (name));
76 }
77
78 if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
79 mdb_warn("couldn't find member %s of type %s\n", member, type);
80 return (DCMD_ERR);
81 }
82 if (off % 8 != 0) {
83 mdb_warn("member %s of type %s is unsupported bitfield",
84 member, type);
85 return (DCMD_ERR);
86 }
87 off /= 8;
88
89 if (mdb_vread(buf, len, addr + off) == -1) {
90 mdb_warn("failed to read %s from %s at %p",
91 member, type, addr + off);
92 return (DCMD_ERR);
93 }
94 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
95
96 return (0);
97 }
98
/*
 * Read 'member' of the structure type 'type' located at target address
 * 'addr' into 'dest'.  The type and member tokens are stringified and handed
 * to getmember(), which looks the type up by name; sizeof (dest) bytes are
 * read.  Evaluates to getmember()'s return value.
 */
#define GETMEMB(addr, type, member, dest) \
	getmember(addr, #type, NULL, #member, sizeof (dest), &(dest))

/*
 * Same as GETMEMB(), except that the structure type is given as a pointer to
 * an already-resolved CTF id ('ctfid') instead of by name.
 */
#define GETMEMBID(addr, ctfid, member, dest) \
	getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
104
105 static int
106 getrefcount(uintptr_t addr, mdb_ctf_id_t *id,
107 const char *member, uint64_t *rc)
108 {
109 static int gotid;
110 static mdb_ctf_id_t rc_id;
111 ulong_t off;
112
113 if (!gotid) {
114 if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
115 mdb_warn("couldn't find struct refcount");
116 return (DCMD_ERR);
117 }
118 gotid = TRUE;
119 }
120
121 if (mdb_ctf_offsetof(*id, member, &off) == -1) {
122 char name[64];
123 mdb_ctf_type_name(*id, name, sizeof (name));
124 mdb_warn("couldn't find member %s of type %s\n", member, name);
125 return (DCMD_ERR);
126 }
127 off /= 8;
128
129 return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
130 }
131
132 static int
133 read_symbol(char *sym_name, void **bufp)
134 {
135 GElf_Sym sym;
136
137 if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
138 mdb_warn("can't find symbol %s", sym_name);
139 return (DCMD_ERR);
140 }
141
142 *bufp = mdb_alloc(sym.st_size, UM_SLEEP);
143
144 if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
145 mdb_warn("can't read data for symbol %s", sym_name);
146 mdb_free(*bufp, sym.st_size);
147 return (DCMD_ERR);
148 }
149
150 return (DCMD_OK);
151 }
152
153 static int verbose;
154
155 static int
156 freelist_walk_init(mdb_walk_state_t *wsp)
157 {
158 if (wsp->walk_addr == NULL) {
159 mdb_warn("must supply starting address\n");
160 return (WALK_ERR);
161 }
162
163 wsp->walk_data = 0; /* Index into the freelist */
164 return (WALK_NEXT);
165 }
166
167 static int
168 freelist_walk_step(mdb_walk_state_t *wsp)
169 {
170 uint64_t entry;
171 uintptr_t number = (uintptr_t)wsp->walk_data;
172 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
173 "INVALID", "INVALID", "INVALID", "INVALID" };
174 int mapshift = SPA_MINBLOCKSHIFT;
175
176 if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
177 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
178 return (WALK_DONE);
179 }
180 wsp->walk_addr += sizeof (entry);
181 wsp->walk_data = (void *)(number + 1);
182
183 if (SM_DEBUG_DECODE(entry)) {
184 mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n",
185 number,
186 ddata[SM_DEBUG_ACTION_DECODE(entry)],
187 SM_DEBUG_TXG_DECODE(entry),
188 SM_DEBUG_SYNCPASS_DECODE(entry));
189 } else {
190 mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c "
191 "size=%06llx", number,
192 SM_OFFSET_DECODE(entry) << mapshift,
193 (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
194 mapshift,
195 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
196 SM_RUN_DECODE(entry) << mapshift);
197 if (verbose)
198 mdb_printf(" (raw=%012llx)\n", entry);
199 mdb_printf("\n");
200 }
201 return (WALK_NEXT);
202 }
203
204
205 static int
206 dataset_name(uintptr_t addr, char *buf)
207 {
208 static int gotid;
209 static mdb_ctf_id_t dd_id;
210 uintptr_t dd_parent;
211 char dd_myname[MAXNAMELEN];
212
213 if (!gotid) {
214 if (mdb_ctf_lookup_by_name("struct dsl_dir",
215 &dd_id) == -1) {
216 mdb_warn("couldn't find struct dsl_dir");
217 return (DCMD_ERR);
218 }
219 gotid = TRUE;
220 }
221 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
222 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
223 return (DCMD_ERR);
224 }
225
226 if (dd_parent) {
227 if (dataset_name(dd_parent, buf))
228 return (DCMD_ERR);
229 strcat(buf, "/");
230 }
231
232 if (dd_myname[0])
233 strcat(buf, dd_myname);
234 else
235 strcat(buf, "???");
236
237 return (0);
238 }
239
240 static int
241 objset_name(uintptr_t addr, char *buf)
242 {
243 static int gotid;
244 static mdb_ctf_id_t osi_id, ds_id;
245 uintptr_t os_dsl_dataset;
246 char ds_snapname[MAXNAMELEN];
247 uintptr_t ds_dir;
248
249 buf[0] = '\0';
250
251 if (!gotid) {
252 if (mdb_ctf_lookup_by_name("struct objset_impl",
253 &osi_id) == -1) {
254 mdb_warn("couldn't find struct objset_impl");
255 return (DCMD_ERR);
256 }
257 if (mdb_ctf_lookup_by_name("struct dsl_dataset",
258 &ds_id) == -1) {
259 mdb_warn("couldn't find struct dsl_dataset");
260 return (DCMD_ERR);
261 }
262
263 gotid = TRUE;
264 }
265
266 if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
267 return (DCMD_ERR);
268
269 if (os_dsl_dataset == 0) {
270 strcat(buf, "mos");
271 return (0);
272 }
273
274 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
275 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
276 return (DCMD_ERR);
277 }
278
279 if (ds_dir && dataset_name(ds_dir, buf))
280 return (DCMD_ERR);
281
282 if (ds_snapname[0]) {
283 strcat(buf, "@");
284 strcat(buf, ds_snapname);
285 }
286 return (0);
287 }
288
289 static void
290 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
291 const char *prefix)
292 {
293 const char *cp;
294 size_t len = strlen(prefix);
295
296 if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
297 if (strncmp(cp, prefix, len) == 0)
298 cp += len;
299 (void) strncpy(out, cp, size);
300 } else {
301 mdb_snprintf(out, size, "? (%d)", val);
302 }
303 }
304
305 /* ARGSUSED */
306 static int
307 zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
308 {
309 mdb_ctf_id_t pipe_enum;
310 int i;
311 char stage[1024];
312
313 if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
314 mdb_warn("Could not find enum zio_stage");
315 return (DCMD_ERR);
316 }
317
318 for (i = 0; i < 32; i++) {
319 if (addr & (1U << i)) {
320 enum_lookup(stage, sizeof (stage), pipe_enum, i,
321 "ZIO_STAGE_");
322 mdb_printf(" %s\n", stage);
323 }
324 }
325
326 return (DCMD_OK);
327 }
328
329 /* ARGSUSED */
330 static int
331 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
332 {
333 /*
334 * This table can be approximately generated by running:
335 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
336 */
337 static const char *params[] = {
338 "arc_reduce_dnlc_percent",
339 "zfs_arc_max",
340 "zfs_arc_min",
341 "arc_shrink_shift",
342 "zfs_mdcomp_disable",
343 "zfs_prefetch_disable",
344 "zfetch_max_streams",
345 "zfetch_min_sec_reap",
346 "zfetch_block_cap",
347 "zfetch_array_rd_sz",
348 "zfs_default_bs",
349 "zfs_default_ibs",
350 "metaslab_aliquot",
351 "reference_tracking_enable",
352 "reference_history",
353 "zio_taskq_threads",
354 "spa_max_replication_override",
355 "spa_mode",
356 "zfs_flags",
357 "zfs_txg_synctime",
358 "zfs_txg_timeout",
359 "zfs_write_limit_min",
360 "zfs_write_limit_max",
361 "zfs_write_limit_shift",
362 "zfs_write_limit_override",
363 "zfs_no_write_throttle",
364 "zfs_vdev_cache_max",
365 "zfs_vdev_cache_size",
366 "zfs_vdev_cache_bshift",
367 "vdev_mirror_shift",
368 "zfs_vdev_max_pending",
369 "zfs_vdev_min_pending",
370 "zfs_scrub_limit",
371 "zfs_vdev_time_shift",
372 "zfs_vdev_ramp_rate",
373 "zfs_vdev_aggregation_limit",
374 "fzap_default_block_shift",
375 "zfs_immediate_write_sz",
376 "zfs_read_chunk_size",
377 "zil_disable",
378 "zfs_nocacheflush",
379 "metaslab_gang_bang",
380 "zio_injection_enabled",
381 "zvol_immediate_write_sz",
382 };
383 int i;
384
385 for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
386 int sz;
387 uint64_t val64;
388 uint32_t *val32p = (uint32_t *)&val64;
389
390 sz = mdb_readvar(&val64, params[i]);
391 if (sz == 4) {
392 mdb_printf("%s = 0x%x\n", params[i], *val32p);
393 } else if (sz == 8) {
394 mdb_printf("%s = 0x%llx\n", params[i], val64);
395 } else {
396 mdb_warn("variable %s not found", params[i]);
397 }
398 }
399
400 return (DCMD_OK);
401 }
402
403 /* ARGSUSED */
404 static int
405 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
406 {
407 blkptr_t bp;
408 dmu_object_type_info_t *doti;
409 zio_compress_info_t *zct;
410 zio_checksum_info_t *zci;
411 zio_crypt_info_t *zcc;
412 int i;
413 char buf[MAXPATHLEN];
414
415 if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
416 mdb_warn("failed to read blkptr_t");
417 return (DCMD_ERR);
418 }
419
420 if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
421 return (DCMD_ERR);
422 for (i = 0; i < DMU_OT_NUMTYPES; i++) {
423 mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
424 doti[i].ot_name = local_strdup(buf);
425 }
426
427 if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
428 return (DCMD_ERR);
429 for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
430 mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
431 zci[i].ci_name = local_strdup(buf);
432 }
433
434 if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
435 return (DCMD_ERR);
436 for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
437 mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
438 zct[i].ci_name = local_strdup(buf);
439 }
440
441 if (read_symbol("zio_crypt_table", (void **)&zcc) != DCMD_OK)
442 return (DCMD_ERR);
443 for (i = 0; i < ZIO_CRYPT_FUNCTIONS; i++) {
444 mdb_readstr(buf, sizeof (buf), (uintptr_t)zcc[i].ci_name);
445 zcc[i].ci_name = local_strdup(buf);
446 }
447
448 /*
449 * Super-ick warning: This code is also duplicated in
450 * cmd/zdb.c . Yeah, I hate code replication, too.
451 */
452 for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
453 dva_t *dva = &bp.blk_dva[i];
454
455 mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
456 DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
457 mdb_printf("DVA[%d]: GANG: %-5s GRID: %04x\t"
458 "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
459 DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
460 mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
461 DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
462 BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
463 !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
464 DVA_GET_GANG(dva) ? "g" : "",
465 BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
466 }
467 mdb_printf("LSIZE: %-16llx\t\tPSIZE: %llx\n",
468 BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
469 mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
470 BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
471 doti[BP_GET_TYPE(&bp)].ot_name);
472 mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n",
473 bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
474 mdb_printf("CKFUNC: %-16s\tCOMP: %s\tCRYPT: %s\n",
475 zci[BP_GET_CHECKSUM(&bp)].ci_name,
476 zct[BP_GET_COMPRESS(&bp)].ci_name,
477 zcc[BP_GET_CRYPT(&bp)].ci_name);
478 mdb_printf("CKSUM: %llx:%llx:%llx:%llx\n",
479 bp.blk_cksum.zc_word[0],
480 bp.blk_cksum.zc_word[1],
481 bp.blk_cksum.zc_word[2],
482 bp.blk_cksum.zc_word[3]);
483
484 return (DCMD_OK);
485 }
486
487 /* ARGSUSED */
488 static int
489 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
490 {
491 mdb_ctf_id_t id;
492 dmu_buf_t db;
493 uintptr_t objset;
494 uint8_t level;
495 uint64_t blkid;
496 uint64_t holds;
497 char objectname[32];
498 char blkidname[32];
499 char path[MAXNAMELEN];
500
501 if (DCMD_HDRSPEC(flags)) {
502 mdb_printf(" addr object lvl blkid holds os\n");
503 }
504
505 if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) {
506 mdb_warn("couldn't find struct dmu_buf_impl_t");
507 return (DCMD_ERR);
508 }
509
510 if (GETMEMBID(addr, &id, db_objset, objset) ||
511 GETMEMBID(addr, &id, db, db) ||
512 GETMEMBID(addr, &id, db_level, level) ||
513 GETMEMBID(addr, &id, db_blkid, blkid)) {
514 return (WALK_ERR);
515 }
516
517 if (getrefcount(addr, &id, "db_holds", &holds)) {
518 return (WALK_ERR);
519 }
520
521 if (db.db_object == DMU_META_DNODE_OBJECT)
522 (void) strcpy(objectname, "mdn");
523 else
524 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
525 (u_longlong_t)db.db_object);
526
527 if (blkid == DB_BONUS_BLKID)
528 (void) strcpy(blkidname, "bonus");
529 else
530 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
531 (u_longlong_t)blkid);
532
533 if (objset_name(objset, path)) {
534 return (WALK_ERR);
535 }
536
537 mdb_printf("%p %8s %1u %9s %2llu %s\n",
538 addr, objectname, level, blkidname, holds, path);
539
540 return (DCMD_OK);
541 }
542
543 /* ARGSUSED */
544 static int
545 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
546 {
547 #define HISTOSZ 32
548 uintptr_t dbp;
549 dmu_buf_impl_t db;
550 dbuf_hash_table_t ht;
551 uint64_t bucket, ndbufs;
552 uint64_t histo[HISTOSZ];
553 uint64_t histo2[HISTOSZ];
554 int i, maxidx;
555
556 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
557 mdb_warn("failed to read 'dbuf_hash_table'");
558 return (DCMD_ERR);
559 }
560
561 for (i = 0; i < HISTOSZ; i++) {
562 histo[i] = 0;
563 histo2[i] = 0;
564 }
565
566 ndbufs = 0;
567 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
568 int len;
569
570 if (mdb_vread(&dbp, sizeof (void *),
571 (uintptr_t)(ht.hash_table+bucket)) == -1) {
572 mdb_warn("failed to read hash bucket %u at %p",
573 bucket, ht.hash_table+bucket);
574 return (DCMD_ERR);
575 }
576
577 len = 0;
578 while (dbp != 0) {
579 if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
580 dbp) == -1) {
581 mdb_warn("failed to read dbuf at %p", dbp);
582 return (DCMD_ERR);
583 }
584 dbp = (uintptr_t)db.db_hash_next;
585 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
586 histo2[i]++;
587 len++;
588 ndbufs++;
589 }
590
591 if (len >= HISTOSZ)
592 len = HISTOSZ-1;
593 histo[len]++;
594 }
595
596 mdb_printf("hash table has %llu buckets, %llu dbufs "
597 "(avg %llu buckets/dbuf)\n",
598 ht.hash_table_mask+1, ndbufs,
599 (ht.hash_table_mask+1)/ndbufs);
600
601 mdb_printf("\n");
602 maxidx = 0;
603 for (i = 0; i < HISTOSZ; i++)
604 if (histo[i] > 0)
605 maxidx = i;
606 mdb_printf("hash chain length number of buckets\n");
607 for (i = 0; i <= maxidx; i++)
608 mdb_printf("%u %llu\n", i, histo[i]);
609
610 mdb_printf("\n");
611 maxidx = 0;
612 for (i = 0; i < HISTOSZ; i++)
613 if (histo2[i] > 0)
614 maxidx = i;
615 mdb_printf("hash chain depth number of dbufs\n");
616 for (i = 0; i <= maxidx; i++)
617 mdb_printf("%u or more %llu %llu%%\n",
618 i, histo2[i], histo2[i]*100/ndbufs);
619
620
621 return (DCMD_OK);
622 }
623
/*
 * Filter criteria handed from the dbufs dcmd to dbufs_cb() for each dbuf
 * visited.  A numeric field left at DBUFS_UNSET, or an osname left NULL,
 * places no restriction on the corresponding dbuf attribute.
 */
typedef struct dbufs_data {
	mdb_ctf_id_t id;	/* CTF id used to read dbuf members */
	uint64_t objset;	/* compared with the dbuf's db_objset */
	uint64_t object;	/* compared with the dbuf's db.db_object */
	uint64_t level;		/* compared with the dbuf's db_level */
	uint64_t blkid;		/* compared with the dbuf's db_blkid */
	char *osname;		/* compared with the objset_name() result */
} dbufs_data_t;

/* Sentinel meaning "no filter value was supplied" for the fields above. */
#define DBUFS_UNSET (0xbaddcafedeadbeefULL)
634
635 /* ARGSUSED */
636 static int
637 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
638 {
639 dbufs_data_t *data = arg;
640 uintptr_t objset;
641 dmu_buf_t db;
642 uint8_t level;
643 uint64_t blkid;
644 char osname[MAXNAMELEN];
645
646 if (GETMEMBID(addr, &data->id, db_objset, objset) ||
647 GETMEMBID(addr, &data->id, db, db) ||
648 GETMEMBID(addr, &data->id, db_level, level) ||
649 GETMEMBID(addr, &data->id, db_blkid, blkid)) {
650 return (WALK_ERR);
651 }
652
653 if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
654 (data->osname == NULL || (objset_name(objset, osname) == 0 &&
655 strcmp(data->osname, osname) == 0)) &&
656 (data->object == DBUFS_UNSET || data->object == db.db_object) &&
657 (data->level == DBUFS_UNSET || data->level == level) &&
658 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
659 mdb_printf("%#lr\n", addr);
660 }
661 return (WALK_NEXT);
662 }
663
664 /* ARGSUSED */
665 static int
666 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
667 {
668 dbufs_data_t data;
669 char *object = NULL;
670 char *blkid = NULL;
671
672 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
673 data.osname = NULL;
674
675 if (mdb_getopts(argc, argv,
676 'O', MDB_OPT_UINT64, &data.objset,
677 'n', MDB_OPT_STR, &data.osname,
678 'o', MDB_OPT_STR, &object,
679 'l', MDB_OPT_UINT64, &data.level,
680 'b', MDB_OPT_STR, &blkid) != argc) {
681 return (DCMD_USAGE);
682 }
683
684 if (object) {
685 if (strcmp(object, "mdn") == 0) {
686 data.object = DMU_META_DNODE_OBJECT;
687 } else {
688 data.object = mdb_strtoull(object);
689 }
690 }
691
692 if (blkid) {
693 if (strcmp(blkid, "bonus") == 0) {
694 data.blkid = DB_BONUS_BLKID;
695 } else {
696 data.blkid = mdb_strtoull(blkid);
697 }
698 }
699
700 if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) {
701 mdb_warn("couldn't find struct dmu_buf_impl_t");
702 return (DCMD_ERR);
703 }
704
705 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
706 mdb_warn("can't walk dbufs");
707 return (DCMD_ERR);
708 }
709
710 return (DCMD_OK);
711 }
712
/*
 * State handed from the abuf_find dcmd to abuf_find_cb(): the DVA being
 * searched for and the CTF id used to read each header's b_dva member.
 */
typedef struct abuf_find_data {
	dva_t dva;		/* both words must match for a hit */
	mdb_ctf_id_t id;	/* CTF id of "struct arc_buf_hdr" */
} abuf_find_data_t;
717
718 /* ARGSUSED */
719 static int
720 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
721 {
722 abuf_find_data_t *data = arg;
723 dva_t dva;
724
725 if (GETMEMBID(addr, &data->id, b_dva, dva)) {
726 return (WALK_ERR);
727 }
728
729 if (dva.dva_word[0] == data->dva.dva_word[0] &&
730 dva.dva_word[1] == data->dva.dva_word[1]) {
731 mdb_printf("%#lr\n", addr);
732 }
733 return (WALK_NEXT);
734 }
735
736 /* ARGSUSED */
737 static int
738 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
739 {
740 abuf_find_data_t data;
741 GElf_Sym sym;
742 int i;
743 const char *syms[] = {
744 "ARC_mru",
745 "ARC_mru_ghost",
746 "ARC_mfu",
747 "ARC_mfu_ghost",
748 };
749
750 if (argc != 2)
751 return (DCMD_USAGE);
752
753 for (i = 0; i < 2; i ++) {
754 switch (argv[i].a_type) {
755 case MDB_TYPE_STRING:
756 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
757 break;
758 case MDB_TYPE_IMMEDIATE:
759 data.dva.dva_word[i] = argv[i].a_un.a_val;
760 break;
761 default:
762 return (DCMD_USAGE);
763 }
764 }
765
766 if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) {
767 mdb_warn("couldn't find struct arc_buf_hdr");
768 return (DCMD_ERR);
769 }
770
771 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
772 if (mdb_lookup_by_name(syms[i], &sym)) {
773 mdb_warn("can't find symbol %s", syms[i]);
774 return (DCMD_ERR);
775 }
776
777 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
778 mdb_warn("can't walk %s", syms[i]);
779 return (DCMD_ERR);
780 }
781 }
782
783 return (DCMD_OK);
784 }
785
786 /*ARGSUSED*/
787 static int
788 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
789 {
790 kstat_named_t *stats;
791 GElf_Sym sym;
792 int nstats, i;
793 uint_t opt_a = FALSE;
794 uint_t opt_b = FALSE;
795 uint_t shift = 0;
796 const char *suffix;
797
798 static const char *bytestats[] = {
799 "p", "c", "c_min", "c_max", "size", NULL
800 };
801
802 static const char *extras[] = {
803 "arc_no_grow", "arc_tempreserve",
804 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
805 NULL
806 };
807
808 if (mdb_lookup_by_name("arc_stats", &sym) == -1) {
809 mdb_warn("failed to find 'arc_stats'");
810 return (DCMD_ERR);
811 }
812
813 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
814
815 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
816 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
817 return (DCMD_ERR);
818 }
819
820 nstats = sym.st_size / sizeof (kstat_named_t);
821
822 /* NB: -a / opt_a are ignored for backwards compatability */
823 if (mdb_getopts(argc, argv,
824 'a', MDB_OPT_SETBITS, TRUE, &opt_a,
825 'b', MDB_OPT_SETBITS, TRUE, &opt_b,
826 'k', MDB_OPT_SETBITS, 10, &shift,
827 'm', MDB_OPT_SETBITS, 20, &shift,
828 'g', MDB_OPT_SETBITS, 30, &shift,
829 NULL) != argc)
830 return (DCMD_USAGE);
831
832 if (!opt_b && !shift)
833 shift = 20;
834
835 switch (shift) {
836 case 0:
837 suffix = "B";
838 break;
839 case 10:
840 suffix = "KB";
841 break;
842 case 20:
843 suffix = "MB";
844 break;
845 case 30:
846 suffix = "GB";
847 break;
848 default:
849 suffix = "XX";
850 }
851
852 for (i = 0; i < nstats; i++) {
853 int j;
854 boolean_t bytes = B_FALSE;
855
856 for (j = 0; bytestats[j]; j++) {
857 if (strcmp(stats[i].name, bytestats[j]) == 0) {
858 bytes = B_TRUE;
859 break;
860 }
861 }
862
863 if (bytes) {
864 mdb_printf("%-25s = %9llu %s\n", stats[i].name,
865 stats[i].value.ui64 >> shift, suffix);
866 } else {
867 mdb_printf("%-25s = %9llu\n", stats[i].name,
868 stats[i].value.ui64);
869 }
870 }
871
872 for (i = 0; extras[i]; i++) {
873 uint64_t buf;
874
875 if (mdb_lookup_by_name(extras[i], &sym) == -1) {
876 mdb_warn("failed to find '%s'", extras[i]);
877 return (DCMD_ERR);
878 }
879
880 if (sym.st_size != sizeof (uint64_t) &&
881 sym.st_size != sizeof (uint32_t)) {
882 mdb_warn("expected scalar for variable '%s'\n",
883 extras[i]);
884 return (DCMD_ERR);
885 }
886
887 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
888 mdb_warn("couldn't read '%s'", extras[i]);
889 return (DCMD_ERR);
890 }
891
892 mdb_printf("%-25s = ", extras[i]);
893
894 /* NB: all the 64-bit extras happen to be byte counts */
895 if (sym.st_size == sizeof (uint64_t))
896 mdb_printf("%9llu %s\n", buf >> shift, suffix);
897
898 if (sym.st_size == sizeof (uint32_t))
899 mdb_printf("%9d\n", *((uint32_t *)&buf));
900 }
901 return (DCMD_OK);
902 }
903
904 /*
905 * ::spa
906 *
907 * -c Print configuration information as well
908 * -v Print vdev state
909 * -e Print vdev error stats
910 *
911 * Print a summarized spa_t. When given no arguments, prints out a table of all
912 * active pools on the system.
913 */
914 /* ARGSUSED */
915 static int
916 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
917 {
918 spa_t spa;
919 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
920 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
921 const char *state;
922 int config = FALSE;
923 int vdevs = FALSE;
924 int errors = FALSE;
925
926 if (mdb_getopts(argc, argv,
927 'c', MDB_OPT_SETBITS, TRUE, &config,
928 'v', MDB_OPT_SETBITS, TRUE, &vdevs,
929 'e', MDB_OPT_SETBITS, TRUE, &errors,
930 NULL) != argc)
931 return (DCMD_USAGE);
932
933 if (!(flags & DCMD_ADDRSPEC)) {
934 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
935 mdb_warn("can't walk spa");
936 return (DCMD_ERR);
937 }
938
939 return (DCMD_OK);
940 }
941
942 if (flags & DCMD_PIPE_OUT) {
943 mdb_printf("%#lr\n", addr);
944 return (DCMD_OK);
945 }
946
947 if (DCMD_HDRSPEC(flags))
948 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
949 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
950
951 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
952 mdb_warn("failed to read spa_t at %p", addr);
953 return (DCMD_ERR);
954 }
955
956 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
957 state = "UNKNOWN";
958 else
959 state = statetab[spa.spa_state];
960
961 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
962
963 if (config) {
964 mdb_printf("\n");
965 mdb_inc_indent(4);
966 if (mdb_call_dcmd("spa_config", addr, flags, 0,
967 NULL) != DCMD_OK)
968 return (DCMD_ERR);
969 mdb_dec_indent(4);
970 }
971
972 if (vdevs || errors) {
973 mdb_arg_t v;
974
975 v.a_type = MDB_TYPE_STRING;
976 v.a_un.a_str = "-e";
977
978 mdb_printf("\n");
979 mdb_inc_indent(4);
980 if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0,
981 &v) != DCMD_OK)
982 return (DCMD_ERR);
983 mdb_dec_indent(4);
984 }
985
986 return (DCMD_OK);
987 }
988
989 /*
990 * ::spa_config
991 *
992 * Given a spa_t, print the configuration information stored in spa_config.
993 * Since it's just an nvlist, format it as an indented list of name=value pairs.
994 * We simply read the value of spa_config and pass off to ::nvlist.
995 */
996 /* ARGSUSED */
997 static int
998 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
999 {
1000 spa_t spa;
1001
1002 if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1003 return (DCMD_USAGE);
1004
1005 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1006 mdb_warn("failed to read spa_t at %p", addr);
1007 return (DCMD_ERR);
1008 }
1009
1010 if (spa.spa_config == NULL) {
1011 mdb_printf("(none)\n");
1012 return (DCMD_OK);
1013 }
1014
1015 return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags,
1016 0, NULL));
1017 }
1018
1019 /*
1020 * ::vdev
1021 *
1022 * Print out a summarized vdev_t, in the following form:
1023 *
1024 * ADDR STATE AUX DESC
1025 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0
1026 *
1027 * If '-r' is specified, recursively visit all children.
1028 *
1029 * With '-e', the statistics associated with the vdev are printed as well.
1030 */
1031 static int
1032 do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
1033 int recursive)
1034 {
1035 vdev_t vdev;
1036 char desc[MAXNAMELEN];
1037 int c, children;
1038 uintptr_t *child;
1039 const char *state, *aux;
1040
1041 if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1042 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1043 return (DCMD_ERR);
1044 }
1045
1046 if (flags & DCMD_PIPE_OUT) {
1047 mdb_printf("%#lr", addr);
1048 } else {
1049 if (vdev.vdev_path != NULL) {
1050 if (mdb_readstr(desc, sizeof (desc),
1051 (uintptr_t)vdev.vdev_path) == -1) {
1052 mdb_warn("failed to read vdev_path at %p\n",
1053 vdev.vdev_path);
1054 return (DCMD_ERR);
1055 }
1056 } else if (vdev.vdev_ops != NULL) {
1057 vdev_ops_t ops;
1058 if (mdb_vread(&ops, sizeof (ops),
1059 (uintptr_t)vdev.vdev_ops) == -1) {
1060 mdb_warn("failed to read vdev_ops at %p\n",
1061 vdev.vdev_ops);
1062 return (DCMD_ERR);
1063 }
1064 (void) strcpy(desc, ops.vdev_op_type);
1065 } else {
1066 (void) strcpy(desc, "<unknown>");
1067 }
1068
1069 if (depth == 0 && DCMD_HDRSPEC(flags))
1070 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1071 "ADDR", "STATE", "AUX",
1072 sizeof (uintptr_t) == 4 ? 43 : 35,
1073 "DESCRIPTION");
1074
1075 mdb_printf("%0?p ", addr);
1076
1077 switch (vdev.vdev_state) {
1078 case VDEV_STATE_CLOSED:
1079 state = "CLOSED";
1080 break;
1081 case VDEV_STATE_OFFLINE:
1082 state = "OFFLINE";
1083 break;
1084 case VDEV_STATE_CANT_OPEN:
1085 state = "CANT_OPEN";
1086 break;
1087 case VDEV_STATE_DEGRADED:
1088 state = "DEGRADED";
1089 break;
1090 case VDEV_STATE_HEALTHY:
1091 state = "HEALTHY";
1092 break;
1093 case VDEV_STATE_REMOVED:
1094 state = "REMOVED";
1095 break;
1096 case VDEV_STATE_FAULTED:
1097 state = "FAULTED";
1098 break;
1099 default:
1100 state = "UNKNOWN";
1101 break;
1102 }
1103
1104 switch (vdev.vdev_stat.vs_aux) {
1105 case VDEV_AUX_NONE:
1106 aux = "-";
1107 break;
1108 case VDEV_AUX_OPEN_FAILED:
1109 aux = "OPEN_FAILED";
1110 break;
1111 case VDEV_AUX_CORRUPT_DATA:
1112 aux = "CORRUPT_DATA";
1113 break;
1114 case VDEV_AUX_NO_REPLICAS:
1115 aux = "NO_REPLICAS";
1116 break;
1117 case VDEV_AUX_BAD_GUID_SUM:
1118 aux = "BAD_GUID_SUM";
1119 break;
1120 case VDEV_AUX_TOO_SMALL:
1121 aux = "TOO_SMALL";
1122 break;
1123 case VDEV_AUX_BAD_LABEL:
1124 aux = "BAD_LABEL";
1125 break;
1126 case VDEV_AUX_VERSION_NEWER:
1127 aux = "VERS_NEWER";
1128 break;
1129 case VDEV_AUX_VERSION_OLDER:
1130 aux = "VERS_OLDER";
1131 break;
1132 case VDEV_AUX_SPARED:
1133 aux = "SPARED";
1134 break;
1135 case VDEV_AUX_ERR_EXCEEDED:
1136 aux = "ERR_EXCEEDED";
1137 break;
1138 case VDEV_AUX_IO_FAILURE:
1139 aux = "IO_FAILURE";
1140 break;
1141 case VDEV_AUX_BAD_LOG:
1142 aux = "BAD_LOG";
1143 break;
1144 default:
1145 aux = "UNKNOWN";
1146 break;
1147 }
1148
1149 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1150
1151 if (stats) {
1152 vdev_stat_t *vs = &vdev.vdev_stat;
1153 int i;
1154
1155 mdb_inc_indent(4);
1156 mdb_printf("\n");
1157 mdb_printf("%<u> %12s %12s %12s %12s "
1158 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1159 "IOCTL");
1160 mdb_printf("OPS ");
1161 for (i = 1; i < ZIO_TYPES; i++)
1162 mdb_printf("%11#llx%s", vs->vs_ops[i],
1163 i == ZIO_TYPES - 1 ? "" : " ");
1164 mdb_printf("\n");
1165 mdb_printf("BYTES ");
1166 for (i = 1; i < ZIO_TYPES; i++)
1167 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1168 i == ZIO_TYPES - 1 ? "" : " ");
1169
1170
1171 mdb_printf("\n");
1172 mdb_printf("EREAD %10#llx\n", vs->vs_read_errors);
1173 mdb_printf("EWRITE %10#llx\n", vs->vs_write_errors);
1174 mdb_printf("ECKSUM %10#llx\n",
1175 vs->vs_checksum_errors);
1176 mdb_dec_indent(4);
1177 }
1178
1179 if (stats)
1180 mdb_printf("\n");
1181 }
1182
1183 children = vdev.vdev_children;
1184
1185 if (children == 0 || !recursive)
1186 return (DCMD_OK);
1187
1188 child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1189 if (mdb_vread(child, children * sizeof (void *),
1190 (uintptr_t)vdev.vdev_child) == -1) {
1191 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1192 return (DCMD_ERR);
1193 }
1194
1195 for (c = 0; c < children; c++) {
1196 if (do_print_vdev(child[c], flags, depth + 2, stats,
1197 recursive))
1198 return (DCMD_ERR);
1199 }
1200
1201 return (DCMD_OK);
1202 }
1203
1204 static int
1205 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1206 {
1207 int recursive = FALSE;
1208 int stats = FALSE;
1209 uint64_t depth = 0;
1210
1211 if (mdb_getopts(argc, argv,
1212 'r', MDB_OPT_SETBITS, TRUE, &recursive,
1213 'e', MDB_OPT_SETBITS, TRUE, &stats,
1214 'd', MDB_OPT_UINT64, &depth,
1215 NULL) != argc)
1216 return (DCMD_USAGE);
1217
1218 if (!(flags & DCMD_ADDRSPEC)) {
1219 mdb_warn("no vdev_t address given\n");
1220 return (DCMD_ERR);
1221 }
1222
1223 return (do_print_vdev(addr, flags, (int)depth, stats, recursive));
1224 }
1225
/*
 * Private state of the metaslab walker: a cursor over the root vdev's
 * children and, within the current child, a cursor over its metaslabs.
 */
typedef struct metaslab_walk_data {
	uint64_t mw_numvdevs;	/* number of entries in mw_vdevs */
	uintptr_t *mw_vdevs;	/* local copy of the root vdev's child array */
	int mw_curvdev;		/* index of the vdev being walked */
	uint64_t mw_nummss;	/* number of entries in mw_mss */
	uintptr_t *mw_mss;	/* current vdev's vdev_ms array; NULL = unread */
	int mw_curms;		/* index of the next metaslab to visit */
} metaslab_walk_data_t;
1234
1235 static int
1236 metaslab_walk_step(mdb_walk_state_t *wsp)
1237 {
1238 metaslab_walk_data_t *mw = wsp->walk_data;
1239 metaslab_t ms;
1240 uintptr_t msp;
1241
1242 if (mw->mw_curvdev >= mw->mw_numvdevs)
1243 return (WALK_DONE);
1244
1245 if (mw->mw_mss == NULL) {
1246 uintptr_t mssp;
1247 uintptr_t vdevp;
1248
1249 ASSERT(mw->mw_curms == 0);
1250 ASSERT(mw->mw_nummss == 0);
1251
1252 vdevp = mw->mw_vdevs[mw->mw_curvdev];
1253 if (GETMEMB(vdevp, struct vdev, vdev_ms, mssp) ||
1254 GETMEMB(vdevp, struct vdev, vdev_ms_count, mw->mw_nummss)) {
1255 return (WALK_ERR);
1256 }
1257
1258 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
1259 UM_SLEEP | UM_GC);
1260 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
1261 mssp) == -1) {
1262 mdb_warn("failed to read vdev_ms at %p", mssp);
1263 return (WALK_ERR);
1264 }
1265 }
1266
1267 if (mw->mw_curms >= mw->mw_nummss) {
1268 mw->mw_mss = NULL;
1269 mw->mw_curms = 0;
1270 mw->mw_nummss = 0;
1271 mw->mw_curvdev++;
1272 return (WALK_NEXT);
1273 }
1274
1275 msp = mw->mw_mss[mw->mw_curms];
1276 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
1277 mdb_warn("failed to read metaslab_t at %p", msp);
1278 return (WALK_ERR);
1279 }
1280
1281 mw->mw_curms++;
1282
1283 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
1284 }
1285
1286 /* ARGSUSED */
1287 static int
1288 metaslab_walk_init(mdb_walk_state_t *wsp)
1289 {
1290 metaslab_walk_data_t *mw;
1291 uintptr_t root_vdevp;
1292 uintptr_t childp;
1293
1294 if (wsp->walk_addr == NULL) {
1295 mdb_warn("must supply address of spa_t\n");
1296 return (WALK_ERR);
1297 }
1298
1299 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
1300
1301 if (GETMEMB(wsp->walk_addr, struct spa, spa_root_vdev, root_vdevp) ||
1302 GETMEMB(root_vdevp, struct vdev, vdev_children, mw->mw_numvdevs) ||
1303 GETMEMB(root_vdevp, struct vdev, vdev_child, childp)) {
1304 return (DCMD_ERR);
1305 }
1306
1307 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
1308 UM_SLEEP | UM_GC);
1309 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
1310 childp) == -1) {
1311 mdb_warn("failed to read root vdev children at %p", childp);
1312 return (DCMD_ERR);
1313 }
1314
1315 wsp->walk_data = mw;
1316
1317 return (WALK_NEXT);
1318 }
1319
/*
 * Local holder for the spa members that spa_space() fetches with GETMEMB.
 * Both fields are addresses in the target, not local pointers.
 */
typedef struct mdb_spa {
	uintptr_t spa_dsl_pool;		/* address of the pool's dsl_pool */
	uintptr_t spa_root_vdev;	/* address of the pool's root vdev */
} mdb_spa_t;
1324
1325 typedef struct mdb_dsl_dir {
1326 uintptr_t dd_phys;
1327 int64_t dd_space_towrite[TXG_SIZE];
1328 } mdb_dsl_dir_t;
1329
/*
 * Local holder for the three dsl_dir_phys byte counters that spa_space()
 * reads and prints.
 */
typedef struct mdb_dsl_dir_phys {
	uint64_t dd_used_bytes;
	uint64_t dd_compressed_bytes;
	uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
1335
1336 typedef struct mdb_vdev {
1337 uintptr_t vdev_parent;
1338 uintptr_t vdev_ms;
1339 uint64_t vdev_ms_count;
1340 vdev_stat_t vdev_stat;
1341 } mdb_vdev_t;
1342
1343 typedef struct mdb_metaslab {
1344 space_map_t ms_allocmap[TXG_SIZE];
1345 space_map_t ms_freemap[TXG_SIZE];
1346 space_map_t ms_map;
1347 space_map_obj_t ms_smo;
1348 space_map_obj_t ms_smo_syncing;
1349 } mdb_metaslab_t;
1350
1351 typedef struct space_data {
1352 uint64_t ms_allocmap[TXG_SIZE];
1353 uint64_t ms_freemap[TXG_SIZE];
1354 uint64_t ms_map;
1355 uint64_t avail;
1356 uint64_t nowavail;
1357 } space_data_t;
1358
1359 /* ARGSUSED */
1360 static int
1361 space_cb(uintptr_t addr, const void *unknown, void *arg)
1362 {
1363 space_data_t *sd = arg;
1364 mdb_metaslab_t ms;
1365
1366 if (GETMEMB(addr, struct metaslab, ms_allocmap, ms.ms_allocmap) ||
1367 GETMEMB(addr, struct metaslab, ms_freemap, ms.ms_freemap) ||
1368 GETMEMB(addr, struct metaslab, ms_map, ms.ms_map) ||
1369 GETMEMB(addr, struct metaslab, ms_smo, ms.ms_smo) ||
1370 GETMEMB(addr, struct metaslab, ms_smo_syncing, ms.ms_smo_syncing)) {
1371 return (WALK_ERR);
1372 }
1373
1374 sd->ms_allocmap[0] += ms.ms_allocmap[0].sm_space;
1375 sd->ms_allocmap[1] += ms.ms_allocmap[1].sm_space;
1376 sd->ms_allocmap[2] += ms.ms_allocmap[2].sm_space;
1377 sd->ms_allocmap[3] += ms.ms_allocmap[3].sm_space;
1378 sd->ms_freemap[0] += ms.ms_freemap[0].sm_space;
1379 sd->ms_freemap[1] += ms.ms_freemap[1].sm_space;
1380 sd->ms_freemap[2] += ms.ms_freemap[2].sm_space;
1381 sd->ms_freemap[3] += ms.ms_freemap[3].sm_space;
1382 sd->ms_map += ms.ms_map.sm_space;
1383 sd->avail += ms.ms_map.sm_size - ms.ms_smo.smo_alloc;
1384 sd->nowavail += ms.ms_map.sm_size - ms.ms_smo_syncing.smo_alloc;
1385
1386 return (WALK_NEXT);
1387 }
1388
1389 /*
1390 * ::spa_space [-b]
1391 *
1392 * Given a spa_t, print out it's on-disk space usage and in-core
1393 * estimates of future usage. If -b is given, print space in bytes.
1394 * Otherwise print in megabytes.
1395 */
1396 /* ARGSUSED */
1397 static int
1398 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1399 {
1400 mdb_spa_t spa;
1401 uintptr_t dp_root_dir;
1402 mdb_dsl_dir_t dd;
1403 mdb_dsl_dir_phys_t dsp;
1404 uint64_t children;
1405 uintptr_t childaddr;
1406 space_data_t sd;
1407 int shift = 20;
1408 char *suffix = "M";
1409 int bits = FALSE;
1410
1411 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) !=
1412 argc)
1413 return (DCMD_USAGE);
1414 if (!(flags & DCMD_ADDRSPEC))
1415 return (DCMD_USAGE);
1416
1417 if (bits) {
1418 shift = 0;
1419 suffix = "";
1420 }
1421
1422 if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) ||
1423 GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) ||
1424 GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) ||
1425 GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) ||
1426 GETMEMB(spa.spa_dsl_pool, struct dsl_pool,
1427 dp_root_dir, dp_root_dir) ||
1428 GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) ||
1429 GETMEMB(dp_root_dir, struct dsl_dir,
1430 dd_space_towrite, dd.dd_space_towrite) ||
1431 GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1432 dd_used_bytes, dsp.dd_used_bytes) ||
1433 GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1434 dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1435 GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1436 dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1437 return (DCMD_ERR);
1438 }
1439
1440 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1441 dd.dd_space_towrite[0] >> shift, suffix,
1442 dd.dd_space_towrite[1] >> shift, suffix,
1443 dd.dd_space_towrite[2] >> shift, suffix,
1444 dd.dd_space_towrite[3] >> shift, suffix);
1445
1446 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1447 dsp.dd_used_bytes >> shift, suffix);
1448 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1449 dsp.dd_compressed_bytes >> shift, suffix);
1450 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1451 dsp.dd_uncompressed_bytes >> shift, suffix);
1452
1453 bzero(&sd, sizeof (sd));
1454 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
1455 mdb_warn("can't walk metaslabs");
1456 return (DCMD_ERR);
1457 }
1458
1459 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1460 sd.ms_allocmap[0] >> shift, suffix,
1461 sd.ms_allocmap[1] >> shift, suffix,
1462 sd.ms_allocmap[2] >> shift, suffix,
1463 sd.ms_allocmap[3] >> shift, suffix);
1464 mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1465 sd.ms_freemap[0] >> shift, suffix,
1466 sd.ms_freemap[1] >> shift, suffix,
1467 sd.ms_freemap[2] >> shift, suffix,
1468 sd.ms_freemap[3] >> shift, suffix);
1469 mdb_printf("ms_map = %llu%s\n", sd.ms_map >> shift, suffix);
1470 mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
1471 mdb_printf("current syncing avail = %llu%s\n",
1472 sd.nowavail >> shift, suffix);
1473
1474 return (DCMD_OK);
1475 }
1476
1477 /*
1478 * ::spa_verify
1479 *
1480 * Given a spa_t, verify that that the pool is self-consistent.
1481 * Currently, it only checks to make sure that the vdev tree exists.
1482 */
1483 /* ARGSUSED */
1484 static int
1485 spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1486 {
1487 spa_t spa;
1488
1489 if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1490 return (DCMD_USAGE);
1491
1492 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1493 mdb_warn("failed to read spa_t at %p", addr);
1494 return (DCMD_ERR);
1495 }
1496
1497 if (spa.spa_root_vdev == NULL) {
1498 mdb_printf("no vdev tree present\n");
1499 return (DCMD_OK);
1500 }
1501
1502 return (DCMD_OK);
1503 }
1504
1505 static int
1506 spa_print_aux(spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
1507 const char *name)
1508 {
1509 uintptr_t *aux;
1510 size_t len;
1511 int ret, i;
1512
1513 /*
1514 * Iterate over aux vdevs and print those out as well. This is a
1515 * little annoying because we don't have a root vdev to pass to ::vdev.
1516 * Instead, we print a single line and then call it for each child
1517 * vdev.
1518 */
1519 if (sav->sav_count != 0) {
1520 v[1].a_type = MDB_TYPE_STRING;
1521 v[1].a_un.a_str = "-d";
1522 v[2].a_type = MDB_TYPE_IMMEDIATE;
1523 v[2].a_un.a_val = 2;
1524
1525 len = sav->sav_count * sizeof (uintptr_t);
1526 aux = mdb_alloc(len, UM_SLEEP);
1527 if (mdb_vread(aux, len,
1528 (uintptr_t)sav->sav_vdevs) == -1) {
1529 mdb_free(aux, len);
1530 mdb_warn("failed to read l2cache vdevs at %p",
1531 sav->sav_vdevs);
1532 return (DCMD_ERR);
1533 }
1534
1535 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
1536
1537 for (i = 0; i < sav->sav_count; i++) {
1538 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
1539 if (ret != DCMD_OK) {
1540 mdb_free(aux, len);
1541 return (ret);
1542 }
1543 }
1544
1545 mdb_free(aux, len);
1546 }
1547
1548 return (0);
1549 }
1550
1551 /*
1552 * ::spa_vdevs
1553 *
1554 * -e Include error stats
1555 *
1556 * Print out a summarized list of vdevs for the given spa_t.
1557 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
1558 * iterating over the cache devices.
1559 */
1560 /* ARGSUSED */
1561 static int
1562 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1563 {
1564 spa_t spa;
1565 mdb_arg_t v[3];
1566 int errors = FALSE;
1567 int ret;
1568
1569 if (mdb_getopts(argc, argv,
1570 'e', MDB_OPT_SETBITS, TRUE, &errors,
1571 NULL) != argc)
1572 return (DCMD_USAGE);
1573
1574 if (!(flags & DCMD_ADDRSPEC))
1575 return (DCMD_USAGE);
1576
1577 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1578 mdb_warn("failed to read spa_t at %p", addr);
1579 return (DCMD_ERR);
1580 }
1581
1582 /*
1583 * Unitialized spa_t structures can have a NULL root vdev.
1584 */
1585 if (spa.spa_root_vdev == NULL) {
1586 mdb_printf("no associated vdevs\n");
1587 return (DCMD_OK);
1588 }
1589
1590 v[0].a_type = MDB_TYPE_STRING;
1591 v[0].a_un.a_str = errors ? "-re" : "-r";
1592
1593 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1594 flags, 1, v);
1595 if (ret != DCMD_OK)
1596 return (ret);
1597
1598 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
1599 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
1600 return (DCMD_ERR);
1601
1602 return (DCMD_OK);
1603 }
1604
1605 /*
1606 * ::zio
1607 *
1608 * Print a summary of zio_t and all its children. This is intended to display a
1609 * zio tree, and hence we only pick the most important pieces of information for
1610 * the main summary. More detailed information can always be found by doing a
1611 * '::print zio' on the underlying zio_t. The columns we display are:
1612 *
1613 * ADDRESS TYPE STAGE WAITER
1614 *
1615 * The 'address' column is indented by one space for each depth level as we
1616 * descend down the tree.
1617 */
1618
/* Largest indentation, in columns, applied to the ADDRESS column. */
#define	ZIO_MAXINDENT	24
/* Width of the ADDRESS column: two hex digits per pointer byte + indent. */
#define	ZIO_MAXWIDTH	(sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)

/* Values of zio_print_args_t.zpa_type. */
#define	ZIO_WALK_SELF	0
#define	ZIO_WALK_CHILD	1
#define	ZIO_WALK_PARENT	2
1624
1625 typedef struct zio_print_args {
1626 int zpa_current_depth;
1627 int zpa_min_depth;
1628 int zpa_max_depth;
1629 int zpa_type;
1630 uint_t zpa_flags;
1631 } zio_print_args_t;
1632
1633 static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
1634
1635 static int
1636 zio_print_cb(uintptr_t addr, const void *data, void *priv)
1637 {
1638 const zio_t *zio = data;
1639 zio_print_args_t *zpa = priv;
1640 mdb_ctf_id_t type_enum, stage_enum;
1641 int indent = zpa->zpa_current_depth;
1642 const char *type, *stage;
1643 uintptr_t laddr;
1644
1645 if (indent > ZIO_MAXINDENT)
1646 indent = ZIO_MAXINDENT;
1647
1648 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
1649 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
1650 mdb_warn("failed to lookup zio enums");
1651 return (WALK_ERR);
1652 }
1653
1654 if ((type = mdb_ctf_enum_name(type_enum, zio->io_type)) != NULL)
1655 type += sizeof ("ZIO_TYPE_") - 1;
1656 else
1657 type = "?";
1658
1659 if ((stage = mdb_ctf_enum_name(stage_enum, zio->io_stage)) != NULL)
1660 stage += sizeof ("ZIO_STAGE_") - 1;
1661 else
1662 stage = "?";
1663
1664 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
1665 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
1666 mdb_printf("%?p\n", addr);
1667 } else {
1668 mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
1669 ZIO_MAXWIDTH - indent, addr, type, stage);
1670 if (zio->io_waiter)
1671 mdb_printf("%?p\n", zio->io_waiter);
1672 else
1673 mdb_printf("-\n");
1674 }
1675 }
1676
1677 if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
1678 return (WALK_NEXT);
1679
1680 if (zpa->zpa_type == ZIO_WALK_PARENT)
1681 laddr = addr + OFFSETOF(zio_t, io_parent_list);
1682 else
1683 laddr = addr + OFFSETOF(zio_t, io_child_list);
1684
1685 zpa->zpa_current_depth++;
1686 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
1687 mdb_warn("failed to walk zio_t children at %p\n", laddr);
1688 return (WALK_ERR);
1689 }
1690 zpa->zpa_current_depth--;
1691
1692 return (WALK_NEXT);
1693 }
1694
1695 /* ARGSUSED */
1696 static int
1697 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
1698 {
1699 zio_link_t zl;
1700 zio_t zio;
1701 uintptr_t ziop;
1702 zio_print_args_t *zpa = arg;
1703
1704 if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
1705 mdb_warn("failed to read zio_link_t at %p", addr);
1706 return (WALK_ERR);
1707 }
1708
1709 if (zpa->zpa_type == ZIO_WALK_PARENT)
1710 ziop = (uintptr_t)zl.zl_parent;
1711 else
1712 ziop = (uintptr_t)zl.zl_child;
1713
1714 if (mdb_vread(&zio, sizeof (zio_t), ziop) == -1) {
1715 mdb_warn("failed to read zio_t at %p", ziop);
1716 return (WALK_ERR);
1717 }
1718
1719 return (zio_print_cb(ziop, &zio, arg));
1720 }
1721
1722 /* ARGSUSED */
1723 static int
1724 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1725 {
1726 zio_t zio;
1727 zio_print_args_t zpa = { 0 };
1728
1729 if (!(flags & DCMD_ADDRSPEC))
1730 return (DCMD_USAGE);
1731
1732 if (mdb_getopts(argc, argv,
1733 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
1734 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
1735 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
1736 NULL) != argc)
1737 return (DCMD_USAGE);
1738
1739 zpa.zpa_flags = flags;
1740 if (zpa.zpa_max_depth != 0) {
1741 if (zpa.zpa_type == ZIO_WALK_SELF)
1742 zpa.zpa_type = ZIO_WALK_CHILD;
1743 } else if (zpa.zpa_type != ZIO_WALK_SELF) {
1744 zpa.zpa_min_depth = 1;
1745 zpa.zpa_max_depth = 1;
1746 }
1747
1748 if (mdb_vread(&zio, sizeof (zio_t), addr) == -1) {
1749 mdb_warn("failed to read zio_t at %p", addr);
1750 return (DCMD_ERR);
1751 }
1752
1753 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags))
1754 mdb_printf("%<u>%-*s %-5s %-16s %-?s%</u>\n", ZIO_MAXWIDTH,
1755 "ADDRESS", "TYPE", "STAGE", "WAITER");
1756
1757 if (zio_print_cb(addr, &zio, &zpa) != WALK_NEXT)
1758 return (DCMD_ERR);
1759
1760 return (DCMD_OK);
1761 }
1762
1763 /*
1764 * [addr]::zio_state
1765 *
1766 * Print a summary of all zio_t structures on the system, or for a particular
1767 * pool. This is equivalent to '::walk zio_root | ::zio'.
1768 */
1769 /*ARGSUSED*/
1770 static int
1771 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1772 {
1773 /*
1774 * MDB will remember the last address of the pipeline, so if we don't
1775 * zero this we'll end up trying to walk zio structures for a
1776 * non-existent spa_t.
1777 */
1778 if (!(flags & DCMD_ADDRSPEC))
1779 addr = 0;
1780
1781 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
1782 }
1783
1784 typedef struct txg_list_walk_data {
1785 uintptr_t lw_head[TXG_SIZE];
1786 int lw_txgoff;
1787 int lw_maxoff;
1788 size_t lw_offset;
1789 void *lw_obj;
1790 } txg_list_walk_data_t;
1791
1792 static int
1793 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
1794 {
1795 txg_list_walk_data_t *lwd;
1796 txg_list_t list;
1797 int i;
1798
1799 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
1800 if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
1801 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
1802 return (WALK_ERR);
1803 }
1804
1805 for (i = 0; i < TXG_SIZE; i++)
1806 lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
1807 lwd->lw_offset = list.tl_offset;
1808 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
1809 UM_SLEEP | UM_GC);
1810 lwd->lw_txgoff = txg;
1811 lwd->lw_maxoff = maxoff;
1812
1813 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1814 wsp->walk_data = lwd;
1815
1816 return (WALK_NEXT);
1817 }
1818
1819 static int
1820 txg_list_walk_init(mdb_walk_state_t *wsp)
1821 {
1822 return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
1823 }
1824
1825 static int
1826 txg_list0_walk_init(mdb_walk_state_t *wsp)
1827 {
1828 return (txg_list_walk_init_common(wsp, 0, 0));
1829 }
1830
1831 static int
1832 txg_list1_walk_init(mdb_walk_state_t *wsp)
1833 {
1834 return (txg_list_walk_init_common(wsp, 1, 1));
1835 }
1836
1837 static int
1838 txg_list2_walk_init(mdb_walk_state_t *wsp)
1839 {
1840 return (txg_list_walk_init_common(wsp, 2, 2));
1841 }
1842
1843 static int
1844 txg_list3_walk_init(mdb_walk_state_t *wsp)
1845 {
1846 return (txg_list_walk_init_common(wsp, 3, 3));
1847 }
1848
1849 static int
1850 txg_list_walk_step(mdb_walk_state_t *wsp)
1851 {
1852 txg_list_walk_data_t *lwd = wsp->walk_data;
1853 uintptr_t addr;
1854 txg_node_t *node;
1855 int status;
1856
1857 while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
1858 lwd->lw_txgoff++;
1859 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1860 }
1861
1862 if (wsp->walk_addr == NULL)
1863 return (WALK_DONE);
1864
1865 addr = wsp->walk_addr - lwd->lw_offset;
1866
1867 if (mdb_vread(lwd->lw_obj,
1868 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
1869 mdb_warn("failed to read list element at %#lx", addr);
1870 return (WALK_ERR);
1871 }
1872
1873 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
1874 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
1875 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
1876
1877 return (status);
1878 }
1879
1880 /*
1881 * ::walk spa
1882 *
1883 * Walk all named spa_t structures in the namespace. This is nothing more than
1884 * a layered avl walk.
1885 */
1886 static int
1887 spa_walk_init(mdb_walk_state_t *wsp)
1888 {
1889 GElf_Sym sym;
1890
1891 if (wsp->walk_addr != NULL) {
1892 mdb_warn("spa walk only supports global walks\n");
1893 return (WALK_ERR);
1894 }
1895
1896 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
1897 mdb_warn("failed to find symbol 'spa_namespace_avl'");
1898 return (WALK_ERR);
1899 }
1900
1901 wsp->walk_addr = (uintptr_t)sym.st_value;
1902
1903 if (mdb_layered_walk("avl", wsp) == -1) {
1904 mdb_warn("failed to walk 'avl'\n");
1905 return (WALK_ERR);
1906 }
1907
1908 return (WALK_NEXT);
1909 }
1910
1911 static int
1912 spa_walk_step(mdb_walk_state_t *wsp)
1913 {
1914 spa_t spa;
1915
1916 if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) {
1917 mdb_warn("failed to read spa_t at %p", wsp->walk_addr);
1918 return (WALK_ERR);
1919 }
1920
1921 return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata));
1922 }
1923
1924 /*
1925 * [addr]::walk zio
1926 *
1927 * Walk all active zio_t structures on the system. This is simply a layered
1928 * walk on top of ::walk zio_cache, with the optional ability to limit the
1929 * structures to a particular pool.
1930 */
1931 static int
1932 zio_walk_init(mdb_walk_state_t *wsp)
1933 {
1934 wsp->walk_data = (void *)wsp->walk_addr;
1935
1936 if (mdb_layered_walk("zio_cache", wsp) == -1) {
1937 mdb_warn("failed to walk 'zio_cache'\n");
1938 return (WALK_ERR);
1939 }
1940
1941 return (WALK_NEXT);
1942 }
1943
1944 static int
1945 zio_walk_step(mdb_walk_state_t *wsp)
1946 {
1947 zio_t zio;
1948
1949 if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1950 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1951 return (WALK_ERR);
1952 }
1953
1954 if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1955 return (WALK_NEXT);
1956
1957 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1958 }
1959
1960 /*
1961 * [addr]::walk zio_root
1962 *
1963 * Walk only root zio_t structures, optionally for a particular spa_t.
1964 */
1965 static int
1966 zio_walk_root_step(mdb_walk_state_t *wsp)
1967 {
1968 zio_t zio;
1969
1970 if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1971 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1972 return (WALK_ERR);
1973 }
1974
1975 if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1976 return (WALK_NEXT);
1977
1978 /* If the parent list is not empty, ignore */
1979 if (zio.io_parent_list.list_head.list_next !=
1980 &((zio_t *)wsp->walk_addr)->io_parent_list.list_head)
1981 return (WALK_NEXT);
1982
1983 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1984 }
1985
1986 #define NICENUM_BUFLEN 6
1987
1988 static int
1989 snprintfrac(char *buf, int len,
1990 uint64_t numerator, uint64_t denom, int frac_digits)
1991 {
1992 int mul = 1;
1993 int whole, frac, i;
1994
1995 for (i = frac_digits; i; i--)
1996 mul *= 10;
1997 whole = numerator / denom;
1998 frac = mul * numerator / denom - mul * whole;
1999 return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
2000 }
2001
2002 static void
2003 mdb_nicenum(uint64_t num, char *buf)
2004 {
2005 uint64_t n = num;
2006 int index = 0;
2007 char *u;
2008
2009 while (n >= 1024) {
2010 n = (n + (1024 / 2)) / 1024; /* Round up or down */
2011 index++;
2012 }
2013
2014 u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
2015
2016 if (index == 0) {
2017 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
2018 (u_longlong_t)n);
2019 } else if (n < 10 && (num & (num - 1)) != 0) {
2020 (void) snprintfrac(buf, NICENUM_BUFLEN,
2021 num, 1ULL << 10 * index, 2);
2022 strcat(buf, u);
2023 } else if (n < 100 && (num & (num - 1)) != 0) {
2024 (void) snprintfrac(buf, NICENUM_BUFLEN,
2025 num, 1ULL << 10 * index, 1);
2026 strcat(buf, u);
2027 } else {
2028 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
2029 (u_longlong_t)n, u);
2030 }
2031 }
2032
2033 /*
2034 * ::zfs_blkstats
2035 *
2036 * -v print verbose per-level information
2037 *
2038 */
2039 static int
2040 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2041 {
2042 boolean_t verbose = B_FALSE;
2043 zfs_all_blkstats_t stats;
2044 dmu_object_type_t t;
2045 zfs_blkstat_t *tzb;
2046 uint64_t ditto;
2047 dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2048 /* +10 in case it grew */
2049
2050 if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2051 mdb_warn("failed to read 'dmu_ot'");
2052 return (DCMD_ERR);
2053 }
2054
2055 if (mdb_getopts(argc, argv,
2056 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2057 NULL) != argc)
2058 return (DCMD_USAGE);
2059
2060 if (!(flags & DCMD_ADDRSPEC))
2061 return (DCMD_USAGE);
2062
2063 if (GETMEMB(addr, struct spa, spa_dsl_pool, addr) ||
2064 GETMEMB(addr, struct dsl_pool, dp_blkstats, addr) ||
2065 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2066 mdb_warn("failed to read data at %p;", addr);
2067 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2068 return (DCMD_ERR);
2069 }
2070
2071 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_NUMTYPES];
2072 if (tzb->zb_gangs != 0) {
2073 mdb_printf("Ganged blocks: %llu\n",
2074 (longlong_t)tzb->zb_gangs);
2075 }
2076
2077 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2078 tzb->zb_ditto_3_of_3_samevdev;
2079 if (ditto != 0) {
2080 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2081 (longlong_t)ditto);
2082 }
2083
2084 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2085 "\t avg\t comp\t%%Total\tType\n");
2086
2087 for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
2088 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2089 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2090 char avg[NICENUM_BUFLEN];
2091 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2092 char typename[64];
2093 int l;
2094
2095
2096 if (t == DMU_OT_DEFERRED)
2097 strcpy(typename, "deferred free");
2098 else if (t == DMU_OT_TOTAL)
2099 strcpy(typename, "Total");
2100 else if (mdb_readstr(typename, sizeof (typename),
2101 (uintptr_t)dmu_ot[t].ot_name) == -1) {
2102 mdb_warn("failed to read type name");
2103 return (DCMD_ERR);
2104 }
2105
2106 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2107 continue;
2108
2109 for (l = -1; l < DN_MAX_LEVELS; l++) {
2110 int level = (l == -1 ? DN_MAX_LEVELS : l);
2111 zfs_blkstat_t *zb = &stats.zab_type[level][t];
2112
2113 if (zb->zb_asize == 0)
2114 continue;
2115
2116 /*
2117 * Don't print each level unless requested.
2118 */
2119 if (!verbose && level != DN_MAX_LEVELS)
2120 continue;
2121
2122 /*
2123 * If all the space is level 0, don't print the
2124 * level 0 separately.
2125 */
2126 if (level == 0 && zb->zb_asize ==
2127 stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2128 continue;
2129
2130 mdb_nicenum(zb->zb_count, csize);
2131 mdb_nicenum(zb->zb_lsize, lsize);
2132 mdb_nicenum(zb->zb_psize, psize);
2133 mdb_nicenum(zb->zb_asize, asize);
2134 mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2135 (void) snprintfrac(comp, NICENUM_BUFLEN,
2136 zb->zb_lsize, zb->zb_psize, 2);
2137 (void) snprintfrac(pct, NICENUM_BUFLEN,
2138 100 * zb->zb_asize, tzb->zb_asize, 2);
2139
2140 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2141 "\t%5s\t%6s\t",
2142 csize, lsize, psize, asize, avg, comp, pct);
2143
2144 if (level == DN_MAX_LEVELS)
2145 mdb_printf("%s\n", typename);
2146 else
2147 mdb_printf(" L%d %s\n",
2148 level, typename);
2149 }
2150 }
2151
2152 return (DCMD_OK);
2153 }
2154
2155 /*
2156 * MDB module linkage information:
2157 *
2158 * We declare a list of structures describing our dcmds, and a function
2159 * named _mdb_init to return a pointer to our module information.
2160 */
2161
2162 static const mdb_dcmd_t dcmds[] = {
2163 { "arc", "[-bkmg]", "print ARC variables", arc_print },
2164 { "blkptr", ":", "print blkptr_t", blkptr },
2165 { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
2166 { "dbuf_stats", ":", "dbuf stats", dbuf_stats },
2167 { "dbufs",
2168 "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
2169 "[-o object | \"mdn\"] \n"
2170 "\t[-l level] [-b blkid | \"bonus\"]",
2171 "find dmu_buf_impl_t's that match specified criteria", dbufs },
2172 { "abuf_find", "dva_word[0] dva_word[1]",
2173 "find arc_buf_hdr_t of a specified DVA",
2174 abuf_find },
2175 { "spa", "?[-cv]", "spa_t summary", spa_print },
2176 { "spa_config", ":", "print spa_t configuration", spa_print_config },
2177 { "spa_verify", ":", "verify spa_t consistency", spa_verify },
2178 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
2179 { "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
2180 { "vdev", ":[-re]\n"
2181 "\t-r display recursively\n"
2182 "\t-e print statistics",
2183 "vdev_t summary", vdev_print },
2184 { "zio", ":[cpr]\n"
2185 "\t-c display children\n"
2186 "\t-p display parents\n"
2187 "\t-r display recursively",
2188 "zio_t summary", zio_print },
2189 { "zio_state", "?", "print out all zio_t structures on system or "
2190 "for a particular pool", zio_state },
2191 { "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
2192 { "zfs_blkstats", ":[-v]",
2193 "given a spa_t, print block type stats from last scrub",
2194 zfs_blkstats },
2195 { "zfs_params", "", "print zfs tunable parameters", zfs_params },
2196 { NULL }
2197 };
2198
2199 static const mdb_walker_t walkers[] = {
2200 /*
2201 * In userland, there is no generic provider of list_t walkers, so we
2202 * need to add it.
2203 */
2204 #ifndef _KERNEL
2205 { LIST_WALK_NAME, LIST_WALK_DESC,
2206 list_walk_init, list_walk_step, list_walk_fini },
2207 #endif
2208 { "zms_freelist", "walk ZFS metaslab freelist",
2209 freelist_walk_init, freelist_walk_step, NULL },
2210 { "txg_list", "given any txg_list_t *, walk all entries in all txgs",
2211 txg_list_walk_init, txg_list_walk_step, NULL },
2212 { "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
2213 txg_list0_walk_init, txg_list_walk_step, NULL },
2214 { "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
2215 txg_list1_walk_init, txg_list_walk_step, NULL },
2216 { "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
2217 txg_list2_walk_init, txg_list_walk_step, NULL },
2218 { "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
2219 txg_list3_walk_init, txg_list_walk_step, NULL },
2220 { "zio", "walk all zio structures, optionally for a particular spa_t",
2221 zio_walk_init, zio_walk_step, NULL },
2222 { "zio_root", "walk all root zio_t structures, optionally for a "
2223 "particular spa_t",
2224 zio_walk_init, zio_walk_root_step, NULL },
2225 { "spa", "walk all spa_t entries in the namespace",
2226 spa_walk_init, spa_walk_step, NULL },
2227 { "metaslab", "given a spa_t *, walk all metaslab_t structures",
2228 metaslab_walk_init, metaslab_walk_step, NULL },
2229 { NULL }
2230 };
2231
2232 static const mdb_modinfo_t modinfo = {
2233 MDB_API_VERSION, dcmds, walkers
2234 };
2235
2236 const mdb_modinfo_t *
2237 _mdb_init(void)
2238 {
2239 return (&modinfo);
2240 }