1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <mdb/mdb_ctf.h>
27 #include <sys/zfs_context.h>
28 #include <sys/mdb_modapi.h>
29 #include <sys/dbuf.h>
30 #include <sys/dmu_objset.h>
31 #include <sys/dsl_dir.h>
32 #include <sys/dsl_pool.h>
33 #include <sys/metaslab_impl.h>
34 #include <sys/space_map.h>
35 #include <sys/list.h>
36 #include <sys/spa_impl.h>
37 #include <sys/vdev_impl.h>
38 #include <sys/zio_compress.h>
39
40 #ifndef _KERNEL
41 #include "../genunix/list.h"
42 #endif
43
/*
 * ZFS_OBJ_NAME is "zfs" when this module is built with _KERNEL defined and
 * "libzpool.so.1" otherwise.
 * NOTE(review): presumably the name of the object whose symbols this module
 * inspects; it is not referenced in this part of the file -- confirm.
 */
#ifdef _KERNEL
#define ZFS_OBJ_NAME "zfs"
#else
#define ZFS_OBJ_NAME "libzpool.so.1"
#endif
49
50 static char *
51 local_strdup(const char *s)
52 {
53 char *s1 = mdb_alloc(strlen(s) + 1, UM_SLEEP);
54
55 (void) strcpy(s1, s);
56 return (s1);
57 }
58
59 static int
60 getmember(uintptr_t addr, const char *type, mdb_ctf_id_t *idp,
61 const char *member, int len, void *buf)
62 {
63 mdb_ctf_id_t id;
64 ulong_t off;
65 char name[64];
66
67 if (idp == NULL) {
68 if (mdb_ctf_lookup_by_name(type, &id) == -1) {
69 mdb_warn("couldn't find type %s", type);
70 return (DCMD_ERR);
71 }
72 idp = &id;
73 } else {
74 type = name;
75 mdb_ctf_type_name(*idp, name, sizeof (name));
76 }
77
78 if (mdb_ctf_offsetof(*idp, member, &off) == -1) {
79 mdb_warn("couldn't find member %s of type %s\n", member, type);
80 return (DCMD_ERR);
81 }
82 if (off % 8 != 0) {
83 mdb_warn("member %s of type %s is unsupported bitfield",
84 member, type);
85 return (DCMD_ERR);
86 }
87 off /= 8;
88
89 if (mdb_vread(buf, len, addr + off) == -1) {
90 mdb_warn("failed to read %s from %s at %p",
91 member, type, addr + off);
92 return (DCMD_ERR);
93 }
94 /* mdb_warn("read %s from %s at %p+%llx\n", member, type, addr, off); */
95
96 return (0);
97 }
98
/*
 * GETMEMB reads 'member' of the structure at target address 'addr' into
 * 'dest'.  The structure is identified by name: 'type' and 'member' are
 * stringified and handed to getmember() with a NULL CTF id.  sizeof (dest)
 * bytes are read, and the result is getmember()'s return value.
 */
#define GETMEMB(addr, type, member, dest) \
	getmember(addr, #type, NULL, #member, sizeof (dest), &(dest))

/*
 * GETMEMBID is the same operation, but the structure is identified by the
 * CTF id pointer 'ctfid' and getmember() receives a NULL type name.
 */
#define GETMEMBID(addr, ctfid, member, dest) \
	getmember(addr, NULL, ctfid, #member, sizeof (dest), &(dest))
104
105 static int
106 getrefcount(uintptr_t addr, mdb_ctf_id_t *id,
107 const char *member, uint64_t *rc)
108 {
109 static int gotid;
110 static mdb_ctf_id_t rc_id;
111 ulong_t off;
112
113 if (!gotid) {
114 if (mdb_ctf_lookup_by_name("struct refcount", &rc_id) == -1) {
115 mdb_warn("couldn't find struct refcount");
116 return (DCMD_ERR);
117 }
118 gotid = TRUE;
119 }
120
121 if (mdb_ctf_offsetof(*id, member, &off) == -1) {
122 char name[64];
123 mdb_ctf_type_name(*id, name, sizeof (name));
124 mdb_warn("couldn't find member %s of type %s\n", member, name);
125 return (DCMD_ERR);
126 }
127 off /= 8;
128
129 return (GETMEMBID(addr + off, &rc_id, rc_count, *rc));
130 }
131
132 static int
133 read_symbol(char *sym_name, void **bufp)
134 {
135 GElf_Sym sym;
136
137 if (mdb_lookup_by_obj(MDB_TGT_OBJ_EVERY, sym_name, &sym)) {
138 mdb_warn("can't find symbol %s", sym_name);
139 return (DCMD_ERR);
140 }
141
142 *bufp = mdb_alloc(sym.st_size, UM_SLEEP);
143
144 if (mdb_vread(*bufp, sym.st_size, sym.st_value) == -1) {
145 mdb_warn("can't read data for symbol %s", sym_name);
146 mdb_free(*bufp, sym.st_size);
147 return (DCMD_ERR);
148 }
149
150 return (DCMD_OK);
151 }
152
/*
 * When nonzero, freelist_walk_step() additionally prints the raw value of
 * each non-debug entry.  Nothing in this part of the file assigns it.
 */
static int verbose;
154
155 static int
156 freelist_walk_init(mdb_walk_state_t *wsp)
157 {
158 if (wsp->walk_addr == NULL) {
159 mdb_warn("must supply starting address\n");
160 return (WALK_ERR);
161 }
162
163 wsp->walk_data = 0; /* Index into the freelist */
164 return (WALK_NEXT);
165 }
166
167 static int
168 freelist_walk_step(mdb_walk_state_t *wsp)
169 {
170 uint64_t entry;
171 uintptr_t number = (uintptr_t)wsp->walk_data;
172 char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
173 "INVALID", "INVALID", "INVALID", "INVALID" };
174 int mapshift = SPA_MINBLOCKSHIFT;
175
176 if (mdb_vread(&entry, sizeof (entry), wsp->walk_addr) == -1) {
177 mdb_warn("failed to read freelist entry %p", wsp->walk_addr);
178 return (WALK_DONE);
179 }
180 wsp->walk_addr += sizeof (entry);
181 wsp->walk_data = (void *)(number + 1);
182
183 if (SM_DEBUG_DECODE(entry)) {
184 mdb_printf("DEBUG: %3u %10s: txg=%llu pass=%llu\n",
185 number,
186 ddata[SM_DEBUG_ACTION_DECODE(entry)],
187 SM_DEBUG_TXG_DECODE(entry),
188 SM_DEBUG_SYNCPASS_DECODE(entry));
189 } else {
190 mdb_printf("Entry: %3u offsets=%08llx-%08llx type=%c "
191 "size=%06llx", number,
192 SM_OFFSET_DECODE(entry) << mapshift,
193 (SM_OFFSET_DECODE(entry) + SM_RUN_DECODE(entry)) <<
194 mapshift,
195 SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
196 SM_RUN_DECODE(entry) << mapshift);
197 if (verbose)
198 mdb_printf(" (raw=%012llx)\n", entry);
199 mdb_printf("\n");
200 }
201 return (WALK_NEXT);
202 }
203
204
205 static int
206 dataset_name(uintptr_t addr, char *buf)
207 {
208 static int gotid;
209 static mdb_ctf_id_t dd_id;
210 uintptr_t dd_parent;
211 char dd_myname[MAXNAMELEN];
212
213 if (!gotid) {
214 if (mdb_ctf_lookup_by_name("struct dsl_dir",
215 &dd_id) == -1) {
216 mdb_warn("couldn't find struct dsl_dir");
217 return (DCMD_ERR);
218 }
219 gotid = TRUE;
220 }
221 if (GETMEMBID(addr, &dd_id, dd_parent, dd_parent) ||
222 GETMEMBID(addr, &dd_id, dd_myname, dd_myname)) {
223 return (DCMD_ERR);
224 }
225
226 if (dd_parent) {
227 if (dataset_name(dd_parent, buf))
228 return (DCMD_ERR);
229 strcat(buf, "/");
230 }
231
232 if (dd_myname[0])
233 strcat(buf, dd_myname);
234 else
235 strcat(buf, "???");
236
237 return (0);
238 }
239
240 static int
241 objset_name(uintptr_t addr, char *buf)
242 {
243 static int gotid;
244 static mdb_ctf_id_t osi_id, ds_id;
245 uintptr_t os_dsl_dataset;
246 char ds_snapname[MAXNAMELEN];
247 uintptr_t ds_dir;
248
249 buf[0] = '\0';
250
251 if (!gotid) {
252 if (mdb_ctf_lookup_by_name("struct objset_impl",
253 &osi_id) == -1) {
254 mdb_warn("couldn't find struct objset_impl");
255 return (DCMD_ERR);
256 }
257 if (mdb_ctf_lookup_by_name("struct dsl_dataset",
258 &ds_id) == -1) {
259 mdb_warn("couldn't find struct dsl_dataset");
260 return (DCMD_ERR);
261 }
262
263 gotid = TRUE;
264 }
265
266 if (GETMEMBID(addr, &osi_id, os_dsl_dataset, os_dsl_dataset))
267 return (DCMD_ERR);
268
269 if (os_dsl_dataset == 0) {
270 strcat(buf, "mos");
271 return (0);
272 }
273
274 if (GETMEMBID(os_dsl_dataset, &ds_id, ds_snapname, ds_snapname) ||
275 GETMEMBID(os_dsl_dataset, &ds_id, ds_dir, ds_dir)) {
276 return (DCMD_ERR);
277 }
278
279 if (ds_dir && dataset_name(ds_dir, buf))
280 return (DCMD_ERR);
281
282 if (ds_snapname[0]) {
283 strcat(buf, "@");
284 strcat(buf, ds_snapname);
285 }
286 return (0);
287 }
288
289 static void
290 enum_lookup(char *out, size_t size, mdb_ctf_id_t id, int val,
291 const char *prefix)
292 {
293 const char *cp;
294 size_t len = strlen(prefix);
295
296 if ((cp = mdb_ctf_enum_name(id, val)) != NULL) {
297 if (strncmp(cp, prefix, len) == 0)
298 cp += len;
299 (void) strncpy(out, cp, size);
300 } else {
301 mdb_snprintf(out, size, "? (%d)", val);
302 }
303 }
304
305 /* ARGSUSED */
306 static int
307 zio_pipeline(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
308 {
309 mdb_ctf_id_t pipe_enum;
310 int i;
311 char stage[1024];
312
313 if (mdb_ctf_lookup_by_name("enum zio_stage", &pipe_enum) == -1) {
314 mdb_warn("Could not find enum zio_stage");
315 return (DCMD_ERR);
316 }
317
318 for (i = 0; i < 32; i++) {
319 if (addr & (1U << i)) {
320 enum_lookup(stage, sizeof (stage), pipe_enum, i,
321 "ZIO_STAGE_");
322 mdb_printf(" %s\n", stage);
323 }
324 }
325
326 return (DCMD_OK);
327 }
328
329 /* ARGSUSED */
330 static int
331 zfs_params(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
332 {
333 /*
334 * This table can be approximately generated by running:
335 * egrep "^[a-z0-9_]+ [a-z0-9_]+( =.*)?;" *.c | cut -d ' ' -f 2
336 */
337 static const char *params[] = {
338 "arc_reduce_dnlc_percent",
339 "zfs_arc_max",
340 "zfs_arc_min",
341 "arc_shrink_shift",
342 "zfs_mdcomp_disable",
343 "zfs_prefetch_disable",
344 "zfetch_max_streams",
345 "zfetch_min_sec_reap",
346 "zfetch_block_cap",
347 "zfetch_array_rd_sz",
348 "zfs_default_bs",
349 "zfs_default_ibs",
350 "metaslab_aliquot",
351 "reference_tracking_enable",
352 "reference_history",
353 "zio_taskq_threads",
354 "spa_max_replication_override",
355 "spa_mode",
356 "zfs_flags",
357 "zfs_txg_synctime",
358 "zfs_txg_timeout",
359 "zfs_write_limit_min",
360 "zfs_write_limit_max",
361 "zfs_write_limit_shift",
362 "zfs_write_limit_override",
363 "zfs_no_write_throttle",
364 "zfs_vdev_cache_max",
365 "zfs_vdev_cache_size",
366 "zfs_vdev_cache_bshift",
367 "vdev_mirror_shift",
368 "zfs_vdev_max_pending",
369 "zfs_vdev_min_pending",
370 "zfs_scrub_limit",
371 "zfs_vdev_time_shift",
372 "zfs_vdev_ramp_rate",
373 "zfs_vdev_aggregation_limit",
374 "fzap_default_block_shift",
375 "zfs_immediate_write_sz",
376 "zfs_read_chunk_size",
377 "zil_disable",
378 "zfs_nocacheflush",
379 "metaslab_gang_bang",
380 "zio_injection_enabled",
381 "zvol_immediate_write_sz",
382 };
383 int i;
384
385 for (i = 0; i < sizeof (params) / sizeof (params[0]); i++) {
386 int sz;
387 uint64_t val64;
388 uint32_t *val32p = (uint32_t *)&val64;
389
390 sz = mdb_readvar(&val64, params[i]);
391 if (sz == 4) {
392 mdb_printf("%s = 0x%x\n", params[i], *val32p);
393 } else if (sz == 8) {
394 mdb_printf("%s = 0x%llx\n", params[i], val64);
395 } else {
396 mdb_warn("variable %s not found", params[i]);
397 }
398 }
399
400 return (DCMD_OK);
401 }
402
403 /* ARGSUSED */
404 static int
405 blkptr(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
406 {
407 blkptr_t bp;
408 dmu_object_type_info_t *doti;
409 zio_compress_info_t *zct;
410 zio_checksum_info_t *zci;
411 int i;
412 char buf[MAXPATHLEN];
413
414 if (mdb_vread(&bp, sizeof (blkptr_t), addr) == -1) {
415 mdb_warn("failed to read blkptr_t");
416 return (DCMD_ERR);
417 }
418
419 if (read_symbol("dmu_ot", (void **)&doti) != DCMD_OK)
420 return (DCMD_ERR);
421 for (i = 0; i < DMU_OT_NUMTYPES; i++) {
422 mdb_readstr(buf, sizeof (buf), (uintptr_t)doti[i].ot_name);
423 doti[i].ot_name = local_strdup(buf);
424 }
425
426 if (read_symbol("zio_checksum_table", (void **)&zci) != DCMD_OK)
427 return (DCMD_ERR);
428 for (i = 0; i < ZIO_CHECKSUM_FUNCTIONS; i++) {
429 mdb_readstr(buf, sizeof (buf), (uintptr_t)zci[i].ci_name);
430 zci[i].ci_name = local_strdup(buf);
431 }
432
433 if (read_symbol("zio_compress_table", (void **)&zct) != DCMD_OK)
434 return (DCMD_ERR);
435 for (i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
436 mdb_readstr(buf, sizeof (buf), (uintptr_t)zct[i].ci_name);
437 zct[i].ci_name = local_strdup(buf);
438 }
439
440 /*
441 * Super-ick warning: This code is also duplicated in
442 * cmd/zdb.c . Yeah, I hate code replication, too.
443 */
444 for (i = 0; i < BP_GET_NDVAS(&bp); i++) {
445 dva_t *dva = &bp.blk_dva[i];
446
447 mdb_printf("DVA[%d]: vdev_id %lld / %llx\n", i,
448 DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva));
449 mdb_printf("DVA[%d]: GANG: %-5s GRID: %04x\t"
450 "ASIZE: %llx\n", i, DVA_GET_GANG(dva) ? "TRUE" : "FALSE",
451 DVA_GET_GRID(dva), DVA_GET_ASIZE(dva));
452 mdb_printf("DVA[%d]: :%llu:%llx:%llx:%s%s%s%s\n", i,
453 DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), BP_GET_PSIZE(&bp),
454 BP_SHOULD_BYTESWAP(&bp) ? "e" : "",
455 !DVA_GET_GANG(dva) && BP_GET_LEVEL(&bp) != 0 ? "i" : "",
456 DVA_GET_GANG(dva) ? "g" : "",
457 BP_GET_COMPRESS(&bp) != 0 ? "d" : "");
458 }
459 mdb_printf("LSIZE: %-16llx\t\tPSIZE: %llx\n",
460 BP_GET_LSIZE(&bp), BP_GET_PSIZE(&bp));
461 mdb_printf("ENDIAN: %6s\t\t\t\t\tTYPE: %s\n",
462 BP_GET_BYTEORDER(&bp) ? "LITTLE" : "BIG",
463 doti[BP_GET_TYPE(&bp)].ot_name);
464 mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n",
465 bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill);
466 mdb_printf("CKFUNC: %-16s\t\tCOMP: %s\n",
467 zci[BP_GET_CHECKSUM(&bp)].ci_name,
468 zct[BP_GET_COMPRESS(&bp)].ci_name);
469 mdb_printf("CKSUM: %llx:%llx:%llx:%llx\n",
470 bp.blk_cksum.zc_word[0],
471 bp.blk_cksum.zc_word[1],
472 bp.blk_cksum.zc_word[2],
473 bp.blk_cksum.zc_word[3]);
474
475 return (DCMD_OK);
476 }
477
478 /* ARGSUSED */
479 static int
480 dbuf(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
481 {
482 mdb_ctf_id_t id;
483 dmu_buf_t db;
484 uintptr_t objset;
485 uint8_t level;
486 uint64_t blkid;
487 uint64_t holds;
488 char objectname[32];
489 char blkidname[32];
490 char path[MAXNAMELEN];
491
492 if (DCMD_HDRSPEC(flags)) {
493 mdb_printf(" addr object lvl blkid holds os\n");
494 }
495
496 if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &id) == -1) {
497 mdb_warn("couldn't find struct dmu_buf_impl_t");
498 return (DCMD_ERR);
499 }
500
501 if (GETMEMBID(addr, &id, db_objset, objset) ||
502 GETMEMBID(addr, &id, db, db) ||
503 GETMEMBID(addr, &id, db_level, level) ||
504 GETMEMBID(addr, &id, db_blkid, blkid)) {
505 return (WALK_ERR);
506 }
507
508 if (getrefcount(addr, &id, "db_holds", &holds)) {
509 return (WALK_ERR);
510 }
511
512 if (db.db_object == DMU_META_DNODE_OBJECT)
513 (void) strcpy(objectname, "mdn");
514 else
515 (void) mdb_snprintf(objectname, sizeof (objectname), "%llx",
516 (u_longlong_t)db.db_object);
517
518 if (blkid == DB_BONUS_BLKID)
519 (void) strcpy(blkidname, "bonus");
520 else
521 (void) mdb_snprintf(blkidname, sizeof (blkidname), "%llx",
522 (u_longlong_t)blkid);
523
524 if (objset_name(objset, path)) {
525 return (WALK_ERR);
526 }
527
528 mdb_printf("%p %8s %1u %9s %2llu %s\n",
529 addr, objectname, level, blkidname, holds, path);
530
531 return (DCMD_OK);
532 }
533
534 /* ARGSUSED */
535 static int
536 dbuf_stats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
537 {
538 #define HISTOSZ 32
539 uintptr_t dbp;
540 dmu_buf_impl_t db;
541 dbuf_hash_table_t ht;
542 uint64_t bucket, ndbufs;
543 uint64_t histo[HISTOSZ];
544 uint64_t histo2[HISTOSZ];
545 int i, maxidx;
546
547 if (mdb_readvar(&ht, "dbuf_hash_table") == -1) {
548 mdb_warn("failed to read 'dbuf_hash_table'");
549 return (DCMD_ERR);
550 }
551
552 for (i = 0; i < HISTOSZ; i++) {
553 histo[i] = 0;
554 histo2[i] = 0;
555 }
556
557 ndbufs = 0;
558 for (bucket = 0; bucket < ht.hash_table_mask+1; bucket++) {
559 int len;
560
561 if (mdb_vread(&dbp, sizeof (void *),
562 (uintptr_t)(ht.hash_table+bucket)) == -1) {
563 mdb_warn("failed to read hash bucket %u at %p",
564 bucket, ht.hash_table+bucket);
565 return (DCMD_ERR);
566 }
567
568 len = 0;
569 while (dbp != 0) {
570 if (mdb_vread(&db, sizeof (dmu_buf_impl_t),
571 dbp) == -1) {
572 mdb_warn("failed to read dbuf at %p", dbp);
573 return (DCMD_ERR);
574 }
575 dbp = (uintptr_t)db.db_hash_next;
576 for (i = MIN(len, HISTOSZ - 1); i >= 0; i--)
577 histo2[i]++;
578 len++;
579 ndbufs++;
580 }
581
582 if (len >= HISTOSZ)
583 len = HISTOSZ-1;
584 histo[len]++;
585 }
586
587 mdb_printf("hash table has %llu buckets, %llu dbufs "
588 "(avg %llu buckets/dbuf)\n",
589 ht.hash_table_mask+1, ndbufs,
590 (ht.hash_table_mask+1)/ndbufs);
591
592 mdb_printf("\n");
593 maxidx = 0;
594 for (i = 0; i < HISTOSZ; i++)
595 if (histo[i] > 0)
596 maxidx = i;
597 mdb_printf("hash chain length number of buckets\n");
598 for (i = 0; i <= maxidx; i++)
599 mdb_printf("%u %llu\n", i, histo[i]);
600
601 mdb_printf("\n");
602 maxidx = 0;
603 for (i = 0; i < HISTOSZ; i++)
604 if (histo2[i] > 0)
605 maxidx = i;
606 mdb_printf("hash chain depth number of dbufs\n");
607 for (i = 0; i <= maxidx; i++)
608 mdb_printf("%u or more %llu %llu%%\n",
609 i, histo2[i], histo2[i]*100/ndbufs);
610
611
612 return (DCMD_OK);
613 }
614
/*
 * Filter state handed from dbufs() to dbufs_cb().  Every numeric field set
 * to DBUFS_UNSET, and an osname of NULL, means "do not filter on this".
 */
typedef struct dbufs_data {
	mdb_ctf_id_t id;	/* CTF id of struct dmu_buf_impl */
	uint64_t objset;	/* compared against the dbuf's db_objset */
	uint64_t object;	/* compared against db.db_object */
	uint64_t level;		/* compared against db_level */
	uint64_t blkid;		/* compared against db_blkid */
	char *osname;		/* compared against objset_name() output */
} dbufs_data_t;

/* Sentinel stored in a numeric dbufs_data_t field that is not filtered on. */
#define DBUFS_UNSET (0xbaddcafedeadbeefULL)
625
626 /* ARGSUSED */
627 static int
628 dbufs_cb(uintptr_t addr, const void *unknown, void *arg)
629 {
630 dbufs_data_t *data = arg;
631 uintptr_t objset;
632 dmu_buf_t db;
633 uint8_t level;
634 uint64_t blkid;
635 char osname[MAXNAMELEN];
636
637 if (GETMEMBID(addr, &data->id, db_objset, objset) ||
638 GETMEMBID(addr, &data->id, db, db) ||
639 GETMEMBID(addr, &data->id, db_level, level) ||
640 GETMEMBID(addr, &data->id, db_blkid, blkid)) {
641 return (WALK_ERR);
642 }
643
644 if ((data->objset == DBUFS_UNSET || data->objset == objset) &&
645 (data->osname == NULL || (objset_name(objset, osname) == 0 &&
646 strcmp(data->osname, osname) == 0)) &&
647 (data->object == DBUFS_UNSET || data->object == db.db_object) &&
648 (data->level == DBUFS_UNSET || data->level == level) &&
649 (data->blkid == DBUFS_UNSET || data->blkid == blkid)) {
650 mdb_printf("%#lr\n", addr);
651 }
652 return (WALK_NEXT);
653 }
654
655 /* ARGSUSED */
656 static int
657 dbufs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
658 {
659 dbufs_data_t data;
660 char *object = NULL;
661 char *blkid = NULL;
662
663 data.objset = data.object = data.level = data.blkid = DBUFS_UNSET;
664 data.osname = NULL;
665
666 if (mdb_getopts(argc, argv,
667 'O', MDB_OPT_UINT64, &data.objset,
668 'n', MDB_OPT_STR, &data.osname,
669 'o', MDB_OPT_STR, &object,
670 'l', MDB_OPT_UINT64, &data.level,
671 'b', MDB_OPT_STR, &blkid) != argc) {
672 return (DCMD_USAGE);
673 }
674
675 if (object) {
676 if (strcmp(object, "mdn") == 0) {
677 data.object = DMU_META_DNODE_OBJECT;
678 } else {
679 data.object = mdb_strtoull(object);
680 }
681 }
682
683 if (blkid) {
684 if (strcmp(blkid, "bonus") == 0) {
685 data.blkid = DB_BONUS_BLKID;
686 } else {
687 data.blkid = mdb_strtoull(blkid);
688 }
689 }
690
691 if (mdb_ctf_lookup_by_name("struct dmu_buf_impl", &data.id) == -1) {
692 mdb_warn("couldn't find struct dmu_buf_impl_t");
693 return (DCMD_ERR);
694 }
695
696 if (mdb_walk("dmu_buf_impl_t", dbufs_cb, &data) != 0) {
697 mdb_warn("can't walk dbufs");
698 return (DCMD_ERR);
699 }
700
701 return (DCMD_OK);
702 }
703
/*
 * Search state handed from abuf_find() to abuf_find_cb().
 */
typedef struct abuf_find_data {
	dva_t dva;		/* the DVA (both words) being searched for */
	mdb_ctf_id_t id;	/* CTF id of struct arc_buf_hdr */
} abuf_find_data_t;
708
709 /* ARGSUSED */
710 static int
711 abuf_find_cb(uintptr_t addr, const void *unknown, void *arg)
712 {
713 abuf_find_data_t *data = arg;
714 dva_t dva;
715
716 if (GETMEMBID(addr, &data->id, b_dva, dva)) {
717 return (WALK_ERR);
718 }
719
720 if (dva.dva_word[0] == data->dva.dva_word[0] &&
721 dva.dva_word[1] == data->dva.dva_word[1]) {
722 mdb_printf("%#lr\n", addr);
723 }
724 return (WALK_NEXT);
725 }
726
727 /* ARGSUSED */
728 static int
729 abuf_find(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
730 {
731 abuf_find_data_t data;
732 GElf_Sym sym;
733 int i;
734 const char *syms[] = {
735 "ARC_mru",
736 "ARC_mru_ghost",
737 "ARC_mfu",
738 "ARC_mfu_ghost",
739 };
740
741 if (argc != 2)
742 return (DCMD_USAGE);
743
744 for (i = 0; i < 2; i ++) {
745 switch (argv[i].a_type) {
746 case MDB_TYPE_STRING:
747 data.dva.dva_word[i] = mdb_strtoull(argv[i].a_un.a_str);
748 break;
749 case MDB_TYPE_IMMEDIATE:
750 data.dva.dva_word[i] = argv[i].a_un.a_val;
751 break;
752 default:
753 return (DCMD_USAGE);
754 }
755 }
756
757 if (mdb_ctf_lookup_by_name("struct arc_buf_hdr", &data.id) == -1) {
758 mdb_warn("couldn't find struct arc_buf_hdr");
759 return (DCMD_ERR);
760 }
761
762 for (i = 0; i < sizeof (syms) / sizeof (syms[0]); i++) {
763 if (mdb_lookup_by_name(syms[i], &sym)) {
764 mdb_warn("can't find symbol %s", syms[i]);
765 return (DCMD_ERR);
766 }
767
768 if (mdb_pwalk("list", abuf_find_cb, &data, sym.st_value) != 0) {
769 mdb_warn("can't walk %s", syms[i]);
770 return (DCMD_ERR);
771 }
772 }
773
774 return (DCMD_OK);
775 }
776
777 /*ARGSUSED*/
778 static int
779 arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
780 {
781 kstat_named_t *stats;
782 GElf_Sym sym;
783 int nstats, i;
784 uint_t opt_a = FALSE;
785 uint_t opt_b = FALSE;
786 uint_t shift = 0;
787 const char *suffix;
788
789 static const char *bytestats[] = {
790 "p", "c", "c_min", "c_max", "size", NULL
791 };
792
793 static const char *extras[] = {
794 "arc_no_grow", "arc_tempreserve",
795 "arc_meta_used", "arc_meta_limit", "arc_meta_max",
796 NULL
797 };
798
799 if (mdb_lookup_by_name("arc_stats", &sym) == -1) {
800 mdb_warn("failed to find 'arc_stats'");
801 return (DCMD_ERR);
802 }
803
804 stats = mdb_zalloc(sym.st_size, UM_SLEEP | UM_GC);
805
806 if (mdb_vread(stats, sym.st_size, sym.st_value) == -1) {
807 mdb_warn("couldn't read 'arc_stats' at %p", sym.st_value);
808 return (DCMD_ERR);
809 }
810
811 nstats = sym.st_size / sizeof (kstat_named_t);
812
813 /* NB: -a / opt_a are ignored for backwards compatability */
814 if (mdb_getopts(argc, argv,
815 'a', MDB_OPT_SETBITS, TRUE, &opt_a,
816 'b', MDB_OPT_SETBITS, TRUE, &opt_b,
817 'k', MDB_OPT_SETBITS, 10, &shift,
818 'm', MDB_OPT_SETBITS, 20, &shift,
819 'g', MDB_OPT_SETBITS, 30, &shift,
820 NULL) != argc)
821 return (DCMD_USAGE);
822
823 if (!opt_b && !shift)
824 shift = 20;
825
826 switch (shift) {
827 case 0:
828 suffix = "B";
829 break;
830 case 10:
831 suffix = "KB";
832 break;
833 case 20:
834 suffix = "MB";
835 break;
836 case 30:
837 suffix = "GB";
838 break;
839 default:
840 suffix = "XX";
841 }
842
843 for (i = 0; i < nstats; i++) {
844 int j;
845 boolean_t bytes = B_FALSE;
846
847 for (j = 0; bytestats[j]; j++) {
848 if (strcmp(stats[i].name, bytestats[j]) == 0) {
849 bytes = B_TRUE;
850 break;
851 }
852 }
853
854 if (bytes) {
855 mdb_printf("%-25s = %9llu %s\n", stats[i].name,
856 stats[i].value.ui64 >> shift, suffix);
857 } else {
858 mdb_printf("%-25s = %9llu\n", stats[i].name,
859 stats[i].value.ui64);
860 }
861 }
862
863 for (i = 0; extras[i]; i++) {
864 uint64_t buf;
865
866 if (mdb_lookup_by_name(extras[i], &sym) == -1) {
867 mdb_warn("failed to find '%s'", extras[i]);
868 return (DCMD_ERR);
869 }
870
871 if (sym.st_size != sizeof (uint64_t) &&
872 sym.st_size != sizeof (uint32_t)) {
873 mdb_warn("expected scalar for variable '%s'\n",
874 extras[i]);
875 return (DCMD_ERR);
876 }
877
878 if (mdb_vread(&buf, sym.st_size, sym.st_value) == -1) {
879 mdb_warn("couldn't read '%s'", extras[i]);
880 return (DCMD_ERR);
881 }
882
883 mdb_printf("%-25s = ", extras[i]);
884
885 /* NB: all the 64-bit extras happen to be byte counts */
886 if (sym.st_size == sizeof (uint64_t))
887 mdb_printf("%9llu %s\n", buf >> shift, suffix);
888
889 if (sym.st_size == sizeof (uint32_t))
890 mdb_printf("%9d\n", *((uint32_t *)&buf));
891 }
892 return (DCMD_OK);
893 }
894
895 /*
896 * ::spa
897 *
898 * -c Print configuration information as well
899 * -v Print vdev state
900 * -e Print vdev error stats
901 *
902 * Print a summarized spa_t. When given no arguments, prints out a table of all
903 * active pools on the system.
904 */
905 /* ARGSUSED */
906 static int
907 spa_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
908 {
909 spa_t spa;
910 const char *statetab[] = { "ACTIVE", "EXPORTED", "DESTROYED",
911 "SPARE", "L2CACHE", "UNINIT", "UNAVAIL", "POTENTIAL" };
912 const char *state;
913 int config = FALSE;
914 int vdevs = FALSE;
915 int errors = FALSE;
916
917 if (mdb_getopts(argc, argv,
918 'c', MDB_OPT_SETBITS, TRUE, &config,
919 'v', MDB_OPT_SETBITS, TRUE, &vdevs,
920 'e', MDB_OPT_SETBITS, TRUE, &errors,
921 NULL) != argc)
922 return (DCMD_USAGE);
923
924 if (!(flags & DCMD_ADDRSPEC)) {
925 if (mdb_walk_dcmd("spa", "spa", argc, argv) == -1) {
926 mdb_warn("can't walk spa");
927 return (DCMD_ERR);
928 }
929
930 return (DCMD_OK);
931 }
932
933 if (flags & DCMD_PIPE_OUT) {
934 mdb_printf("%#lr\n", addr);
935 return (DCMD_OK);
936 }
937
938 if (DCMD_HDRSPEC(flags))
939 mdb_printf("%<u>%-?s %9s %-*s%</u>\n", "ADDR", "STATE",
940 sizeof (uintptr_t) == 4 ? 60 : 52, "NAME");
941
942 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
943 mdb_warn("failed to read spa_t at %p", addr);
944 return (DCMD_ERR);
945 }
946
947 if (spa.spa_state < 0 || spa.spa_state > POOL_STATE_UNAVAIL)
948 state = "UNKNOWN";
949 else
950 state = statetab[spa.spa_state];
951
952 mdb_printf("%0?p %9s %s\n", addr, state, spa.spa_name);
953
954 if (config) {
955 mdb_printf("\n");
956 mdb_inc_indent(4);
957 if (mdb_call_dcmd("spa_config", addr, flags, 0,
958 NULL) != DCMD_OK)
959 return (DCMD_ERR);
960 mdb_dec_indent(4);
961 }
962
963 if (vdevs || errors) {
964 mdb_arg_t v;
965
966 v.a_type = MDB_TYPE_STRING;
967 v.a_un.a_str = "-e";
968
969 mdb_printf("\n");
970 mdb_inc_indent(4);
971 if (mdb_call_dcmd("spa_vdevs", addr, flags, errors ? 1 : 0,
972 &v) != DCMD_OK)
973 return (DCMD_ERR);
974 mdb_dec_indent(4);
975 }
976
977 return (DCMD_OK);
978 }
979
980 /*
981 * ::spa_config
982 *
983 * Given a spa_t, print the configuration information stored in spa_config.
984 * Since it's just an nvlist, format it as an indented list of name=value pairs.
985 * We simply read the value of spa_config and pass off to ::nvlist.
986 */
987 /* ARGSUSED */
988 static int
989 spa_print_config(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
990 {
991 spa_t spa;
992
993 if (argc != 0 || !(flags & DCMD_ADDRSPEC))
994 return (DCMD_USAGE);
995
996 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
997 mdb_warn("failed to read spa_t at %p", addr);
998 return (DCMD_ERR);
999 }
1000
1001 if (spa.spa_config == NULL) {
1002 mdb_printf("(none)\n");
1003 return (DCMD_OK);
1004 }
1005
1006 return (mdb_call_dcmd("nvlist", (uintptr_t)spa.spa_config, flags,
1007 0, NULL));
1008 }
1009
1010 /*
1011 * ::vdev
1012 *
1013 * Print out a summarized vdev_t, in the following form:
1014 *
1015 * ADDR STATE AUX DESC
1016 * fffffffbcde23df0 HEALTHY - /dev/dsk/c0t0d0
1017 *
1018 * If '-r' is specified, recursively visit all children.
1019 *
1020 * With '-e', the statistics associated with the vdev are printed as well.
1021 */
1022 static int
1023 do_print_vdev(uintptr_t addr, int flags, int depth, int stats,
1024 int recursive)
1025 {
1026 vdev_t vdev;
1027 char desc[MAXNAMELEN];
1028 int c, children;
1029 uintptr_t *child;
1030 const char *state, *aux;
1031
1032 if (mdb_vread(&vdev, sizeof (vdev), (uintptr_t)addr) == -1) {
1033 mdb_warn("failed to read vdev_t at %p\n", (uintptr_t)addr);
1034 return (DCMD_ERR);
1035 }
1036
1037 if (flags & DCMD_PIPE_OUT) {
1038 mdb_printf("%#lr", addr);
1039 } else {
1040 if (vdev.vdev_path != NULL) {
1041 if (mdb_readstr(desc, sizeof (desc),
1042 (uintptr_t)vdev.vdev_path) == -1) {
1043 mdb_warn("failed to read vdev_path at %p\n",
1044 vdev.vdev_path);
1045 return (DCMD_ERR);
1046 }
1047 } else if (vdev.vdev_ops != NULL) {
1048 vdev_ops_t ops;
1049 if (mdb_vread(&ops, sizeof (ops),
1050 (uintptr_t)vdev.vdev_ops) == -1) {
1051 mdb_warn("failed to read vdev_ops at %p\n",
1052 vdev.vdev_ops);
1053 return (DCMD_ERR);
1054 }
1055 (void) strcpy(desc, ops.vdev_op_type);
1056 } else {
1057 (void) strcpy(desc, "<unknown>");
1058 }
1059
1060 if (depth == 0 && DCMD_HDRSPEC(flags))
1061 mdb_printf("%<u>%-?s %-9s %-12s %-*s%</u>\n",
1062 "ADDR", "STATE", "AUX",
1063 sizeof (uintptr_t) == 4 ? 43 : 35,
1064 "DESCRIPTION");
1065
1066 mdb_printf("%0?p ", addr);
1067
1068 switch (vdev.vdev_state) {
1069 case VDEV_STATE_CLOSED:
1070 state = "CLOSED";
1071 break;
1072 case VDEV_STATE_OFFLINE:
1073 state = "OFFLINE";
1074 break;
1075 case VDEV_STATE_CANT_OPEN:
1076 state = "CANT_OPEN";
1077 break;
1078 case VDEV_STATE_DEGRADED:
1079 state = "DEGRADED";
1080 break;
1081 case VDEV_STATE_HEALTHY:
1082 state = "HEALTHY";
1083 break;
1084 case VDEV_STATE_REMOVED:
1085 state = "REMOVED";
1086 break;
1087 case VDEV_STATE_FAULTED:
1088 state = "FAULTED";
1089 break;
1090 default:
1091 state = "UNKNOWN";
1092 break;
1093 }
1094
1095 switch (vdev.vdev_stat.vs_aux) {
1096 case VDEV_AUX_NONE:
1097 aux = "-";
1098 break;
1099 case VDEV_AUX_OPEN_FAILED:
1100 aux = "OPEN_FAILED";
1101 break;
1102 case VDEV_AUX_CORRUPT_DATA:
1103 aux = "CORRUPT_DATA";
1104 break;
1105 case VDEV_AUX_NO_REPLICAS:
1106 aux = "NO_REPLICAS";
1107 break;
1108 case VDEV_AUX_BAD_GUID_SUM:
1109 aux = "BAD_GUID_SUM";
1110 break;
1111 case VDEV_AUX_TOO_SMALL:
1112 aux = "TOO_SMALL";
1113 break;
1114 case VDEV_AUX_BAD_LABEL:
1115 aux = "BAD_LABEL";
1116 break;
1117 case VDEV_AUX_VERSION_NEWER:
1118 aux = "VERS_NEWER";
1119 break;
1120 case VDEV_AUX_VERSION_OLDER:
1121 aux = "VERS_OLDER";
1122 break;
1123 case VDEV_AUX_SPARED:
1124 aux = "SPARED";
1125 break;
1126 case VDEV_AUX_ERR_EXCEEDED:
1127 aux = "ERR_EXCEEDED";
1128 break;
1129 case VDEV_AUX_IO_FAILURE:
1130 aux = "IO_FAILURE";
1131 break;
1132 case VDEV_AUX_BAD_LOG:
1133 aux = "BAD_LOG";
1134 break;
1135 default:
1136 aux = "UNKNOWN";
1137 break;
1138 }
1139
1140 mdb_printf("%-9s %-12s %*s%s\n", state, aux, depth, "", desc);
1141
1142 if (stats) {
1143 vdev_stat_t *vs = &vdev.vdev_stat;
1144 int i;
1145
1146 mdb_inc_indent(4);
1147 mdb_printf("\n");
1148 mdb_printf("%<u> %12s %12s %12s %12s "
1149 "%12s%</u>\n", "READ", "WRITE", "FREE", "CLAIM",
1150 "IOCTL");
1151 mdb_printf("OPS ");
1152 for (i = 1; i < ZIO_TYPES; i++)
1153 mdb_printf("%11#llx%s", vs->vs_ops[i],
1154 i == ZIO_TYPES - 1 ? "" : " ");
1155 mdb_printf("\n");
1156 mdb_printf("BYTES ");
1157 for (i = 1; i < ZIO_TYPES; i++)
1158 mdb_printf("%11#llx%s", vs->vs_bytes[i],
1159 i == ZIO_TYPES - 1 ? "" : " ");
1160
1161
1162 mdb_printf("\n");
1163 mdb_printf("EREAD %10#llx\n", vs->vs_read_errors);
1164 mdb_printf("EWRITE %10#llx\n", vs->vs_write_errors);
1165 mdb_printf("ECKSUM %10#llx\n",
1166 vs->vs_checksum_errors);
1167 mdb_dec_indent(4);
1168 }
1169
1170 if (stats)
1171 mdb_printf("\n");
1172 }
1173
1174 children = vdev.vdev_children;
1175
1176 if (children == 0 || !recursive)
1177 return (DCMD_OK);
1178
1179 child = mdb_alloc(children * sizeof (void *), UM_SLEEP | UM_GC);
1180 if (mdb_vread(child, children * sizeof (void *),
1181 (uintptr_t)vdev.vdev_child) == -1) {
1182 mdb_warn("failed to read vdev children at %p", vdev.vdev_child);
1183 return (DCMD_ERR);
1184 }
1185
1186 for (c = 0; c < children; c++) {
1187 if (do_print_vdev(child[c], flags, depth + 2, stats,
1188 recursive))
1189 return (DCMD_ERR);
1190 }
1191
1192 return (DCMD_OK);
1193 }
1194
1195 static int
1196 vdev_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1197 {
1198 int recursive = FALSE;
1199 int stats = FALSE;
1200 uint64_t depth = 0;
1201
1202 if (mdb_getopts(argc, argv,
1203 'r', MDB_OPT_SETBITS, TRUE, &recursive,
1204 'e', MDB_OPT_SETBITS, TRUE, &stats,
1205 'd', MDB_OPT_UINT64, &depth,
1206 NULL) != argc)
1207 return (DCMD_USAGE);
1208
1209 if (!(flags & DCMD_ADDRSPEC)) {
1210 mdb_warn("no vdev_t address given\n");
1211 return (DCMD_ERR);
1212 }
1213
1214 return (do_print_vdev(addr, flags, (int)depth, stats, recursive));
1215 }
1216
/*
 * State of a metaslab walk: the children of the pool's root vdev, and the
 * metaslab array of whichever child is currently being walked.
 */
typedef struct metaslab_walk_data {
	uint64_t mw_numvdevs;	/* number of entries in mw_vdevs */
	uintptr_t *mw_vdevs;	/* copy of the root vdev's vdev_child array */
	int mw_curvdev;		/* index of the vdev being walked */
	uint64_t mw_nummss;	/* vdev_ms_count of the current vdev */
	uintptr_t *mw_mss;	/* copy of its vdev_ms array; NULL = not read */
	int mw_curms;		/* index of the next metaslab to visit */
} metaslab_walk_data_t;
1225
1226 static int
1227 metaslab_walk_step(mdb_walk_state_t *wsp)
1228 {
1229 metaslab_walk_data_t *mw = wsp->walk_data;
1230 metaslab_t ms;
1231 uintptr_t msp;
1232
1233 if (mw->mw_curvdev >= mw->mw_numvdevs)
1234 return (WALK_DONE);
1235
1236 if (mw->mw_mss == NULL) {
1237 uintptr_t mssp;
1238 uintptr_t vdevp;
1239
1240 ASSERT(mw->mw_curms == 0);
1241 ASSERT(mw->mw_nummss == 0);
1242
1243 vdevp = mw->mw_vdevs[mw->mw_curvdev];
1244 if (GETMEMB(vdevp, struct vdev, vdev_ms, mssp) ||
1245 GETMEMB(vdevp, struct vdev, vdev_ms_count, mw->mw_nummss)) {
1246 return (WALK_ERR);
1247 }
1248
1249 mw->mw_mss = mdb_alloc(mw->mw_nummss * sizeof (void*),
1250 UM_SLEEP | UM_GC);
1251 if (mdb_vread(mw->mw_mss, mw->mw_nummss * sizeof (void*),
1252 mssp) == -1) {
1253 mdb_warn("failed to read vdev_ms at %p", mssp);
1254 return (WALK_ERR);
1255 }
1256 }
1257
1258 if (mw->mw_curms >= mw->mw_nummss) {
1259 mw->mw_mss = NULL;
1260 mw->mw_curms = 0;
1261 mw->mw_nummss = 0;
1262 mw->mw_curvdev++;
1263 return (WALK_NEXT);
1264 }
1265
1266 msp = mw->mw_mss[mw->mw_curms];
1267 if (mdb_vread(&ms, sizeof (metaslab_t), msp) == -1) {
1268 mdb_warn("failed to read metaslab_t at %p", msp);
1269 return (WALK_ERR);
1270 }
1271
1272 mw->mw_curms++;
1273
1274 return (wsp->walk_callback(msp, &ms, wsp->walk_cbdata));
1275 }
1276
1277 /* ARGSUSED */
1278 static int
1279 metaslab_walk_init(mdb_walk_state_t *wsp)
1280 {
1281 metaslab_walk_data_t *mw;
1282 uintptr_t root_vdevp;
1283 uintptr_t childp;
1284
1285 if (wsp->walk_addr == NULL) {
1286 mdb_warn("must supply address of spa_t\n");
1287 return (WALK_ERR);
1288 }
1289
1290 mw = mdb_zalloc(sizeof (metaslab_walk_data_t), UM_SLEEP | UM_GC);
1291
1292 if (GETMEMB(wsp->walk_addr, struct spa, spa_root_vdev, root_vdevp) ||
1293 GETMEMB(root_vdevp, struct vdev, vdev_children, mw->mw_numvdevs) ||
1294 GETMEMB(root_vdevp, struct vdev, vdev_child, childp)) {
1295 return (DCMD_ERR);
1296 }
1297
1298 mw->mw_vdevs = mdb_alloc(mw->mw_numvdevs * sizeof (void *),
1299 UM_SLEEP | UM_GC);
1300 if (mdb_vread(mw->mw_vdevs, mw->mw_numvdevs * sizeof (void *),
1301 childp) == -1) {
1302 mdb_warn("failed to read root vdev children at %p", childp);
1303 return (DCMD_ERR);
1304 }
1305
1306 wsp->walk_data = mw;
1307
1308 return (WALK_NEXT);
1309 }
1310
/*
 * Local holder for the spa members that spa_space() fetches individually
 * with GETMEMB, so the full spa_t need not be read.
 */
typedef struct mdb_spa {
	uintptr_t spa_dsl_pool;		/* target address of the dsl_pool */
	uintptr_t spa_root_vdev;	/* target address of the root vdev */
} mdb_spa_t;
1315
/*
 * Local holder for the dsl_dir members that spa_space() fetches with GETMEMB.
 */
typedef struct mdb_dsl_dir {
	uintptr_t dd_phys;	/* target address of the dsl_dir_phys */
	int64_t dd_space_towrite[TXG_SIZE];	/* one value per txg slot */
} mdb_dsl_dir_t;
1320
/*
 * Local holder for the dsl_dir_phys byte counters that spa_space() fetches
 * with GETMEMB and prints.
 */
typedef struct mdb_dsl_dir_phys {
	uint64_t dd_used_bytes;
	uint64_t dd_compressed_bytes;
	uint64_t dd_uncompressed_bytes;
} mdb_dsl_dir_phys_t;
1326
/*
 * Local holder for a subset of vdev members.
 * NOTE(review): no user of this type is visible in this part of the file;
 * presumably it follows the same GETMEMB pattern as the other mdb_* holders.
 */
typedef struct mdb_vdev {
	uintptr_t vdev_parent;
	uintptr_t vdev_ms;
	uint64_t vdev_ms_count;
	vdev_stat_t vdev_stat;
} mdb_vdev_t;
1333
/*
 * Local holder for the metaslab members that space_cb() fetches with GETMEMB.
 */
typedef struct mdb_metaslab {
	space_map_t ms_allocmap[TXG_SIZE];	/* one map per txg slot */
	space_map_t ms_freemap[TXG_SIZE];	/* one map per txg slot */
	space_map_t ms_map;
	space_map_obj_t ms_smo;
	space_map_obj_t ms_smo_syncing;
} mdb_metaslab_t;
1341
/*
 * Running totals accumulated by space_cb() over every metaslab of a pool and
 * printed by spa_space().
 */
typedef struct space_data {
	uint64_t ms_allocmap[TXG_SIZE];	/* sum of ms_allocmap[i].sm_space */
	uint64_t ms_freemap[TXG_SIZE];	/* sum of ms_freemap[i].sm_space */
	uint64_t ms_map;		/* sum of ms_map.sm_space */
	uint64_t avail;		/* sum of ms_map.sm_size - ms_smo.smo_alloc */
	uint64_t nowavail;	/* same, using ms_smo_syncing.smo_alloc */
} space_data_t;
1349
1350 /* ARGSUSED */
1351 static int
1352 space_cb(uintptr_t addr, const void *unknown, void *arg)
1353 {
1354 space_data_t *sd = arg;
1355 mdb_metaslab_t ms;
1356
1357 if (GETMEMB(addr, struct metaslab, ms_allocmap, ms.ms_allocmap) ||
1358 GETMEMB(addr, struct metaslab, ms_freemap, ms.ms_freemap) ||
1359 GETMEMB(addr, struct metaslab, ms_map, ms.ms_map) ||
1360 GETMEMB(addr, struct metaslab, ms_smo, ms.ms_smo) ||
1361 GETMEMB(addr, struct metaslab, ms_smo_syncing, ms.ms_smo_syncing)) {
1362 return (WALK_ERR);
1363 }
1364
1365 sd->ms_allocmap[0] += ms.ms_allocmap[0].sm_space;
1366 sd->ms_allocmap[1] += ms.ms_allocmap[1].sm_space;
1367 sd->ms_allocmap[2] += ms.ms_allocmap[2].sm_space;
1368 sd->ms_allocmap[3] += ms.ms_allocmap[3].sm_space;
1369 sd->ms_freemap[0] += ms.ms_freemap[0].sm_space;
1370 sd->ms_freemap[1] += ms.ms_freemap[1].sm_space;
1371 sd->ms_freemap[2] += ms.ms_freemap[2].sm_space;
1372 sd->ms_freemap[3] += ms.ms_freemap[3].sm_space;
1373 sd->ms_map += ms.ms_map.sm_space;
1374 sd->avail += ms.ms_map.sm_size - ms.ms_smo.smo_alloc;
1375 sd->nowavail += ms.ms_map.sm_size - ms.ms_smo_syncing.smo_alloc;
1376
1377 return (WALK_NEXT);
1378 }
1379
1380 /*
1381 * ::spa_space [-b]
1382 *
1383 * Given a spa_t, print out it's on-disk space usage and in-core
1384 * estimates of future usage. If -b is given, print space in bytes.
1385 * Otherwise print in megabytes.
1386 */
1387 /* ARGSUSED */
1388 static int
1389 spa_space(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1390 {
1391 mdb_spa_t spa;
1392 uintptr_t dp_root_dir;
1393 mdb_dsl_dir_t dd;
1394 mdb_dsl_dir_phys_t dsp;
1395 uint64_t children;
1396 uintptr_t childaddr;
1397 space_data_t sd;
1398 int shift = 20;
1399 char *suffix = "M";
1400 int bits = FALSE;
1401
1402 if (mdb_getopts(argc, argv, 'b', MDB_OPT_SETBITS, TRUE, &bits, NULL) !=
1403 argc)
1404 return (DCMD_USAGE);
1405 if (!(flags & DCMD_ADDRSPEC))
1406 return (DCMD_USAGE);
1407
1408 if (bits) {
1409 shift = 0;
1410 suffix = "";
1411 }
1412
1413 if (GETMEMB(addr, struct spa, spa_dsl_pool, spa.spa_dsl_pool) ||
1414 GETMEMB(addr, struct spa, spa_root_vdev, spa.spa_root_vdev) ||
1415 GETMEMB(spa.spa_root_vdev, struct vdev, vdev_children, children) ||
1416 GETMEMB(spa.spa_root_vdev, struct vdev, vdev_child, childaddr) ||
1417 GETMEMB(spa.spa_dsl_pool, struct dsl_pool,
1418 dp_root_dir, dp_root_dir) ||
1419 GETMEMB(dp_root_dir, struct dsl_dir, dd_phys, dd.dd_phys) ||
1420 GETMEMB(dp_root_dir, struct dsl_dir,
1421 dd_space_towrite, dd.dd_space_towrite) ||
1422 GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1423 dd_used_bytes, dsp.dd_used_bytes) ||
1424 GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1425 dd_compressed_bytes, dsp.dd_compressed_bytes) ||
1426 GETMEMB(dd.dd_phys, struct dsl_dir_phys,
1427 dd_uncompressed_bytes, dsp.dd_uncompressed_bytes)) {
1428 return (DCMD_ERR);
1429 }
1430
1431 mdb_printf("dd_space_towrite = %llu%s %llu%s %llu%s %llu%s\n",
1432 dd.dd_space_towrite[0] >> shift, suffix,
1433 dd.dd_space_towrite[1] >> shift, suffix,
1434 dd.dd_space_towrite[2] >> shift, suffix,
1435 dd.dd_space_towrite[3] >> shift, suffix);
1436
1437 mdb_printf("dd_phys.dd_used_bytes = %llu%s\n",
1438 dsp.dd_used_bytes >> shift, suffix);
1439 mdb_printf("dd_phys.dd_compressed_bytes = %llu%s\n",
1440 dsp.dd_compressed_bytes >> shift, suffix);
1441 mdb_printf("dd_phys.dd_uncompressed_bytes = %llu%s\n",
1442 dsp.dd_uncompressed_bytes >> shift, suffix);
1443
1444 bzero(&sd, sizeof (sd));
1445 if (mdb_pwalk("metaslab", space_cb, &sd, addr) != 0) {
1446 mdb_warn("can't walk metaslabs");
1447 return (DCMD_ERR);
1448 }
1449
1450 mdb_printf("ms_allocmap = %llu%s %llu%s %llu%s %llu%s\n",
1451 sd.ms_allocmap[0] >> shift, suffix,
1452 sd.ms_allocmap[1] >> shift, suffix,
1453 sd.ms_allocmap[2] >> shift, suffix,
1454 sd.ms_allocmap[3] >> shift, suffix);
1455 mdb_printf("ms_freemap = %llu%s %llu%s %llu%s %llu%s\n",
1456 sd.ms_freemap[0] >> shift, suffix,
1457 sd.ms_freemap[1] >> shift, suffix,
1458 sd.ms_freemap[2] >> shift, suffix,
1459 sd.ms_freemap[3] >> shift, suffix);
1460 mdb_printf("ms_map = %llu%s\n", sd.ms_map >> shift, suffix);
1461 mdb_printf("last synced avail = %llu%s\n", sd.avail >> shift, suffix);
1462 mdb_printf("current syncing avail = %llu%s\n",
1463 sd.nowavail >> shift, suffix);
1464
1465 return (DCMD_OK);
1466 }
1467
1468 /*
1469 * ::spa_verify
1470 *
1471 * Given a spa_t, verify that that the pool is self-consistent.
1472 * Currently, it only checks to make sure that the vdev tree exists.
1473 */
1474 /* ARGSUSED */
1475 static int
1476 spa_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1477 {
1478 spa_t spa;
1479
1480 if (argc != 0 || !(flags & DCMD_ADDRSPEC))
1481 return (DCMD_USAGE);
1482
1483 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1484 mdb_warn("failed to read spa_t at %p", addr);
1485 return (DCMD_ERR);
1486 }
1487
1488 if (spa.spa_root_vdev == NULL) {
1489 mdb_printf("no vdev tree present\n");
1490 return (DCMD_OK);
1491 }
1492
1493 return (DCMD_OK);
1494 }
1495
1496 static int
1497 spa_print_aux(spa_aux_vdev_t *sav, uint_t flags, mdb_arg_t *v,
1498 const char *name)
1499 {
1500 uintptr_t *aux;
1501 size_t len;
1502 int ret, i;
1503
1504 /*
1505 * Iterate over aux vdevs and print those out as well. This is a
1506 * little annoying because we don't have a root vdev to pass to ::vdev.
1507 * Instead, we print a single line and then call it for each child
1508 * vdev.
1509 */
1510 if (sav->sav_count != 0) {
1511 v[1].a_type = MDB_TYPE_STRING;
1512 v[1].a_un.a_str = "-d";
1513 v[2].a_type = MDB_TYPE_IMMEDIATE;
1514 v[2].a_un.a_val = 2;
1515
1516 len = sav->sav_count * sizeof (uintptr_t);
1517 aux = mdb_alloc(len, UM_SLEEP);
1518 if (mdb_vread(aux, len,
1519 (uintptr_t)sav->sav_vdevs) == -1) {
1520 mdb_free(aux, len);
1521 mdb_warn("failed to read l2cache vdevs at %p",
1522 sav->sav_vdevs);
1523 return (DCMD_ERR);
1524 }
1525
1526 mdb_printf("%-?s %-9s %-12s %s\n", "-", "-", "-", name);
1527
1528 for (i = 0; i < sav->sav_count; i++) {
1529 ret = mdb_call_dcmd("vdev", aux[i], flags, 3, v);
1530 if (ret != DCMD_OK) {
1531 mdb_free(aux, len);
1532 return (ret);
1533 }
1534 }
1535
1536 mdb_free(aux, len);
1537 }
1538
1539 return (0);
1540 }
1541
1542 /*
1543 * ::spa_vdevs
1544 *
1545 * -e Include error stats
1546 *
1547 * Print out a summarized list of vdevs for the given spa_t.
1548 * This is accomplished by invoking "::vdev -re" on the root vdev, as well as
1549 * iterating over the cache devices.
1550 */
1551 /* ARGSUSED */
1552 static int
1553 spa_vdevs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1554 {
1555 spa_t spa;
1556 mdb_arg_t v[3];
1557 int errors = FALSE;
1558 int ret;
1559
1560 if (mdb_getopts(argc, argv,
1561 'e', MDB_OPT_SETBITS, TRUE, &errors,
1562 NULL) != argc)
1563 return (DCMD_USAGE);
1564
1565 if (!(flags & DCMD_ADDRSPEC))
1566 return (DCMD_USAGE);
1567
1568 if (mdb_vread(&spa, sizeof (spa), addr) == -1) {
1569 mdb_warn("failed to read spa_t at %p", addr);
1570 return (DCMD_ERR);
1571 }
1572
1573 /*
1574 * Unitialized spa_t structures can have a NULL root vdev.
1575 */
1576 if (spa.spa_root_vdev == NULL) {
1577 mdb_printf("no associated vdevs\n");
1578 return (DCMD_OK);
1579 }
1580
1581 v[0].a_type = MDB_TYPE_STRING;
1582 v[0].a_un.a_str = errors ? "-re" : "-r";
1583
1584 ret = mdb_call_dcmd("vdev", (uintptr_t)spa.spa_root_vdev,
1585 flags, 1, v);
1586 if (ret != DCMD_OK)
1587 return (ret);
1588
1589 if (spa_print_aux(&spa.spa_l2cache, flags, v, "cache") != 0 ||
1590 spa_print_aux(&spa.spa_spares, flags, v, "spares") != 0)
1591 return (DCMD_ERR);
1592
1593 return (DCMD_OK);
1594 }
1595
1596 /*
1597 * ::zio
1598 *
1599 * Print a summary of zio_t and all its children. This is intended to display a
1600 * zio tree, and hence we only pick the most important pieces of information for
1601 * the main summary. More detailed information can always be found by doing a
1602 * '::print zio' on the underlying zio_t. The columns we display are:
1603 *
1604 * ADDRESS TYPE STAGE WAITER
1605 *
1606 * The 'address' column is indented by one space for each depth level as we
1607 * descend down the tree.
1608 */
1609
/* Upper bound on the indentation applied to the ADDRESS column. */
#define	ZIO_MAXINDENT	24
/* ADDRESS column width: two hex digits per pointer byte plus max indent. */
#define	ZIO_MAXWIDTH	(sizeof (uintptr_t) * 2 + ZIO_MAXINDENT)
/* Values of zio_print_args_t.zpa_type: which links ::zio follows. */
#define	ZIO_WALK_SELF	0	/* no -c/-p given (zero-initialized default) */
#define	ZIO_WALK_CHILD	1	/* follow io_child_list (-c) */
#define	ZIO_WALK_PARENT	2	/* follow io_parent_list (-p) */
1615
/*
 * Traversal state shared by zio_print(), zio_print_cb() and zio_child_cb().
 */
typedef struct zio_print_args {
	int zpa_current_depth;	/* depth of the zio currently being printed */
	int zpa_min_depth;	/* zios shallower than this are not printed */
	int zpa_max_depth;	/* do not descend once this depth is reached */
	int zpa_type;		/* one of the ZIO_WALK_* values */
	uint_t zpa_flags;	/* dcmd flags; DCMD_PIPE_OUT selects output */
} zio_print_args_t;

/* Forward declaration: zio_print_cb() and zio_child_cb() call each other. */
static int zio_child_cb(uintptr_t addr, const void *unknown, void *arg);
1625
1626 static int
1627 zio_print_cb(uintptr_t addr, const void *data, void *priv)
1628 {
1629 const zio_t *zio = data;
1630 zio_print_args_t *zpa = priv;
1631 mdb_ctf_id_t type_enum, stage_enum;
1632 int indent = zpa->zpa_current_depth;
1633 const char *type, *stage;
1634 uintptr_t laddr;
1635
1636 if (indent > ZIO_MAXINDENT)
1637 indent = ZIO_MAXINDENT;
1638
1639 if (mdb_ctf_lookup_by_name("enum zio_type", &type_enum) == -1 ||
1640 mdb_ctf_lookup_by_name("enum zio_stage", &stage_enum) == -1) {
1641 mdb_warn("failed to lookup zio enums");
1642 return (WALK_ERR);
1643 }
1644
1645 if ((type = mdb_ctf_enum_name(type_enum, zio->io_type)) != NULL)
1646 type += sizeof ("ZIO_TYPE_") - 1;
1647 else
1648 type = "?";
1649
1650 if ((stage = mdb_ctf_enum_name(stage_enum, zio->io_stage)) != NULL)
1651 stage += sizeof ("ZIO_STAGE_") - 1;
1652 else
1653 stage = "?";
1654
1655 if (zpa->zpa_current_depth >= zpa->zpa_min_depth) {
1656 if (zpa->zpa_flags & DCMD_PIPE_OUT) {
1657 mdb_printf("%?p\n", addr);
1658 } else {
1659 mdb_printf("%*s%-*p %-5s %-16s ", indent, "",
1660 ZIO_MAXWIDTH - indent, addr, type, stage);
1661 if (zio->io_waiter)
1662 mdb_printf("%?p\n", zio->io_waiter);
1663 else
1664 mdb_printf("-\n");
1665 }
1666 }
1667
1668 if (zpa->zpa_current_depth >= zpa->zpa_max_depth)
1669 return (WALK_NEXT);
1670
1671 if (zpa->zpa_type == ZIO_WALK_PARENT)
1672 laddr = addr + OFFSETOF(zio_t, io_parent_list);
1673 else
1674 laddr = addr + OFFSETOF(zio_t, io_child_list);
1675
1676 zpa->zpa_current_depth++;
1677 if (mdb_pwalk("list", zio_child_cb, zpa, laddr) != 0) {
1678 mdb_warn("failed to walk zio_t children at %p\n", laddr);
1679 return (WALK_ERR);
1680 }
1681 zpa->zpa_current_depth--;
1682
1683 return (WALK_NEXT);
1684 }
1685
1686 /* ARGSUSED */
1687 static int
1688 zio_child_cb(uintptr_t addr, const void *unknown, void *arg)
1689 {
1690 zio_link_t zl;
1691 zio_t zio;
1692 uintptr_t ziop;
1693 zio_print_args_t *zpa = arg;
1694
1695 if (mdb_vread(&zl, sizeof (zl), addr) == -1) {
1696 mdb_warn("failed to read zio_link_t at %p", addr);
1697 return (WALK_ERR);
1698 }
1699
1700 if (zpa->zpa_type == ZIO_WALK_PARENT)
1701 ziop = (uintptr_t)zl.zl_parent;
1702 else
1703 ziop = (uintptr_t)zl.zl_child;
1704
1705 if (mdb_vread(&zio, sizeof (zio_t), ziop) == -1) {
1706 mdb_warn("failed to read zio_t at %p", ziop);
1707 return (WALK_ERR);
1708 }
1709
1710 return (zio_print_cb(ziop, &zio, arg));
1711 }
1712
1713 /* ARGSUSED */
1714 static int
1715 zio_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1716 {
1717 zio_t zio;
1718 zio_print_args_t zpa = { 0 };
1719
1720 if (!(flags & DCMD_ADDRSPEC))
1721 return (DCMD_USAGE);
1722
1723 if (mdb_getopts(argc, argv,
1724 'r', MDB_OPT_SETBITS, INT_MAX, &zpa.zpa_max_depth,
1725 'c', MDB_OPT_SETBITS, ZIO_WALK_CHILD, &zpa.zpa_type,
1726 'p', MDB_OPT_SETBITS, ZIO_WALK_PARENT, &zpa.zpa_type,
1727 NULL) != argc)
1728 return (DCMD_USAGE);
1729
1730 zpa.zpa_flags = flags;
1731 if (zpa.zpa_max_depth != 0) {
1732 if (zpa.zpa_type == ZIO_WALK_SELF)
1733 zpa.zpa_type = ZIO_WALK_CHILD;
1734 } else if (zpa.zpa_type != ZIO_WALK_SELF) {
1735 zpa.zpa_min_depth = 1;
1736 zpa.zpa_max_depth = 1;
1737 }
1738
1739 if (mdb_vread(&zio, sizeof (zio_t), addr) == -1) {
1740 mdb_warn("failed to read zio_t at %p", addr);
1741 return (DCMD_ERR);
1742 }
1743
1744 if (!(flags & DCMD_PIPE_OUT) && DCMD_HDRSPEC(flags))
1745 mdb_printf("%<u>%-*s %-5s %-16s %-?s%</u>\n", ZIO_MAXWIDTH,
1746 "ADDRESS", "TYPE", "STAGE", "WAITER");
1747
1748 if (zio_print_cb(addr, &zio, &zpa) != WALK_NEXT)
1749 return (DCMD_ERR);
1750
1751 return (DCMD_OK);
1752 }
1753
1754 /*
1755 * [addr]::zio_state
1756 *
1757 * Print a summary of all zio_t structures on the system, or for a particular
1758 * pool. This is equivalent to '::walk zio_root | ::zio'.
1759 */
1760 /*ARGSUSED*/
1761 static int
1762 zio_state(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1763 {
1764 /*
1765 * MDB will remember the last address of the pipeline, so if we don't
1766 * zero this we'll end up trying to walk zio structures for a
1767 * non-existent spa_t.
1768 */
1769 if (!(flags & DCMD_ADDRSPEC))
1770 addr = 0;
1771
1772 return (mdb_pwalk_dcmd("zio_root", "zio", argc, argv, addr));
1773 }
1774
/*
 * Private state of the txg_list* walkers.
 */
typedef struct txg_list_walk_data {
	uintptr_t lw_head[TXG_SIZE];	/* copy of the list's tl_head[] */
	int lw_txgoff;		/* txg slot currently being walked */
	int lw_maxoff;		/* last txg slot this walk may advance to */
	size_t lw_offset;	/* copy of tl_offset: node offset in an object */
	void *lw_obj;		/* buffer of lw_offset + sizeof (txg_node_t) */
				/* bytes holding the current element */
} txg_list_walk_data_t;
1782
1783 static int
1784 txg_list_walk_init_common(mdb_walk_state_t *wsp, int txg, int maxoff)
1785 {
1786 txg_list_walk_data_t *lwd;
1787 txg_list_t list;
1788 int i;
1789
1790 lwd = mdb_alloc(sizeof (txg_list_walk_data_t), UM_SLEEP | UM_GC);
1791 if (mdb_vread(&list, sizeof (txg_list_t), wsp->walk_addr) == -1) {
1792 mdb_warn("failed to read txg_list_t at %#lx", wsp->walk_addr);
1793 return (WALK_ERR);
1794 }
1795
1796 for (i = 0; i < TXG_SIZE; i++)
1797 lwd->lw_head[i] = (uintptr_t)list.tl_head[i];
1798 lwd->lw_offset = list.tl_offset;
1799 lwd->lw_obj = mdb_alloc(lwd->lw_offset + sizeof (txg_node_t),
1800 UM_SLEEP | UM_GC);
1801 lwd->lw_txgoff = txg;
1802 lwd->lw_maxoff = maxoff;
1803
1804 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1805 wsp->walk_data = lwd;
1806
1807 return (WALK_NEXT);
1808 }
1809
/*
 * Init routine of the "txg_list" walker: start at txg slot 0 and allow the
 * walk to advance through the last slot (TXG_SIZE - 1).
 */
static int
txg_list_walk_init(mdb_walk_state_t *wsp)
{
	return (txg_list_walk_init_common(wsp, 0, TXG_SIZE-1));
}
1815
/*
 * Init routine of the "txg_list0" walker: walk txg slot 0 only.
 */
static int
txg_list0_walk_init(mdb_walk_state_t *wsp)
{
	return (txg_list_walk_init_common(wsp, 0, 0));
}
1821
/*
 * Init routine of the "txg_list1" walker: walk txg slot 1 only.
 */
static int
txg_list1_walk_init(mdb_walk_state_t *wsp)
{
	return (txg_list_walk_init_common(wsp, 1, 1));
}
1827
/*
 * Init routine of the "txg_list2" walker: walk txg slot 2 only.
 */
static int
txg_list2_walk_init(mdb_walk_state_t *wsp)
{
	return (txg_list_walk_init_common(wsp, 2, 2));
}
1833
/*
 * Init routine of the "txg_list3" walker: walk txg slot 3 only.
 */
static int
txg_list3_walk_init(mdb_walk_state_t *wsp)
{
	return (txg_list_walk_init_common(wsp, 3, 3));
}
1839
1840 static int
1841 txg_list_walk_step(mdb_walk_state_t *wsp)
1842 {
1843 txg_list_walk_data_t *lwd = wsp->walk_data;
1844 uintptr_t addr;
1845 txg_node_t *node;
1846 int status;
1847
1848 while (wsp->walk_addr == NULL && lwd->lw_txgoff < lwd->lw_maxoff) {
1849 lwd->lw_txgoff++;
1850 wsp->walk_addr = lwd->lw_head[lwd->lw_txgoff];
1851 }
1852
1853 if (wsp->walk_addr == NULL)
1854 return (WALK_DONE);
1855
1856 addr = wsp->walk_addr - lwd->lw_offset;
1857
1858 if (mdb_vread(lwd->lw_obj,
1859 lwd->lw_offset + sizeof (txg_node_t), addr) == -1) {
1860 mdb_warn("failed to read list element at %#lx", addr);
1861 return (WALK_ERR);
1862 }
1863
1864 status = wsp->walk_callback(addr, lwd->lw_obj, wsp->walk_cbdata);
1865 node = (txg_node_t *)((uintptr_t)lwd->lw_obj + lwd->lw_offset);
1866 wsp->walk_addr = (uintptr_t)node->tn_next[lwd->lw_txgoff];
1867
1868 return (status);
1869 }
1870
1871 /*
1872 * ::walk spa
1873 *
1874 * Walk all named spa_t structures in the namespace. This is nothing more than
1875 * a layered avl walk.
1876 */
1877 static int
1878 spa_walk_init(mdb_walk_state_t *wsp)
1879 {
1880 GElf_Sym sym;
1881
1882 if (wsp->walk_addr != NULL) {
1883 mdb_warn("spa walk only supports global walks\n");
1884 return (WALK_ERR);
1885 }
1886
1887 if (mdb_lookup_by_obj(ZFS_OBJ_NAME, "spa_namespace_avl", &sym) == -1) {
1888 mdb_warn("failed to find symbol 'spa_namespace_avl'");
1889 return (WALK_ERR);
1890 }
1891
1892 wsp->walk_addr = (uintptr_t)sym.st_value;
1893
1894 if (mdb_layered_walk("avl", wsp) == -1) {
1895 mdb_warn("failed to walk 'avl'\n");
1896 return (WALK_ERR);
1897 }
1898
1899 return (WALK_NEXT);
1900 }
1901
1902 static int
1903 spa_walk_step(mdb_walk_state_t *wsp)
1904 {
1905 spa_t spa;
1906
1907 if (mdb_vread(&spa, sizeof (spa), wsp->walk_addr) == -1) {
1908 mdb_warn("failed to read spa_t at %p", wsp->walk_addr);
1909 return (WALK_ERR);
1910 }
1911
1912 return (wsp->walk_callback(wsp->walk_addr, &spa, wsp->walk_cbdata));
1913 }
1914
1915 /*
1916 * [addr]::walk zio
1917 *
1918 * Walk all active zio_t structures on the system. This is simply a layered
1919 * walk on top of ::walk zio_cache, with the optional ability to limit the
1920 * structures to a particular pool.
1921 */
1922 static int
1923 zio_walk_init(mdb_walk_state_t *wsp)
1924 {
1925 wsp->walk_data = (void *)wsp->walk_addr;
1926
1927 if (mdb_layered_walk("zio_cache", wsp) == -1) {
1928 mdb_warn("failed to walk 'zio_cache'\n");
1929 return (WALK_ERR);
1930 }
1931
1932 return (WALK_NEXT);
1933 }
1934
1935 static int
1936 zio_walk_step(mdb_walk_state_t *wsp)
1937 {
1938 zio_t zio;
1939
1940 if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1941 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1942 return (WALK_ERR);
1943 }
1944
1945 if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1946 return (WALK_NEXT);
1947
1948 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1949 }
1950
1951 /*
1952 * [addr]::walk zio_root
1953 *
1954 * Walk only root zio_t structures, optionally for a particular spa_t.
1955 */
1956 static int
1957 zio_walk_root_step(mdb_walk_state_t *wsp)
1958 {
1959 zio_t zio;
1960
1961 if (mdb_vread(&zio, sizeof (zio), wsp->walk_addr) == -1) {
1962 mdb_warn("failed to read zio_t at %p", wsp->walk_addr);
1963 return (WALK_ERR);
1964 }
1965
1966 if (wsp->walk_data != NULL && wsp->walk_data != zio.io_spa)
1967 return (WALK_NEXT);
1968
1969 /* If the parent list is not empty, ignore */
1970 if (zio.io_parent_list.list_head.list_next !=
1971 &((zio_t *)wsp->walk_addr)->io_parent_list.list_head)
1972 return (WALK_NEXT);
1973
1974 return (wsp->walk_callback(wsp->walk_addr, &zio, wsp->walk_cbdata));
1975 }
1976
1977 #define NICENUM_BUFLEN 6
1978
1979 static int
1980 snprintfrac(char *buf, int len,
1981 uint64_t numerator, uint64_t denom, int frac_digits)
1982 {
1983 int mul = 1;
1984 int whole, frac, i;
1985
1986 for (i = frac_digits; i; i--)
1987 mul *= 10;
1988 whole = numerator / denom;
1989 frac = mul * numerator / denom - mul * whole;
1990 return (mdb_snprintf(buf, len, "%u.%0*u", whole, frac_digits, frac));
1991 }
1992
1993 static void
1994 mdb_nicenum(uint64_t num, char *buf)
1995 {
1996 uint64_t n = num;
1997 int index = 0;
1998 char *u;
1999
2000 while (n >= 1024) {
2001 n = (n + (1024 / 2)) / 1024; /* Round up or down */
2002 index++;
2003 }
2004
2005 u = &" \0K\0M\0G\0T\0P\0E\0"[index*2];
2006
2007 if (index == 0) {
2008 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu",
2009 (u_longlong_t)n);
2010 } else if (n < 10 && (num & (num - 1)) != 0) {
2011 (void) snprintfrac(buf, NICENUM_BUFLEN,
2012 num, 1ULL << 10 * index, 2);
2013 strcat(buf, u);
2014 } else if (n < 100 && (num & (num - 1)) != 0) {
2015 (void) snprintfrac(buf, NICENUM_BUFLEN,
2016 num, 1ULL << 10 * index, 1);
2017 strcat(buf, u);
2018 } else {
2019 (void) mdb_snprintf(buf, NICENUM_BUFLEN, "%llu%s",
2020 (u_longlong_t)n, u);
2021 }
2022 }
2023
2024 /*
2025 * ::zfs_blkstats
2026 *
2027 * -v print verbose per-level information
2028 *
2029 */
2030 static int
2031 zfs_blkstats(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2032 {
2033 boolean_t verbose = B_FALSE;
2034 zfs_all_blkstats_t stats;
2035 dmu_object_type_t t;
2036 zfs_blkstat_t *tzb;
2037 uint64_t ditto;
2038 dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES + 10];
2039 /* +10 in case it grew */
2040
2041 if (mdb_readvar(&dmu_ot, "dmu_ot") == -1) {
2042 mdb_warn("failed to read 'dmu_ot'");
2043 return (DCMD_ERR);
2044 }
2045
2046 if (mdb_getopts(argc, argv,
2047 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2048 NULL) != argc)
2049 return (DCMD_USAGE);
2050
2051 if (!(flags & DCMD_ADDRSPEC))
2052 return (DCMD_USAGE);
2053
2054 if (GETMEMB(addr, struct spa, spa_dsl_pool, addr) ||
2055 GETMEMB(addr, struct dsl_pool, dp_blkstats, addr) ||
2056 mdb_vread(&stats, sizeof (zfs_all_blkstats_t), addr) == -1) {
2057 mdb_warn("failed to read data at %p;", addr);
2058 mdb_printf("maybe no stats? run \"zpool scrub\" first.");
2059 return (DCMD_ERR);
2060 }
2061
2062 tzb = &stats.zab_type[DN_MAX_LEVELS][DMU_OT_NUMTYPES];
2063 if (tzb->zb_gangs != 0) {
2064 mdb_printf("Ganged blocks: %llu\n",
2065 (longlong_t)tzb->zb_gangs);
2066 }
2067
2068 ditto = tzb->zb_ditto_2_of_2_samevdev + tzb->zb_ditto_2_of_3_samevdev +
2069 tzb->zb_ditto_3_of_3_samevdev;
2070 if (ditto != 0) {
2071 mdb_printf("Dittoed blocks on same vdev: %llu\n",
2072 (longlong_t)ditto);
2073 }
2074
2075 mdb_printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
2076 "\t avg\t comp\t%%Total\tType\n");
2077
2078 for (t = 0; t <= DMU_OT_NUMTYPES; t++) {
2079 char csize[NICENUM_BUFLEN], lsize[NICENUM_BUFLEN];
2080 char psize[NICENUM_BUFLEN], asize[NICENUM_BUFLEN];
2081 char avg[NICENUM_BUFLEN];
2082 char comp[NICENUM_BUFLEN], pct[NICENUM_BUFLEN];
2083 char typename[64];
2084 int l;
2085
2086
2087 if (t == DMU_OT_DEFERRED)
2088 strcpy(typename, "deferred free");
2089 else if (t == DMU_OT_TOTAL)
2090 strcpy(typename, "Total");
2091 else if (mdb_readstr(typename, sizeof (typename),
2092 (uintptr_t)dmu_ot[t].ot_name) == -1) {
2093 mdb_warn("failed to read type name");
2094 return (DCMD_ERR);
2095 }
2096
2097 if (stats.zab_type[DN_MAX_LEVELS][t].zb_asize == 0)
2098 continue;
2099
2100 for (l = -1; l < DN_MAX_LEVELS; l++) {
2101 int level = (l == -1 ? DN_MAX_LEVELS : l);
2102 zfs_blkstat_t *zb = &stats.zab_type[level][t];
2103
2104 if (zb->zb_asize == 0)
2105 continue;
2106
2107 /*
2108 * Don't print each level unless requested.
2109 */
2110 if (!verbose && level != DN_MAX_LEVELS)
2111 continue;
2112
2113 /*
2114 * If all the space is level 0, don't print the
2115 * level 0 separately.
2116 */
2117 if (level == 0 && zb->zb_asize ==
2118 stats.zab_type[DN_MAX_LEVELS][t].zb_asize)
2119 continue;
2120
2121 mdb_nicenum(zb->zb_count, csize);
2122 mdb_nicenum(zb->zb_lsize, lsize);
2123 mdb_nicenum(zb->zb_psize, psize);
2124 mdb_nicenum(zb->zb_asize, asize);
2125 mdb_nicenum(zb->zb_asize / zb->zb_count, avg);
2126 (void) snprintfrac(comp, NICENUM_BUFLEN,
2127 zb->zb_lsize, zb->zb_psize, 2);
2128 (void) snprintfrac(pct, NICENUM_BUFLEN,
2129 100 * zb->zb_asize, tzb->zb_asize, 2);
2130
2131 mdb_printf("%6s\t%5s\t%5s\t%5s\t%5s"
2132 "\t%5s\t%6s\t",
2133 csize, lsize, psize, asize, avg, comp, pct);
2134
2135 if (level == DN_MAX_LEVELS)
2136 mdb_printf("%s\n", typename);
2137 else
2138 mdb_printf(" L%d %s\n",
2139 level, typename);
2140 }
2141 }
2142
2143 return (DCMD_OK);
2144 }
2145
2146 /*
2147 * MDB module linkage information:
2148 *
2149 * We declare a list of structures describing our dcmds, and a function
2150 * named _mdb_init to return a pointer to our module information.
2151 */
2152
2153 static const mdb_dcmd_t dcmds[] = {
2154 { "arc", "[-bkmg]", "print ARC variables", arc_print },
2155 { "blkptr", ":", "print blkptr_t", blkptr },
2156 { "dbuf", ":", "print dmu_buf_impl_t", dbuf },
2157 { "dbuf_stats", ":", "dbuf stats", dbuf_stats },
2158 { "dbufs",
2159 "\t[-O objset_impl_t*] [-n objset_name | \"mos\"] "
2160 "[-o object | \"mdn\"] \n"
2161 "\t[-l level] [-b blkid | \"bonus\"]",
2162 "find dmu_buf_impl_t's that match specified criteria", dbufs },
2163 { "abuf_find", "dva_word[0] dva_word[1]",
2164 "find arc_buf_hdr_t of a specified DVA",
2165 abuf_find },
2166 { "spa", "?[-cv]", "spa_t summary", spa_print },
2167 { "spa_config", ":", "print spa_t configuration", spa_print_config },
2168 { "spa_verify", ":", "verify spa_t consistency", spa_verify },
2169 { "spa_space", ":[-b]", "print spa_t on-disk space usage", spa_space },
2170 { "spa_vdevs", ":", "given a spa_t, print vdev summary", spa_vdevs },
2171 { "vdev", ":[-re]\n"
2172 "\t-r display recursively\n"
2173 "\t-e print statistics",
2174 "vdev_t summary", vdev_print },
2175 { "zio", ":[cpr]\n"
2176 "\t-c display children\n"
2177 "\t-p display parents\n"
2178 "\t-r display recursively",
2179 "zio_t summary", zio_print },
2180 { "zio_state", "?", "print out all zio_t structures on system or "
2181 "for a particular pool", zio_state },
2182 { "zio_pipeline", ":", "decode a zio pipeline", zio_pipeline },
2183 { "zfs_blkstats", ":[-v]",
2184 "given a spa_t, print block type stats from last scrub",
2185 zfs_blkstats },
2186 { "zfs_params", "", "print zfs tunable parameters", zfs_params },
2187 { NULL }
2188 };
2189
/*
 * Walkers exported by this module.  Each entry is
 * { name, description, init, step, fini }; the table ends with a NULL entry.
 * Apart from the userland list walker, every entry has a NULL fini routine.
 */
static const mdb_walker_t walkers[] = {
	/*
	 * In userland, there is no generic provider of list_t walkers, so we
	 * need to add it.
	 */
#ifndef _KERNEL
	{ LIST_WALK_NAME, LIST_WALK_DESC,
		list_walk_init, list_walk_step, list_walk_fini },
#endif
	{ "zms_freelist", "walk ZFS metaslab freelist",
		freelist_walk_init, freelist_walk_step, NULL },
	{ "txg_list", "given any txg_list_t *, walk all entries in all txgs",
		txg_list_walk_init, txg_list_walk_step, NULL },
	{ "txg_list0", "given any txg_list_t *, walk all entries in txg 0",
		txg_list0_walk_init, txg_list_walk_step, NULL },
	{ "txg_list1", "given any txg_list_t *, walk all entries in txg 1",
		txg_list1_walk_init, txg_list_walk_step, NULL },
	{ "txg_list2", "given any txg_list_t *, walk all entries in txg 2",
		txg_list2_walk_init, txg_list_walk_step, NULL },
	{ "txg_list3", "given any txg_list_t *, walk all entries in txg 3",
		txg_list3_walk_init, txg_list_walk_step, NULL },
	{ "zio", "walk all zio structures, optionally for a particular spa_t",
		zio_walk_init, zio_walk_step, NULL },
	/* zio_root shares zio_walk_init; only the step routine differs. */
	{ "zio_root", "walk all root zio_t structures, optionally for a "
	    "particular spa_t",
		zio_walk_init, zio_walk_root_step, NULL },
	{ "spa", "walk all spa_t entries in the namespace",
		spa_walk_init, spa_walk_step, NULL },
	{ "metaslab", "given a spa_t *, walk all metaslab_t structures",
		metaslab_walk_init, metaslab_walk_step, NULL },
	{ NULL }
};
2222
/*
 * Module description returned by _mdb_init(): the API version this module
 * was built against plus the dcmd and walker tables above.
 */
static const mdb_modinfo_t modinfo = {
	MDB_API_VERSION, dcmds, walkers
};
2226
/*
 * Module entry point: returns a pointer to this module's linkage
 * information (the static modinfo structure).
 */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}