Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/ztest/ztest.c
+++ new/usr/src/cmd/ztest/ztest.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * The objective of this program is to provide a DMU/ZAP/SPA stress test
28 28 * that runs entirely in userland, is easy to use, and easy to extend.
29 29 *
30 30 * The overall design of the ztest program is as follows:
31 31 *
32 32 * (1) For each major functional area (e.g. adding vdevs to a pool,
33 33 * creating and destroying datasets, reading and writing objects, etc)
34 34 * we have a simple routine to test that functionality. These
35 35 * individual routines do not have to do anything "stressful".
36 36 *
37 37 * (2) We turn these simple functionality tests into a stress test by
38 38 * running them all in parallel, with as many threads as desired,
39 39 * and spread across as many datasets, objects, and vdevs as desired.
40 40 *
41 41 * (3) While all this is happening, we inject faults into the pool to
42 42 * verify that self-healing data really works.
43 43 *
44 44 * (4) Every time we open a dataset, we change its checksum and compression
45 45 * functions. Thus even individual objects vary from block to block
46 46 * in which checksum they use and whether they're compressed.
47 47 *
48 48 * (5) To verify that we never lose on-disk consistency after a crash,
49 49 * we run the entire test in a child of the main process.
50 50 * At random times, the child self-immolates with a SIGKILL.
51 51 * This is the software equivalent of pulling the power cord.
52 52 * The parent then runs the test again, using the existing
53 53 * storage pool, as many times as desired.
54 54 *
55 55 * (6) To verify that we don't have future leaks or temporal incursions,
56 56 * many of the functional tests record the transaction group number
57 57 * as part of their data. When reading old data, they verify that
58 58 * the transaction group number is less than the current, open txg.
59 59 * If you add a new test, please do this if applicable.
60 60 *
61 61 * When run with no arguments, ztest runs for about five minutes and
62 62 * produces no output if successful. To get a little bit of information,
63 63 * specify -V. To get more information, specify -VV, and so on.
64 64 *
65 65 * To turn this into an overnight stress test, use -T to specify run time.
66 66 *
67 67 * You can ask for more vdevs [-v], datasets [-d], or threads [-t]
68 68 * to increase the pool capacity, fanout, and overall stress level.
69 69 *
70 70 * The -N(okill) option will suppress kills, so each child runs to completion.
71 71 * This can be useful when you're trying to distinguish temporal incursions
72 72 * from plain old race conditions.
73 73 */
74 74
75 75 #include <sys/zfs_context.h>
76 76 #include <sys/spa.h>
77 77 #include <sys/dmu.h>
78 78 #include <sys/txg.h>
79 79 #include <sys/zap.h>
|
↓ open down ↓ |
79 lines elided |
↑ open up ↑ |
80 80 #include <sys/dmu_objset.h>
81 81 #include <sys/poll.h>
82 82 #include <sys/stat.h>
83 83 #include <sys/time.h>
84 84 #include <sys/wait.h>
85 85 #include <sys/mman.h>
86 86 #include <sys/resource.h>
87 87 #include <sys/zio.h>
88 88 #include <sys/zio_checksum.h>
89 89 #include <sys/zio_compress.h>
90 +#include <sys/zio_crypt.h>
90 91 #include <sys/zil.h>
91 92 #include <sys/vdev_impl.h>
92 93 #include <sys/vdev_file.h>
93 94 #include <sys/spa_impl.h>
94 95 #include <sys/dsl_prop.h>
95 96 #include <sys/refcount.h>
96 97 #include <stdio.h>
97 98 #include <stdio_ext.h>
98 99 #include <stdlib.h>
99 100 #include <unistd.h>
100 101 #include <signal.h>
101 102 #include <umem.h>
102 103 #include <dlfcn.h>
103 104 #include <ctype.h>
104 105 #include <math.h>
105 106 #include <sys/fs/zfs.h>
106 107
107 108 static char cmdname[] = "ztest";
108 109 static char *zopt_pool = cmdname;
109 110
110 111 static uint64_t zopt_vdevs = 5;
111 112 static uint64_t zopt_vdevtime;
112 113 static int zopt_ashift = SPA_MINBLOCKSHIFT;
113 114 static int zopt_mirrors = 2;
114 115 static int zopt_raidz = 4;
115 116 static int zopt_raidz_parity = 1;
116 117 static size_t zopt_vdev_size = SPA_MINDEVSIZE;
117 118 static int zopt_datasets = 7;
118 119 static int zopt_threads = 23;
119 120 static uint64_t zopt_passtime = 60; /* 60 seconds */
120 121 static uint64_t zopt_killrate = 70; /* 70% kill rate */
121 122 static int zopt_verbose = 0;
122 123 static int zopt_init = 1;
123 124 static char *zopt_dir = "/tmp";
124 125 static uint64_t zopt_time = 300; /* 5 minutes */
125 126 static int zopt_maxfaults;
126 127
127 128 typedef struct ztest_block_tag {
128 129 uint64_t bt_objset;
129 130 uint64_t bt_object;
130 131 uint64_t bt_offset;
131 132 uint64_t bt_txg;
132 133 uint64_t bt_thread;
133 134 uint64_t bt_seq;
134 135 } ztest_block_tag_t;
135 136
136 137 typedef struct ztest_args {
137 138 char za_pool[MAXNAMELEN];
138 139 spa_t *za_spa;
139 140 objset_t *za_os;
140 141 zilog_t *za_zilog;
141 142 thread_t za_thread;
142 143 uint64_t za_instance;
143 144 uint64_t za_random;
144 145 uint64_t za_diroff;
145 146 uint64_t za_diroff_shared;
146 147 uint64_t za_zil_seq;
147 148 hrtime_t za_start;
148 149 hrtime_t za_stop;
149 150 hrtime_t za_kill;
150 151 /*
151 152 * Thread-local variables can go here to aid debugging.
152 153 */
153 154 ztest_block_tag_t za_rbt;
154 155 ztest_block_tag_t za_wbt;
155 156 dmu_object_info_t za_doi;
156 157 dmu_buf_t *za_dbuf;
157 158 } ztest_args_t;
158 159
159 160 typedef void ztest_func_t(ztest_args_t *);
160 161
161 162 /*
162 163 * Note: these aren't static because we want dladdr() to work.
163 164 */
164 165 ztest_func_t ztest_dmu_read_write;
165 166 ztest_func_t ztest_dmu_write_parallel;
166 167 ztest_func_t ztest_dmu_object_alloc_free;
167 168 ztest_func_t ztest_zap;
168 169 ztest_func_t ztest_zap_parallel;
169 170 ztest_func_t ztest_traverse;
170 171 ztest_func_t ztest_dsl_prop_get_set;
171 172 ztest_func_t ztest_dmu_objset_create_destroy;
172 173 ztest_func_t ztest_dmu_snapshot_create_destroy;
173 174 ztest_func_t ztest_spa_create_destroy;
174 175 ztest_func_t ztest_fault_inject;
175 176 ztest_func_t ztest_spa_rename;
176 177 ztest_func_t ztest_vdev_attach_detach;
177 178 ztest_func_t ztest_vdev_LUN_growth;
178 179 ztest_func_t ztest_vdev_add_remove;
179 180 ztest_func_t ztest_vdev_aux_add_remove;
180 181 ztest_func_t ztest_scrub;
181 182
182 183 typedef struct ztest_info {
183 184 ztest_func_t *zi_func; /* test function */
184 185 uint64_t zi_iters; /* iterations per execution */
185 186 uint64_t *zi_interval; /* execute every <interval> seconds */
186 187 uint64_t zi_calls; /* per-pass count */
187 188 uint64_t zi_call_time; /* per-pass time */
188 189 uint64_t zi_call_total; /* cumulative total */
189 190 uint64_t zi_call_target; /* target cumulative total */
190 191 } ztest_info_t;
191 192
192 193 uint64_t zopt_always = 0; /* all the time */
193 194 uint64_t zopt_often = 1; /* every second */
194 195 uint64_t zopt_sometimes = 10; /* every 10 seconds */
195 196 uint64_t zopt_rarely = 60; /* every 60 seconds */
196 197
197 198 ztest_info_t ztest_info[] = {
198 199 { ztest_dmu_read_write, 1, &zopt_always },
199 200 { ztest_dmu_write_parallel, 30, &zopt_always },
200 201 { ztest_dmu_object_alloc_free, 1, &zopt_always },
201 202 { ztest_zap, 30, &zopt_always },
202 203 { ztest_zap_parallel, 100, &zopt_always },
203 204 { ztest_dsl_prop_get_set, 1, &zopt_sometimes },
204 205 { ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
205 206 { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
206 207 { ztest_spa_create_destroy, 1, &zopt_sometimes },
207 208 { ztest_fault_inject, 1, &zopt_sometimes },
208 209 { ztest_spa_rename, 1, &zopt_rarely },
209 210 { ztest_vdev_attach_detach, 1, &zopt_rarely },
210 211 { ztest_vdev_LUN_growth, 1, &zopt_rarely },
211 212 { ztest_vdev_add_remove, 1, &zopt_vdevtime },
212 213 { ztest_vdev_aux_add_remove, 1, &zopt_vdevtime },
213 214 { ztest_scrub, 1, &zopt_vdevtime },
214 215 };
215 216
216 217 #define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
217 218
218 219 #define ZTEST_SYNC_LOCKS 16
219 220
220 221 /*
221 222 * Stuff we need to share writably between parent and child.
222 223 */
223 224 typedef struct ztest_shared {
224 225 mutex_t zs_vdev_lock;
225 226 rwlock_t zs_name_lock;
226 227 uint64_t zs_vdev_primaries;
227 228 uint64_t zs_vdev_aux;
228 229 uint64_t zs_enospc_count;
229 230 hrtime_t zs_start_time;
230 231 hrtime_t zs_stop_time;
231 232 uint64_t zs_alloc;
232 233 uint64_t zs_space;
233 234 ztest_info_t zs_info[ZTEST_FUNCS];
234 235 mutex_t zs_sync_lock[ZTEST_SYNC_LOCKS];
235 236 uint64_t zs_seq[ZTEST_SYNC_LOCKS];
236 237 } ztest_shared_t;
237 238
238 239 static char ztest_dev_template[] = "%s/%s.%llua";
239 240 static char ztest_aux_template[] = "%s/%s.%s.%llu";
240 241 static ztest_shared_t *ztest_shared;
241 242
242 243 static int ztest_random_fd;
243 244 static int ztest_dump_core = 1;
244 245
245 246 static boolean_t ztest_exiting;
246 247
247 248 extern uint64_t metaslab_gang_bang;
248 249
249 250 #define ZTEST_DIROBJ 1
250 251 #define ZTEST_MICROZAP_OBJ 2
251 252 #define ZTEST_FATZAP_OBJ 3
252 253
253 254 #define ZTEST_DIROBJ_BLOCKSIZE (1 << 10)
254 255 #define ZTEST_DIRSIZE 256
255 256
256 257 static void usage(boolean_t) __NORETURN;
257 258
258 259 /*
259 260 * These libumem hooks provide a reasonable set of defaults for the allocator's
260 261 * debugging facilities.
261 262 */
/*
 * libumem hook: returns the string to use as the $UMEM_DEBUG setting.
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype, matching _umem_logging_init().
 */
const char *
_umem_debug_init(void)
{
	return ("default,verbose");	/* $UMEM_DEBUG setting */
}
267 268
/*
 * libumem hook: returns the string to use as the $UMEM_LOGGING setting.
 */
const char *
_umem_logging_init(void)
{
	static const char umem_logging[] = "fail,contents";

	return (umem_logging);
}
273 274
274 275 #define FATAL_MSG_SZ 1024
275 276
276 277 char *fatal_msg;
277 278
278 279 static void
279 280 fatal(int do_perror, char *message, ...)
280 281 {
281 282 va_list args;
282 283 int save_errno = errno;
283 284 char buf[FATAL_MSG_SZ];
284 285
285 286 (void) fflush(stdout);
286 287
287 288 va_start(args, message);
288 289 (void) sprintf(buf, "ztest: ");
289 290 /* LINTED */
290 291 (void) vsprintf(buf + strlen(buf), message, args);
291 292 va_end(args);
292 293 if (do_perror) {
293 294 (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf),
294 295 ": %s", strerror(save_errno));
295 296 }
296 297 (void) fprintf(stderr, "%s\n", buf);
297 298 fatal_msg = buf; /* to ease debugging */
298 299 if (ztest_dump_core)
299 300 abort();
300 301 exit(3);
301 302 }
302 303
303 304 static int
304 305 str2shift(const char *buf)
305 306 {
306 307 const char *ends = "BKMGTPEZ";
307 308 int i;
308 309
309 310 if (buf[0] == '\0')
310 311 return (0);
311 312 for (i = 0; i < strlen(ends); i++) {
312 313 if (toupper(buf[0]) == ends[i])
313 314 break;
314 315 }
315 316 if (i == strlen(ends)) {
316 317 (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n",
317 318 buf);
318 319 usage(B_FALSE);
319 320 }
320 321 if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) {
321 322 return (10*i);
322 323 }
323 324 (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf);
324 325 usage(B_FALSE);
325 326 /* NOTREACHED */
326 327 }
327 328
328 329 static uint64_t
329 330 nicenumtoull(const char *buf)
330 331 {
331 332 char *end;
332 333 uint64_t val;
333 334
334 335 val = strtoull(buf, &end, 0);
335 336 if (end == buf) {
336 337 (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf);
337 338 usage(B_FALSE);
338 339 } else if (end[0] == '.') {
339 340 double fval = strtod(buf, &end);
340 341 fval *= pow(2, str2shift(end));
341 342 if (fval > UINT64_MAX) {
342 343 (void) fprintf(stderr, "ztest: value too large: %s\n",
343 344 buf);
344 345 usage(B_FALSE);
345 346 }
346 347 val = (uint64_t)fval;
347 348 } else {
348 349 int shift = str2shift(end);
349 350 if (shift >= 64 || (val << shift) >> shift != val) {
350 351 (void) fprintf(stderr, "ztest: value too large: %s\n",
351 352 buf);
352 353 usage(B_FALSE);
353 354 }
354 355 val <<= shift;
355 356 }
356 357 return (val);
357 358 }
358 359
359 360 static void
360 361 usage(boolean_t requested)
361 362 {
362 363 char nice_vdev_size[10];
363 364 char nice_gang_bang[10];
364 365 FILE *fp = requested ? stdout : stderr;
365 366
366 367 nicenum(zopt_vdev_size, nice_vdev_size);
367 368 nicenum(metaslab_gang_bang, nice_gang_bang);
368 369
369 370 (void) fprintf(fp, "Usage: %s\n"
370 371 "\t[-v vdevs (default: %llu)]\n"
371 372 "\t[-s size_of_each_vdev (default: %s)]\n"
372 373 "\t[-a alignment_shift (default: %d) (use 0 for random)]\n"
373 374 "\t[-m mirror_copies (default: %d)]\n"
374 375 "\t[-r raidz_disks (default: %d)]\n"
375 376 "\t[-R raidz_parity (default: %d)]\n"
376 377 "\t[-d datasets (default: %d)]\n"
377 378 "\t[-t threads (default: %d)]\n"
378 379 "\t[-g gang_block_threshold (default: %s)]\n"
379 380 "\t[-i initialize pool i times (default: %d)]\n"
380 381 "\t[-k kill percentage (default: %llu%%)]\n"
381 382 "\t[-p pool_name (default: %s)]\n"
382 383 "\t[-f file directory for vdev files (default: %s)]\n"
383 384 "\t[-V(erbose)] (use multiple times for ever more blather)\n"
384 385 "\t[-E(xisting)] (use existing pool instead of creating new one)\n"
385 386 "\t[-T time] total run time (default: %llu sec)\n"
386 387 "\t[-P passtime] time per pass (default: %llu sec)\n"
387 388 "\t[-h] (print help)\n"
388 389 "",
389 390 cmdname,
390 391 (u_longlong_t)zopt_vdevs, /* -v */
391 392 nice_vdev_size, /* -s */
392 393 zopt_ashift, /* -a */
393 394 zopt_mirrors, /* -m */
394 395 zopt_raidz, /* -r */
395 396 zopt_raidz_parity, /* -R */
396 397 zopt_datasets, /* -d */
397 398 zopt_threads, /* -t */
398 399 nice_gang_bang, /* -g */
399 400 zopt_init, /* -i */
400 401 (u_longlong_t)zopt_killrate, /* -k */
401 402 zopt_pool, /* -p */
402 403 zopt_dir, /* -f */
403 404 (u_longlong_t)zopt_time, /* -T */
404 405 (u_longlong_t)zopt_passtime); /* -P */
405 406 exit(requested ? 0 : 1);
406 407 }
407 408
408 409 static uint64_t
409 410 ztest_random(uint64_t range)
410 411 {
411 412 uint64_t r;
412 413
413 414 if (range == 0)
414 415 return (0);
415 416
416 417 if (read(ztest_random_fd, &r, sizeof (r)) != sizeof (r))
417 418 fatal(1, "short read from /dev/urandom");
418 419
419 420 return (r % range);
420 421 }
421 422
422 423 /* ARGSUSED */
423 424 static void
424 425 ztest_record_enospc(char *s)
425 426 {
426 427 ztest_shared->zs_enospc_count++;
427 428 }
428 429
429 430 static void
430 431 process_options(int argc, char **argv)
431 432 {
432 433 int opt;
433 434 uint64_t value;
434 435
435 436 /* By default, test gang blocks for blocks 32K and greater */
436 437 metaslab_gang_bang = 32 << 10;
437 438
438 439 while ((opt = getopt(argc, argv,
439 440 "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:h")) != EOF) {
440 441 value = 0;
441 442 switch (opt) {
442 443 case 'v':
443 444 case 's':
444 445 case 'a':
445 446 case 'm':
446 447 case 'r':
447 448 case 'R':
448 449 case 'd':
449 450 case 't':
450 451 case 'g':
451 452 case 'i':
452 453 case 'k':
453 454 case 'T':
454 455 case 'P':
455 456 value = nicenumtoull(optarg);
456 457 }
457 458 switch (opt) {
458 459 case 'v':
459 460 zopt_vdevs = value;
460 461 break;
461 462 case 's':
462 463 zopt_vdev_size = MAX(SPA_MINDEVSIZE, value);
463 464 break;
464 465 case 'a':
465 466 zopt_ashift = value;
466 467 break;
467 468 case 'm':
468 469 zopt_mirrors = value;
469 470 break;
470 471 case 'r':
471 472 zopt_raidz = MAX(1, value);
472 473 break;
473 474 case 'R':
474 475 zopt_raidz_parity = MIN(MAX(value, 1), 2);
475 476 break;
476 477 case 'd':
477 478 zopt_datasets = MAX(1, value);
478 479 break;
479 480 case 't':
480 481 zopt_threads = MAX(1, value);
481 482 break;
482 483 case 'g':
483 484 metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value);
484 485 break;
485 486 case 'i':
486 487 zopt_init = value;
487 488 break;
488 489 case 'k':
489 490 zopt_killrate = value;
490 491 break;
491 492 case 'p':
492 493 zopt_pool = strdup(optarg);
493 494 break;
494 495 case 'f':
495 496 zopt_dir = strdup(optarg);
496 497 break;
497 498 case 'V':
498 499 zopt_verbose++;
499 500 break;
500 501 case 'E':
501 502 zopt_init = 0;
502 503 break;
503 504 case 'T':
504 505 zopt_time = value;
505 506 break;
506 507 case 'P':
507 508 zopt_passtime = MAX(1, value);
508 509 break;
509 510 case 'h':
510 511 usage(B_TRUE);
511 512 break;
512 513 case '?':
513 514 default:
514 515 usage(B_FALSE);
515 516 break;
516 517 }
517 518 }
518 519
519 520 zopt_raidz_parity = MIN(zopt_raidz_parity, zopt_raidz - 1);
520 521
521 522 zopt_vdevtime = (zopt_vdevs > 0 ? zopt_time / zopt_vdevs : UINT64_MAX);
522 523 zopt_maxfaults = MAX(zopt_mirrors, 1) * (zopt_raidz_parity + 1) - 1;
523 524 }
524 525
525 526 static uint64_t
526 527 ztest_get_ashift(void)
527 528 {
528 529 if (zopt_ashift == 0)
529 530 return (SPA_MINBLOCKSHIFT + ztest_random(3));
530 531 return (zopt_ashift);
531 532 }
532 533
533 534 static nvlist_t *
534 535 make_vdev_file(char *path, char *aux, size_t size, uint64_t ashift)
535 536 {
536 537 char pathbuf[MAXPATHLEN];
537 538 uint64_t vdev;
538 539 nvlist_t *file;
539 540
540 541 if (ashift == 0)
541 542 ashift = ztest_get_ashift();
542 543
543 544 if (path == NULL) {
544 545 path = pathbuf;
545 546
546 547 if (aux != NULL) {
547 548 vdev = ztest_shared->zs_vdev_aux;
548 549 (void) sprintf(path, ztest_aux_template,
549 550 zopt_dir, zopt_pool, aux, vdev);
550 551 } else {
551 552 vdev = ztest_shared->zs_vdev_primaries++;
552 553 (void) sprintf(path, ztest_dev_template,
553 554 zopt_dir, zopt_pool, vdev);
554 555 }
555 556 }
556 557
557 558 if (size != 0) {
558 559 int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
559 560 if (fd == -1)
560 561 fatal(1, "can't open %s", path);
561 562 if (ftruncate(fd, size) != 0)
562 563 fatal(1, "can't ftruncate %s", path);
563 564 (void) close(fd);
564 565 }
565 566
566 567 VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
567 568 VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
568 569 VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
569 570 VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
570 571
571 572 return (file);
572 573 }
573 574
574 575 static nvlist_t *
575 576 make_vdev_raidz(char *path, char *aux, size_t size, uint64_t ashift, int r)
576 577 {
577 578 nvlist_t *raidz, **child;
578 579 int c;
579 580
580 581 if (r < 2)
581 582 return (make_vdev_file(path, aux, size, ashift));
582 583 child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL);
583 584
584 585 for (c = 0; c < r; c++)
585 586 child[c] = make_vdev_file(path, aux, size, ashift);
586 587
587 588 VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
588 589 VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
589 590 VDEV_TYPE_RAIDZ) == 0);
590 591 VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
591 592 zopt_raidz_parity) == 0);
592 593 VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
593 594 child, r) == 0);
594 595
595 596 for (c = 0; c < r; c++)
596 597 nvlist_free(child[c]);
597 598
598 599 umem_free(child, r * sizeof (nvlist_t *));
599 600
600 601 return (raidz);
601 602 }
602 603
603 604 static nvlist_t *
604 605 make_vdev_mirror(char *path, char *aux, size_t size, uint64_t ashift,
605 606 int r, int m)
606 607 {
607 608 nvlist_t *mirror, **child;
608 609 int c;
609 610
610 611 if (m < 1)
611 612 return (make_vdev_raidz(path, aux, size, ashift, r));
612 613
613 614 child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL);
614 615
615 616 for (c = 0; c < m; c++)
616 617 child[c] = make_vdev_raidz(path, aux, size, ashift, r);
617 618
618 619 VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
619 620 VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
620 621 VDEV_TYPE_MIRROR) == 0);
621 622 VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
622 623 child, m) == 0);
623 624
624 625 for (c = 0; c < m; c++)
625 626 nvlist_free(child[c]);
626 627
627 628 umem_free(child, m * sizeof (nvlist_t *));
628 629
629 630 return (mirror);
630 631 }
631 632
632 633 static nvlist_t *
633 634 make_vdev_root(char *path, char *aux, size_t size, uint64_t ashift,
634 635 int log, int r, int m, int t)
635 636 {
636 637 nvlist_t *root, **child;
637 638 int c;
638 639
639 640 ASSERT(t > 0);
640 641
641 642 child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
642 643
643 644 for (c = 0; c < t; c++) {
644 645 child[c] = make_vdev_mirror(path, aux, size, ashift, r, m);
645 646 VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
646 647 log) == 0);
647 648 }
648 649
649 650 VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
650 651 VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
651 652 VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
652 653 child, t) == 0);
653 654
654 655 for (c = 0; c < t; c++)
655 656 nvlist_free(child[c]);
656 657
657 658 umem_free(child, t * sizeof (nvlist_t *));
658 659
659 660 return (root);
660 661 }
661 662
662 663 static void
663 664 ztest_set_random_blocksize(objset_t *os, uint64_t object, dmu_tx_t *tx)
664 665 {
665 666 int bs = SPA_MINBLOCKSHIFT +
666 667 ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1);
667 668 int ibs = DN_MIN_INDBLKSHIFT +
668 669 ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
669 670 int error;
670 671
671 672 error = dmu_object_set_blocksize(os, object, 1ULL << bs, ibs, tx);
672 673 if (error) {
673 674 char osname[300];
674 675 dmu_objset_name(os, osname);
675 676 fatal(0, "dmu_object_set_blocksize('%s', %llu, %d, %d) = %d",
676 677 osname, object, 1 << bs, ibs, error);
677 678 }
678 679 }
679 680
680 681 static uint8_t
681 682 ztest_random_checksum(void)
682 683 {
683 684 uint8_t checksum;
684 685
685 686 do {
686 687 checksum = ztest_random(ZIO_CHECKSUM_FUNCTIONS);
687 688 } while (zio_checksum_table[checksum].ci_zbt);
688 689
689 690 if (checksum == ZIO_CHECKSUM_OFF)
690 691 checksum = ZIO_CHECKSUM_ON;
|
↓ open down ↓ |
591 lines elided |
↑ open up ↑ |
691 692
692 693 return (checksum);
693 694 }
694 695
695 696 static uint8_t
696 697 ztest_random_compress(void)
697 698 {
698 699 return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS));
699 700 }
700 701
702 +static uint8_t
703 +ztest_random_crypt(void)
704 +{
705 + return ((uint8_t)ztest_random(ZIO_CRYPT_FUNCTIONS));
706 +}
707 +
701 708 static int
702 709 ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap)
703 710 {
704 711 dmu_tx_t *tx;
705 712 int error;
706 713
707 714 if (byteswap)
708 715 byteswap_uint64_array(lr, sizeof (*lr));
709 716
710 717 tx = dmu_tx_create(os);
711 718 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
712 719 error = dmu_tx_assign(tx, TXG_WAIT);
713 720 if (error) {
714 721 dmu_tx_abort(tx);
715 722 return (error);
716 723 }
717 724
718 725 error = dmu_object_claim(os, lr->lr_doid, lr->lr_mode, 0,
719 726 DMU_OT_NONE, 0, tx);
720 727 ASSERT3U(error, ==, 0);
721 728 dmu_tx_commit(tx);
722 729
723 730 if (zopt_verbose >= 5) {
724 731 char osname[MAXNAMELEN];
725 732 dmu_objset_name(os, osname);
726 733 (void) printf("replay create of %s object %llu"
727 734 " in txg %llu = %d\n",
728 735 osname, (u_longlong_t)lr->lr_doid,
729 736 (u_longlong_t)dmu_tx_get_txg(tx), error);
730 737 }
731 738
732 739 return (error);
733 740 }
734 741
735 742 static int
736 743 ztest_replay_remove(objset_t *os, lr_remove_t *lr, boolean_t byteswap)
737 744 {
738 745 dmu_tx_t *tx;
739 746 int error;
740 747
741 748 if (byteswap)
742 749 byteswap_uint64_array(lr, sizeof (*lr));
743 750
744 751 tx = dmu_tx_create(os);
745 752 dmu_tx_hold_free(tx, lr->lr_doid, 0, DMU_OBJECT_END);
746 753 error = dmu_tx_assign(tx, TXG_WAIT);
747 754 if (error) {
748 755 dmu_tx_abort(tx);
749 756 return (error);
750 757 }
751 758
752 759 error = dmu_object_free(os, lr->lr_doid, tx);
753 760 dmu_tx_commit(tx);
754 761
755 762 return (error);
756 763 }
757 764
758 765 zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
759 766 NULL, /* 0 no such transaction type */
760 767 ztest_replay_create, /* TX_CREATE */
761 768 NULL, /* TX_MKDIR */
762 769 NULL, /* TX_MKXATTR */
763 770 NULL, /* TX_SYMLINK */
764 771 ztest_replay_remove, /* TX_REMOVE */
765 772 NULL, /* TX_RMDIR */
766 773 NULL, /* TX_LINK */
767 774 NULL, /* TX_RENAME */
768 775 NULL, /* TX_WRITE */
769 776 NULL, /* TX_TRUNCATE */
770 777 NULL, /* TX_SETATTR */
771 778 NULL, /* TX_ACL */
772 779 };
773 780
774 781 /*
775 782 * Verify that we can't destroy an active pool, create an existing pool,
776 783 * or create a pool with a bad vdev spec.
777 784 */
778 785 void
779 786 ztest_spa_create_destroy(ztest_args_t *za)
780 787 {
781 788 int error;
782 789 spa_t *spa;
783 790 nvlist_t *nvroot;
784 791
785 792 /*
786 793 * Attempt to create using a bad file.
787 794 */
788 795 nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
789 796 error = spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL);
790 797 nvlist_free(nvroot);
791 798 if (error != ENOENT)
792 799 fatal(0, "spa_create(bad_file) = %d", error);
793 800
794 801 /*
795 802 * Attempt to create using a bad mirror.
796 803 */
797 804 nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 2, 1);
798 805 error = spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL);
799 806 nvlist_free(nvroot);
800 807 if (error != ENOENT)
801 808 fatal(0, "spa_create(bad_mirror) = %d", error);
802 809
803 810 /*
804 811 * Attempt to create an existing pool. It shouldn't matter
805 812 * what's in the nvroot; we should fail with EEXIST.
806 813 */
807 814 (void) rw_rdlock(&ztest_shared->zs_name_lock);
808 815 nvroot = make_vdev_root("/dev/bogus", NULL, 0, 0, 0, 0, 0, 1);
809 816 error = spa_create(za->za_pool, nvroot, NULL, NULL, NULL);
810 817 nvlist_free(nvroot);
811 818 if (error != EEXIST)
812 819 fatal(0, "spa_create(whatever) = %d", error);
813 820
814 821 error = spa_open(za->za_pool, &spa, FTAG);
815 822 if (error)
816 823 fatal(0, "spa_open() = %d", error);
817 824
818 825 error = spa_destroy(za->za_pool);
819 826 if (error != EBUSY)
820 827 fatal(0, "spa_destroy() = %d", error);
821 828
822 829 spa_close(spa, FTAG);
823 830 (void) rw_unlock(&ztest_shared->zs_name_lock);
824 831 }
825 832
826 833 static vdev_t *
827 834 vdev_lookup_by_path(vdev_t *vd, const char *path)
828 835 {
829 836 vdev_t *mvd;
830 837
831 838 if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
832 839 return (vd);
833 840
834 841 for (int c = 0; c < vd->vdev_children; c++)
835 842 if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
836 843 NULL)
837 844 return (mvd);
838 845
839 846 return (NULL);
840 847 }
841 848
842 849 /*
843 850 * Verify that vdev_add() works as expected.
844 851 */
845 852 void
846 853 ztest_vdev_add_remove(ztest_args_t *za)
847 854 {
848 855 spa_t *spa = za->za_spa;
849 856 uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
850 857 nvlist_t *nvroot;
851 858 int error;
852 859
853 860 (void) mutex_lock(&ztest_shared->zs_vdev_lock);
854 861
855 862 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
856 863
857 864 ztest_shared->zs_vdev_primaries =
858 865 spa->spa_root_vdev->vdev_children * leaves;
859 866
860 867 spa_config_exit(spa, SCL_VDEV, FTAG);
861 868
862 869 /*
863 870 * Make 1/4 of the devices be log devices.
864 871 */
865 872 nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
866 873 ztest_random(4) == 0, zopt_raidz, zopt_mirrors, 1);
867 874
868 875 error = spa_vdev_add(spa, nvroot);
869 876 nvlist_free(nvroot);
870 877
871 878 (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
872 879
873 880 if (error == ENOSPC)
874 881 ztest_record_enospc("spa_vdev_add");
875 882 else if (error != 0)
876 883 fatal(0, "spa_vdev_add() = %d", error);
877 884 }
878 885
879 886 /*
880 887 * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
881 888 */
882 889 void
883 890 ztest_vdev_aux_add_remove(ztest_args_t *za)
884 891 {
885 892 spa_t *spa = za->za_spa;
886 893 vdev_t *rvd = spa->spa_root_vdev;
887 894 spa_aux_vdev_t *sav;
888 895 char *aux;
889 896 uint64_t guid = 0;
890 897 int error;
891 898
892 899 if (ztest_random(2) == 0) {
893 900 sav = &spa->spa_spares;
894 901 aux = ZPOOL_CONFIG_SPARES;
895 902 } else {
896 903 sav = &spa->spa_l2cache;
897 904 aux = ZPOOL_CONFIG_L2CACHE;
898 905 }
899 906
900 907 (void) mutex_lock(&ztest_shared->zs_vdev_lock);
901 908
902 909 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
903 910
904 911 if (sav->sav_count != 0 && ztest_random(4) == 0) {
905 912 /*
906 913 * Pick a random device to remove.
907 914 */
908 915 guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
909 916 } else {
910 917 /*
911 918 * Find an unused device we can add.
912 919 */
913 920 ztest_shared->zs_vdev_aux = 0;
914 921 for (;;) {
915 922 char path[MAXPATHLEN];
916 923 int c;
917 924 (void) sprintf(path, ztest_aux_template, zopt_dir,
918 925 zopt_pool, aux, ztest_shared->zs_vdev_aux);
919 926 for (c = 0; c < sav->sav_count; c++)
920 927 if (strcmp(sav->sav_vdevs[c]->vdev_path,
921 928 path) == 0)
922 929 break;
923 930 if (c == sav->sav_count &&
924 931 vdev_lookup_by_path(rvd, path) == NULL)
925 932 break;
926 933 ztest_shared->zs_vdev_aux++;
927 934 }
928 935 }
929 936
930 937 spa_config_exit(spa, SCL_VDEV, FTAG);
931 938
932 939 if (guid == 0) {
933 940 /*
934 941 * Add a new device.
935 942 */
936 943 nvlist_t *nvroot = make_vdev_root(NULL, aux,
937 944 (zopt_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
938 945 error = spa_vdev_add(spa, nvroot);
939 946 if (error != 0)
940 947 fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
941 948 nvlist_free(nvroot);
942 949 } else {
943 950 /*
944 951 * Remove an existing device. Sometimes, dirty its
945 952 * vdev state first to make sure we handle removal
946 953 * of devices that have pending state changes.
947 954 */
948 955 if (ztest_random(2) == 0)
949 956 (void) vdev_online(spa, guid, B_FALSE, NULL);
950 957
951 958 error = spa_vdev_remove(spa, guid, B_FALSE);
952 959 if (error != 0 && error != EBUSY)
953 960 fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
954 961 }
955 962
956 963 (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
957 964 }
958 965
959 966 /*
960 967 * Verify that we can attach and detach devices.
961 968 */
962 969 void
963 970 ztest_vdev_attach_detach(ztest_args_t *za)
964 971 {
965 972 spa_t *spa = za->za_spa;
966 973 spa_aux_vdev_t *sav = &spa->spa_spares;
967 974 vdev_t *rvd = spa->spa_root_vdev;
968 975 vdev_t *oldvd, *newvd, *pvd;
969 976 nvlist_t *root;
970 977 uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
971 978 uint64_t leaf, top;
972 979 uint64_t ashift = ztest_get_ashift();
973 980 uint64_t oldguid, pguid;
974 981 size_t oldsize, newsize;
975 982 char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
976 983 int replacing;
977 984 int oldvd_has_siblings = B_FALSE;
978 985 int newvd_is_spare = B_FALSE;
979 986 int oldvd_is_log;
980 987 int error, expected_error;
981 988
982 989 (void) mutex_lock(&ztest_shared->zs_vdev_lock);
983 990
984 991 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
985 992
986 993 /*
987 994 * Decide whether to do an attach or a replace.
988 995 */
989 996 replacing = ztest_random(2);
990 997
991 998 /*
992 999 * Pick a random top-level vdev.
993 1000 */
994 1001 top = ztest_random(rvd->vdev_children);
995 1002
996 1003 /*
997 1004 * Pick a random leaf within it.
998 1005 */
999 1006 leaf = ztest_random(leaves);
1000 1007
1001 1008 /*
1002 1009 * Locate this vdev.
1003 1010 */
1004 1011 oldvd = rvd->vdev_child[top];
1005 1012 if (zopt_mirrors >= 1) {
1006 1013 ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
1007 1014 ASSERT(oldvd->vdev_children >= zopt_mirrors);
1008 1015 oldvd = oldvd->vdev_child[leaf / zopt_raidz];
1009 1016 }
1010 1017 if (zopt_raidz > 1) {
1011 1018 ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
1012 1019 ASSERT(oldvd->vdev_children == zopt_raidz);
1013 1020 oldvd = oldvd->vdev_child[leaf % zopt_raidz];
1014 1021 }
1015 1022
1016 1023 /*
1017 1024 * If we're already doing an attach or replace, oldvd may be a
1018 1025 * mirror vdev -- in which case, pick a random child.
1019 1026 */
1020 1027 while (oldvd->vdev_children != 0) {
1021 1028 oldvd_has_siblings = B_TRUE;
1022 1029 ASSERT(oldvd->vdev_children >= 2);
1023 1030 oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
1024 1031 }
1025 1032
1026 1033 oldguid = oldvd->vdev_guid;
1027 1034 oldsize = vdev_get_rsize(oldvd);
1028 1035 oldvd_is_log = oldvd->vdev_top->vdev_islog;
1029 1036 (void) strcpy(oldpath, oldvd->vdev_path);
1030 1037 pvd = oldvd->vdev_parent;
1031 1038 pguid = pvd->vdev_guid;
1032 1039
1033 1040 /*
1034 1041 * If oldvd has siblings, then half of the time, detach it.
1035 1042 */
1036 1043 if (oldvd_has_siblings && ztest_random(2) == 0) {
1037 1044 spa_config_exit(spa, SCL_VDEV, FTAG);
1038 1045 error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
1039 1046 if (error != 0 && error != ENODEV && error != EBUSY &&
1040 1047 error != ENOTSUP)
1041 1048 fatal(0, "detach (%s) returned %d", oldpath, error);
1042 1049 (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
1043 1050 return;
1044 1051 }
1045 1052
1046 1053 /*
1047 1054 * For the new vdev, choose with equal probability between the two
1048 1055 * standard paths (ending in either 'a' or 'b') or a random hot spare.
1049 1056 */
1050 1057 if (sav->sav_count != 0 && ztest_random(3) == 0) {
1051 1058 newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
1052 1059 newvd_is_spare = B_TRUE;
1053 1060 (void) strcpy(newpath, newvd->vdev_path);
1054 1061 } else {
1055 1062 (void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
1056 1063 zopt_dir, zopt_pool, top * leaves + leaf);
1057 1064 if (ztest_random(2) == 0)
1058 1065 newpath[strlen(newpath) - 1] = 'b';
1059 1066 newvd = vdev_lookup_by_path(rvd, newpath);
1060 1067 }
1061 1068
1062 1069 if (newvd) {
1063 1070 newsize = vdev_get_rsize(newvd);
1064 1071 } else {
1065 1072 /*
1066 1073 * Make newsize a little bigger or smaller than oldsize.
1067 1074 * If it's smaller, the attach should fail.
1068 1075 * If it's larger, and we're doing a replace,
1069 1076 * we should get dynamic LUN growth when we're done.
1070 1077 */
1071 1078 newsize = 10 * oldsize / (9 + ztest_random(3));
1072 1079 }
1073 1080
1074 1081 /*
1075 1082 * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
1076 1083 * unless it's a replace; in that case any non-replacing parent is OK.
1077 1084 *
1078 1085 * If newvd is already part of the pool, it should fail with EBUSY.
1079 1086 *
1080 1087 * If newvd is too small, it should fail with EOVERFLOW.
1081 1088 */
1082 1089 if (pvd->vdev_ops != &vdev_mirror_ops &&
1083 1090 pvd->vdev_ops != &vdev_root_ops && (!replacing ||
1084 1091 pvd->vdev_ops == &vdev_replacing_ops ||
1085 1092 pvd->vdev_ops == &vdev_spare_ops))
1086 1093 expected_error = ENOTSUP;
1087 1094 else if (newvd_is_spare && (!replacing || oldvd_is_log))
1088 1095 expected_error = ENOTSUP;
1089 1096 else if (newvd == oldvd)
1090 1097 expected_error = replacing ? 0 : EBUSY;
1091 1098 else if (vdev_lookup_by_path(rvd, newpath) != NULL)
1092 1099 expected_error = EBUSY;
1093 1100 else if (newsize < oldsize)
1094 1101 expected_error = EOVERFLOW;
1095 1102 else if (ashift > oldvd->vdev_top->vdev_ashift)
1096 1103 expected_error = EDOM;
1097 1104 else
1098 1105 expected_error = 0;
1099 1106
1100 1107 spa_config_exit(spa, SCL_VDEV, FTAG);
1101 1108
1102 1109 /*
1103 1110 * Build the nvlist describing newpath.
1104 1111 */
1105 1112 root = make_vdev_root(newpath, NULL, newvd == NULL ? newsize : 0,
1106 1113 ashift, 0, 0, 0, 1);
1107 1114
1108 1115 error = spa_vdev_attach(spa, oldguid, root, replacing);
1109 1116
1110 1117 nvlist_free(root);
1111 1118
1112 1119 /*
1113 1120 * If our parent was the replacing vdev, but the replace completed,
1114 1121 * then instead of failing with ENOTSUP we may either succeed,
1115 1122 * fail with ENODEV, or fail with EOVERFLOW.
1116 1123 */
1117 1124 if (expected_error == ENOTSUP &&
1118 1125 (error == 0 || error == ENODEV || error == EOVERFLOW))
1119 1126 expected_error = error;
1120 1127
1121 1128 /*
1122 1129 * If someone grew the LUN, the replacement may be too small.
1123 1130 */
1124 1131 if (error == EOVERFLOW || error == EBUSY)
1125 1132 expected_error = error;
1126 1133
1127 1134 /* XXX workaround 6690467 */
1128 1135 if (error != expected_error && expected_error != EBUSY) {
1129 1136 fatal(0, "attach (%s %llu, %s %llu, %d) "
1130 1137 "returned %d, expected %d",
1131 1138 oldpath, (longlong_t)oldsize, newpath,
1132 1139 (longlong_t)newsize, replacing, error, expected_error);
1133 1140 }
1134 1141
1135 1142 (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
1136 1143 }
1137 1144
1138 1145 /*
1139 1146 * Verify that dynamic LUN growth works as expected.
1140 1147 */
1141 1148 void
1142 1149 ztest_vdev_LUN_growth(ztest_args_t *za)
1143 1150 {
1144 1151 spa_t *spa = za->za_spa;
1145 1152 char dev_name[MAXPATHLEN];
1146 1153 uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
1147 1154 uint64_t vdev;
1148 1155 size_t fsize;
1149 1156 int fd;
1150 1157
1151 1158 (void) mutex_lock(&ztest_shared->zs_vdev_lock);
1152 1159
1153 1160 /*
1154 1161 * Pick a random leaf vdev.
1155 1162 */
1156 1163 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
1157 1164 vdev = ztest_random(spa->spa_root_vdev->vdev_children * leaves);
1158 1165 spa_config_exit(spa, SCL_VDEV, FTAG);
1159 1166
1160 1167 (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
1161 1168
1162 1169 if ((fd = open(dev_name, O_RDWR)) != -1) {
1163 1170 /*
1164 1171 * Determine the size.
1165 1172 */
1166 1173 fsize = lseek(fd, 0, SEEK_END);
1167 1174
1168 1175 /*
1169 1176 * If it's less than 2x the original size, grow by around 3%.
1170 1177 */
1171 1178 if (fsize < 2 * zopt_vdev_size) {
1172 1179 size_t newsize = fsize + ztest_random(fsize / 32);
1173 1180 (void) ftruncate(fd, newsize);
1174 1181 if (zopt_verbose >= 6) {
1175 1182 (void) printf("%s grew from %lu to %lu bytes\n",
1176 1183 dev_name, (ulong_t)fsize, (ulong_t)newsize);
1177 1184 }
1178 1185 }
1179 1186 (void) close(fd);
1180 1187 }
1181 1188
1182 1189 (void) mutex_unlock(&ztest_shared->zs_vdev_lock);
1183 1190 }
1184 1191
1185 1192 /* ARGSUSED */
1186 1193 static void
1187 1194 ztest_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
1188 1195 {
1189 1196 /*
1190 1197 * Create the directory object.
1191 1198 */
1192 1199 VERIFY(dmu_object_claim(os, ZTEST_DIROBJ,
1193 1200 DMU_OT_UINT64_OTHER, ZTEST_DIROBJ_BLOCKSIZE,
1194 1201 DMU_OT_UINT64_OTHER, 5 * sizeof (ztest_block_tag_t), tx) == 0);
1195 1202
1196 1203 VERIFY(zap_create_claim(os, ZTEST_MICROZAP_OBJ,
1197 1204 DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
1198 1205
1199 1206 VERIFY(zap_create_claim(os, ZTEST_FATZAP_OBJ,
1200 1207 DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
1201 1208 }
1202 1209
1203 1210 static int
1204 1211 ztest_destroy_cb(char *name, void *arg)
1205 1212 {
1206 1213 ztest_args_t *za = arg;
1207 1214 objset_t *os;
1208 1215 dmu_object_info_t *doi = &za->za_doi;
1209 1216 int error;
1210 1217
1211 1218 /*
1212 1219 * Verify that the dataset contains a directory object.
1213 1220 */
1214 1221 error = dmu_objset_open(name, DMU_OST_OTHER,
1215 1222 DS_MODE_USER | DS_MODE_READONLY, &os);
1216 1223 ASSERT3U(error, ==, 0);
1217 1224 error = dmu_object_info(os, ZTEST_DIROBJ, doi);
1218 1225 if (error != ENOENT) {
1219 1226 /* We could have crashed in the middle of destroying it */
1220 1227 ASSERT3U(error, ==, 0);
1221 1228 ASSERT3U(doi->doi_type, ==, DMU_OT_UINT64_OTHER);
1222 1229 ASSERT3S(doi->doi_physical_blks, >=, 0);
1223 1230 }
1224 1231 dmu_objset_close(os);
1225 1232
1226 1233 /*
1227 1234 * Destroy the dataset.
1228 1235 */
1229 1236 error = dmu_objset_destroy(name);
1230 1237 if (error) {
1231 1238 (void) dmu_objset_open(name, DMU_OST_OTHER,
1232 1239 DS_MODE_USER | DS_MODE_READONLY, &os);
1233 1240 fatal(0, "dmu_objset_destroy(os=%p) = %d\n", &os, error);
1234 1241 }
1235 1242 return (0);
1236 1243 }
1237 1244
1238 1245 /*
1239 1246 * Verify that dmu_objset_{create,destroy,open,close} work as expected.
1240 1247 */
1241 1248 static uint64_t
1242 1249 ztest_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t object, int mode)
1243 1250 {
1244 1251 itx_t *itx;
1245 1252 lr_create_t *lr;
1246 1253 size_t namesize;
1247 1254 char name[24];
1248 1255
1249 1256 (void) sprintf(name, "ZOBJ_%llu", (u_longlong_t)object);
1250 1257 namesize = strlen(name) + 1;
1251 1258
1252 1259 itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize +
1253 1260 ztest_random(ZIL_MAX_BLKSZ));
1254 1261 lr = (lr_create_t *)&itx->itx_lr;
1255 1262 bzero(lr + 1, lr->lr_common.lrc_reclen - sizeof (*lr));
1256 1263 lr->lr_doid = object;
1257 1264 lr->lr_foid = 0;
1258 1265 lr->lr_mode = mode;
1259 1266 lr->lr_uid = 0;
1260 1267 lr->lr_gid = 0;
1261 1268 lr->lr_gen = dmu_tx_get_txg(tx);
1262 1269 lr->lr_crtime[0] = time(NULL);
1263 1270 lr->lr_crtime[1] = 0;
1264 1271 lr->lr_rdev = 0;
1265 1272 bcopy(name, (char *)(lr + 1), namesize);
1266 1273
1267 1274 return (zil_itx_assign(zilog, itx, tx));
1268 1275 }
1269 1276
1270 1277 void
1271 1278 ztest_dmu_objset_create_destroy(ztest_args_t *za)
1272 1279 {
1273 1280 int error;
1274 1281 objset_t *os, *os2;
1275 1282 char name[100];
1276 1283 int basemode, expected_error;
1277 1284 zilog_t *zilog;
1278 1285 uint64_t seq;
1279 1286 uint64_t objects;
1280 1287
1281 1288 (void) rw_rdlock(&ztest_shared->zs_name_lock);
1282 1289 (void) snprintf(name, 100, "%s/%s_temp_%llu", za->za_pool, za->za_pool,
1283 1290 (u_longlong_t)za->za_instance);
1284 1291
1285 1292 basemode = DS_MODE_TYPE(za->za_instance);
1286 1293 if (basemode != DS_MODE_USER && basemode != DS_MODE_OWNER)
1287 1294 basemode = DS_MODE_USER;
1288 1295
1289 1296 /*
1290 1297 * If this dataset exists from a previous run, process its replay log
1291 1298 * half of the time. If we don't replay it, then dmu_objset_destroy()
1292 1299 * (invoked from ztest_destroy_cb() below) should just throw it away.
1293 1300 */
1294 1301 if (ztest_random(2) == 0 &&
1295 1302 dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os) == 0) {
1296 1303 zil_replay(os, os, ztest_replay_vector);
1297 1304 dmu_objset_close(os);
1298 1305 }
1299 1306
1300 1307 /*
1301 1308 * There may be an old instance of the dataset we're about to
1302 1309 * create lying around from a previous run. If so, destroy it
1303 1310 * and all of its snapshots.
1304 1311 */
1305 1312 (void) dmu_objset_find(name, ztest_destroy_cb, za,
1306 1313 DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
1307 1314
1308 1315 /*
1309 1316 * Verify that the destroyed dataset is no longer in the namespace.
1310 1317 */
1311 1318 error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
1312 1319 if (error != ENOENT)
1313 1320 fatal(1, "dmu_objset_open(%s) found destroyed dataset %p",
1314 1321 name, os);
1315 1322
1316 1323 /*
1317 1324 * Verify that we can create a new dataset.
1318 1325 */
1319 1326 error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
1320 1327 ztest_create_cb, NULL);
1321 1328 if (error) {
1322 1329 if (error == ENOSPC) {
1323 1330 ztest_record_enospc("dmu_objset_create");
1324 1331 (void) rw_unlock(&ztest_shared->zs_name_lock);
1325 1332 return;
1326 1333 }
1327 1334 fatal(0, "dmu_objset_create(%s) = %d", name, error);
1328 1335 }
1329 1336
1330 1337 error = dmu_objset_open(name, DMU_OST_OTHER, basemode, &os);
1331 1338 if (error) {
1332 1339 fatal(0, "dmu_objset_open(%s) = %d", name, error);
1333 1340 }
1334 1341
1335 1342 /*
1336 1343 * Open the intent log for it.
1337 1344 */
1338 1345 zilog = zil_open(os, NULL);
1339 1346
1340 1347 /*
1341 1348 * Put a random number of objects in there.
1342 1349 */
1343 1350 objects = ztest_random(20);
1344 1351 seq = 0;
1345 1352 while (objects-- != 0) {
1346 1353 uint64_t object;
1347 1354 dmu_tx_t *tx = dmu_tx_create(os);
1348 1355 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof (name));
1349 1356 error = dmu_tx_assign(tx, TXG_WAIT);
1350 1357 if (error) {
1351 1358 dmu_tx_abort(tx);
1352 1359 } else {
1353 1360 object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
1354 1361 DMU_OT_NONE, 0, tx);
1355 1362 ztest_set_random_blocksize(os, object, tx);
1356 1363 seq = ztest_log_create(zilog, tx, object,
1357 1364 DMU_OT_UINT64_OTHER);
1358 1365 dmu_write(os, object, 0, sizeof (name), name, tx);
1359 1366 dmu_tx_commit(tx);
1360 1367 }
1361 1368 if (ztest_random(5) == 0) {
1362 1369 zil_commit(zilog, seq, object);
1363 1370 }
1364 1371 if (ztest_random(100) == 0) {
1365 1372 error = zil_suspend(zilog);
1366 1373 if (error == 0) {
1367 1374 zil_resume(zilog);
1368 1375 }
1369 1376 }
1370 1377 }
1371 1378
1372 1379 /*
1373 1380 * Verify that we cannot create an existing dataset.
1374 1381 */
1375 1382 error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0, NULL, NULL);
1376 1383 if (error != EEXIST)
1377 1384 fatal(0, "created existing dataset, error = %d", error);
1378 1385
1379 1386 /*
1380 1387 * Verify that multiple dataset holds are allowed, but only when
1381 1388 * the new access mode is compatible with the base mode.
1382 1389 */
1383 1390 if (basemode == DS_MODE_OWNER) {
1384 1391 error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_USER,
1385 1392 &os2);
1386 1393 if (error)
1387 1394 fatal(0, "dmu_objset_open('%s') = %d", name, error);
1388 1395 else
1389 1396 dmu_objset_close(os2);
1390 1397 }
1391 1398 error = dmu_objset_open(name, DMU_OST_OTHER, DS_MODE_OWNER, &os2);
1392 1399 expected_error = (basemode == DS_MODE_OWNER) ? EBUSY : 0;
1393 1400 if (error != expected_error)
1394 1401 fatal(0, "dmu_objset_open('%s') = %d, expected %d",
1395 1402 name, error, expected_error);
1396 1403 if (error == 0)
1397 1404 dmu_objset_close(os2);
1398 1405
1399 1406 zil_close(zilog);
1400 1407 dmu_objset_close(os);
1401 1408
1402 1409 error = dmu_objset_destroy(name);
1403 1410 if (error)
1404 1411 fatal(0, "dmu_objset_destroy(%s) = %d", name, error);
1405 1412
1406 1413 (void) rw_unlock(&ztest_shared->zs_name_lock);
1407 1414 }
1408 1415
1409 1416 /*
1410 1417 * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
1411 1418 */
1412 1419 void
1413 1420 ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
1414 1421 {
1415 1422 int error;
1416 1423 objset_t *os = za->za_os;
1417 1424 char snapname[100];
1418 1425 char osname[MAXNAMELEN];
1419 1426
1420 1427 (void) rw_rdlock(&ztest_shared->zs_name_lock);
1421 1428 dmu_objset_name(os, osname);
1422 1429 (void) snprintf(snapname, 100, "%s@%llu", osname,
1423 1430 (u_longlong_t)za->za_instance);
1424 1431
1425 1432 error = dmu_objset_destroy(snapname);
1426 1433 if (error != 0 && error != ENOENT)
1427 1434 fatal(0, "dmu_objset_destroy() = %d", error);
1428 1435 error = dmu_objset_snapshot(osname, strchr(snapname, '@')+1, FALSE);
1429 1436 if (error == ENOSPC)
1430 1437 ztest_record_enospc("dmu_take_snapshot");
1431 1438 else if (error != 0 && error != EEXIST)
1432 1439 fatal(0, "dmu_take_snapshot() = %d", error);
1433 1440 (void) rw_unlock(&ztest_shared->zs_name_lock);
1434 1441 }
1435 1442
1436 1443 /*
1437 1444 * Verify that dmu_object_{alloc,free} work as expected.
1438 1445 */
1439 1446 void
1440 1447 ztest_dmu_object_alloc_free(ztest_args_t *za)
1441 1448 {
1442 1449 objset_t *os = za->za_os;
1443 1450 dmu_buf_t *db;
1444 1451 dmu_tx_t *tx;
1445 1452 uint64_t batchobj, object, batchsize, endoff, temp;
1446 1453 int b, c, error, bonuslen;
1447 1454 dmu_object_info_t *doi = &za->za_doi;
1448 1455 char osname[MAXNAMELEN];
1449 1456
1450 1457 dmu_objset_name(os, osname);
1451 1458
1452 1459 endoff = -8ULL;
1453 1460 batchsize = 2;
1454 1461
1455 1462 /*
1456 1463 * Create a batch object if necessary, and record it in the directory.
1457 1464 */
1458 1465 VERIFY3U(0, ==, dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
1459 1466 sizeof (uint64_t), &batchobj));
1460 1467 if (batchobj == 0) {
1461 1468 tx = dmu_tx_create(os);
1462 1469 dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
1463 1470 sizeof (uint64_t));
1464 1471 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
1465 1472 error = dmu_tx_assign(tx, TXG_WAIT);
1466 1473 if (error) {
1467 1474 ztest_record_enospc("create a batch object");
1468 1475 dmu_tx_abort(tx);
1469 1476 return;
1470 1477 }
1471 1478 batchobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
1472 1479 DMU_OT_NONE, 0, tx);
1473 1480 ztest_set_random_blocksize(os, batchobj, tx);
1474 1481 dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
1475 1482 sizeof (uint64_t), &batchobj, tx);
1476 1483 dmu_tx_commit(tx);
1477 1484 }
1478 1485
1479 1486 /*
1480 1487 * Destroy the previous batch of objects.
1481 1488 */
1482 1489 for (b = 0; b < batchsize; b++) {
1483 1490 VERIFY3U(0, ==, dmu_read(os, batchobj, b * sizeof (uint64_t),
1484 1491 sizeof (uint64_t), &object));
1485 1492 if (object == 0)
1486 1493 continue;
1487 1494 /*
1488 1495 * Read and validate contents.
1489 1496 * We expect the nth byte of the bonus buffer to be n.
1490 1497 */
1491 1498 VERIFY(0 == dmu_bonus_hold(os, object, FTAG, &db));
1492 1499 za->za_dbuf = db;
1493 1500
1494 1501 dmu_object_info_from_db(db, doi);
1495 1502 ASSERT(doi->doi_type == DMU_OT_UINT64_OTHER);
1496 1503 ASSERT(doi->doi_bonus_type == DMU_OT_PLAIN_OTHER);
1497 1504 ASSERT3S(doi->doi_physical_blks, >=, 0);
1498 1505
1499 1506 bonuslen = doi->doi_bonus_size;
1500 1507
1501 1508 for (c = 0; c < bonuslen; c++) {
1502 1509 if (((uint8_t *)db->db_data)[c] !=
1503 1510 (uint8_t)(c + bonuslen)) {
1504 1511 fatal(0,
1505 1512 "bad bonus: %s, obj %llu, off %d: %u != %u",
1506 1513 osname, object, c,
1507 1514 ((uint8_t *)db->db_data)[c],
1508 1515 (uint8_t)(c + bonuslen));
1509 1516 }
1510 1517 }
1511 1518
1512 1519 dmu_buf_rele(db, FTAG);
1513 1520 za->za_dbuf = NULL;
1514 1521
1515 1522 /*
1516 1523 * We expect the word at endoff to be our object number.
1517 1524 */
1518 1525 VERIFY(0 == dmu_read(os, object, endoff,
1519 1526 sizeof (uint64_t), &temp));
1520 1527
1521 1528 if (temp != object) {
1522 1529 fatal(0, "bad data in %s, got %llu, expected %llu",
1523 1530 osname, temp, object);
1524 1531 }
1525 1532
1526 1533 /*
1527 1534 * Destroy old object and clear batch entry.
1528 1535 */
1529 1536 tx = dmu_tx_create(os);
1530 1537 dmu_tx_hold_write(tx, batchobj,
1531 1538 b * sizeof (uint64_t), sizeof (uint64_t));
1532 1539 dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
1533 1540 error = dmu_tx_assign(tx, TXG_WAIT);
1534 1541 if (error) {
1535 1542 ztest_record_enospc("free object");
1536 1543 dmu_tx_abort(tx);
1537 1544 return;
1538 1545 }
1539 1546 error = dmu_object_free(os, object, tx);
|
↓ open down ↓ |
829 lines elided |
↑ open up ↑ |
1540 1547 if (error) {
1541 1548 fatal(0, "dmu_object_free('%s', %llu) = %d",
1542 1549 osname, object, error);
1543 1550 }
1544 1551 object = 0;
1545 1552
1546 1553 dmu_object_set_checksum(os, batchobj,
1547 1554 ztest_random_checksum(), tx);
1548 1555 dmu_object_set_compress(os, batchobj,
1549 1556 ztest_random_compress(), tx);
1557 + dmu_object_set_crypt(os, batchobj,
1558 + ztest_random_crypt(), tx);
1550 1559
1551 1560 dmu_write(os, batchobj, b * sizeof (uint64_t),
1552 1561 sizeof (uint64_t), &object, tx);
1553 1562
1554 1563 dmu_tx_commit(tx);
1555 1564 }
1556 1565
1557 1566 /*
1558 1567 * Before creating the new batch of objects, generate a bunch of churn.
1559 1568 */
1560 1569 for (b = ztest_random(100); b > 0; b--) {
1561 1570 tx = dmu_tx_create(os);
1562 1571 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
1563 1572 error = dmu_tx_assign(tx, TXG_WAIT);
1564 1573 if (error) {
1565 1574 ztest_record_enospc("churn objects");
1566 1575 dmu_tx_abort(tx);
1567 1576 return;
1568 1577 }
1569 1578 object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
1570 1579 DMU_OT_NONE, 0, tx);
1571 1580 ztest_set_random_blocksize(os, object, tx);
1572 1581 error = dmu_object_free(os, object, tx);
1573 1582 if (error) {
1574 1583 fatal(0, "dmu_object_free('%s', %llu) = %d",
1575 1584 osname, object, error);
1576 1585 }
1577 1586 dmu_tx_commit(tx);
1578 1587 }
1579 1588
1580 1589 /*
1581 1590 * Create a new batch of objects with randomly chosen
1582 1591 * blocksizes and record them in the batch directory.
1583 1592 */
1584 1593 for (b = 0; b < batchsize; b++) {
1585 1594 uint32_t va_blksize;
1586 1595 u_longlong_t va_nblocks;
1587 1596
1588 1597 tx = dmu_tx_create(os);
1589 1598 dmu_tx_hold_write(tx, batchobj, b * sizeof (uint64_t),
1590 1599 sizeof (uint64_t));
1591 1600 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
1592 1601 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, endoff,
1593 1602 sizeof (uint64_t));
1594 1603 error = dmu_tx_assign(tx, TXG_WAIT);
1595 1604 if (error) {
1596 1605 ztest_record_enospc("create batchobj");
1597 1606 dmu_tx_abort(tx);
1598 1607 return;
1599 1608 }
1600 1609 bonuslen = (int)ztest_random(dmu_bonus_max()) + 1;
|
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
1601 1610
1602 1611 object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
1603 1612 DMU_OT_PLAIN_OTHER, bonuslen, tx);
1604 1613
1605 1614 ztest_set_random_blocksize(os, object, tx);
1606 1615
1607 1616 dmu_object_set_checksum(os, object,
1608 1617 ztest_random_checksum(), tx);
1609 1618 dmu_object_set_compress(os, object,
1610 1619 ztest_random_compress(), tx);
1620 + dmu_object_set_crypt(os, object,
1621 + ztest_random_crypt(), tx);
1611 1622
1612 1623 dmu_write(os, batchobj, b * sizeof (uint64_t),
1613 1624 sizeof (uint64_t), &object, tx);
1614 1625
1615 1626 /*
1616 1627 * Write to both the bonus buffer and the regular data.
1617 1628 */
1618 1629 VERIFY(dmu_bonus_hold(os, object, FTAG, &db) == 0);
1619 1630 za->za_dbuf = db;
1620 1631 ASSERT3U(bonuslen, <=, db->db_size);
1621 1632
1622 1633 dmu_object_size_from_db(db, &va_blksize, &va_nblocks);
1623 1634 ASSERT3S(va_nblocks, >=, 0);
1624 1635
1625 1636 dmu_buf_will_dirty(db, tx);
1626 1637
1627 1638 /*
1628 1639 * See comments above regarding the contents of
1629 1640 * the bonus buffer and the word at endoff.
1630 1641 */
1631 1642 for (c = 0; c < bonuslen; c++)
1632 1643 ((uint8_t *)db->db_data)[c] = (uint8_t)(c + bonuslen);
1633 1644
1634 1645 dmu_buf_rele(db, FTAG);
1635 1646 za->za_dbuf = NULL;
1636 1647
1637 1648 /*
1638 1649 * Write to a large offset to increase indirection.
1639 1650 */
1640 1651 dmu_write(os, object, endoff, sizeof (uint64_t), &object, tx);
1641 1652
1642 1653 dmu_tx_commit(tx);
1643 1654 }
1644 1655 }
1645 1656
1646 1657 /*
1647 1658 * Verify that dmu_{read,write} work as expected.
1648 1659 *
1648 1659 * The two types below are the on-disk records used by
1648 1659 * ztest_dmu_read_write().
1648 1659 */
1649 1660 typedef struct bufwad {
1650 1661 uint64_t bw_index; /* index of this bufwad (n + i when written) */
1651 1662 uint64_t bw_txg; /* txg of the tx that wrote it */
1652 1663 uint64_t bw_data; /* random nonzero value; all-zero bufwad means freed */
1653 1664 } bufwad_t;
1654 1665
1655 1666 typedef struct dmu_read_write_dir { /* directory entry stored at za_diroff */
1656 1667 uint64_t dd_packobj; /* object number of the dense "pack" object */
1657 1668 uint64_t dd_bigobj; /* object number of the sparse "big" object */
1658 1669 uint64_t dd_chunk; /* chunk size in bytes; 0 means not yet set up */
1659 1670 } dmu_read_write_dir_t;
1660 1671
1661 1672 void
1662 1673 ztest_dmu_read_write(ztest_args_t *za)
1663 1674 {
1664 1675 objset_t *os = za->za_os;
1665 1676 dmu_read_write_dir_t dd;
1666 1677 dmu_tx_t *tx;
1667 1678 int i, freeit, error;
1668 1679 uint64_t n, s, txg;
1669 1680 bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
1670 1681 uint64_t packoff, packsize, bigoff, bigsize;
1671 1682 uint64_t regions = 997;
1672 1683 uint64_t stride = 123456789ULL;
1673 1684 uint64_t width = 40;
1674 1685 int free_percent = 5;
1675 1686
1676 1687 /*
1677 1688 * This test uses two objects, packobj and bigobj, that are always
1678 1689 * updated together (i.e. in the same tx) so that their contents are
1679 1690 * in sync and can be compared. Their contents relate to each other
1680 1691 * in a simple way: packobj is a dense array of 'bufwad' structures,
1681 1692 * while bigobj is a sparse array of the same bufwads. Specifically,
1682 1693 * for any index n, there are three bufwads that should be identical:
1683 1694 *
1684 1695 * packobj, at offset n * sizeof (bufwad_t)
1685 1696 * bigobj, at the head of the nth chunk
1686 1697 * bigobj, at the tail of the nth chunk
1687 1698 *
1688 1699 * The chunk size is arbitrary. It doesn't have to be a power of two,
1689 1700 * and it doesn't have any relation to the object blocksize.
1690 1701 * The only requirement is that it can hold at least two bufwads.
1691 1702 *
1692 1703 * Normally, we write the bufwad to each of these locations.
1693 1704 * However, free_percent of the time we instead write zeroes to
1694 1705 * packobj and perform a dmu_free_range() on bigobj. By comparing
1695 1706 * bigobj to packobj, we can verify that the DMU is correctly
1696 1707 * tracking which parts of an object are allocated and free,
1697 1708 * and that the contents of the allocated blocks are correct.
1698 1709 */
1699 1710
1700 1711 /*
1701 1712 * Read the directory info. If it's the first time, set things up.
1702 1713 */
1703 1714 VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
1704 1715 sizeof (dd), &dd));
1705 1716 if (dd.dd_chunk == 0) {
1706 1717 ASSERT(dd.dd_packobj == 0);
1707 1718 ASSERT(dd.dd_bigobj == 0);
1708 1719 tx = dmu_tx_create(os);
1709 1720 dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (dd));
1710 1721 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
1711 1722 error = dmu_tx_assign(tx, TXG_WAIT);
1712 1723 if (error) {
1713 1724 ztest_record_enospc("create r/w directory");
1714 1725 dmu_tx_abort(tx);
1715 1726 return;
1716 1727 }
1717 1728
1718 1729 dd.dd_packobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
1719 1730 DMU_OT_NONE, 0, tx);
1720 1731 dd.dd_bigobj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
1721 1732 DMU_OT_NONE, 0, tx);
1722 1733 dd.dd_chunk = (1000 + ztest_random(1000)) * sizeof (uint64_t);
1723 1734
1724 1735 ztest_set_random_blocksize(os, dd.dd_packobj, tx);
1725 1736 ztest_set_random_blocksize(os, dd.dd_bigobj, tx);
1726 1737
1727 1738 dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (dd), &dd,
1728 1739 tx);
1729 1740 dmu_tx_commit(tx);
1730 1741 }
1731 1742
1732 1743 /*
1733 1744 * Prefetch a random chunk of the big object.
1734 1745 * Our aim here is to get some async reads in flight
1735 1746 * for blocks that we may free below; the DMU should
1736 1747 * handle this race correctly.
1737 1748 */
1738 1749 n = ztest_random(regions) * stride + ztest_random(width);
1739 1750 s = 1 + ztest_random(2 * width - 1);
1740 1751 dmu_prefetch(os, dd.dd_bigobj, n * dd.dd_chunk, s * dd.dd_chunk);
1741 1752
1742 1753 /*
1743 1754 * Pick a random index and compute the offsets into packobj and bigobj.
1744 1755 */
1745 1756 n = ztest_random(regions) * stride + ztest_random(width);
1746 1757 s = 1 + ztest_random(width - 1);
1747 1758
1748 1759 packoff = n * sizeof (bufwad_t);
1749 1760 packsize = s * sizeof (bufwad_t);
1750 1761
1751 1762 bigoff = n * dd.dd_chunk;
1752 1763 bigsize = s * dd.dd_chunk;
1753 1764
1754 1765 packbuf = umem_alloc(packsize, UMEM_NOFAIL);
1755 1766 bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
1756 1767
1757 1768 /*
1758 1769 * free_percent of the time, free a range of bigobj rather than
1759 1770 * overwriting it.
1760 1771 */
1761 1772 freeit = (ztest_random(100) < free_percent);
1762 1773
1763 1774 /*
1764 1775 * Read the current contents of our objects.
1765 1776 */
1766 1777 error = dmu_read(os, dd.dd_packobj, packoff, packsize, packbuf);
1767 1778 ASSERT3U(error, ==, 0);
1768 1779 error = dmu_read(os, dd.dd_bigobj, bigoff, bigsize, bigbuf);
1769 1780 ASSERT3U(error, ==, 0);
1770 1781
1771 1782 /*
1772 1783 * Get a tx for the mods to both packobj and bigobj.
1773 1784 */
1774 1785 tx = dmu_tx_create(os);
1775 1786
1776 1787 dmu_tx_hold_write(tx, dd.dd_packobj, packoff, packsize);
1777 1788
1778 1789 if (freeit)
1779 1790 dmu_tx_hold_free(tx, dd.dd_bigobj, bigoff, bigsize);
1780 1791 else
1781 1792 dmu_tx_hold_write(tx, dd.dd_bigobj, bigoff, bigsize);
1782 1793
1783 1794 error = dmu_tx_assign(tx, TXG_WAIT);
1784 1795
1785 1796 if (error) {
1786 1797 ztest_record_enospc("dmu r/w range");
1787 1798 dmu_tx_abort(tx);
1788 1799 umem_free(packbuf, packsize);
1789 1800 umem_free(bigbuf, bigsize);
1790 1801 return;
1791 1802 }
1792 1803
1793 1804 txg = dmu_tx_get_txg(tx);
1794 1805
1795 1806 /*
1796 1807 * For each index from n to n + s, verify that the existing bufwad
1797 1808 * in packobj matches the bufwads at the head and tail of the
1798 1809 * corresponding chunk in bigobj. Then update all three bufwads
1799 1810 * with the new values we want to write out.
1800 1811 */
1801 1812 for (i = 0; i < s; i++) {
1802 1813 /* LINTED */
1803 1814 pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
1804 1815 /* LINTED */
1805 1816 bigH = (bufwad_t *)((char *)bigbuf + i * dd.dd_chunk);
1806 1817 /* LINTED */
1807 1818 bigT = (bufwad_t *)((char *)bigH + dd.dd_chunk) - 1;
1808 1819
1809 1820 ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
1810 1821 ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
1811 1822
1812 1823 if (pack->bw_txg > txg)
1813 1824 fatal(0, "future leak: got %llx, open txg is %llx",
1814 1825 pack->bw_txg, txg);
1815 1826
1816 1827 if (pack->bw_data != 0 && pack->bw_index != n + i)
1817 1828 fatal(0, "wrong index: got %llx, wanted %llx+%llx",
1818 1829 pack->bw_index, n, i);
1819 1830
1820 1831 if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
1821 1832 fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);
1822 1833
1823 1834 if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
1824 1835 fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);
1825 1836
1826 1837 if (freeit) {
1827 1838 bzero(pack, sizeof (bufwad_t));
1828 1839 } else {
1829 1840 pack->bw_index = n + i;
1830 1841 pack->bw_txg = txg;
1831 1842 pack->bw_data = 1 + ztest_random(-2ULL);
1832 1843 }
1833 1844 *bigH = *pack;
1834 1845 *bigT = *pack;
1835 1846 }
1836 1847
1837 1848 /*
1838 1849 * We've verified all the old bufwads, and made new ones.
1839 1850 * Now write them out.
1840 1851 */
1841 1852 dmu_write(os, dd.dd_packobj, packoff, packsize, packbuf, tx);
1842 1853
1843 1854 if (freeit) {
1844 1855 if (zopt_verbose >= 6) {
1845 1856 (void) printf("freeing offset %llx size %llx"
1846 1857 " txg %llx\n",
1847 1858 (u_longlong_t)bigoff,
1848 1859 (u_longlong_t)bigsize,
1849 1860 (u_longlong_t)txg);
1850 1861 }
1851 1862 VERIFY(0 == dmu_free_range(os, dd.dd_bigobj, bigoff,
1852 1863 bigsize, tx));
1853 1864 } else {
1854 1865 if (zopt_verbose >= 6) {
1855 1866 (void) printf("writing offset %llx size %llx"
1856 1867 " txg %llx\n",
1857 1868 (u_longlong_t)bigoff,
1858 1869 (u_longlong_t)bigsize,
1859 1870 (u_longlong_t)txg);
1860 1871 }
1861 1872 dmu_write(os, dd.dd_bigobj, bigoff, bigsize, bigbuf, tx);
1862 1873 }
1863 1874
1864 1875 dmu_tx_commit(tx);
1865 1876
1866 1877 /*
1867 1878 * Sanity check the stuff we just wrote.
1868 1879 */
1869 1880 {
1870 1881 void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
1871 1882 void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
1872 1883
1873 1884 VERIFY(0 == dmu_read(os, dd.dd_packobj, packoff,
1874 1885 packsize, packcheck));
1875 1886 VERIFY(0 == dmu_read(os, dd.dd_bigobj, bigoff,
1876 1887 bigsize, bigcheck));
1877 1888
1878 1889 ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
1879 1890 ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
1880 1891
1881 1892 umem_free(packcheck, packsize);
1882 1893 umem_free(bigcheck, bigsize);
1883 1894 }
1884 1895
1885 1896 umem_free(packbuf, packsize);
1886 1897 umem_free(bigbuf, bigsize);
1887 1898 }
1888 1899
1889 1900 void
1890 1901 ztest_dmu_check_future_leak(ztest_args_t *za)
1891 1902 {
1892 1903 objset_t *os = za->za_os;
1893 1904 dmu_buf_t *db;
1894 1905 ztest_block_tag_t *bt;
1895 1906 dmu_object_info_t *doi = &za->za_doi;
1896 1907
1897 1908 /*
1898 1909 * Make sure that, if there is a write record in the bonus buffer
1899 1910 * of the ZTEST_DIROBJ, that the txg for this record is <= the
1900 1911 * last synced txg of the pool.
1901 1912 */
1902 1913 VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0);
1903 1914 za->za_dbuf = db;
1904 1915 VERIFY(dmu_object_info(os, ZTEST_DIROBJ, doi) == 0);
1905 1916 ASSERT3U(doi->doi_bonus_size, >=, sizeof (*bt));
1906 1917 ASSERT3U(doi->doi_bonus_size, <=, db->db_size);
1907 1918 ASSERT3U(doi->doi_bonus_size % sizeof (*bt), ==, 0);
1908 1919 bt = (void *)((char *)db->db_data + doi->doi_bonus_size - sizeof (*bt));
1909 1920 if (bt->bt_objset != 0) {
1910 1921 ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
1911 1922 ASSERT3U(bt->bt_object, ==, ZTEST_DIROBJ);
1912 1923 ASSERT3U(bt->bt_offset, ==, -1ULL);
1913 1924 ASSERT3U(bt->bt_txg, <, spa_first_txg(za->za_spa));
1914 1925 }
1915 1926 dmu_buf_rele(db, FTAG);
1916 1927 za->za_dbuf = NULL;
1917 1928 }
1918 1929
/*
 * ztest_dmu_write_parallel: write (or free) a ztest_block_tag_t in
 * ZTEST_DIROBJ, either at a large per-lock block offset or in the
 * object's bonus buffer.  After a regular block write, and only when
 * ztest_random(2) returns 0, dmu_sync() the block and read it back
 * through zio_read() to compare the tag on disk with the one written.
 *
 * za supplies the objset (za_os), the pool (za_spa), the base offset
 * (za_diroff_shared), the thread instance (za_instance) and the scratch
 * read/write tags (za_rbt / za_wbt).  Nothing is returned; a failed
 * dmu_tx_assign() aborts the tx and ends this call.
 */
1919 1930 void
1920 1931 ztest_dmu_write_parallel(ztest_args_t *za)
1921 1932 {
1922 1933 objset_t *os = za->za_os;
1923 1934 ztest_block_tag_t *rbt = &za->za_rbt;
1924 1935 ztest_block_tag_t *wbt = &za->za_wbt;
1925 1936 const size_t btsize = sizeof (ztest_block_tag_t);
1926 1937 dmu_buf_t *db;
1927 1938 int b, error;
1928 1939 int bs = ZTEST_DIROBJ_BLOCKSIZE;
1929 1940 int do_free = 0;
1930 1941 uint64_t off, txg, txg_how;
1931 1942 mutex_t *lp;
1932 1943 char osname[MAXNAMELEN];
1933 1944 char iobuf[SPA_MAXBLOCKSIZE];
1934 1945 blkptr_t blk = { 0 };
1935 1946 uint64_t blkoff;
1936 1947 zbookmark_t zb;
1937 1948 dmu_tx_t *tx = dmu_tx_create(os);
1938 1949
1939 1950 dmu_objset_name(os, osname);
1940 1951
1941 1952 /*
1942 1953 * Have multiple threads write to large offsets in ZTEST_DIROBJ
1943 1954 * to verify that having multiple threads writing to the same object
1944 1955 * in parallel doesn't cause any trouble.
1945 1956 */
1946 1957 if (ztest_random(4) == 0) {
1947 1958 /*
1948 1959 * Do the bonus buffer instead of a regular block.
1949 1960 * We need a lock to serialize resize vs. others,
1950 1961 * so we hash on the objset ID.
1951 1962 */
1952 1963 b = dmu_objset_id(os) % ZTEST_SYNC_LOCKS;
/* off == -1ULL is the marker for "bonus buffer" throughout this function. */
1953 1964 off = -1ULL;
1954 1965 dmu_tx_hold_bonus(tx, ZTEST_DIROBJ);
1955 1966 } else {
/* Regular block: lock index b also selects which block-sized region is used. */
1956 1967 b = ztest_random(ZTEST_SYNC_LOCKS);
1957 1968 off = za->za_diroff_shared + (b << SPA_MAXBLOCKSHIFT);
1958 1969 if (ztest_random(4) == 0) {
1959 1970 do_free = 1;
1960 1971 dmu_tx_hold_free(tx, ZTEST_DIROBJ, off, bs);
1961 1972 } else {
1962 1973 dmu_tx_hold_write(tx, ZTEST_DIROBJ, off, bs);
1963 1974 }
1964 1975 }
1965 1976
/*
 * Randomly pick TXG_WAIT or TXG_NOWAIT.  On ERESTART (asserted to occur
 * only with TXG_NOWAIT) call dmu_tx_wait() first; any other error is
 * recorded through ztest_record_enospc().  In both cases the tx is
 * aborted and this call does no further work.
 */
1966 1977 txg_how = ztest_random(2) == 0 ? TXG_WAIT : TXG_NOWAIT;
1967 1978 error = dmu_tx_assign(tx, txg_how);
1968 1979 if (error) {
1969 1980 if (error == ERESTART) {
1970 1981 ASSERT(txg_how == TXG_NOWAIT);
1971 1982 dmu_tx_wait(tx);
1972 1983 } else {
1973 1984 ztest_record_enospc("dmu write parallel");
1974 1985 }
1975 1986 dmu_tx_abort(tx);
1976 1987 return;
1977 1988 }
1978 1989 txg = dmu_tx_get_txg(tx);
1979 1990
1980 1991 lp = &ztest_shared->zs_sync_lock[b];
1981 1992 (void) mutex_lock(lp);
1982 1993
/* Fill in the tag that identifies this particular write. */
1983 1994 wbt->bt_objset = dmu_objset_id(os);
1984 1995 wbt->bt_object = ZTEST_DIROBJ;
1985 1996 wbt->bt_offset = off;
1986 1997 wbt->bt_txg = txg;
1987 1998 wbt->bt_thread = za->za_instance;
1988 1999 wbt->bt_seq = ztest_shared->zs_seq[b]++; /* protected by lp */
1989 2000
1990 2001 /*
1991 2002 * Occasionally, write an all-zero block to test the behavior
1992 2003 * of blocks that compress into holes.
1993 2004 */
1994 2005 if (off != -1ULL && ztest_random(8) == 0)
1995 2006 bzero(wbt, btsize);
1996 2007
/*
 * Bonus case: read the tag in the last tag-sized slot of the bonus area
 * into rbt and check it against the new tag, occasionally resize the
 * bonus area to a random whole number of tags, then store the new tag
 * in the (possibly relocated) last slot.
 */
1997 2008 if (off == -1ULL) {
1998 2009 dmu_object_info_t *doi = &za->za_doi;
1999 2010 char *dboff;
2000 2011
2001 2012 VERIFY(dmu_bonus_hold(os, ZTEST_DIROBJ, FTAG, &db) == 0);
2002 2013 za->za_dbuf = db;
2003 2014 dmu_object_info_from_db(db, doi);
2004 2015 ASSERT3U(doi->doi_bonus_size, <=, db->db_size);
2005 2016 ASSERT3U(doi->doi_bonus_size, >=, btsize);
2006 2017 ASSERT3U(doi->doi_bonus_size % btsize, ==, 0);
2007 2018 dboff = (char *)db->db_data + doi->doi_bonus_size - btsize;
2008 2019 bcopy(dboff, rbt, btsize);
2009 2020 if (rbt->bt_objset != 0) {
2010 2021 ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset);
2011 2022 ASSERT3U(rbt->bt_object, ==, wbt->bt_object);
2012 2023 ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset);
2013 2024 ASSERT3U(rbt->bt_txg, <=, wbt->bt_txg);
2014 2025 }
2015 2026 if (ztest_random(10) == 0) {
2016 2027 int newsize = (ztest_random(db->db_size /
2017 2028 btsize) + 1) * btsize;
2018 2029
2019 2030 ASSERT3U(newsize, >=, btsize);
2020 2031 ASSERT3U(newsize, <=, db->db_size);
2021 2032 VERIFY3U(dmu_set_bonus(db, newsize, tx), ==, 0);
2022 2033 dboff = (char *)db->db_data + newsize - btsize;
2023 2034 }
2024 2035 dmu_buf_will_dirty(db, tx);
2025 2036 bcopy(wbt, dboff, btsize);
2026 2037 dmu_buf_rele(db, FTAG);
2027 2038 za->za_dbuf = NULL;
2028 2039 } else if (do_free) {
2029 2040 VERIFY(dmu_free_range(os, ZTEST_DIROBJ, off, bs, tx) == 0);
2030 2041 } else {
2031 2042 dmu_write(os, ZTEST_DIROBJ, off, btsize, wbt, tx);
2032 2043 }
2033 2044
2034 2045 (void) mutex_unlock(lp);
2035 2046
2036 2047 if (ztest_random(1000) == 0)
2037 2048 (void) poll(NULL, 0, 1); /* open dn_notxholds window */
2038 2049
2039 2050 dmu_tx_commit(tx);
2040 2051
2041 2052 if (ztest_random(10000) == 0)
2042 2053 txg_wait_synced(dmu_objset_pool(os), txg);
2043 2054
/* Only a regular block write is followed by the dmu_sync() check below. */
2044 2055 if (off == -1ULL || do_free)
2045 2056 return;
2046 2057
2047 2058 if (ztest_random(2) != 0)
2048 2059 return;
2049 2060
2050 2061 /*
2051 2062 * dmu_sync() the block we just wrote.
2052 2063 */
2053 2064 (void) mutex_lock(lp);
2054 2065
2055 2066 blkoff = P2ALIGN_TYPED(off, bs, uint64_t);
2056 2067 error = dmu_buf_hold(os, ZTEST_DIROBJ, blkoff, FTAG, &db);
2057 2068 za->za_dbuf = db;
2058 2069 if (error) {
2059 2070 (void) mutex_unlock(lp);
2060 2071 return;
2061 2072 }
/* From here on blkoff is the tag's byte offset inside its block. */
2062 2073 blkoff = off - blkoff;
2063 2074 error = dmu_sync(NULL, db, &blk, txg, NULL, NULL);
2064 2075 dmu_buf_rele(db, FTAG);
2065 2076 za->za_dbuf = NULL;
2066 2077
2067 2078 (void) mutex_unlock(lp);
2068 2079
2069 2080 if (error)
2070 2081 return;
2071 2082
2072 2083 if (blk.blk_birth == 0) /* concurrent free */
2073 2084 return;
2074 2085
2075 2086 txg_suspend(dmu_objset_pool(os));
2076 2087
2077 2088 ASSERT(blk.blk_fill == 1);
2078 2089 ASSERT3U(BP_GET_TYPE(&blk), ==, DMU_OT_UINT64_OTHER);
2079 2090 ASSERT3U(BP_GET_LEVEL(&blk), ==, 0);
2080 2091 ASSERT3U(BP_GET_LSIZE(&blk), ==, bs);
2081 2092
2082 2093 /*
2083 2094 * Read the block that dmu_sync() returned to make sure its contents
2084 2095 * match what we wrote. We do this while still txg_suspend()ed
2085 2096 * to ensure that the block can't be reused before we read it.
2086 2097 */
2087 2098 zb.zb_objset = dmu_objset_id(os);
2088 2099 zb.zb_object = ZTEST_DIROBJ;
2089 2100 zb.zb_level = 0;
2090 2101 zb.zb_blkid = off / bs;
2091 2102 error = zio_wait(zio_read(NULL, za->za_spa, &blk, iobuf, bs,
2092 2103 NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_MUSTSUCCEED, &zb));
2093 2104 ASSERT3U(error, ==, 0);
2094 2105
2095 2106 txg_resume(dmu_objset_pool(os));
2096 2107
/* Extract the tag that actually reached disk. */
2097 2108 bcopy(&iobuf[blkoff], rbt, btsize);
2098 2109
2099 2110 if (rbt->bt_objset == 0) /* concurrent free */
2100 2111 return;
2101 2112
2102 2113 if (wbt->bt_objset == 0) /* all-zero overwrite */
2103 2114 return;
2104 2115
2105 2116 ASSERT3U(rbt->bt_objset, ==, wbt->bt_objset);
2106 2117 ASSERT3U(rbt->bt_object, ==, wbt->bt_object);
2107 2118 ASSERT3U(rbt->bt_offset, ==, wbt->bt_offset);
2108 2119
2109 2120 /*
2110 2121 * The semantic of dmu_sync() is that we always push the most recent
2111 2122 * version of the data, so in the face of concurrent updates we may
2112 2123 * see a newer version of the block. That's OK.
2113 2124 */
2114 2125 ASSERT3U(rbt->bt_txg, >=, wbt->bt_txg);
/*
 * A tag from this same thread must carry exactly our sequence number;
 * a tag from any other thread must carry a later one.
 */
2115 2126 if (rbt->bt_thread == wbt->bt_thread)
2116 2127 ASSERT3U(rbt->bt_seq, ==, wbt->bt_seq);
2117 2128 else
2118 2129 ASSERT3U(rbt->bt_seq, >, wbt->bt_seq);
2119 2130 }
2120 2131
2121 2132 /*
2122 2133 * Verify that zap_{create,destroy,add,remove,update} work as expected.
2123 2134 */
2124 2135 #define ZTEST_ZAP_MIN_INTS 1
2125 2136 #define ZTEST_ZAP_MAX_INTS 4
2126 2137 #define ZTEST_ZAP_MAX_PROPS 1000
2127 2138
2128 2139 void
2129 2140 ztest_zap(ztest_args_t *za)
2130 2141 {
2131 2142 objset_t *os = za->za_os;
2132 2143 uint64_t object;
2133 2144 uint64_t txg, last_txg;
2134 2145 uint64_t value[ZTEST_ZAP_MAX_INTS];
2135 2146 uint64_t zl_ints, zl_intsize, prop;
2136 2147 int i, ints;
2137 2148 dmu_tx_t *tx;
2138 2149 char propname[100], txgname[100];
2139 2150 int error;
2140 2151 char osname[MAXNAMELEN];
2141 2152 char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
2142 2153
2143 2154 dmu_objset_name(os, osname);
2144 2155
2145 2156 /*
2146 2157 * Create a new object if necessary, and record it in the directory.
2147 2158 */
2148 2159 VERIFY(0 == dmu_read(os, ZTEST_DIROBJ, za->za_diroff,
2149 2160 sizeof (uint64_t), &object));
2150 2161
2151 2162 if (object == 0) {
2152 2163 tx = dmu_tx_create(os);
2153 2164 dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff,
2154 2165 sizeof (uint64_t));
2155 2166 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, TRUE, NULL);
2156 2167 error = dmu_tx_assign(tx, TXG_WAIT);
2157 2168 if (error) {
2158 2169 ztest_record_enospc("create zap test obj");
2159 2170 dmu_tx_abort(tx);
2160 2171 return;
2161 2172 }
2162 2173 object = zap_create(os, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx);
2163 2174 if (error) {
2164 2175 fatal(0, "zap_create('%s', %llu) = %d",
2165 2176 osname, object, error);
2166 2177 }
2167 2178 ASSERT(object != 0);
2168 2179 dmu_write(os, ZTEST_DIROBJ, za->za_diroff,
2169 2180 sizeof (uint64_t), &object, tx);
2170 2181 /*
2171 2182 * Generate a known hash collision, and verify that
2172 2183 * we can lookup and remove both entries.
2173 2184 */
2174 2185 for (i = 0; i < 2; i++) {
2175 2186 value[i] = i;
2176 2187 error = zap_add(os, object, hc[i], sizeof (uint64_t),
2177 2188 1, &value[i], tx);
2178 2189 ASSERT3U(error, ==, 0);
2179 2190 }
2180 2191 for (i = 0; i < 2; i++) {
2181 2192 error = zap_add(os, object, hc[i], sizeof (uint64_t),
2182 2193 1, &value[i], tx);
2183 2194 ASSERT3U(error, ==, EEXIST);
2184 2195 error = zap_length(os, object, hc[i],
2185 2196 &zl_intsize, &zl_ints);
2186 2197 ASSERT3U(error, ==, 0);
2187 2198 ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
2188 2199 ASSERT3U(zl_ints, ==, 1);
2189 2200 }
2190 2201 for (i = 0; i < 2; i++) {
2191 2202 error = zap_remove(os, object, hc[i], tx);
2192 2203 ASSERT3U(error, ==, 0);
2193 2204 }
2194 2205
2195 2206 dmu_tx_commit(tx);
2196 2207 }
2197 2208
2198 2209 ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
2199 2210
2200 2211 prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
2201 2212 (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
2202 2213 (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
2203 2214 bzero(value, sizeof (value));
2204 2215 last_txg = 0;
2205 2216
2206 2217 /*
2207 2218 * If these zap entries already exist, validate their contents.
2208 2219 */
2209 2220 error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
2210 2221 if (error == 0) {
2211 2222 ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
2212 2223 ASSERT3U(zl_ints, ==, 1);
2213 2224
2214 2225 VERIFY(zap_lookup(os, object, txgname, zl_intsize,
2215 2226 zl_ints, &last_txg) == 0);
2216 2227
2217 2228 VERIFY(zap_length(os, object, propname, &zl_intsize,
2218 2229 &zl_ints) == 0);
2219 2230
2220 2231 ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
2221 2232 ASSERT3U(zl_ints, ==, ints);
2222 2233
2223 2234 VERIFY(zap_lookup(os, object, propname, zl_intsize,
2224 2235 zl_ints, value) == 0);
2225 2236
2226 2237 for (i = 0; i < ints; i++) {
2227 2238 ASSERT3U(value[i], ==, last_txg + object + i);
2228 2239 }
2229 2240 } else {
2230 2241 ASSERT3U(error, ==, ENOENT);
2231 2242 }
2232 2243
2233 2244 /*
2234 2245 * Atomically update two entries in our zap object.
2235 2246 * The first is named txg_%llu, and contains the txg
2236 2247 * in which the property was last updated. The second
2237 2248 * is named prop_%llu, and the nth element of its value
2238 2249 * should be txg + object + n.
2239 2250 */
2240 2251 tx = dmu_tx_create(os);
2241 2252 dmu_tx_hold_zap(tx, object, TRUE, NULL);
2242 2253 error = dmu_tx_assign(tx, TXG_WAIT);
2243 2254 if (error) {
2244 2255 ztest_record_enospc("create zap entry");
2245 2256 dmu_tx_abort(tx);
2246 2257 return;
2247 2258 }
2248 2259 txg = dmu_tx_get_txg(tx);
2249 2260
2250 2261 if (last_txg > txg)
2251 2262 fatal(0, "zap future leak: old %llu new %llu", last_txg, txg);
2252 2263
2253 2264 for (i = 0; i < ints; i++)
2254 2265 value[i] = txg + object + i;
2255 2266
2256 2267 error = zap_update(os, object, txgname, sizeof (uint64_t), 1, &txg, tx);
2257 2268 if (error)
2258 2269 fatal(0, "zap_update('%s', %llu, '%s') = %d",
2259 2270 osname, object, txgname, error);
2260 2271
2261 2272 error = zap_update(os, object, propname, sizeof (uint64_t),
2262 2273 ints, value, tx);
2263 2274 if (error)
2264 2275 fatal(0, "zap_update('%s', %llu, '%s') = %d",
2265 2276 osname, object, propname, error);
2266 2277
2267 2278 dmu_tx_commit(tx);
2268 2279
2269 2280 /*
2270 2281 * Remove a random pair of entries.
2271 2282 */
2272 2283 prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
2273 2284 (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
2274 2285 (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
2275 2286
2276 2287 error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
2277 2288
2278 2289 if (error == ENOENT)
2279 2290 return;
2280 2291
2281 2292 ASSERT3U(error, ==, 0);
2282 2293
2283 2294 tx = dmu_tx_create(os);
2284 2295 dmu_tx_hold_zap(tx, object, TRUE, NULL);
2285 2296 error = dmu_tx_assign(tx, TXG_WAIT);
2286 2297 if (error) {
2287 2298 ztest_record_enospc("remove zap entry");
2288 2299 dmu_tx_abort(tx);
2289 2300 return;
2290 2301 }
2291 2302 error = zap_remove(os, object, txgname, tx);
2292 2303 if (error)
2293 2304 fatal(0, "zap_remove('%s', %llu, '%s') = %d",
2294 2305 osname, object, txgname, error);
2295 2306
2296 2307 error = zap_remove(os, object, propname, tx);
2297 2308 if (error)
2298 2309 fatal(0, "zap_remove('%s', %llu, '%s') = %d",
2299 2310 osname, object, propname, error);
2300 2311
2301 2312 dmu_tx_commit(tx);
2302 2313
2303 2314 /*
2304 2315 * Once in a while, destroy the object.
2305 2316 */
2306 2317 if (ztest_random(1000) != 0)
2307 2318 return;
2308 2319
2309 2320 tx = dmu_tx_create(os);
2310 2321 dmu_tx_hold_write(tx, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t));
2311 2322 dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
2312 2323 error = dmu_tx_assign(tx, TXG_WAIT);
2313 2324 if (error) {
2314 2325 ztest_record_enospc("destroy zap object");
2315 2326 dmu_tx_abort(tx);
2316 2327 return;
2317 2328 }
2318 2329 error = zap_destroy(os, object, tx);
2319 2330 if (error)
2320 2331 fatal(0, "zap_destroy('%s', %llu) = %d",
2321 2332 osname, object, error);
2322 2333 object = 0;
2323 2334 dmu_write(os, ZTEST_DIROBJ, za->za_diroff, sizeof (uint64_t),
2324 2335 &object, tx);
2325 2336 dmu_tx_commit(tx);
2326 2337 }
2327 2338
2328 2339 void
2329 2340 ztest_zap_parallel(ztest_args_t *za)
2330 2341 {
2331 2342 objset_t *os = za->za_os;
2332 2343 uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
2333 2344 dmu_tx_t *tx;
2334 2345 int i, namelen, error;
2335 2346 char name[20], string_value[20];
2336 2347 void *data;
2337 2348
2338 2349 /*
2339 2350 * Generate a random name of the form 'xxx.....' where each
2340 2351 * x is a random printable character and the dots are dots.
2341 2352 * There are 94 such characters, and the name length goes from
2342 2353 * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
2343 2354 */
2344 2355 namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
2345 2356
2346 2357 for (i = 0; i < 3; i++)
2347 2358 name[i] = '!' + ztest_random('~' - '!' + 1);
2348 2359 for (; i < namelen - 1; i++)
2349 2360 name[i] = '.';
2350 2361 name[i] = '\0';
2351 2362
2352 2363 if (ztest_random(2) == 0)
2353 2364 object = ZTEST_MICROZAP_OBJ;
2354 2365 else
2355 2366 object = ZTEST_FATZAP_OBJ;
2356 2367
2357 2368 if ((namelen & 1) || object == ZTEST_MICROZAP_OBJ) {
2358 2369 wsize = sizeof (txg);
2359 2370 wc = 1;
2360 2371 data = &txg;
2361 2372 } else {
2362 2373 wsize = 1;
2363 2374 wc = namelen;
2364 2375 data = string_value;
2365 2376 }
2366 2377
2367 2378 count = -1ULL;
2368 2379 VERIFY(zap_count(os, object, &count) == 0);
2369 2380 ASSERT(count != -1ULL);
2370 2381
2371 2382 /*
2372 2383 * Select an operation: length, lookup, add, update, remove.
2373 2384 */
2374 2385 i = ztest_random(5);
2375 2386
2376 2387 if (i >= 2) {
2377 2388 tx = dmu_tx_create(os);
2378 2389 dmu_tx_hold_zap(tx, object, TRUE, NULL);
2379 2390 error = dmu_tx_assign(tx, TXG_WAIT);
2380 2391 if (error) {
2381 2392 ztest_record_enospc("zap parallel");
2382 2393 dmu_tx_abort(tx);
2383 2394 return;
2384 2395 }
2385 2396 txg = dmu_tx_get_txg(tx);
2386 2397 bcopy(name, string_value, namelen);
2387 2398 } else {
2388 2399 tx = NULL;
2389 2400 txg = 0;
2390 2401 bzero(string_value, namelen);
2391 2402 }
2392 2403
2393 2404 switch (i) {
2394 2405
2395 2406 case 0:
2396 2407 error = zap_length(os, object, name, &zl_wsize, &zl_wc);
2397 2408 if (error == 0) {
2398 2409 ASSERT3U(wsize, ==, zl_wsize);
2399 2410 ASSERT3U(wc, ==, zl_wc);
2400 2411 } else {
2401 2412 ASSERT3U(error, ==, ENOENT);
2402 2413 }
2403 2414 break;
2404 2415
2405 2416 case 1:
2406 2417 error = zap_lookup(os, object, name, wsize, wc, data);
2407 2418 if (error == 0) {
2408 2419 if (data == string_value &&
2409 2420 bcmp(name, data, namelen) != 0)
2410 2421 fatal(0, "name '%s' != val '%s' len %d",
2411 2422 name, data, namelen);
2412 2423 } else {
2413 2424 ASSERT3U(error, ==, ENOENT);
2414 2425 }
2415 2426 break;
2416 2427
2417 2428 case 2:
2418 2429 error = zap_add(os, object, name, wsize, wc, data, tx);
2419 2430 ASSERT(error == 0 || error == EEXIST);
2420 2431 break;
2421 2432
2422 2433 case 3:
2423 2434 VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0);
2424 2435 break;
2425 2436
2426 2437 case 4:
2427 2438 error = zap_remove(os, object, name, tx);
2428 2439 ASSERT(error == 0 || error == ENOENT);
2429 2440 break;
2430 2441 }
2431 2442
2432 2443 if (tx != NULL)
2433 2444 dmu_tx_commit(tx);
2434 2445 }
2435 2446
/*
 * ztest_dsl_prop_get_set: while holding zs_name_lock as a reader, set a
 * sequence of dataset properties on this objset to random values with
 * dsl_prop_set(), then read each one back with dsl_prop_get() and, at
 * verbosity >= 6, print the value name and its setpoint.
 *
 * When the random value equals the property's *_INHERIT constant,
 * dsl_prop_set() is called with a count of 0 (!inherit) instead of 1.
 * ENOSPC from dsl_prop_set() is recorded and ends the loop early.
 *
 * NOTE(review): this function is shown as a diff hunk.  Lines marked '-'
 * belong to the old revision (checksum and compression only) and lines
 * marked '+' to the new one, which adds a third "crypt" property.
 */
2436 2447 void
2437 2448 ztest_dsl_prop_get_set(ztest_args_t *za)
2438 2449 {
2439 2450 objset_t *os = za->za_os;
2440 2451 int i, inherit;
|
↓ open down ↓ |
820 lines elided |
↑ open up ↑ |
2441 2452 uint64_t value;
2442 2453 const char *prop, *valname;
2443 2454 char setpoint[MAXPATHLEN];
2444 2455 char osname[MAXNAMELEN];
2445 2456 int error;
2446 2457
2447 2458 (void) rw_rdlock(&ztest_shared->zs_name_lock);
2448 2459
2449 2460 dmu_objset_name(os, osname);
2450 2461
/* One iteration per property: 0 = checksum, 1 = compression, 2 = crypt. */
2451 - for (i = 0; i < 2; i++) {
2462 + for (i = 0; i < 3; i++) {
2452 2463 if (i == 0) {
2453 2464 prop = "checksum";
2454 2465 value = ztest_random_checksum();
2455 2466 inherit = (value == ZIO_CHECKSUM_INHERIT);
2456 - } else {
2467 + } else if (i == 1) {
2457 2468 prop = "compression";
2458 2469 value = ztest_random_compress();
2459 2470 inherit = (value == ZIO_COMPRESS_INHERIT);
2471 + } else {
2472 + prop = "crypt";
2473 + value = ztest_random_crypt();
2474 + inherit = (value == ZIO_CRYPT_INHERIT);
2460 2475 }
2461 2476
2462 2477 error = dsl_prop_set(osname, prop, sizeof (value),
2463 2478 !inherit, &value);
2464 2479
2465 2480 if (error == ENOSPC) {
2466 2481 ztest_record_enospc("dsl_prop_set");
2467 2482 break;
2468 2483 }
2469 2484
2470 2485 ASSERT3U(error, ==, 0);
2471 2486
2472 2487 VERIFY3U(dsl_prop_get(osname, prop, sizeof (value),
2473 2488 1, &value, setpoint), ==, 0);
2474 2489
/* Translate the value read back into its name via the matching table. */
2475 - if (i == 0)
2490 + if (i == 0) {
2476 2491 valname = zio_checksum_table[value].ci_name;
2477 - else
2492 + } else if (i == 1) {
2478 2493 valname = zio_compress_table[value].ci_name;
2494 + } else {
2495 + valname = zio_crypt_table[value].ci_name;
2496 + }
2479 2497
2498 +
2480 2499 if (zopt_verbose >= 6) {
2481 2500 (void) printf("%s %s = %s for '%s'\n",
2482 2501 osname, prop, valname, setpoint);
2483 2502 }
2484 2503 }
2485 2504
2486 2505 (void) rw_unlock(&ztest_shared->zs_name_lock);
2487 2506 }
2488 2507
2489 2508 /*
2490 2509 * Inject random faults into the on-disk data.
2491 2510 */
2492 2511 void
2493 2512 ztest_fault_inject(ztest_args_t *za)
2494 2513 {
2495 2514 int fd;
2496 2515 uint64_t offset;
2497 2516 uint64_t leaves = MAX(zopt_mirrors, 1) * zopt_raidz;
2498 2517 uint64_t bad = 0x1990c0ffeedecade;
2499 2518 uint64_t top, leaf;
2500 2519 char path0[MAXPATHLEN];
2501 2520 char pathrand[MAXPATHLEN];
2502 2521 size_t fsize;
2503 2522 spa_t *spa = za->za_spa;
2504 2523 int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
2505 2524 int iters = 1000;
2506 2525 int maxfaults = zopt_maxfaults;
2507 2526 vdev_t *vd0 = NULL;
2508 2527 uint64_t guid0 = 0;
2509 2528
2510 2529 ASSERT(leaves >= 1);
2511 2530
2512 2531 /*
2513 2532 * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
2514 2533 */
2515 2534 spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
2516 2535
2517 2536 if (ztest_random(2) == 0) {
2518 2537 /*
2519 2538 * Inject errors on a normal data device.
2520 2539 */
2521 2540 top = ztest_random(spa->spa_root_vdev->vdev_children);
2522 2541 leaf = ztest_random(leaves);
2523 2542
2524 2543 /*
2525 2544 * Generate paths to the first leaf in this top-level vdev,
2526 2545 * and to the random leaf we selected. We'll induce transient
2527 2546 * write failures and random online/offline activity on leaf 0,
2528 2547 * and we'll write random garbage to the randomly chosen leaf.
2529 2548 */
2530 2549 (void) snprintf(path0, sizeof (path0), ztest_dev_template,
2531 2550 zopt_dir, zopt_pool, top * leaves + 0);
2532 2551 (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
2533 2552 zopt_dir, zopt_pool, top * leaves + leaf);
2534 2553
2535 2554 vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
2536 2555 if (vd0 != NULL && maxfaults != 1) {
2537 2556 /*
2538 2557 * Make vd0 explicitly claim to be unreadable,
2539 2558 * or unwriteable, or reach behind its back
2540 2559 * and close the underlying fd. We can do this if
2541 2560 * maxfaults == 0 because we'll fail and reexecute,
2542 2561 * and we can do it if maxfaults >= 2 because we'll
2543 2562 * have enough redundancy. If maxfaults == 1, the
2544 2563 * combination of this with injection of random data
2545 2564 * corruption below exceeds the pool's fault tolerance.
2546 2565 */
2547 2566 vdev_file_t *vf = vd0->vdev_tsd;
2548 2567
2549 2568 if (vf != NULL && ztest_random(3) == 0) {
2550 2569 (void) close(vf->vf_vnode->v_fd);
2551 2570 vf->vf_vnode->v_fd = -1;
2552 2571 } else if (ztest_random(2) == 0) {
2553 2572 vd0->vdev_cant_read = B_TRUE;
2554 2573 } else {
2555 2574 vd0->vdev_cant_write = B_TRUE;
2556 2575 }
2557 2576 guid0 = vd0->vdev_guid;
2558 2577 }
2559 2578 } else {
2560 2579 /*
2561 2580 * Inject errors on an l2cache device.
2562 2581 */
2563 2582 spa_aux_vdev_t *sav = &spa->spa_l2cache;
2564 2583
2565 2584 if (sav->sav_count == 0) {
2566 2585 spa_config_exit(spa, SCL_STATE, FTAG);
2567 2586 return;
2568 2587 }
2569 2588 vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
2570 2589 guid0 = vd0->vdev_guid;
2571 2590 (void) strcpy(path0, vd0->vdev_path);
2572 2591 (void) strcpy(pathrand, vd0->vdev_path);
2573 2592
2574 2593 leaf = 0;
2575 2594 leaves = 1;
2576 2595 maxfaults = INT_MAX; /* no limit on cache devices */
2577 2596 }
2578 2597
2579 2598 spa_config_exit(spa, SCL_STATE, FTAG);
2580 2599
2581 2600 if (maxfaults == 0)
2582 2601 return;
2583 2602
2584 2603 /*
2585 2604 * If we can tolerate two or more faults, randomly online/offline vd0.
2586 2605 */
2587 2606 if (maxfaults >= 2 && guid0 != 0) {
2588 2607 if (ztest_random(10) < 6) {
2589 2608 int flags = (ztest_random(2) == 0 ?
2590 2609 ZFS_OFFLINE_TEMPORARY : 0);
2591 2610 VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
2592 2611 } else {
2593 2612 (void) vdev_online(spa, guid0, 0, NULL);
2594 2613 }
2595 2614 }
2596 2615
2597 2616 /*
2598 2617 * We have at least single-fault tolerance, so inject data corruption.
2599 2618 */
2600 2619 fd = open(pathrand, O_RDWR);
2601 2620
2602 2621 if (fd == -1) /* we hit a gap in the device namespace */
2603 2622 return;
2604 2623
2605 2624 fsize = lseek(fd, 0, SEEK_END);
2606 2625
2607 2626 while (--iters != 0) {
2608 2627 offset = ztest_random(fsize / (leaves << bshift)) *
2609 2628 (leaves << bshift) + (leaf << bshift) +
2610 2629 (ztest_random(1ULL << (bshift - 1)) & -8ULL);
2611 2630
2612 2631 if (offset >= fsize)
2613 2632 continue;
2614 2633
2615 2634 if (zopt_verbose >= 6)
2616 2635 (void) printf("injecting bad word into %s,"
2617 2636 " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
2618 2637
2619 2638 if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
2620 2639 fatal(1, "can't inject bad word at 0x%llx in %s",
2621 2640 offset, pathrand);
2622 2641 }
2623 2642
2624 2643 (void) close(fd);
2625 2644 }
2626 2645
2627 2646 /*
2628 2647 * Scrub the pool.
2629 2648 */
2630 2649 void
2631 2650 ztest_scrub(ztest_args_t *za)
2632 2651 {
2633 2652 spa_t *spa = za->za_spa;
2634 2653
2635 2654 (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
2636 2655 (void) poll(NULL, 0, 1000); /* wait a second, then force a restart */
2637 2656 (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
2638 2657 }
2639 2658
2640 2659 /*
2641 2660 * Rename the pool to a different name and then rename it back.
2642 2661 */
2643 2662 void
2644 2663 ztest_spa_rename(ztest_args_t *za)
2645 2664 {
2646 2665 char *oldname, *newname;
2647 2666 int error;
2648 2667 spa_t *spa;
2649 2668
2650 2669 (void) rw_wrlock(&ztest_shared->zs_name_lock);
2651 2670
2652 2671 oldname = za->za_pool;
2653 2672 newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
2654 2673 (void) strcpy(newname, oldname);
2655 2674 (void) strcat(newname, "_tmp");
2656 2675
2657 2676 /*
2658 2677 * Do the rename
2659 2678 */
2660 2679 error = spa_rename(oldname, newname);
2661 2680 if (error)
2662 2681 fatal(0, "spa_rename('%s', '%s') = %d", oldname,
2663 2682 newname, error);
2664 2683
2665 2684 /*
2666 2685 * Try to open it under the old name, which shouldn't exist
2667 2686 */
2668 2687 error = spa_open(oldname, &spa, FTAG);
2669 2688 if (error != ENOENT)
2670 2689 fatal(0, "spa_open('%s') = %d", oldname, error);
2671 2690
2672 2691 /*
2673 2692 * Open it under the new name and make sure it's still the same spa_t.
2674 2693 */
2675 2694 error = spa_open(newname, &spa, FTAG);
2676 2695 if (error != 0)
2677 2696 fatal(0, "spa_open('%s') = %d", newname, error);
2678 2697
2679 2698 ASSERT(spa == za->za_spa);
2680 2699 spa_close(spa, FTAG);
2681 2700
2682 2701 /*
2683 2702 * Rename it back to the original
2684 2703 */
2685 2704 error = spa_rename(newname, oldname);
2686 2705 if (error)
2687 2706 fatal(0, "spa_rename('%s', '%s') = %d", newname,
2688 2707 oldname, error);
2689 2708
2690 2709 /*
2691 2710 * Make sure it can still be opened
2692 2711 */
2693 2712 error = spa_open(oldname, &spa, FTAG);
2694 2713 if (error != 0)
2695 2714 fatal(0, "spa_open('%s') = %d", oldname, error);
2696 2715
2697 2716 ASSERT(spa == za->za_spa);
2698 2717 spa_close(spa, FTAG);
2699 2718
2700 2719 umem_free(newname, strlen(newname) + 1);
2701 2720
2702 2721 (void) rw_unlock(&ztest_shared->zs_name_lock);
2703 2722 }
2704 2723
2705 2724
2706 2725 /*
2707 2726 * Completely obliterate one disk.
2708 2727 */
2709 2728 static void
2710 2729 ztest_obliterate_one_disk(uint64_t vdev)
2711 2730 {
2712 2731 int fd;
2713 2732 char dev_name[MAXPATHLEN], copy_name[MAXPATHLEN];
2714 2733 size_t fsize;
2715 2734
2716 2735 if (zopt_maxfaults < 2)
2717 2736 return;
2718 2737
2719 2738 (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
2720 2739 (void) snprintf(copy_name, MAXPATHLEN, "%s.old", dev_name);
2721 2740
2722 2741 fd = open(dev_name, O_RDWR);
2723 2742
2724 2743 if (fd == -1)
2725 2744 fatal(1, "can't open %s", dev_name);
2726 2745
2727 2746 /*
2728 2747 * Determine the size.
2729 2748 */
2730 2749 fsize = lseek(fd, 0, SEEK_END);
2731 2750
2732 2751 (void) close(fd);
2733 2752
2734 2753 /*
2735 2754 * Rename the old device to dev_name.old (useful for debugging).
2736 2755 */
2737 2756 VERIFY(rename(dev_name, copy_name) == 0);
2738 2757
2739 2758 /*
2740 2759 * Create a new one.
2741 2760 */
2742 2761 VERIFY((fd = open(dev_name, O_RDWR | O_CREAT | O_TRUNC, 0666)) >= 0);
2743 2762 VERIFY(ftruncate(fd, fsize) == 0);
2744 2763 (void) close(fd);
2745 2764 }
2746 2765
2747 2766 static void
2748 2767 ztest_replace_one_disk(spa_t *spa, uint64_t vdev)
2749 2768 {
2750 2769 char dev_name[MAXPATHLEN];
2751 2770 nvlist_t *root;
2752 2771 int error;
2753 2772 uint64_t guid;
2754 2773 vdev_t *vd;
2755 2774
2756 2775 (void) sprintf(dev_name, ztest_dev_template, zopt_dir, zopt_pool, vdev);
2757 2776
2758 2777 /*
2759 2778 * Build the nvlist describing dev_name.
2760 2779 */
2761 2780 root = make_vdev_root(dev_name, NULL, 0, 0, 0, 0, 0, 1);
2762 2781
2763 2782 spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2764 2783 if ((vd = vdev_lookup_by_path(spa->spa_root_vdev, dev_name)) == NULL)
2765 2784 guid = 0;
2766 2785 else
2767 2786 guid = vd->vdev_guid;
2768 2787 spa_config_exit(spa, SCL_VDEV, FTAG);
2769 2788 error = spa_vdev_attach(spa, guid, root, B_TRUE);
2770 2789 if (error != 0 &&
2771 2790 error != EBUSY &&
2772 2791 error != ENOTSUP &&
2773 2792 error != ENODEV &&
2774 2793 error != EDOM)
2775 2794 fatal(0, "spa_vdev_attach(in-place) = %d", error);
2776 2795
2777 2796 nvlist_free(root);
2778 2797 }
2779 2798
2780 2799 static void
2781 2800 ztest_verify_blocks(char *pool)
2782 2801 {
2783 2802 int status;
2784 2803 char zdb[MAXPATHLEN + MAXNAMELEN + 20];
2785 2804 char zbuf[1024];
2786 2805 char *bin;
2787 2806 char *ztest;
2788 2807 char *isa;
2789 2808 int isalen;
2790 2809 FILE *fp;
2791 2810
2792 2811 (void) realpath(getexecname(), zdb);
2793 2812
2794 2813 /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */
2795 2814 bin = strstr(zdb, "/usr/bin/");
2796 2815 ztest = strstr(bin, "/ztest");
2797 2816 isa = bin + 8;
2798 2817 isalen = ztest - isa;
2799 2818 isa = strdup(isa);
2800 2819 /* LINTED */
2801 2820 (void) sprintf(bin,
2802 2821 "/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s",
2803 2822 isalen,
2804 2823 isa,
2805 2824 zopt_verbose >= 3 ? "s" : "",
2806 2825 zopt_verbose >= 4 ? "v" : "",
2807 2826 pool);
2808 2827 free(isa);
2809 2828
2810 2829 if (zopt_verbose >= 5)
2811 2830 (void) printf("Executing %s\n", strstr(zdb, "zdb "));
2812 2831
2813 2832 fp = popen(zdb, "r");
2814 2833
2815 2834 while (fgets(zbuf, sizeof (zbuf), fp) != NULL)
2816 2835 if (zopt_verbose >= 3)
2817 2836 (void) printf("%s", zbuf);
2818 2837
2819 2838 status = pclose(fp);
2820 2839
2821 2840 if (status == 0)
2822 2841 return;
2823 2842
2824 2843 ztest_dump_core = 0;
2825 2844 if (WIFEXITED(status))
2826 2845 fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status));
2827 2846 else
2828 2847 fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status));
2829 2848 }
2830 2849
2831 2850 static void
2832 2851 ztest_walk_pool_directory(char *header)
2833 2852 {
2834 2853 spa_t *spa = NULL;
2835 2854
2836 2855 if (zopt_verbose >= 6)
2837 2856 (void) printf("%s\n", header);
2838 2857
2839 2858 mutex_enter(&spa_namespace_lock);
2840 2859 while ((spa = spa_next(spa)) != NULL)
2841 2860 if (zopt_verbose >= 6)
2842 2861 (void) printf("\t%s\n", spa_name(spa));
2843 2862 mutex_exit(&spa_namespace_lock);
2844 2863 }
2845 2864
2846 2865 static void
2847 2866 ztest_spa_import_export(char *oldname, char *newname)
2848 2867 {
2849 2868 nvlist_t *config, *newconfig;
2850 2869 uint64_t pool_guid;
2851 2870 spa_t *spa;
2852 2871 int error;
2853 2872
2854 2873 if (zopt_verbose >= 4) {
2855 2874 (void) printf("import/export: old = %s, new = %s\n",
2856 2875 oldname, newname);
2857 2876 }
2858 2877
2859 2878 /*
2860 2879 * Clean up from previous runs.
2861 2880 */
2862 2881 (void) spa_destroy(newname);
2863 2882
2864 2883 /*
2865 2884 * Get the pool's configuration and guid.
2866 2885 */
2867 2886 error = spa_open(oldname, &spa, FTAG);
2868 2887 if (error)
2869 2888 fatal(0, "spa_open('%s') = %d", oldname, error);
2870 2889
2871 2890 /*
2872 2891 * Kick off a scrub to tickle scrub/export races.
2873 2892 */
2874 2893 if (ztest_random(2) == 0)
2875 2894 (void) spa_scrub(spa, POOL_SCRUB_EVERYTHING);
2876 2895
2877 2896 pool_guid = spa_guid(spa);
2878 2897 spa_close(spa, FTAG);
2879 2898
2880 2899 ztest_walk_pool_directory("pools before export");
2881 2900
2882 2901 /*
2883 2902 * Export it.
2884 2903 */
2885 2904 error = spa_export(oldname, &config, B_FALSE, B_FALSE);
2886 2905 if (error)
2887 2906 fatal(0, "spa_export('%s') = %d", oldname, error);
2888 2907
2889 2908 ztest_walk_pool_directory("pools after export");
2890 2909
2891 2910 /*
2892 2911 * Try to import it.
2893 2912 */
2894 2913 newconfig = spa_tryimport(config);
2895 2914 ASSERT(newconfig != NULL);
2896 2915 nvlist_free(newconfig);
2897 2916
2898 2917 /*
2899 2918 * Import it under the new name.
2900 2919 */
2901 2920 error = spa_import(newname, config, NULL);
2902 2921 if (error)
2903 2922 fatal(0, "spa_import('%s') = %d", newname, error);
2904 2923
2905 2924 ztest_walk_pool_directory("pools after import");
2906 2925
2907 2926 /*
2908 2927 * Try to import it again -- should fail with EEXIST.
2909 2928 */
2910 2929 error = spa_import(newname, config, NULL);
2911 2930 if (error != EEXIST)
2912 2931 fatal(0, "spa_import('%s') twice", newname);
2913 2932
2914 2933 /*
2915 2934 * Try to import it under a different name -- should fail with EEXIST.
2916 2935 */
2917 2936 error = spa_import(oldname, config, NULL);
2918 2937 if (error != EEXIST)
2919 2938 fatal(0, "spa_import('%s') under multiple names", newname);
2920 2939
2921 2940 /*
2922 2941 * Verify that the pool is no longer visible under the old name.
2923 2942 */
2924 2943 error = spa_open(oldname, &spa, FTAG);
2925 2944 if (error != ENOENT)
2926 2945 fatal(0, "spa_open('%s') = %d", newname, error);
2927 2946
2928 2947 /*
2929 2948 * Verify that we can open and close the pool using the new name.
2930 2949 */
2931 2950 error = spa_open(newname, &spa, FTAG);
2932 2951 if (error)
2933 2952 fatal(0, "spa_open('%s') = %d", newname, error);
2934 2953 ASSERT(pool_guid == spa_guid(spa));
2935 2954 spa_close(spa, FTAG);
2936 2955
2937 2956 nvlist_free(config);
2938 2957 }
2939 2958
2940 2959 static void
2941 2960 ztest_resume(spa_t *spa)
2942 2961 {
2943 2962 if (spa_suspended(spa)) {
2944 2963 spa_vdev_state_enter(spa);
2945 2964 vdev_clear(spa, NULL);
2946 2965 (void) spa_vdev_state_exit(spa, NULL, 0);
2947 2966 zio_resume(spa);
2948 2967 }
2949 2968 }
2950 2969
2951 2970 static void *
2952 2971 ztest_resume_thread(void *arg)
2953 2972 {
2954 2973 spa_t *spa = arg;
2955 2974
2956 2975 while (!ztest_exiting) {
2957 2976 (void) poll(NULL, 0, 1000);
2958 2977 ztest_resume(spa);
2959 2978 }
2960 2979 return (NULL);
2961 2980 }
2962 2981
2963 2982 static void *
2964 2983 ztest_thread(void *arg)
2965 2984 {
2966 2985 ztest_args_t *za = arg;
2967 2986 ztest_shared_t *zs = ztest_shared;
2968 2987 hrtime_t now, functime;
2969 2988 ztest_info_t *zi;
2970 2989 int f, i;
2971 2990
2972 2991 while ((now = gethrtime()) < za->za_stop) {
2973 2992 /*
2974 2993 * See if it's time to force a crash.
2975 2994 */
2976 2995 if (now > za->za_kill) {
2977 2996 zs->zs_alloc = spa_get_alloc(za->za_spa);
2978 2997 zs->zs_space = spa_get_space(za->za_spa);
2979 2998 (void) kill(getpid(), SIGKILL);
2980 2999 }
2981 3000
2982 3001 /*
2983 3002 * Pick a random function.
2984 3003 */
2985 3004 f = ztest_random(ZTEST_FUNCS);
2986 3005 zi = &zs->zs_info[f];
2987 3006
2988 3007 /*
2989 3008 * Decide whether to call it, based on the requested frequency.
2990 3009 */
2991 3010 if (zi->zi_call_target == 0 ||
2992 3011 (double)zi->zi_call_total / zi->zi_call_target >
2993 3012 (double)(now - zs->zs_start_time) / (zopt_time * NANOSEC))
2994 3013 continue;
2995 3014
2996 3015 atomic_add_64(&zi->zi_calls, 1);
2997 3016 atomic_add_64(&zi->zi_call_total, 1);
2998 3017
2999 3018 za->za_diroff = (za->za_instance * ZTEST_FUNCS + f) *
3000 3019 ZTEST_DIRSIZE;
3001 3020 za->za_diroff_shared = (1ULL << 63);
3002 3021
3003 3022 for (i = 0; i < zi->zi_iters; i++)
3004 3023 zi->zi_func(za);
3005 3024
3006 3025 functime = gethrtime() - now;
3007 3026
3008 3027 atomic_add_64(&zi->zi_call_time, functime);
3009 3028
3010 3029 if (zopt_verbose >= 4) {
3011 3030 Dl_info dli;
3012 3031 (void) dladdr((void *)zi->zi_func, &dli);
3013 3032 (void) printf("%6.2f sec in %s\n",
3014 3033 (double)functime / NANOSEC, dli.dli_sname);
3015 3034 }
3016 3035
3017 3036 /*
3018 3037 * If we're getting ENOSPC with some regularity, stop.
3019 3038 */
3020 3039 if (zs->zs_enospc_count > 10)
3021 3040 break;
3022 3041 }
3023 3042
3024 3043 return (NULL);
3025 3044 }
3026 3045
3027 3046 /*
3028 3047 * Kick off threads to run tests on all datasets in parallel.
3029 3048 */
3030 3049 static void
3031 3050 ztest_run(char *pool)
3032 3051 {
3033 3052 int t, d, error;
3034 3053 ztest_shared_t *zs = ztest_shared;
3035 3054 ztest_args_t *za;
3036 3055 spa_t *spa;
3037 3056 char name[100];
3038 3057 thread_t resume_tid;
3039 3058
3040 3059 ztest_exiting = B_FALSE;
3041 3060
3042 3061 (void) _mutex_init(&zs->zs_vdev_lock, USYNC_THREAD, NULL);
3043 3062 (void) rwlock_init(&zs->zs_name_lock, USYNC_THREAD, NULL);
3044 3063
3045 3064 for (t = 0; t < ZTEST_SYNC_LOCKS; t++)
3046 3065 (void) _mutex_init(&zs->zs_sync_lock[t], USYNC_THREAD, NULL);
3047 3066
3048 3067 /*
3049 3068 * Destroy one disk before we even start.
3050 3069 * It's mirrored, so everything should work just fine.
3051 3070 * This makes us exercise fault handling very early in spa_load().
3052 3071 */
3053 3072 ztest_obliterate_one_disk(0);
3054 3073
3055 3074 /*
3056 3075 * Verify that the sum of the sizes of all blocks in the pool
3057 3076 * equals the SPA's allocated space total.
3058 3077 */
3059 3078 ztest_verify_blocks(pool);
3060 3079
3061 3080 /*
3062 3081 * Kick off a replacement of the disk we just obliterated.
3063 3082 */
3064 3083 kernel_init(FREAD | FWRITE);
3065 3084 VERIFY(spa_open(pool, &spa, FTAG) == 0);
3066 3085 ztest_replace_one_disk(spa, 0);
3067 3086 if (zopt_verbose >= 5)
3068 3087 show_pool_stats(spa);
3069 3088 spa_close(spa, FTAG);
3070 3089 kernel_fini();
3071 3090
3072 3091 kernel_init(FREAD | FWRITE);
3073 3092
3074 3093 /*
3075 3094 * Verify that we can export the pool and reimport it under a
3076 3095 * different name.
3077 3096 */
3078 3097 if (ztest_random(2) == 0) {
3079 3098 (void) snprintf(name, 100, "%s_import", pool);
3080 3099 ztest_spa_import_export(pool, name);
3081 3100 ztest_spa_import_export(name, pool);
3082 3101 }
3083 3102
3084 3103 /*
3085 3104 * Verify that we can loop over all pools.
3086 3105 */
3087 3106 mutex_enter(&spa_namespace_lock);
3088 3107 for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) {
3089 3108 if (zopt_verbose > 3) {
3090 3109 (void) printf("spa_next: found %s\n", spa_name(spa));
3091 3110 }
3092 3111 }
3093 3112 mutex_exit(&spa_namespace_lock);
3094 3113
3095 3114 /*
3096 3115 * Open our pool.
3097 3116 */
3098 3117 VERIFY(spa_open(pool, &spa, FTAG) == 0);
3099 3118
3100 3119 /*
3101 3120 * We don't expect the pool to suspend unless maxfaults == 0,
3102 3121 * in which case ztest_fault_inject() temporarily takes away
3103 3122 * the only valid replica.
3104 3123 */
3105 3124 if (zopt_maxfaults == 0)
3106 3125 spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
3107 3126 else
3108 3127 spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
3109 3128
3110 3129 /*
3111 3130 * Create a thread to periodically resume suspended I/O.
3112 3131 */
3113 3132 VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND,
3114 3133 &resume_tid) == 0);
3115 3134
3116 3135 /*
3117 3136 * Verify that we can safely inquire about about any object,
3118 3137 * whether it's allocated or not. To make it interesting,
3119 3138 * we probe a 5-wide window around each power of two.
3120 3139 * This hits all edge cases, including zero and the max.
3121 3140 */
3122 3141 for (t = 0; t < 64; t++) {
3123 3142 for (d = -5; d <= 5; d++) {
3124 3143 error = dmu_object_info(spa->spa_meta_objset,
3125 3144 (1ULL << t) + d, NULL);
3126 3145 ASSERT(error == 0 || error == ENOENT ||
3127 3146 error == EINVAL);
3128 3147 }
3129 3148 }
3130 3149
3131 3150 /*
3132 3151 * Now kick off all the tests that run in parallel.
3133 3152 */
3134 3153 zs->zs_enospc_count = 0;
3135 3154
3136 3155 za = umem_zalloc(zopt_threads * sizeof (ztest_args_t), UMEM_NOFAIL);
3137 3156
3138 3157 if (zopt_verbose >= 4)
3139 3158 (void) printf("starting main threads...\n");
3140 3159
3141 3160 za[0].za_start = gethrtime();
3142 3161 za[0].za_stop = za[0].za_start + zopt_passtime * NANOSEC;
3143 3162 za[0].za_stop = MIN(za[0].za_stop, zs->zs_stop_time);
3144 3163 za[0].za_kill = za[0].za_stop;
3145 3164 if (ztest_random(100) < zopt_killrate)
3146 3165 za[0].za_kill -= ztest_random(zopt_passtime * NANOSEC);
3147 3166
3148 3167 for (t = 0; t < zopt_threads; t++) {
3149 3168 d = t % zopt_datasets;
3150 3169
3151 3170 (void) strcpy(za[t].za_pool, pool);
3152 3171 za[t].za_os = za[d].za_os;
3153 3172 za[t].za_spa = spa;
3154 3173 za[t].za_zilog = za[d].za_zilog;
3155 3174 za[t].za_instance = t;
3156 3175 za[t].za_random = ztest_random(-1ULL);
3157 3176 za[t].za_start = za[0].za_start;
3158 3177 za[t].za_stop = za[0].za_stop;
3159 3178 za[t].za_kill = za[0].za_kill;
3160 3179
3161 3180 if (t < zopt_datasets) {
3162 3181 int test_future = FALSE;
3163 3182 (void) rw_rdlock(&ztest_shared->zs_name_lock);
3164 3183 (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
3165 3184 error = dmu_objset_create(name, DMU_OST_OTHER, NULL, 0,
3166 3185 ztest_create_cb, NULL);
3167 3186 if (error == EEXIST) {
3168 3187 test_future = TRUE;
3169 3188 } else if (error == ENOSPC) {
3170 3189 zs->zs_enospc_count++;
3171 3190 (void) rw_unlock(&ztest_shared->zs_name_lock);
3172 3191 break;
3173 3192 } else if (error != 0) {
3174 3193 fatal(0, "dmu_objset_create(%s) = %d",
3175 3194 name, error);
3176 3195 }
3177 3196 error = dmu_objset_open(name, DMU_OST_OTHER,
3178 3197 DS_MODE_USER, &za[d].za_os);
3179 3198 if (error)
3180 3199 fatal(0, "dmu_objset_open('%s') = %d",
3181 3200 name, error);
3182 3201 (void) rw_unlock(&ztest_shared->zs_name_lock);
3183 3202 if (test_future)
3184 3203 ztest_dmu_check_future_leak(&za[t]);
3185 3204 zil_replay(za[d].za_os, za[d].za_os,
3186 3205 ztest_replay_vector);
3187 3206 za[d].za_zilog = zil_open(za[d].za_os, NULL);
3188 3207 }
3189 3208
3190 3209 VERIFY(thr_create(0, 0, ztest_thread, &za[t], THR_BOUND,
3191 3210 &za[t].za_thread) == 0);
3192 3211 }
3193 3212
3194 3213 while (--t >= 0) {
3195 3214 VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
3196 3215 if (t < zopt_datasets) {
3197 3216 zil_close(za[t].za_zilog);
3198 3217 dmu_objset_close(za[t].za_os);
3199 3218 }
3200 3219 }
3201 3220
3202 3221 if (zopt_verbose >= 3)
3203 3222 show_pool_stats(spa);
3204 3223
3205 3224 txg_wait_synced(spa_get_dsl(spa), 0);
3206 3225
3207 3226 zs->zs_alloc = spa_get_alloc(spa);
3208 3227 zs->zs_space = spa_get_space(spa);
3209 3228
3210 3229 /*
3211 3230 * If we had out-of-space errors, destroy a random objset.
3212 3231 */
3213 3232 if (zs->zs_enospc_count != 0) {
3214 3233 (void) rw_rdlock(&ztest_shared->zs_name_lock);
3215 3234 d = (int)ztest_random(zopt_datasets);
3216 3235 (void) snprintf(name, 100, "%s/%s_%d", pool, pool, d);
3217 3236 if (zopt_verbose >= 3)
3218 3237 (void) printf("Destroying %s to free up space\n", name);
3219 3238 (void) dmu_objset_find(name, ztest_destroy_cb, &za[d],
3220 3239 DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
3221 3240 (void) rw_unlock(&ztest_shared->zs_name_lock);
3222 3241 }
3223 3242
3224 3243 txg_wait_synced(spa_get_dsl(spa), 0);
3225 3244
3226 3245 umem_free(za, zopt_threads * sizeof (ztest_args_t));
3227 3246
3228 3247 /* Kill the resume thread */
3229 3248 ztest_exiting = B_TRUE;
3230 3249 VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
3231 3250 ztest_resume(spa);
3232 3251
3233 3252 /*
3234 3253 * Right before closing the pool, kick off a bunch of async I/O;
3235 3254 * spa_close() should wait for it to complete.
3236 3255 */
3237 3256 for (t = 1; t < 50; t++)
3238 3257 dmu_prefetch(spa->spa_meta_objset, t, 0, 1 << 15);
3239 3258
3240 3259 spa_close(spa, FTAG);
3241 3260
3242 3261 kernel_fini();
3243 3262 }
3244 3263
/*
 * Format a duration given in nanoseconds into timebuf as days, hours,
 * minutes and seconds, omitting leading units that are zero:
 * "NdHHhMMmSSs", "NhMMmSSs", "NmSSs" or "Ns".
 *
 * t:       duration in nanoseconds; any sub-second remainder is dropped.
 * timebuf: destination buffer supplied by the caller; its size is not
 *          checked here.
 */
void
print_time(hrtime_t t, char *timebuf)
{
	hrtime_t secs = t / NANOSEC;
	hrtime_t mins = secs / 60;
	hrtime_t hours = mins / 60;
	hrtime_t days = hours / 24;

	/* Reduce each unit to the remainder within the next larger one. */
	secs %= 60;
	mins %= 60;
	hours %= 24;

	timebuf[0] = '\0';

	if (days) {
		(void) sprintf(timebuf,
		    "%llud%02lluh%02llum%02llus", days, hours, mins, secs);
		return;
	}

	if (hours) {
		(void) sprintf(timebuf, "%lluh%02llum%02llus",
		    hours, mins, secs);
		return;
	}

	if (mins) {
		(void) sprintf(timebuf, "%llum%02llus", mins, secs);
		return;
	}

	(void) sprintf(timebuf, "%llus", secs);
}
3269 3288
3270 3289 /*
3271 3290 * Create a storage pool with the given name and initial vdev size.
3272 3291 * Then create the specified number of datasets in the pool.
3273 3292 */
3274 3293 static void
3275 3294 ztest_init(char *pool)
3276 3295 {
3277 3296 spa_t *spa;
3278 3297 int error;
3279 3298 nvlist_t *nvroot;
3280 3299
3281 3300 kernel_init(FREAD | FWRITE);
3282 3301
3283 3302 /*
3284 3303 * Create the storage pool.
3285 3304 */
3286 3305 (void) spa_destroy(pool);
3287 3306 ztest_shared->zs_vdev_primaries = 0;
3288 3307 nvroot = make_vdev_root(NULL, NULL, zopt_vdev_size, 0,
3289 3308 0, zopt_raidz, zopt_mirrors, 1);
3290 3309 error = spa_create(pool, nvroot, NULL, NULL, NULL);
3291 3310 nvlist_free(nvroot);
3292 3311
3293 3312 if (error)
3294 3313 fatal(0, "spa_create() = %d", error);
3295 3314 error = spa_open(pool, &spa, FTAG);
3296 3315 if (error)
3297 3316 fatal(0, "spa_open() = %d", error);
3298 3317
3299 3318 if (zopt_verbose >= 3)
3300 3319 show_pool_stats(spa);
3301 3320
3302 3321 spa_close(spa, FTAG);
3303 3322
3304 3323 kernel_fini();
3305 3324 }
3306 3325
3307 3326 int
3308 3327 main(int argc, char **argv)
3309 3328 {
3310 3329 int kills = 0;
3311 3330 int iters = 0;
3312 3331 int i, f;
3313 3332 ztest_shared_t *zs;
3314 3333 ztest_info_t *zi;
3315 3334 char timebuf[100];
3316 3335 char numbuf[6];
3317 3336
3318 3337 (void) setvbuf(stdout, NULL, _IOLBF, 0);
3319 3338
3320 3339 /* Override location of zpool.cache */
3321 3340 spa_config_path = "/tmp/zpool.cache";
3322 3341
3323 3342 ztest_random_fd = open("/dev/urandom", O_RDONLY);
3324 3343
3325 3344 process_options(argc, argv);
3326 3345
3327 3346 /*
3328 3347 * Blow away any existing copy of zpool.cache
3329 3348 */
3330 3349 if (zopt_init != 0)
3331 3350 (void) remove("/tmp/zpool.cache");
3332 3351
3333 3352 zs = ztest_shared = (void *)mmap(0,
3334 3353 P2ROUNDUP(sizeof (ztest_shared_t), getpagesize()),
3335 3354 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
3336 3355
3337 3356 if (zopt_verbose >= 1) {
3338 3357 (void) printf("%llu vdevs, %d datasets, %d threads,"
3339 3358 " %llu seconds...\n",
3340 3359 (u_longlong_t)zopt_vdevs, zopt_datasets, zopt_threads,
3341 3360 (u_longlong_t)zopt_time);
3342 3361 }
3343 3362
3344 3363 /*
3345 3364 * Create and initialize our storage pool.
3346 3365 */
3347 3366 for (i = 1; i <= zopt_init; i++) {
3348 3367 bzero(zs, sizeof (ztest_shared_t));
3349 3368 if (zopt_verbose >= 3 && zopt_init != 1)
3350 3369 (void) printf("ztest_init(), pass %d\n", i);
3351 3370 ztest_init(zopt_pool);
3352 3371 }
3353 3372
3354 3373 /*
3355 3374 * Initialize the call targets for each function.
3356 3375 */
3357 3376 for (f = 0; f < ZTEST_FUNCS; f++) {
3358 3377 zi = &zs->zs_info[f];
3359 3378
3360 3379 *zi = ztest_info[f];
3361 3380
3362 3381 if (*zi->zi_interval == 0)
3363 3382 zi->zi_call_target = UINT64_MAX;
3364 3383 else
3365 3384 zi->zi_call_target = zopt_time / *zi->zi_interval;
3366 3385 }
3367 3386
3368 3387 zs->zs_start_time = gethrtime();
3369 3388 zs->zs_stop_time = zs->zs_start_time + zopt_time * NANOSEC;
3370 3389
3371 3390 /*
3372 3391 * Run the tests in a loop. These tests include fault injection
3373 3392 * to verify that self-healing data works, and forced crashes
3374 3393 * to verify that we never lose on-disk consistency.
3375 3394 */
3376 3395 while (gethrtime() < zs->zs_stop_time) {
3377 3396 int status;
3378 3397 pid_t pid;
3379 3398 char *tmp;
3380 3399
3381 3400 /*
3382 3401 * Initialize the workload counters for each function.
3383 3402 */
3384 3403 for (f = 0; f < ZTEST_FUNCS; f++) {
3385 3404 zi = &zs->zs_info[f];
3386 3405 zi->zi_calls = 0;
3387 3406 zi->zi_call_time = 0;
3388 3407 }
3389 3408
3390 3409 pid = fork();
3391 3410
3392 3411 if (pid == -1)
3393 3412 fatal(1, "fork failed");
3394 3413
3395 3414 if (pid == 0) { /* child */
3396 3415 struct rlimit rl = { 1024, 1024 };
3397 3416 (void) setrlimit(RLIMIT_NOFILE, &rl);
3398 3417 (void) enable_extended_FILE_stdio(-1, -1);
3399 3418 ztest_run(zopt_pool);
3400 3419 exit(0);
3401 3420 }
3402 3421
3403 3422 while (waitpid(pid, &status, 0) != pid)
3404 3423 continue;
3405 3424
3406 3425 if (WIFEXITED(status)) {
3407 3426 if (WEXITSTATUS(status) != 0) {
3408 3427 (void) fprintf(stderr,
3409 3428 "child exited with code %d\n",
3410 3429 WEXITSTATUS(status));
3411 3430 exit(2);
3412 3431 }
3413 3432 } else if (WIFSIGNALED(status)) {
3414 3433 if (WTERMSIG(status) != SIGKILL) {
3415 3434 (void) fprintf(stderr,
3416 3435 "child died with signal %d\n",
3417 3436 WTERMSIG(status));
3418 3437 exit(3);
3419 3438 }
3420 3439 kills++;
3421 3440 } else {
3422 3441 (void) fprintf(stderr, "something strange happened "
3423 3442 "to child\n");
3424 3443 exit(4);
3425 3444 }
3426 3445
3427 3446 iters++;
3428 3447
3429 3448 if (zopt_verbose >= 1) {
3430 3449 hrtime_t now = gethrtime();
3431 3450
3432 3451 now = MIN(now, zs->zs_stop_time);
3433 3452 print_time(zs->zs_stop_time - now, timebuf);
3434 3453 nicenum(zs->zs_space, numbuf);
3435 3454
3436 3455 (void) printf("Pass %3d, %8s, %3llu ENOSPC, "
3437 3456 "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
3438 3457 iters,
3439 3458 WIFEXITED(status) ? "Complete" : "SIGKILL",
3440 3459 (u_longlong_t)zs->zs_enospc_count,
3441 3460 100.0 * zs->zs_alloc / zs->zs_space,
3442 3461 numbuf,
3443 3462 100.0 * (now - zs->zs_start_time) /
3444 3463 (zopt_time * NANOSEC), timebuf);
3445 3464 }
3446 3465
3447 3466 if (zopt_verbose >= 2) {
3448 3467 (void) printf("\nWorkload summary:\n\n");
3449 3468 (void) printf("%7s %9s %s\n",
3450 3469 "Calls", "Time", "Function");
3451 3470 (void) printf("%7s %9s %s\n",
3452 3471 "-----", "----", "--------");
3453 3472 for (f = 0; f < ZTEST_FUNCS; f++) {
3454 3473 Dl_info dli;
3455 3474
3456 3475 zi = &zs->zs_info[f];
3457 3476 print_time(zi->zi_call_time, timebuf);
3458 3477 (void) dladdr((void *)zi->zi_func, &dli);
3459 3478 (void) printf("%7llu %9s %s\n",
3460 3479 (u_longlong_t)zi->zi_calls, timebuf,
3461 3480 dli.dli_sname);
3462 3481 }
3463 3482 (void) printf("\n");
3464 3483 }
3465 3484
3466 3485 /*
3467 3486 * It's possible that we killed a child during a rename test, in
3468 3487 * which case we'll have a 'ztest_tmp' pool lying around instead
3469 3488 * of 'ztest'. Do a blind rename in case this happened.
3470 3489 */
3471 3490 tmp = umem_alloc(strlen(zopt_pool) + 5, UMEM_NOFAIL);
3472 3491 (void) strcpy(tmp, zopt_pool);
3473 3492 (void) strcat(tmp, "_tmp");
3474 3493 kernel_init(FREAD | FWRITE);
3475 3494 (void) spa_rename(tmp, zopt_pool);
3476 3495 kernel_fini();
3477 3496 umem_free(tmp, strlen(tmp) + 1);
3478 3497 }
3479 3498
3480 3499 ztest_verify_blocks(zopt_pool);
3481 3500
3482 3501 if (zopt_verbose >= 1) {
3483 3502 (void) printf("%d killed, %d completed, %.0f%% kill rate\n",
3484 3503 kills, iters - kills, (100.0 * kills) / MAX(1, iters));
3485 3504 }
3486 3505
3487 3506 return (0);
3488 3507 }
|
↓ open down ↓ |
999 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX