--- old/usr/src/cmd/mdb/common/modules/zfs/zfs.c Tue Feb 3 13:17:11 2009 +++ new/usr/src/cmd/mdb/common/modules/zfs/zfs.c Tue Feb 3 13:17:10 2009 @@ -408,6 +408,7 @@ dmu_object_type_info_t *doti; zio_compress_info_t *zct; zio_checksum_info_t *zci; + zio_crypt_info_t *zcc; int i; char buf[MAXPATHLEN]; @@ -437,6 +438,13 @@ zct[i].ci_name = local_strdup(buf); } + if (read_symbol("zio_crypt_table", (void **)&zcc) != DCMD_OK) + return (DCMD_ERR); + for (i = 0; i < ZIO_CRYPT_FUNCTIONS; i++) { + mdb_readstr(buf, sizeof (buf), (uintptr_t)zcc[i].ci_name); + zcc[i].ci_name = local_strdup(buf); + } + /* * Super-ick warning: This code is also duplicated in * cmd/zdb.c . Yeah, I hate code replication, too. @@ -463,9 +471,10 @@ doti[BP_GET_TYPE(&bp)].ot_name); mdb_printf("BIRTH: %-16llx LEVEL: %-2d\tFILL: %llx\n", bp.blk_birth, BP_GET_LEVEL(&bp), bp.blk_fill); - mdb_printf("CKFUNC: %-16s\t\tCOMP: %s\n", + mdb_printf("CKFUNC: %-16s\tCOMP: %s\tCRYPT: %s\n", zci[BP_GET_CHECKSUM(&bp)].ci_name, - zct[BP_GET_COMPRESS(&bp)].ci_name); + zct[BP_GET_COMPRESS(&bp)].ci_name, + zcc[BP_GET_CRYPT(&bp)].ci_name); mdb_printf("CKSUM: %llx:%llx:%llx:%llx\n", bp.blk_cksum.zc_word[0], bp.blk_cksum.zc_word[1], --- old/usr/src/cmd/svc/milestone/devices-local Tue Feb 3 13:17:12 2009 +++ new/usr/src/cmd/svc/milestone/devices-local Tue Feb 3 13:17:11 2009 @@ -20,14 +20,12 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T. # All rights reserved. # -# -# ident "%Z%%M% %I% %E% SMI" # Initiate the device reconfiguration process in case we need some # device links established so that we can successfully perform our @@ -82,6 +80,7 @@ # Create any zvol devices if [ -x /usr/sbin/zfs ]; then + /usr/sbin/zpool key -l -a /usr/sbin/zfs volinit || exit $SMF_EXIT_ERR_FATAL # # Add swap again to allow for swapping to zvols. 
--- old/usr/src/cmd/truss/codes.c Tue Feb 3 13:17:14 2009 +++ new/usr/src/cmd/truss/codes.c Tue Feb 3 13:17:13 2009 @@ -1064,6 +1064,8 @@ "zfs_cmd_t" }, { (uint_t)ZFS_IOC_INHERIT_PROP, "ZFS_IOC_INHERIT_PROP", "zfs_cmd_t" }, + { (uint_t)ZFS_IOC_CRYPTO, "ZFS_IOC_CRYPTO", + "zfs_cmd_t" }, /* kssl ioctls */ { (uint_t)KSSL_ADD_ENTRY, "KSSL_ADD_ENTRY", --- old/usr/src/cmd/zdb/zdb.c Tue Feb 3 13:17:16 2009 +++ new/usr/src/cmd/zdb/zdb.c Tue Feb 3 13:17:15 2009 @@ -54,6 +54,7 @@ #undef ZFS_MAXNAMELEN #undef verify #include +#include const char cmdname[] = "zdb"; uint8_t dump_opt[256]; @@ -65,7 +66,7 @@ int zopt_objects = 0; libzfs_handle_t *g_zfs; boolean_t zdb_sig_user_data = B_TRUE; -int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256; +int zdb_sig_cksumalg = ZIO_CHECKSUM_FUNCTIONS - 1; /* * These libumem hooks provide a reasonable set of defaults for the allocator's @@ -1072,6 +1073,11 @@ zio_compress_table[doi.doi_compress].ci_name); } + if (doi.doi_crypt != ZIO_CRYPT_INHERIT || verbosity >= 6) { + (void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux), + " (E=%s)", zio_crypt_table[doi.doi_crypt].ci_name); + } + (void) printf("%10lld %3u %5s %5s %5s %5s %s%s\n", (u_longlong_t)object, doi.doi_indirection, iblk, dblk, lsize, asize, dmu_ot[doi.doi_type].ot_name, aux); @@ -1479,7 +1485,7 @@ print_sig = B_FALSE; if (print_sig) { - (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t" + (void) printf("%llu\t%lld\t%lld\t%s\t%s\t%s\t%s\t" "%llx:%llx:%llx:%llx\n", (u_longlong_t)BP_GET_LEVEL(bp), (longlong_t)BP_GET_PSIZE(bp), @@ -1487,6 +1493,7 @@ dmu_ot[BP_GET_TYPE(bp)].ot_name, zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name, zio_compress_table[BP_GET_COMPRESS(bp)].ci_name, + zio_crypt_table[BP_GET_CRYPT(bp)].ci_name, (u_longlong_t)bp->blk_cksum.zc_word[0], (u_longlong_t)bp->blk_cksum.zc_word[1], (u_longlong_t)bp->blk_cksum.zc_word[2], @@ -1845,9 +1852,10 @@ (void) printf("\tBIRTH: %-16llx LEVEL: %-2llu\tFILL: %llx\n", (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_GET_LEVEL(bp), 
(u_longlong_t)bp->blk_fill); - (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\n", + (void) printf("\tCKFUNC: %-16s\t\tCOMP: %s\t\tCRYPT: %s\n", zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name, - zio_compress_table[BP_GET_COMPRESS(bp)].ci_name); + zio_compress_table[BP_GET_COMPRESS(bp)].ci_name, + zio_crypt_table[BP_GET_CRYPT(bp)].ci_name); (void) printf("\tCKSUM: %llx:%llx:%llx:%llx\n", (u_longlong_t)bp->blk_cksum.zc_word[0], (u_longlong_t)bp->blk_cksum.zc_word[1], @@ -2297,6 +2305,8 @@ zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_4; else if (strcmp(endstr, "sha256") == 0) zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256; + else if (strcmp(endstr, "sha256+mac") == 0) + zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256_MAC; else if (strcmp(endstr, "all") == 0) zdb_sig_cksumalg = ZIO_CHECKSUM_FLETCHER_2; else --- old/usr/src/cmd/zdb/zdb_il.c Tue Feb 3 13:17:17 2009 +++ new/usr/src/cmd/zdb/zdb_il.c Tue Feb 3 13:17:16 2009 @@ -19,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Print intent log header and statistics. */ @@ -357,7 +355,7 @@ if (verbose >= 2) { (void) printf("\n"); (void) zil_parse(zilog, print_log_block, print_log_record, NULL, - zh->zh_claim_txg); + zh->zh_claim_txg, 0); print_log_stats(verbose); } } --- old/usr/src/cmd/zfs/zfs_main.c Tue Feb 3 13:17:19 2009 +++ new/usr/src/cmd/zfs/zfs_main.c Tue Feb 3 13:17:18 2009 @@ -77,6 +77,7 @@ static int zfs_do_promote(int argc, char **argv); static int zfs_do_allow(int argc, char **argv); static int zfs_do_unallow(int argc, char **argv); +static int zfs_do_key(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. 
@@ -116,7 +117,8 @@ HELP_UNMOUNT, HELP_UNSHARE, HELP_ALLOW, - HELP_UNALLOW + HELP_UNALLOW, + HELP_KEY } zfs_help_t; typedef struct zfs_command { @@ -162,6 +164,8 @@ { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, { "unallow", zfs_do_unallow, HELP_UNALLOW }, + { NULL }, + { "key", zfs_do_key, HELP_KEY }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -250,6 +254,9 @@ "\n" "\tunallow [-r] -s @setname [[,...]] " "\n")); + case HELP_KEY: + return (gettext("\tkey <-l | -u | -c [ -o ]> " + "<-a | filesystem>\n")); } abort(); @@ -3061,7 +3068,9 @@ /* * Ignore any filesystems which don't apply to us. This * includes those with a legacy mountpoint, or those with - * legacy share options. + * legacy share options. We also have to ignore those + * that are encrypted that don't currently have their + * key available. */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); @@ -3130,7 +3139,23 @@ return (0); } + /* + * Only need to check for ZFS_CRYPT_KEY_UNAVAILABLE since + * datasets that aren't encrypted have a keystatus of + * ZFS_CRYPT_KEY_UNDEFINED. + */ + if (zfs_mount_crypto_check(zhp) != 0) { + if (!explicit) + return (0); + + (void) fprintf(stderr, gettext("cannot %s '%s': " + "encryption key unavailable\n"), cmdname, + zfs_get_name(zhp)); + return (1); + } + + /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. 
If the * filesystem is already mounted or shared, return (failing @@ -3350,7 +3375,9 @@ zfs_handle_t **dslist = NULL; size_t i, count = 0; char *protocol = NULL; + char bypass[ZPOOL_MAXPROPLEN] = { 0 }; + if (op == OP_MOUNT) { types = ZFS_TYPE_FILESYSTEM; } else if (argc > 0) { @@ -3384,12 +3411,41 @@ qsort(dslist, count, sizeof (void *), dataset_cmp); for (i = 0; i < count; i++) { + if (verbose) report_mount_progress(i, count); - if (share_mount_one(dslist[i], op, flags, protocol, - B_FALSE, options) != 0) - ret = 1; + /* + * If bypass has a dataset value, then we need to skip + * any datasets that are underneath it. + */ + if (bypass[0] != '\0') { + size_t len = strlen(bypass); + const char *ds_name = zfs_get_name(dslist[i]); + + if (strncmp(bypass, ds_name, len) == 0 && + (strlen(ds_name) > len) && + ds_name[len] == '/') { + zfs_close(dslist[i]); + continue; + } else + bypass[0] = '\0'; + } + + /* + * Check if the dataset has a key before loading, if + * no, then store it in 'bypass'. 
+ */ + if (zfs_mount_crypto_check(dslist[i])) { + (void) strlcpy(bypass, zfs_get_name(dslist[i]), + ZPOOL_MAXPROPLEN); + } else { + if (share_mount_one(dslist[i], op, flags, + protocol, B_FALSE, options) != 0) { + ret = 1; + } + } + zfs_close(dslist[i]); } @@ -4079,6 +4135,221 @@ return (unshare_unmount_path(OP_MOUNT, argv[0], flags, B_TRUE)); } +/* + * zfs key <-l | -u | -c [-o property=value]> <-a | dataset> + * + * Loads (-l), unloads (-u) or changes (-c) the encryption key of the named + * dataset; with -a, -l and -u walk every filesystem and volume instead. + * Only keysource and keyscope may be given with -o, and only after -c. + */ +static int +zfs_do_key(int argc, char **argv) +{ + int error = 1, options = 0; + nvlist_t *props = NULL; + int c; /* getopt() returns int; char cannot portably hold -1 */ + char *propname, *propval = NULL; + boolean_t load = B_FALSE, unload = B_FALSE, change = B_FALSE; + boolean_t do_all = B_FALSE; + char *strval; + zfs_handle_t **dslist = NULL, *zhp = NULL; + size_t i, count = 0; + zfs_prop_t zprop; + + while ((c = getopt(argc, argv, "aluco:")) != -1) { + switch (c) { + case 'a': + do_all = B_TRUE; + break; + + case 'l': + load = B_TRUE; + break; + + case 'u': + unload = B_TRUE; + break; + + case 'c': + change = B_TRUE; + break; + + case 'o': + /* Key change is the only command that allows options */ + if (change != B_TRUE) { + (void) fprintf(stderr, gettext("Property " + "options only allowed during key " + "change.\n")); + usage(B_FALSE); + goto error; + } + + propname = optarg; + if ((propval = strchr(optarg, '=')) == NULL) { + (void) fprintf(stderr, gettext("missing " + "'=' for -o option\n")); + goto error; + } + + *propval = '\0'; + propval++; + + zprop = zfs_name_to_prop(propname); + switch (zprop) { + case ZFS_PROP_KEYSOURCE: + case ZFS_PROP_KEYSCOPE: + break; + + default: + (void) fprintf(stderr, gettext("Invalid " + "property for key operation: '%s'\n"), + propname); + goto error; + } + + if (props == NULL && + nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { + (void) fprintf(stderr, gettext("internal " + "error: out of memory\n")); + goto error; + } + + if (nvlist_lookup_string(props, propname, + &strval) == 0) { + (void) fprintf(stderr, gettext("property '%s' " + "specified multiple times\n"), propname); + goto error; + } + if (nvlist_add_string(props, propname, propval) != 0) { + 
(void) fprintf(stderr, gettext("internal " + "error: out of memory\n")); + goto error; + } + + options += 2; + break; + + case '?': + default: + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + + } + } + + if (!change && props != NULL) { + (void) fprintf(stderr, gettext("Properties are not allowed to " + "be used in this command.\n")); + goto error; + } + + if (((load || unload) && (argc > 3)) || + (change && ((argc - options) > 3))) { + (void) fprintf(stderr, + gettext("too many arguments\n")); + usage(B_FALSE); + goto error; + } else if ((load || unload) && (argc < 3)) { + (void) fprintf(stderr, gettext("missing dataset " + "argument (specify -a for all)\n")); + usage(B_FALSE); + goto error; + } else if (change && ((argc - options) < 3)) { + (void) fprintf(stderr, gettext("missing dataset " + "argument\n")); + usage(B_FALSE); + goto error; + } + + if (do_all == B_FALSE) { + zhp = zfs_open(g_zfs, argv[argc - 1], + ZFS_TYPE_FILESYSTEM|ZFS_TYPE_VOLUME); + if (zhp == NULL) + goto error; + + } else if (change) { + /* We don't support do_all in a change operation */ + + (void) fprintf(stderr, gettext("cannot use '-a' with " + "change operation.\n")); + usage(B_FALSE); + goto error; + + } else { + get_all_datasets(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, + &dslist, &count, B_FALSE); + if (count == 0) + return (0); + + qsort(dslist, count, sizeof (void *), dataset_cmp); + } + + if (load) { + if (do_all) { + zfs_crypt_t *cry = NULL; + + cry = calloc(1, sizeof (zfs_crypt_t)); + if (cry == NULL) { + (void) fprintf(stderr, gettext("internal " + "error: out of memory\n")); + for (i = 0; i < count; i++) + zfs_close(dslist[i]); + free(dslist); + goto error; + } + for (i = 0; i < count; i++) { + if (zfs_is_encrypted(dslist[i])) { + zfs_set_libzfs_cry(dslist[i], cry); + (void) zfs_load_key(dslist[i]); + bzero(cry, sizeof (zfs_crypt_t)); + } + zfs_close(dslist[i]); + } + + free(cry); + free(dslist); + error = 0; + + } else + error = zfs_cmd_key_load(zhp); + + } else if (unload) { + if (do_all) { + /* + * Do in reverse order so we can unmount easily. i runs + * from count down to 1 so that dslist[0] is processed. + */ + for (i = count; i > 0; i--) { + if (zfs_is_encrypted(dslist[i - 1])) + 
(void) zfs_unload_key(dslist[i - 1]); + + zfs_close(dslist[i - 1]); + } + + free(dslist); + error = 0; + + } else + error = zfs_cmd_key_unload(zhp); + + } else if (change) { + error = zfs_cmd_key_change(zhp, props); + + } else + usage(B_FALSE); + + if (zhp != NULL) + zfs_close(zhp); + +error: + if (props != NULL) { + nvlist_free(props); + } + return (error); +} + + static int volcheck(zpool_handle_t *zhp, void *data) { --- old/usr/src/cmd/zpool/zpool_main.c Tue Feb 3 13:17:21 2009 +++ new/usr/src/cmd/zpool/zpool_main.c Tue Feb 3 13:17:20 2009 @@ -80,6 +80,8 @@ static int zpool_do_get(int, char **); static int zpool_do_set(int, char **); +static int zpool_do_key(int, char **); + /* * These libumem hooks provide a reasonable set of defaults for the allocator's * debugging facilities. @@ -119,7 +121,8 @@ HELP_STATUS, HELP_UPGRADE, HELP_GET, - HELP_SET + HELP_SET, + HELP_KEY } zpool_help_t; @@ -166,6 +169,8 @@ { "history", zpool_do_history, HELP_HISTORY }, { "get", zpool_do_get, HELP_GET }, { "set", zpool_do_set, HELP_SET }, + { NULL }, + { "key", zpool_do_key, HELP_KEY }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) @@ -230,6 +235,9 @@ " ...\n")); case HELP_SET: return (gettext("\tset \n")); + case HELP_KEY: + return (gettext("\tkey <-l | -u | -c [ -o ]> " + "<-a | pool>\n")); } abort(); @@ -807,6 +815,7 @@ zpool_do_destroy(int argc, char **argv) { boolean_t force = B_FALSE; + boolean_t encrypted_only = B_FALSE; int c; char *pool; zpool_handle_t *zhp; @@ -851,7 +860,7 @@ return (1); } - if (zpool_disable_datasets(zhp, force) != 0) { + if (zpool_disable_datasets(zhp, force, encrypted_only) != 0) { (void) fprintf(stderr, gettext("could not destroy '%s': " "could not unmount datasets\n"), zpool_get_name(zhp)); return (1); @@ -878,6 +887,7 @@ { boolean_t force = B_FALSE; boolean_t hardforce = B_FALSE; + boolean_t encrypted_only = B_FALSE; int c; zpool_handle_t *zhp; int ret; @@ -915,7 +925,7 @@ continue; } - if (zpool_disable_datasets(zhp, force) != 0) 
{ + if (zpool_disable_datasets(zhp, force, encrypted_only) != 0) { ret = 1; zpool_close(zhp); continue; @@ -1263,6 +1273,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, int force, nvlist_t *props, boolean_t allowfaulted) { + boolean_t encrypted_only = B_FALSE; zpool_handle_t *zhp; char *name; uint64_t state; @@ -1323,14 +1334,36 @@ verify((zhp = zpool_open_canfail(g_zfs, name)) != NULL); + /* + * Attempt to load the crypto keys before mounting up the datasets + * Note that depending on the value of the keysource property + * this could cause us to go interactive. + * We don't mind if this falls though and continue to enable what + * datasets we can if it does. + * + * Use zpool_load_key() and tell it we are doing an import rather than + * zpool_cmd_key_load(). + * + * We then attempt enable all datasets not just the encrypted ones. + */ + + error = zpool_load_key(zhp, B_TRUE); + if (error != 0 && error != ENOTSUP) { + libzfs_handle_t *zpool_hdl = zpool_get_handle(zhp); + (void) fprintf(stderr, gettext( + "%s.\nEncrypted datasets with keyscope=pool " + "will not be mounted.\n"), + libzfs_error_description(zpool_hdl)); + } + if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && - zpool_enable_datasets(zhp, mntopts, 0) != 0) { + zpool_enable_datasets(zhp, mntopts, 0, encrypted_only) != 0) { zpool_close(zhp); return (1); } zpool_close(zhp); - return (error); + return (0); } /* @@ -2990,6 +3023,8 @@ const char *health; uint_t c; vdev_stat_t *vs; + char keystatus[MAXNAMELEN]; + zprop_source_t srctype; config = zpool_get_config(zhp, NULL); reason = zpool_get_status(zhp, &msgid); @@ -3024,6 +3059,11 @@ (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); (void) printf(gettext(" state: %s\n"), health); + (void) zpool_get_prop(zhp, ZPOOL_PROP_KEYSTATUS, keystatus, + sizeof (keystatus), &srctype); + + (void) printf(gettext(" key: %s\n"), keystatus); + switch (reason) { case ZPOOL_STATUS_MISSING_DEV_R: (void) printf(gettext("status: One or more 
devices could not " @@ -3497,6 +3537,7 @@ (void) printf(gettext(" 13 snapused property\n")); (void) printf(gettext(" 14 passthrough-x aclinherit " "support\n")); + (void) printf(gettext(" 15 Cryptographic support.\n")); (void) printf(gettext("For more information on a particular " "version, including supported releases, see:\n\n")); (void) printf("http://www.opensolaris.org/os/community/zfs/" @@ -3582,6 +3623,7 @@ "refquota set", "refreservation set", "pool scrub done", + "crypto key create", }; /* @@ -3811,7 +3853,6 @@ set_cbdata_t *cb = (set_cbdata_t *)data; error = zpool_set_prop(zhp, cb->cb_propname, cb->cb_value); - if (!error) cb->cb_any_successful = B_TRUE; @@ -3863,6 +3904,186 @@ return (error); } + +/* + * for_each_pool() callback for 'zpool key -l -a'. Pools for which + * zpool_keysource_prompt() is true are skipped when SMF_FMRI names + * svc:/system/device/local; EEXIST and ENOTSUP are not failures. + */ +static int +/* LINTED E_FUNC_ARG_UNUSED */ +key_callback_load(zpool_handle_t *zhp, void *data) +{ + int ret; + if (zpool_keysource_prompt(zhp)) { + char *smf_fmri = getenv("SMF_FMRI"); + + if (smf_fmri != NULL && + strcmp(smf_fmri, "svc:/system/device/local") == 0) { + return (0); + } + } + ret = zpool_cmd_key_load(zhp); + if (ret != 0 && !(ret == EEXIST || ret == ENOTSUP)) { + return (1); + } + + return (0); +} + +/* + * for_each_pool() callback for 'zpool key -u -a'; ENOENT and ENOTSUP from + * the unload are not treated as failures. + */ +static int +/* LINTED E_FUNC_ARG_UNUSED */ +key_callback_unload(zpool_handle_t *zhp, void *data) +{ + int ret; + + ret = zpool_cmd_key_unload(zhp); + if (ret != 0 && !(ret == ENOENT || ret == ENOTSUP)) { + return (1); + } + + return (0); +} + + +/* + * zpool key <-l | -u | -c [-o keysource=value]> <-a | pool> + * + * Loads (-l), unloads (-u) or changes (-c) the key of the named pool; with + * -a, -l and -u are applied to every pool. Returns 0 on success, 1 on + * failure. + */ +static int +zpool_do_key(int argc, char **argv) +{ + int error = 1, options = 0; + nvlist_t *props = NULL; + int c; /* getopt() returns int; char cannot portably hold -1 */ + char *propval = NULL; + boolean_t load = B_FALSE, unload = B_FALSE, change = B_FALSE; + boolean_t do_all = B_FALSE; + zpool_handle_t *zhp = NULL; + + while ((c = getopt(argc, argv, "aluco:")) != -1) { + switch (c) { + case 'a': + do_all = B_TRUE; + break; + + case 'l': + load = B_TRUE; + break; + + case 'u': + unload = B_TRUE; + break; + + case 'c': + change = B_TRUE; + break; + + case 'o': + if ((propval = strchr(optarg, '=')) == NULL) { + (void) fprintf(stderr, gettext("missing " + "'=' for -o 
option\n")); + goto error; + } + + *propval = '\0'; + propval++; + if (strcmp(optarg, "keysource") != 0) { + (void) fprintf(stderr, gettext( + "Invalid property for key change: " + "\"%s\"\n"), + optarg); + goto error; + } + if (add_prop_list(optarg, propval, &props, B_TRUE)) + goto error; + + options += 2; + break; + + case '?': + default: + (void) fprintf(stderr, gettext("invalid option '%c'\n"), + optopt); + usage(B_FALSE); + goto error; + } + } + + if (!change && props != NULL) { + (void) fprintf(stderr, gettext("Properties are not allowed to " + "be used in this command.\n")); + goto error; + } + + if (((load || unload) && (argc > 3)) || + (change && ((argc - options) > 3))) { + (void) fprintf(stderr, + gettext("too many arguments\n")); + usage(B_FALSE); + goto error; + } else if ((load || unload) && (argc < 3)) { + (void) fprintf(stderr, gettext("missing pool " + "argument (specify -a for all)\n")); + usage(B_FALSE); + goto error; + } else if (change && ((argc - options) < 3)) { + (void) fprintf(stderr, gettext("missing pool " + "argument\n")); + usage(B_FALSE); + goto error; + } + + if (do_all == B_FALSE) { + zhp = zpool_open(g_zfs, argv[argc - 1]); + if (zhp == NULL) + goto error; + } + + if (load) { + if (do_all) + error = for_each_pool(0, NULL, B_TRUE, NULL, + key_callback_load, NULL); + else + error = zpool_cmd_key_load(zhp); + + } else if (unload) { + if (do_all) + error = for_each_pool(0, NULL, B_TRUE, NULL, + key_callback_unload, NULL); + else + error = zpool_cmd_key_unload(zhp); + + } else if (change) { + if (do_all) { + (void) fprintf(stderr, gettext("cannot use '-a' with " + "change operation.\n")); + usage(B_FALSE); + goto error; + } + + error = zpool_cmd_key_change(zhp, props); + + } else + usage(B_FALSE); + + if (zhp != NULL && !do_all) + zpool_close(zhp); + +error: + nvlist_free(props); + if (error != 0) + return (1); + return (0); +} + static int find_command_idx(char *command, int *idx) { --- old/usr/src/cmd/ztest/ztest.c Tue Feb 3 13:17:22 2009 +++ new/usr/src/cmd/ztest/ztest.c Tue Feb 3 13:17:22 2009 @@ -87,6 +87,7 @@ #include #include #include 
+#include #include #include #include @@ -698,6 +699,12 @@ return ((uint8_t)ztest_random(ZIO_COMPRESS_FUNCTIONS)); } +static uint8_t +ztest_random_crypt(void) +{ + return ((uint8_t)ztest_random(ZIO_CRYPT_FUNCTIONS)); +} + static int ztest_replay_create(objset_t *os, lr_create_t *lr, boolean_t byteswap) { @@ -1547,6 +1554,8 @@ ztest_random_checksum(), tx); dmu_object_set_compress(os, batchobj, ztest_random_compress(), tx); + dmu_object_set_crypt(os, batchobj, + ztest_random_crypt(), tx); dmu_write(os, batchobj, b * sizeof (uint64_t), sizeof (uint64_t), &object, tx); @@ -1608,6 +1617,8 @@ ztest_random_checksum(), tx); dmu_object_set_compress(os, object, ztest_random_compress(), tx); + dmu_object_set_crypt(os, object, + ztest_random_crypt(), tx); dmu_write(os, batchobj, b * sizeof (uint64_t), sizeof (uint64_t), &object, tx); @@ -2448,15 +2459,19 @@ dmu_objset_name(os, osname); - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { if (i == 0) { prop = "checksum"; value = ztest_random_checksum(); inherit = (value == ZIO_CHECKSUM_INHERIT); - } else { + } else if (i == 1) { prop = "compression"; value = ztest_random_compress(); inherit = (value == ZIO_COMPRESS_INHERIT); + } else { + prop = "crypt"; + value = ztest_random_crypt(); + inherit = (value == ZIO_CRYPT_INHERIT); } error = dsl_prop_set(osname, prop, sizeof (value), @@ -2472,11 +2487,15 @@ VERIFY3U(dsl_prop_get(osname, prop, sizeof (value), 1, &value, setpoint), ==, 0); - if (i == 0) + if (i == 0) { valname = zio_checksum_table[value].ci_name; - else + } else if (i == 1) { valname = zio_compress_table[value].ci_name; + } else { + valname = zio_crypt_table[value].ci_name; + } + if (zopt_verbose >= 6) { (void) printf("%s %s = %s for '%s'\n", osname, prop, valname, setpoint); --- old/usr/src/common/crypto/modes/ccm.c Tue Feb 3 13:17:24 2009 +++ new/usr/src/common/crypto/modes/ccm.c Tue Feb 3 13:17:23 2009 @@ -435,8 +435,8 @@ [ctx->ccm_remainder_len], pt_part); ctx->ccm_remainder_len += pt_part; 
ccm_decrypt_incomplete_block(ctx, encrypt_block); + ctx->ccm_processed_data_len += ctx->ccm_remainder_len; ctx->ccm_remainder_len = 0; - ctx->ccm_processed_data_len += pt_part; return (CRYPTO_SUCCESS); } else { /* let rest of the code handle this */ --- /dev/null Tue Feb 3 13:17:26 2009 +++ new/usr/src/common/zfs/zcrypt_common.c Tue Feb 3 13:17:25 2009 @@ -0,0 +1,50 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * Common routines/data used by zfs and zpool and zio crypto functions. + */ + +#include + +/* + * Cryptographic Algorithm table. + * + * NOTE that some crypto mechanisms require the key length in the + * crypto_key_t to be specified in bits not bytes. Bytes are used + * here since we kmem_alloc based on these values. 
+ * + * on == aes-256-ccm + * + * Algorithm/Mode Keylen iv MACLEN ZIL Option_name + * Bytes MACLEN + */ +zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS] = { + "", 0, 0, 0, 0, "inherit", + SUN_CKM_AES_CCM, 32, 12, 16, 8, "on", + "", 0, 0, 0, 0, "off", + SUN_CKM_AES_CCM, 16, 12, 16, 8, "aes-128-ccm", + SUN_CKM_AES_CCM, 32, 12, 16, 8, "aes-256-ccm", +}; --- old/usr/src/common/zfs/zfs_deleg.c Tue Feb 3 13:17:27 2009 +++ new/usr/src/common/zfs/zfs_deleg.c Tue Feb 3 13:17:26 2009 @@ -66,6 +66,8 @@ {ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, {ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE }, {ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, + {ZFS_DELEG_PERM_CRYPTO_DSL, ZFS_DELEG_NOTE_CRYPTO_DSL }, + {ZFS_DELEG_PERM_CRYPTO_DSL_CHANGE, ZFS_DELEG_NOTE_CRYPTO_DSL_CHANGE }, {NULL, ZFS_DELEG_NOTE_NONE } }; --- old/usr/src/common/zfs/zfs_deleg.h Tue Feb 3 13:17:28 2009 +++ new/usr/src/common/zfs/zfs_deleg.h Tue Feb 3 13:17:27 2009 @@ -59,6 +59,8 @@ ZFS_DELEG_NOTE_USERPROP, ZFS_DELEG_NOTE_MOUNT, ZFS_DELEG_NOTE_SHARE, + ZFS_DELEG_NOTE_CRYPTO_DSL, + ZFS_DELEG_NOTE_CRYPTO_DSL_CHANGE, ZFS_DELEG_NOTE_NONE } zfs_deleg_note_t; --- old/usr/src/common/zfs/zfs_prop.c Tue Feb 3 13:17:30 2009 +++ new/usr/src/common/zfs/zfs_prop.c Tue Feb 3 13:17:29 2009 @@ -58,6 +58,7 @@ { "fletcher2", ZIO_CHECKSUM_FLETCHER_2 }, { "fletcher4", ZIO_CHECKSUM_FLETCHER_4 }, { "sha256", ZIO_CHECKSUM_SHA256 }, + { "sha256+mac", ZIO_CHECKSUM_SHA256_MAC }, { NULL } }; @@ -157,6 +158,29 @@ { NULL } }; + static zprop_index_t crypt_table[] = { + { "on", ZIO_CRYPT_ON }, + { "off", ZIO_CRYPT_OFF }, + { "aes-128-ccm", ZIO_CRYPT_AES_128_CCM }, + { "aes-256-ccm", ZIO_CRYPT_AES_256_CCM }, + { NULL } + }; + + static zprop_index_t keyscope_table[] = { + { "pool", ZFS_KEYSCOPE_POOL }, + { "dataset", ZFS_KEYSCOPE_DSL }, + { NULL } + }; + + static zprop_index_t keystatus_table[] = { + { "undefined", ZFS_CRYPT_KEY_UNDEFINED}, + { "defined", ZFS_CRYPT_KEY_DEFINED}, + { "unavailable", ZFS_CRYPT_KEY_UNAVAILABLE}, + { 
"available", ZFS_CRYPT_KEY_AVAILABLE}, + { NULL } + }; + + /* inherit index properties */ register_index(ZFS_PROP_CHECKSUM, "checksum", ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, @@ -187,7 +211,11 @@ ZFS_CACHE_ALL, PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME, "all | none | metadata", "SECONDARYCACHE", cache_table); + register_index(ZFS_PROP_KEYSCOPE, "keyscope", ZFS_KEYSCOPE_POOL, + PROP_INHERIT, ZFS_TYPE_DATASET, "pool | dataset", "KEYSCOPE", + keyscope_table); + /* inherit index (boolean) properties */ register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table); @@ -226,6 +254,10 @@ /* readonly index (boolean) properties */ register_index(ZFS_PROP_MOUNTED, "mounted", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM, "yes | no", "MOUNTED", boolean_table); + register_index(ZFS_PROP_KEYSTATUS, "keystatus", + ZFS_CRYPT_KEY_UNDEFINED, PROP_READONLY, + ZFS_TYPE_DATASET, "undefined | unavailable | available", + "KEYSTATUS", keystatus_table); /* set once index properties */ register_index(ZFS_PROP_NORMALIZE, "normalization", 0, @@ -235,6 +267,9 @@ register_index(ZFS_PROP_CASE, "casesensitivity", ZFS_CASE_SENSITIVE, PROP_ONETIME, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT, "sensitive | insensitive | mixed", "CASE", case_table); + register_index(ZFS_PROP_ENCRYPTION, "encryption", ZIO_CRYPT_DEFAULT, + PROP_INHERIT, ZFS_TYPE_DATASET, + "on | off | aes-128-ccm | aes-256-ccm", "CRYPT", crypt_table); /* set once index (boolean) properties */ register_index(ZFS_PROP_UTF8ONLY, "utf8only", 0, PROP_ONETIME, @@ -254,6 +289,9 @@ ZFS_TYPE_DATASET, "filesystem | volume | snapshot", "TYPE"); register_string(ZFS_PROP_SHARESMB, "sharesmb", "off", PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "on | off | sharemgr(1M) options", "SHARESMB"); + register_string(ZFS_PROP_KEYSOURCE, "keysource", NULL, PROP_INHERIT, + ZFS_TYPE_DATASET, + ",", "KEYSOURCE"); /* readonly number properties */ 
register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY, @@ -307,6 +345,10 @@ PROP_INHERIT, ZFS_TYPE_VOLUME, "ISCSIOPTIONS"); register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "GUID"); + register_hidden(ZFS_PROP_WRAPPEDKEY, "wrappedkey", PROP_TYPE_BINARY, + PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "WRAPPEDKEY"); + register_hidden(ZFS_PROP_SALT, "salt", PROP_TYPE_NUMBER, + PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "SALT"); /* oddball properties */ register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0, NULL, --- old/usr/src/common/zfs/zfs_prop.h Tue Feb 3 13:17:31 2009 +++ new/usr/src/common/zfs/zfs_prop.h Tue Feb 3 13:17:30 2009 @@ -42,7 +42,8 @@ typedef enum { PROP_TYPE_NUMBER, /* numeric value */ PROP_TYPE_STRING, /* string value */ - PROP_TYPE_INDEX /* numeric value indexed by string */ + PROP_TYPE_INDEX, /* numeric value indexed by string */ + PROP_TYPE_BINARY /* binary data */ } zprop_type_t; typedef enum { --- old/usr/src/common/zfs/zpool_prop.c Tue Feb 3 13:17:32 2009 +++ new/usr/src/common/zfs/zpool_prop.c Tue Feb 3 13:17:31 2009 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "zfs_prop.h" @@ -63,6 +64,15 @@ { NULL } }; + static zprop_index_t keystatus_table[] = { + { "undefined", ZFS_CRYPT_KEY_UNDEFINED}, + { "defined", ZFS_CRYPT_KEY_DEFINED}, + { "unavailable", ZFS_CRYPT_KEY_UNAVAILABLE}, + { "available", ZFS_CRYPT_KEY_AVAILABLE}, + { NULL } + }; + + /* string properties */ register_string(ZPOOL_PROP_ALTROOT, "altroot", NULL, PROP_DEFAULT, ZFS_TYPE_POOL, "", "ALTROOT"); @@ -70,7 +80,11 @@ ZFS_TYPE_POOL, "", "BOOTFS"); register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL, PROP_DEFAULT, ZFS_TYPE_POOL, " | none", "CACHEFILE"); + register_string(ZPOOL_PROP_KEYSOURCE, "keysource", NULL, PROP_DEFAULT, + ZFS_TYPE_POOL, ",", + "KEYSOURCE"); + /* readonly number properties */ register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY, ZFS_TYPE_POOL, "", "SIZE"); @@ 
-102,9 +116,19 @@ ZIO_FAILURE_MODE_WAIT, PROP_DEFAULT, ZFS_TYPE_POOL, "wait | continue | panic", "FAILMODE", failuremode_table); + /* readonly index properties */ + register_index(ZPOOL_PROP_KEYSTATUS, "keystatus", + ZFS_CRYPT_KEY_UNDEFINED, PROP_READONLY, ZFS_TYPE_POOL, + "undefined | unavailable | available", + "KEYSTATUS", keystatus_table); + /* hidden properties */ register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING, PROP_READONLY, ZFS_TYPE_POOL, "NAME"); + register_hidden(ZPOOL_PROP_SALT, "salt", PROP_TYPE_NUMBER, + PROP_DEFAULT, ZFS_TYPE_POOL, "SALT"); + register_hidden(ZPOOL_PROP_KEYCHECK, "keycheck", PROP_TYPE_BINARY, + PROP_READONLY, ZFS_TYPE_POOL, "KEYCHECK"); } /* --- old/usr/src/common/zfs/zprop_common.c Tue Feb 3 13:17:34 2009 +++ new/usr/src/common/zfs/zprop_common.c Tue Feb 3 13:17:33 2009 @@ -23,8 +23,6 @@ * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Common routines used by zfs and zpool property management. */ @@ -396,8 +394,16 @@ break; case PROP_TYPE_STRING: *fixed = B_FALSE; break; + + case PROP_TYPE_BINARY: + /* + * Since we don't actually allow display of binary + * properties they can be treated as fixed width (of 0) + */ + *fixed = B_TRUE; + break; } return (ret); --- old/usr/src/grub/grub-0.97/stage2/zfs-include/dnode.h Tue Feb 3 13:17:36 2009 +++ new/usr/src/grub/grub-0.97/stage2/zfs-include/dnode.h Tue Feb 3 13:17:35 2009 @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -24,8 +24,6 @@ #ifndef _SYS_DNODE_H #define _SYS_DNODE_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Fixed constants. 
*/ @@ -63,8 +61,8 @@ uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ - uint8_t dn_pad2[4]; - + uint8_t dn_crypt; /* ZIO_CRYPT type */ + uint8_t dn_pad2[3]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ --- old/usr/src/grub/grub-0.97/stage2/zfs-include/spa.h Tue Feb 3 13:17:37 2009 +++ new/usr/src/grub/grub-0.97/stage2/zfs-include/spa.h Tue Feb 3 13:17:36 2009 @@ -24,8 +24,6 @@ #ifndef _SYS_SPA_H #define _SYS_SPA_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * General-purpose 32-bit and 64-bit bitfield encodings. */ @@ -110,7 +108,7 @@ * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 7 | padding | + * 7 | padding | crypt | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -139,6 +137,7 @@ * GRID RAID-Z layout information (reserved for future use) * cksum checksum function * comp compression function + * crypt encryption function (algorithm/mode/keylength) * G gang block indicator * E endianness * type DMU object type @@ -149,8 +148,8 @@ */ typedef struct blkptr { dva_t blk_dva[3]; /* 128-bit Data Virtual Address */ - uint64_t blk_prop; /* size, compression, type, etc */ - uint64_t blk_pad[3]; /* Extra space for the future */ + uint64_t blk_prop[2]; /* size, compression, type, etc */ + uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_birth; /* transaction group at birth */ uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ @@ -183,30 +182,33 @@ #define BP_GET_LSIZE(bp) \ (BP_IS_HOLE(bp) ? 
0 : \ - BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)) + BF64_GET_SB((bp)->blk_prop[0], 0, 16, SPA_MINBLOCKSHIFT, 1)) #define BP_SET_LSIZE(bp, x) \ - BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + BF64_SET_SB((bp)->blk_prop[0], 0, 16, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_PSIZE(bp) \ - BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) + BF64_GET_SB((bp)->blk_prop[0], 16, 16, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ - BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) + BF64_SET_SB((bp)->blk_prop[0], 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) -#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop[0], 32, 8) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop[0], 32, 8, x) -#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) -#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) +#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop[0], 40, 8) +#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop[0], 40, 8, x) -#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) -#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) +#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop[0], 48, 8) +#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop[0], 48, 8, x) -#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) -#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop[0], 56, 5) +#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop[0], 56, 5, x) -#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) -#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) +#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop[0], 63, 1)) +#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop[0], 63, 1, x) +#define BP_GET_CRYPT(bp) BF64_GET((bp)->blk_prop[1], 0, 8) +#define BP_SET_CRYPT(bp, x) 
BF64_SET((bp)->blk_prop[1], 0, 8, x) + #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) @@ -251,7 +253,7 @@ #define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) #define BP_IS_OLDER(bp, txg) (!BP_IS_HOLE(bp) && (bp)->blk_birth < (txg)) -#define BP_ZERO(bp) \ +#define BP_ZERO_DVAS(bp) \ { \ (bp)->blk_dva[0].dva_word[0] = 0; \ (bp)->blk_dva[0].dva_word[1] = 0; \ @@ -259,11 +261,16 @@ (bp)->blk_dva[1].dva_word[1] = 0; \ (bp)->blk_dva[2].dva_word[0] = 0; \ (bp)->blk_dva[2].dva_word[1] = 0; \ - (bp)->blk_prop = 0; \ + (bp)->blk_birth = 0; \ +} + +#define BP_ZERO(bp) \ +{ \ + BP_ZERO_DVAS(bp) \ + (bp)->blk_prop[0] = 0; \ + (bp)->blk_prop[1] = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ - (bp)->blk_pad[2] = 0; \ - (bp)->blk_birth = 0; \ (bp)->blk_fill = 0; \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } --- old/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h Tue Feb 3 13:17:39 2009 +++ new/usr/src/grub/grub-0.97/stage2/zfs-include/zfs.h Tue Feb 3 13:17:38 2009 @@ -27,7 +27,7 @@ /* * On-disk version number. */ -#define SPA_VERSION 14ULL +#define SPA_VERSION 15ULL /* * The following are configuration names used in the nvlist describing a pool's --- old/usr/src/grub/grub-0.97/stage2/zfs-include/zil.h Tue Feb 3 13:17:41 2009 +++ new/usr/src/grub/grub-0.97/stage2/zfs-include/zil.h Tue Feb 3 13:17:40 2009 @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -24,8 +24,6 @@ #ifndef _SYS_ZIL_H #define _SYS_ZIL_H -#pragma ident "%Z%%M% %I% %E% SMI" - /* * Intent log format: * @@ -47,7 +45,8 @@ uint64_t zh_replay_seq; /* highest replayed sequence number */ blkptr_t zh_log; /* log chain */ uint64_t zh_claim_seq; /* highest claimed sequence number */ - uint64_t zh_pad[5]; + uint64_t zh_last_seq; /* last log block sequence number used */ + uint64_t zh_pad[4]; } zil_header_t; #endif /* _SYS_ZIL_H */ --- old/usr/src/grub/grub-0.97/stage2/zfs-include/zio.h Tue Feb 3 13:17:42 2009 +++ new/usr/src/grub/grub-0.97/stage2/zfs-include/zio.h Tue Feb 3 13:17:41 2009 @@ -17,7 +17,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -24,8 +24,6 @@ #ifndef _ZIO_H #define _ZIO_H -#pragma ident "%Z%%M% %I% %E% SMI" - #define ZBT_MAGIC 0x210da7ab10c7a11ULL /* zio data bloc tail */ typedef struct zio_block_tail { @@ -65,6 +63,7 @@ ZIO_CHECKSUM_FLETCHER_2, ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, + ZIO_CHECKSUM_SHA256_MAC, /* SHA256 Trunc 128 + 16byte CCM MAC */ ZIO_CHECKSUM_FUNCTIONS }; --- old/usr/src/lib/libzfs/Makefile.com Tue Feb 3 13:17:43 2009 +++ new/usr/src/lib/libzfs/Makefile.com Tue Feb 3 13:17:42 2009 @@ -21,18 +21,15 @@ # # Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
-# -# ident "%Z%%M% %I% %E% SMI" -# LIBRARY= libzfs.a VERS= .1 OBJS_SHARED= zfs_namecheck.o zprop_common.o zfs_prop.o zpool_prop.o \ - zfs_deleg.o zfs_comutil.o + zfs_deleg.o zfs_comutil.o zcrypt_common.o OBJS_COMMON= libzfs_dataset.o libzfs_util.o libzfs_graph.o libzfs_mount.o \ libzfs_pool.o libzfs_changelist.o libzfs_config.o libzfs_import.o \ - libzfs_status.o libzfs_sendrecv.o + libzfs_status.o libzfs_sendrecv.o libzfs_crypto.o OBJECTS= $(OBJS_COMMON) $(OBJS_SHARED) include ../../Makefile.lib @@ -46,11 +43,13 @@ INCS += -I$(SRCDIR) INCS += -I../../../uts/common/fs/zfs +INCS += -I../../../uts/common/fs/zfs/sys INCS += -I../../../common/zfs -C99MODE= -xc99=%all +C99MODE= -xc99=%all -K PIC C99LMODE= -Xc99=%all -LDLIBS += -lc -lm -ldevinfo -ldevid -lgen -lnvpair -luutil -lavl -lefi +LDLIBS += -lc -lm -ldevinfo -ldevid -lgen -lnvpair -luutil -lavl -lefi \ + -lcryptoutil -lpkcs11 CPPFLAGS += $(INCS) -D_REENTRANT SRCS= $(OBJS_COMMON:%.o=$(SRCDIR)/%.c) \ @@ -66,5 +65,9 @@ pics/%.o: ../../../common/zfs/%.c $(COMPILE.c) -o $@ $< $(POST_PROCESS_O) + +pics/%.o: ../../../uts/common/fs/zfs/%.c + $(COMPILE.c) -o $@ $< + $(POST_PROCESS_O) include ../../Makefile.targ --- old/usr/src/lib/libzfs/common/libzfs.h Tue Feb 3 13:17:44 2009 +++ new/usr/src/lib/libzfs/common/libzfs.h Tue Feb 3 13:17:43 2009 @@ -116,6 +116,7 @@ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ + EZFS_KEYERR, /* No key in given location */ EZFS_UNKNOWN }; @@ -156,6 +157,25 @@ } zfs_allow_t; /* + * Crypto API structure + * + * All allocated data in the structure will be cleaned up by libzfs_fini() + */ +typedef struct zfs_crypt { + /* Creating, loading, or changed key value */ + char *zc_key_data; + size_t zc_key_data_len; + + /* Change operations */ + char *zc_keysource; + uint64_t zc_keyscope; + + /* Private */ + uint64_t zc_salt; + boolean_t zc_is_key_change; +} zfs_crypt_t; + +/* * Basic 
handle types */ typedef struct zfs_handle zfs_handle_t; @@ -183,6 +203,8 @@ extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); +extern void zpool_set_libzfs_cry(zpool_handle_t *, zfs_crypt_t *); +extern void zfs_set_libzfs_cry(zfs_handle_t *, zfs_crypt_t *); /* * Basic handle functions @@ -244,6 +266,19 @@ extern const char *zpool_prop_values(zpool_prop_t); /* + * Crypto key functions for pools + */ +extern int zpool_set_key(zpool_handle_t *, char *); +extern int zpool_load_key(zpool_handle_t *, boolean_t); +extern int zpool_unload_key(zpool_handle_t *); +extern int zpool_change_key(zpool_handle_t *); +extern boolean_t zpool_keysource_prompt(zpool_handle_t *zhp); + +extern int zpool_cmd_key_load(zpool_handle_t *); +extern int zpool_cmd_key_unload(zpool_handle_t *); +extern int zpool_cmd_key_change(zpool_handle_t *, nvlist_t *); + +/* * Pool health statistics. */ typedef enum { @@ -336,6 +371,17 @@ size_t len); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *); +extern int zfs_crypto_create(libzfs_handle_t *, nvlist_t *, const char *path, + struct zfs_cmd *); +extern int zfs_crypto_clone(libzfs_handle_t *, const char *target, + const char *parent); +extern int zfs_crypto_rename(libzfs_handle_t *, const char *target, + const char *parent); +extern int zpool_crypto_create(libzfs_handle_t *, nvlist_t *, struct zfs_cmd *); +extern int valid_keysource(char *); +extern int valid_set_keysource_change(zfs_crypt_t *, char *, char *); +extern int zfs_mount_crypto_check(zfs_handle_t *); + /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. 
@@ -364,6 +410,7 @@ extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); +extern int zfs_prop_set_int(zfs_handle_t *, zfs_prop_t, uint64_t); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, @@ -540,6 +587,21 @@ void *, void *, int, zfs_share_op_t); /* + * Crypto key functions for datasets + */ +extern int zfs_is_encrypted(zfs_handle_t *); +extern boolean_t zfs_changing_key(zfs_handle_t *); + +extern int zfs_load_key(zfs_handle_t *); +extern int zfs_unload_key(zfs_handle_t *); +extern int zfs_change_key(zfs_handle_t *); + +extern int zfs_cmd_key_unload(zfs_handle_t *); +extern int zfs_cmd_key_load(zfs_handle_t *); +extern int zfs_cmd_key_change(zfs_handle_t *, nvlist_t *); + + +/* * When dealing with nvlists, verify() is extremely useful */ #ifdef NDEBUG @@ -578,8 +640,9 @@ * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ -extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); -extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); +extern int zpool_enable_datasets(zpool_handle_t *, const char *, int, + boolean_t); +extern int zpool_disable_datasets(zpool_handle_t *, boolean_t, boolean_t); #ifdef __cplusplus } --- /dev/null Tue Feb 3 13:17:46 2009 +++ new/usr/src/lib/libzfs/common/libzfs_crypto.c Tue Feb 3 13:17:45 2009 @@ -0,0 +1,1802 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zfs_namecheck.h" +#include "zfs_prop.h" +#include "libzfs_impl.h" + + +/* Operational states for loading a key */ +#define KEY_OP_SET 1 +#define KEY_OP_GET 2 + +#define MAXPROMPTLEN (ZPOOL_MAXNAMELEN + 100) + +typedef enum key_format { + KEY_FORMAT_NONE = 0, + KEY_FORMAT_RAW, + KEY_FORMAT_HEX, + KEY_FORMAT_PASSPHRASE +} key_format_t; + +typedef enum key_locator { + KEY_LOCATOR_NONE, + KEY_LOCATOR_PROMPT, + KEY_LOCATOR_URI +} key_locator_t; + + +static int +parse_format(key_format_t *format, char *s, int len) +{ + + if (strncmp("raw", s, len) == 0 && len == 3) + *format = KEY_FORMAT_RAW; + else if (strncmp("hex", s, len) == 0 && len == 3) + *format = KEY_FORMAT_HEX; + else if (strncmp("passphrase", s, len) == 0 && len == 10) + *format = KEY_FORMAT_PASSPHRASE; + else + return (1); + + return (0); +} + +static int +parse_locator(key_locator_t *locator, char *s, int len, char **uri) +{ + + if (len == 6 && strncmp("prompt", s, 6) == 0) { + *locator = KEY_LOCATOR_PROMPT; + return (0); + } + + if (len > 8 && strncmp("file:///", s, 8) == 0) { + *locator = KEY_LOCATOR_URI; + *uri = s; + return (0); + } + + return (1); +} + + +static int +keysource_prop_parser(char *prop_value, key_format_t *format, + key_locator_t *locator, char **uri) +{ + int len, ret; + int prop_len = strlen(prop_value); + char *s = prop_value; + + *format = KEY_FORMAT_NONE; + *locator = KEY_LOCATOR_NONE; + + if (prop_len > 
ZPOOL_MAXPROPLEN) + return (1); + + for (len = 0; len < prop_len; len++) + if (s[len] == ',') + break; + + /* If we are at the end of the key property, there is a problem */ + if (len == prop_len) + return (1); + + ret = parse_format(format, s, len); + if (ret) + return (ret); + + s = s + len + 1; + len = prop_len - len - 1; + ret = parse_locator(locator, s, len, uri); + + return (ret); +} + +static boolean_t +need_salt(char *keysource, size_t keysource_len) +{ + key_format_t format; + int len; + + if (keysource_len > ZPOOL_MAXPROPLEN) + return (B_FALSE); + + for (len = 0; len < keysource_len; len++) + if (keysource[len] == ',') + + (void) parse_format(&format, keysource, len); + if (format == KEY_FORMAT_PASSPHRASE) + return (B_TRUE); + + return (B_FALSE); +} + + +static int +get_passphrase(libzfs_handle_t *hdl, int key_op, char **passphrase, + size_t *passphraselen, key_format_t format, zfs_cmd_t *zc) +{ + zfs_crypt_t *cry = hdl->libzfs_cry; + char prompt[MAXPROMPTLEN]; + char *tmpbuf = NULL; + int tries; + int min_psize = 8; + + if (zc->zc_name == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "unable to get name.")); + return (EINVAL); + } + + if (format == KEY_FORMAT_HEX) { + if (cry->zc_is_key_change) + (void) snprintf(prompt, MAXPROMPTLEN, "%s \'%s\': ", + gettext("Enter new hexadecimal key for"), + zc->zc_name); + else + (void) snprintf(prompt, MAXPROMPTLEN, "%s \'%s\': ", + gettext("Enter hexadecimal key for"), zc->zc_name); + } else { + if (cry->zc_is_key_change) + (void) snprintf(prompt, MAXPROMPTLEN, "%s \'%s\': ", + gettext("Enter new passphrase for"), zc->zc_name); + else + (void) snprintf(prompt, MAXPROMPTLEN, "%s \'%s\': ", + gettext("Enter passphrase for"), zc->zc_name); + } + + for (tries = 3; tries > 0; tries--) { + tmpbuf = getpassphrase(prompt); + if (tmpbuf == NULL) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "Failed to generate " + "key from passphrase.")); + return (-1); + } + + if (strnlen(tmpbuf, min_psize) >= min_psize) + break; + 
+ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, + "Must be at least %d characters.\n"), + min_psize); + + if (tries == 1) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Exceeded number of attempts.\n")); + return (EINVAL); + } + } + + *passphrase = strdup(tmpbuf); + + (void) memset(tmpbuf, 0, strlen(tmpbuf)); /* clean up */ + /* Test the duplicated string, not the out-parameter itself */ + if (*passphrase == NULL) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "Failed to generate " + "key from passphrase.")); + return (EINVAL); + } + + if (key_op == KEY_OP_SET) { + /* Fixed "%s" format so translated text is never parsed as a format */ + (void) snprintf(prompt, MAXPROMPTLEN, "%s", + dgettext(TEXT_DOMAIN, "Enter again: ")); + + tmpbuf = getpassphrase(prompt); + if (tmpbuf == NULL) { + /* clean up: wipe and release the first entry */ + (void) memset(*passphrase, 0, strlen(*passphrase)); + free(*passphrase); + *passphrase = NULL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase does not match.")); + return (EINVAL); + } + + if (strcmp(*passphrase, tmpbuf) != 0) { + /* clean up: wipe both entries and release the first */ + (void) memset(tmpbuf, 0, strlen(tmpbuf)); + (void) memset(*passphrase, 0, strlen(*passphrase)); + free(*passphrase); + *passphrase = NULL; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Passphrase does not match.")); + return (EINVAL); + } + + /* The confirmation copy is no longer needed */ + (void) memset(tmpbuf, 0, strlen(tmpbuf)); + } + + *passphraselen = strlen(*passphrase); + return (0); +} + + +/* + * This sends the key through the ioctl to be processed. + * + * 'type' is for the type of operation: setting, loading, or changing keys + * 'zc_key' holds the current key or for setting a new key + * 'zc_newkey' holds the key to be changed to. Only during change operations + * will this value not be null.
+ */ +static int +crypto_key_ioctl(libzfs_handle_t *hdl, zfs_cmd_t *zc) { + int ret; + + zc->zc_crypto.zic_keytype = ZFS_IOC_CRYPTO_KEY_TYPE_RAW; + if (zc->zc_crypto.zic_cmd == ZFS_IOC_CRYPTO_LOAD_KEY_SPA || + zc->zc_crypto.zic_cmd == ZFS_IOC_CRYPTO_LOAD_KEY_DSL) { + zc->zc_crypto.zic_salt = hdl->libzfs_cry->zc_salt; + } + +#ifdef DEBUG + { + int i; + char *keydata = (char *)(uintptr_t)zc->zc_crypto.zic_keydata; + + fprintf(stderr, "DEBUG: crypto_key_ioctl keyscope= %d\n", + zc->zc_crypto.zic_keyscope); + fprintf(stderr, "DEBUG: crypto_key_ioctl crypt= %llu\n", + zc->zc_crypto.zic_crypt); + fprintf(stderr, "DEBUG: crypto_key_ioctl salt= %llu\n", + zc->zc_crypto.zic_salt); + fprintf(stderr, "DEBUG: crypto_key_ioctl keydata="); + for (i = 0; i < zc->zc_crypto.zic_keydatalen; i++) + fprintf(stderr, "%hhx", + ((char *)(uintptr_t)zc->zc_crypto.zic_keydata)[i]); + fprintf(stderr, "\n"); + fprintf(stderr, "DEBUG: crypto_key_ioctl dataset name=%s\n", + zc->zc_name); + } +#endif + + ret = zfs_ioctl(hdl, ZFS_IOC_CRYPTO, zc); + if (ret) + return (errno); + + return (0); +} + + +/* + * This internal function unloads the key on a given handle. Both pools and + * datasets are handled by the same function. 
+ */ +static int +unload_key(libzfs_handle_t *hdl, zfs_cmd_t *zc) +{ + int ret = 0; + + ret = crypto_key_ioctl(hdl, zc); + switch (ret) { + case 0: + break; + + case EPERM: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to unload key: Permission denied.")); + break; + + default: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to unload key: %s"), strerror(ret)); + } + + return (ret); +} + + +/* + * This API function is to be called for unloading a key from a dataset + */ +int +zfs_unload_key(zfs_handle_t *zhp) +{ + zfs_cmd_t zc = { 0 }; + int ret = 0; + char *mntpt; + + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "No key to unload when encryption=off.")); + return (EINVAL); + } + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) != + ZFS_CRYPT_KEY_AVAILABLE) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key not present.")); + return (ENOENT); + } + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSCOPE) != ZFS_KEYSCOPE_DSL) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Invalid keyscope for key unload.")); + return (EINVAL); + } + + /* + * We need to be sure that all the data has been written to + * disk before we unload the key so we first have to attempt + * an unmount, if that fails we don't continue with the key unload + * and instead return the error from zfs_umount. + * Maybe in the future 'zfs key' will grow a -f flag in which case + * MS_FORCE would get passed to zfs_umount, but for now that flag + * doesn't exist. 
+ */ + if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { + if (zfs_is_mounted(zhp, &mntpt)) { + ret = zfs_unmount(zhp, mntpt, 0); + if (ret) { + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, + "Failed to unload key: unmount failed")); + return (ret); + } + } + } else { + ret = zfs_unshare_iscsi(zhp); + if (ret) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to unload key: unshare iscsi failed")); + return (ret); + } + ret = zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); + if (ret) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to unload key: zvol remove link failed")); + return (ret); + } + } + + zc.zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_UNLOAD_KEY_DSL; + (void) strlcpy(zc.zc_name, zfs_get_name(zhp), sizeof (zc.zc_name)); + + ret = unload_key(zhp->zfs_hdl, &zc); + + if (ret == 0) + zfs_refresh_properties(zhp); + + return (ret); +} + +/* CLI to API function */ +int +zfs_cmd_key_unload(zfs_handle_t *zhp) +{ + char errbuf[1024]; + int ret; + + ret = zfs_unload_key(zhp); + if (ret) { + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "Key error in '%s'"), zfs_get_name(zhp)); + (void) zfs_error(zhp->zfs_hdl, EZFS_KEYERR, errbuf); + return (1); + } + + return (0); +} + + +boolean_t +zpool_keysource_prompt(zpool_handle_t *zhp) +{ + char keysource[ZPOOL_MAXPROPLEN]; + int ret; + key_format_t format; + key_locator_t locator; + char *uri; + + ret = zpool_get_prop(zhp, ZPOOL_PROP_KEYSOURCE, keysource, + sizeof (keysource), NULL); + if (ret != 0) { + return (B_FALSE); + } + + ret = keysource_prop_parser(keysource, &format, &locator, &uri); + if (ret != 0) { + return (B_FALSE); + } + + return (locator == KEY_LOCATOR_PROMPT); +} + +/* + * This API function is to be called for unloading a key from a pool + */ +int +zpool_unload_key(zpool_handle_t *zhp) +{ + zfs_cmd_t zc = { 0 }; + boolean_t encrypted_only = B_TRUE; + boolean_t force = B_FALSE; + + if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < + SPA_VERSION_CRYPTO) { + 
zfs_error_aux(zhp->zpool_hdl, + gettext("Insufficient version for encrypted pools.")); + return (ENOTSUP); + } + + zc.zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_UNLOAD_KEY_SPA; + (void) strlcpy(zc.zc_name, zpool_get_name(zhp), sizeof (zc.zc_name)); + + /* + * We have to unmount the datasets to make sure the data + * is flushed to disk. + * If any dataset fails to unmount we can't unload the key + * because that probably means there is data in transit that might + * need the key. + * Ideally because this is a pool wide operation and we don't + * really want some end user process with a CWD inside an + * encrypted dataset causing the unload to be blocked but we have + * to live with that. The admin can always find those processes + * and kill them and try a key unload again, the downside to this + * is we may have unmounted some of the datasets but not all of + * them. + * + * We may also add a -f flag for zpool key -u to allow passing + * in the force flag. + */ + if (zpool_disable_datasets(zhp, force, encrypted_only) != 0) { + zfs_error_aux(zhp->zpool_hdl, strerror(errno)); + return (-1); + } + return (unload_key(zhp->zpool_hdl, &zc)); + +} + +/* CLI to API function */ + +int +zpool_cmd_key_unload(zpool_handle_t *zhp) +{ + char errbuf[1024]; + int ret; + + ret = zpool_unload_key(zhp); + if (ret != 0) { + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "Key error in '%s'"), zpool_get_name(zhp)); + (void) zfs_error(zhp->zpool_hdl, EZFS_KEYERR, errbuf); + } + return (ret); +} + + +static int +get_key_material(libzfs_handle_t *hdl, char *keysource, int key_op, int keylen, + char **outkey, size_t *outkeylen, zfs_cmd_t *zc) +{ + int ret; + key_format_t format; + key_locator_t locator; + char *uri; + + ret = keysource_prop_parser(keysource, &format, &locator, &uri); + if (ret) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid keysource property.")); + return (EINVAL); + } + + switch (locator) { + case KEY_LOCATOR_PROMPT: + if (format == KEY_FORMAT_RAW) { + 
int rbytes; + + *outkey = zfs_alloc(hdl, keylen); + errno = 0; + rbytes = read(STDIN_FILENO, *outkey, keylen); + if (rbytes != keylen) { + /* + * A short read (including EOF) leaves errno at 0; + * report EIO so the caller never sees success with + * no key, and do not leave a dangling pointer. + */ + int err = (errno != 0) ? errno : EIO; + + *outkeylen = 0; + free(*outkey); + *outkey = NULL; + return (err); + } + *outkeylen = keylen; + + } else { + + ret = get_passphrase(hdl, key_op, outkey, outkeylen, + format, zc); + if (ret) + return (ret); + } + + break; + + case KEY_LOCATOR_URI: + if (uri[7] != '/') { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "File must have an absolute path.")); + return (EINVAL); + } + + /* + * If we have a null outkey buffer and no length, then we + * might need to tell pkcs11_read_data() how big of a key + * we want if the locator URI is a device (/dev/random) + * to be read from and not a file. + */ + if (*outkey == NULL && *outkeylen == 0) + *outkeylen = keylen; + + cryptoerror_off(); + ret = pkcs11_read_data(&(uri[7]), (void **)outkey, outkeylen); + cryptoerror_on(); + if (ret != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to read key file: %s"), strerror(ret)); + return (ret); + } + + break; + } + + return (0); +} + + +static int +use_key_material(libzfs_handle_t *hdl, char *keysource, int keylen, + char *inkey, size_t inkeylen, uint64_t *zic_key, uint64_t *zic_keylen) +{ + zfs_crypt_t *cry = hdl->libzfs_cry; + int ret; + key_format_t format; + key_locator_t locator; + char *uri; + char *outkey; + size_t outkeylen = 0; + + if (cry == NULL || inkeylen == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Missing key material.")); + return (EINVAL); + } + + (void) keysource_prop_parser(keysource, &format, &locator, &uri); + + switch (format) { + case KEY_FORMAT_RAW: + outkey = inkey; + outkeylen = inkeylen; + break; + + case KEY_FORMAT_HEX: + /* + * If the keylen is not on the byte boundary, in terms of hex + * format, and that extra char is a linefeed, we can trim it + */ + if ((keylen * 2) + 1 == inkeylen && inkey[keylen*2] == '\n') + inkeylen--; + + ret = hexstr_to_bytes(inkey, inkeylen, (uchar_t **)&outkey, + &outkeylen); + if (ret)
{ + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to convert hex key to bytes.")); + return (EINVAL); + } + + break; + + case KEY_FORMAT_PASSPHRASE: + { + CK_SESSION_HANDLE session; + + /* Remove any extra linefeed that may be on the end */ + if (inkey[inkeylen - 1] == '\n') + inkeylen--; + + outkeylen = keylen; + ret = SUNW_C_GetMechSession(CKM_PKCS5_PBKD2, &session); + if (ret) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "Failed to access CKM_PKCS5_PBKD2: %s."), + pkcs11_strerror(ret)); + return (EINVAL); + } + + if (cry->zc_salt == 0) { + ret = pkcs11_random_data(&cry->zc_salt, + sizeof (uint64_t)); + if (ret) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to obtain salt: %s."), + pkcs11_strerror(ret)); + return (EIO); + } + } + + ret = pkcs11_PasswdToKey(session, inkey, inkeylen, + (void *)&cry->zc_salt, sizeof (uint64_t), CKK_AES, + outkeylen, (void **)&outkey, &outkeylen); + if (ret) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Failed to generate key: %s."), + pkcs11_strerror(ret)); + return (EINVAL); + } + + break; + } + + default: + ASSERT(0); + } + + if (outkeylen != keylen) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Key length invalid. expected %lu bytes"), keylen); + return (EINVAL); + } + + /* + * Sets the proper uint64_t value from the char* and size_t used by + * other functions + */ + *zic_key = (uintptr_t)outkey; + *zic_keylen = outkeylen; + +#ifdef DEBUG + fprintf(stderr, "DEBUG: use_key_material salt (%llu)\n", cry->zc_salt); + fprintf(stderr, "DEBUG: use_key_material outkey (%u)=", + (uintptr_t)outkey); + for (int i = 0; i < outkeylen; i++) + fprintf(stderr, "%hhx", outkey[i]); + fprintf(stderr, "\n"); + fprintf(stderr, "DEBUG: use_key_material zic_keylen (%llu)\n", + *zic_keylen); + fprintf(stderr, "DEBUG: use_key_material zic_key (%llu)\n", *zic_key); +#endif + + return (0); +} + + + +/* + * Internal function to handle loading of an existing pool/dataset key and + * setting of a newly created pool/dataset key. 
+ * + * This should be called by an outside function + */ +static int +load_key(libzfs_handle_t *hdl, char *keysource, int keylen, int key_op, + zfs_cmd_t *zc) +{ + zfs_crypt_t *cry = hdl->libzfs_cry; + int ret = 0; + + if (strlen(keysource) <= 1) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "Invalid keysource property value.")); + return (EINVAL); + } + + if (cry->zc_key_data_len == 0) { + ret = get_key_material(hdl, keysource, key_op, keylen, + &(cry->zc_key_data), &(cry->zc_key_data_len), zc); + if (ret) + return (ret); + } + + ret = use_key_material(hdl, keysource, keylen, cry->zc_key_data, + cry->zc_key_data_len, &zc->zc_crypto.zic_keydata, + &zc->zc_crypto.zic_keydatalen); + if (ret) + return (ret); + + return (crypto_key_ioctl(hdl, zc)); +} + + +/* + * This API function to load or set a key-encryption-key on a datasets + */ +int +zfs_load_key(zfs_handle_t *zhp) +{ + char keysource[MAXNAMELEN]; + uint64_t ret, crypt, key_op = KEY_OP_GET; + zfs_cmd_t zc = { 0 }; + zfs_crypt_t *cry = zhp->zfs_hdl->libzfs_cry; + + switch (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS)) { + case ZFS_CRYPT_KEY_UNDEFINED: + key_op = KEY_OP_SET; + break; + + case ZFS_CRYPT_KEY_AVAILABLE: + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Key already loaded.")); + return (EEXIST); + }; + + (void) strlcpy(zc.zc_name, zfs_get_name(zhp), sizeof (zc.zc_name)); + + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + if (crypt == ZIO_CRYPT_OFF) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Encryption not enabled.")); + return (EINVAL); + } + + ret = zfs_prop_get(zhp, ZFS_PROP_KEYSOURCE, keysource, + sizeof (keysource), NULL, NULL, 0, B_TRUE); + if (ret) { + zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, + "Failed to obtain keysource property.")); + return (EIO); + } + + if (need_salt(keysource, strlen(keysource))) { + cry->zc_salt = zfs_prop_get_int(zhp, ZFS_PROP_SALT); + if (key_op == KEY_OP_SET) + zc.zc_crypto.zic_salt = cry->zc_salt; + } + + zc.zc_crypto.zic_cmd = 
ZFS_IOC_CRYPTO_LOAD_KEY_DSL;
+
+	ret = load_key(zhp->zfs_hdl, keysource,
+	    zio_crypt_table[crypt].ci_keylen, key_op, &zc);
+
+	/* Clean up handle so a potential iteration doesn't get confused */
+	bzero(cry, sizeof (zfs_crypt_t));
+
+	switch (ret) {
+	case 0:
+		break;
+
+	case EINVAL:
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Key incorrect."));
+		return (ret);
+
+	case EEXIST:
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Key already loaded."));
+		return (ret);
+
+	default:
+		if (zhp->zfs_hdl->libzfs_desc_active == 0)
+			zfs_error_aux(zhp->zfs_hdl, strerror(ret));
+		return (ret);
+	}
+
+	zfs_refresh_properties(zhp);
+
+	if (zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) {
+		ret = zfs_mount(zhp, NULL, NULL);
+		if (ret)
+			return (ret);
+	} else {
+		zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
+	}
+
+	ret = zfs_share(zhp);
+	if (ret)
+		return (ret);
+
+	return (0);
+}
+
+
+/*
+ * CLI to API function: load the wrapping key of a single dataset.
+ *
+ * A temporary zfs_crypt_t is attached to the library handle for the
+ * duration of zfs_load_key() and scrubbed and released afterwards.
+ * EZFS_KEYERR is reported on failure.
+ *
+ * Returns 0 on success and 1 on any failure.
+ */
+int
+zfs_cmd_key_load(zfs_handle_t *zhp)
+{
+	char errbuf[1024];
+	zfs_crypt_t *cry = NULL;
+	int ret;
+
+	cry = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_crypt_t));
+	if (cry == NULL) {
+		/* Nothing to scrub or free; do not hand NULL to bzero(). */
+		(void) no_memory(zhp->zfs_hdl);
+		return (1);
+	}
+	zfs_set_libzfs_cry(zhp, cry);
+	ret = zfs_load_key(zhp);
+	if (ret != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "Key error in '%s'"),
+		    zfs_get_name(zhp));
+		(void) zfs_error(zhp->zfs_hdl, EZFS_KEYERR, errbuf);
+	}
+
+	bzero(cry, sizeof (zfs_crypt_t));
+	free(cry);
+	zfs_set_libzfs_cry(zhp, NULL);
+
+	return (ret != 0);
+}
+
+/*
+ * This API function is to load or set a key-encryption-key on a pool.
+ *
+ * When 'importing' is set the function allocates its own zfs_crypt_t,
+ * attaches it to the library handle for the duration of the call and
+ * releases it on every exit path taken after the allocation.  Otherwise
+ * the caller must have attached one beforehand; EINVAL is returned if it
+ * has not.
+ *
+ * Returns 0 on success (including an import of a pool whose key is still
+ * undefined), ENOTSUP for pools older than SPA_VERSION_CRYPTO, EEXIST if
+ * the key is already loaded, or the error from load_key() /
+ * zpool_enable_datasets().
+ */
+int
+zpool_load_key(zpool_handle_t *zhp, boolean_t importing)
+{
+	char keysource[ZPOOL_MAXPROPLEN];
+	zprop_source_t srctype;
+	int key_op = KEY_OP_GET;
+	zfs_crypt_t *cry = zhp->zpool_hdl->libzfs_cry;
+	zfs_cmd_t zc = { 0 };
+	int ret = 0;
+	boolean_t refresh;
+
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_CRYPTO) {
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "Insufficient version for encrypted pools."));
+		return (ENOTSUP);
+	}
+
+	switch (zpool_get_prop_int(zhp, ZPOOL_PROP_KEYSTATUS, NULL)) {
+	case ZFS_CRYPT_KEY_UNDEFINED:
+		/* If we are importing, we cannot define the key now */
+		if (importing)
+			return (0);
+
+		key_op = KEY_OP_SET;
+		break;
+
+	case ZFS_CRYPT_KEY_AVAILABLE:
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "Key already loaded."));
+		return (EEXIST);
+	}
+
+	if (importing) {
+		cry = zfs_alloc(zhp->zpool_hdl, sizeof (zfs_crypt_t));
+		if (cry == NULL) {
+			(void) no_memory(zhp->zpool_hdl);
+			return (ENOMEM);
+		}
+		zpool_set_libzfs_cry(zhp, cry);
+	} else if (cry == NULL) {
+		/* Same contract as zfs_change_key(): caller sets it up. */
+		return (EINVAL);
+	}
+
+	(void) strlcpy(zc.zc_name, zpool_get_name(zhp), sizeof (zc.zc_name));
+	(void) zpool_get_prop(zhp, ZPOOL_PROP_KEYSOURCE, keysource,
+	    sizeof (keysource), &srctype);
+	if (need_salt(keysource, strlen(keysource))) {
+		if (key_op == KEY_OP_GET) {
+			cry->zc_salt = zpool_get_prop_int(zhp,
+			    ZPOOL_PROP_SALT, NULL);
+#ifdef DEBUG
+			fprintf(stderr, "DEBUG: load salt = %llu\n",
+			    cry->zc_salt);
+#endif
+		}
+	}
+
+	zc.zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_LOAD_KEY_SPA;
+
+	ret = load_key(zhp->zpool_hdl, keysource, ZPOOL_CRYPTO_KEK_LEN,
+	    key_op, &zc);
+
+	/*
+	 * Every failure leaves through 'out' so that an import-time
+	 * zfs_crypt_t is scrubbed, freed and detached from the handle
+	 * instead of being leaked, and so that the import error is
+	 * actually reported.
+	 */
+	switch (ret) {
+	case 0:
+		break;
+
+	case EACCES:
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "Key incorrect."));
+		goto out;
+
+	case EEXIST:
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "Key already loaded."));
+		goto out;
+
+	default:
+		/* Never use strerror() output as the format string. */
+		if (zhp->zpool_hdl->libzfs_desc_active == 0)
+			zfs_error_aux(zhp->zpool_hdl, "%s", strerror(ret));
+		goto out;
+	}
+
+	(void) zpool_refresh_stats(zhp, &refresh);
+
+	/*
+	 * Attempt to mount/share the datasets if we aren't being called
+	 * from import.
+	 * The reason we do this in here is so that clients other than
+	 * zpool(1M) benefit from this as well.
+	 */
+	if (!importing) {
+		ret = zpool_enable_datasets(zhp, NULL, 0, B_TRUE);
+		(void) zpool_create_zvol_links(zhp);
+	}
+
+out:
+	if (ret && importing)
+		(void) zfs_error(zhp->zpool_hdl, EZFS_KEYERR,
+		    dgettext(TEXT_DOMAIN, "Load key error"));
+
+	if (importing) {
+		bzero(cry, sizeof (zfs_crypt_t));
+		free(cry);
+		zpool_set_libzfs_cry(zhp, NULL);
+	}
+
+	return (ret);
+}
+
+/*
+ * This CLI function is to load or set a key-encryption-key on a pool.
+ *
+ * A temporary zfs_crypt_t is attached to the library handle around the
+ * call to zpool_load_key().  EZFS_KEYERR is reported on failure and the
+ * error value from zpool_load_key() is returned unchanged.
+ */
+int
+zpool_cmd_key_load(zpool_handle_t *zhp)
+{
+	char errbuf[1024];
+	zfs_crypt_t *cry = NULL;
+	int ret;
+
+	cry = zfs_alloc(zhp->zpool_hdl, sizeof (zfs_crypt_t));
+	if (cry == NULL) {
+		/* Nothing to scrub or free; do not hand NULL to bzero(). */
+		(void) no_memory(zhp->zpool_hdl);
+		return (ENOMEM);
+	}
+	zpool_set_libzfs_cry(zhp, cry);
+	ret = zpool_load_key(zhp, B_FALSE);
+	if (ret != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "Key error in '%s'"),
+		    zpool_get_name(zhp));
+		(void) zfs_error(zhp->zpool_hdl, EZFS_KEYERR, errbuf);
+	}
+	bzero(cry, sizeof (zfs_crypt_t));
+	free(cry);
+	zpool_set_libzfs_cry(zhp, NULL);
+
+	return (ret);
+}
+
+
+
+/*
+ * This internal function is for setting the key-encryption-key on a pool
+ * or a dataset.
+ *
+ * Unless the requested keyscope is ZFS_KEYSCOPE_POOL, new key material is
+ * obtained (from cry->zc_keysource when set, else from 'keysource') if the
+ * caller did not already supply key data, and is then handed to
+ * use_key_material() to fill in zc->zc_crypto.  hdl->libzfs_cry must be
+ * non-NULL.  Returns 0 or the error from the key material helpers.
+ */
+static int
+change_key_setup(libzfs_handle_t *hdl, char *keysource, int keylen,
+    zfs_cmd_t *zc)
+{
+	int ret;
+	char *key_material = NULL;
+	size_t key_material_len = 0;
+	char *key_src = keysource;
+	zfs_crypt_t *cry = hdl->libzfs_cry;
+
+	if (cry->zc_keyscope != ZFS_KEYSCOPE_POOL) {
+		/*
+		 * If the new wrapping key is not provided, use the keysource
+		 * property to obtain it.
+		 */
+		if (cry->zc_key_data_len == 0) {
+
+			/* If the keysource has changed, use the new one. */
+			if (cry->zc_keysource != NULL) {
+				key_src = cry->zc_keysource;
+			}
+
+			ret = get_key_material(hdl, key_src, KEY_OP_SET,
+			    keylen, &key_material, &key_material_len, zc);
+			if (ret)
+				return (ret);
+		}
+
+		cry->zc_salt = 0;
+
+		/*
+		 * NOTE(review): when key data was supplied by the caller,
+		 * key_material is still NULL here, whereas
+		 * zfs_crypto_create() passes cry->zc_key_data.  Confirm that
+		 * use_key_material() picks the data up from the handle.
+		 */
+		ret = use_key_material(hdl, key_src, keylen, key_material,
+		    key_material_len, &zc->zc_crypto.zic_keydata,
+		    &zc->zc_crypto.zic_keydatalen);
+		if (ret)
+			return (ret);
+	}
+
+	zc->zc_crypto.zic_keyscope = cry->zc_keyscope;
+	zc->zc_crypto.zic_salt = cry->zc_salt;
+	return (0);
+}
+
+
+/*
+ * Returns B_TRUE while a key change is in progress on the library handle
+ * behind 'zhp', i.e. a zfs_crypt_t is attached and has zc_is_key_change
+ * set; B_FALSE otherwise.
+ */
+boolean_t
+zfs_changing_key(zfs_handle_t *zhp)
+{
+	zfs_crypt_t *cry = zhp->zfs_hdl->libzfs_cry;
+
+	if (cry == NULL)
+		return (B_FALSE);
+
+	return (cry->zc_is_key_change);
+}
+
+/*
+ * This API function changes the wrapping key of a dataset and, if
+ * requested through the attached zfs_crypt_t, its keysource and keyscope.
+ *
+ * zfs_hdl->libzfs_cry MUST have been setup before this function is called.
+ *
+ * Property changes made before the ioctl are reverted if a later step
+ * fails.  Once the key change has been flagged on the handle, every exit
+ * path clears the zfs_crypt_t again so the flag cannot leak into a later
+ * operation on the same library handle.
+ *
+ * Returns 0 on success or an errno-style value on failure.
+ */
+int
+zfs_change_key(zfs_handle_t *zhp)
+{
+	int ret;
+	zfs_cmd_t zc = { 0 };
+	zfs_crypt_t *cry = zhp->zfs_hdl->libzfs_cry;
+	char keysource[ZFS_MAXPROPLEN];
+	char okeysource[ZFS_MAXPROPLEN];
+	uint64_t crypt;
+	uint64_t keyscope;
+	boolean_t changed_keysource = B_FALSE, changed_keyscope = B_FALSE;
+	zprop_source_t src_keysource = ZPROP_SRC_NONE;
+	zprop_source_t src_keyscope = ZPROP_SRC_NONE;
+	char statbuf[ZFS_MAXNAMELEN];
+
+	if (cry == NULL) {
+		return (EINVAL);
+	}
+	/*
+	 * First check this key change is possible with this command.
+	 * Needs the current key to be available and for current
+	 * and possibly changing keyscope to be correct.
+	 */
+	crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
+	if (crypt == ZIO_CRYPT_OFF) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "cannot change key when encryption=off"));
+		return (EINVAL);
+	}
+
+	keyscope = zfs_prop_get_int(zhp, ZFS_PROP_KEYSCOPE);
+	if (keyscope == ZFS_KEYSCOPE_POOL &&
+	    cry->zc_keyscope != ZFS_KEYSCOPE_DSL) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "cannot change key for keyscope=pool."
+		    " Use 'zpool key -c'"));
+		return (EINVAL);
+	}
+
+	if (cry->zc_keyscope == ZFS_KEYSCOPE_POOL &&
+	    cry->zc_keysource != NULL) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "cannot set keysource when changing to keyscope=pool."
+		    " use 'zpool key -c'"));
+		return (EINVAL);
+	}
+
+	/*
+	 * From here on the handle is flagged as "changing key", which
+	 * zfs_changing_key() and valid_set_keysource_change() consult.
+	 * All failures below must leave through 'out' (or 'cleanup') so
+	 * the flag is cleared again.
+	 */
+	cry->zc_is_key_change = B_TRUE;
+
+	switch (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS)) {
+	case ZFS_CRYPT_KEY_UNDEFINED:
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Key not defined."));
+		ret = EINVAL;
+		goto out;
+
+	case ZFS_CRYPT_KEY_UNAVAILABLE:
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "Load existing key first: 'zfs key -l '."));
+		ret = ENOENT;
+		goto out;
+	}
+
+	ret = zfs_prop_get(zhp, ZFS_PROP_KEYSOURCE, okeysource,
+	    sizeof (okeysource), &src_keysource, statbuf, ZFS_MAXNAMELEN,
+	    B_TRUE);
+	if (ret != 0)
+		goto out;
+
+	if (cry->zc_keysource == NULL) {
+		(void) strlcpy(keysource, okeysource, sizeof (keysource));
+	} else {
+		(void) strlcpy(keysource, cry->zc_keysource,
+		    sizeof (keysource));
+	}
+
+	(void) strlcpy(zc.zc_name, zfs_get_name(zhp), sizeof (zc.zc_name));
+	zc.zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_CHANGE_KEY_DSL;
+
+	ret = change_key_setup(zhp->zfs_hdl, keysource,
+	    zio_crypt_table[crypt].ci_keylen, &zc);
+	if (ret)
+		goto out;
+
+	/*
+	 * If keysource and keyscope are being updated do that first to
+	 * make sure that we have the delegation to do so
+	 */
+	if (cry->zc_keysource != NULL) {
+		ret = zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_KEYSOURCE),
+		    cry->zc_keysource);
+		if (ret != 0) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Cannot change keysource."));
+			goto out;
+		}
+
+		changed_keysource = B_TRUE;
+	}
+
+	if (cry->zc_keyscope != 0) {
+		const char *keyscope_str;
+		char strval[ZFS_MAXNAMELEN];
+
+		/* Get string value of new keyscope */
+		(void) zfs_prop_index_to_string(ZFS_PROP_KEYSCOPE,
+		    cry->zc_keyscope, &keyscope_str);
+
+		/* Retrieve the source value for the current keyscope */
+		(void) zfs_prop_get(zhp, ZFS_PROP_KEYSCOPE, strval,
+		    sizeof (strval), &src_keyscope, statbuf, ZFS_MAXNAMELEN,
+		    B_TRUE);
+
+		ret = zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_KEYSCOPE),
+		    keyscope_str);
+		/* If we failed, we must revert the keysource */
+		if (ret != 0) {
+			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+			    "Cannot change keyscope."));
+			goto cleanup;
+		}
+
+		changed_keyscope = B_TRUE;
+	}
+
+	/* Send change to kernel */
+	ret = crypto_key_ioctl(zhp->zfs_hdl, &zc);
+	if (ret == 0) {
+		zfs_refresh_properties(zhp);
+		goto out;
+	}
+
+	/*
+	 * ENOENT when attempting to go to keyscope = pool will be
+	 * returned when the spa key wasn't available.
+	 * We could have short cutted the call to the kernel
+	 * to check this first but in the case were it succeeds
+	 * that results in an extra call, plus it opens up a
+	 * window where the spa key could be removed so we still
+	 * have to deal with this failure here anyway.
+	 */
+	if (ret == ENOENT && cry->zc_keyscope == ZFS_KEYSCOPE_POOL) {
+		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
+		    "pool key not available."));
+	} else {
+		zfs_error_aux(zhp->zfs_hdl, "%s", strerror(ret));
+	}
+
+
+	/* Revert keyscope back if change failed */
+	if (changed_keyscope) {
+		const char *keyscope_str;
+
+		if (src_keyscope == ZPROP_SRC_LOCAL) {
+			(void) zfs_prop_index_to_string(ZFS_PROP_KEYSCOPE,
+			    keyscope, &keyscope_str);
+			(void) zfs_prop_set(zhp,
+			    zfs_prop_to_name(ZFS_PROP_KEYSCOPE), keyscope_str);
+		} else
+			(void) zfs_prop_inherit(zhp,
+			    zfs_prop_to_name(ZFS_PROP_KEYSCOPE));
+	}
+
+cleanup:
+	/*
+	 * Revert keysource back if change failed.  This must run before
+	 * the handle is cleared below: the property set is only accepted
+	 * while zc_is_key_change is still set.
+	 */
+	if (changed_keysource) {
+		if (src_keysource == ZPROP_SRC_LOCAL)
+			(void) zfs_prop_set(zhp,
+			    zfs_prop_to_name(ZFS_PROP_KEYSOURCE), okeysource);
+		else
+			(void) zfs_prop_inherit(zhp,
+			    zfs_prop_to_name(ZFS_PROP_KEYSOURCE));
+	}
+
+out:
+	/* Clean up handle so a potential iteration doesn't get confused */
+	bzero(cry, sizeof (zfs_crypt_t));
+	return (ret);
+}
+
+/*
+ * CLI to API function
+ */
+int
+zfs_cmd_key_change(zfs_handle_t *zhp, nvlist_t *nvprops)
+{
+	char errbuf[1024];
+	int ret;
+	char *keyscope;
+	zfs_crypt_t *cry = NULL;
+
+	/*
+	 * Attach a temporary zfs_crypt_t to the library handle for the
+	 * duration of the key change.  If the allocation failed, 'cry' is
+	 * NULL, the property lookups are skipped and zfs_change_key()
+	 * rejects the request with EINVAL.
+	 */
+	cry = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_crypt_t));
+	zfs_set_libzfs_cry(zhp, cry);
+	/* Check if the keysource / key scope properties are being changed */
+	if (nvprops != NULL && cry != NULL) {
+		(void) nvlist_lookup_string(nvprops,
+		    zfs_prop_to_name(ZFS_PROP_KEYSOURCE), &cry->zc_keysource);
+
+		if (nvlist_lookup_string(nvprops,
+		    zfs_prop_to_name(ZFS_PROP_KEYSCOPE), &keyscope) == 0) {
+			if (zfs_prop_string_to_index(ZFS_PROP_KEYSCOPE,
+			    keyscope, &cry->zc_keyscope) != 0) {
+				ret = EINVAL;
+				goto out;
+			}
+		}
+	}
+
+	ret = zfs_change_key(zhp);
+out:
+	if (ret != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "Key change error"));
+		(void) zfs_error(zhp->zfs_hdl, EZFS_KEYERR, errbuf);
+	} else {
+		(void) printf(gettext("Key change successful.\n"));
+	}
+
+	if (cry != NULL) {
+		bzero(cry, sizeof (zfs_crypt_t));
+		free(cry);
+		zfs_set_libzfs_cry(zhp, NULL);
+	}
+
+	/* CLI convention: 0 on success, 1 on any failure. */
+	return (ret != 0);
+}
+
+/*
+ * This API function changes the key-encryption-key of a pool and, if
+ * cry->zc_keysource is set, its keysource property.
+ *
+ * zpool_hdl->libzfs_cry MUST have been setup before this function is
+ * called; EINVAL is returned if it has not.  Once the key change has been
+ * flagged on the handle, every exit path clears the zfs_crypt_t again.
+ *
+ * Returns 0 on success or an errno-style value on failure.
+ */
+int
+zpool_change_key(zpool_handle_t *zhp)
+{
+	zfs_cmd_t zc = { 0 };
+	zfs_crypt_t *cry = zhp->zpool_hdl->libzfs_cry;
+	int ret;
+	zprop_source_t srctype;
+	char okeysource[ZPOOL_MAXPROPLEN];
+	boolean_t keysource_changed = B_FALSE;
+
+	/* Same contract as zfs_change_key(): the caller sets up 'cry'. */
+	if (cry == NULL)
+		return (EINVAL);
+
+	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_CRYPTO) {
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "Insufficient version for encrypted pools.\n"));
+		return (EINVAL);
+	}
+
+	if (cry->zc_key_data_len > ZPOOL_MAXPROPLEN) {
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "data lengths too long."));
+		return (EINVAL);
+	}
+
+	switch (zpool_get_prop_int(zhp, ZPOOL_PROP_KEYSTATUS, NULL)) {
+	case ZFS_CRYPT_KEY_UNDEFINED:
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "key not defined."));
+		return (EINVAL);
+
+	case ZFS_CRYPT_KEY_UNAVAILABLE:
+		zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
+		    "pool key not available."));
+		return (EINVAL);
+	}
+
+	/*
+	 * Prepare zfs_cmd_t for changing the key.  From here on the
+	 * handle is flagged as "changing key", so failures leave through
+	 * 'out' to clear it again.
+	 */
+	cry->zc_is_key_change = B_TRUE;
+	(void) strlcpy(zc.zc_name, zpool_get_name(zhp), sizeof (zc.zc_name));
+	(void) zpool_get_prop(zhp, ZPOOL_PROP_KEYSOURCE, okeysource,
+	    sizeof (okeysource), &srctype);
+
+	zc.zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_CHANGE_KEY_SPA;
+
+	ret = change_key_setup(zhp->zpool_hdl, okeysource,
+	    ZPOOL_CRYPTO_KEK_LEN, &zc);
+	if (ret) {
+		zfs_error_aux(zhp->zpool_hdl, "%s", strerror(ret));
+		goto out;
+	}
+
+	if (cry->zc_keysource != NULL) {
+		ret = zpool_set_prop(zhp,
+		    zpool_prop_to_name(ZPOOL_PROP_KEYSOURCE),
+		    cry->zc_keysource);
+		if (ret != 0) {
+			zfs_error_aux(zhp->zpool_hdl,
+			    dgettext(TEXT_DOMAIN, "Cannot change keysource."));
+			goto out;
+		}
+
+		keysource_changed = B_TRUE;
+
+	}
+
+	ret = crypto_key_ioctl(zhp->zpool_hdl, &zc);
+	if (ret == 0)
+		goto out;
+
+	zfs_error_aux(zhp->zpool_hdl, "%s", strerror(ret));
+
+	/* The kernel rejected the change: put the old keysource back. */
+	if (keysource_changed)
+		(void) zpool_set_prop(zhp,
+		    zpool_prop_to_name(ZPOOL_PROP_KEYSOURCE), okeysource);
+
+out:
+	/* Clean up handle so a potential iteration doesn't get confused */
+	bzero(cry, sizeof (zfs_crypt_t));
+
+	return (ret);
+}
+
+
+/*
+ * CLI to API function
+ *
+ * Attaches a temporary zfs_crypt_t to the library handle, records a new
+ * keysource from 'nvprops' if one was given, and calls zpool_change_key().
+ * Returns 0 on success and 1 on any failure.
+ */
+int
+zpool_cmd_key_change(zpool_handle_t *zhp, nvlist_t *nvprops)
+{
+	char errbuf[1024];
+	zfs_crypt_t *cry = NULL;
+	int ret;
+
+	/*
+	 * Check if the keysource pool property is being changed.  If the
+	 * allocation failed the lookup is skipped and zpool_change_key()
+	 * rejects the request with EINVAL.
+	 */
+	cry = zfs_alloc(zhp->zpool_hdl, sizeof (zfs_crypt_t));
+	zpool_set_libzfs_cry(zhp, cry);
+	if (nvprops != NULL && cry != NULL) {
+		(void) nvlist_lookup_string(nvprops,
+		    zpool_prop_to_name(ZPOOL_PROP_KEYSOURCE),
+		    &cry->zc_keysource);
+	}
+
+	ret = zpool_change_key(zhp);
+	if (ret != 0) {
+		(void) snprintf(errbuf, sizeof (errbuf),
+		    dgettext(TEXT_DOMAIN, "Key change error"));
+		(void) zfs_error(zhp->zpool_hdl, EZFS_KEYERR, errbuf);
+	} else {
+		(void) printf(gettext("Key change successful.\n"));
+	}
+	if (cry != NULL) {
+		bzero(cry, sizeof (zfs_crypt_t));
+		free(cry);
+		zpool_set_libzfs_cry(zhp, NULL);
+	}
+
+	return (ret != 0);
+}
+
+
+/*
+ * This is to verify that the proposed keysource property change via
+ * 'zpool set', 'zfs set', and internal functions is valid.
+ *
+ * Returns 0 if the change is allowed and non-zero otherwise.  A NULL
+ * 'new_src' is rejected, matching valid_keysource().
+ */
+int
+valid_set_keysource_change(zfs_crypt_t *cry, char *old_src, char *new_src)
+{
+	key_format_t old_format, new_format;
+	key_locator_t old_locator, new_locator;
+	char *uri;
+	int ret;
+
+	if (new_src == NULL)
+		return (1);
+
+	/*
+	 * If we are calling this from a change key operation, the valid
+	 * keysource changes have no restrictions.
+	 */
+	if (cry != NULL && cry->zc_is_key_change == B_TRUE) {
+		if (strlen(new_src) > 0)
+			return (valid_keysource(new_src));
+		return (0);
+	}
+
+	/*
+	 * If we are calling this from a set property operation, the valid
+	 * keysources are limited to the same format
+	 */
+	ret = keysource_prop_parser(new_src, &new_format, &new_locator, &uri);
+	if (ret)
+		return (ret);
+
+	/* If the current keysource is not valid, it must be blank */
+	ret = keysource_prop_parser(old_src, &old_format, &old_locator, &uri);
+	if (ret == 0 && old_format != new_format)
+		return (1);
+
+	return (0);
+}
+
+
+/*
+ * Validate the keysource provided is a valid keysource.
+ * Returns 0 if it parses, non-zero if it is NULL or does not parse.
+ */
+int
+valid_keysource(char *src)
+{
+	key_format_t format;
+	key_locator_t locator;
+	char *uri;
+
+	if (src == NULL)
+		return (1);
+
+	return (keysource_prop_parser(src, &format, &locator, &uri));
+}
+
+/*
+ * zfs_crypto_clone/zfs_crypto_rename
+ *
+ * Validate that the clone/rename is okay to place in the tree at that
+ * location.
+ *
+ * Just like we don't allow an encryption=off dataset to be created below
+ * an encrypted one we shouldn't allow a clone of an encryption=off dataset
+ * to be placed below an encrypted dataset either.
+ *
+ * If in the future we allow clones to have different keys from their
+ * parents some key setup may be required in here and zfs_crypto_rename
+ * will likely need split off.
+ *
+ * Returns 0 if the placement is allowed and -1, with an auxiliary error
+ * message set on 'hdl', otherwise.
+ */
+#pragma weak zfs_crypto_rename = zfs_crypto_clone
+int
+zfs_crypto_clone(libzfs_handle_t *hdl, const char *snapshot, const char *parent)
+{
+	int ret = 0;
+	zfs_handle_t *pzhp = NULL, *szhp = NULL;
+	uint64_t pcrypt = ZIO_CRYPT_INHERIT, scrypt = ZIO_CRYPT_INHERIT;
+
+	ASSERT(snapshot != NULL);
+	ASSERT(parent != NULL);
+
+	/*
+	 * Each dataset is opened exactly once.  Re-opening the same name
+	 * in a loop until the value stops being "inherit" can never yield
+	 * a different answer; it only leaks one handle per pass and never
+	 * terminates.  An unresolved value is reported as a failure.
+	 */
+	pzhp = make_dataset_handle(hdl, parent);
+	if (pzhp != NULL)
+		pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION);
+
+	if (pcrypt == ZIO_CRYPT_INHERIT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to obtain parent encryption value."));
+		ret = -1;
+		goto out;
+	}
+
+	szhp = make_dataset_handle(hdl, snapshot);
+	if (szhp != NULL)
+		scrypt = zfs_prop_get_int(szhp, ZFS_PROP_ENCRYPTION);
+
+	if (scrypt == ZIO_CRYPT_INHERIT) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to obtain source encryption value."));
+		ret = -1;
+		goto out;
+	}
+
+	ASSERT(scrypt != ZIO_CRYPT_INHERIT);
+	ASSERT(pcrypt != ZIO_CRYPT_INHERIT);
+
+	/* An unencrypted source may not be placed below an encrypted parent */
+	if (pcrypt != ZIO_CRYPT_OFF && scrypt == ZIO_CRYPT_OFF) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "source must be encrypted."));
+		ret = -1;
+	}
+out:
+	if (pzhp != NULL) {
+		zfs_close(pzhp);
+	}
+	if (szhp != NULL) {
+		zfs_close(szhp);
+	}
+
+	return (ret);
+}
+
+/*
+ * zfs_crypto_create
+ *
+ * For encryption !=off && keyscope=dataset get the key material.
+ *
+ * Returns 0 when there is nothing to do or when the key material has been
+ * set up in 'zc'; non-zero on failure.  The parent handle opened here is
+ * closed on every path, and a zfs_crypt_t allocated here is detached from
+ * 'hdl' before it is freed.
+ */
+int
+zfs_crypto_create(libzfs_handle_t *hdl, nvlist_t *props, const char *parent,
+    zfs_cmd_t *zc)
+{
+	zfs_crypt_t *cry = hdl->libzfs_cry;
+	uint64_t crypt = 0, pcrypt = 0, keyscope = 0;
+	char *keysource = NULL;
+	char *parent_keysource = NULL;	/* owned copy of inherited value */
+	char *key_material = NULL;
+	size_t key_material_len = 0;
+	int ret = 0;
+	zfs_handle_t *pzhp = NULL;
+	boolean_t inherit_crypt = B_FALSE;
+	boolean_t inherit_keyscope = B_FALSE;
+	boolean_t inherit_keysource = B_FALSE;
+	boolean_t local_cry = B_FALSE;
+
+	/*
+	 * If explicitly encryption == off or keyscope != dataset
+	 *
+	 * return success, ie nothing to do.
+	 */
+	if (nvlist_lookup_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt) != 0) {
+		inherit_crypt = B_TRUE;
+	}
+
+	if (nvlist_lookup_uint64(props,
+	    zfs_prop_to_name(ZFS_PROP_KEYSCOPE), &keyscope) != 0) {
+		inherit_keyscope = B_TRUE;
+	}
+
+	if (nvlist_lookup_string(props,
+	    zfs_prop_to_name(ZFS_PROP_KEYSOURCE), &keysource) != 0) {
+		inherit_keysource = B_TRUE;
+	}
+
+	/* parent should never be null */
+	pzhp = make_dataset_handle(hdl, parent);
+	if (pzhp == NULL) {
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "failed to obtain parent to check version."));
+		return (-1);
+	}
+
+	/* From here on every exit goes through 'out' to close pzhp. */
+
+	if (zpool_get_prop_int(pzhp->zpool_hdl, ZPOOL_PROP_VERSION, NULL) <
+	    SPA_VERSION_CRYPTO) {
+		/*
+		 * If all properties are inherited, they didn't want crypto
+		 * and the older pool version can proceed.
+		 */
+		if (inherit_crypt && inherit_keyscope && inherit_keysource) {
+			ret = 0;
+			goto out;
+		}
+
+		zfs_error_aux(hdl, gettext("Insufficient version for "
+		    "encrypted datasets.\n"));
+		ret = -1;
+		goto out;
+	}
+
+	/*
+	 * Inheritance happens kernel side as part of the DSL,
+	 * so we need to lookup to the parent dataset to find out the
+	 * values for encryption and keyscope to determine if we have work
+	 * to do, then do the same for keysource if it isn't specified either.
+	 *
+	 */
+
+	if (crypt == ZIO_CRYPT_OFF || inherit_crypt) {
+		pcrypt = zfs_prop_get_int(pzhp, ZFS_PROP_ENCRYPTION);
+		if (pcrypt != ZIO_CRYPT_OFF && crypt == ZIO_CRYPT_OFF) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
+			    "encryption value. dataset must be encrypted."));
+			ret = EINVAL;
+			goto out;
+		}
+
+		if (pcrypt == ZIO_CRYPT_OFF &&
+		    (inherit_crypt || crypt == ZIO_CRYPT_OFF)) {
+			ret = 0;
+			goto out;
+		}
+
+		crypt = pcrypt;
+	}
+
+	if (inherit_keyscope) {
+		keyscope = zfs_prop_get_int(pzhp, ZFS_PROP_KEYSCOPE);
+	}
+
+	/*
+	 * Need to pass down the inherited crypt & keyscope values
+	 * so that dsl_crypto_key_gen() can see the same that we saw.
+	 */
+	zc->zc_crypto.zic_crypt = (uint64_t)crypt;
+	zc->zc_crypto.zic_keyscope = keyscope;
+
+	/*
+	 * The checking that the spa key is available is performed in
+	 * kernel.
+	 */
+	if (keyscope == ZFS_KEYSCOPE_POOL) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * Here we have encryption on and keyscope is dataset so we need
+	 * to find a valid keysource property.
+	 *
+	 * Now let's see if we have an explicit setting for keysource, in
+	 * which case we have to validate it; otherwise, if we inherit then
+	 * it is already validated.
+	 */
+	if (!inherit_keysource) {
+		ret = valid_keysource(keysource);
+		if (ret) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "invalid keysource."));
+			goto out;
+		}
+
+	} else { /* Get the already validated keysource from our parent */
+
+		parent_keysource = zfs_alloc(hdl, ZFS_MAXNAMELEN);
+		if (parent_keysource == NULL) {
+			(void) no_memory(hdl);
+			ret = -1;
+			goto out;
+		}
+		keysource = parent_keysource;
+
+		if (zfs_prop_get(pzhp, ZFS_PROP_KEYSOURCE, keysource,
+		    ZFS_MAXNAMELEN, NULL, NULL, 0, FALSE) != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "no keysource property available."));
+			ret = -1;
+			goto out;
+		}
+	}
+
+	if (hdl->libzfs_cry == NULL) {
+		cry = zfs_alloc(hdl, sizeof (zfs_crypt_t));
+		if (cry == NULL) {
+			(void) no_memory(hdl);
+			ret = -1;
+			goto out;
+		}
+		hdl->libzfs_cry = cry;
+		local_cry = B_TRUE;
+	}
+
+	if (cry->zc_key_data_len == 0) {
+		ret = get_key_material(hdl, keysource, KEY_OP_SET,
+		    zio_crypt_table[crypt].ci_keylen,
+		    &key_material, &key_material_len, zc);
+		if (ret != 0)
+			goto out;
+
+	} else {
+		key_material = cry->zc_key_data;
+		key_material_len = cry->zc_key_data_len;
+	}
+
+	ret = use_key_material(hdl, keysource,
+	    zio_crypt_table[crypt].ci_keylen, key_material, key_material_len,
+	    &zc->zc_crypto.zic_keydata, &zc->zc_crypto.zic_keydatalen);
+
+	if (ret != 0)
+		goto out;
+
+	if (cry->zc_salt != 0) {
+		ret = nvlist_add_uint64(props,
+		    zfs_prop_to_name(ZFS_PROP_SALT), cry->zc_salt);
+		if (ret)
+			goto out;
+		zc->zc_crypto.zic_salt = cry->zc_salt;
+	}
+
+	zc->zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_LOAD_KEY_DSL;
+	ret = 0;
+
+out:
+	/* NULL unless the keysource was copied from the parent. */
+	free(parent_keysource);
+
+	if (local_cry) {
+		bzero(cry, sizeof (zfs_crypt_t));
+		free(cry);
+		/* Do not leave the handle pointing at freed memory. */
+		hdl->libzfs_cry = NULL;
+	}
+
+	zfs_close(pzhp);
+
+	return (ret);
+}
+
+/*
+ * zpool_crypto_create
+ *
+ * If 'props' carries a keysource, obtain the pool key-encryption-key
+ * material and place it in 'zc'.  Returns 0 when no keysource was given
+ * or the key material was set up; non-zero on failure.  A zfs_crypt_t
+ * allocated here is detached from 'hdl' before it is freed.
+ */
+int
+zpool_crypto_create(libzfs_handle_t *hdl, nvlist_t *props, zfs_cmd_t *zc)
+{
+	zfs_crypt_t *cry = hdl->libzfs_cry;
+	char *keysource;
+	char *key_material = NULL;
+	size_t key_material_len = 0;
+	int ret = 0;
+	boolean_t local_cry = B_FALSE;
+
+	ret = nvlist_lookup_string(props,
+	    zpool_prop_to_name(ZPOOL_PROP_KEYSOURCE), &keysource);
+	if (ret)
+		return (0);
+
+	if (cry == NULL) {
+		cry = zfs_alloc(hdl, sizeof (zfs_crypt_t));
+		if (cry == NULL) {
+			(void) no_memory(hdl);
+			return (-1);
+		}
+		hdl->libzfs_cry = cry;
+		local_cry = B_TRUE;
+	}
+
+	if (cry->zc_key_data_len == 0) {
+		ret = get_key_material(hdl, keysource, KEY_OP_SET,
+		    ZPOOL_CRYPTO_KEK_LEN, &key_material, &key_material_len,
+		    zc);
+		if (ret)
+			goto out;
+
+	} else {
+		key_material = cry->zc_key_data;
+		key_material_len = cry->zc_key_data_len;
+	}
+
+	ret = use_key_material(hdl, keysource, ZPOOL_CRYPTO_KEK_LEN,
+	    key_material, key_material_len,
+	    &zc->zc_crypto.zic_keydata,
+	    &zc->zc_crypto.zic_keydatalen);
+
+	if (ret)
+		goto out;
+
+	if (cry->zc_salt != 0) {
+		ret = nvlist_add_uint64(props,
+		    zpool_prop_to_name(ZPOOL_PROP_SALT), cry->zc_salt);
+		if (ret)
+			goto out;
+		zc->zc_crypto.zic_salt = cry->zc_salt;
+	}
+
+	zc->zc_crypto.zic_cmd = ZFS_IOC_CRYPTO_LOAD_KEY_SPA;
+
+
+	/*
+	 * NOTE(review): this DEBUG block writes the raw key bytes to
+	 * stderr.  It should not survive into any build that handles
+	 * real keys.
+	 */
+#ifdef DEBUG
+	int i;
+	char *key = (char *)(uintptr_t)zc->zc_crypto.zic_keydata;
+	fprintf(stderr, "DEBUG: zic_keydata (%llu)\n",
+	    zc->zc_crypto.zic_keydata);
+	fprintf(stderr, "DEBUG: create salt = %llu\n", zc->zc_crypto.zic_salt);
+	fprintf(stderr, "DEBUG: cry->zc_key_data_len = %d\n",
+	    cry->zc_key_data_len);
+	fprintf(stderr, "DEBUG: zc->zc_crypto.zic_keydatalen = %llu\n",
+	    zc->zc_crypto.zic_keydatalen);
+	fprintf(stderr, "DEBUG: keydata=");
+	for (i = 0; i < zc->zc_crypto.zic_keydatalen; i++)
+		fprintf(stderr, "%hhx", key[i]);
+	fprintf(stderr, "\n");
+#endif
+
+out:
+	if (local_cry) {
+		bzero(cry, sizeof (zfs_crypt_t));
+		free(cry);
+		/* Do not leave the handle pointing at freed memory. */
+		hdl->libzfs_cry = NULL;
+	}
+
+	return (ret);
+}
+
+/*
+ * Returns 1 if the dataset is encrypted and its key status is below
+ * ZFS_CRYPT_KEY_AVAILABLE, and 0 otherwise.
+ */
+int
+zfs_mount_crypto_check(zfs_handle_t *zhp)
+{
+	int keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
+	int encryption = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
+
+	if (encryption != ZIO_CRYPT_OFF &&
+	    keystatus < ZFS_CRYPT_KEY_AVAILABLE)
+		return (1);
+
+	return (0);
+}
+
+int
+zfs_is_encrypted(zfs_handle_t *zhp)
+{
+	int encryption = zfs_prop_get_int(zhp,
ZFS_PROP_ENCRYPTION); + + if (encryption == ZIO_CRYPT_OFF) + return (0); + + return (1); +} --- old/usr/src/lib/libzfs/common/libzfs_dataset.c Tue Feb 3 13:17:47 2009 +++ new/usr/src/lib/libzfs/common/libzfs_dataset.c Tue Feb 3 13:17:46 2009 @@ -1013,7 +1013,114 @@ case ZFS_PROP_NORMALIZE: chosen_normal = (int)intval; break; + + case ZFS_PROP_CHECKSUM: + if (zhp != NULL && zfs_prop_get_int(zhp, + ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "'%s' cannot be changed on encrypted " + "datasets."), propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, + errbuf); + goto error; + } + + break; + + case ZFS_PROP_ENCRYPTION: + /* This is creation time only */ + if (zhp != NULL) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, "encryption " + "can only be set at creation time.")); + (void) zfs_error(hdl, + EZFS_PROPREADONLY, errbuf); + goto error; + } + + /* + * If the user explicitly defined encryption as off, + * we need to skip the checksum requirement. 
+ */ + if (intval == ZIO_CRYPT_OFF) + break; + + /* + * Since encryption is on, we must make sure the user + * did not specify a checksum so we can set it to + * the crypto default + */ + if (nvlist_lookup_string(nvl, "checksum", + &strval) == 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption requires that checksum " + "not be set.")); + (void) zfs_error(hdl, + EZFS_PROPREADONLY, errbuf); + goto error; + } + + (void) nvlist_add_string(nvl, "checksum", "sha256+mac"); + break; + + case ZFS_PROP_KEYSCOPE: + if (zhp != NULL && zfs_prop_get_int(zhp, + ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && + !zfs_changing_key(zhp)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "'%s' must be modifed as part of a key " + "change"), propname); + (void) zfs_error(hdl, EZFS_PROPREADONLY, + errbuf); + goto error; + } + break; + + case ZFS_PROP_KEYSOURCE: { + /* + * All existing dataset can be set and changed if + * meet the valid_set_keysource_change() conditions + */ + + if (zhp != NULL) { + char keysrc[ZFS_MAXPROPLEN] = { 0 }; + int ret; + zfs_crypt_t *cry = zhp->zfs_hdl->libzfs_cry; + + ret = zfs_prop_get(zhp, ZFS_PROP_KEYSOURCE, + keysrc, MAXNAMELEN, NULL, NULL, 0, B_TRUE); + if (ret) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "error getting keysource.")); + (void) zfs_error(hdl, EZFS_KEYERR, + errbuf); + goto error; + } + + if (valid_set_keysource_change(cry, keysrc, + strval)) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "invalid keysource change.")); + (void) zfs_error(hdl, EZFS_KEYERR, + errbuf); + goto error; + } + + break; + } + + if (valid_keysource(strval)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid keysource."), propname); + (void) zfs_error(hdl, EZFS_KEYERR, errbuf); + goto error; + + } } + } /* * For changes to existing volumes, we have some additional @@ -1850,6 +1957,12 @@ return (dgettext(TEXT_DOMAIN, "Allows sharing file systems over NFS or SMB\n" "\t\t\t\tprotocols")); + case ZFS_DELEG_NOTE_CRYPTO_DSL: + return (dgettext(TEXT_DOMAIN, + 
"Allows load/unload of key for keyscope=dataset")); + case ZFS_DELEG_NOTE_CRYPTO_DSL_CHANGE: + return (dgettext(TEXT_DOMAIN, + "Allows change of key for keyscope=dataset")); case ZFS_DELEG_NOTE_NONE: default: return (dgettext(TEXT_DOMAIN, "")); @@ -2038,7 +2151,8 @@ break; case ERANGE: - if (prop == ZFS_PROP_COMPRESSION) { + if (prop == ZFS_PROP_COMPRESSION || + prop == ZFS_PROP_ENCRYPTION) { (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property setting is not allowed on " "bootable datasets")); @@ -2404,6 +2518,7 @@ break; case PROP_TYPE_STRING: + case PROP_TYPE_BINARY: default: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "cannot get non-numeric property")); @@ -2656,6 +2771,11 @@ (void) strlcpy(propbuf, strval, proplen); break; + case PROP_TYPE_BINARY: + propbuf = NULL; + proplen = 0; + break; + default: abort(); } @@ -3107,6 +3227,7 @@ uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); char errbuf[1024]; uint64_t zoned; + char parent[ZFS_MAXNAMELEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); @@ -3188,6 +3309,10 @@ } } + (void) parent_name(path, parent, sizeof (parent)); + if (zfs_crypto_create(hdl, props, parent, &zc) != 0) + return (zfs_error(hdl, EZFS_KEYERR, errbuf)); + if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) return (-1); nvlist_free(props); @@ -3239,6 +3364,11 @@ "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); + + case EAGAIN: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption key not available")); + return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); #ifdef _ILP32 case EOVERFLOW: /* @@ -3432,6 +3562,8 @@ nvlist_free(props); } + if (zfs_crypto_clone(hdl, zhp->zfs_name, parent) != 0) + return (zfs_error(hdl, EZFS_KEYERR, errbuf)); (void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); @@ -4077,6 +4209,9 @@ else zc.zc_objset_type = 
DMU_OST_ZFS; + if (zfs_crypto_rename(hdl, zhp->zfs_name, parent) != 0) + return (zfs_error(hdl, EZFS_KEYERR, errbuf)); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); --- old/usr/src/lib/libzfs/common/libzfs_impl.h Tue Feb 3 13:17:49 2009 +++ new/usr/src/lib/libzfs/common/libzfs_impl.h Tue Feb 3 13:17:48 2009 @@ -24,8 +24,8 @@ * Use is subject to license terms. */ -#ifndef _LIBFS_IMPL_H -#define _LIBFS_IMPL_H +#ifndef _LIBZFS_IMPL_H +#define _LIBZFS_IMPL_H #include #include @@ -64,6 +64,7 @@ void *libzfs_sharehdl; /* libshare handle */ uint_t libzfs_shareflags; avl_tree_t libzfs_mnttab_cache; + zfs_crypt_t *libzfs_cry; }; #define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ @@ -191,4 +192,4 @@ } #endif -#endif /* _LIBFS_IMPL_H */ +#endif /* _LIBZFS_IMPL_H */ --- old/usr/src/lib/libzfs/common/libzfs_mount.c Tue Feb 3 13:17:50 2009 +++ new/usr/src/lib/libzfs/common/libzfs_mount.c Tue Feb 3 13:17:49 2009 @@ -94,6 +94,7 @@ static int (*iscsitgt_zfs_is_shared)(const char *); static int (*iscsitgt_svc_online)(); + /* * The share protocols table must be in the same order as the zfs_share_prot_t * enum in libzfs_impl.h @@ -280,6 +281,11 @@ getzoneid() == GLOBAL_ZONEID) return (B_FALSE); + if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && + zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) + != ZFS_CRYPT_KEY_AVAILABLE) + return (B_FALSE); + if (source) *source = sourcetype; @@ -1149,7 +1155,8 @@ */ #pragma weak zpool_mount_datasets = zpool_enable_datasets int -zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags, + boolean_t encrypted) { mount_cbdata_t cb = { 0 }; libzfs_handle_t *hdl = zhp->zpool_hdl; @@ -1156,6 +1163,8 @@ zfs_handle_t *zfsp; int i, ret = -1; int *good; + int version; + char *prev_dataset = NULL; /* * Gather all non-snap datasets within the pool. 
@@ -1173,6 +1182,8 @@ if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0) goto out; + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + /* * Sort the datasets by mountpoint. */ @@ -1188,10 +1199,40 @@ ret = 0; for (i = 0; i < cb.cb_used; i++) { + + /* + * If we are looking for encrypted datasets of keyscope=pool + * and unencrypted children to mount. However, other + * encrypted dataset types and unencrypted datasets that + * do not have an encrypted parent must be skipped. + */ + if (version >= SPA_VERSION_CRYPTO && encrypted == B_TRUE) { + if (zfs_prop_get_int(cb.cb_datasets[i], + ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF || + zfs_prop_get_int(cb.cb_datasets[i], + ZFS_PROP_KEYSCOPE) != ZFS_KEYSCOPE_POOL) { + if (prev_dataset == NULL) + continue; + else { + int len = strlen(prev_dataset); + char *ds_name = (char *)zfs_get_name( + cb.cb_datasets[i]); + + if (!(strncmp(prev_dataset, ds_name, + len) == 0 && + (strlen(ds_name) > len) && + ds_name[len] == '/')) + continue; + } + } + } + if (zfs_mount(cb.cb_datasets[i], mntopts, flags) != 0) ret = -1; - else + else { good[i] = 1; + prev_dataset = (char *)zfs_get_name(cb.cb_datasets[i]); + } } /* @@ -1216,61 +1257,112 @@ } +typedef struct { + libzfs_handle_t *hdl; + boolean_t encrypted_only; + int pool_version; +} zvol_cb_arg_t; + static int zvol_cb(const char *dataset, void *data) { - libzfs_handle_t *hdl = data; + zvol_cb_arg_t *zcbt = data; + libzfs_handle_t *hdl = zcbt->hdl; zfs_handle_t *zhp; + int crypt, keyscope; /* * Ignore snapshots and ignore failures from non-existant datasets. */ if (strchr(dataset, '@') != NULL || - (zhp = zfs_open(hdl, dataset, ZFS_TYPE_VOLUME)) == NULL) + (zhp = zfs_open(zcbt->hdl, dataset, ZFS_TYPE_VOLUME)) == NULL) return (0); - if (zfs_unshare_iscsi(zhp) != 0) + /* + * crypt will be ZIO_CRYPT_INHERIT for <= SPA_VERSION_CRYPTO + * + * Skip over any datasets that either aren't encrypted or + * don't have keyscope=pool. 
+ */ + if (zcbt->pool_version >= SPA_VERSION_CRYPTO &&zcbt->encrypted_only) { + crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); + keyscope = zfs_prop_get_int(zhp, ZFS_PROP_KEYSCOPE); + if (crypt == ZIO_CRYPT_OFF || keyscope != ZFS_KEYSCOPE_POOL) { + zfs_close(zhp); + return (0); + } + } + + if (zfs_unshare_iscsi(zhp) != 0) { + zfs_close(zhp); return (-1); + } + if (zvol_remove_link(zcbt->hdl, dataset)) { + zfs_close(zhp); + return (-1); + } + zfs_close(zhp); return (0); } + +typedef struct { + char *mountpoint; + zfs_handle_t *dataset; + boolean_t crypto; +} compare_mntpt_t; + static int mountpoint_compare(const void *a, const void *b) { - const char *mounta = *((char **)a); - const char *mountb = *((char **)b); + compare_mntpt_t *mounta = (compare_mntpt_t *)a; + compare_mntpt_t *mountb = (compare_mntpt_t *)b; - return (strcmp(mountb, mounta)); + return (strcmp(mountb->mountpoint, mounta->mountpoint)); } + /* - * Unshare and unmount all datasets within the given pool. We don't want to - * rely on traversing the DSL to discover the filesystems within the pool, - * because this may be expensive (if not all of them are mounted), and can fail - * arbitrarily (on I/O error, for example). Instead, we walk /etc/mnttab and - * gather all the filesystems that are currently mounted. + * Unshare and unmount all or only encrypted datasets within the given pool. + * We don't want to rely on traversing the DSL to discover the filesystems + * within the pool, because this may be expensive (if not all of them are + * mounted), and can fail arbitrarily (on I/O error, for example). Instead, + * we walk /etc/mnttab and gather all the filesystems that are currently + * mounted. + * + * When operations are only be performed on encrypted datasets, the function is + * allowed to silently fail as we are not unmounting any underneath unencrypted + * datasets or encrypted datasets of a different key. 
*/ + #pragma weak zpool_unmount_datasets = zpool_disable_datasets int -zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) +zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force, + boolean_t encrypted) { int used, alloc; struct mnttab entry; size_t namelen; - char **mountpoints = NULL; - zfs_handle_t **datasets = NULL; + compare_mntpt_t *mp = NULL; + zvol_cb_arg_t zcbt; libzfs_handle_t *hdl = zhp->zpool_hdl; int i; int ret = -1; int flags = (force ? MS_FORCE : 0); + int version; + version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); + /* * First unshare all zvols. */ - if (zpool_iter_zvol(zhp, zvol_cb, hdl) != 0) + zcbt.hdl = hdl; + zcbt.encrypted_only = encrypted; + zcbt.pool_version = version; + if (zpool_iter_zvol(zhp, zvol_cb, &zcbt) != 0) return (-1); namelen = strlen(zhp->zpool_name); @@ -1300,35 +1392,25 @@ */ if (used == alloc) { if (alloc == 0) { - if ((mountpoints = zfs_alloc(hdl, - 8 * sizeof (void *))) == NULL) + if ((mp = zfs_alloc(hdl, + 8 * sizeof (compare_mntpt_t))) == NULL) goto out; - if ((datasets = zfs_alloc(hdl, - 8 * sizeof (void *))) == NULL) - goto out; - alloc = 8; } else { void *ptr; - if ((ptr = zfs_realloc(hdl, mountpoints, - alloc * sizeof (void *), - alloc * 2 * sizeof (void *))) == NULL) + if ((ptr = zfs_realloc(hdl, mp, + alloc * sizeof (compare_mntpt_t), + alloc * 2 * sizeof (compare_mntpt_t))) == + NULL) goto out; - mountpoints = ptr; - - if ((ptr = zfs_realloc(hdl, datasets, - alloc * sizeof (void *), - alloc * 2 * sizeof (void *))) == NULL) - goto out; - datasets = ptr; - + mp = ptr; alloc *= 2; } } - if ((mountpoints[used] = zfs_strdup(hdl, + if ((mp[used].mountpoint = zfs_strdup(hdl, entry.mnt_mountp)) == NULL) goto out; @@ -1337,8 +1419,7 @@ * is only used to determine if we need to remove the underlying * mountpoint, so failure is not fatal. 
*/ - datasets[used] = make_dataset_handle(hdl, entry.mnt_special); - + mp[used].dataset = make_dataset_handle(hdl, entry.mnt_special); used++; } @@ -1346,19 +1427,40 @@ * At this point, we have the entire list of filesystems, so sort it by * mountpoint. */ - qsort(mountpoints, used, sizeof (char *), mountpoint_compare); + qsort(mp, used, sizeof (compare_mntpt_t), mountpoint_compare); /* - * Walk through and first unshare everything. + * If we are unmounting encrypted file systems we need to + * run through the sorted list in reverse so we can tag which + * datasets are children of encrypted datasets with keyscope=pool */ - for (i = 0; i < used; i++) { - zfs_share_proto_t *curr_proto; - for (curr_proto = share_all_proto; *curr_proto != PROTO_END; - curr_proto++) { - if (is_shared(hdl, mountpoints[i], *curr_proto) && - unshare_one(hdl, mountpoints[i], - mountpoints[i], *curr_proto) != 0) - goto out; + if (version >= SPA_VERSION_CRYPTO && encrypted == B_TRUE) { + + char *prev_mntpt = NULL; + int len = 0; + + for (i = used - 1; i > -1; i--) { + + if (prev_mntpt != NULL && + strncmp(prev_mntpt, mp[i].mountpoint, len) == 0 && + strlen(mp[i].mountpoint) > len && + mp[i].mountpoint[len] == '/') { + mp[i].crypto = B_TRUE; + continue; + } + + if (zfs_prop_get_int(mp[i].dataset, + ZFS_PROP_ENCRYPTION) == ZIO_CRYPT_OFF || + zfs_prop_get_int(mp[i].dataset, + ZFS_PROP_KEYSCOPE) != ZFS_KEYSCOPE_POOL) { + prev_mntpt = NULL; + continue; + } + + prev_mntpt = mp[i].mountpoint; + len = strlen(mp[i].mountpoint); + mp[i].crypto = B_TRUE; + } } @@ -1367,24 +1469,67 @@ * appropriate. 
*/ for (i = 0; i < used; i++) { - if (unmount_one(hdl, mountpoints[i], flags) != 0) + zfs_share_proto_t *curr_proto; + + if (mp[i].dataset == NULL) + continue; + + /* + * If we are looking for encrypted datasets, bypass those + * that are not children of encrypted datasets of keyscope pool + */ + + if (version >= SPA_VERSION_CRYPTO && encrypted == B_TRUE && + mp[i].crypto == B_FALSE) + continue; + + /* + * Walk through and first unshare everything. + */ + for (curr_proto = share_all_proto; *curr_proto != PROTO_END; + curr_proto++) { + if (is_shared(hdl, mp[i].mountpoint, *curr_proto) && + unshare_one(hdl, mp[i].mountpoint, + mp[i].mountpoint, *curr_proto) != 0) { + goto out; + } + } + + if (umount2(mp[i].mountpoint, flags) != 0) { + zfs_error_aux(hdl, strerror(errno)); + (void) zfs_error_fmt(hdl, EZFS_UMOUNTFAILED, + dgettext(TEXT_DOMAIN, "cannot unmount '%s'"), + mp[i].mountpoint); goto out; - } + } - for (i = 0; i < used; i++) { - if (datasets[i]) - remove_mountpoint(datasets[i]); + /* + * If this dataset is not encrypted by a pool key, we + * should unload the key now that it has been unmounted. 
+		 */
+		if (encrypted == B_TRUE && mp[i].crypto == B_TRUE) {
+			if (zfs_prop_get_int(mp[i].dataset,
+			    ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF &&
+			    zfs_prop_get_int(mp[i].dataset,
+			    ZFS_PROP_KEYSCOPE) == ZFS_KEYSCOPE_DSL)
+				(void) zfs_unload_key(mp[i].dataset);
+		}
 	}

 	ret = 0;
+
 out:
+
 	for (i = 0; i < used; i++) {
-		if (datasets[i])
-			zfs_close(datasets[i]);
-		free(mountpoints[i]);
+		if (mp[i].dataset != NULL) {
+			remove_mountpoint(mp[i].dataset);
+			zfs_close(mp[i].dataset);
+		}
+
+		free(mp[i].mountpoint);
 	}
-	free(datasets);
-	free(mountpoints);
+	free(mp);
+
 	return (ret);
 }
--- old/usr/src/lib/libzfs/common/libzfs_pool.c Tue Feb 3 13:17:52 2009
+++ new/usr/src/lib/libzfs/common/libzfs_pool.c Tue Feb 3 13:17:51 2009
@@ -300,6 +300,11 @@
 			(void) strlcpy(buf, strval, len);
 			break;

+		case PROP_TYPE_BINARY:
+			buf = NULL;
+			len = 0;
+			break;
+
 		default:
 			abort();
 		}
@@ -365,6 +370,9 @@
  * Given an nvlist of zpool properties to be set, validate that they are
  * correct, and parse any numeric properties (index, boolean, etc) if they are
  * specified as strings.
+ *
+ * The handle can either be the libzfs_handle_t, for create_or_import == true,
+ * or zpool_handle_t, for create_or_import == false.
  */
 static nvlist_t *
 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
@@ -533,6 +541,62 @@
 			*slash = '/';
 			break;
+
+		case ZPOOL_PROP_KEYSOURCE:
+			if (!create_or_import && version < SPA_VERSION_CRYPTO) {
+				zfs_error_aux(hdl,
+				    dgettext(TEXT_DOMAIN,
+				    "pool must be upgraded to support "
+				    "'%s' property"), propname);
+				(void) zfs_error(hdl, EZFS_BADVERSION,
+				    errbuf);
+				goto error;
+			}
+
+			/*
+			 * If we're doing a set operation and the key has been
+			 * previously loaded, the keysource has a number of
+			 * restrictions to ensure proper key change operations.
+ */ + + if (!create_or_import) { + zhp = zpool_open_canfail(hdl, poolname); + if (zhp == NULL) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "could not open pool '%s'"), + poolname); + (void) zfs_error(hdl, EZFS_OPENFAILED, + errbuf); + goto error; + } + } else + zhp = NULL; + + if (zhp != NULL && + zpool_get_prop_int(zhp, ZPOOL_PROP_KEYSTATUS, NULL) + > ZFS_CRYPT_KEY_UNDEFINED) { + if (valid_set_keysource_change(hdl->libzfs_cry, + zpool_get_prop_string(zhp, + ZPOOL_PROP_KEYSOURCE, NULL), strval)) { + zfs_error_aux(hdl, + dgettext(TEXT_DOMAIN, + "invalid keysource change.")); + (void) zfs_error(hdl, EZFS_KEYERR, + errbuf); + goto error; + } + + break; + } + + if (valid_keysource(strval) != 0) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "invalid keysource.")); + (void) zfs_error(hdl, EZFS_KEYERR, errbuf); + goto error; + } + + break; } } @@ -626,7 +690,6 @@ return (0); } - /* * Validate the given pool name, optionally putting an extended error message in * 'buf'. @@ -876,14 +939,26 @@ } } + (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); + if (props && (zpool_crypto_create(hdl, props, &zc) != 0)) + return (zfs_error(hdl, EZFS_KEYERR, msg)); + if (fsprops) { uint64_t zoned; - char *zonestr; + char *zonestr, *cryptstr; zoned = ((nvlist_lookup_string(fsprops, zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) && strcmp(zonestr, "on") == 0); + if ((nvlist_lookup_string(fsprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &cryptstr) == 0 && + strcmp(cryptstr, "off") != 0)) { + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "encryption not supported on top level dataset")); + return (zfs_error(hdl, EZFS_KEYERR, msg)); + } + if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) { goto create_failed; @@ -901,8 +976,6 @@ if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) goto create_failed; - (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); - if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) { 
zcmd_free_nvlists(&zc); @@ -2231,7 +2304,7 @@ { int ret = 0; - if (ZFS_IS_VOLUME(zhp)) { + if (ZFS_IS_VOLUME(zhp) && zfs_mount_crypto_check(zhp) == 0) { (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL); } --- old/usr/src/lib/libzfs/common/libzfs_sendrecv.c Tue Feb 3 13:17:53 2009 +++ new/usr/src/lib/libzfs/common/libzfs_sendrecv.c Tue Feb 3 13:17:52 2009 @@ -235,6 +235,7 @@ while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { char *propname = nvpair_name(elem); zfs_prop_t prop = zfs_name_to_prop(propname); + zprop_type_t type = zfs_prop_get_type(prop); nvlist_t *propnv; if (!zfs_prop_user(propname) && zfs_prop_readonly(prop)) @@ -248,6 +249,8 @@ ZPROP_VALUE, &value) == 0); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) continue; + } else if (prop == ZFS_PROP_WRAPPEDKEY) { + continue; } else { char *source; if (nvlist_lookup_string(propnv, @@ -257,12 +260,19 @@ continue; } - if (zfs_prop_user(propname) || - zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + if (zfs_prop_user(propname) || type == PROP_TYPE_STRING) { char *value; verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_string(nv, propname, value)); + } else if (type == PROP_TYPE_BINARY) { + /* + * Binary properties can't be sent just now. + * That is okay for now since the only supported binary + * property is the wrapped encryption key and + * that is already skipped above. 
+			 */
+			continue;
 		} else {
 			uint64_t value;
 			verify(nvlist_lookup_uint64(propnv,
--- old/usr/src/lib/libzfs/common/libzfs_util.c Tue Feb 3 13:17:55 2009
+++ new/usr/src/lib/libzfs/common/libzfs_util.c Tue Feb 3 13:17:54 2009
@@ -210,6 +210,8 @@
 	case EZFS_ACTIVE_SPARE:
 		return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
 		    "device"));
+	case EZFS_KEYERR:
+		return (dgettext(TEXT_DOMAIN, "crypto key failure"));
 	case EZFS_UNKNOWN:
 		return (dgettext(TEXT_DOMAIN, "unknown error"));
 	default:
@@ -551,6 +553,25 @@
 	hdl->libzfs_printerr = printerr;
 }

+/*
+ * These functions store a caller-supplied zfs_crypt_t pointer in the
+ * libzfs_handle_t behind the given pool or dataset handle.  Since the
+ * internals of libzfs_handle_t are defined in internal headers and not
+ * public, separate functions are needed so crypto API users can provide
+ * key information for crypto operations.
+ */
+void
+zpool_set_libzfs_cry(zpool_handle_t *hdl, zfs_crypt_t *cry)
+{
+	hdl->zpool_hdl->libzfs_cry = cry;
+}
+
+void
+zfs_set_libzfs_cry(zfs_handle_t *hdl, zfs_crypt_t *cry)
+{
+	hdl->zfs_hdl->libzfs_cry = cry;
+}
+
 libzfs_handle_t *
 libzfs_init(void)
 {
@@ -1166,6 +1187,10 @@
 		}
 		break;

+	case PROP_TYPE_BINARY:
+		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+		    "binary properties cannot be set"));
+		goto error;
 	default:
 		abort();
 	}
--- old/usr/src/lib/libzfs/common/mapfile-vers Tue Feb 3 13:17:57 2009
+++ new/usr/src/lib/libzfs/common/mapfile-vers Tue Feb 3 13:17:56 2009
@@ -33,6 +33,7 @@
 	libzfs_print_on_error;
 	zfs_allocatable_devs;
 	zfs_build_perms;
+	zfs_change_key;
 	zfs_clone;
 	zfs_close;
 	zfs_create;
@@ -49,7 +50,9 @@
 	zfs_get_pool_handle;
 	zfs_get_user_props;
 	zfs_get_type;
+	zfs_set_libzfs_cry;
 	zfs_iscsi_perm_check;
+	zfs_is_encrypted;
 	zfs_is_mounted;
 	zfs_is_shared;
 	zfs_is_shared_iscsi;
@@ -60,7 +63,12 @@
 	zfs_iter_filesystems;
 	zfs_iter_root;
 	zfs_iter_snapshots;
+	zfs_cmd_key_change;
+	zfs_cmd_key_load;
+	zfs_cmd_key_unload;
+	zfs_load_key;
 	zfs_mount;
+	zfs_mount_crypto_check;
 	zfs_name_to_prop;
 	zfs_name_valid;
 	zfs_nicenum;
@@ -101,6 +109,7 @@ zfs_snapshot; zfs_spa_version; zfs_type_to_name; + zfs_unload_key; zfs_unmount; zfs_unmountall; zfs_unshare; @@ -112,6 +121,7 @@ zfs_unshareall_nfs; zfs_unshareall_smb; zpool_add; + zpool_change_key; zpool_clear; zpool_close; zpool_create; @@ -138,12 +148,18 @@ zpool_get_prop_int; zpool_get_state; zpool_get_status; + zpool_set_libzfs_cry; zpool_import; zpool_import_props; zpool_import_status; zpool_in_use; zpool_iter; + zpool_cmd_key_change; + zpool_cmd_key_load; + zpool_cmd_key_unload; + zpool_keysource_prompt; zpool_label_disk; + zpool_load_key; zpool_mount_datasets; zpool_name_to_prop; zpool_obj_to_path; @@ -162,6 +178,7 @@ zpool_set_prop; zpool_stage_history; zpool_state_to_name; + zpool_unload_key; zpool_unmount_datasets; zpool_upgrade; zpool_vdev_attach; --- old/usr/src/lib/libzpool/Makefile.com Tue Feb 3 13:18:00 2009 +++ new/usr/src/lib/libzpool/Makefile.com Tue Feb 3 13:17:59 2009 @@ -19,11 +19,9 @@ # CDDL HEADER END # # -# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Copyright 2008 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# LIBRARY= libzpool.a VERS= .1 @@ -30,7 +28,7 @@ # include the list of ZFS sources include ../../../uts/common/Makefile.files -KERNEL_OBJS = kernel.o taskq.o util.o +KERNEL_OBJS = kernel.o taskq.o util.o kcf.o LIST_OBJS = list.o OBJECTS=$(ZFS_COMMON_OBJS) $(ZFS_SHARED_OBJS) $(KERNEL_OBJS) $(LIST_OBJS) @@ -63,7 +61,7 @@ CFLAGS += -g $(CCVERBOSE) $(CNOGLOBAL) CFLAGS64 += -g $(CCVERBOSE) $(CNOGLOBAL) -LDLIBS += -lumem -lavl -lnvpair -lz -lc +LDLIBS += -lumem -lavl -lnvpair -lz -lc -lmd CPPFLAGS += $(INCS) .KEEP_STATE: --- /dev/null Tue Feb 3 13:18:02 2009 +++ new/usr/src/lib/libzpool/common/kcf.c Tue Feb 3 13:18:01 2009 @@ -0,0 +1,80 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include
+
+#define _KERNEL
+#include
+#include
+#include
+
+crypto_mech_type_t
+crypto_mech2id(crypto_mech_name_t name)
+{
+	return (1);
+}
+
+int
+crypto_create_ctx_template(crypto_mechanism_t *mech,
+    crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag)
+{
+	return (0);
+}
+
+void
+crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
+{
+}
+
+/* Pass-through stubs: the input is copied to the output unchanged. */
+
+/*ARGSUSED*/
+int
+crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
+    crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
+    crypto_call_req_t *cr)
+{
+	bcopy(plaintext->cd_raw.iov_base, ciphertext->cd_raw.iov_base,
+	    plaintext->cd_length);
+	return (0);
+}
+
+/*ARGSUSED*/
+int
+crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext,
+    crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
+    crypto_call_req_t *cr)
+{
+	bcopy(ciphertext->cd_raw.iov_base, plaintext->cd_raw.iov_base,
+	    ciphertext->cd_length);
+	return (0);
+}
+
+
+int
+crypto_kmflag(crypto_req_handle_t handle)
+{
+	return (KM_SLEEP);
+}
--- old/usr/src/uts/common/Makefile.files Tue Feb 3 13:18:04 2009
+++ new/usr/src/uts/common/Makefile.files Tue Feb 3 13:18:02 2009
@@ -1258,6 +1258,7 @@
 	zio.o \
zio_checksum.o \ zio_compress.o \ + zio_crypt.o \ zio_inject.o ZFS_SHARED_OBJS += \ @@ -1265,6 +1266,7 @@ zfs_deleg.o \ zfs_prop.o \ zfs_comutil.o \ + zcrypt_common.o \ zpool_prop.o \ zprop_common.o --- old/usr/src/uts/common/fs/zfs/arc.c Tue Feb 3 13:18:05 2009 +++ new/usr/src/uts/common/fs/zfs/arc.c Tue Feb 3 13:18:04 2009 @@ -118,8 +118,10 @@ */ #include +#include #include #include +#include #include #include #include @@ -280,6 +282,10 @@ kstat_named_t arcstat_l2_size; kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_memory_throttle_count; + kstat_named_t arcstat_l2_encrypt; + kstat_named_t arcstat_l2_decrypt; + kstat_named_t arcstat_l2_encrypt_error; + kstat_named_t arcstat_l2_decrypt_error; } arc_stats_t; static arc_stats_t arc_stats = { @@ -332,7 +338,11 @@ { "l2_io_error", KSTAT_DATA_UINT64 }, { "l2_size", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, - { "memory_throttle_count", KSTAT_DATA_UINT64 } + { "memory_throttle_count", KSTAT_DATA_UINT64 }, + { "l2_encrypt", KSTAT_DATA_UINT64 }, + { "l2_decrypt", KSTAT_DATA_UINT64 }, + { "l2_encrypt_fail", KSTAT_DATA_UINT64 }, + { "l2_decrypt_fail", KSTAT_DATA_UINT64 } }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) @@ -489,6 +499,7 @@ #define ARC_L2_EVICTED (1 << 17) /* evicted during I/O */ #define ARC_L2_WRITE_HEAD (1 << 18) /* head of write list */ #define ARC_STORED (1 << 19) /* has been store()d to */ +#define ARC_L2_CRYPT (1 << 20) /* Buf is for crypt'd dataset */ #define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_IN_HASH_TABLE) #define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_IO_IN_PROGRESS) @@ -503,6 +514,7 @@ #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_L2_WRITING) #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_L2_EVICTED) #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_L2_WRITE_HEAD) +#define HDR_L2_CRYPT(hdr) ((hdr)->b_flags & ARC_L2_CRYPT) /* * Other sizes @@ -2371,6 +2383,10 @@ buf = zio->io_private; hdr = buf->b_hdr; + if (BP_GET_CRYPT(zio->io_bp) != 
ZIO_CRYPT_OFF && + BP_GET_CRYPT(zio->io_bp) != ZIO_CRYPT_INHERIT) { + hdr->b_flags |= ARC_L2_CRYPT; + } /* * The hdr was inserted into hash-table and removed from lists @@ -2494,9 +2510,9 @@ * arc_read_done() will invoke all the requested "done" functions * for readers of this block. * - * Normal callers should use arc_read and pass the arc buffer and offset + * Normal callers should use arc_read() and pass the arc buffer and offset * for the bp. But if you know you don't need locking, you can use - * arc_read_bp. + * arc_read_nolock(). */ int arc_read(zio_t *pio, spa_t *spa, blkptr_t *bp, arc_buf_t *pbuf, @@ -2534,7 +2550,6 @@ if (hdr && hdr->b_datacnt > 0) { *arc_flags |= ARC_CACHED; - if (HDR_IO_IN_PROGRESS(hdr)) { if (*arc_flags & ARC_WAIT) { @@ -2636,6 +2651,9 @@ hdr->b_flags |= ARC_L2CACHE; if (BP_GET_LEVEL(bp) > 0) hdr->b_flags |= ARC_INDIRECT; + if (spa_version(spa) >= SPA_VERSION_CRYPTO && + BP_GET_CRYPT(bp) != ZIO_CRYPT_OFF) + hdr->b_flags |= ARC_L2_CRYPT; } else { /* this block is in the ghost cache */ ASSERT(GHOST_STATE(hdr->b_state)); @@ -3101,6 +3119,11 @@ hdr->b_dva = *BP_IDENTITY(zio->io_bp); hdr->b_birth = zio->io_bp->blk_birth; hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0]; + if (BP_GET_CRYPT(zio->io_bp) != ZIO_CRYPT_OFF && + BP_GET_CRYPT(zio->io_bp) != ZIO_CRYPT_INHERIT) { + hdr->b_flags |= ARC_L2_CRYPT; + } + /* * If the block to be written was all-zero, we may have * compressed it away. 
In this case no write was performed @@ -3200,6 +3223,35 @@ wp->wp_oscompress); } + /* Determine encryption setting */ + if (dmu_ot[wp->wp_type].ot_encrypt && wp->wp_level <= 0) { + zp->zp_crypt = zio_crypt_select(wp->wp_dncrypt, wp->wp_oscrypt); + /* + * Need to turn off compression for level 0 dnodes so that when + * we get to the ZIO layer we can encrypt the bonusbufs + * but leave the rest in the clear + */ + if (zp->zp_crypt != ZIO_CRYPT_OFF && + wp->wp_type == DMU_OT_DNODE) { + zp->zp_compress = ZIO_COMPRESS_OFF; + } + } else { + zp->zp_crypt = ZIO_CRYPT_OFF; + } + + /* + * Make sure we only use the truncated SHA256 when we crypt is + * set to an on value, and if crypt is set to on make sure we + * do use the truncated SHA256 + * This probably isn't needed but it is a useful safety net. + */ + if (zp->zp_crypt == ZIO_CRYPT_OFF && + zp->zp_checksum == ZIO_CHECKSUM_SHA256_MAC) + zp->zp_checksum = ZIO_CHECKSUM_SHA256; + if (zp->zp_crypt != ZIO_CRYPT_OFF && + zp->zp_checksum != ZIO_CHECKSUM_SHA256_MAC) + zp->zp_checksum = ZIO_CHECKSUM_SHA256_MAC; + zp->zp_type = wp->wp_type; zp->zp_level = wp->wp_level; zp->zp_ndvas = MIN(wp->wp_copies + ismd, spa_max_replication(spa)); @@ -3970,6 +4022,7 @@ arc_buf_t *buf; kmutex_t *hash_lock; int equal; + boolean_t decrypted = B_TRUE; ASSERT(zio->io_vd != NULL); ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); @@ -3987,10 +4040,24 @@ mutex_enter(hash_lock); /* + * Decrypt here before we validate the checksum on the cleartext + * Note that the underlying implementation is assumed to be doing + * inplace decryption. + */ + if (HDR_L2_CRYPT(hdr)) { + decrypted = l2arc_decrypt_buf(cb->l2rcb_spa, + &hdr->b_dva, buf->b_data, hdr->b_size); + if (!decrypted) { + ARCSTAT_BUMP(arcstat_l2_decrypt_error); + } else { + ARCSTAT_BUMP(arcstat_l2_decrypt); + } + } + /* * Check this survived the L2ARC journey. 
*/ equal = arc_cksum_equal(buf); - if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { + if (decrypted && equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { mutex_exit(hash_lock); zio->io_private = buf; zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ @@ -4284,6 +4351,34 @@ break; } + /* + * Data must be encrypted once we pass here + * since zio_write_phys doesn't have encryption in + * its pipeline. + * We only do encryption buffers that are from + * encrypted datasets as indicated by b_flags. + * The key to use for L2ARC encryption is found + * in the spa_t, so all we need to pass in is the + * spa and the buffer. b_birth is passed in for + * use as an IV. + * NOTE we can't encrypt in place because the buffer + * may still get referenced by the in memory ARC. + */ + if (HDR_L2_CRYPT(ab)) { + int crypt_ok; + buf_data = kmem_alloc(ab->b_size, KM_SLEEP); + crypt_ok = l2arc_encrypt_buf(dev->l2ad_spa, + &ab->b_dva, ab->b_buf->b_data, + ab->b_size, buf_data); + if (!crypt_ok) { + ARCSTAT_BUMP(arcstat_l2_encrypt_error); + mutex_exit(hash_lock); + continue; + } + ARCSTAT_BUMP(arcstat_l2_encrypt); + } else { + buf_data = ab->b_buf->b_data; + } if (pio == NULL) { /* * Insert a dummy header on the buflist so @@ -4310,8 +4405,6 @@ ab->b_flags |= ARC_L2_WRITING; ab->b_l2hdr = hdrl2; list_insert_head(dev->l2ad_buflist, ab); - buf_data = ab->b_buf->b_data; - buf_sz = ab->b_size; /* * Compute and store the buffer cksum before @@ -4320,6 +4413,7 @@ arc_cksum_verify(ab->b_buf); arc_cksum_compute(ab->b_buf, B_TRUE); + buf_sz = ab->b_size; mutex_exit(hash_lock); wzio = zio_write_phys(pio, dev->l2ad_vdev, --- old/usr/src/uts/common/fs/zfs/dbuf.c Tue Feb 3 13:18:07 2009 +++ new/usr/src/uts/common/fs/zfs/dbuf.c Tue Feb 3 13:18:06 2009 @@ -2170,6 +2170,8 @@ wp.wp_oscompress = os->os_compress; wp.wp_dnchecksum = dn->dn_checksum; wp.wp_oschecksum = os->os_checksum; + wp.wp_dncrypt = dn->dn_crypt; + wp.wp_oscrypt = os->os_crypt; if (BP_IS_OLDER(db->db_blkptr, txg)) 
(void) dsl_dataset_block_kill( --- old/usr/src/uts/common/fs/zfs/dmu.c Tue Feb 3 13:18:09 2009 +++ new/usr/src/uts/common/fs/zfs/dmu.c Tue Feb 3 13:18:08 2009 @@ -45,46 +45,73 @@ #include #endif +/* + * dmu_ot: + * + * Guidance on when to say TRUE for encryption: + * + * User Created Data, file contents + * User Identifying Data, eg ACL + * Indirect User Identifying Data, FUID table + * + * What can't be encrypted: + * Metadata needed to traverse the pool/datasets for resilver/scrub + * Metadata needed to find datasets for mounting + * Properties - encryption,keysource,keyscope are properties. + * + * What would be nice to be encrypted but can't be just now: + * + * User properties + * + * Unusual exceptions: + * ZIL intent log: The DMU_OT_ZIL isn't marked as encrypted but data + * that is in the ZIL is encrypted. + * + * If in doubt ask. + */ const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { - { byteswap_uint8_array, TRUE, "unallocated" }, - { zap_byteswap, TRUE, "object directory" }, - { byteswap_uint64_array, TRUE, "object array" }, - { byteswap_uint8_array, TRUE, "packed nvlist" }, - { byteswap_uint64_array, TRUE, "packed nvlist size" }, - { byteswap_uint64_array, TRUE, "bplist" }, - { byteswap_uint64_array, TRUE, "bplist header" }, - { byteswap_uint64_array, TRUE, "SPA space map header" }, - { byteswap_uint64_array, TRUE, "SPA space map" }, - { byteswap_uint64_array, TRUE, "ZIL intent log" }, - { dnode_buf_byteswap, TRUE, "DMU dnode" }, - { dmu_objset_byteswap, TRUE, "DMU objset" }, - { byteswap_uint64_array, TRUE, "DSL directory" }, - { zap_byteswap, TRUE, "DSL directory child map"}, - { zap_byteswap, TRUE, "DSL dataset snap map" }, - { zap_byteswap, TRUE, "DSL props" }, - { byteswap_uint64_array, TRUE, "DSL dataset" }, - { zfs_znode_byteswap, TRUE, "ZFS znode" }, - { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" }, - { byteswap_uint8_array, FALSE, "ZFS plain file" }, - { zap_byteswap, TRUE, "ZFS directory" }, - { zap_byteswap, TRUE, "ZFS master node" }, - { 
zap_byteswap, TRUE, "ZFS delete queue" }, - { byteswap_uint8_array, FALSE, "zvol object" }, - { zap_byteswap, TRUE, "zvol prop" }, - { byteswap_uint8_array, FALSE, "other uint8[]" }, - { byteswap_uint64_array, FALSE, "other uint64[]" }, - { zap_byteswap, TRUE, "other ZAP" }, - { zap_byteswap, TRUE, "persistent error log" }, - { byteswap_uint8_array, TRUE, "SPA history" }, - { byteswap_uint64_array, TRUE, "SPA history offsets" }, - { zap_byteswap, TRUE, "Pool properties" }, - { zap_byteswap, TRUE, "DSL permissions" }, - { zfs_acl_byteswap, TRUE, "ZFS ACL" }, - { byteswap_uint8_array, TRUE, "ZFS SYSACL" }, - { byteswap_uint8_array, TRUE, "FUID table" }, - { byteswap_uint64_array, TRUE, "FUID table size" }, - { zap_byteswap, TRUE, "DSL dataset next clones"}, - { zap_byteswap, TRUE, "scrub work queue" }, +/* BEGIN CSTYLED */ + /* byte_swap_function meta encrypt name */ + { byteswap_uint8_array, TRUE, FALSE, "unallocated" }, + { zap_byteswap, TRUE, FALSE, "object directory" }, + { byteswap_uint64_array, TRUE, FALSE, "object array" }, + { byteswap_uint8_array, TRUE, FALSE, "packed nvlist" }, + { byteswap_uint64_array, TRUE, FALSE, "packed nvlist size" }, + { byteswap_uint64_array, TRUE, FALSE, "bplist" }, + { byteswap_uint64_array, TRUE, FALSE, "bplist header" }, + { byteswap_uint64_array, TRUE, FALSE, "SPA space map header" }, + { byteswap_uint64_array, TRUE, FALSE, "SPA space map" }, + { byteswap_uint64_array, TRUE, TRUE, "ZIL intent log" }, + { dnode_buf_byteswap, TRUE, TRUE, "DMU dnode" }, + { dmu_objset_byteswap, TRUE, FALSE, "DMU objset" }, + { byteswap_uint64_array, TRUE, FALSE, "DSL directory" }, + { zap_byteswap, TRUE, FALSE, "DSL directory child map" }, + { zap_byteswap, TRUE, FALSE, "DSL dataset snap map" }, + { zap_byteswap, TRUE, FALSE, "DSL props" }, + { byteswap_uint64_array, TRUE, FALSE, "DSL dataset" }, + { zfs_znode_byteswap, TRUE, TRUE, "ZFS znode" }, + { zfs_oldacl_byteswap, TRUE, TRUE, "ZFS V0 ACL" }, + { byteswap_uint8_array, FALSE, TRUE, "ZFS plain 
file" }, + { zap_byteswap, TRUE, TRUE, "ZFS directory" }, + { zap_byteswap, TRUE, FALSE, "ZFS master node" }, + { zap_byteswap, TRUE, FALSE, "ZFS delete queue" }, + { byteswap_uint8_array, FALSE, TRUE, "zvol object" }, + { zap_byteswap, TRUE, FALSE, "zvol prop" }, + { byteswap_uint8_array, FALSE, TRUE, "other uint8[]" }, + { byteswap_uint64_array, FALSE, TRUE, "other uint64[]" }, + { zap_byteswap, TRUE, FALSE, "other ZAP" }, + { zap_byteswap, TRUE, FALSE, "persistent error log" }, + { byteswap_uint8_array, TRUE, FALSE, "SPA history" }, + { byteswap_uint64_array, TRUE, FALSE, "SPA history offsets" }, + { zap_byteswap, TRUE, FALSE, "Pool properties" }, + { zap_byteswap, TRUE, FALSE, "DSL permissions" }, + { zfs_acl_byteswap, TRUE, TRUE, "ZFS ACL" }, + { byteswap_uint8_array, TRUE, TRUE, "ZFS SYSACL" }, + { byteswap_uint8_array, TRUE, TRUE, "FUID table" }, + { byteswap_uint64_array, TRUE, FALSE, "FUID table size" }, + { zap_byteswap, TRUE, FALSE, "DSL dataset next clones" }, + { zap_byteswap, TRUE, FALSE, "scrub work queue" }, +/* END CSTYLED */ }; int @@ -1006,6 +1033,8 @@ wp.wp_oschecksum = os->os_checksum; wp.wp_dncompress = db->db_dnode->dn_compress; wp.wp_oscompress = os->os_compress; + wp.wp_dncrypt = db->db_dnode->dn_crypt; + wp.wp_oscrypt = os->os_crypt; ASSERT(BP_IS_HOLE(bp)); @@ -1065,6 +1094,21 @@ dnode_rele(dn, FTAG); } +void +dmu_object_set_crypt(objset_t *os, uint64_t object, uint8_t crypt, + dmu_tx_t *tx) +{ + dnode_t *dn; + + /* XXX assumes dnode_hold will not get an i/o error */ + (void) dnode_hold(os->os, object, FTAG, &dn); + ASSERT(crypt < ZIO_CRYPT_FUNCTIONS); + dn->dn_crypt = crypt; + dnode_setdirty(dn, tx); + dnode_rele(dn, FTAG); +} + + int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) { @@ -1108,6 +1152,7 @@ doi->doi_indirection = dn->dn_nlevels; doi->doi_checksum = dn->dn_checksum; doi->doi_compress = dn->dn_compress; + doi->doi_crypt = dn->dn_crypt; doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 
SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; --- old/usr/src/uts/common/fs/zfs/dmu_objset.c Tue Feb 3 13:18:11 2009 +++ new/usr/src/uts/common/fs/zfs/dmu_objset.c Tue Feb 3 13:18:10 2009 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,19 @@ osi->os_secondary_cache = newval; } +static void +crypt_changed_cb(void *arg, uint64_t newval) +{ + objset_impl_t *osi = arg; + + /* + * Inheritance and range checking should have been done by now. + */ + ASSERT(newval != ZIO_CRYPT_INHERIT); + + osi->os_crypt = zio_crypt_select(newval, ZIO_CRYPT_ON_VALUE); +} + void dmu_objset_byteswap(void *buf, size_t size) { @@ -222,7 +236,10 @@ * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about - * checksum/compression/copies. + * checksum/compression/copies. But they do need to know about + * encryption so that clones from the snapshot inherit the + * same encryption property regardless of where in the namespace + * they get created. */ if (ds) { err = dsl_prop_register(ds, "primarycache", @@ -241,6 +258,9 @@ err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); } + if (err == 0) + err = dsl_prop_register(ds, "encryption", + crypt_changed_cb, osi); if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); @@ -248,12 +268,17 @@ return (err); } } else if (ds == NULL) { - /* It's the meta-objset. */ + /* + * It's the meta-objset. + * Encryption is off for ZFS metadata but on for ZPL metadata + * and file/zvol contents. 
+ */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); osi->os_primary_cache = ZFS_CACHE_ALL; osi->os_secondary_cache = ZFS_CACHE_ALL; + osi->os_crypt = ZIO_CRYPT_OFF; } osi->os_zil_header = osi->os_phys->os_zil_header; @@ -447,6 +472,8 @@ primary_cache_changed_cb, osi)); VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", secondary_cache_changed_cb, osi)); + VERIFY(0 == dsl_prop_unregister(ds, "encryption", + crypt_changed_cb, osi)); } /* @@ -542,6 +569,7 @@ dsl_dir_t *dd = arg1; struct oscarg *oa = arg2; objset_t *mos = dd->dd_pool->dp_meta_objset; + spa_t *spa = dd->dd_pool->dp_spa; int err; uint64_t ddobj; @@ -562,6 +590,65 @@ */ if (oa->clone_parent->ds_phys->ds_num_children == 0) return (EINVAL); + } else { + /* + * If encryption is on and keyscope=pool then we must + * have the spa key available when dsl_crypto_key_gen + * is run, but this is our last chance to fail the + * dataset creation. + */ + if (spa_version(spa) >= SPA_VERSION_CRYPTO && + oa->type == DMU_OST_ZFS || oa->type == DMU_OST_ZVOL) { + uint64_t crypt = ZIO_CRYPT_DEFAULT; + uint64_t keyscope = ZFS_KEYSCOPE_DEFAULT; + zfs_creat_t *zct = oa->userarg; + nvlist_t *nvprops = zct->zct_props; + int ret; + + ret = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt); + if (ret != 0 && zct->zct_cryptkey != NULL && + zct->zct_cryptkey->zk_crypt != ZIO_CRYPT_INHERIT) { + crypt = zct->zct_cryptkey->zk_crypt; + } + + ret = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), &keyscope); + if (ret != 0 && zct->zct_cryptkey != NULL && + zct->zct_cryptkey->zk_keyscope != + ZFS_KEYSCOPE_INHERIT) { + keyscope = zct->zct_cryptkey->zk_keyscope; + } + + /* + * Check we actually have the crypto mech we + * need for this dataset. 
+ */ + if (crypt != ZIO_CRYPT_OFF && + !zio_crypt_mech_available(crypt)) { + /* + * Ideally need a better errno value so that + * userland code can tell the user the + * reason we denied creating this dataset + * This could be hard to debug otherwise. + */ + return (ENOTSUP); + } + + /* + * Only need to check keyscope of pool since + * ZFS_KEYSCOPE_DSL is already checked in + * the zfs_ioctl layer where we copyin the key. + * If there is a future case where encrypted objsets + * are created other than via the ioctl interface + * then this may need an update. + */ + if (crypt != ZIO_CRYPT_OFF && + keyscope == ZFS_KEYSCOPE_POOL && + spa_keystatus(spa) != ZFS_CRYPT_KEY_AVAILABLE) { + return (EAGAIN); + } + } } return (0); @@ -590,8 +677,22 @@ osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds), ds, bp, oa->type, tx); + /* + * For new filesystems and ZVOLs we may need to do some + * crypto key setup, for clones we keep what we had before + */ + if (oa->type == DMU_OST_ZFS || oa->type == DMU_OST_ZVOL) { + zfs_creat_t *zct = oa->userarg; + VERIFY(0 == dsl_crypto_key_gen(ds, zct, cr, tx)); + } + if (oa->userfunc) oa->userfunc(&osi->os, oa->userarg, cr, tx); + } else { + if (oa->type == DMU_OST_ZFS || oa->type == DMU_OST_ZVOL) { + VERIFY(0 == dsl_crypto_key_clone(ds, + oa->clone_parent, cr, tx)); + } } spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa, @@ -908,6 +1009,7 @@ wp.wp_copies = os->os_copies; wp.wp_oschecksum = os->os_checksum; wp.wp_oscompress = os->os_compress; + wp.wp_oscrypt = os->os_crypt; if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { (void) dsl_dataset_block_kill(os->os_dsl_dataset, --- old/usr/src/uts/common/fs/zfs/dmu_send.c Tue Feb 3 13:18:14 2009 +++ new/usr/src/uts/common/fs/zfs/dmu_send.c Tue Feb 3 13:18:13 2009 @@ -130,6 +130,7 @@ ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen; ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum; ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress; + 
ba->drr->drr_u.drr_object.drr_crypt = dnp->dn_crypt; if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) return (EINTR); @@ -785,6 +786,7 @@ drro->drr_bonustype >= DMU_OT_NUMTYPES || drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS || drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || + drro->drr_crypt >= ZIO_CRYPT_FUNCTIONS || P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || drro->drr_blksz < SPA_MINBLOCKSIZE || drro->drr_blksz > SPA_MAXBLOCKSIZE || @@ -838,6 +840,7 @@ dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); + dmu_object_set_crypt(os, drro->drr_object, drro->drr_crypt, tx); if (data != NULL) { dmu_buf_t *db; --- old/usr/src/uts/common/fs/zfs/dmu_traverse.c Tue Feb 3 13:18:16 2009 +++ new/usr/src/uts/common/fs/zfs/dmu_traverse.c Tue Feb 3 13:18:15 2009 @@ -125,7 +125,7 @@ zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh); (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td, - claim_txg); + claim_txg, 0); zil_free(zilog); } --- old/usr/src/uts/common/fs/zfs/dnode.c Tue Feb 3 13:18:18 2009 +++ new/usr/src/uts/common/fs/zfs/dnode.c Tue Feb 3 13:18:16 2009 @@ -291,6 +291,7 @@ dn->dn_nblkptr = dnp->dn_nblkptr; dn->dn_checksum = dnp->dn_checksum; dn->dn_compress = dnp->dn_compress; + dn->dn_crypt = dnp->dn_crypt; dn->dn_bonustype = dnp->dn_bonustype; dn->dn_bonuslen = dnp->dn_bonuslen; dn->dn_maxblkid = dnp->dn_maxblkid; @@ -397,6 +398,7 @@ dn->dn_bonuslen = bonuslen; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; + dn->dn_crypt = ZIO_CRYPT_INHERIT; dn->dn_dirtyctx = 0; dn->dn_free_txg = 0; @@ -477,6 +479,7 @@ dn->dn_nblkptr = nblkptr; dn->dn_checksum = ZIO_CHECKSUM_INHERIT; dn->dn_compress = ZIO_COMPRESS_INHERIT; + dn->dn_crypt = ZIO_CRYPT_INHERIT; ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR); /* fix up the bonus db_size */ --- old/usr/src/uts/common/fs/zfs/dnode_sync.c Tue Feb 3 13:18:19 2009 +++ 
new/usr/src/uts/common/fs/zfs/dnode_sync.c Tue Feb 3 13:18:18 2009 @@ -577,6 +577,7 @@ */ dnp->dn_checksum = dn->dn_checksum; dnp->dn_compress = dn->dn_compress; + dnp->dn_crypt = dn->dn_crypt; mutex_exit(&dn->dn_mtx); --- old/usr/src/uts/common/fs/zfs/dsl_dataset.c Tue Feb 3 13:18:22 2009 +++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c Tue Feb 3 13:18:20 2009 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -930,7 +931,8 @@ dsl_sync_task_group_t *dstg; objset_t *os; dsl_dir_t *dd; - uint64_t obj; + uint64_t obj, cryptkeyobj = ds->ds_object; + spa_t *spa = dsl_dataset_get_spa(ds); if (dsl_dataset_is_snapshot(ds)) { /* Destroying a snapshot is simpler */ @@ -1009,11 +1011,16 @@ dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); + /* if it is successful, dsl_dir_destroy_sync will close the dd */ if (err) dsl_dir_close(dd, FTAG); out: dsl_dataset_disown(ds, tag); + + /* Remove the key from the keystore for encrypted datasets. */ + (void) spa_keystore_remove(spa, cryptkeyobj); + return (err); } @@ -1924,6 +1931,8 @@ ds->ds_reserved); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, ds->ds_phys->ds_guid); + dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_KEYSTATUS, + dsl_dataset_keystatus(ds)); if (ds->ds_phys->ds_next_snap_obj) { /* --- old/usr/src/uts/common/fs/zfs/dsl_prop.c Tue Feb 3 13:18:24 2009 +++ new/usr/src/uts/common/fs/zfs/dsl_prop.c Tue Feb 3 13:18:23 2009 @@ -53,12 +53,19 @@ (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop))) return (ENOENT); - if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { + switch (zfs_prop_get_type(prop)) { + case PROP_TYPE_STRING: if (intsz != 1) return (EOVERFLOW); (void) strncpy(buf, zfs_prop_default_string(prop), numint); - } else { + break; + case PROP_TYPE_BINARY: + /* + * Binary properties don't have a default value. 
+ */ + return (ENOENT); + default: if (intsz != 8 || numint < 1) return (EOVERFLOW); @@ -549,7 +556,7 @@ /* * Integer property */ - ASSERT(za.za_integer_length == 8); + ASSERT3U(za.za_integer_length, ==, 8); (void) nvlist_add_uint64(propval, ZPROP_VALUE, za.za_first_integer); } --- old/usr/src/uts/common/fs/zfs/dsl_scrub.c Tue Feb 3 13:18:25 2009 +++ new/usr/src/uts/common/fs/zfs/dsl_scrub.c Tue Feb 3 13:18:24 2009 @@ -401,7 +401,7 @@ zilog = zil_alloc(dp->dp_meta_objset, zh); (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, &zta, - claim_txg); + claim_txg, 0); zil_free(zilog); } --- old/usr/src/uts/common/fs/zfs/sha256.c Tue Feb 3 13:18:27 2009 +++ new/usr/src/uts/common/fs/zfs/sha256.c Tue Feb 3 13:18:26 2009 @@ -19,111 +19,94 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include +#include /* - * SHA-256 checksum, as specified in FIPS 180-3, available at: - * http://csrc.nist.gov/publications/PubsFIPS.html - * - * This is a very compact implementation of SHA-256. - * It is designed to be simple and portable, not to be fast. + * If we are building for Solaris then we use KCF in the kernel + * otherwise just use the system provided SHA256. */ +#include +#if defined(_KERNEL) +#include +#endif /* _KERNEL */ -/* - * The literal definitions of Ch() and Maj() according to FIPS 180-3 are: - * - * Ch(x, y, z) (x & y) ^ (~x & z) - * Maj(x, y, z) (x & y) ^ (x & z) ^ (y & z) - * - * We use equivalent logical reductions here that require one less op. 
- */ -#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y)))) -#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s))) -#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22)) -#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25)) -#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3)) -#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10)) -static const uint32_t SHA256_K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -static void -SHA256Transform(uint32_t *H, const uint8_t *cp) +void +zio_checksum_SHA256_common(const void *buf, uint64_t size, zio_cksum_t *zcp, + boolean_t truncate_digest) { - uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64]; + zio_cksum_t tmp; + SHA2_CTX ctx; - for (t = 0; t < 16; t++, cp += 4) - W[t] = (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | cp[3]; +#ifdef _KERNEL + crypto_data_t ddata, digest; + crypto_mechanism_t mech; - for (t = 16; t < 64; t++) - W[t] = sigma1(W[t - 2]) + W[t - 7] + - sigma0(W[t - 15]) + W[t - 16]; + mech.cm_type = crypto_mech2id(SUN_CKM_SHA256); + /* + * Might get CRYPTO_INVALID_MECH very early in boot + * when we are booting from ZFS because KCF hasn't + * been told the software list yet and 
hardware providers + * haven't registered yet. + * If this is the case fall back to using the sha2 module + * directly. + * In userland we are linked to libmd which has a SHA2*() + * set of APIs. + */ + if (mech.cm_type == CRYPTO_MECH_INVALID) { +#endif + SHA2Init(SHA256, &ctx); + SHA2Update(&ctx, buf, size); + SHA2Final(&tmp, &ctx); +#ifdef _KERNEL + } else { + mech.cm_param = NULL; + mech.cm_param_len = 0; - a = H[0]; b = H[1]; c = H[2]; d = H[3]; - e = H[4]; f = H[5]; g = H[6]; h = H[7]; + ddata.cd_format = CRYPTO_DATA_RAW; + ddata.cd_offset = 0; + ddata.cd_length = size; + ddata.cd_raw.iov_base = (char *)buf; + ddata.cd_raw.iov_len = size; - for (t = 0; t < 64; t++) { - T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t]; - T2 = SIGMA0(a) + Maj(a, b, c); - h = g; g = f; f = e; e = d + T1; - d = c; c = b; b = a; a = T1 + T2; + digest.cd_format = CRYPTO_DATA_RAW; + digest.cd_offset = 0; + digest.cd_length = sizeof (tmp); + digest.cd_raw.iov_base = (char *)&tmp; + digest.cd_raw.iov_len = sizeof (tmp); + + (void) crypto_digest(&mech, &ddata, &digest, NULL); } +#endif /* _KERNEL */ - H[0] += a; H[1] += b; H[2] += c; H[3] += d; - H[4] += e; H[5] += f; H[6] += g; H[7] += h; + zcp->zc_word[0] = BE_64(tmp.zc_word[0]); + zcp->zc_word[1] = BE_64(tmp.zc_word[1]); + if (!truncate_digest) { + zcp->zc_word[2] = BE_64(tmp.zc_word[2]); + zcp->zc_word[3] = BE_64(tmp.zc_word[3]); + } + } void zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp) { - uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, - 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; - uint8_t pad[128]; - int i, padsize; + zio_checksum_SHA256_common(buf, size, zcp, FALSE); +} - for (i = 0; i < (size & ~63ULL); i += 64) - SHA256Transform(H, (uint8_t *)buf + i); - - for (padsize = 0; i < size; i++) - pad[padsize++] = *((uint8_t *)buf + i); - - for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++) - pad[padsize] = 0; - - for (i = 56; i >= 0; i -= 8) - pad[padsize++] = (size << 
3) >> i; - - for (i = 0; i < padsize; i += 64) - SHA256Transform(H, pad + i); - - ZIO_SET_CHECKSUM(zcp, - (uint64_t)H[0] << 32 | H[1], - (uint64_t)H[2] << 32 | H[3], - (uint64_t)H[4] << 32 | H[5], - (uint64_t)H[6] << 32 | H[7]); +/* + * SHA256 truncated at 128 and stored in the first two words + * of the checksum. The last two words store the MAC. + */ +void +zio_checksum_SHAMAC(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + zio_checksum_SHA256_common(buf, size, zcp, TRUE); } --- old/usr/src/uts/common/fs/zfs/spa.c Tue Feb 3 13:18:29 2009 +++ new/usr/src/uts/common/fs/zfs/spa.c Tue Feb 3 13:18:28 2009 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -99,10 +100,20 @@ VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); - if (strval != NULL) - VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0); - else + if (strval != NULL) { + switch (zpool_prop_get_type(prop)) { + case PROP_TYPE_STRING: + VERIFY(nvlist_add_string(propval, + ZPROP_VALUE, strval) == 0); + break; + case PROP_TYPE_BINARY: + VERIFY(nvlist_add_uint8_array(propval, + ZPROP_VALUE, (uint8_t *)strval, intval) == 0); + break; + } + } else { VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0); + } VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0); nvlist_free(propval); @@ -227,6 +238,12 @@ dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); rw_exit(&dp->dp_config_rwlock); + } else if (prop == ZPOOL_PROP_KEYSTATUS) { + if (za.za_first_integer == + ZFS_CRYPT_KEY_UNDEFINED) + intval = za.za_first_integer; + else + intval = spa_keystatus(spa); } else { strval = NULL; intval = za.za_first_integer; @@ -249,7 +266,9 @@ kmem_free(strval, za.za_num_integers); break; } - spa_prop_add_list(*nvp, prop, strval, 0, src); + if (zpool_prop_get_type(prop) == PROP_TYPE_BINARY) + intval = za.za_num_integers; + spa_prop_add_list(*nvp, prop, strval, intval, src); kmem_free(strval, 
za.za_num_integers); break; @@ -328,7 +347,7 @@ error = nvpair_value_string(elem, &strval); if (!error) { - uint64_t compress; + uint64_t compress, crypt; if (strval == NULL || strval[0] == '\0') { objnum = zpool_prop_default_numeric( @@ -340,13 +359,25 @@ DS_MODE_USER | DS_MODE_READONLY, &os)) break; - /* We don't support gzip bootable datasets */ + /* + * We don't support gzip or encrypted + * datasets as the bootfs. + */ if ((error = dsl_prop_get_integer(strval, zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL)) == 0 && !BOOTFS_COMPRESS_VALID(compress)) { error = ENOTSUP; - } else { + } + if (!error && + (error = dsl_prop_get_integer(strval, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), + &crypt, NULL)) == 0 && + !BOOTFS_CRYPT_VALID(crypt)) { + error = ENOTSUP; + } + + if (!error) { objnum = dmu_objset_id(os); } dmu_objset_close(os); @@ -397,6 +428,31 @@ strcmp(slash, "/..") == 0) error = EINVAL; break; + case ZPOOL_PROP_KEYSTATUS: + if (spa_version(spa) < SPA_VERSION_CRYPTO) { + error = ENOTSUP; + break; + } + error = nvpair_value_uint64(elem, &intval); + /* + * Only the UNDEFINED and DEFINED values are + * stored on disk AVAILABLE/UNAVAILABLE are + * in memory only. Callers of spa_prop_get() + * never see DEFINED only one of + * UNDEFINED/UNAVAILABLE/AVAILABLE. + * So make sure we don't attempt to persist + * on disk AVAILABLE/UNAVAILABLE. 
+ */ + if (!error && intval > ZFS_CRYPT_KEY_DEFINED) { + error = EINVAL; + } + break; + case ZPOOL_PROP_KEYSOURCE: + if (spa_version(spa) < SPA_VERSION_CRYPTO) { + error = ENOTSUP; + break; + } + break; } if (error) @@ -566,6 +622,8 @@ avl_create(&spa->spa_errlist_last, spa_error_entry_compare, sizeof (spa_error_entry_t), offsetof(spa_error_entry_t, se_avl)); + + spa_keystore_init(spa); } /* @@ -604,6 +662,8 @@ */ spa_errlog_drain(spa); + spa_keystore_fini(spa); + avl_destroy(&spa->spa_errlist_scrub); avl_destroy(&spa->spa_errlist_last); @@ -689,6 +749,11 @@ mutex_exit(&spa->spa_async_root_lock); /* + * Remove the in memory crypto keystore + */ + spa_keystore_fini(spa); + + /* * Close the dsl pool. */ if (spa->spa_dsl_pool) { @@ -1994,6 +2059,13 @@ spa->spa_uberblock.ub_txg = txg - 1; + if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), + &version) != 0) + version = SPA_VERSION; + ASSERT(version <= SPA_VERSION); + spa->spa_uberblock.ub_version = version; + spa->spa_ubsync = spa->spa_uberblock; + if (props && (error = spa_prop_validate(spa, props))) { spa_unload(spa); spa_deactivate(spa); @@ -2002,13 +2074,6 @@ return (error); } - if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), - &version) != 0) - version = SPA_VERSION; - ASSERT(version <= SPA_VERSION); - spa->spa_uberblock.ub_version = version; - spa->spa_ubsync = spa->spa_uberblock; - /* * Create the root vdev. 
*/ @@ -3930,6 +3995,15 @@ VERIFY(zap_update(mos, spa->spa_pool_props_object, propname, 8, 1, &intval, tx) == 0); + } else if (nvpair_type(elem) == DATA_TYPE_UINT8_ARRAY) { + uint_t len; + ASSERT(proptype == PROP_TYPE_BINARY); + VERIFY(nvpair_value_uint8_array(elem, + (uint8_t **)&strval, + &len) == 0); + VERIFY3U(zap_update(mos, + spa->spa_pool_props_object, propname, + 1, len, strval, tx), ==, 0); } else { ASSERT(0); /* not allowed */ } --- old/usr/src/uts/common/fs/zfs/spa_misc.c Tue Feb 3 13:18:31 2009 +++ new/usr/src/uts/common/fs/zfs/spa_misc.c Tue Feb 3 13:18:30 2009 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1086,9 +1087,10 @@ } (void) snprintf(buf + strlen(buf), len - strlen(buf), - "%s %s %s %s birth=%llu fill=%llu cksum=%llx:%llx:%llx:%llx", + "%s %s %s %s %s birth=%llu fill=%llu cksum=%llx:%llx:%llx:%llx", zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name, zio_compress_table[BP_GET_COMPRESS(bp)].ci_name, + zio_crypt_table[BP_GET_CRYPT(bp)].ci_name, BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", BP_IS_GANG(bp) ? "gang" : "contiguous", (u_longlong_t)bp->blk_birth, --- old/usr/src/uts/common/fs/zfs/sys/arc.h Tue Feb 3 13:18:32 2009 +++ new/usr/src/uts/common/fs/zfs/sys/arc.h Tue Feb 3 13:18:31 2009 @@ -103,6 +103,7 @@ uint8_t wp_copies; uint8_t wp_dncompress, wp_oscompress; uint8_t wp_dnchecksum, wp_oschecksum; + uint8_t wp_dncrypt, wp_oscrypt; } writeprops_t; void write_policy(spa_t *spa, const writeprops_t *wp, zio_prop_t *zp); --- old/usr/src/uts/common/fs/zfs/sys/dmu.h Tue Feb 3 13:18:34 2009 +++ new/usr/src/uts/common/fs/zfs/sys/dmu.h Tue Feb 3 13:18:33 2009 @@ -298,6 +298,13 @@ dmu_tx_t *tx); /* + * Set the crypt property on a dnode. The new crypt algorithm will + * apply to all newly written blocks; existing blocks will not be affected. + */ +void dmu_object_set_crypt(objset_t *os, uint64_t object, uint8_t crypt, + dmu_tx_t *tx); + +/* * Decide how many copies of a given block we should make. 
Can be from * 1 to SPA_DVAS_PER_BP. */ @@ -475,7 +482,8 @@ uint8_t doi_indirection; /* 2 = dnode->indirect->data */ uint8_t doi_checksum; uint8_t doi_compress; - uint8_t doi_pad[5]; + uint8_t doi_crypt; + uint8_t doi_pad[4]; /* Values below are number of 512-byte blocks. */ uint64_t doi_physical_blks; /* data + metadata */ uint64_t doi_max_block_offset; @@ -486,6 +494,7 @@ typedef struct dmu_object_type_info { arc_byteswap_func_t *ot_byteswap; boolean_t ot_metadata; + boolean_t ot_encrypt; char *ot_name; } dmu_object_type_info_t; --- old/usr/src/uts/common/fs/zfs/sys/dmu_objset.h Tue Feb 3 13:18:35 2009 +++ new/usr/src/uts/common/fs/zfs/sys/dmu_objset.h Tue Feb 3 13:18:34 2009 @@ -69,6 +69,7 @@ uint8_t os_copies; /* can change, under dsl_dir's locks */ uint8_t os_primary_cache; /* can change, under dsl_dir's locks */ uint8_t os_secondary_cache; /* can change, under dsl_dir's locks */ + uint8_t os_crypt; /* can change, under dsl_dir's locks */ /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ --- old/usr/src/uts/common/fs/zfs/sys/dnode.h Tue Feb 3 13:18:37 2009 +++ new/usr/src/uts/common/fs/zfs/sys/dnode.h Tue Feb 3 13:18:36 2009 @@ -111,8 +111,8 @@ uint8_t dn_flags; /* DNODE_FLAG_* */ uint16_t dn_datablkszsec; /* data block size in 512b sectors */ uint16_t dn_bonuslen; /* length of dn_bonus */ - uint8_t dn_pad2[4]; - + uint8_t dn_crypt; /* ZIO_CRYPT type */ + uint8_t dn_pad2[3]; /* accounting is protected by dn_dirty_mtx */ uint64_t dn_maxblkid; /* largest allocated block ID */ uint64_t dn_used; /* bytes (or sectors) of disk space */ @@ -155,6 +155,7 @@ uint8_t dn_checksum; /* ZIO_CHECKSUM type */ uint8_t dn_compress; /* ZIO_COMPRESS type */ uint8_t dn_nlevels; + uint8_t dn_crypt; /* ZIO_CRYPT type */ uint8_t dn_indblkshift; uint8_t dn_datablkshift; /* zero if blksz not power of 2! 
*/ uint16_t dn_datablkszsec; /* in 512b sectors */ --- old/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h Tue Feb 3 13:18:38 2009 +++ new/usr/src/uts/common/fs/zfs/sys/dsl_deleg.h Tue Feb 3 13:18:38 2009 @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -26,8 +26,6 @@ #ifndef _SYS_DSL_DELEG_H #define _SYS_DSL_DELEG_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include @@ -51,6 +49,8 @@ #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" +#define ZFS_DELEG_PERM_CRYPTO_DSL "keyuse" +#define ZFS_DELEG_PERM_CRYPTO_DSL_CHANGE "keychange" /* * Note: the names of properties that are marked delegatable are also --- old/usr/src/uts/common/fs/zfs/sys/spa.h Tue Feb 3 13:18:40 2009 +++ new/usr/src/uts/common/fs/zfs/sys/spa.h Tue Feb 3 13:18:39 2009 @@ -136,7 +136,7 @@ * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |E| lvl | type | cksum | comp | PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * 7 | padding | + * 7 | padding | crypt | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ @@ -165,6 +165,7 @@ * GRID RAID-Z layout information (reserved for future use) * cksum checksum function * comp compression function + * crypt encryption function (algorithm/mode/keylength) * G gang block indicator * E endianness * type DMU object type @@ -175,8 +176,8 @@ */ typedef struct blkptr { dva_t blk_dva[3]; /* 128-bit Data Virtual Address */ - uint64_t blk_prop; /* size, compression, type, etc */ - uint64_t blk_pad[3]; /* Extra space for the future */ + uint64_t blk_prop[2]; /* size, compression, type, etc */ + uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_birth; /* transaction 
group at birth */ uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ @@ -209,30 +210,33 @@ #define BP_GET_LSIZE(bp) \ (BP_IS_HOLE(bp) ? 0 : \ - BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1)) + BF64_GET_SB((bp)->blk_prop[0], 0, 16, SPA_MINBLOCKSHIFT, 1)) #define BP_SET_LSIZE(bp, x) \ - BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) + BF64_SET_SB((bp)->blk_prop[0], 0, 16, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_PSIZE(bp) \ - BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) + BF64_GET_SB((bp)->blk_prop[0], 16, 16, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ - BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) + BF64_SET_SB((bp)->blk_prop[0], 16, 16, SPA_MINBLOCKSHIFT, 1, x) -#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) -#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) +#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop[0], 32, 8) +#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop[0], 32, 8, x) -#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) -#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) +#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop[0], 40, 8) +#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop[0], 40, 8, x) -#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) -#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) +#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop[0], 48, 8) +#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop[0], 48, 8, x) -#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) -#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) +#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop[0], 56, 5) +#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop[0], 56, 5, x) -#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) -#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) +#define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop[0], 63, 1)) +#define 
BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop[0], 63, 1, x) +#define BP_GET_CRYPT(bp) BF64_GET((bp)->blk_prop[1], 0, 8) +#define BP_SET_CRYPT(bp, x) BF64_SET((bp)->blk_prop[1], 0, 8, x) + #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) @@ -284,7 +288,8 @@ (bp)->blk_dva[1].dva_word[1] = 0; \ (bp)->blk_dva[2].dva_word[0] = 0; \ (bp)->blk_dva[2].dva_word[1] = 0; \ - (bp)->blk_prop = 0; \ + (bp)->blk_prop[0] = 0; \ + (bp)->blk_prop[1] = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ (bp)->blk_pad[2] = 0; \ --- old/usr/src/uts/common/fs/zfs/sys/spa_impl.h Tue Feb 3 13:18:41 2009 +++ new/usr/src/uts/common/fs/zfs/sys/spa_impl.h Tue Feb 3 13:18:40 2009 @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -172,6 +173,7 @@ int spa_minref; /* num refs when first opened */ int spa_mode; /* FREAD | FWRITE */ spa_log_state_t spa_log_state; /* log state */ + spa_keystore_t *spa_keystore; /* Crypto keystore */ /* * spa_refcnt & spa_config_lock must be the last elements * because refcount_t changes size based on compilation options. 
@@ -190,6 +192,9 @@ ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) || \ (compress) == ZIO_COMPRESS_OFF) +#define BOOTFS_CRYPT_VALID(crypt) \ + ((crypt) == ZIO_CRYPT_OFF) + #ifdef __cplusplus } #endif --- old/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Tue Feb 3 13:18:43 2009 +++ new/usr/src/uts/common/fs/zfs/sys/zfs_ioctl.h Tue Feb 3 13:18:42 2009 @@ -26,12 +26,11 @@ #ifndef _SYS_ZFS_IOCTL_H #define _SYS_ZFS_IOCTL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include #include +#include #ifdef _KERNEL #include @@ -85,8 +84,9 @@ uint32_t drr_bonuslen; uint8_t drr_checksum; uint8_t drr_compress; - uint8_t drr_pad[6]; - /* bonus content follows */ + uint8_t drr_crypt; + uint8_t drr_pad[5]; + /* bonus content follows */ } drr_object; struct drr_freeobjects { uint64_t drr_firstobj; @@ -166,18 +166,20 @@ dmu_objset_stats_t zc_objset_stats; struct drr_begin zc_begin_record; zinject_record_t zc_inject_record; + zfs_ioc_crypto_t zc_crypto; } zfs_cmd_t; #define ZVOL_MAX_MINOR (1 << 16) #define ZFS_MIN_MINOR (ZVOL_MAX_MINOR + 1) -#ifdef _KERNEL - typedef struct zfs_creat { nvlist_t *zct_zplprops; nvlist_t *zct_props; + zio_crypt_key_t *zct_cryptkey; } zfs_creat_t; +#ifdef _KERNEL + extern dev_info_t *zfs_dip; extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); --- old/usr/src/uts/common/fs/zfs/sys/zil.h Tue Feb 3 13:18:44 2009 +++ new/usr/src/uts/common/fs/zfs/sys/zil.h Tue Feb 3 13:18:43 2009 @@ -56,7 +56,8 @@ uint64_t zh_replay_seq; /* highest replayed sequence number */ blkptr_t zh_log; /* log chain */ uint64_t zh_claim_seq; /* highest claimed sequence number */ - uint64_t zh_pad[5]; + uint64_t zh_last_seq; /* last log block sequence number used */ + uint64_t zh_pad[4]; } zil_header_t; /* @@ -68,7 +69,7 @@ * number passed in the blk_cksum field of the blkptr_t */ typedef struct zil_trailer { - uint64_t zit_pad; + uint64_t zit_mac; blkptr_t zit_next_blk; /* next block in chain */ uint64_t zit_nused; /* bytes in log block used */ zio_block_tail_t 
zit_bt; /* block trailer */ @@ -338,7 +339,7 @@ typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf, zio_t *zio); extern uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg); + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, int zio_flags); extern void zil_init(void); extern void zil_fini(void); --- old/usr/src/uts/common/fs/zfs/sys/zil_impl.h Tue Feb 3 13:18:45 2009 +++ new/usr/src/uts/common/fs/zfs/sys/zil_impl.h Tue Feb 3 13:18:44 2009 @@ -64,7 +64,7 @@ kmutex_t zl_lock; /* protects most zilog_t fields */ struct dsl_pool *zl_dmu_pool; /* DSL pool */ spa_t *zl_spa; /* handle for read/write log */ - const zil_header_t *zl_header; /* log header buffer */ + zil_header_t *zl_header; /* log header buffer */ objset_t *zl_os; /* object set we're logging */ zil_get_data_t *zl_get_data; /* callback to get object content */ zio_t *zl_root_zio; /* log writer root zio */ --- old/usr/src/uts/common/fs/zfs/sys/zio.h Tue Feb 3 13:18:47 2009 +++ new/usr/src/uts/common/fs/zfs/sys/zio.h Tue Feb 3 13:18:46 2009 @@ -34,6 +34,8 @@ #include #include +struct zio_crypt_key; + #ifdef __cplusplus extern "C" { #endif @@ -73,6 +75,7 @@ ZIO_CHECKSUM_FLETCHER_2, ZIO_CHECKSUM_FLETCHER_4, ZIO_CHECKSUM_SHA256, + ZIO_CHECKSUM_SHA256_MAC, /* SHA256 Trunc 128 + 16byte CCM MAC */ ZIO_CHECKSUM_FUNCTIONS }; @@ -104,6 +107,18 @@ #define ZIO_FAILURE_MODE_CONTINUE 1 #define ZIO_FAILURE_MODE_PANIC 2 +enum zio_crypt { + ZIO_CRYPT_INHERIT = 0, + ZIO_CRYPT_ON, + ZIO_CRYPT_OFF, + ZIO_CRYPT_AES_128_CCM, + ZIO_CRYPT_AES_256_CCM, + ZIO_CRYPT_FUNCTIONS +}; + +#define ZIO_CRYPT_ON_VALUE ZIO_CRYPT_AES_256_CCM +#define ZIO_CRYPT_DEFAULT ZIO_CRYPT_OFF + #define ZIO_PRIORITY_NOW (zio_priority_table[0]) #define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1]) #define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2]) @@ -141,6 +156,8 @@ #define ZIO_FLAG_GANG_CHILD 0x20000 #define ZIO_FLAG_RAW 0x40000 +#define 
ZIO_FLAG_ZILOG_REPLAY 0x80000 + #define ZIO_FLAG_GANG_INHERIT \ (ZIO_FLAG_CANFAIL | \ ZIO_FLAG_SPECULATIVE | \ @@ -230,6 +247,7 @@ typedef struct zio_prop { enum zio_checksum zp_checksum; enum zio_compress zp_compress; + enum zio_crypt zp_crypt; dmu_object_type_t zp_type; uint8_t zp_level; uint8_t zp_ndvas; @@ -375,7 +393,7 @@ boolean_t labels); extern int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, - blkptr_t *old_bp, uint64_t txg); + blkptr_t *old_bp, uint64_t txg, int crypt); extern void zio_free_blk(spa_t *spa, blkptr_t *bp, uint64_t txg); extern void zio_flush(zio_t *zio, vdev_t *vd); @@ -413,6 +431,7 @@ extern uint8_t zio_checksum_select(uint8_t child, uint8_t parent); extern uint8_t zio_compress_select(uint8_t child, uint8_t parent); +extern uint8_t zio_crypt_select(uint8_t child, uint8_t parent); extern void zio_suspend(spa_t *spa, zio_t *zio); extern void zio_resume(spa_t *spa); --- old/usr/src/uts/common/fs/zfs/sys/zio_checksum.h Tue Feb 3 13:18:48 2009 +++ new/usr/src/uts/common/fs/zfs/sys/zio_checksum.h Tue Feb 3 13:18:47 2009 @@ -61,6 +61,7 @@ extern zio_checksum_t fletcher_4_incremental_byteswap; extern zio_checksum_t zio_checksum_SHA256; +extern zio_checksum_t zio_checksum_SHAMAC; extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, void *data, uint64_t size); --- /dev/null Tue Feb 3 13:18:50 2009 +++ new/usr/src/uts/common/fs/zfs/sys/zio_crypt.h Tue Feb 3 13:18:48 2009 @@ -0,0 +1,242 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _SYS_ZIO_CRYPT_H +#define _SYS_ZIO_CRYPT_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Table of supported crypto algorithms, modes and keylengths. + */ +typedef struct zio_crypt_info { + crypto_mech_name_t ci_mechname; + size_t ci_keylen; + size_t ci_ivlen; + size_t ci_maclen; + size_t ci_zil_maclen; + char *ci_name; +} zio_crypt_info_t; + +extern zio_crypt_info_t zio_crypt_table[ZIO_CRYPT_FUNCTIONS]; + +#define ZPOOL_CRYPTO_KEK_LEN 32 + +/* + * Key management types + */ +typedef enum zfs_crypt_key_type { + ZFS_KEYSCOPE_INHERIT = 0, + ZFS_KEYSCOPE_POOL, /* Dataset key wrapped by pool key */ + ZFS_KEYSCOPE_DSL, /* Dataset specific key */ +} zfs_crypt_key_type_t; + +#define ZFS_KEYSCOPE_DEFAULT ZFS_KEYSCOPE_POOL + +/* + * SPA/DSL key: zio_crypt_key_t + * + * We use ref counting on the key to ensure it doesn't disappear after + * it is handed out but while it is still in use by an encrypt/decrypt. + * + * The refcounting is done in zio_crypt_key_hold/zio_crypt_key_release + * and checked in spa_keystore_remove. + */ +typedef struct zio_crypt_key { + crypto_key_t zk_key; + crypto_ctx_template_t zk_ctx_tmpl; + uint64_t zk_crypt; + zfs_crypt_key_type_t zk_keyscope; + refcount_t zk_refcnt; + boolean_t zk_free_on_release; +} zio_crypt_key_t; + +/* + * keystatus is partially persistent and partially temporary. + * There are two states that persist on disk: undefined and defined. 
+ * If the on disk state is defined we return the appropriate "in memory" + * state of available or unavailable depending on whether or not the + * key is in the keystore. + * + * Old pool versions and datasets with encryption=off always have + * a keystatus of undefined. + */ +typedef enum zfs_crypt_key_status { + ZFS_CRYPT_KEY_UNDEFINED = 0, /* Wrapping key not defined (DISK) */ + ZFS_CRYPT_KEY_DEFINED, /* Wrapping key defined (DISK) */ + ZFS_CRYPT_KEY_UNAVAILABLE, /* Key defined, but not loaded (MEM) */ + ZFS_CRYPT_KEY_AVAILABLE /* Key defined and loaded (MEM) */ +} zfs_crypt_key_status_t; + + +/* + * In memory spa keystore. + */ +typedef struct spa_keystore_node spa_keystore_node_t; + +typedef struct spa_keystore { + krwlock_t sk_lock; + avl_tree_t sk_keyscope_pool; + avl_tree_t sk_keyscope_dsl; + zio_crypt_key_t *sk_spa_kek; + zio_crypt_key_t *sk_l2arc_key; +} spa_keystore_t; + + + +/* + * ZFS_IOC_CRYPTO key management interface. + * + * The size of the keydata struct element is hardcoded at 1k. + * This is the same as the largest PIN that the crypto framework + * supports and a larger number of *bytes* than any current key value + * in *bytes*, this is large enough even for case (2) below. + * + * The zfs_ioc_crypto_t is only supported when embedded in a + * zfs_cmd_t since the latter contains the information on the SPA/DSL + * the key operation relates to. + * + * This needs to cover the following key types: + * + * 1. Raw key (Phase 1) + * The userland passphrase was converted to a key using PKCS#5 PBE + * before being passed over the ioctl. + * Or it was a raw key read from a file in userland. + * zic_keydatalen is the length in *BYTES* + * zic_keydata is the raw key value + * 2. Wrapped key (Phase 2) + * This was taken from a "userland" PKCS\#11 provider such as a + * smartcard ENOSUP + * 3. Key description + PIN (Phase 2) + * The PIN to be used to login to the token described in the + * kek property. + * zic_keydatalen is the PIN length. 
+ * zic_keydata is the PIN value + * The token object locator info is stored in the kek pool property. + * An equivalent property will be needed for dataset token keys. + * + * Key management commands cover the following cases: + * + * 1. Load of SPA or DSL wrapping key + * 2. Unload of SPA or DSL wrapping key + * 3. Change of SPA or DSL wrapping key + * + * zfs_ioc_crypto_t must be same size in 32 & 64 compilation environments + * since it is passed over the /dev/zfs ioctl. + * + */ +typedef enum zfs_ioc_crypto_cmd { + ZFS_IOC_CRYPTO_NO_OP = 0, + ZFS_IOC_CRYPTO_LOAD_KEY_SPA, + ZFS_IOC_CRYPTO_UNLOAD_KEY_SPA, + ZFS_IOC_CRYPTO_CHANGE_KEY_SPA, + ZFS_IOC_CRYPTO_LOAD_KEY_DSL, + ZFS_IOC_CRYPTO_UNLOAD_KEY_DSL, + ZFS_IOC_CRYPTO_CHANGE_KEY_DSL, +} zfs_ioc_crypto_cmd_t; + +typedef enum zfs_ioc_crypto_key_type { + ZFS_IOC_CRYPTO_KEY_TYPE_RAW, + ZFS_IOC_CRYPTO_KEY_TYPE_TOKEN +} zfs_ioc_crypto_key_type_t; + +typedef struct zfs_ioc_crypt { + zfs_ioc_crypto_cmd_t zic_cmd; + zfs_ioc_crypto_key_type_t zic_keytype; + uint64_t zic_crypt; + uint64_t zic_keyscope; + uint64_t zic_salt; + uint64_t zic_keydatalen; + uint64_t zic_keydata; /* really (char *) */ +} zfs_ioc_crypto_t; + + +/* + * Entry points into zio_crypt.o. + * The function prefix is the module/layer we are calling from. 
+ */ +#ifdef _KERNEL +extern int spa_crypto_key_load(spa_t *spa, zfs_ioc_crypto_t *key); +extern int spa_crypto_key_unload(spa_t *spa); +extern int dsl_crypto_key_load(char *dsname, zfs_ioc_crypto_t *key); +extern int dsl_crypto_key_unload(char *dsname); +#endif /* _KERNEL */ + +extern zfs_crypt_key_status_t spa_crypto_keystatus(spa_t *spa); + +extern zfs_crypt_key_status_t dsl_dataset_keystatus(dsl_dataset_t *); +extern int dsl_crypto_key_gen(dsl_dataset_t *ds, void *arg, + cred_t *cr, dmu_tx_t *tx); +extern int dsl_crypto_key_clone(dsl_dataset_t *ds, dsl_dataset_t *clone_parent, + cred_t *cr, dmu_tx_t *tx); +extern int zio_crypt_key_from_ioc(zfs_ioc_crypto_t *ioc_key, + zio_crypt_key_t **zck); + +extern boolean_t zio_crypt_key_free(zio_crypt_key_t *key); +extern zio_crypt_key_t *zio_crypt_key_allocate(); + +extern boolean_t zio_crypt_mech_available(uint64_t crypt); + +extern void zio_crypt_key_release(zio_crypt_key_t *key, void *tag); +extern void zio_crypt_key_hold(zio_crypt_key_t *key, void *tag); +extern zio_crypt_key_t *zio_crypt_key_lookup(spa_t *spa, + uint64_t objset, int crypt); + +extern int zio_encrypt_data(int crypt, spa_t *spa, + zbookmark_t *bookmark, uint64_t txg, int type, void *src, + uint64_t srcsize, void **destp, uint64_t *destbufsizep, void **macp); + +extern int zio_decrypt_data(int crypt, spa_t *spa, + zbookmark_t *bookmark, uint64_t txg, int type, void *src, + uint64_t srcsize, void *mac, void *dest, uint64_t destsize); + +extern void spa_keystore_init(spa_t *spa); +extern void spa_keystore_fini(spa_t *spa); +extern int spa_keystore_remove(spa_t *spa, uint64_t os); +extern zfs_crypt_key_status_t spa_keystatus(spa_t *spa); +extern void spa_key_hold(spa_t *spa); +extern void spa_key_release(spa_t *spa); + +extern int spa_crypt_key_change(spa_t *spa, zfs_ioc_crypto_t *key); +extern int dsl_crypt_key_change(char *dsname, zfs_ioc_crypto_t *key); + +extern boolean_t l2arc_encrypt_buf(spa_t *spa, dva_t *dva, + const void* ibuf, size_t ibufsz, void 
*obuf); +extern boolean_t l2arc_decrypt_buf(spa_t *spa, dva_t *dva, + void* buf, size_t bufsz); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZIO_CRYPT_H */ --- old/usr/src/uts/common/fs/zfs/zfs_ioctl.c Tue Feb 3 13:18:51 2009 +++ new/usr/src/uts/common/fs/zfs/zfs_ioctl.c Tue Feb 3 13:18:50 2009 @@ -63,6 +63,7 @@ #include #include #include +#include #include "zfs_namecheck.h" #include "zfs_prop.h" @@ -78,6 +79,9 @@ typedef int zfs_ioc_func_t(zfs_cmd_t *); typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +static int zfs_ioc_crypto(zfs_cmd_t *zc); +static int zfs_ioc_pool_destroy(zfs_cmd_t *zc); +static int zfs_ioc_destroy(zfs_cmd_t *zc); typedef struct zfs_ioc_vec { zfs_ioc_func_t *zvec_func; @@ -85,7 +89,8 @@ enum { NO_NAME, POOL_NAME, - DATASET_NAME + DATASET_NAME, + POOL_OR_DATASET_NAME } zvec_namecheck; boolean_t zvec_his_log; } zfs_ioc_vec_t; @@ -681,6 +686,49 @@ } } +static int +zfs_secpolicy_crypto(zfs_cmd_t *zc, cred_t *cr) +{ + int error = 0; + /* + * Any operation to do with the pool level key requires + * us to be in the global zone, and is similar to an import + * or other reconfig operation. + * + * For datasets loading the key is a delegated permission + * We use zfs_secpolicy_write_perms since this correctly deals + * with the dataset in a zone case. 
+ */ + switch (zc->zc_crypto.zic_cmd) { + case ZFS_IOC_CRYPTO_LOAD_KEY_DSL: + /*FALLTHROUGH*/ + case ZFS_IOC_CRYPTO_UNLOAD_KEY_DSL: + error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_CRYPTO_DSL, cr); + break; + case ZFS_IOC_CRYPTO_CHANGE_KEY_DSL: + error = zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_CRYPTO_DSL_CHANGE, cr); + break; + case ZFS_IOC_CRYPTO_LOAD_KEY_SPA: + /*FALLTHROUGH*/ + case ZFS_IOC_CRYPTO_UNLOAD_KEY_SPA: + /*FALLTHROUGH*/ + case ZFS_IOC_CRYPTO_CHANGE_KEY_SPA: + error = zfs_secpolicy_config(zc, cr); + break; + default: + /* + * Ensure that if we add other crypto commands + * to the ioctl there is a policy for them + */ + error = EINVAL; + break; + } + return (error); +} + + /* * Returns the nvlist as specified by the user in the zfs_cmd_t. */ @@ -799,6 +847,12 @@ if (buf != NULL) history_str_free(buf); + if (!error && zc->zc_crypto.zic_cmd > ZFS_IOC_CRYPTO_NO_OP) { + error = zfs_ioc_crypto(zc); + if (error) + (void) zfs_ioc_pool_destroy(zc); + } + pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); @@ -1549,8 +1603,7 @@ default: if (nvpair_type(elem) == DATA_TYPE_STRING) { - if (zfs_prop_get_type(prop) != - PROP_TYPE_STRING) + if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) return (EINVAL); VERIFY(nvpair_value_string(elem, &strval) == 0); if ((error = dsl_prop_set(name, @@ -1567,6 +1620,8 @@ break; case PROP_TYPE_STRING: return (EINVAL); + case PROP_TYPE_BINARY: + return (EINVAL); case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) @@ -1581,6 +1636,16 @@ if ((error = dsl_prop_set(name, propname, 8, 1, &intval)) != 0) return (error); + } else if (nvpair_type(elem) == DATA_TYPE_UINT8_ARRAY) { + if (zfs_prop_get_type(prop) != PROP_TYPE_BINARY) + return (EINVAL); + VERIFY(nvpair_value_uint8_array(elem, + (uchar_t **)&strval, + (uint_t *)&intval) == 0); + if ((error = dsl_prop_set(name, propname, 1, + intval, strval)) != 0) + return (error); + } else { return (EINVAL); } @@ -2046,6 
+2111,7 @@ return (error); } + /* * inputs: * zc_objset_type type of objset to create (fs vs zvol) @@ -2064,6 +2130,9 @@ nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); dmu_objset_type_t type = zc->zc_objset_type; + uint64_t crypt = ZIO_CRYPT_DEFAULT; + uint64_t keyscope = ZFS_KEYSCOPE_DEFAULT; + boolean_t faked_inheritance = B_FALSE; switch (type) { @@ -2090,6 +2159,7 @@ zct.zct_zplprops = NULL; zct.zct_props = nvprops; + zct.zct_cryptkey = NULL; if (zc->zc_value[0] != '\0') { /* @@ -2172,6 +2242,71 @@ return (error); } } + + /* + * For encrypted datasets we may have key material + * to pass on in zct. + * + * This really only applies to datasets with keyscope=dataset, + * so the only ioc_crypto command we care about is a + * dataset key load. + * + * However we might be inheriting crypt & keyscope and if + * so hints to this are passed down in zct. + * + * Need to sanity check what is coming into the ioctl. + * If encryption is on and keyscope=dataset then we must + * have a key passed down; not doing so will cause problems + * creating the dataset later. + * + * If we do have a key but we don't actually need it that + * isn't a big issue, but probably is a bug in userland + * somewhere. + * + * The only crypto command we should see when creating + * a dataset is ZFS_IOC_CRYPTO_LOAD_KEY_DSL, anything else + * is an error at this stage. 
+ * + */ + + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt) != 0 && + zc->zc_crypto.zic_crypt != ZIO_CRYPT_INHERIT) { + crypt = zc->zc_crypto.zic_crypt; + faked_inheritance = B_TRUE; + } + + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), &keyscope) != 0 && + zc->zc_crypto.zic_keyscope != ZFS_KEYSCOPE_INHERIT) { + keyscope = zc->zc_crypto.zic_keyscope; + faked_inheritance = B_TRUE; + } + + if (crypt != ZIO_CRYPT_OFF && keyscope == ZFS_KEYSCOPE_DSL && + zc->zc_crypto.zic_cmd != ZFS_IOC_CRYPTO_LOAD_KEY_DSL) + return (EINVAL); + + if (zc->zc_crypto.zic_cmd == ZFS_IOC_CRYPTO_LOAD_KEY_DSL) { + if (crypt == ZIO_CRYPT_OFF || + keyscope != ZFS_KEYSCOPE_DSL) + return (EINVAL); + + error = zio_crypt_key_from_ioc(&zc->zc_crypto, + &zct.zct_cryptkey); + if (error != 0) { + nvlist_free(nvprops); + nvlist_free(zct.zct_zplprops); + return (error); + } + } else if (zc->zc_crypto.zic_cmd != ZFS_IOC_CRYPTO_NO_OP) { + return (EINVAL); + } else if (faked_inheritance) { + zct.zct_cryptkey = zio_crypt_key_allocate(); + zct.zct_cryptkey->zk_crypt = crypt; + zct.zct_cryptkey->zk_keyscope = keyscope; + } + error = dmu_objset_create(zc->zc_name, type, NULL, is_insensitive ? 
DS_FLAG_CI_DATASET : 0, cbfunc, &zct); nvlist_free(zct.zct_zplprops); @@ -2185,6 +2320,9 @@ (void) dmu_objset_destroy(zc->zc_name); } nvlist_free(nvprops); + if (faked_inheritance) { + (void) zio_crypt_key_free(zct.zct_cryptkey); + } return (error); } @@ -2924,6 +3062,48 @@ } +static int +zfs_ioc_crypto(zfs_cmd_t *zc) +{ + spa_t *spa; + int error; + + if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) + return (error); + + if (spa_version(spa) < SPA_VERSION_CRYPTO) { + spa_close(spa, FTAG); + return (ENOTSUP); + } + + switch (zc->zc_crypto.zic_cmd) { + case ZFS_IOC_CRYPTO_LOAD_KEY_SPA: + error = spa_crypto_key_load(spa, &zc->zc_crypto); + break; + case ZFS_IOC_CRYPTO_UNLOAD_KEY_SPA: + error = spa_crypto_key_unload(spa); + break; + case ZFS_IOC_CRYPTO_CHANGE_KEY_SPA: + error = spa_crypt_key_change(spa, &zc->zc_crypto); + break; + case ZFS_IOC_CRYPTO_LOAD_KEY_DSL: + error = dsl_crypto_key_load(zc->zc_name, &zc->zc_crypto); + break; + case ZFS_IOC_CRYPTO_UNLOAD_KEY_DSL: + error = dsl_crypto_key_unload(zc->zc_name); + break; + case ZFS_IOC_CRYPTO_CHANGE_KEY_DSL: + error = dsl_crypt_key_change(zc->zc_name, &zc->zc_crypto); + break; + default: + error = ENOTSUP; + } + + spa_close(spa, FTAG); + + return (error); +} + /* * pool create, destroy, and export don't log the history as part of * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export @@ -2980,6 +3160,7 @@ DATASET_NAME, B_FALSE }, { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE }, { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE }, + { zfs_ioc_crypto, zfs_secpolicy_crypto, POOL_OR_DATASET_NAME, B_TRUE} }; static int @@ -3021,7 +3202,11 @@ if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) error = EINVAL; break; - + case POOL_OR_DATASET_NAME: + if (!(dataset_namecheck(zc->zc_name, NULL, NULL) == 0 || + pool_namecheck(zc->zc_name, NULL, NULL) == 0)) + error = EINVAL; + break; case NO_NAME: break; } --- old/usr/src/uts/common/fs/zfs/zfs_log.c Tue Feb 3 13:18:53 2009 
+++ new/usr/src/uts/common/fs/zfs/zfs_log.c Tue Feb 3 13:18:52 2009 @@ -507,7 +507,9 @@ * we retrieve the data using the dmu. */ slogging = spa_has_slogs(zilog->zl_spa); - if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz) + if (resid > zfs_immediate_write_sz && !slogging && + resid <= zp->z_blksz && + zilog->zl_os->os->os_crypt == ZIO_CRYPT_OFF) write_state = WR_INDIRECT; else if (ioflag & (FSYNC | FDSYNC)) write_state = WR_COPIED; --- old/usr/src/uts/common/fs/zfs/zil.c Tue Feb 3 13:18:55 2009 +++ new/usr/src/uts/common/fs/zfs/zil.c Tue Feb 3 13:18:54 2009 @@ -144,7 +144,7 @@ zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); - zc->zc_word[ZIL_ZC_SEQ] = 1ULL; + zc->zc_word[ZIL_ZC_SEQ] = ++zilog->zl_header->zh_last_seq; } /* @@ -151,11 +151,12 @@ * Read a log block, make sure it's valid, and byteswap it if necessary. */ static int -zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, arc_buf_t **abufpp) +zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, char **datapp, + int flags) { - blkptr_t blk = *bp; + char *buf; + uint64_t blksz = BP_GET_LSIZE(bp); zbookmark_t zb; - uint32_t aflags = ARC_WAIT; int error; zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET]; @@ -163,20 +164,19 @@ zb.zb_level = -1; zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ]; - *abufpp = NULL; - /* * We shouldn't be doing any scrubbing while we're doing log * replay, it's OK to not lock. 
*/ - error = arc_read_nolock(NULL, zilog->zl_spa, &blk, - arc_getbuf_func, abufpp, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | - ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB, &aflags, &zb); + buf = zio_data_buf_alloc(SPA_MAXBLOCKSIZE); + *datapp = buf; + error = zio_wait(zio_read(NULL, zilog->zl_spa, bp, buf, blksz, + NULL, NULL, ZIO_PRIORITY_SYNC_READ, + flags | ZIO_FLAG_SCRUB | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, + &zb)); if (error == 0) { - char *data = (*abufpp)->b_data; - uint64_t blksz = BP_GET_LSIZE(bp); - zil_trailer_t *ztp = (zil_trailer_t *)(data + blksz) - 1; + zil_trailer_t *ztp = (zil_trailer_t *)(buf + blksz) - 1; zio_cksum_t cksum = bp->blk_cksum; /* @@ -196,8 +196,8 @@ } if (error) { - VERIFY(arc_buf_remove_ref(*abufpp, abufpp) == 1); - *abufpp = NULL; + zio_data_buf_free(buf, SPA_MAXBLOCKSIZE); + *datapp = NULL; } } @@ -212,7 +212,7 @@ */ uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, - zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) + zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, int zio_flags) { const zil_header_t *zh = zilog->zl_header; uint64_t claim_seq = zh->zh_claim_seq; @@ -219,7 +219,6 @@ uint64_t seq = 0; uint64_t max_seq = 0; blkptr_t blk = zh->zh_log; - arc_buf_t *abuf; char *lrbuf, *lrp; zil_trailer_t *ztp; int reclen, error; @@ -246,7 +245,7 @@ ASSERT(max_seq < seq); max_seq = seq; - error = zil_read_log_block(zilog, &blk, &abuf); + error = zil_read_log_block(zilog, &blk, &lrbuf, zio_flags); if (parse_blk_func != NULL) parse_blk_func(zilog, &blk, arg, txg); @@ -254,12 +253,11 @@ if (error) break; - lrbuf = abuf->b_data; ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; blk = ztp->zit_next_blk; if (parse_lr_func == NULL) { - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + zio_data_buf_free(lrbuf, SPA_MAXBLOCKSIZE); continue; } @@ -269,7 +267,7 @@ ASSERT3U(reclen, >=, sizeof (lr_t)); parse_lr_func(zilog, lr, arg, txg); } - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + 
zio_data_buf_free(lrbuf, SPA_MAXBLOCKSIZE); } zil_dva_tree_fini(&zilog->zl_dva_tree); @@ -366,7 +364,7 @@ } error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk, - NULL, txg); + NULL, txg, zilog->zl_os->os->os_crypt); if (error == 0) zil_init_log_chain(zilog, &blk); @@ -462,7 +460,7 @@ } else { if (!keep_first) { (void) zil_parse(zilog, zil_free_log_block, - zil_free_log_record, tx, zh->zh_claim_txg); + zil_free_log_record, tx, zh->zh_claim_txg, 0); } } mutex_exit(&zilog->zl_lock); @@ -498,7 +496,7 @@ ASSERT(zilog->zl_lwb_list.list_head.list_next == zilog->zl_lwb_list.list_head.list_prev); (void) zil_parse(zilog, zil_free_log_block, zil_free_log_record, - tx, zh->zh_claim_txg); + tx, zh->zh_claim_txg, 0); } int @@ -531,7 +529,8 @@ if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { zh->zh_claim_txg = first_txg; zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block, - zil_claim_log_record, tx, first_txg); + zil_claim_log_record, tx, first_txg, 0); + zh->zh_last_seq = MAX(zh->zh_last_seq, zh->zh_claim_seq); dsl_dataset_dirty(dmu_objset_ds(os), tx); } @@ -552,7 +551,6 @@ zilog_t *zilog; zil_header_t *zh; blkptr_t blk; - arc_buf_t *abuf; objset_t *os; char *lrbuf; zil_trailer_t *ztp; @@ -573,13 +571,12 @@ } for (;;) { - error = zil_read_log_block(zilog, &blk, &abuf); + error = zil_read_log_block(zilog, &blk, &lrbuf, 0); if (error) break; - lrbuf = abuf->b_data; ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; blk = ztp->zit_next_blk; - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + zio_data_buf_free(lrbuf, SPA_MAXBLOCKSIZE); } dmu_objset_close(os); if (error == ECKSUM) @@ -710,7 +707,7 @@ zilog_t *zilog = lwb->lwb_zilog; ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF); - ASSERT(BP_GET_CHECKSUM(zio->io_bp) == ZIO_CHECKSUM_ZILOG); + ASSERT3U(BP_GET_CHECKSUM(zio->io_bp), ==, ZIO_CHECKSUM_ZILOG); ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG); ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); ASSERT(BP_GET_BYTEORDER(zio->io_bp) == 
ZFS_HOST_BYTEORDER); @@ -799,7 +796,14 @@ BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ - error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg); + error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, + txg, zilog->zl_os->os->os_crypt); + if (zilog->zl_os->os->os_crypt == ZIO_CRYPT_OFF || + zilog->zl_os->os->os_crypt == ZIO_CRYPT_INHERIT) { + ztp->zit_mac = 0; + } + ztp->zit_nused = lwb->lwb_nused; + ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; if (error) { dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg); @@ -816,9 +820,6 @@ * Since we've just experienced an allocation failure so we * terminate the current lwb and send it on its way. */ - ztp->zit_pad = 0; - ztp->zit_nused = lwb->lwb_nused; - ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; zio_nowait(lwb->lwb_zio); /* @@ -828,11 +829,8 @@ } ASSERT3U(bp->blk_birth, ==, txg); - ztp->zit_pad = 0; - ztp->zit_nused = lwb->lwb_nused; - ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum = lwb->lwb_blk.blk_cksum; - bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; + bp->blk_cksum.zc_word[ZIL_ZC_SEQ] = ++zilog->zl_header->zh_last_seq; /* * Allocate a new log write buffer (lwb). 
@@ -1224,11 +1222,13 @@ if (zilog->zl_destroy_txg == txg) { blkptr_t blk = zh->zh_log; + uint64_t last_seq = zh->zh_last_seq; ASSERT(list_head(&zilog->zl_lwb_list) == NULL); ASSERT(spa_sync_pass(spa) == 1); bzero(zh, sizeof (zil_header_t)); + zh->zh_last_seq = last_seq; /* restore the last block seq */ bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq)); if (zilog->zl_keep_first) { @@ -1350,15 +1350,15 @@ zil_empty(zilog_t *zilog) { const zil_header_t *zh = zilog->zl_header; - arc_buf_t *abuf = NULL; + char *buf = NULL; if (BP_IS_HOLE(&zh->zh_log)) return (B_TRUE); - if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0) + if (zil_read_log_block(zilog, &zh->zh_log, &buf, 0) != 0) return (B_TRUE); - VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); + zio_data_buf_free(buf, SPA_MAXBLOCKSIZE); return (B_FALSE); } @@ -1621,7 +1621,7 @@ zilog->zl_replay_time = lbolt; ASSERT(zilog->zl_replay_blks == 0); (void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record, &zr, - zh->zh_claim_txg); + zh->zh_claim_txg, ZIO_FLAG_ZILOG_REPLAY); kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE); zil_destroy(zilog, B_FALSE); --- old/usr/src/uts/common/fs/zfs/zio.c Tue Feb 3 13:18:57 2009 +++ new/usr/src/uts/common/fs/zfs/zio.c Tue Feb 3 13:18:56 2009 @@ -31,8 +31,14 @@ #include #include #include +#include #include +#include +#include + +int zio_workaround_6765961 = 1; + /* * ========================================================================== * I/O priority table @@ -276,7 +282,7 @@ /* * ========================================================================== - * I/O transform callbacks for subblocks and decompression + * I/O transform callbacks for subblocks, decompression and decryption * ========================================================================== */ static void @@ -297,6 +303,64 @@ zio->io_error = EIO; } +static void +zio_decrypt(zio_t *zio, void *data, uint64_t size) +{ + blkptr_t *bp = zio->io_bp; + int type = BP_GET_TYPE(bp); + int crypt = 
BP_GET_CRYPT(bp); + uint64_t mac[2]; + int crypt_error = 0; + + ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO); + ASSERT3U(crypt, !=, ZIO_CRYPT_OFF); + ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); + + if (type == DMU_OT_INTENT_LOG) { + zil_trailer_t *ztp; + ztp = (zil_trailer_t *)((char *)zio->io_data + + zio->io_size) - 1; + mac[0] = BE_64(ztp->zit_mac); + mac[1] = 0; + } else { + ASSERT3U(16, ==, zio_crypt_table[crypt].ci_maclen); + /* MAC is in the blkptr as the top two words of the checksum */ + ASSERT3U(BP_GET_CHECKSUM(bp), ==, ZIO_CHECKSUM_SHA256_MAC); + mac[0] = BE_64(bp->blk_cksum.zc_word[2]); + mac[1] = BE_64(bp->blk_cksum.zc_word[3]); + } + crypt_error = zio_decrypt_data(crypt, zio->io_spa, + &zio->io_bookmark, bp->blk_birth, type, + zio->io_data, zio->io_size, &mac, data, size); + + /* + * If we didn't have the decryption key we get EAGAIN so post an + * FMA event. + */ + if (crypt_error != 0) { + if (crypt_error == EAGAIN) { + zfs_ereport_post(FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL, + zio->io_spa, zio->io_vd, zio, 0, 0); + /* + * If it is a dnode we were looking for we need the + * clear text parts to be passed on. 
+ */ +#ifdef _KERNEL + if (type == DMU_OT_DNODE || type == DMU_OT_INTENT_LOG) { +#endif + DTRACE_PROBE2(zio__ciphertext__dnode, + blkptr_t *, bp, zbookmark_t *, + &zio->io_bookmark); + bcopy(zio->io_data, data, size); + crypt_error = 0; +#ifdef _KERNEL + } +#endif + } + zio->io_error = crypt_error; + } +} + /* * ========================================================================== * I/O parent/child relationships and pipeline interlocks @@ -571,6 +635,7 @@ int priority, int flags, const zbookmark_t *zb) { zio_t *zio; + int crypt = BP_GET_CRYPT(bp); zio = zio_create(pio, spa, bp->blk_birth, (blkptr_t *)bp, data, size, done, private, @@ -602,6 +667,8 @@ zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS && zp->zp_compress >= ZIO_COMPRESS_OFF && zp->zp_compress < ZIO_COMPRESS_FUNCTIONS && + zp->zp_crypt >= ZIO_CRYPT_OFF && + zp->zp_crypt < ZIO_CRYPT_FUNCTIONS && zp->zp_type < DMU_OT_NUMTYPES && zp->zp_level < 32 && zp->zp_ndvas > 0 && @@ -619,9 +686,9 @@ } zio_t * -zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, - uint64_t size, zio_done_func_t *done, void *private, int priority, - int flags, zbookmark_t *zb) +zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, + void *data, uint64_t size, zio_done_func_t *done, + void *private, int priority, int flags, zbookmark_t *zb) { zio_t *zio; @@ -672,6 +739,9 @@ * * All claims *must* be resolved in the first txg -- before the SPA * starts allocating blocks -- so that nothing is allocated twice. + * + * This means that the claim is happening for encrypted datasets + * when the key is *not* present. 
*/ ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa)); ASSERT3U(spa_first_txg(spa), <=, txg); @@ -834,13 +904,49 @@ zio_read_bp_init(zio_t *zio) { blkptr_t *bp = zio->io_bp; + uint64_t csize = BP_GET_PSIZE(bp); + void *cbuf; - if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && - zio->io_logical == zio && !(zio->io_flags & ZIO_FLAG_RAW)) { - uint64_t csize = BP_GET_PSIZE(bp); - void *cbuf = zio_buf_alloc(csize); + /* + * Don't add compression/crypto to the transform stack if this is a + * raw read (ie scrub/resilver). + * We don't need the actual data in that case and the decrypt would fail + * due to a lack of a key (in which case the datasets wouldn't + * be mounted but still need to be scrubbed). + * + * We need to distingish between zil_claim and zil_replay reads. + * During zil_claim we probably won't have the key(s) available + * as this is often very early in boot. That is okay because + * zil_claim only needs to walk the log chain it doesn't + * need the record contents. 
+ */ + if (zio->io_logical == zio && !(zio->io_flags & ZIO_FLAG_RAW)) { + if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) { + cbuf = zio_buf_alloc(csize); + zio_push_transform(zio, cbuf, csize, csize, + zio_decompress); + } - zio_push_transform(zio, cbuf, csize, csize, zio_decompress); + if (BP_GET_CRYPT(bp) != ZIO_CRYPT_OFF && + spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) { + boolean_t decrypt = B_TRUE; + boolean_t keypresent; + + keypresent = zio_crypt_key_lookup(zio->io_spa, + zio->io_bookmark.zb_objset, + BP_GET_CRYPT(bp)) != NULL; + + if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) { + decrypt = zio->io_flags & ZIO_FLAG_ZILOG_REPLAY; + } + + if (decrypt && keypresent) { + cbuf = zio_buf_alloc(csize); + zio_push_transform(zio, cbuf, csize, csize, + zio_decrypt); + } + + } } if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0) @@ -854,12 +960,17 @@ { zio_prop_t *zp = &zio->io_prop; int compress = zp->zp_compress; + int crypt = zp->zp_crypt; + int checksum = zp->zp_checksum; + int type = zp->zp_type; blkptr_t *bp = zio->io_bp; - void *cbuf; + void *cbuf, *encbuf; uint64_t lsize = zio->io_size; uint64_t csize = lsize; - uint64_t cbufsize = 0; + uint64_t cbufsize = 0, encbufsize = 0; int pass = 1; + uint64_t *mac = NULL; + size_t maclen = 0; /* * If our children haven't all reached the ready stage, @@ -869,12 +980,26 @@ zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY)) return (ZIO_PIPELINE_STOP); - if (!IO_IS_ALLOCATING(zio)) - return (ZIO_PIPELINE_CONTINUE); + if (!IO_IS_ALLOCATING(zio)) { + /* + * This is an already allocated block coming in via + * rewrite so it is what is in the bp that matters + * and io_prop will likely be all zeros (inherit). 
+ */ + type = BP_GET_TYPE(bp); + if (type == DMU_OT_INTENT_LOG) { + crypt = BP_GET_CRYPT(bp); + if (crypt == ZIO_CRYPT_OFF || + crypt == ZIO_CRYPT_INHERIT || + spa_version(zio->io_spa) < SPA_VERSION_CRYPTO) { + return (ZIO_PIPELINE_CONTINUE); + } + } else { + return (ZIO_PIPELINE_CONTINUE); + } + } - ASSERT(compress != ZIO_COMPRESS_INHERIT); - - if (bp->blk_birth == zio->io_txg) { + if (IO_IS_ALLOCATING(zio) && bp->blk_birth == zio->io_txg) { /* * We're rewriting an existing block, which means we're * working on behalf of spa_sync(). For spa_sync() to @@ -900,7 +1025,8 @@ spa_max_replication(zio->io_spa)) == BP_GET_NDVAS(bp)); } - if (compress != ZIO_COMPRESS_OFF) { + if (IO_IS_ALLOCATING(zio) && compress != ZIO_COMPRESS_OFF) { + ASSERT(type != DMU_OT_INTENT_LOG); if (!zio_compress_data(compress, zio->io_data, zio->io_size, &cbuf, &csize, &cbufsize)) { compress = ZIO_COMPRESS_OFF; @@ -909,7 +1035,129 @@ } } + if (spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO && + (csize == 0 || + (type == DMU_OT_DNODE && (zio->io_bookmark.zb_level != 0)))) { + /* + * Blocks that compress to nothing or are just indirect + * dnode blocks (which don't have bonusbufs) do not + * get encrypted. + */ + crypt = ZIO_CRYPT_OFF; + } + + if (zio_workaround_6765961 == 1 && + zio->io_bookmark.zb_object == 0 && + zio->io_bookmark.zb_level == 0 && + zio->io_bookmark.zb_blkid == 0) { + crypt = ZIO_CRYPT_OFF; + } + + if ((IO_IS_ALLOCATING(zio) || type == DMU_OT_INTENT_LOG) && + crypt != ZIO_CRYPT_OFF && + spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) { + int crypt_error; + + ASSERT(crypt != ZIO_CRYPT_INHERIT); + ASSERT(csize != 0); + + crypt_error = zio_encrypt_data(crypt, zio->io_spa, + &zio->io_bookmark, zio->io_txg, type, + zio->io_data, zio->io_size, &encbuf, &encbufsize, + (void **)&mac); + + if (crypt_error != 0) { + if (crypt_error == EAGAIN) { + /* + * If we don't have access to the key material + * that the zbookmark_t says we needed, + * post an FMA event. 
+ * + * This case really shouldn't happen in + * practice though because datasets + * are umounted when the key is removed + * and not mounted up if the key isn't available + * at import or an explicit zfs mount. + */ + zfs_ereport_post( + FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL, + zio->io_spa, zio->io_vd, zio, 0, 0); + zio->io_error = EIO; + } + zio->io_error = crypt_error; + return (ZIO_PIPELINE_CONTINUE); + } + + /* + * All current encryption cases have a non zero sized MAC + * if this ever changes the following test needs to change. + */ + if (mac == NULL) { + /* + * This can happen when there are dnodes + * were no bonusbufs needing encrypting. + */ + crypt = ZIO_CRYPT_OFF; + BP_SET_CRYPT(bp, crypt); + } else if (type == DMU_OT_INTENT_LOG) { + zil_trailer_t *ztp; + ASSERT(IO_IS_ALLOCATING(zio) == 0); + BP_SET_CRYPT(bp, crypt); + /* + * For the intent log we have an 8 byte mac + * not 16 like all the other cases. + * It is stored in the zil_trailer_t. + */ + ztp = (zil_trailer_t *)((char *)encbuf + + encbufsize) - 1; + ztp->zit_mac = BE_64(mac[0]); + kmem_free(mac, zio_crypt_table[crypt].ci_zil_maclen); + mac = NULL; + } else { + /* + * Note don't free the mac here it is put in + * place later and freed then. + */ + maclen = zio_crypt_table[crypt].ci_maclen; + checksum = ZIO_CHECKSUM_SHA256_MAC; + } + zio_push_transform(zio, encbuf, zio->io_size, encbufsize, NULL); + } + /* + * If we aren't encrypting make sure the checksum isn't the + * truncated SHA256+MAC variant - force to SHA256 instead. 
+ */ + if (checksum == ZIO_CHECKSUM_SHA256_MAC && + (crypt == ZIO_CRYPT_OFF || crypt == ZIO_CRYPT_INHERIT)) { + ASSERT3U(spa_version(zio->io_spa), >=, SPA_VERSION_CRYPTO); + checksum = ZIO_CHECKSUM_SHA256; + } + + if (!IO_IS_ALLOCATING(zio)) { + /* Must have dealt with the crypto mac if there was one */ + ASSERT(mac == NULL); + return (ZIO_PIPELINE_CONTINUE); + } + + + ASSERT(checksum != ZIO_CHECKSUM_INHERIT); + ASSERT(compress != ZIO_COMPRESS_INHERIT); + /* + * Encryption wasn't always available and the part of the bp + * it is stored in used to be zero'd padding so for older + * versions of the spa we have to be sure to put 0 (inherit) + * in there. + */ + if (spa_version(zio->io_spa) >= SPA_VERSION_CRYPTO) { + ASSERT(crypt != ZIO_CRYPT_INHERIT); + } else { + crypt = ZIO_CRYPT_INHERIT; + ASSERT(mac == NULL); + ASSERT(checksum != ZIO_CHECKSUM_SHA256_MAC); + } + + /* * The final pass of spa_sync() must be all rewrites, but the first * few passes offer a trade-off: allocating blocks defers convergence, * but newly allocated blocks are sequential, so they can be written @@ -930,17 +1178,59 @@ if (csize == 0) { zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; + /* + * We must not have a crypto MAC to deal with if we + * aren't filling in a bp + */ + ASSERT(mac == NULL); } else { - ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER); + ASSERT(checksum != ZIO_CHECKSUM_GANG_HEADER); BP_SET_LSIZE(bp, lsize); BP_SET_PSIZE(bp, csize); BP_SET_COMPRESS(bp, compress); - BP_SET_CHECKSUM(bp, zp->zp_checksum); + BP_SET_CRYPT(bp, crypt); + BP_SET_CHECKSUM(bp, checksum); BP_SET_TYPE(bp, zp->zp_type); BP_SET_LEVEL(bp, zp->zp_level); BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); + if (mac != NULL) { + /* + * The mac is stored in the blkptr as the top two + * words of the checksum, in bigendian form + * (same as the checksum). + * + * Currently all MAC's are 16 bytes and all + * crypto "on" values use a MAC. 
If an encryption + * mode is added that doesn't have a MAC or has a MAC + * of a different size this needs updating. + */ + ASSERT(maclen == 16); + ASSERT(mac[0] != 0); + ASSERT(mac[1] != 0); + ASSERT3U(crypt, >, ZIO_CRYPT_INHERIT); + ASSERT3U(crypt, <, ZIO_CRYPT_FUNCTIONS); + ASSERT3U(crypt, !=, ZIO_CRYPT_OFF); + ASSERT3U(spa_version(zio->io_spa), >=, + SPA_VERSION_CRYPTO); + ASSERT3U(checksum, ==, ZIO_CHECKSUM_SHA256_MAC); + bp->blk_cksum.zc_word[2] = BE_64(mac[0]); + bp->blk_cksum.zc_word[3] = BE_64(mac[1]); + kmem_free(mac, maclen); + mac = NULL; + } else { + ASSERT3U(checksum, !=, ZIO_CHECKSUM_SHA256_MAC); + } + } + /* + * Must have dealt with the crypto MAC if checksum indicates + * we are using the truncated SHA256+MAC + */ + if (checksum == ZIO_CHECKSUM_SHA256_MAC) { + ASSERT(mac == NULL); + } + return (ZIO_PIPELINE_CONTINUE); } @@ -1316,8 +1606,9 @@ if (gn != NULL) { zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, - gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, pio->io_priority, - ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); + gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, + pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), + &pio->io_bookmark); /* * As we rewrite each gang header, the pipeline will compute * a new gang block header checksum for it; but no one will @@ -1394,6 +1685,11 @@ *gnpp = NULL; } +/* + * ========================================================================== + * Gang block support + * ========================================================================== + */ static void zio_gang_tree_free(zio_gang_node_t **gnpp) { @@ -1587,8 +1883,9 @@ /* * Create the gang header. */ - zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL, - pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); + zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, + NULL, NULL, pio->io_priority, + ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); /* * Create and nowait the gang children. 
@@ -1600,6 +1897,7 @@ zp.zp_checksum = lio->io_prop.zp_checksum; zp.zp_compress = ZIO_COMPRESS_OFF; + zp.zp_crypt = ZIO_CRYPT_OFF; zp.zp_type = DMU_OT_NONE; zp.zp_level = 0; zp.zp_ndvas = lio->io_prop.zp_ndvas; @@ -1725,7 +2023,7 @@ */ int zio_alloc_blk(spa_t *spa, uint64_t size, blkptr_t *new_bp, blkptr_t *old_bp, - uint64_t txg) + uint64_t txg, int crypt) { int error; @@ -1740,6 +2038,7 @@ BP_SET_LSIZE(new_bp, size); BP_SET_PSIZE(new_bp, size); BP_SET_COMPRESS(new_bp, ZIO_COMPRESS_OFF); + BP_SET_CRYPT(new_bp, crypt); BP_SET_CHECKSUM(new_bp, ZIO_CHECKSUM_ZILOG); BP_SET_TYPE(new_bp, DMU_OT_INTENT_LOG); BP_SET_LEVEL(new_bp, 0); @@ -2143,7 +2442,6 @@ if (bp != NULL) { ASSERT(bp->blk_pad[0] == 0); ASSERT(bp->blk_pad[1] == 0); - ASSERT(bp->blk_pad[2] == 0); ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 || (bp == zio_unique_parent(zio)->io_bp)); if (zio->io_type == ZIO_TYPE_WRITE && !BP_IS_HOLE(bp) && --- old/usr/src/uts/common/fs/zfs/zio_checksum.c Tue Feb 3 13:18:59 2009 +++ new/usr/src/uts/common/fs/zfs/zio_checksum.c Tue Feb 3 13:18:58 2009 @@ -75,6 +75,7 @@ {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, "fletcher2"}, {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, "fletcher4"}, {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, "SHA256"}, + {{zio_checksum_SHAMAC, zio_checksum_SHAMAC}, 1, 0, "SHA256+MAC"}, }; uint8_t @@ -131,10 +132,33 @@ zio_block_tail_t *zbt = (zio_block_tail_t *)((char *)data + size) - 1; zio_checksum_info_t *ci = &zio_checksum_table[checksum]; zio_cksum_t zbt_cksum; +#ifdef DEBUG + zio_cksum_t blk_cksum_copy; +#endif /* DEBUG */ ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); ASSERT(ci->ci_func[0] != NULL); +#if DEBUG + /* + * If this is the special case of ZIO_CHECKSUM_SHA256_MAC + * then the top two words should already contain the MAC + * from the encryption by this point so ASSERT that. + * + * We also make a copy of those words and check they didn't + * get changed by the checksum function. 
+ */ + if (checksum == ZIO_CHECKSUM_SHA256_MAC) { + ASSERT3U(BP_GET_CRYPT(bp), !=, ZIO_CRYPT_OFF); + ASSERT3U(BP_GET_CRYPT(bp), !=, ZIO_CRYPT_INHERIT); + ASSERT(bp->blk_cksum.zc_word[2] != 0); + ASSERT(bp->blk_cksum.zc_word[3] != 0); + blk_cksum_copy.zc_word[0] = bp->blk_cksum.zc_word[0]; + blk_cksum_copy.zc_word[1] = bp->blk_cksum.zc_word[1]; + blk_cksum_copy.zc_word[2] = bp->blk_cksum.zc_word[2]; + blk_cksum_copy.zc_word[3] = bp->blk_cksum.zc_word[3]; + } +#endif /* DEBUG */ if (ci->ci_zbt) { if (checksum == ZIO_CHECKSUM_GANG_HEADER) zio_checksum_gang_verifier(&zbt->zbt_cksum, bp); @@ -148,6 +172,13 @@ } else { ci->ci_func[0](data, size, &bp->blk_cksum); } + +#if DEBUG + if (checksum == ZIO_CHECKSUM_SHA256_MAC) { + ASSERT(bp->blk_cksum.zc_word[2] == blk_cksum_copy.zc_word[2]); + ASSERT(bp->blk_cksum.zc_word[3] == blk_cksum_copy.zc_word[3]); + } +#endif /* DEBUG */ } int @@ -196,8 +227,24 @@ ci->ci_func[byteswap](data, size, &actual_cksum); } - if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) + /* + * Special case for truncated SHA256 with CCM_MAC + * This may not be the best place to deal with this but it is here now. + * + * Words 0 and 1 of the checksum are the first 128 bits of SHA256 hash. + * Words 2 and 3 are the CCM MAC so ignore those because we + * can't check them until we do the decryption later, nor could + * we do them if the key wasn't present. 
+ */ + if (checksum == ZIO_CHECKSUM_SHA256_MAC) { + if (!(0 == ( + (actual_cksum.zc_word[0] - expected_cksum.zc_word[0]) | + (actual_cksum.zc_word[1] - expected_cksum.zc_word[1])))) { + return (ECKSUM); + } + } else if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) { return (ECKSUM); + } if (zio_injection_enabled && !zio->io_error) return (zio_handle_fault_injection(zio, ECKSUM)); --- /dev/null Tue Feb 3 13:19:00 2009 +++ new/usr/src/uts/common/fs/zfs/zio_crypt.c Tue Feb 3 13:18:59 2009 @@ -0,0 +1,2410 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _KERNEL +#include +#endif + +#define ZIO_CRYPT_MAX_CCM_DATA 16777215 /* Based on CCM noncesize of 12 */ +#define ZIO_CRYPT_WRAPKEY_IVLEN 13 /* Max Nonce size for AES_CCM wrap */ +#define WRAPPING_MAC_LEN 16 /* Max MAC len for AES_CCM */ +#define WRAPPEDKEY_LEN(len) ((len) + ZIO_CRYPT_WRAPKEY_IVLEN + WRAPPING_MAC_LEN) + +#define SET_CRYPTO_DATA(cd, buf, len) \ + (cd).cd_format = CRYPTO_DATA_RAW;\ + (cd).cd_offset = 0;\ + (cd).cd_length = (len);\ + (cd).cd_miscdata = NULL;\ + (cd).cd_raw.iov_base = (buf);\ + (cd).cd_raw.iov_len = (len); + +int zio_return_cipher_text = 0; + +uint8_t +zio_crypt_select(uint8_t child, uint8_t parent) +{ + ASSERT(child < ZIO_CRYPT_FUNCTIONS); + ASSERT(parent < ZIO_CRYPT_FUNCTIONS); + ASSERT(parent != ZIO_CRYPT_INHERIT && parent != ZIO_CRYPT_ON); + + if (child == ZIO_CRYPT_INHERIT) + return (parent); + + if (child == ZIO_CRYPT_ON) + return (ZIO_CRYPT_ON_VALUE); + + return (child); +} + + +boolean_t +zio_crypt_mech_available(uint64_t crypt) +{ + crypto_mech_type_t mech; + + mech = crypto_mech2id(zio_crypt_table[crypt].ci_mechname); + + if (mech == CRYPTO_MECH_INVALID) { + return (FALSE); + } + return (TRUE); +} + +zio_crypt_key_t * +zio_crypt_key_allocate(void) +{ + zio_crypt_key_t *key; + + key = kmem_zalloc(sizeof (zio_crypt_key_t), KM_SLEEP); + refcount_create(&key->zk_refcnt); + key->zk_free_on_release = FALSE; + + return (key); +} + +boolean_t +zio_crypt_key_free(zio_crypt_key_t *key) +{ + if (key == NULL) + return (TRUE); + + if (!refcount_is_zero(&key->zk_refcnt)) { + return (FALSE); + } + + if (key->zk_key.ck_length != 0) { + /* + * This will need updating for key + * types other than CRYPTO_KEY_RAW. 
+ */ + bzero(key->zk_key.ck_data, key->zk_key.ck_length / 8); + kmem_free(key->zk_key.ck_data, key->zk_key.ck_length / 8); + } + if (key->zk_ctx_tmpl != NULL) { + crypto_destroy_ctx_template(key->zk_ctx_tmpl); + key->zk_ctx_tmpl = NULL; + } + refcount_destroy(&key->zk_refcnt); + bzero(key, sizeof (zio_crypt_key_t)); + kmem_free(key, sizeof (zio_crypt_key_t)); + key = NULL; + return (TRUE); +} + +void +zio_crypt_key_hold(zio_crypt_key_t *key, void *tag) +{ + ASSERT(key != NULL); + (void) refcount_add(&key->zk_refcnt, tag); +} + +void +zio_crypt_key_release(zio_crypt_key_t *key, void *tag) +{ + ASSERT(key != NULL); + (void) refcount_remove(&key->zk_refcnt, tag); + if (refcount_is_zero(&key->zk_refcnt) && key->zk_free_on_release) { + (void) zio_crypt_key_free(key); + } +} + + +/* + * In memory per SPA keystore + * + * The keystore contains the following: + * SPA wrapping key for keyscope=pool datasets (zio_crypt_key_t) + * L2ARC ephemeral key (zio_crypt_key_t) + * AVL tree of zio_crypt_key_t for keyscope=pool indexed by object id + * AVL tree of zio_crypt_key_t for keyscope=dataset indexed by object id + * + * All of the above are protected by sk_lock including insert/delete from + * the AVL trees. + * + * Each key is also reference counted. + * + * The spa_keystore_t entries should not be modified outside of the routines + * in this file. + * The recommended interface to the keystore outside of this file is via + * + * zio_crypt_key_lookup() not the spa_keystore routines. 
+ */ +struct spa_keystore_node { + avl_node_t zik_link; + uint64_t zik_os; + zio_crypt_key_t *zik_key; +}; + + +static int +spa_keystore_compare(const void *a, const void *b) +{ + const spa_keystore_node_t *zka = a; + const spa_keystore_node_t *zkb = b; + + if (zka->zik_os < zkb->zik_os) + return (-1); + if (zka->zik_os > zkb->zik_os) + return (+1); + return (0); +} + +void +spa_keystore_init(spa_t *spa) +{ + spa->spa_keystore = kmem_zalloc(sizeof (spa_keystore_t), KM_SLEEP); + rw_init(&spa->spa_keystore->sk_lock, NULL, NULL, NULL); + avl_create(&spa->spa_keystore->sk_keyscope_pool, spa_keystore_compare, + sizeof (spa_keystore_node_t), + offsetof(spa_keystore_node_t, zik_link)); + avl_create(&spa->spa_keystore->sk_keyscope_dsl, spa_keystore_compare, + sizeof (spa_keystore_node_t), + offsetof(spa_keystore_node_t, zik_link)); +} + +void +spa_keystore_fini(spa_t *spa) +{ + void *cookie = NULL; + avl_tree_t *tree; + spa_keystore_node_t *node; + + if (spa->spa_keystore == NULL) + return; + + rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER); + /* + * Note we don't bother with the refcnt of the keys in here + * because this function can't return failure so we just need to + * destroy everything. 
+ */ + tree = &spa->spa_keystore->sk_keyscope_pool; + while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) { + (void) zio_crypt_key_free(node->zik_key); + kmem_free(node, sizeof (spa_keystore_node_t)); + } + avl_destroy(tree); + + cookie = NULL; + tree = &spa->spa_keystore->sk_keyscope_dsl; + while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) { + (void) zio_crypt_key_free(node->zik_key); + kmem_free(node, sizeof (spa_keystore_node_t)); + } + avl_destroy(tree); + + if (spa->spa_keystore->sk_l2arc_key != NULL) + (void) zio_crypt_key_free(spa->spa_keystore->sk_l2arc_key); + if (spa->spa_keystore->sk_spa_kek != NULL) + (void) zio_crypt_key_free(spa->spa_keystore->sk_spa_kek); + + rw_exit(&spa->spa_keystore->sk_lock); + rw_destroy(&spa->spa_keystore->sk_lock); + kmem_free(spa->spa_keystore, sizeof (spa_keystore_t)); + spa->spa_keystore = NULL; +} + +static zio_crypt_key_t * +spa_keystore_find(spa_t *spa, uint64_t os) +{ + spa_keystore_node_t search_zk; + spa_keystore_node_t *found_zk; + avl_tree_t *tree; + + search_zk.zik_os = os; + rw_enter(&spa->spa_keystore->sk_lock, RW_READER); + tree = &spa->spa_keystore->sk_keyscope_pool; + found_zk = avl_find(tree, &search_zk, NULL); + if (found_zk == NULL) { + tree = &spa->spa_keystore->sk_keyscope_dsl; + found_zk = avl_find(tree, &search_zk, NULL); + } + rw_exit(&spa->spa_keystore->sk_lock); + if (found_zk == NULL) + return (NULL); + + return (found_zk->zik_key); +} + +static void +spa_keystore_insert(spa_t *spa, uint64_t os, zio_crypt_key_t *key, + uint64_t keyscope) +{ + avl_index_t where; + spa_keystore_node_t *zk; + avl_tree_t *tree = NULL; + + ASSERT(spa->spa_keystore != NULL); + + if (keyscope == ZFS_KEYSCOPE_POOL) { + tree = &spa->spa_keystore->sk_keyscope_pool; + } else if (keyscope == ZFS_KEYSCOPE_DSL) { + tree = &spa->spa_keystore->sk_keyscope_dsl; + } + ASSERT(tree != NULL); + + zk = kmem_zalloc(sizeof (spa_keystore_node_t), KM_SLEEP); + + zk->zik_key = key; + zk->zik_os = os; + 
rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER); + if (avl_find(tree, zk, &where) != NULL) { + /* Attempt to insert a key that is already present */ + kmem_free(zk, sizeof (spa_keystore_node_t)); + rw_exit(&spa->spa_keystore->sk_lock); + return; + } + avl_insert(tree, zk, where); + rw_exit(&spa->spa_keystore->sk_lock); +} + +int +spa_keystore_remove(spa_t *spa, uint64_t os) +{ + spa_keystore_node_t zk_tofind; + spa_keystore_node_t *zk; + avl_tree_t *tree; + + ASSERT(spa->spa_keystore != NULL); + + zk_tofind.zik_os = os; + rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER); + tree = &spa->spa_keystore->sk_keyscope_pool; + zk = avl_find(tree, &zk_tofind, NULL); + if (zk == NULL) { + tree = &spa->spa_keystore->sk_keyscope_dsl; + zk = avl_find(tree, &zk_tofind, NULL); + if (zk == NULL) { + rw_exit(&spa->spa_keystore->sk_lock); + return (0); + } + } + if (!zio_crypt_key_free(zk->zik_key)) { + rw_exit(&spa->spa_keystore->sk_lock); + return (EBUSY); + } + avl_remove(tree, zk); + rw_exit(&spa->spa_keystore->sk_lock); + + kmem_free(zk, sizeof (spa_keystore_node_t)); + + return (0); + +} + +zfs_crypt_key_status_t +spa_keystatus(spa_t *spa) +{ + zfs_crypt_key_status_t ret = ZFS_CRYPT_KEY_UNAVAILABLE; + + if (spa_version(spa) < SPA_VERSION_CRYPTO) + return (ZFS_CRYPT_KEY_UNDEFINED); + + rw_enter(&spa->spa_keystore->sk_lock, RW_READER); + if (spa->spa_keystore->sk_spa_kek != NULL) + ret = ZFS_CRYPT_KEY_AVAILABLE; + rw_exit(&spa->spa_keystore->sk_lock); + return (ret); +} + +void +spa_key_hold(spa_t *spa) +{ + zio_crypt_key_t *key = spa->spa_keystore->sk_spa_kek; + zio_crypt_key_hold(key, spa); +} + +void +spa_key_release(spa_t *spa) +{ + zio_crypt_key_t *key = spa->spa_keystore->sk_spa_kek; + zio_crypt_key_release(key, spa); +} + +/* + * Key wrap/unwrap support: + * + * The wrapping mechanism is hardcoded as AES for now, + * if this changes (to say RSA) then a pool/dataset property or some other + * method will be needed to store the wrapping mechanism explicitly + * or a means 
to derive it. + * + * Expected future wrapping algorithms include (but are not limited to): + * AES NIST Keywrap + * RSA keypair and X.509 certificate. + */ + +/* + * zio_crypt_unwrap_key + * + * Using the provided wrapping key unwrap the key into a zio_crypt_key_t. + * The wrappedkey contains a ZIO_CRYPT_WRAPKEY_IVLEN byte iv at the start of it. + * + * Allocates a zio_crypt_key_t using kmem_alloc(), caller should free + * using zio_crypt_key_free(). + * + * returns 0 on success + */ +static int +zio_crypt_unwrap_key(zio_crypt_key_t *wk, int crypt, + caddr_t wkeybuf, size_t wkeylen, zio_crypt_key_t **zck) +{ + crypto_mechanism_t wmech; + crypto_data_t wkey_cdt, ptkey_cdt; + zio_crypt_key_t *tmpzck; + caddr_t uwrapbuf; + size_t uwrapbuflen; + size_t keylen; + int ret; + CK_AES_CCM_PARAMS *ccmp; + + if (wkeybuf == NULL || wkeylen <= ZIO_CRYPT_WRAPKEY_IVLEN) { + return (-1); + } + /* + * We may be unwrapping a key of a smaller length than the wrapping + * key so unwrapbuflen and keylen need to take that into account. + * + * The incoming wkeybuf also has the iv stored at the start. 
+ */ + wmech.cm_type = crypto_mech2id(SUN_CKM_AES_CCM); + if (wmech.cm_type == CRYPTO_MECH_INVALID) + return (-1); + keylen = zio_crypt_table[crypt].ci_keylen; + + ccmp = kmem_zalloc(sizeof (CK_AES_CCM_PARAMS), KM_SLEEP); + ccmp->ulNonceSize = ZIO_CRYPT_WRAPKEY_IVLEN; + ccmp->nonce = (uchar_t *)wkeybuf; + ccmp->ulAuthDataSize = 0; + ccmp->authData = NULL; + ccmp->ulDataSize = keylen + WRAPPING_MAC_LEN; + ccmp->ulMACSize = WRAPPING_MAC_LEN; + wmech.cm_param = (char *)ccmp; + wmech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); + + wkey_cdt.cd_format = CRYPTO_DATA_RAW; + wkey_cdt.cd_offset = ZIO_CRYPT_WRAPKEY_IVLEN; + wkey_cdt.cd_length = wkeylen - wkey_cdt.cd_offset; + wkey_cdt.cd_miscdata = NULL; + wkey_cdt.cd_raw.iov_base = wkeybuf; + wkey_cdt.cd_raw.iov_len = wkeylen; + + uwrapbuflen = keylen + WRAPPING_MAC_LEN; + uwrapbuf = kmem_zalloc(uwrapbuflen, KM_SLEEP); + SET_CRYPTO_DATA(ptkey_cdt, uwrapbuf, uwrapbuflen); + + ret = crypto_decrypt(&wmech, &wkey_cdt, &wk->zk_key, + NULL, &ptkey_cdt, NULL); + kmem_free(ccmp, sizeof (CK_AES_CCM_PARAMS)); + if (ret != CRYPTO_SUCCESS) { + bzero(uwrapbuf, uwrapbuflen); + kmem_free(uwrapbuf, uwrapbuflen); + *zck = NULL; + return (ret); + } + + tmpzck = zio_crypt_key_allocate(); + tmpzck->zk_key.ck_format = CRYPTO_KEY_RAW; + tmpzck->zk_key.ck_data = kmem_alloc(keylen, KM_SLEEP); + tmpzck->zk_key.ck_length = keylen * 8; + bcopy(uwrapbuf, tmpzck->zk_key.ck_data, keylen); + kmem_free(uwrapbuf, uwrapbuflen); + *zck = tmpzck; + + return (0); +} + + +/* + * zio_crypt_wrap_key + * + * Using the provided wrapping key (usually the spa key) wrap the + * (usually the dataset) key into a form suitable for storage in a + * property. + * + * Uses kmem_alloc to create space for the wrapped key, the caller + * should free with kmem_free when it is finished with the wrapped key. 
+ * + * returns 0 on success + */ +static int +zio_crypt_wrap_key(zio_crypt_key_t *wrappingkey, zio_crypt_key_t *ptkey, + caddr_t *wkeybuf, size_t *wkeylen, void *iv, size_t ivlen) +{ + crypto_mechanism_t wmech; + crypto_data_t wkey_cdt, ptkey_cdt; + char *ptkeybuf; + size_t ptkeylen; + CK_AES_CCM_PARAMS *ccmp; + int ret; + + /* + * Currently we only support wrapping keys of CRYPTO_KEY_RAW + */ + if (ptkey->zk_key.ck_format != CRYPTO_KEY_RAW) { + return (-1); + } + ptkeybuf = ptkey->zk_key.ck_data; + ptkeylen = CRYPTO_BITS2BYTES(ptkey->zk_key.ck_length); + SET_CRYPTO_DATA(ptkey_cdt, ptkeybuf, ptkeylen); + wmech.cm_type = crypto_mech2id(SUN_CKM_AES_CCM); + if (wmech.cm_type == CRYPTO_MECH_INVALID) + return (-1); + ccmp = kmem_zalloc(sizeof (CK_AES_CCM_PARAMS), KM_SLEEP); + ccmp->ulNonceSize = ivlen; + ccmp->nonce = iv; + ccmp->ulAuthDataSize = 0; + ccmp->authData = NULL; + ccmp->ulDataSize = ptkeylen; + ccmp->ulMACSize = WRAPPING_MAC_LEN; + wmech.cm_param = (char *)ccmp; + wmech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); + + *wkeylen = WRAPPEDKEY_LEN(ptkeylen); + *wkeybuf = kmem_zalloc(*wkeylen, KM_SLEEP); + bcopy(iv, *wkeybuf, ivlen); + wkey_cdt.cd_format = CRYPTO_DATA_RAW; + wkey_cdt.cd_offset = ivlen; + wkey_cdt.cd_length = *wkeylen - wkey_cdt.cd_offset; + wkey_cdt.cd_miscdata = NULL; + wkey_cdt.cd_raw.iov_base = *wkeybuf; + wkey_cdt.cd_raw.iov_len = *wkeylen; + ret = crypto_encrypt(&wmech, &ptkey_cdt, &wrappingkey->zk_key, + NULL, &wkey_cdt, NULL); + + bzero(ccmp, sizeof (CK_AES_CCM_PARAMS)); + kmem_free(ccmp, sizeof (CK_AES_CCM_PARAMS)); + if (ret != CRYPTO_SUCCESS) { + bzero(*wkeybuf, *wkeylen); + kmem_free(*wkeybuf, *wkeylen); + *wkeybuf = NULL; + *wkeylen = 0; + return (ret); + } + + return (0); +} + +/* + * zio_crypt_key_from_ioc + * + * Turn the ioctl variant of the key into a zio_crypt_key_t + * + * For now this only supports key by value (RAW) keys but will + * be extended to support token keys later. 
+ * + * This function allocates memory with kmem_alloc the resulting zio_crypt_key_t + * should be freed by zio_crypt_key_free() + */ +int +zio_crypt_key_from_ioc(zfs_ioc_crypto_t *ioc_key, zio_crypt_key_t **zck) +{ + uint64_t keydatalen = ioc_key->zic_keydatalen; + void *keydata = (void *)(uintptr_t)ioc_key->zic_keydata; + zio_crypt_key_t *zktmp; + + /* + * For now the maximum key length we support is an AES 256 bit key + * so sanity check the data in the ioctl call based on that. + * + * In fact we only support AES 256 & AES 128 so keydatalen should + * only ever be 16 or 32 bytes. + */ + if (keydatalen != 16 && keydatalen != 32) { + return (EINVAL); + } + + switch (ioc_key->zic_keytype) { + case ZFS_IOC_CRYPTO_KEY_TYPE_RAW: + zktmp = zio_crypt_key_allocate(); + zktmp->zk_key.ck_format = CRYPTO_KEY_RAW; + + zktmp->zk_key.ck_data = kmem_alloc(keydatalen, KM_SLEEP); + zktmp->zk_key.ck_length = keydatalen * 8; +#ifdef _KERNEL + if (xcopyin(keydata, zktmp->zk_key.ck_data, keydatalen) != 0) { + (void) zio_crypt_key_free(zktmp); + return (EFAULT); + } +#else + bcopy(keydata, zktmp->zk_key.ck_data, keydatalen); +#endif /* _KERNEL */ + zktmp->zk_crypt = ioc_key->zic_crypt; + zktmp->zk_keyscope = ioc_key->zic_keyscope; + break; + case ZFS_IOC_CRYPTO_KEY_TYPE_TOKEN: + default: + return (ENOTSUP); + } + + *zck = zktmp; + return (0); +} + +/* + * dsl_dataset_keystatus + * + * Determine if key is present for a given dataset, we have to take out + * a lock on the dataset since we need to lookup the encryption + * property. + */ +zfs_crypt_key_status_t +dsl_dataset_keystatus(dsl_dataset_t *ds) +{ + spa_t *spa; + uint64_t crypt, keyscope; + + spa = dsl_dataset_get_spa(ds); + if (spa_version(spa) < SPA_VERSION_CRYPTO) { + return (ZFS_CRYPT_KEY_UNDEFINED); + } + + /* + * Ideally we should use a dsl_dataset_hold variant + * but there doesn't appear to be one that takes a + * dsl_dataset_t so this will do for now. 
+ */ + rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); + + VERIFY(dsl_prop_get_ds(ds, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), 8, 1, &crypt, FTAG) == 0); + VERIFY(dsl_prop_get_ds(ds, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), 8, 1, &keyscope, FTAG) == 0); + + rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); + + if (crypt == ZIO_CRYPT_OFF) { + return (ZFS_CRYPT_KEY_UNDEFINED); + } + + switch (keyscope) { + case ZFS_KEYSCOPE_POOL: + /* + * If have a spa key we can unwrap the dataset key + * so say it is available even if it hasn't yet been + * unwrapped. + */ + if (spa->spa_keystore->sk_spa_kek != NULL) { + return (ZFS_CRYPT_KEY_AVAILABLE); + } + break; + case ZFS_KEYSCOPE_DSL: + if (spa_keystore_find(spa, ds->ds_object) != NULL) { + return (ZFS_CRYPT_KEY_AVAILABLE); + } + break; + default: + ASSERT(0); + } + + return (ZFS_CRYPT_KEY_UNAVAILABLE); +} + + +/* + * spa_crypto_key_validate + * + * Check we have the correct key for the spa. + * We do this by storing the spa guid encrypted by AES_CCM in the + * KEYCHECK property. + * If KEYSTATUS is undefined then we have never loaded a key + * before so setup KEYCHECK instead of verifiying the existing value. + * If called from spa_crypt_key_change() keychange is true and other + * than not looking at keystatus this is the same as the inital setup + * case. + */ +int +spa_crypto_key_validate(spa_t *spa, zio_crypt_key_t *key, uint64_t salt, + boolean_t keychange) +{ + int ret = EINVAL; + nvlist_t *nvl = NULL, *nv = NULL; + crypto_data_t pt, ct; + uint64_t guid; + uint64_t keystatus; + uchar_t *clear_check = NULL; + uchar_t *key_check = NULL; + uint_t clear_check_len, key_check_len; + crypto_mechanism_t mech; + CK_AES_CCM_PARAMS *ccmp; + + /* + * A keychange by definition has an on disk keystatus of + * ZFS_CRYPT_KEY_DEFINED and by the time we have gotten + * this far into a 'zpool key -c' we are using the new key + * so the in memory keystatus doesn't matter. 
+ * + * For all other cases: initial key setting or key load + * we do need to check the keystatus. + */ + if (!keychange) { + VERIFY(spa_prop_get(spa, &nvl) == 0); + if (nvlist_lookup_nvlist(nvl, + zpool_prop_to_name(ZPOOL_PROP_KEYSTATUS), &nv) == 0) { + VERIFY(nvlist_lookup_uint64(nv, + ZPROP_VALUE, &keystatus) == 0); + } else { + keystatus = zpool_prop_default_numeric( + ZPOOL_PROP_KEYSTATUS); + } + } + + guid = spa_guid(spa); + mech.cm_type = crypto_mech2id(SUN_CKM_AES_CCM); + if (mech.cm_type == CRYPTO_MECH_INVALID) { + return (ENOTSUP); + } + ccmp = kmem_zalloc(sizeof (CK_AES_CCM_PARAMS), KM_SLEEP); + ccmp->ulNonceSize = ZIO_CRYPT_WRAPKEY_IVLEN; + ccmp->ulAuthDataSize = 0; + ccmp->authData = NULL; + ccmp->ulMACSize = WRAPPING_MAC_LEN; + mech.cm_param = (char *)ccmp; + mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS); + + key_check_len = WRAPPEDKEY_LEN(sizeof (guid)); + + if (keychange || keystatus == ZFS_CRYPT_KEY_UNDEFINED) { + clear_check_len = sizeof (guid); + clear_check = kmem_alloc(clear_check_len, KM_SLEEP); + key_check = kmem_alloc(key_check_len, KM_SLEEP); + if (random_get_bytes(key_check, ZIO_CRYPT_WRAPKEY_IVLEN) != 0) { + return (EINVAL); + } + bcopy(&guid, clear_check, sizeof (guid)); + + ccmp->nonce = key_check; + ccmp->ulDataSize = sizeof (guid); + SET_CRYPTO_DATA(pt, (char *)clear_check, clear_check_len); + ct.cd_format = CRYPTO_DATA_RAW; + ct.cd_offset = ZIO_CRYPT_WRAPKEY_IVLEN; + ct.cd_length = key_check_len - ZIO_CRYPT_WRAPKEY_IVLEN; + ct.cd_miscdata = NULL; + ct.cd_raw.iov_base = (char *)key_check; + ct.cd_raw.iov_len = key_check_len; + + ret = crypto_encrypt(&mech, &pt, &key->zk_key, NULL, &ct, NULL); + if (ret != CRYPTO_SUCCESS) { + if (ret == CRYPTO_MECH_NOT_SUPPORTED) { + ret = ENOTSUP; + } else { + ret = EINVAL; + } + kmem_free(key_check, key_check_len); + goto out; + } + + if (nvl != NULL) + nvlist_free(nvl); + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_uint8_array(nvl, + 
zpool_prop_to_name(ZPOOL_PROP_KEYCHECK), + (uint8_t *)key_check, key_check_len) == 0); + VERIFY(nvlist_add_uint64(nvl, + zpool_prop_to_name(ZPOOL_PROP_KEYSTATUS), + ZFS_CRYPT_KEY_DEFINED) == 0); + VERIFY(nvlist_add_uint64(nvl, + zpool_prop_to_name(ZPOOL_PROP_SALT), salt) == 0); + ret = spa_prop_set(spa, nvl); + kmem_free(key_check, key_check_len); + } else { + clear_check_len = sizeof (guid) + ccmp->ulMACSize; + clear_check = kmem_alloc(clear_check_len, KM_SLEEP); + VERIFY(nvlist_lookup_nvlist(nvl, + zpool_prop_to_name(ZPOOL_PROP_KEYCHECK), &nv) == 0); + ret = nvlist_lookup_uint8_array(nv, ZPROP_VALUE, + (uint8_t **)&key_check, &key_check_len); + + ccmp->nonce = key_check; + ccmp->ulDataSize = clear_check_len; + ct.cd_format = CRYPTO_DATA_RAW; + ct.cd_offset = ZIO_CRYPT_WRAPKEY_IVLEN; + ct.cd_length = key_check_len - ZIO_CRYPT_WRAPKEY_IVLEN; + ct.cd_miscdata = NULL; + ct.cd_raw.iov_base = (char *)key_check; + ct.cd_raw.iov_len = key_check_len; + SET_CRYPTO_DATA(pt, (char *)clear_check, clear_check_len); + + /* + * Since we are using AES CCM we don't actually need to + * do anything with the decrypted output since the decrypt + * would fail if the MAC was wrong. + */ + ret = crypto_decrypt(&mech, &ct, &key->zk_key, NULL, &pt, NULL); + if (ret != CRYPTO_SUCCESS) { + if (ret == CRYPTO_MECH_NOT_SUPPORTED) { + ret = ENOTSUP; + } else { + ret = EINVAL; + } + goto out; + } + ret = 0; + } +out: + if (nvl != NULL) + nvlist_free(nvl); + if (clear_check != NULL) { + bzero(clear_check, clear_check_len); + kmem_free(clear_check, clear_check_len); + } + + return (ret); +} + +int +spa_crypto_key_unload(spa_t *spa) +{ + spa_keystore_node_t *node; + avl_tree_t *tree; + void *cookie = NULL; + + if (spa_keystatus(spa) != ZFS_CRYPT_KEY_AVAILABLE) + return (ENOENT); + + tree = &spa->spa_keystore->sk_keyscope_pool; + /* + * Remove all the keys in the pool keystore. + * Note that we don't avl_destroy() the tree itself since + * the key may get loaded again while the spa is still active. 
+ * + * For keys that have a positive refcnt when we remove them + * from the keystore mark them to be freed on refcnt reaching + * zero by zio_crypt_key_release(). The node is removed from + * the keystore tree though so that we don't stall this function. + */ + rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER); + while ((node = avl_destroy_nodes(tree, &cookie)) != NULL) { + if (!zio_crypt_key_free(node->zik_key)) { + node->zik_key->zk_free_on_release = TRUE; + } + kmem_free(node, sizeof (spa_keystore_node_t)); + } + + spa->spa_keystore->sk_spa_kek->zk_free_on_release = TRUE; + + spa->spa_keystore->sk_spa_kek = NULL; + rw_exit(&spa->spa_keystore->sk_lock); + + return (0); +} + +/* + * Convert the key or key reference that came over the ioctl + * into a zio_crypt_key_t held for the spa. + * + * Assumes we are holding the relevant spa_t locks when called. + */ +int +spa_crypto_key_load(spa_t *spa, zfs_ioc_crypto_t *ioc_key) +{ + zio_crypt_key_t *spa_kek = NULL; + int ret; + + if (spa_keystatus(spa) == ZFS_CRYPT_KEY_AVAILABLE) { + return (EEXIST); + } + + ret = zio_crypt_key_from_ioc(ioc_key, &spa_kek); + if (ret != 0 || spa_kek == NULL) { + return (ret); + } + + ret = spa_crypto_key_validate(spa, spa_kek, ioc_key->zic_salt, FALSE); + if (ret != 0) { + (void) zio_crypt_key_free(spa_kek); + return (ret); + } + rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER); + spa->spa_keystore->sk_spa_kek = spa_kek; + rw_exit(&spa->spa_keystore->sk_lock); + return (0); +} + + +/* + * dsl_crypto_key_unload + * + * Just remove the key from the in memory keystore. + * + * For safe operation we assume that userland has already successfully + * unmounted the dataset - and thus flushed all pending IO for it to disk + * so we won't need to encrypt anything with this key. + * + * If the key can't be removed return the failure back to our caller. 
+ */
+int
+dsl_crypto_key_unload(char *dsname)
+{
+	dsl_dataset_t *ds;
+	int ret;
+
+	if ((ret = dsl_dataset_hold(dsname, FTAG, &ds)) != 0)
+		return (ret);
+
+	ret = spa_keystore_remove(dsl_dataset_get_spa(ds), ds->ds_object);
+
+	dsl_dataset_rele(ds, FTAG);
+
+	return (ret);
+}
+
+/*
+ * dsl_crypto_key_load -
+ *
+ * To do this we need to get the dataset wrappedkey and unwrap
+ * the key using the key in ioc_key.
+ *
+ * This function is used ONLY after a dataset already exists it is NOT
+ * to be used during dataset creation.
+ *
+ * Returns 0 on success, EEXIST if the dataset key is already in the
+ * keystore, EACCES if the wrapped key cannot be unwrapped with the
+ * supplied key, or the error from dsl_dataset_hold() /
+ * zio_crypt_key_from_ioc().
+ */
+int
+dsl_crypto_key_load(char *dsname, zfs_ioc_crypto_t *ioc_key)
+{
+	dsl_dataset_t *ds;
+	zio_crypt_key_t *wrappingkey = NULL;
+	zio_crypt_key_t *dslkey;
+	char *wkeybuf = NULL;
+	uint64_t crypt;
+	int ret, wkeylen, keylen;
+
+	if ((ret = dsl_dataset_hold(dsname, FTAG, &ds)) != 0)
+		return (ret);
+
+	/*
+	 * This is key load not key change so if ds->ds_key is already
+	 * set we fail.
+	 */
+	if (spa_keystore_find(dsl_dataset_get_spa(ds), ds->ds_object) != NULL) {
+		dsl_dataset_rele(ds, FTAG);
+		return (EEXIST);
+	}
+
+	/*
+	 * Find out what size of key we expect.
+	 *
+	 * For now the wrapping key size (and type) matches the size
+	 * of the dataset key, this may not always be the case
+	 * (particularly if we ever support wrapping dataset keys
+	 * with asymmetric keys (eg RSA)).
+	 *
+	 * When alternate wrapping keys are added it maybe done using
+	 * a index property.
+	 *
+	 * Then get the wrapped key and unwrap using ioc_key.
+	 */
+	VERIFY(dsl_prop_get(dsname,
+	    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), 8, 1, &crypt, FTAG) == 0);
+	keylen = zio_crypt_table[crypt].ci_keylen;
+
+	ret = zio_crypt_key_from_ioc(ioc_key, &wrappingkey);
+	if (ret != 0 || wrappingkey == NULL) {
+		goto out;
+	}
+	wkeylen = WRAPPEDKEY_LEN(keylen);
+	wkeybuf = kmem_alloc(wkeylen, KM_SLEEP);
+
+	VERIFY(dsl_prop_get(dsname, zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), 1,
+	    wkeylen, wkeybuf, FTAG) == 0);
+
+	if ((ret = zio_crypt_unwrap_key(wrappingkey, crypt,
+	    wkeybuf, wkeylen, &dslkey)) != 0) {
+		/* Any unwrap failure is reported as a wrong key. */
+		ret = EACCES;
+		goto out;
+	}
+
+	dslkey->zk_crypt = crypt;
+	dslkey->zk_keyscope = ZFS_KEYSCOPE_DSL;
+	spa_keystore_insert(dsl_dataset_get_spa(ds), ds->ds_object,
+	    dslkey, ZFS_KEYSCOPE_DSL);
+out:
+	/*
+	 * The wrapping key built from the ioctl is only needed for the
+	 * unwrap above; free it so the key material does not leak.
+	 */
+	if (wrappingkey != NULL)
+		(void) zio_crypt_key_free(wrappingkey);
+	if (wkeybuf != NULL)
+		kmem_free(wkeybuf, wkeylen);
+
+	dsl_dataset_rele(ds, FTAG);
+
+	return (ret);
+}
+
+
+
+/*
+ * dsl_crypto_key_gen - Generate dataset key
+ *
+ * Generate a new key for this dataset based on its encryption property type.
+ * Store the key as a usable zio_crypt_key_t in the dsl_dataset_t and
+ * put the wrapped version of it in the wrappedkey property.
+ * + * returns 0 on success + */ +int +dsl_crypto_key_gen(dsl_dataset_t *ds, void *arg, cred_t *cr, dmu_tx_t *tx) +{ + int ret = -1; + char *dsname = NULL; /* avoid E_VAR_USED_BEFORE_SET dsl_dataset_name */ +#ifdef DEBUG + zio_crypt_key_t *debugkey; +#endif /* DEBUG */ + zio_crypt_key_t *wrappingkey, *dslkey; + caddr_t genkeybuf; + size_t genkeylen; + caddr_t wkeybuf = NULL; + size_t wkeylen; + uchar_t iv[ZIO_CRYPT_WRAPKEY_IVLEN]; + uint64_t crypt; + uint64_t keyscope; + spa_t *spa = dsl_dataset_get_spa(ds); + dsl_dir_t *dd = ds->ds_dir; + objset_t *os = dd->dd_pool->dp_meta_objset; + uint64_t props_zapobj = dd->dd_phys->dd_props_zapobj; + zfs_creat_t *zct = (zfs_creat_t *)arg; + nvlist_t *nvprops = zct->zct_props; + + /* + * We need the create or inherited values for crypt and keyscope + * so we know the size of the key to generate and which keystore + * it goes into. + * + * zfs_ioc_create saved away the crypt and keyscope userland used + * and this dataset will inherit if they weren't explict. + * + * We can't use dsl_prop_get_ds here because this function is called + * while the dataset is being created but we do have the nvlist list + * of properties. For the later update we can use zap_update. + * + */ + if (spa_version(spa) < SPA_VERSION_CRYPTO) { + return (0); + } + + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), &crypt) != 0) { + if (zct->zct_cryptkey != NULL) { + crypt = zct->zct_cryptkey->zk_crypt; + } else { + crypt = ZIO_CRYPT_DEFAULT; + } + } + + if (crypt == ZIO_CRYPT_OFF) { + return (0); + } + + ASSERT3U(crypt, !=, ZIO_CRYPT_INHERIT); + + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), &keyscope) != 0) { + if (zct->zct_cryptkey != NULL) { + keyscope = zct->zct_cryptkey->zk_keyscope; + } else { + keyscope = ZFS_KEYSCOPE_DEFAULT; + } + } + + /* + * crypt tells us which algorithm is being used and + * thus the type and size of key we need generated. 
+ * + * For now we are using random_get_bytes() to generate the + * raw key. Ideally use crypto_key_generate() however that needs + * a crypto_provider_t so only works with hardware providers. + * + * This needs to change for FIPS 140, the key really should be + * generated on the hardware crypto device as a sensitive + * key object and extracted in wrapped form, but that needs + * a crypto_key_wrap() that returns a key stream not an object. + */ + + genkeylen = zio_crypt_table[crypt].ci_keylen; + genkeybuf = kmem_alloc(genkeylen, KM_SLEEP); + ret = random_get_bytes((uchar_t *)genkeybuf, genkeylen); + if (ret != CRYPTO_SUCCESS) { + bzero(genkeybuf, genkeylen); + kmem_free(genkeybuf, genkeylen); + dsl_dataset_name(ds, dsname); + cmn_err(CE_WARN, + "key generation failed for dataset %s in pool %s", + dsname, spa->spa_name); + spa_history_internal_log(LOG_DS_CRYPTO_KEY_CREATE, spa, tx, cr, + "failed for dataset %llu: %d", ds->ds_object, ret); + return (ret); + } + + dslkey = zio_crypt_key_allocate(); + dslkey->zk_key.ck_format = CRYPTO_KEY_RAW; + dslkey->zk_key.ck_data = genkeybuf; + dslkey->zk_key.ck_length = (genkeylen * 8); + dslkey->zk_crypt = crypt; + dslkey->zk_keyscope = keyscope; + + /* + * The iv needs to be something unique to the dataset key so we + * don't resuse the same iv for different dataset keys when the + * wrapping key is the pool level one. + * (reuse of same iv with same key) + * The iv also needs to change when we rewrap the key due to + * wrapping key change (reuse of same iv with different key). + * AES_CCM allows for 13 bytes of nonce. This is sufficient + * for the initial IV. On key change (rewrap basically) the + * IV is treated as a number and incremented. + */ + ret = random_get_bytes(iv, ZIO_CRYPT_WRAPKEY_IVLEN); + if (ret != 0) { + (void) zio_crypt_key_free(dslkey); + goto out; + } + + /* + * Put the key in to the in memory dataset_t since + * since it is likely data will written in the dataset soon. 
+ * This saves us reading from disk (or cache) the just created + * key and having to unwrap it. More importantly it makes + * sure that the key is in the in memory keystore before it + * is wrapped. This ensures there isn't a race condition + * on the spa key if this create is happening during a key + * spa change operation. We won't get to complete the insert + * until the key change finishes by which time we have the + * new spa wrapping key. + * + * We have to do it this way for ZFS_KEYSCOPE_DSL as well since this + * function is being called during dataset creation time and there + * won't be another ioctl call to load the key before the mount + * happens. + * + */ + spa_keystore_insert(dsl_dataset_get_spa(ds), ds->ds_object, + dslkey, keyscope); + + if (keyscope == ZFS_KEYSCOPE_POOL) { + wrappingkey = spa->spa_keystore->sk_spa_kek; + } else if (keyscope == ZFS_KEYSCOPE_DSL) { + wrappingkey = zct->zct_cryptkey; + } + if (wrappingkey == NULL) { + (void) spa_keystore_remove(dsl_dataset_get_spa(ds), + ds->ds_object); + dsl_dataset_name(ds, dsname); + cmn_err(CE_WARN, + "key generation failed for dataset %s in pool %s: " + "no wrapping key", + dsname, spa->spa_name); + spa_history_internal_log(LOG_DS_CRYPTO_KEY_CREATE, spa, tx, cr, + "failed for dataset %llu: no wrapping key", + ds->ds_object); + goto out; + } + zio_crypt_key_hold(wrappingkey, FTAG); + ret = zio_crypt_wrap_key(wrappingkey, dslkey, + &wkeybuf, &wkeylen, &iv, ZIO_CRYPT_WRAPKEY_IVLEN); + if (ret != 0) { + (void) spa_keystore_remove(dsl_dataset_get_spa(ds), + ds->ds_object); + spa_history_internal_log(LOG_DS_CRYPTO_KEY_CREATE, spa, tx, cr, + "failed for dataset %llu unable to wrap key", + ds->ds_object, ret); + goto out; + } +#ifdef DEBUG + ASSERT3U(zio_crypt_unwrap_key(wrappingkey, crypt, + wkeybuf, wkeylen, &debugkey), ==, 0); +#endif /* DEBUG */ + zio_crypt_key_release(wrappingkey, FTAG); + + VERIFY(zap_update(os, props_zapobj, + zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), + 1, wkeylen, wkeybuf, tx) == 
0); + + spa_history_internal_log(LOG_DS_CRYPTO_KEY_CREATE, spa, tx, cr, + "succeeded for dataset %llu", ds->ds_object); + +out: + if (wkeylen > 0) { + bzero(wkeybuf, wkeylen); + kmem_free(wkeybuf, wkeylen); + } + + return (ret); +} + + +/* + * dsl_crypto_key_clone + * + * Clones MUST have the same keyscope value and the same clear text + * encryption key as their clone_parent. + * + * Our caller (dmu_objset_create_sync) must have a lock on the dataset. + */ +int +dsl_crypto_key_clone(dsl_dataset_t *ds, dsl_dataset_t *clone_parent, + cred_t *cr, dmu_tx_t *tx) +{ + uchar_t *wkeybuf; + size_t wkeylen; + uint64_t crypt, checksum, keyscope; + spa_t *spa = dsl_dataset_get_spa(ds); + dsl_dir_t *dd = ds->ds_dir; + objset_t *os = dd->dd_pool->dp_meta_objset; + uint64_t props_zapobj = dd->dd_phys->dd_props_zapobj; + + + VERIFY(dsl_prop_get_ds(clone_parent, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), 8, 1, &crypt, FTAG) == 0); + + if (spa_version(spa) < SPA_VERSION_CRYPTO || crypt == ZIO_CRYPT_OFF) { + return (0); + } + + wkeylen = WRAPPEDKEY_LEN(zio_crypt_table[crypt].ci_keylen); + wkeybuf = kmem_alloc(wkeylen, KM_SLEEP); + VERIFY(dsl_prop_get_ds(clone_parent, + zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), + 1, wkeylen, wkeybuf, FTAG) == 0); + + VERIFY(dsl_prop_get_ds(clone_parent, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), 8, 1, &keyscope, FTAG) == 0); + VERIFY(dsl_prop_get_ds(clone_parent, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), 8, 1, &checksum, FTAG) == 0); + + VERIFY(zap_update(os, props_zapobj, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), + 8, 1, &crypt, tx) == 0); + VERIFY(zap_update(os, props_zapobj, + zfs_prop_to_name(ZFS_PROP_CHECKSUM), + 8, 1, &checksum, tx) == 0); + VERIFY(zap_update(os, props_zapobj, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), + 8, 1, &keyscope, tx) == 0); + VERIFY(zap_update(os, props_zapobj, + zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), + 1, wkeylen, wkeybuf, tx) == 0); + + spa_history_internal_log(LOG_DS_CRYPTO_KEY_CREATE, spa, tx, cr, + "cloned for dataset %llu from 
%llu", + ds->ds_object, clone_parent->ds_object); + + bzero(wkeybuf, wkeylen); + kmem_free(wkeybuf, wkeylen); + + return (0); +} + +/* + * Change the kek for a spa. + * + * We need to visit every dataset in this spa that has keyscope=pool + * and encryption != off. For each one we need to unwrap the wrappedkey + * property with the old (current) spa key and rewrap it with the new spa + * key. + * + * The old (current) spa key must have already been loaded before we + * attempt this. + * + * We do not assume that the in memory spa_keystore_pool has all the + * keys and we do not add keys to the in memory keystore as a result + * of rewrapping them. + */ +struct rewrap_arg { + zio_crypt_key_t *skc_old_key; + zio_crypt_key_t *skc_new_key; +}; + + +static int +zio_crypt_rewrap_key(struct rewrap_arg *wrappingkeys, int crypt, + char *old_wkeybuf, size_t old_wkeylen, + char **new_wkeybuf, size_t *new_wkeylen) +{ + zio_crypt_key_t *ptkey; + uchar_t niv[ZIO_CRYPT_WRAPKEY_IVLEN]; + int ret = 0; + + ret = zio_crypt_unwrap_key(wrappingkeys->skc_old_key, crypt, + old_wkeybuf, old_wkeylen, &ptkey); + if (ret != 0) + return (ret); + + /* + * XXX for AES_CCM IV safety we should also check that there + * are no previous calls to this function in this key change + * operation (ie under the same key) that have the same IV. + * One possible way to do that is to start with a random IV and + * increment it for each dataset. + * In the mean time the sanity check is a simple one that the + * new and old IVs differ. 
+ */ + do { + ret = random_get_bytes(niv, ZIO_CRYPT_WRAPKEY_IVLEN); + } while (ret != 0 || + memcmp(old_wkeybuf, niv, ZIO_CRYPT_WRAPKEY_IVLEN) == 0); + + ret = zio_crypt_wrap_key(wrappingkeys->skc_new_key, ptkey, + new_wkeybuf, new_wkeylen, niv, ZIO_CRYPT_WRAPKEY_IVLEN); + + return (ret); +} + +static int +dsl_crypt_rewrap_key(char *name, void *arg) +{ + struct rewrap_arg *wrapping_keys = arg; + int ret; + uint64_t crypt = ZIO_CRYPT_DEFAULT, keyscope = ZFS_KEYSCOPE_DEFAULT; + char *old_wkeybuf = NULL, *new_wkeybuf = NULL; + size_t old_wkeylen, new_wkeylen; + + if (name == NULL) + return (0); + + (void) dsl_prop_get(name, + zfs_prop_to_name(ZFS_PROP_ENCRYPTION), 8, 1, &crypt, FTAG); + (void) dsl_prop_get(name, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), 8, 1, &keyscope, FTAG); + + if (crypt == ZIO_CRYPT_OFF || keyscope != ZFS_KEYSCOPE_POOL) { + return (0); + } + + /* + * In the future we may need to deal with a new wrapping key + * that uses a different wrapping algorithm than the old key + * when that happens the IV generation for the new key and + * the size of the new wrapped key will likely be different. 
+ */ + old_wkeylen = WRAPPEDKEY_LEN(zio_crypt_table[crypt].ci_keylen); + old_wkeybuf = kmem_alloc(old_wkeylen, KM_SLEEP); + new_wkeylen = old_wkeylen; + VERIFY(dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), 1, + old_wkeylen, old_wkeybuf, FTAG) == 0); + + VERIFY3U(zio_crypt_rewrap_key(wrapping_keys, crypt, + old_wkeybuf, old_wkeylen, &new_wkeybuf, &new_wkeylen), ==, 0); + + ret = dsl_prop_set(name, zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), + 1, (uint64_t)new_wkeylen, new_wkeybuf); + + if (old_wkeybuf != NULL) + kmem_free(old_wkeybuf, old_wkeylen); + if (new_wkeybuf != NULL) + kmem_free(new_wkeybuf, new_wkeylen); + + return (ret); +} + + +int +spa_crypt_key_change(spa_t *spa, zfs_ioc_crypto_t *ioc_key) +{ + zio_crypt_key_t *newkey = NULL; + struct rewrap_arg wrappingkeys = { 0 }; + uint64_t nsalt; + int ret; + + /* Need the existing key to be available */ + if (spa_keystatus(spa) != ZFS_CRYPT_KEY_AVAILABLE) { + return (ENOENT); + } + + if (!refcount_is_zero(&spa->spa_keystore->sk_spa_kek->zk_refcnt)) { + return (EBUSY); + } + + ret = zio_crypt_key_from_ioc(ioc_key, &newkey); + if (ret != 0 || newkey == NULL) { + return (ret); + } + nsalt = ioc_key->zic_salt; + + /* + * We hold a reference on the spa key and a write lock on + * the whole keystore for the whole time we are doing this + * key change since we don't want either the spa key or keystore to + * be pulled away part way through the change. + */ + rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER); + spa_key_hold(spa); + zio_crypt_key_hold(newkey, FTAG); + wrappingkeys.skc_old_key = spa->spa_keystore->sk_spa_kek; + wrappingkeys.skc_new_key = newkey; + + VERIFY3U(dmu_objset_find(spa->spa_name, + dsl_crypt_rewrap_key, &wrappingkeys, DS_FIND_CHILDREN), ==, 0); + + /* + * Now update sk_spa_kek to be the newkey. + * Since this is an update of the spa_kek rather than an unload + * we don't want to remove all the keys in the keystore, as they could + * be in use for other zios. 
+ * + * We also don't use spa_crypto_key_load() since we already did + * the copyin of the key above. Doing another copyin introduces + * the risk of setting a different key to what we just used to do + * the rewrapping if the user buffer was modified while we operated + * on the datasets. + * We do call spa_crypto_key_validate() to get the key check + * information updated. + */ + if (spa_crypto_key_validate(spa, newkey, nsalt, TRUE) != 0) { + VERIFY(zio_crypt_key_free(newkey) == 0); + } + spa->spa_keystore->sk_spa_kek->zk_free_on_release = TRUE; + spa_key_release(spa); + spa->spa_keystore->sk_spa_kek = newkey; + zio_crypt_key_release(newkey, FTAG); + rw_exit(&spa->spa_keystore->sk_lock); + + return (ret); +} + + +/* + * dsl_crypt_key_change + * + * The old key must already be present in memory since the user interface + * doesn't provide away to prompt or retrieve the old key. + * Given that we don't actually need to unwrap the old key in the + * CRYPTO_KEY_RAW case all we do is wrap the in memory copy and + * update the property. When we support keys in tokens this may be different. + * + * However we do need to read the old property to get the old IV for + * the generation number. + */ +int +dsl_crypt_key_change(char *dsname, zfs_ioc_crypto_t *newkey) +{ + int ret = 0; + size_t wkeylen, keylen; + char *wkeybuf = NULL; + uint64_t keyscope; + uchar_t iv[ZIO_CRYPT_WRAPKEY_IVLEN]; + dsl_dataset_t *ds; + zio_crypt_key_t *wrappingkey; + zio_crypt_key_t *ekey; + spa_t *spa; + + if ((ret = dsl_dataset_hold(dsname, FTAG, &ds)) != 0) + return (ret); + + /* + * Existing key must be already loaded by this point. + */ + spa = dsl_dataset_get_spa(ds); + if ((ekey = spa_keystore_find(spa, ds->ds_object)) == NULL) { + ret = ENOENT; + goto out; + } + + keylen = CRYPTO_BITS2BYTES(ekey->zk_key.ck_length); + /* + * A change of keyscope means a change of wrapping key. + * Validate the incoming data is self consistent and setup the + * correct wrapping key. 
+ */ + ret = dsl_prop_get(dsname, + zfs_prop_to_name(ZFS_PROP_KEYSCOPE), 8, 1, &keyscope, FTAG); + if (ret != 0) + goto out; + + if (newkey->zic_keyscope != ZFS_KEYSCOPE_INHERIT && + keyscope != newkey->zic_keyscope) { + (void) spa_keystore_remove(spa, ds->ds_object); + keyscope = newkey->zic_keyscope; + spa_keystore_insert(spa, ds->ds_object, ekey, keyscope); + } + + if (keyscope == ZFS_KEYSCOPE_POOL) { + /* Passed a new key when we don't need one */ + if (newkey->zic_keydatalen != 0) { + ret = EINVAL; + goto out; + } + if (spa_keystatus(spa) != ZFS_CRYPT_KEY_AVAILABLE) { + ret = ENOENT; + goto out; + } + wrappingkey = spa->spa_keystore->sk_spa_kek; + } else if (keyscope == ZFS_KEYSCOPE_DSL) { + /* New key must be the same size as the existing one. */ + if (keylen != newkey->zic_keydatalen) { + ret = EINVAL; + goto out; + } + ret = zio_crypt_key_from_ioc(newkey, &wrappingkey); + if (ret != 0 || wrappingkey == NULL) { + goto out; + } + } + + ret = random_get_bytes(iv, ZIO_CRYPT_WRAPKEY_IVLEN); + if (ret != 0) { + goto out; + } + + wkeylen = WRAPPEDKEY_LEN(keylen); + wkeybuf = kmem_alloc(wkeylen, KM_SLEEP); + zio_crypt_key_hold(wrappingkey, FTAG); + ret = zio_crypt_wrap_key(wrappingkey, ekey, + &wkeybuf, &wkeylen, &iv, ZIO_CRYPT_WRAPKEY_IVLEN); + zio_crypt_key_release(wrappingkey, FTAG); + if (keyscope == ZFS_KEYSCOPE_DSL) { + (void) zio_crypt_key_free(wrappingkey); + } + if (ret != 0) { + goto out; + } + + /* + * Now write out all the wrapped key and the salt (if any) which + * has to be set in kernel because it is hidden from users, and + * not subject to user delegation. + * + * The keysource & keyscope properties are updated from userland + * because they do need to be subject to user delegation checks. 
+ */ + if (newkey->zic_salt != 0) { + ret = dsl_prop_set(dsname, + zfs_prop_to_name(ZFS_PROP_SALT), + 8, 1, &(newkey->zic_salt)); + } + + ret = dsl_prop_set(dsname, zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), + 1, (uint64_t)wkeylen, wkeybuf); + +out: + if (wkeybuf != NULL) + kmem_free(wkeybuf, wkeylen); + dsl_dataset_rele(ds, FTAG); + return (ret); +} + + +/* + * zio_crypt_key_lookup + * + * This function looks up the key we need based on the bookmark. + * It returns a reference to the key that the caller should NOT free. + * The caller should use zio_crypt_key_release() + * On failure it returns an errno value. + * + * This is where we "plugin" alternate key management systems. + * A possible alternative would ask some "remote" key manager + * for the keys. + */ +zio_crypt_key_t * +zio_crypt_key_lookup(spa_t *spa, uint64_t objset, int crypt) +{ + zio_crypt_key_t *zck = NULL; + caddr_t wkeybuf = NULL; + size_t wkeylen = 0; + dsl_dataset_t *ds; + int ret; + uint64_t keyscope; + + +#ifndef _KERNEL + return (NULL); +#endif + /* + * First lookup the in memory keystore if we find the key there + * we are done. If we don't then for keyscope=pool we can try + * unwrapping it and putting it in the keystore if spa_key is present. + * For keyscope=dataset we fail. 
+ */
+	if ((zck = spa_keystore_find(spa, objset)) != NULL) {
+		return (zck);
+	}
+	rw_enter(&spa->spa_dsl_pool->dp_config_rwlock, RW_READER);
+
+	ret = dsl_dataset_hold_obj(spa->spa_dsl_pool, objset, FTAG, &ds);
+	if (ret != 0) {
+		rw_exit(&spa->spa_dsl_pool->dp_config_rwlock);
+		return (NULL);
+	}
+
+	ret = dsl_prop_get_ds(ds,
+	    zfs_prop_to_name(ZFS_PROP_KEYSCOPE), 8, 1, &keyscope, FTAG);
+	if (ret != 0) {
+		goto out;
+	}
+
+	switch (keyscope) {
+	case ZFS_KEYSCOPE_POOL:
+		if (spa_keystatus(spa) != ZFS_CRYPT_KEY_AVAILABLE)
+			goto out;
+		spa_key_hold(spa);
+		wkeylen = WRAPPEDKEY_LEN(zio_crypt_table[crypt].ci_keylen);
+		wkeybuf = kmem_alloc(wkeylen, KM_SLEEP);
+		ret = dsl_prop_get_ds(ds,
+		    zfs_prop_to_name(ZFS_PROP_WRAPPEDKEY), 1,
+		    wkeylen, wkeybuf, FTAG);
+		if (ret != 0) {
+			goto out;
+		}
+		if ((ret = zio_crypt_unwrap_key(spa->spa_keystore->sk_spa_kek,
+		    crypt, wkeybuf, wkeylen, &zck)) != 0) {
+			zck = NULL;
+		}
+		spa_key_release(spa);
+		break;
+	case ZFS_KEYSCOPE_DSL:
+		zck = NULL;
+		break;
+	}
+
+	if (zck != NULL) {
+		zck->zk_crypt = crypt;
+		zck->zk_keyscope = keyscope;
+		spa_keystore_insert(spa, objset, zck, keyscope);
+	}
+out:
+	if (wkeybuf != NULL) {
+		bzero(wkeybuf, wkeylen);
+		kmem_free(wkeybuf, wkeylen);
+	}
+
+	dsl_dataset_rele(ds, FTAG);
+	rw_exit(&spa->spa_dsl_pool->dp_config_rwlock);
+
+	return (zck);
+}
+
+
+/*
+ * zio_crypt_setup_mech
+ *
+ * Build the AES-CCM mechanism (mechanism id, CK_AES_CCM_PARAMS and nonce)
+ * used to encrypt or decrypt one block.
+ *
+ *	crypt		index into zio_crypt_table
+ *	type		object type; DMU_OT_INTENT_LOG selects ci_zil_maclen
+ *			and leaves the txg out of the nonce
+ *	txg, bookmark	inputs hashed into the nonce
+ *	datalen		stored in ulDataSize
+ *
+ * Returns a mechanism that must be released with zio_crypt_free_mech(),
+ * or NULL when the mechanism named in zio_crypt_table is not available.
+ */
+static crypto_mechanism_t *
+zio_crypt_setup_mech(int crypt, int type, uint64_t txg, zbookmark_t *bookmark,
+    size_t datalen)
+{
+	crypto_mechanism_t *mech;
+	crypto_mech_type_t mech_type;
+	CK_AES_CCM_PARAMS *ccmp;
+	char noncedigest[SHA256_DIGEST_LENGTH];
+	SHA2_CTX ctx;
+
+	/*
+	 * Resolve the mechanism before allocating anything, so that the
+	 * failure path has nothing to free.
+	 */
+	mech_type = crypto_mech2id(zio_crypt_table[crypt].ci_mechname);
+	if (mech_type == CRYPTO_MECH_INVALID)
+		return (NULL);
+
+	mech = kmem_zalloc(sizeof (crypto_mechanism_t), KM_SLEEP);
+	mech->cm_type = mech_type;
+
+	ccmp = kmem_alloc(sizeof (CK_AES_CCM_PARAMS), KM_SLEEP);
+	ccmp->ulNonceSize = zio_crypt_table[crypt].ci_ivlen;
+	ccmp->nonce = kmem_zalloc(ccmp->ulNonceSize, KM_SLEEP);
+	ccmp->ulAuthDataSize = 0;
+	ccmp->authData = NULL;
+	ccmp->ulDataSize = datalen;
+	if (type == DMU_OT_INTENT_LOG) {
+		ccmp->ulMACSize = zio_crypt_table[crypt].ci_zil_maclen;
+	} else {
+		ccmp->ulMACSize = zio_crypt_table[crypt].ci_maclen;
+	}
+
+	/*
+	 * We need more than just the txg for the nonce because there could
+	 * be a lot of data written and encrypted with a given key in this
+	 * txg and nonce values need to be unique under a given key for
+	 * CCM to be safe.
+	 *
+	 * A given bookmark can only be written once in a txg
+	 * and the txg is monotonically increasing.
+	 *
+	 * For the ZIL the txg will be 0 on a write but !0 on a replay
+	 * but the zb_blkid is safe to use since it is the ZIL sequence
+	 * number, and it never repeats.
+	 *
+	 * We can not add in the zb_objset because a snapshot has a
+	 * different zb_objset to its parent yet shares a key with it.
+	 *
+	 * For CCM the max NonceSize is 13, however we actually
+	 * only have 12 bytes of nonce because a 13 byte nonce with CCM
+	 * doesn't give us enough dataspace 65536 vs 16777215 for a
+	 * 12 byte nonce.
+	 *
+	 * We need to make sure we don't "lose" any of this information
+	 * so the safest thing to do is hash it down.  Any good hash
+	 * function would probably do here, but since this is for crypto
+	 * purposes we best use a FIPS approved algorithm and thus use
+	 * SHA256 truncated to 12 bytes.
+	 *
+	 * Don't call crypto_digest() here, we 'cheat' by knowing
+	 * that the size of our input is lower than the hardware
+	 * offload boundary, so we can avoid some overhead.  This should
+	 * probably be changed to do the crypto_digest() though for
+	 * FIPS 140-2 reasons of only using evaluated crypto providers.
+	 */
+	SHA2Init(SHA256, &ctx);
+	if (type != DMU_OT_INTENT_LOG) {
+		SHA2Update(&ctx, &txg, sizeof (uint64_t));
+	}
+	SHA2Update(&ctx, &bookmark->zb_object, sizeof (bookmark->zb_object));
+	SHA2Update(&ctx, &bookmark->zb_level, sizeof (bookmark->zb_level));
+	SHA2Update(&ctx, &bookmark->zb_blkid, sizeof (bookmark->zb_blkid));
+	SHA2Final(&noncedigest, &ctx);
+
+	/* Only the leading ulNonceSize bytes of the digest are used. */
+	bcopy(noncedigest, ccmp->nonce, ccmp->ulNonceSize);
+
+	mech->cm_param = (char *)ccmp;
+	mech->cm_param_len = sizeof (CK_AES_CCM_PARAMS);
+
+	return (mech);
+}
+
+/*
+ * zio_crypt_free_mech
+ *
+ * Release a mechanism built by zio_crypt_setup_mech().  For AES-CCM the
+ * nonce buffer is zeroed and freed first; then the parameter block and
+ * the mechanism itself are zeroed and freed.
+ */
+static void
+zio_crypt_free_mech(crypto_mechanism_t *mech)
+{
+	if (mech->cm_type == crypto_mech2id(SUN_CKM_AES_CCM)) {
+		CK_AES_CCM_PARAMS *ccmp = (CK_AES_CCM_PARAMS *)mech->cm_param;
+		if (ccmp != NULL) {
+			bzero(ccmp->nonce, ccmp->ulNonceSize);
+			kmem_free(ccmp->nonce, ccmp->ulNonceSize);
+		}
+	}
+	if (mech->cm_param_len != 0) {
+		bzero(mech->cm_param, mech->cm_param_len);
+		kmem_free(mech->cm_param, mech->cm_param_len);
+	}
+	bzero(mech, sizeof (crypto_mechanism_t));
+	kmem_free(mech, sizeof (crypto_mechanism_t));
+}
+
+
+/*
+ * dnode_setup_crypto_data
+ *
+ * For dnodes we only encrypt the bonusbufs (if any).  So we start out
+ * by just doing a bcopy of src into dest.
+ * When we reach the zio pipeline we may have many dnodes in a single
+ * zio buffer.  Use a UIO with the crypto framework to build up just
+ * the bonus buffers for encryption.  The MAC will be just of the
+ * bonus buffers.  We could add other dnode_phys_t fields as authenticated
+ * data in the future - that involves having a separate setup_mech
+ * function for dnodes as well.
+ *
+ * Returns the number of dnode bonusbufs we need encrypted.  If non-zero
+ * then *srciovp, *dstiovp and *cdlen are valid; dest is always valid
+ * (in case there are no bonusbufs).
+ *
+ * The concept for this and the traversal algorithm was cribbed from
+ * dnode_buf_byteswap().
+ */
+static int
+dnode_setup_crypto_data(void *src, size_t size,
+    void *dest, iovec_t **srciovp, iovec_t **dstiovp, size_t *cdlen,
+    boolean_t encrypting)
+{
+	dnode_phys_t *sdnp = src;
+	dnode_phys_t *ddnp = dest;
+	int i, iovcnt;
+	size_t num_dnodes;
+	size_t csize = 0;
+	iovec_t *srciov, *dstiov;
+
+	ASSERT3U(sizeof (dnode_phys_t), ==, (1 << DNODE_SHIFT));
+	ASSERT((size & (sizeof (dnode_phys_t) - 1)) == 0);
+
+	bcopy(src, dest, size);
+	/* First count how many bonusbufs we have, there could be none */
+	num_dnodes = size >> DNODE_SHIFT;
+	ASSERT3U(num_dnodes, <=, DNODES_PER_BLOCK);
+
+	for (i = 0, iovcnt = 0; i < num_dnodes; i++) {
+		/*
+		 * If dn_type is DMU_OT_NONE then bonuslen is 0
+		 * so there is nothing to do for that dn_type.
+		 *
+		 * We can check bonuslen for zero without byteswapping
+		 * since it won't matter if we have the wrong byte order.
+		 *
+		 * dn_bonustype is range checked before it is used as an
+		 * index so that a bad dnode can not make us read past
+		 * the end of dmu_ot[].
+		 */
+		if (sdnp->dn_type != DMU_OT_NONE && sdnp->dn_bonuslen != 0 &&
+		    sdnp->dn_bonustype < DMU_OT_NUMTYPES &&
+		    dmu_ot[sdnp->dn_bonustype].ot_encrypt == TRUE) {
+			iovcnt++;
+		}
+		sdnp++;
+	}
+	if (iovcnt == 0) {
+		return (0);
+	}
+
+	/*
+	 * We have iovcnt bonusbufs that need encrypting so build the iov.
+	 * Plus one additional iov for the MAC, on whichever side carries
+	 * the ciphertext: the destination when encrypting, the source
+	 * when decrypting.
+	 */
+	if (encrypting) {
+		srciov = kmem_alloc(sizeof (iovec_t) * iovcnt, KM_SLEEP);
+		dstiov = kmem_alloc(sizeof (iovec_t) * (iovcnt + 1), KM_SLEEP);
+	} else {
+		srciov = kmem_alloc(sizeof (iovec_t) * (iovcnt + 1), KM_SLEEP);
+		dstiov = kmem_alloc(sizeof (iovec_t) * iovcnt, KM_SLEEP);
+	}
+
+	/*
+	 * From the beginning again build up the iov.  The test must be
+	 * the same one used by the counting loop above.
+	 */
+	sdnp = src;
+	for (i = 0, iovcnt = 0; i < num_dnodes; i++) {
+		if (sdnp->dn_type != DMU_OT_NONE && sdnp->dn_bonuslen != 0 &&
+		    sdnp->dn_bonustype < DMU_OT_NUMTYPES &&
+		    dmu_ot[sdnp->dn_bonustype].ot_encrypt == TRUE) {
+			int off = (sdnp->dn_nblkptr - 1) * sizeof (blkptr_t);
+			size_t len = DN_MAX_BONUSLEN - off;
+			ASSERT3U(sdnp->dn_nblkptr, >, 0);
+			ASSERT3U(sdnp->dn_nblkptr, <=, DN_MAX_NBLKPTR);
+			ASSERT3U(sdnp->dn_bonustype, <, DMU_OT_NUMTYPES);
+			ASSERT3U(len, <=, DN_MAX_BONUSLEN);
+
+			srciov[iovcnt].iov_base = (char *)sdnp->dn_bonus + off;
+			srciov[iovcnt].iov_len = len;
+			dstiov[iovcnt].iov_base = (char *)ddnp->dn_bonus + off;
+			dstiov[iovcnt].iov_len = len;
+			csize += len;
+			iovcnt++;
+		}
+		sdnp++;
+		ddnp++;
+	}
+
+	/*
+	 * A few things we can assert to sanity check.
+	 * If we got here there was at least one bonusbuf found.
+	 * We can't have any more iovs than num_dnodes + one for the MAC.
+	 * The total to be encrypted must make sense too: it must be
+	 * strictly less than the total size, but we can do better than
+	 * that and check that the bonusbuf sizes are in range too.
+	 */
+	ASSERT3U(iovcnt, >=, 1);
+	ASSERT3U(iovcnt, <=, num_dnodes + 1);
+	ASSERT3U(csize, >, 0);
+	ASSERT3U(csize, <, size);
+	ASSERT3U(csize, <=, DN_MAX_BONUSLEN * iovcnt);
+
+	*srciovp = srciov;
+	*dstiovp = dstiov;
+	*cdlen = csize;
+
+	return (iovcnt);
+}
+
+/*
+ * dnode_check_buf
+ *
+ * Sanity check (via ASSERTs only) every dnode in buf that has a bonus
+ * buffer: dn_nblkptr and dn_bonustype must be in range.
+ */
+static void
+dnode_check_buf(void *buf, size_t size)
+{
+	dnode_phys_t *dnp = buf;
+	int i;
+	size_t iovcnt;
+
+	ASSERT3U(sizeof (dnode_phys_t), ==, (1 << DNODE_SHIFT));
+	ASSERT((size & (sizeof (dnode_phys_t) - 1)) == 0);
+
+	/* iovcnt is used here as the number of dnodes in the buffer */
+	iovcnt = size >> DNODE_SHIFT;
+	ASSERT3U(iovcnt, <=, DNODES_PER_BLOCK);
+
+	for (i = 0; i < iovcnt; i++) {
+		/*
+		 * If dn_type is DMU_OT_NONE then bonuslen is 0
+		 * so there is nothing to do for that dn_type.
+		 *
+		 * We can check bonuslen for zero without byteswapping
+		 * since it won't matter if we have the wrong byte order.
+		 */
+		if (dnp->dn_type != DMU_OT_NONE && dnp->dn_bonuslen != 0) {
+			int off = (dnp->dn_nblkptr - 1) * sizeof (blkptr_t);
+			size_t len = DN_MAX_BONUSLEN - off;
+			ASSERT3U(dnp->dn_nblkptr, >, 0);
+			ASSERT3U(dnp->dn_nblkptr, <=, DN_MAX_NBLKPTR);
+			ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
+			ASSERT3U(len, <=, DN_MAX_BONUSLEN);
+		}
+		dnp++;
+	}
+}
+
+
+/*
+ * ZIL Log Block encryption.
+ * Each log block has 1 or more log records.
+ * The common part of each log record and the zil_trailer_t at the end
+ * of the log block needs to be left in the clear.  The rest should be
+ * encrypted as it contains the actual transaction data.
+ *
+ * TX_WRITE log records are "special" since they have a blkptr_t embedded
+ * in them that needs to be left in the clear.  A small write also carries
+ * the actual data in the log record, after the lr_write_t; that data is
+ * encrypted, only the blkptr_t stays in the clear for a TX_WRITE.
+ */
+int
+zil_setup_crypto_data(char *src, size_t size,
+    void *dest, iovec_t **srciovp, iovec_t **dstiovp, size_t *cdlen,
+    boolean_t encrypting)
+{
+	char *slrp, *dlrp;
+	zil_trailer_t *ztp;
+	int reclen = 0, iovcnt = 0, csize = 0, cryptreclen = 0;
+	iovec_t *srciov, *dstiov;
+	int iovcheck = 0;
+
+	/* dest starts as a plain copy; only the iov ranges get overwritten */
+	bcopy(src, dest, size);
+	/* the trailer occupies the last sizeof (zil_trailer_t) bytes */
+	ztp = (zil_trailer_t *)(src + size - sizeof (zil_trailer_t));
+	/*
+	 * First pass: count the iovecs needed.
+	 * NOTE(review): both passes advance by lrc_reclen; a record with
+	 * lrc_reclen == 0 would never terminate the loop - confirm that
+	 * callers only hand in validated log blocks.
+	 */
+	for (slrp = src; slrp < src + ztp->zit_nused; slrp += reclen) {
+		lr_t *slr = (lr_t *)slrp;
+		reclen = slr->lrc_reclen;
+		if (slr->lrc_txtype == TX_WRITE &&
+		    reclen != sizeof (lr_write_t)) {
+			/* We need 2 iovecs for TX_WRITE with embedded data */
+			iovcnt++;
+		}
+		iovcnt++;
+	}
+	if (iovcnt == 0) {
+		/* no records: nothing allocated, outputs left untouched */
+		return (0);
+	}
+
+	/*
+	 * We have iovcnt log records that need encrypting.
+	 * Plus one more for the MAC, on the destination side when
+	 * encrypting and on the source side when decrypting.
+	 */
+	iovcheck = iovcnt;
+	if (encrypting) {
+		srciov = kmem_alloc(sizeof (iovec_t) * iovcnt, KM_SLEEP);
+		dstiov = kmem_alloc(sizeof (iovec_t) * (iovcnt + 1), KM_SLEEP);
+	} else {
+		srciov = kmem_alloc(sizeof (iovec_t) * (iovcnt + 1), KM_SLEEP);
+		dstiov = kmem_alloc(sizeof (iovec_t) * iovcnt, KM_SLEEP);
+	}
+
+	/* Second pass: fill in the iovecs, walking src and dest in step */
+	iovcnt = 0;
+	for (slrp = src, dlrp = dest; slrp < src + ztp->zit_nused;
+	    slrp += reclen, dlrp += reclen) {
+		lr_t *slr = (lr_t *)slrp;
+		lr_t *dlr = (lr_t *)dlrp;
+		reclen = slr->lrc_reclen;
+
+		/* default: everything after the common lr_t header */
+		cryptreclen = reclen - sizeof (lr_t);
+		if (slr->lrc_txtype == TX_WRITE) {
+			/*
+			 * This is a TX_WRITE
+			 * The blkptr needs to be in the clear for a claim
+			 * but the rest should be encrypted.
+			 */
+			/* first iovec: lr_write_t minus lr_t and blkptr_t */
+			cryptreclen = sizeof (lr_write_t) - sizeof (lr_t) -
+			    sizeof (blkptr_t);
+			srciov[iovcnt].iov_base = (char *)slr + sizeof (lr_t);
+			srciov[iovcnt].iov_len = cryptreclen;
+			dstiov[iovcnt].iov_base = (char *)dlr + sizeof (lr_t);
+			dstiov[iovcnt].iov_len = cryptreclen;
+			csize += cryptreclen;
+			if (reclen != sizeof (lr_write_t)) {
+				/* second iovec: data after the lr_write_t */
+				iovcnt++;
+				cryptreclen = reclen - sizeof (lr_write_t);
+				srciov[iovcnt].iov_base = (char *)slr +
+				    sizeof (lr_write_t);
+				srciov[iovcnt].iov_len = cryptreclen;
+				dstiov[iovcnt].iov_base = (char *)dlr +
+				    sizeof (lr_write_t);
+				dstiov[iovcnt].iov_len = cryptreclen;
+				csize += cryptreclen;
+			}
+		} else {
+			srciov[iovcnt].iov_base = (char *)slr + sizeof (lr_t);
+			srciov[iovcnt].iov_len = cryptreclen;
+			dstiov[iovcnt].iov_base = (char *)dlr + sizeof (lr_t);
+			dstiov[iovcnt].iov_len = cryptreclen;
+			csize += cryptreclen;
+		}
+
+		iovcnt++;
+	}
+	/* the second pass must have produced exactly what the first counted */
+	ASSERT(iovcnt != 0);
+	ASSERT3U(iovcheck, ==, iovcnt);
+	ASSERT3U(csize, >, 0);
+	ASSERT3U(csize, <, size);
+	ASSERT3U(csize, <, ztp->zit_nused);
+	ASSERT3U(csize, <, ZIL_MAX_BLKSZ);
+
+	*srciovp = srciov;
+	*dstiovp = dstiov;
+	*cdlen = csize;
+
+	return (iovcnt);
+}
+
+/*
+ * zio_encrypt_data
+ *
+ * To be called only from the zio pipeline.
+ *
+ * Encrypt srcsize bytes at src into a newly allocated buffer.
+ *
+ * On return *destp is a zio_buf_alloc() buffer of *destbufsizep bytes and
+ * *macp is a kmem_zalloc() buffer holding the MAC (ci_zil_maclen bytes for
+ * DMU_OT_INTENT_LOG, ci_maclen otherwise).  For dnode and ZIL blocks that
+ * contain nothing to encrypt, 0 is returned with *macp set to NULL and
+ * *destp holding a plain copy of src.
+ *
+ * Returns 0 on success, EAGAIN if the key can not be found or the crypto
+ * framework reports a key/PIN/login problem, EIO on any other failure.
+ */
+int
+zio_encrypt_data(int crypt, spa_t *spa, zbookmark_t *bookmark,
+    uint64_t txg, int type, void *src, uint64_t srcsize,
+    void **destp, uint64_t *destbufsizep, void **macp)
+{
+	crypto_data_t plaintext, ciphertext;
+	crypto_mechanism_t *mech;
+	zio_crypt_key_t *key;
+	uio_t srcuio = { 0 }, dstuio = { 0 };
+	/*
+	 * iovecs for the flat (not dnode, not ZIL) case.  They live at
+	 * function scope because dstuio still points at them when
+	 * crypto_encrypt() is called further down.
+	 */
+	iovec_t flatiov[2];
+	uint64_t destbufsize;
+	caddr_t dest, mac;
+	uint_t iovcnt;
+	size_t maclen;
+	int ret;
+#ifdef DEBUG
+	void *srccopy;
+#endif /* DEBUG */
+
+	ASSERT(crypt < ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
+	ASSERT3U(srcsize, <=, ZIO_CRYPT_MAX_CCM_DATA);
+
+	key = zio_crypt_key_lookup(spa, bookmark->zb_objset, crypt);
+	if (key == NULL)
+		return (EAGAIN);
+	zio_crypt_key_hold(key, FTAG);
+
+#ifdef DEBUG
+	/*
+	 * Keep a copy of the source so we can verify below that it was
+	 * not modified.  Taken only after the key lookup so that the
+	 * early return above has nothing to free.
+	 */
+	srccopy = kmem_alloc(srcsize, KM_SLEEP);
+	bcopy(src, srccopy, srcsize);
+#endif /* DEBUG */
+
+	destbufsize = P2ALIGN(srcsize, SPA_MINBLOCKSIZE);
+	dest = zio_buf_alloc(destbufsize);
+	*destp = dest;
+	/*
+	 * Note that the destbufsizep is NOT ciphertext.cd_length because
+	 * for CCM (and similar) mode(s) that includes the MAC, which we
+	 * placed elsewhere.  Also for dnode and zil blocks the cd_length is
+	 * shorter than that src/dest buffer we deal with in zio.
+	 */
+	*destbufsizep = destbufsize;
+	if (type == DMU_OT_INTENT_LOG) {
+		maclen = zio_crypt_table[crypt].ci_zil_maclen;
+	} else {
+		maclen = zio_crypt_table[crypt].ci_maclen;
+	}
+	mac = kmem_zalloc(maclen, KM_SLEEP);
+	*macp = mac;
+
+	if (type == DMU_OT_DNODE || type == DMU_OT_INTENT_LOG) {
+		iovec_t *srciov, *dstiov;
+		if (type == DMU_OT_DNODE) {
+			iovcnt = dnode_setup_crypto_data(src, srcsize, dest,
+			    &srciov, &dstiov, &plaintext.cd_length, B_TRUE);
+		} else if (type == DMU_OT_INTENT_LOG) {
+			iovcnt = zil_setup_crypto_data(src, srcsize, dest,
+			    &srciov, &dstiov, &plaintext.cd_length, B_TRUE);
+		}
+		if (iovcnt == 0) {
+			/* Nothing to encrypt; dest is a plain copy of src */
+			zio_crypt_key_release(key, FTAG);
+			kmem_free(mac, maclen);
+			*macp = NULL;
+#ifdef DEBUG
+			kmem_free(srccopy, srcsize);
+#endif /* DEBUG */
+			return (0);
+		}
+		srcuio.uio_iov = srciov;
+		srcuio.uio_iovcnt = iovcnt;
+		plaintext.cd_format = CRYPTO_DATA_UIO;
+		plaintext.cd_offset = 0;
+		plaintext.cd_uio = &srcuio;
+		plaintext.cd_miscdata = NULL;
+
+		/* the extra destination iovec receives the MAC */
+		dstiov[iovcnt].iov_base = mac;
+		dstiov[iovcnt].iov_len = maclen;
+		dstuio.uio_iov = dstiov;
+		dstuio.uio_iovcnt = iovcnt + 1;
+		ciphertext.cd_length = plaintext.cd_length + maclen;
+#ifdef DEBUG
+		if (type == DMU_OT_DNODE) {
+			dnode_check_buf(src, srcsize);
+			dnode_check_buf(dest, srcsize);
+		}
+#endif
+	} else {
+		SET_CRYPTO_DATA(plaintext, src, srcsize);
+
+		flatiov[0].iov_base = dest;
+		flatiov[0].iov_len = destbufsize;
+		flatiov[1].iov_base = mac;
+		flatiov[1].iov_len = maclen;
+		dstuio.uio_iov = flatiov;
+		dstuio.uio_iovcnt = 2;
+		ciphertext.cd_length = destbufsize + maclen;
+	}
+#ifdef _KERNEL
+	srcuio.uio_segflg = dstuio.uio_segflg = UIO_SYSSPACE;
+#else
+	srcuio.uio_segflg = dstuio.uio_segflg = UIO_USERSPACE;
+#endif /* _KERNEL */
+	ciphertext.cd_format = CRYPTO_DATA_UIO;
+	ciphertext.cd_offset = 0;
+	ciphertext.cd_uio = &dstuio;
+	ciphertext.cd_miscdata = NULL;
+
+	/*
+	 * Can NOT use inplace crypto here otherwise we end up
+	 * encrypting the copy in the ARC.  Having encrypted
+	 * data in the ARC is an interesting idea - particularly
+	 * if we want to use crypto keys to enhance access control.
+	 * However that isn't the current goal, and even if it was
+	 * encrypted content in the ARC might not be the best solution.
+	 * Given that we can control which datasets have data in the ARC
+	 * or L2ARC using the primarycache and secondary cache
+	 * properties that is probably the best solution to not storing
+	 * large volumes of data that is encrypted on disk in the clear
+	 * in memory.
+	 */
+	mech = zio_crypt_setup_mech(crypt, type, txg,
+	    bookmark, plaintext.cd_length);
+	ASSERT(mech != NULL);
+	if (key->zk_ctx_tmpl == NULL)
+		if (crypto_create_ctx_template(mech, &key->zk_key,
+		    &key->zk_ctx_tmpl, KM_SLEEP) != CRYPTO_SUCCESS)
+			key->zk_ctx_tmpl = NULL;
+retry:
+	ret = crypto_encrypt(mech, &plaintext, &key->zk_key,
+	    key->zk_ctx_tmpl, &ciphertext, NULL);
+	if (ret == CRYPTO_OLD_CTX_TEMPLATE) {
+		/* drop the stale template and retry without one */
+		if (key->zk_ctx_tmpl != NULL)
+			crypto_destroy_ctx_template(key->zk_ctx_tmpl);
+		key->zk_ctx_tmpl = NULL;
+		goto retry;
+	}
+
+	switch (ret) {
+	case CRYPTO_SUCCESS:
+		ret = 0;
+		break;
+	case CRYPTO_BUSY:
+		goto retry;
+	case CRYPTO_KEY_HANDLE_INVALID:
+	case CRYPTO_KEY_NEEDED:
+	case CRYPTO_KEY_CHANGED:
+	case CRYPTO_PIN_INCORRECT:
+	case CRYPTO_PIN_EXPIRED:
+	case CRYPTO_PIN_LOCKED:
+	case CRYPTO_USER_NOT_LOGGED_IN:
+		ret = EAGAIN;
+		break;
+	default:
+		cmn_err(CE_WARN,
+		    "zio_encrypt_data: crypto_encrypt %x\n", ret);
+		ret = EIO;
+	}
+
+	zio_crypt_key_release(key, FTAG);
+	zio_crypt_free_mech(mech);
+#ifdef DEBUG
+	ASSERT3U(bcmp(src, srccopy, srcsize), ==, 0);
+#ifdef _KERNEL
+	if (ret == 0)
+		ASSERT3U(bcmp(src, dest, srcsize), !=, 0);
+#endif /* _KERNEL */
+	if (type == DMU_OT_DNODE) {
+		dnode_check_buf(src, srcsize);
+		dnode_check_buf(dest, srcsize);
+	}
+	kmem_free(srccopy, srcsize);
+#endif /* DEBUG */
+	if (type == DMU_OT_DNODE || type == DMU_OT_INTENT_LOG) {
+		kmem_free(srcuio.uio_iov, sizeof (iovec_t) * srcuio.uio_iovcnt);
+		kmem_free(dstuio.uio_iov, sizeof (iovec_t) * dstuio.uio_iovcnt);
+	}
+
+	return (ret);
+}
+
+/*
+ * zio_decrypt_data
+ *
+ * To be called only from the zio pipeline.
+ *
+ * Decrypt srcsize bytes at src into the caller supplied dest buffer,
+ * verifying against mac.
+ *
+ * Returns 0 on success (or when a dnode/ZIL block contains nothing that
+ * was encrypted), EAGAIN if the key can not be found or the crypto
+ * framework reports a key/PIN/login problem, ECKSUM if the MAC does not
+ * verify, EIO on any other failure.  When EAGAIN comes from the crypto
+ * framework and zio_return_cipher_text is set, the ciphertext is copied
+ * to dest.
+ */
+int
+zio_decrypt_data(int crypt, spa_t *spa, zbookmark_t *bookmark,
+    uint64_t txg, int type, void *src, uint64_t srcsize, void *mac,
+    void *dest, uint64_t destsize)
+{
+	crypto_data_t ciphertext, plaintext;
+	crypto_mechanism_t *mech;
+	zio_crypt_key_t *key;
+	uio_t srcuio = { 0 }, dstuio = { 0 };
+	/*
+	 * iovecs for the flat (not dnode, not ZIL) case.  They live at
+	 * function scope because srcuio still points at them when
+	 * crypto_decrypt() is called further down.
+	 */
+	iovec_t flatiov[2];
+	size_t maclen;
+	uint_t iovcnt;
+	int ret;
+
+	ASSERT(crypt < ZIO_CRYPT_FUNCTIONS);
+	ASSERT3U(crypt, !=, ZIO_CRYPT_OFF);
+	ASSERT3U(destsize, <=, ZIO_CRYPT_MAX_CCM_DATA);
+
+	key = zio_crypt_key_lookup(spa, bookmark->zb_objset, crypt);
+	if (key == NULL)
+		return (EAGAIN);
+	zio_crypt_key_hold(key, FTAG);
+
+#ifdef _KERNEL
+	srcuio.uio_segflg = dstuio.uio_segflg = UIO_SYSSPACE;
+#else
+	srcuio.uio_segflg = dstuio.uio_segflg = UIO_USERSPACE;
+#endif /* _KERNEL */
+	ciphertext.cd_format = CRYPTO_DATA_UIO;
+	ciphertext.cd_offset = 0;
+	ciphertext.cd_uio = &srcuio;
+	ciphertext.cd_miscdata = NULL;
+	plaintext.cd_format = CRYPTO_DATA_UIO;
+	plaintext.cd_offset = 0;
+	plaintext.cd_uio = &dstuio;
+	plaintext.cd_miscdata = NULL;
+
+	if (type == DMU_OT_INTENT_LOG) {
+		maclen = zio_crypt_table[crypt].ci_zil_maclen;
+	} else {
+		maclen = zio_crypt_table[crypt].ci_maclen;
+	}
+	ASSERT(mac != NULL);
+	if (type == DMU_OT_DNODE || type == DMU_OT_INTENT_LOG) {
+		iovec_t *srciov, *dstiov;
+		if (type == DMU_OT_DNODE) {
+			iovcnt = dnode_setup_crypto_data(src, srcsize, dest,
+			    &srciov, &dstiov, &plaintext.cd_length, B_FALSE);
+		} else if (type == DMU_OT_INTENT_LOG) {
+			iovcnt = zil_setup_crypto_data(src, srcsize, dest,
+			    &srciov, &dstiov, &plaintext.cd_length, B_FALSE);
+		}
+		if (iovcnt == 0) {
+			/* Nothing was encrypted; dest is a copy of src */
+			zio_crypt_key_release(key, FTAG);
+			return (0);
+		}
+		dstuio.uio_iovcnt = iovcnt;
+		dstuio.uio_iov = dstiov;
+
+		/* the extra source iovec supplies the MAC */
+		ciphertext.cd_length = plaintext.cd_length + maclen;
+		srcuio.uio_iov = srciov;
+		srcuio.uio_iovcnt = iovcnt + 1;
+		srcuio.uio_iov[iovcnt].iov_base = mac;
+		srcuio.uio_iov[iovcnt].iov_len = maclen;
+#ifdef DEBUG
+		if (type == DMU_OT_DNODE) {
+			dnode_check_buf(src, srcsize);
+			dnode_check_buf(dest, srcsize);
+		}
+#endif
+	} else {
+		flatiov[0].iov_base = src;
+		flatiov[0].iov_len = srcsize;
+		flatiov[1].iov_base = mac;
+		flatiov[1].iov_len = maclen;
+		srcuio.uio_iov = &flatiov[0];
+		srcuio.uio_iovcnt = 2;
+		ciphertext.cd_length = srcsize + maclen;
+
+		SET_CRYPTO_DATA(plaintext, dest, destsize);
+	}
+
+	mech = zio_crypt_setup_mech(crypt, type, txg,
+	    bookmark, ciphertext.cd_length);
+	ASSERT(mech != NULL);
+	if (key->zk_ctx_tmpl == NULL)
+		if (crypto_create_ctx_template(mech, &key->zk_key,
+		    &key->zk_ctx_tmpl, KM_SLEEP) != CRYPTO_SUCCESS)
+			key->zk_ctx_tmpl = NULL;
+retry:
+	ret = crypto_decrypt(mech, &ciphertext, &key->zk_key, key->zk_ctx_tmpl,
+	    &plaintext, NULL);
+	if (ret == CRYPTO_OLD_CTX_TEMPLATE) {
+		/* drop the stale template and retry without one */
+		if (key->zk_ctx_tmpl != NULL)
+			crypto_destroy_ctx_template(key->zk_ctx_tmpl);
+		key->zk_ctx_tmpl = NULL;
+		goto retry;
+	}
+
+	switch (ret) {
+	case CRYPTO_SUCCESS:
+		ret = 0;
+		break;
+	case CRYPTO_BUSY:
+		goto retry;
+	case CRYPTO_KEY_HANDLE_INVALID:
+	case CRYPTO_KEY_NEEDED:
+	case CRYPTO_KEY_CHANGED:
+	case CRYPTO_PIN_INCORRECT:
+	case CRYPTO_PIN_EXPIRED:
+	case CRYPTO_PIN_LOCKED:
+	case CRYPTO_USER_NOT_LOGGED_IN: {
+		ret = EAGAIN;
+		if (zio_return_cipher_text) {
+			bcopy(src, dest, srcsize);
+		}
+		break;
+	}
+	case CRYPTO_DATA_LEN_RANGE:
+		cmn_err(CE_PANIC, "zio_decrypt_data crypto_decrypt failed:"
+		    "CRYPTO_DATA_LEN_RANGE");
+		break;
+	case CRYPTO_INVALID_MAC:
+		ret = ECKSUM;
+		break;
+	default:
+		cmn_err(CE_WARN, "zio_decrypt_data crypto_decrypt: %x\n", ret);
+		ret = EIO;
+	}
+
+	zio_crypt_key_release(key, FTAG);
+	zio_crypt_free_mech(mech);
+#ifdef DEBUG
+	if (type == DMU_OT_DNODE) {
+		dnode_check_buf(src, srcsize);
+		dnode_check_buf(dest, srcsize);
+	}
+#endif
+	if (type == DMU_OT_DNODE || type == DMU_OT_INTENT_LOG) {
+		kmem_free(srcuio.uio_iov, sizeof (iovec_t) * srcuio.uio_iovcnt);
+		kmem_free(dstuio.uio_iov, sizeof (iovec_t) * dstuio.uio_iovcnt);
+	}
+	return (ret);
+}
+
+/*
+ * L2ARC special case encryption.
+ *
+ * Since the L2ARC is currently written to disk by zio_phys_write
+ * it bypasses the normal calls to zio_write_encrypt.  So it has to
+ * be special cased for now.
+ *
+ * Note that the L2ARC uses AES in CBC mode.  Normally that wouldn't
+ * be a safe thing to do for disk encryption (which is why we use CCM).
+ * However there is an in memory checksum held and verified for all
+ * data written to vdevs by the L2ARC.  There is a risk that if someone
+ * can modify the on disk blocks written by the L2ARC feeder thread
+ * and modify the in memory checksums they can cause modified ciphertext
+ * to be fed for decryption that will verify.  However that risk isn't
+ * worth protecting against since doing so requires all privilege and
+ * there are much easier ways to exploit the ZFS encryption in that case.
+ */
+
+/*
+ * Generate a random raw key of ZPOOL_CRYPTO_KEK_LEN bytes and install it
+ * as the pool's sk_l2arc_key.  Returns CRYPTO_SUCCESS, or the error from
+ * random_get_bytes().
+ */
+static int
+l2arc_spa_key_gen(spa_t *spa)
+{
+	char *genkeybuf;
+	size_t genkeylen = ZPOOL_CRYPTO_KEK_LEN;
+	int ret;
+	zio_crypt_key_t *l2arc_key;
+
+	ASSERT(spa->spa_keystore->sk_l2arc_key == NULL);
+
+	genkeybuf = kmem_alloc(genkeylen, KM_SLEEP);
+
+	ret = random_get_bytes((uchar_t *)genkeybuf, genkeylen);
+	if (ret != CRYPTO_SUCCESS) {
+		bzero(genkeybuf, genkeylen);
+		kmem_free(genkeybuf, genkeylen);
+		return (ret);
+	}
+
+	/* the key structure takes over genkeybuf; ck_length is in bits */
+	l2arc_key = zio_crypt_key_allocate();
+	l2arc_key->zk_key.ck_format = CRYPTO_KEY_RAW;
+	l2arc_key->zk_key.ck_data = genkeybuf;
+	l2arc_key->zk_key.ck_length = (genkeylen * 8);
+
+	rw_enter(&spa->spa_keystore->sk_lock, RW_WRITER);
+	spa->spa_keystore->sk_l2arc_key = l2arc_key;
+	rw_exit(&spa->spa_keystore->sk_lock);
+
+	return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Encrypt ibufsz bytes from ibuf into obuf with AES-CBC, using the dva
+ * as the 16 byte mechanism parameter.  The L2ARC key is generated on
+ * first use.  Returns non-zero only if crypto_encrypt() succeeded.
+ */
+boolean_t
+l2arc_encrypt_buf(spa_t *spa, dva_t *dva,
+    const void* ibuf, size_t ibufsz, void *obuf)
+{
+	crypto_mechanism_t mech;
+	crypto_data_t ciphertext, plaintext;
+	crypto_key_t *key;
+	size_t obufsz = ibufsz;
+	int ret = CRYPTO_SUCCESS;
+
+	ASSERT3U(ibufsz, >=, 0);
+	ASSERT(ibufsz % 16 == 0);
+
+	if (spa->spa_keystore->sk_l2arc_key == NULL) {
+		if (l2arc_spa_key_gen(spa) != CRYPTO_SUCCESS)
+			return (FALSE);
+	}
+	ASSERT(spa->spa_keystore->sk_l2arc_key != NULL);
+	key = &spa->spa_keystore->sk_l2arc_key->zk_key;
+
+	SET_CRYPTO_DATA(plaintext, (char *)ibuf, ibufsz);
+	SET_CRYPTO_DATA(ciphertext, (char *)obuf, obufsz);
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_AES_CBC);
+	if (mech.cm_type == CRYPTO_MECH_INVALID)
+		return (FALSE);
+	mech.cm_param = (char *)dva;
+	mech.cm_param_len = sizeof (dva_t);
+	ASSERT3U(mech.cm_param_len, ==, 16);
+	ret = crypto_encrypt(&mech, &plaintext, key, NULL, &ciphertext, NULL);
+
+	ASSERT3U(ibufsz, ==, obufsz);
+
+	return (ret == CRYPTO_SUCCESS);
+}
+
+/*
+ * Decrypt bufsz bytes of buf in place (no separate output is passed to
+ * crypto_decrypt()) with AES-CBC, using the dva as the 16 byte mechanism
+ * parameter.  Fails if no L2ARC key has been generated for this pool.
+ */
+boolean_t
+l2arc_decrypt_buf(spa_t *spa, dva_t *dva, void* buf, size_t bufsz)
+{
+	crypto_mechanism_t mech;
+	crypto_data_t cdt;
+	int ret = CRYPTO_SUCCESS;
+	crypto_key_t *key;
+
+	if (spa->spa_keystore->sk_l2arc_key == NULL) {
+		return (FALSE);
+	}
+	key = &spa->spa_keystore->sk_l2arc_key->zk_key;
+
+	ASSERT3U(bufsz, >=, 0);
+
+	SET_CRYPTO_DATA(cdt, buf, bufsz);
+
+	mech.cm_type = crypto_mech2id(SUN_CKM_AES_CBC);
+	if (mech.cm_type == CRYPTO_MECH_INVALID)
+		return (FALSE);
+	mech.cm_param = (char *)dva;
+	mech.cm_param_len = sizeof (dva_t);
+	ASSERT3U(mech.cm_param_len, ==, 16);
+	ret = crypto_decrypt(&mech, &cdt, key, NULL, NULL, NULL);
+
+	return (ret == CRYPTO_SUCCESS);
+}
--- old/usr/src/uts/common/fs/zfs/zvol.c Tue Feb 3 13:19:02 2009 +++ new/usr/src/uts/common/fs/zfs/zvol.c Tue Feb 3 13:19:01 2009 @@ -604,6 +604,12 @@ zil_close(zv->zv_zilog); zv->zv_zilog = NULL; + + if (dmu_objset_evict_dbufs(zv->zv_objset)) { + txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); + (void) dmu_objset_evict_dbufs(zv->zv_objset); + } + dmu_objset_close(zv->zv_objset); zv->zv_objset = NULL; avl_destroy(&zv->zv_znode.z_range_avl); @@ -973,6 +979,7 @@ uint32_t blocksize = zv->zv_volblocksize; zilog_t *zilog =
zv->zv_zilog; lr_write_t *lr; + dmu_object_info_t doi; if (zilog->zl_replay) { dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); @@ -981,12 +988,19 @@ return; } + VERIFY(dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi) == 0); while (len) { ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize)); itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr)); - itx->itx_wr_state = - len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY; + /* && binds tighter than ||: group the doi_crypt test explicitly */ + if (len > zvol_immediate_write_sz && + (doi.doi_crypt == ZIO_CRYPT_OFF || + doi.doi_crypt == ZIO_CRYPT_INHERIT)) { + itx->itx_wr_state = WR_INDIRECT; + } else { + itx->itx_wr_state = WR_NEED_COPY; + } itx->itx_private = zv; lr = (lr_write_t *)&itx->itx_lr; lr->lr_foid = ZVOL_OBJ; --- old/usr/src/uts/common/sys/crypto/api.h Tue Feb 3 13:19:03 2009 +++ new/usr/src/uts/common/sys/crypto/api.h Tue Feb 3 13:19:02 2009 @@ -33,9 +33,6 @@ #include #include -#ifdef _KERNEL - - typedef long crypto_req_id_t; typedef void *crypto_bc_t; typedef void *crypto_context_t; @@ -389,8 +386,6 @@ extern int crypto_bufcall(crypto_bc_t bc, void (*func)(void *arg), void *arg); extern int crypto_unbufcall(crypto_bc_t bc); -#endif /* _KERNEL */ - /* * To obtain the list of key size ranges supported by a mechanism.
*/ @@ -408,7 +403,6 @@ crypto_mech_usage_t mi_usage; } crypto_mechanism_info_t; -#ifdef _KERNEL #ifdef _SYSCALL32 typedef struct crypto_mechanism_info32 { @@ -419,7 +413,6 @@ } crypto_mechanism_info32_t; #endif /* _SYSCALL32 */ -#endif /* _KERNEL */ extern int crypto_get_all_mech_info(crypto_mech_type_t, crypto_mechanism_info_t **, uint_t *, int); --- old/usr/src/uts/common/sys/fm/fs/zfs.h Tue Feb 3 13:19:05 2009 +++ new/usr/src/uts/common/sys/fm/fs/zfs.h Tue Feb 3 13:19:04 2009 @@ -35,6 +35,7 @@ #define ZFS_ERROR_CLASS "fs.zfs" #define FM_EREPORT_ZFS_CHECKSUM "checksum" +#define FM_EREPORT_ZFS_CRYPTO_KEY_UNAVAIL "crypto.key_unavailable" #define FM_EREPORT_ZFS_IO "io" #define FM_EREPORT_ZFS_DATA "data" #define FM_EREPORT_ZFS_POOL "zpool" --- old/usr/src/uts/common/sys/fs/zfs.h Tue Feb 3 13:19:06 2009 +++ new/usr/src/uts/common/sys/fs/zfs.h Tue Feb 3 13:19:05 2009 @@ -105,6 +105,12 @@ ZFS_PROP_USEDDS, ZFS_PROP_USEDCHILD, ZFS_PROP_USEDREFRESERV, + ZFS_PROP_ENCRYPTION, + ZFS_PROP_KEYSOURCE, + ZFS_PROP_WRAPPEDKEY, /* not exposed to the user */ + ZFS_PROP_KEYSCOPE, + ZFS_PROP_KEYSTATUS, + ZFS_PROP_SALT, /* not exposed to the user */ ZFS_NUM_PROPS } zfs_prop_t; @@ -130,6 +136,10 @@ ZPOOL_PROP_CACHEFILE, ZPOOL_PROP_FAILUREMODE, ZPOOL_PROP_LISTSNAPS, + ZPOOL_PROP_KEYSOURCE, + ZPOOL_PROP_KEYCHECK, /* not exposed to the user */ + ZPOOL_PROP_KEYSTATUS, + ZPOOL_PROP_SALT, /* not exposed to the user */ ZPOOL_NUM_PROPS } zpool_prop_t; @@ -250,14 +260,17 @@ #define SPA_VERSION_12 12ULL #define SPA_VERSION_13 13ULL #define SPA_VERSION_14 14ULL +#define SPA_VERSION_15 15ULL + /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.95/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. */ -#define SPA_VERSION SPA_VERSION_14 -#define SPA_VERSION_STRING "14" +#define SPA_VERSION SPA_VERSION_15 +#define SPA_VERSION_STRING "15" + /* * Symbolic names for the changes that caused a SPA_VERSION switch. 
* Used in the code when checking for presence or absence of a feature. @@ -292,7 +305,10 @@ #define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 #define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 #define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 +#define SPA_VERSION_CRYPTO SPA_VERSION_15 + + /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. This is independent of SPA/DMU/ZAP versioning. You must @@ -560,7 +576,8 @@ ZFS_IOC_GET_FSACL, ZFS_IOC_ISCSI_PERM_CHECK, ZFS_IOC_SHARE, - ZFS_IOC_INHERIT_PROP + ZFS_IOC_INHERIT_PROP, + ZFS_IOC_CRYPTO } zfs_ioc_t; /* @@ -673,6 +690,7 @@ LOG_DS_REFQUOTA, LOG_DS_REFRESERV, LOG_POOL_SCRUB_DONE, + LOG_DS_CRYPTO_KEY_CREATE, LOG_END } history_internal_events_t; --- old/usr/src/uts/intel/zfs/Makefile Tue Feb 3 13:19:08 2009 +++ new/usr/src/uts/intel/zfs/Makefile Tue Feb 3 13:19:07 2009 @@ -60,7 +60,7 @@ # Overrides and depends_on # MODSTUBS_DIR = $(OBJS_DIR) -LDFLAGS += -dy -Nfs/specfs -Ncrypto/swrand -Nmisc/idmap +LDFLAGS += -dy -Nfs/specfs -Ncrypto/swrand -Ncrypto/sha2 -Nmisc/idmap INC_PATH += -I$(UTSBASE)/common/fs/zfs INC_PATH += -I$(SRC)/common --- old/usr/src/uts/sparc/zfs/Makefile Tue Feb 3 13:19:09 2009 +++ new/usr/src/uts/sparc/zfs/Makefile Tue Feb 3 13:19:08 2009 @@ -60,7 +60,7 @@ # Overrides and depends_on # MODSTUBS_DIR = $(OBJS_DIR) -LDFLAGS += -dy -Nfs/specfs -Ncrypto/swrand -Nmisc/idmap +LDFLAGS += -dy -Nfs/specfs -Ncrypto/swrand -Ncrypto/sha2 -Nmisc/idmap INC_PATH += -I$(UTSBASE)/common/fs/zfs INC_PATH += -I$(SRC)/common