Print this page
expandable RAID-Z
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/zfs/vdev_file.c
+++ new/usr/src/uts/common/fs/zfs/vdev_file.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #pragma ident "%Z%%M% %I% %E% SMI"
27 27
28 28 #include <sys/zfs_context.h>
29 29 #include <sys/spa.h>
30 30 #include <sys/vdev_file.h>
31 31 #include <sys/vdev_impl.h>
32 32 #include <sys/zio.h>
33 33 #include <sys/fs/zfs.h>
34 34
35 35 /*
36 36 * Virtual device vector for files.
37 37 */
38 38
39 39 static int
40 40 vdev_file_open_common(vdev_t *vd)
41 41 {
42 42 vdev_file_t *vf;
43 43 vnode_t *vp;
44 44 int error;
45 45
46 46 /*
47 47 * We must have a pathname, and it must be absolute.
48 48 */
49 49 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
50 50 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
51 51 return (EINVAL);
52 52 }
53 53
54 54 vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
55 55
56 56 /*
57 57 * We always open the files from the root of the global zone, even if
58 58 * we're in a local zone. If the user has gotten to this point, the
59 59 * administrator has already decided that the pool should be available
60 60 * to local zone users, so the underlying devices should be as well.
61 61 */
62 62 ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
63 63 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
64 64 spa_mode | FOFFMAX, 0, &vp, 0, 0, rootdir, -1);
65 65
66 66 if (error) {
67 67 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
68 68 return (error);
69 69 }
70 70
71 71 vf->vf_vnode = vp;
72 72
73 73 #ifdef _KERNEL
74 74 /*
75 75 * Make sure it's a regular file.
76 76 */
77 77 if (vp->v_type != VREG) {
78 78 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
79 79 return (ENODEV);
80 80 }
81 81 #endif
82 82
83 83 return (0);
84 84 }
85 85
86 86 static int
87 87 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
88 88 {
89 89 vdev_file_t *vf;
90 90 vattr_t vattr;
91 91 int error;
92 92
93 93 if ((error = vdev_file_open_common(vd)) != 0)
94 94 return (error);
95 95
96 96 vf = vd->vdev_tsd;
97 97
98 98 /*
99 99 * Determine the physical size of the file.
100 100 */
101 101 vattr.va_mask = AT_SIZE;
102 102 error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL);
103 103 if (error) {
104 104 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
105 105 return (error);
106 106 }
107 107
108 108 *psize = vattr.va_size;
109 109 *ashift = SPA_MINBLOCKSHIFT;
110 110
111 111 return (0);
112 112 }
113 113
114 114 static void
115 115 vdev_file_close(vdev_t *vd)
116 116 {
117 117 vdev_file_t *vf = vd->vdev_tsd;
118 118
119 119 if (vf == NULL)
120 120 return;
121 121
122 122 if (vf->vf_vnode != NULL) {
123 123 (void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
124 124 (void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred, NULL);
125 125 VN_RELE(vf->vf_vnode);
126 126 }
127 127
128 128 kmem_free(vf, sizeof (vdev_file_t));
129 129 vd->vdev_tsd = NULL;
130 130 }
131 131
132 132 static int
133 133 vdev_file_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset,
134 134 enum uio_rw rw)
135 135 {
136 136 vdev_file_t *vf = vd->vdev_tsd;
137 137 ssize_t resid;
138 138 int error = 0;
139 139
140 140 if (vd == NULL || vf == NULL || vf->vf_vnode == NULL)
141 141 return (EINVAL);
142 142
143 143 ASSERT(rw == UIO_READ || rw == UIO_WRITE);
144 144
145 145 error = vn_rdwr(rw, vf->vf_vnode, data, size, offset, UIO_SYSSPACE,
146 146 0, RLIM64_INFINITY, kcred, &resid);
147 147 if (error || resid != 0)
148 148 return (EIO);
149 149 return (0);
150 150 }
151 151
152 152 /*
153 153 * Determine if the underlying device is accessible by reading and writing
154 154 * to a known location. We must be able to do this during syncing context
155 155 * and thus we cannot set the vdev state directly.
156 156 */
157 157 static int
158 158 vdev_file_probe(vdev_t *vd)
159 159 {
160 160 vdev_t *nvd;
161 161 char *vl_boot;
162 162 uint64_t offset;
163 163 int l, error = 0, retries = 0;
164 164
165 165 if (vd == NULL)
166 166 return (EINVAL);
167 167
168 168 /* Hijack the current vdev */
169 169 nvd = vd;
170 170
171 171 /*
172 172 * Pick a random label to rewrite.
173 173 */
174 174 l = spa_get_random(VDEV_LABELS);
175 175 ASSERT(l < VDEV_LABELS);
176 176
177 177 offset = vdev_label_offset(vd->vdev_psize, l,
178 178 offsetof(vdev_label_t, vl_boot_header));
179 179
180 180 vl_boot = kmem_alloc(VDEV_BOOT_HEADER_SIZE, KM_SLEEP);
181 181
182 182 while ((error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE,
183 183 offset, UIO_READ)) != 0 && retries == 0) {
184 184
185 185 /*
186 186 * If we failed with the vdev that was passed in then
187 187 * try allocating a new one and try again.
188 188 */
189 189 nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP);
190 190 if (vd->vdev_path)
191 191 nvd->vdev_path = spa_strdup(vd->vdev_path);
192 192 retries++;
193 193
194 194 error = vdev_file_open_common(nvd);
195 195 if (error)
196 196 break;
197 197 }
198 198
199 199 if ((spa_mode & FWRITE) && !error) {
200 200 error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE,
201 201 offset, UIO_WRITE);
202 202 }
203 203
204 204 if (retries) {
205 205 vdev_file_close(nvd);
206 206 if (nvd->vdev_path)
207 207 spa_strfree(nvd->vdev_path);
208 208 kmem_free(nvd, sizeof (vdev_t));
209 209 }
210 210 kmem_free(vl_boot, VDEV_BOOT_HEADER_SIZE);
211 211
212 212 if (!error)
213 213 vd->vdev_is_failing = B_FALSE;
214 214
215 215 return (error);
216 216 }
217 217
218 218 static int
219 219 vdev_file_io_start(zio_t *zio)
220 220 {
221 221 vdev_t *vd = zio->io_vd;
222 222 vdev_file_t *vf = vd->vdev_tsd;
223 223 ssize_t resid;
224 224 int error;
225 225
226 226 if (zio->io_type == ZIO_TYPE_IOCTL) {
227 227 zio_vdev_io_bypass(zio);
228 228
229 229 /* XXPOLICY */
230 230 if (!vdev_readable(vd)) {
231 231 zio->io_error = ENXIO;
232 232 return (ZIO_PIPELINE_CONTINUE);
233 233 }
234 234
235 235 switch (zio->io_cmd) {
236 236 case DKIOCFLUSHWRITECACHE:
237 237 zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
238 238 kcred, NULL);
239 239 dprintf("fsync(%s) = %d\n", vdev_description(vd),
240 240 zio->io_error);
241 241 break;
242 242 default:
243 243 zio->io_error = ENOTSUP;
244 244 }
245 245
246 246 return (ZIO_PIPELINE_CONTINUE);
247 247 }
248 248
249 249 /*
250 250 * In the kernel, don't bother double-caching, but in userland,
251 251 * we want to test the vdev_cache code.
252 252 */
253 253 #ifndef _KERNEL
254 254 if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
255 255 return (ZIO_PIPELINE_STOP);
256 256 #endif
257 257
258 258 if ((zio = vdev_queue_io(zio)) == NULL)
259 259 return (ZIO_PIPELINE_STOP);
260 260
261 261 /* XXPOLICY */
262 262 if (zio->io_type == ZIO_TYPE_WRITE)
263 263 error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO;
264 264 else
265 265 error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO;
266 266 error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error;
267 267 if (error) {
268 268 zio->io_error = error;
269 269 zio_interrupt(zio);
270 270 return (ZIO_PIPELINE_STOP);
271 271 }
272 272
273 273 zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
274 274 UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
275 275 zio->io_size, zio->io_offset, UIO_SYSSPACE,
276 276 0, RLIM64_INFINITY, kcred, &resid);
277 277
278 278 if (resid != 0 && zio->io_error == 0)
279 279 zio->io_error = ENOSPC;
280 280
281 281 zio_interrupt(zio);
282 282
283 283 return (ZIO_PIPELINE_STOP);
284 284 }
285 285
286 286 static int
287 287 vdev_file_io_done(zio_t *zio)
288 288 {
289 289 vdev_t *vd = zio->io_vd;
290 290
291 291 if (zio_injection_enabled && zio->io_error == 0)
292 292 zio->io_error = zio_handle_device_injection(vd, EIO);
293 293
294 294 /*
295 295 * If an error has been encountered then attempt to probe the device
296 296 * to determine if it's still accessible.
297 297 */
298 298 if (zio->io_error == EIO && vdev_probe(vd) != 0)
299 299 vd->vdev_is_failing = B_TRUE;
300 300
301 301 vdev_queue_io_done(zio);
302 302
303 303 #ifndef _KERNEL
304 304 if (zio->io_type == ZIO_TYPE_WRITE)
305 305 vdev_cache_write(zio);
306 306 #endif
307 307
308 308 return (ZIO_PIPELINE_CONTINUE);
|
↓ open down ↓ |
308 lines elided |
↑ open up ↑ |
309 309 }
310 310
311 311 vdev_ops_t vdev_file_ops = {
312 312 vdev_file_open,
313 313 vdev_file_close,
314 314 vdev_file_probe,
315 315 vdev_default_asize,
316 316 vdev_file_io_start,
317 317 vdev_file_io_done,
318 318 NULL,
319 + NULL,
319 320 VDEV_TYPE_FILE, /* name of this vdev type */
320 321 B_TRUE /* leaf vdev */
321 322 };
322 323
323 324 /*
324 325 * From userland we access disks just like files.
325 326 */
326 327 #ifndef _KERNEL
327 328
328 329 vdev_ops_t vdev_disk_ops = {
329 330 vdev_file_open,
330 331 vdev_file_close,
331 332 vdev_file_probe,
332 333 vdev_default_asize,
333 334 vdev_file_io_start,
334 335 vdev_file_io_done,
335 336 NULL,
336 337 VDEV_TYPE_DISK, /* name of this vdev type */
337 338 B_TRUE /* leaf vdev */
338 339 };
339 340
340 341 #endif
|
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX