diff --git a/config/kernel-bio-args.m4 b/config/kernel-bio-args.m4 deleted file mode 100644 index 0e652dbc40..0000000000 --- a/config/kernel-bio-args.m4 +++ /dev/null @@ -1,24 +0,0 @@ -dnl # -dnl # 2.6.x API change -dnl # bio_end_io_t uses 2 args (size was dropped from prototype) -dnl # -AC_DEFUN([ZFS_AC_CONFIG_KERNEL_BIO_ARGS], - [AC_MSG_CHECKING([whether bio_end_io_t wants 2 args]) - tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" - ZFS_LINUX_TRY_COMPILE([ - #include - ],[ - void (*wanted_end_io)(struct bio *, int) = NULL; - bio_end_io_t *local_end_io; - - local_end_io = wanted_end_io; - ],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_2ARGS_BIO_END_IO_T, 1, - [bio_end_io_t wants 2 args]) - ],[ - AC_MSG_RESULT(no) - ]) - EXTRA_KCFLAGS="$tmp_flags" -]) diff --git a/config/kernel-bio-empty-barrier.m4 b/config/kernel-bio-empty-barrier.m4 new file mode 100644 index 0000000000..99549fe24a --- /dev/null +++ b/config/kernel-bio-empty-barrier.m4 @@ -0,0 +1,20 @@ +dnl # +dnl # 2.6.24 API change +dnl # Empty write barriers are now supported and we should use them. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BIO_EMPTY_BARRIER], [ + AC_MSG_CHECKING([whether bio_empty_barrier() is defined]) + EXTRA_KCFLAGS="-Werror" + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + struct bio bio; + (void)bio_empty_barrier(&bio); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_EMPTY_BARRIER, 1, + [bio_empy_barrier() is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-bio-end-io-t-args.m4 b/config/kernel-bio-end-io-t-args.m4 new file mode 100644 index 0000000000..ea69bdba56 --- /dev/null +++ b/config/kernel-bio-end-io-t-args.m4 @@ -0,0 +1,29 @@ +dnl # +dnl # 2.6.24 API change +dnl # Size argument dropped from bio_endio and bi_end_io, because the +dnl # bi_end_io is only called once now when the request is complete. +dnl # There is no longer any need for a size argument. This also means +dnl # that partial IO's are no longer possibe and the end_io callback +dnl # should not check bi->bi_size. Finally, the return type was updated +dnl # to void. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BIO_END_IO_T_ARGS], [ + AC_MSG_CHECKING([whether bio_end_io_t wants 2 args]) + tmp_flags="$EXTRA_KCFLAGS" + EXTRA_KCFLAGS="-Werror" + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + void (*wanted_end_io)(struct bio *, int) = NULL; + bio_end_io_t *local_end_io; + + local_end_io = wanted_end_io; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_2ARGS_BIO_END_IO_T, 1, + [bio_end_io_t wants 2 args]) + ],[ + AC_MSG_RESULT(no) + ]) + EXTRA_KCFLAGS="$tmp_flags" +]) diff --git a/config/kernel-bio-rw-syncio.m4 b/config/kernel-bio-rw-syncio.m4 new file mode 100644 index 0000000000..93a32e659f --- /dev/null +++ b/config/kernel-bio-rw-syncio.m4 @@ -0,0 +1,18 @@ +dnl # +dnl # 2.6.29 API change +dnl # BIO_RW_SYNC renamed to BIO_RW_SYNCIO +dnl # +AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNCIO], [ + AC_MSG_CHECKING([whether BIO_RW_SYNCIO is defined]) + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + int flags = BIO_RW_SYNCIO; + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BIO_RW_SYNCIO, 1, + [BIO_RW_SYNCIO is defined]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-invalidate-bdev-args.m4 b/config/kernel-invalidate-bdev-args.m4 new file mode 100644 index 0000000000..c768f6275f --- /dev/null +++ b/config/kernel-invalidate-bdev-args.m4 @@ -0,0 +1,19 @@ +dnl # +dnl # 2.6.22 API change +dnl # Unused destroy_dirty_buffers arg removed from prototype. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS], [ + AC_MSG_CHECKING([whether invalidate_bdev() wants 1 arg]) + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + struct block_device *bdev; + invalidate_bdev(bdev); + ],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_1ARG_INVALIDATE_BDEV, 1, + [invalidate_bdev() wants 1 arg]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/config/kernel-open-bdev-exclusive.m4 b/config/kernel-open-bdev-exclusive.m4 new file mode 100644 index 0000000000..734b2134cc --- /dev/null +++ b/config/kernel-open-bdev-exclusive.m4 @@ -0,0 +1,12 @@ +dnl # +dnl # 2.6.28 API change +dnl # open/close_bdev_excl() renamed to open/close_bdev_exclusive() +dnl # +AC_DEFUN([ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE], [ + ZFS_CHECK_SYMBOL_EXPORT( + [open_bdev_exclusive], + [fs/block_dev.c], + [AC_DEFINE(HAVE_OPEN_BDEV_EXCLUSIVE, 1, + [open_bdev_exclusive() is available])], + []) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index d79ccc4d6d..521e69f27b 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -4,7 +4,11 @@ dnl # AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL ZFS_AC_SPL - ZFS_AC_CONFIG_KERNEL_BIO_ARGS + ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE + ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS + ZFS_AC_KERNEL_BIO_END_IO_T_ARGS + ZFS_AC_KERNEL_BIO_RW_SYNCIO + ZFS_AC_KERNEL_BIO_EMPTY_BARRIER dnl # Kernel build make options dnl # KERNELMAKE_PARAMS="V=1" # Enable verbose module build diff --git a/module/zfs/include/sys/vdev_disk.h b/module/zfs/include/sys/vdev_disk.h index 520db0103d..06063d2c3d 100644 --- a/module/zfs/include/sys/vdev_disk.h +++ b/module/zfs/include/sys/vdev_disk.h @@ -10,15 +10,51 @@ extern "C" { #include #include #include +#include typedef struct vdev_disk { - ddi_devid_t vd_devid; - char *vd_minor; - ldi_handle_t vd_lh; + ddi_devid_t vd_devid; + char *vd_minor; + struct block_device *vd_bdev; } vdev_disk_t; -extern int vdev_disk_physio(ldi_handle_t, caddr_t, size_t, uint64_t, int); +extern int vdev_disk_physio(struct block_device *, caddr_t, + size_t, uint64_t, int); extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **); + +/* 2.6.24 API change */ +#ifdef HAVE_2ARGS_BIO_END_IO_T +# define BIO_END_IO_PROTO(fn, x, y, z) static void fn(struct bio *x, int z) +# define BIO_END_IO_RETURN(rc) return +#else +# define BIO_END_IO_PROTO(fn, x, y, z) static int fn(struct bio *x, \ + unsigned int y, int z) +# define BIO_END_IO_RETURN(rc) return rc +#endif /* HAVE_2ARGS_BIO_END_IO_T */ + +/* 2.6.29 API change */ +#ifdef HAVE_BIO_RW_SYNCIO +# define DIO_RW_SYNCIO BIO_RW_SYNCIO +#else +# define DIO_RW_SYNCIO BIO_RW_SYNC +#endif /* HAVE_BIO_RW_SYNCIO */ + +/* 2.6.28 API change */ +#ifdef HAVE_OPEN_BDEV_EXCLUSIVE +# define vdev_bdev_open(path, md, hld) open_bdev_exclusive(path, md, hld) +# define vdev_bdev_close(bdev, md) close_bdev_exclusive(bdev, md) +#else +# define vdev_bdev_open(path, md, hld) open_bdev_excl(path, md, hld) +# define vdev_bdev_close(bdev, md) close_bdev_excl(bdev) +#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ + +/* 2.6.22 API change */ +#ifdef HAVE_1ARG_INVALIDATE_BDEV +# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev) +#else +# define vdev_bdev_invalidate(bdev) invalidate_bdev(bdev, 1) +#endif /* HAVE_1ARG_INVALIDATE_BDEV */ + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 56357addb4..442ab79dc8 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -30,7 +30,6 @@ #include #include #include -#include /* * Virtual device vector for disks. @@ -46,58 +45,90 @@ typedef struct dio_request { struct bio *dr_bio[0]; /* Attached bio's */ } dio_request_t; -static int -vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) + +#ifdef HAVE_OPEN_BDEV_EXCLUSIVE +static fmode_t +vdev_bdev_mode(int smode) { - struct block_device *vd_lh; - vdev_disk_t *dvd; + fmode_t mode = 0; + + ASSERT3S(smode & (FREAD | FWRITE), !=, 0); + + if (smode & FREAD) + mode |= FMODE_READ; + + if (smode & FWRITE) + mode |= FMODE_WRITE; + + return mode; +} +#else +static int +vdev_bdev_mode(int smode) +{ + int mode = 0; + + ASSERT3S(smode & (FREAD | FWRITE), !=, 0); + + if ((smode & FREAD) && !(smode & FWRITE)) + mode = MS_RDONLY; + + return mode; +} +#endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ + +static int +vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) +{ + struct block_device *bdev; + vdev_disk_t *vd; + int mode; /* Must have a pathname and it must be absolute. */ - if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { - vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; + if (v->vdev_path == NULL || v->vdev_path[0] != '/') { + v->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; return EINVAL; } - dvd = kmem_zalloc(sizeof(vdev_disk_t), KM_SLEEP); - if (dvd == NULL) + vd = kmem_zalloc(sizeof(vdev_disk_t), KM_SLEEP); + if (vd == NULL) return ENOMEM; - /* XXX: Since we do not have devid support like Solaris we + /* + * XXX: Since we do not have devid support like Solaris we * currently can't be as clever about opening the right device. * For now we will simply open the device name provided and * fail when it doesn't exist. If your devices get reordered * your going to be screwed, use udev for now to prevent this. - * - * XXX: mode here could be the global spa_mode with a little - * munging of the flags to make then more agreeable to linux. - * However, simply passing a 0 for now gets us W/R behavior. */ - vd_lh = open_bdev_excl(vd->vdev_path, 0, dvd); - if (IS_ERR(vd_lh)) { - kmem_free(dvd, sizeof(vdev_disk_t)); - return -PTR_ERR(vd_lh); + mode = spa_mode(v->vdev_spa); + bdev = vdev_bdev_open(v->vdev_path, vdev_bdev_mode(mode), vd); + if (IS_ERR(bdev)) { + kmem_free(vd, sizeof(vdev_disk_t)); + return -PTR_ERR(bdev); } - /* XXX: Long term validate stored dvd->vd_devid with a unique + /* + * XXX: Long term validate stored vd->vd_devid with a unique * identifier read from the disk, likely EFI support. */ - vd->vdev_tsd = dvd; - dvd->vd_lh = vd_lh; + v->vdev_tsd = vd; + vd->vd_bdev = bdev; - /* Check if this is a whole device. When vd_lh->bd_contains == - * vd_lh we have a whole device and not simply a partition. */ - vd->vdev_wholedisk = !!(vd_lh->bd_contains == vd_lh); + /* Check if this is a whole device. When bdev->bd_contains == + * bdev we have a whole device and not simply a partition. */ + v->vdev_wholedisk = !!(bdev->bd_contains == bdev); /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ - vd->vdev_nowritecache = B_FALSE; + v->vdev_nowritecache = B_FALSE; /* Determine the actual size of the device (in bytes) * * XXX: SECTOR_SIZE is defined to 512b which may not be true for * your device, we must use the actual hardware sector size. */ - *psize = get_capacity(vd_lh->bd_disk) * SECTOR_SIZE; + *psize = get_capacity(bdev->bd_disk) * SECTOR_SIZE; /* Based on the minimum sector size set the block size */ *ashift = highbit(MAX(SECTOR_SIZE, SPA_MINBLOCKSIZE)) - 1; @@ -106,27 +137,22 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift) } static void -vdev_disk_close(vdev_t *vd) +vdev_disk_close(vdev_t *v) { - vdev_disk_t *dvd = vd->vdev_tsd; + vdev_disk_t *vd = v->vdev_tsd; - if (dvd == NULL) + if (vd == NULL) return; - if (dvd->vd_lh != NULL) - close_bdev_excl(dvd->vd_lh); + if (vd->vd_bdev != NULL) + vdev_bdev_close(vd->vd_bdev, + vdev_bdev_mode(spa_mode(v->vdev_spa))); - kmem_free(dvd, sizeof(vdev_disk_t)); - vd->vdev_tsd = NULL; + kmem_free(vd, sizeof(vdev_disk_t)); + v->vdev_tsd = NULL; } -#ifdef HAVE_2ARGS_BIO_END_IO_T -static void -vdev_disk_physio_completion(struct bio *bio, int rc) -#else -static int -vdev_disk_physio_completion(struct bio *bio, unsigned int size, int rc) -#endif /* HAVE_2ARGS_BIO_END_IO_T */ +BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, rc) { dio_request_t *dr = bio->bi_private; zio_t *zio; @@ -168,7 +194,7 @@ vdev_disk_physio_completion(struct bio *bio, unsigned int size, int rc) spin_unlock(&dr->dr_lock); /* Synchronous dio cleanup handled by waiter */ - if (dr->dr_rw & (1 << BIO_RW_SYNC)) { + if (dr->dr_rw & (1 << DIO_RW_SYNCIO)) { complete(&dr->dr_comp); } else { for (i = 0; i < dr->dr_bio_count; i++) @@ -186,13 +212,7 @@ vdev_disk_physio_completion(struct bio *bio, unsigned int size, int rc) spin_unlock(&dr->dr_lock); } - rc = 0; - -#ifdef HAVE_2ARGS_BIO_END_IO_T - return; -#else - return rc; -#endif /* HAVE_2ARGS_BIO_END_IO_T */ + BIO_END_IO_RETURN(0); } static struct bio * @@ -222,7 +242,7 @@ bio_map_virt(struct request_queue *q, void *data, bytes = len; VERIFY3P(page = vmalloc_to_page(data), !=, NULL); - VERIFY3U(bio_add_pc_page(q, bio, page, bytes, offset), ==, bytes); + VERIFY3U(bio_add_pc_page(q, bio, page, bytes, offset),==,bytes); data += bytes; len -= bytes; @@ -250,16 +270,19 @@ bio_map(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask) } static int -__vdev_disk_physio(struct block_device *vd_lh, zio_t *zio, caddr_t kbuf_ptr, +__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, size_t kbuf_size, uint64_t kbuf_offset, int flags) { - struct request_queue *q = vd_lh->bd_disk->queue; + struct request_queue *q; dio_request_t *dr; caddr_t bio_ptr; uint64_t bio_offset; int i, j, error = 0, bio_count, bio_size, dio_size; ASSERT3S(kbuf_offset % SECTOR_SIZE, ==, 0); + q = bdev_get_queue(bdev); + if (!q) + return ENXIO; bio_count = (kbuf_size / (q->max_hw_sectors << 9)) + 1; dio_size = sizeof(dio_request_t) + sizeof(struct bio *) * bio_count; @@ -271,13 +294,10 @@ __vdev_disk_physio(struct block_device *vd_lh, zio_t *zio, caddr_t kbuf_ptr, spin_lock_init(&dr->dr_lock); dr->dr_ref = 0; dr->dr_zio = zio; - dr->dr_rw = READ; + dr->dr_rw = flags; dr->dr_error = 0; dr->dr_bio_count = bio_count; - if (flags & (1 << BIO_RW)) - dr->dr_rw = (flags & (1 << BIO_RW_SYNC)) ? WRITE_SYNC : WRITE; - #ifdef BIO_RW_FAILFAST if (flags & (1 << BIO_RW_FAILFAST)) dr->dr_rw |= 1 << BIO_RW_FAILFAST; @@ -305,7 +325,7 @@ __vdev_disk_physio(struct block_device *vd_lh, zio_t *zio, caddr_t kbuf_ptr, return error; } - dr->dr_bio[i]->bi_bdev = vd_lh; + dr->dr_bio[i]->bi_bdev = bdev; dr->dr_bio[i]->bi_sector = bio_offset >> 9; dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; dr->dr_bio[i]->bi_private = dr; @@ -320,12 +340,14 @@ __vdev_disk_physio(struct block_device *vd_lh, zio_t *zio, caddr_t kbuf_ptr, submit_bio(dr->dr_rw, dr->dr_bio[i]); /* - * On syncronous blocking requests we wait for all bio the completion + * On synchronous blocking requests we wait for all bio the completion * callbacks to run. We will be woken when the last callback runs * for this dio. We are responsible for freeing the dio_request_t as - * well as the final reference on all attached bios. + * well as the final reference on all attached bios. Currently, the + * only synchronous consumer is vdev_disk_read_rootlabel() all other + * IO originating from vdev_disk_io_start() is asynchronous. */ - if (dr->dr_rw & (1 << BIO_RW_SYNC)) { + if (dr->dr_rw & (1 << DIO_RW_SYNCIO)) { wait_for_completion(&dr->dr_comp); ASSERT(dr->dr_ref == 0); error = dr->dr_error; @@ -340,87 +362,90 @@ __vdev_disk_physio(struct block_device *vd_lh, zio_t *zio, caddr_t kbuf_ptr, } int -vdev_disk_physio(ldi_handle_t vd_lh, caddr_t kbuf, +vdev_disk_physio(struct block_device *bdev, caddr_t kbuf, size_t size, uint64_t offset, int flags) { - return __vdev_disk_physio(vd_lh, NULL, kbuf, size, offset, flags); + return __vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags); } -#if 0 -/* XXX: Not yet supported */ -static void -vdev_disk_ioctl_done(void *zio_arg, int error) +/* 2.6.24 API change */ +#ifdef HAVE_BIO_EMPTY_BARRIER +BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, size, rc) { - zio_t *zio = zio_arg; + zio_t *zio = bio->bi_private; - zio->io_error = error; + zio->io_error = -rc; + if (rc && (rc == -EOPNOTSUPP)) + zio->io_vd->vdev_nowritecache = B_TRUE; + bio_put(bio); zio_interrupt(zio); + + BIO_END_IO_RETURN(0); } -#endif + +static int +vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) +{ + struct request_queue *q; + struct bio *bio; + + q = bdev_get_queue(bdev); + if (!q) + return ENXIO; + + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return ENOMEM; + + bio->bi_end_io = vdev_disk_io_flush_completion; + bio->bi_private = zio; + bio->bi_bdev = bdev; + submit_bio(WRITE_BARRIER, bio); + + return 0; +} +#else +static int +vdev_disk_io_flush(struct block_device *bdev, zio_t *zio) +{ + return ENOTSUP; +} +#endif /* HAVE_BIO_EMPTY_BARRIER */ static int vdev_disk_io_start(zio_t *zio) { - vdev_t *vd = zio->io_vd; - vdev_disk_t *dvd = vd->vdev_tsd; + vdev_t *v = zio->io_vd; + vdev_disk_t *vd = v->vdev_tsd; int flags, error; - if (zio->io_type == ZIO_TYPE_IOCTL) { + switch (zio->io_type) { + case ZIO_TYPE_IOCTL: - /* XXPOLICY */ - if (!vdev_readable(vd)) { + if (!vdev_readable(v)) { zio->io_error = ENXIO; return ZIO_PIPELINE_CONTINUE; } switch (zio->io_cmd) { - case DKIOCFLUSHWRITECACHE: if (zfs_nocacheflush) break; - if (vd->vdev_nowritecache) { + if (v->vdev_nowritecache) { zio->io_error = ENOTSUP; break; } -#if 0 - /* XXX: Not yet supported */ - vdev_disk_t *dvd = vd->vdev_tsd; - - zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done; - zio->io_dk_callback.dkc_flag = FLUSH_VOLATILE; - zio->io_dk_callback.dkc_cookie = zio; - - error = ldi_ioctl(dvd->vd_lh, zio->io_cmd, - (uintptr_t)&zio->io_dk_callback, - FKIOCTL, kcred, NULL); - - if (error == 0) { - /* - * The ioctl will be done asychronously, - * and will call vdev_disk_ioctl_done() - * upon completion. - */ + error = vdev_disk_io_flush(vd->vd_bdev, zio); + if (error == 0) return ZIO_PIPELINE_STOP; - } -#else - error = ENOTSUP; -#endif - if (error == ENOTSUP || error == ENOTTY) { - /* - * If we get ENOTSUP or ENOTTY, we know that - * no future attempts will ever succeed. - * In this case we set a persistent bit so - * that we don't bother with the ioctl in the - * future. - */ - vd->vdev_nowritecache = B_TRUE; - } zio->io_error = error; + if (error == ENOTSUP) + v->vdev_nowritecache = B_TRUE; break; @@ -429,20 +454,30 @@ vdev_disk_io_start(zio_t *zio) } return ZIO_PIPELINE_CONTINUE; + + case ZIO_TYPE_WRITE: + flags = WRITE; + break; + + case ZIO_TYPE_READ: + if (zio->io_flags & ZIO_FLAG_SPECULATIVE) + flags = READA; + else + flags = READ; + + break; + + default: + zio->io_error = ENOTSUP; + return ZIO_PIPELINE_CONTINUE; } - /* - * B_BUSY XXX: Not supported - * B_NOCACHE XXX: Not supported - */ - flags = ((zio->io_type == ZIO_TYPE_READ) ? READ : WRITE); - #ifdef BIO_RW_FAILFAST - if (zio->io_flags & ZIO_FLAG_IO_RETRY) + if (zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) flags |= (1 << BIO_RW_FAILFAST); #endif /* BIO_RW_FAILFAST */ - error = __vdev_disk_physio(dvd->vd_lh, zio, zio->io_data, + error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data, zio->io_size, zio->io_offset, flags); if (error) { zio->io_error = error; @@ -456,22 +491,20 @@ static void vdev_disk_io_done(zio_t *zio) { /* - * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if - * the device has been removed. If this is the case, then we trigger an - * asynchronous removal of the device. Otherwise, probe the device and - * make sure it's still accessible. + * If the device returned EIO, we revalidate the media. If it is + * determined the media has changed this triggers the asynchronous + * removal of the device from the configuration. */ - VERIFY3S(zio->io_error, ==, 0); -#if 0 - vdev_disk_t *dvd = vd->vdev_tsd; - int state = DKIO_NONE; + if (zio->io_error == EIO) { + vdev_t *v = zio->io_vd; + vdev_disk_t *vd = v->vdev_tsd; - if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state, - FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) { - vd->vdev_remove_wanted = B_TRUE; + if (check_disk_change(vd->vd_bdev)) { + vdev_bdev_invalidate(vd->vd_bdev); + v->vdev_remove_wanted = B_TRUE; spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE); } -#endif + } } vdev_ops_t vdev_disk_ops = { @@ -492,32 +525,18 @@ vdev_ops_t vdev_disk_ops = { int vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) { - struct block_device *vd_lh; + struct block_device *bdev; vdev_label_t *label; uint64_t s, size; int i; - /* - * Read the device label and build the nvlist. - * XXX: Not yet supported - */ -#if 0 - if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid, - &minor_name) == 0) { - error = ldi_open_by_devid(tmpdevid, minor_name, spa_mode, - kcred, &vd_lh, zfs_li); - ddi_devid_free(tmpdevid); - ddi_devid_str_free(minor_name); - } -#endif + bdev = vdev_bdev_open(devpath, vdev_bdev_mode(FREAD), NULL); + if (IS_ERR(bdev)) + return -PTR_ERR(bdev); - vd_lh = open_bdev_excl(devpath, MS_RDONLY, NULL); - if (IS_ERR(vd_lh)) - return -PTR_ERR(vd_lh); - - s = get_capacity(vd_lh->bd_disk) * SECTOR_SIZE; + s = get_capacity(bdev->bd_disk) * SECTOR_SIZE; if (s == 0) { - close_bdev_excl(vd_lh); + vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); return EIO; } @@ -529,8 +548,8 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) /* read vdev label */ offset = vdev_label_offset(size, i, 0); - if (vdev_disk_physio(vd_lh, (caddr_t)label, - VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, READ) != 0) + if (vdev_disk_physio(bdev, (caddr_t)label, + VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, READ_SYNC) != 0) continue; if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, @@ -557,7 +576,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) } vmem_free(label, sizeof(vdev_label_t)); - close_bdev_excl(vd_lh); + vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); return 0; }