diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 25a3c85543..5f540ac71c 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -125,7 +125,7 @@ check_file(const char *file, boolean_t force, boolean_t isspare) pool_state_t state; boolean_t inuse; - if ((fd = open(file, O_RDONLY)) < 0) + if ((fd = open(file, O_RDONLY|O_EXCL)) < 0) return (0); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) { @@ -250,7 +250,7 @@ check_disk(const char *path, blkid_cache cache, int force, * not easily decode the MBR return a failure and prompt to the * user to use force option since we cannot check the partitions. */ - if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) { check_error(errno); return -1; } @@ -366,7 +366,7 @@ is_whole_disk(const char *arg) (void) snprintf(path, sizeof (path), "%s%s%s", RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE); - if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) + if ((fd = open(path, O_RDWR|O_DIRECT|O_EXCL)) < 0) return (B_FALSE); if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) { (void) close(fd); @@ -498,7 +498,7 @@ make_leaf_vdev(const char *arg, uint64_t is_log) ddi_devid_t devid; char *minor = NULL, *devid_str = NULL; - if ((fd = open(path, O_RDONLY)) < 0) { + if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': " "%s\n"), path, strerror(errno)); nvlist_free(vdev); @@ -891,7 +891,7 @@ zero_label(char *path) char buf[size]; int err, fd; - if ((fd = open(path, O_WRONLY)) < 0) { + if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), path, strerror(errno)); return (-1); @@ -1045,7 +1045,7 @@ is_spare(nvlist_t *config, const char *path) uint_t i, nspares; boolean_t inuse; - if ((fd = open(path, O_RDONLY)) < 0) + if ((fd = open(path, O_RDONLY|O_EXCL)) < 0) return (B_FALSE); if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 || diff --git a/lib/libefi/rdwr_efi.c 
b/lib/libefi/rdwr_efi.c index de7ddd7777..7c0f5b4787 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -191,7 +191,7 @@ efi_get_info(int fd, struct dk_cinfo *dki_info) &dki_info->dki_partition); } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { strcpy(dki_info->dki_cname, "pseudo"); - dki_info->dki_ctype = DKC_MD; + dki_info->dki_ctype = DKC_VBD; rval = sscanf(dev_path, "/dev/%[a-zA-Z0-9-]p%hu", dki_info->dki_dname, &dki_info->dki_partition); @@ -281,8 +281,7 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || (dki_info.dki_ctype == DKC_VBD) || - (dki_info.dki_ctype == DKC_UNKNOWN) || - (dki_info.dki_ctype == DKC_MD)) + (dki_info.dki_ctype == DKC_UNKNOWN)) return (-1); #endif diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 62ea804101..656e62edf7 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -107,11 +107,18 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) return ENOMEM; /* - * XXX: Since we do not have devid support like Solaris we - * currently can't be as clever about opening the right device. - * For now we will simply open the device name provided and - * fail when it doesn't exist. If your devices get reordered - * your going to be screwed, use udev for now to prevent this. + * Devices are always opened by the path provided at configuration + * time. This means that if the provided path is a udev by-id path + * then drives may be recabled without an issue. If the provided + * path is a udev by-path path then the physical location information + * will be preserved. This can be critical for more complicated + * configurations where drives are located in specific physical + * locations to maximize the system's tolerance to component failure. + * Alternatively you can provide your own udev rule to flexibly map + * the drives as you see fit.
It is not advised that you use the + * /dev/[hs]d devices which may be reordered due to probing order. + * Devices in the wrong locations will be detected by the higher + * level vdev validation. */ mode = spa_mode(v->vdev_spa); bdev = vdev_bdev_open(v->vdev_path, vdev_bdev_mode(mode), vd); @@ -120,11 +127,6 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) return -PTR_ERR(bdev); } - /* - * XXX: Long term validate stored vd->vd_devid with a unique - * identifier read from the disk, likely EFI support. - */ - v->vdev_tsd = vd; vd->vd_bdev = bdev; @@ -205,8 +207,10 @@ vdev_disk_dio_put(dio_request_t *dr) { int rc = atomic_dec_return(&dr->dr_ref); - /* Free the dio_request when the last reference is dropped and - * ensure zio_interpret is called only once with the correct zio */ + /* + * Free the dio_request when the last reference is dropped and + * ensure zio_interpret is called only once with the correct zio + */ if (rc == 0) { zio_t *zio = dr->dr_zio; int error = dr->dr_error; @@ -259,76 +263,56 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, size, error) BIO_END_IO_RETURN(0); } -static struct bio * -bio_map_virt(struct request_queue *q, void *data, - unsigned int len, gfp_t gfp_mask) +static inline unsigned long +bio_nr_pages(void *bio_ptr, unsigned int bio_size) { - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - unsigned int offset, i, data_len = len; - const int nr_pages = end - start; - struct page *page; - struct bio *bio; - - bio = bio_alloc(gfp_mask, nr_pages); - if (!bio) - return ERR_PTR(-ENOMEM); - - offset = offset_in_page(kaddr); - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; - - VERIFY3P(page = vmalloc_to_page(data), !=, NULL); - VERIFY3U(bio_add_pc_page(q, bio, page, bytes, offset),==,bytes); - - data += bytes; - len -= bytes; -
offset = 0; - bytes = PAGE_SIZE; - } - - VERIFY3U(bio->bi_size, ==, data_len); - return bio; + return ((((unsigned long)bio_ptr + bio_size + PAGE_SIZE - 1) >> + PAGE_SHIFT) - ((unsigned long)bio_ptr >> PAGE_SHIFT)); } -static struct bio * -bio_map(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask) +static unsigned int +bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) { - struct bio *bio; + unsigned int offset, size, i; + struct page *page; - /* Cleanly map buffer we are passed in to a bio regardless - * of if the buffer is a virtual or physical address. */ - if (kmem_virt(data)) - bio = bio_map_virt(q, data, len, gfp_mask); - else - bio = bio_map_kern(q, data, len, gfp_mask); + offset = offset_in_page(bio_ptr); + for (i = 0; i < bio->bi_max_vecs; i++) { + size = PAGE_SIZE - offset; - return bio; + if (bio_size <= 0) + break; + + if (size > bio_size) + size = bio_size; + + if (kmem_virt(bio_ptr)) + page = vmalloc_to_page(bio_ptr); + else + page = virt_to_page(bio_ptr); + + if (bio_add_page(bio, page, size, offset) != size) + break; + + bio_ptr += size; + bio_size -= size; + offset = 0; + } + + return bio_size; } static int __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, size_t kbuf_size, uint64_t kbuf_offset, int flags) { - struct request_queue *q; dio_request_t *dr; caddr_t bio_ptr; uint64_t bio_offset; - int i, error = 0, bio_count, bio_size; + int bio_size, bio_count = 16; + int i = 0, error = 0; - ASSERT3S(kbuf_offset % bdev_hardsect_size(bdev), ==, 0); - q = bdev_get_queue(bdev); - if (!q) - return ENXIO; - - bio_count = (kbuf_size / (q->max_hw_sectors << 9)) + 1; +retry: dr = vdev_disk_dio_alloc(bio_count); if (dr == NULL) return ENOMEM; @@ -348,36 +332,58 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, * their volume block size to match the maximum request size and * the common case will be one bio per vdev IO request. 
*/ - bio_ptr = kbuf_ptr; + bio_ptr = kbuf_ptr; bio_offset = kbuf_offset; - for (i = 0; i < dr->dr_bio_count; i++) { - bio_size = MIN(kbuf_size, q->max_hw_sectors << 9); + bio_size = kbuf_size; + for (i = 0; i <= dr->dr_bio_count; i++) { - dr->dr_bio[i] = bio_map(q, bio_ptr, bio_size, GFP_NOIO); - if (IS_ERR(dr->dr_bio[i])) { - error = -PTR_ERR(dr->dr_bio[i]); + /* Finished constructing bio's for given buffer */ + if (bio_size <= 0) + break; + + /* + * By default only 'bio_count' bio's per dio are allowed. + * However, if we find ourselves in a situation where more + * are needed we allocate a larger dio and warn the user. + */ + if (dr->dr_bio_count == i) { vdev_disk_dio_free(dr); - return error; + bio_count *= 2; + printk("WARNING: Resized bio's/dio to %d\n",bio_count); + goto retry; + } + + dr->dr_bio[i] = bio_alloc(GFP_NOIO, + bio_nr_pages(bio_ptr, bio_size)); + if (dr->dr_bio[i] == NULL) { + vdev_disk_dio_free(dr); + return ENOMEM; } /* Matching put called by vdev_disk_physio_completion */ vdev_disk_dio_get(dr); dr->dr_bio[i]->bi_bdev = bdev; - dr->dr_bio[i]->bi_sector = bio_offset >> 9; + dr->dr_bio[i]->bi_sector = bio_offset/bdev_hardsect_size(bdev); + dr->dr_bio[i]->bi_rw = dr->dr_rw; dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion; dr->dr_bio[i]->bi_private = dr; - bio_ptr += bio_size; - bio_offset += bio_size; - kbuf_size -= bio_size; + /* Remaining size is returned to become the new size */ + bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size); + + /* Advance in buffer and construct another bio if needed */ + bio_ptr += dr->dr_bio[i]->bi_size; + bio_offset += dr->dr_bio[i]->bi_size; } /* Extra reference to protect dio_request during submit_bio */ vdev_disk_dio_get(dr); + /* Submit all bio's associated with this dio */ for (i = 0; i < dr->dr_bio_count; i++) - submit_bio(dr->dr_rw, dr->dr_bio[i]); + if (dr->dr_bio[i]) + submit_bio(dr->dr_rw, dr->dr_bio[i]); /* * On synchronous blocking requests we wait for all bio the completion diff --git 
a/scripts/Makefile.am b/scripts/Makefile.am index 45a889ecf8..0e9245a407 100644 --- a/scripts/Makefile.am +++ b/scripts/Makefile.am @@ -12,7 +12,7 @@ EXTRA_DIST = zfs-update.sh $(nobase_pkglibexec_SCRIPTS) ZFS=${top_srcdir}/scripts/zfs.sh ZCONFIG=${top_srcdir}/scripts/zconfig.sh ZTEST=${top_builddir}/cmd/ztest/ztest -ZPIOS=${top_srcdir}/scripts/zpios.sh +ZPIOS_SANITY=${top_srcdir}/scripts/zpios-sanity.sh check: @echo @@ -36,13 +36,6 @@ check: @echo "====================================" @echo @$(ZFS) - @$(ZPIOS) -c file-raid0 -t tiny - @$(ZPIOS) -c file-raid10 -t tiny | tail -1 - @$(ZPIOS) -c file-raidz -t tiny | tail -1 - @$(ZPIOS) -c file-raidz2 -t tiny | tail -1 - @$(ZPIOS) -c lo-raid0 -t tiny | tail -1 - @$(ZPIOS) -c lo-raid10 -t tiny | tail -1 - @$(ZPIOS) -c lo-raidz -t tiny | tail -1 - @$(ZPIOS) -c lo-raidz2 -t tiny | tail -1 + @$(ZPIOS_SANITY) @$(ZFS) -u @echo diff --git a/scripts/zpios-sanity.sh b/scripts/zpios-sanity.sh new file mode 100755 index 0000000000..d27a5b8ca6 --- /dev/null +++ b/scripts/zpios-sanity.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# +# ZFS/ZPOOL configuration test script. + +SCRIPT_COMMON=common.sh +if [ -f ./${SCRIPT_COMMON} ]; then +. ./${SCRIPT_COMMON} +elif [ -f /usr/libexec/zfs/${SCRIPT_COMMON} ]; then +. /usr/libexec/zfs/${SCRIPT_COMMON} +else +echo "Missing helper script ${SCRIPT_COMMON}" && exit 1 +fi + +PROG=zpios-sanity.sh +HEADER= + +usage() { +cat << EOF +USAGE: +$0 [hv] + +DESCRIPTION: + ZPIOS sanity tests + +OPTIONS: + -h Show this message + -v Verbose + -x Destructive hd/sd/md/dm/ram tests + -f Don't prompt due to -x + +EOF +} + +while getopts 'hvxf' OPTION; do + case $OPTION in + h) + usage + exit 1 + ;; + v) + VERBOSE=1 + ;; + x) + DANGEROUS=1 + ;; + f) + FORCE=1 + ;; + ?) + usage + exit + ;; + esac +done + +if [ $(id -u) != 0 ]; then + die "Must run as root" +fi + +zpios_test() { + CONFIG=$1 + TEST=$2 + LOG=`mktemp` + + ${ZPIOS_SH} -f -c ${CONFIG} -t ${TEST} &>${LOG} + if [ $? 
-ne 0 ]; then + if [ ${VERBOSE} ]; then + printf "FAIL: %-13s\n" ${CONFIG} + cat ${LOG} + else + if [ ! ${HEADER} ]; then + head -2 ${LOG} + HEADER=1 + fi + + printf "FAIL: %-13s" ${CONFIG} + tail -1 ${LOG} + fi + else + if [ ${VERBOSE} ]; then + cat ${LOG} + else + if [ ! ${HEADER} ]; then + head -2 ${LOG} + HEADER=1 + fi + + tail -1 ${LOG} + fi + fi + + rm -f ${LOG} +} + +if [ ${DANGEROUS} ] && [ ! ${FORCE} ]; then + cat << EOF +The -x option was passed which will result in UNRECOVERABLE DATA LOSS +on the following block devices: + + /dev/sd[abcd] + /dev/hda + /dev/ram0 + /dev/md0 + /dev/dm-0 + +To continue please confirm by entering YES: +EOF + read CONFIRM + if [ "${CONFIRM}" != "YES" ] && [ "${CONFIRM}" != "yes" ]; then + exit 0; + fi +fi + +# +# These configurations are all safe and pose no risk to any data on +# the system which runs them. They will confine all their IO to a +# file in /tmp or a loopback device configured to use a file in /tmp. +# +SAFE_CONFIGS=( \ + file-raid0 file-raid10 file-raidz file-raidz2 \ + lo-raid0 lo-raid10 lo-raidz lo-raidz2 \ +) + +# +# These configurations are downright dangerous. They will attempt +# to use various real block devices on your system which may contain +# data you care about. You are STRONGLY advised not to run this unless +# you are certain there is no data on the system you care about.
+# +DANGEROUS_CONFIGS=( \ + hda-raid0 \ + sda-raid0 \ + ram0-raid0 \ + md0-raid10 md0-raid5 \ + dm0-raid0 \ +) + +for CONFIG in ${SAFE_CONFIGS[*]}; do + zpios_test $CONFIG tiny +done + +if [ ${DANGEROUS} ]; then + for CONFIG in ${DANGEROUS_CONFIGS[*]}; do + zpios_test $CONFIG tiny + done +fi + +exit 0 diff --git a/scripts/zpool-config/dm0-raid0.sh b/scripts/zpool-config/dm0-raid0.sh index fa965ac019..89f66e73a8 100644 --- a/scripts/zpool-config/dm0-raid0.sh +++ b/scripts/zpool-config/dm0-raid0.sh @@ -15,10 +15,21 @@ LVCREATE=${LVCREATE:-/sbin/lvcreate} LVREMOVE=${LVREMOVE:-/sbin/lvremove} LVNAME=${LVNAME:-"lv_tank"} LVSTRIPES=${LVSTRIPES:-4} -LVSIZE=${LVSIZE:-4G} +LVSIZE=${LVSIZE:-32G} DEVICES="/dev/${VGNAME}/${LVNAME}" +zpool_dm_destroy() { + msg ${LVREMOVE} -f ${VGNAME}/${LVNAME} + ${LVREMOVE} -f ${VGNAME}/${LVNAME} >/dev/null + + msg ${VGREMOVE} -f ${VGNAME} + ${VGREMOVE} -f ${VGNAME} >/dev/null + + msg ${PVREMOVE} ${PVDEVICES} + ${PVREMOVE} ${PVDEVICES} >/dev/null +} + zpool_create() { # Remove EFI labels which cause pvcreate failure for DEVICE in ${PVDEVICES}; do @@ -26,30 +37,24 @@ zpool_create() { done msg ${PVCREATE} -f ${PVDEVICES} - ${PVCREATE} -f ${PVDEVICES} || exit 1 + ${PVCREATE} -f ${PVDEVICES} >/dev/null || exit 1 msg ${VGCREATE} ${VGNAME} ${PVDEVICES} - ${VGCREATE} ${VGNAME} ${PVDEVICES} || exit 2 + ${VGCREATE} ${VGNAME} ${PVDEVICES} >/dev/null || exit 2 msg ${LVCREATE} --size=${LVSIZE} --stripes=${LVSTRIPES} \ --name=${LVNAME} ${VGNAME} ${LVCREATE} --size=${LVSIZE} --stripes=${LVSTRIPES} \ - --name=${LVNAME} ${VGNAME} || exit 3 + --name=${LVNAME} ${VGNAME} >/dev/null || exit 3 msg ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} ${DEVICES} - ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} ${DEVICES} || exit 4 + ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} \ + ${DEVICES} || { zpool_dm_destroy; exit 4; } } zpool_destroy() { msg ${ZPOOL} destroy ${ZPOOL_NAME} - ${ZPOOL} destroy ${ZPOOL_NAME} || exit 1 + ${ZPOOL} destroy ${ZPOOL_NAME} - msg ${LVREMOVE} -f
${VGNAME}/${LVNAME} - ${LVREMOVE} -f ${VGNAME}/${LVNAME} || exit 2 - - msg ${VGREMOVE} -f ${VGNAME} - ${VGREMOVE} -f ${VGNAME} || exit 3 - - msg ${PVREMOVE} ${PVDEVICES} - ${PVREMOVE} ${PVDEVICES} || exit 4 + zpool_dm_destroy } diff --git a/scripts/zpool-config/md0-raid10.sh b/scripts/zpool-config/md0-raid10.sh index 4b1a979041..ccc7170153 100644 --- a/scripts/zpool-config/md0-raid10.sh +++ b/scripts/zpool-config/md0-raid10.sh @@ -10,23 +10,29 @@ MDRAID=${MDRAID:-10} DEVICES="/dev/md0" +zpool_md_destroy() { + msg ${MDADM} --manage --stop ${DEVICES} + ${MDADM} --manage --stop ${DEVICES} &>/dev/null + + msg ${MDADM} --zero-superblock ${MDDEVICES} + ${MDADM} --zero-superblock ${MDDEVICES} >/dev/null +} + zpool_create() { msg ${MDADM} --create ${DEVICES} --level=${MDRAID} \ --raid-devices=${MDCOUNT} ${MDDEVICES} ${MDADM} --create ${DEVICES} --level=${MDRAID} \ - --raid-devices=${MDCOUNT} ${MDDEVICES} || exit 1 + --raid-devices=${MDCOUNT} ${MDDEVICES} \ + &>/dev/null || { zpool_md_destroy; exit 1; } msg ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} ${DEVICES} - ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} ${DEVICES} || exit 2 + ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} \ + ${DEVICES} || { zpool_md_destroy; exit 2; } } zpool_destroy() { msg ${ZPOOL} destroy ${ZPOOL_NAME} - ${ZPOOL} destroy ${ZPOOL_NAME} || exit 1 + ${ZPOOL} destroy ${ZPOOL_NAME} - msg ${MDADM} --manage --stop ${DEVICES} - ${MDADM} --manage --stop ${DEVICES} || exit 2 - - msg ${MDADM} --zero-superblock ${MDDEVICES} - ${MDADM} --zero-superblock ${MDDEVICES} || exit 3 + zpool_md_destroy } diff --git a/scripts/zpool-config/md0-raid5.sh b/scripts/zpool-config/md0-raid5.sh index 9bf17181b0..b5b22fe7a1 100644 --- a/scripts/zpool-config/md0-raid5.sh +++ b/scripts/zpool-config/md0-raid5.sh @@ -10,23 +10,29 @@ MDRAID=${MDRAID:-5} DEVICES="/dev/md0" +zpool_md_destroy() { + msg ${MDADM} --manage --stop ${DEVICES} + ${MDADM} --manage --stop ${DEVICES} &>/dev/null + + msg ${MDADM} --zero-superblock ${MDDEVICES} +
${MDADM} --zero-superblock ${MDDEVICES} >/dev/null +} + zpool_create() { msg ${MDADM} --create ${DEVICES} --level=${MDRAID} \ --raid-devices=${MDCOUNT} ${MDDEVICES} ${MDADM} --create ${DEVICES} --level=${MDRAID} \ - --raid-devices=${MDCOUNT} ${MDDEVICES} || exit 1 + --raid-devices=${MDCOUNT} ${MDDEVICES} \ + &>/dev/null || { zpool_md_destroy; exit 1; } msg ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} ${DEVICES} - ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} ${DEVICES} || exit 2 + ${ZPOOL} create ${FORCE_FLAG} ${ZPOOL_NAME} \ + ${DEVICES} || { zpool_md_destroy; exit 2; } } zpool_destroy() { msg ${ZPOOL} destroy ${ZPOOL_NAME} - ${ZPOOL} destroy ${ZPOOL_NAME} || exit 1 + ${ZPOOL} destroy ${ZPOOL_NAME} - msg ${MDADM} --manage --stop ${DEVICES} - ${MDADM} --manage --stop ${DEVICES} || exit 2 - - msg ${MDADM} --zero-superblock ${MDDEVICES} - ${MDADM} --zero-superblock ${MDDEVICES} || exit 3 + zpool_md_destroy }