From 18eee5d03f01946724d70a8dfa8506cd279fe07b Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 14 Oct 2009 16:02:51 -0700 Subject: [PATCH 1/2] Fix sector size and capacity calculation. Remove the hard coded 512 byte SECTOR_SIZE and replace it with bdev_hardsect_size() to get the correct hardware sector size. Usage of get_capacity() was incorrect. We the block_device references a partition we need to return bdev->part->nr_sects. If get_capacity() is used the entire device size will be returned ignoring partition information. This is however the correct thing to do when the block device in question has not partition table. --- module/zfs/vdev_disk.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 40b7530261..62ea804101 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -76,6 +76,19 @@ vdev_bdev_mode(int smode) } #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */ +static uint64_t +bdev_capacity(struct block_device *bdev) +{ + struct hd_struct *part = bdev->bd_part; + + /* The partition capacity referenced by the block device */ + if (part) + return part->nr_sects; + + /* Otherwise assume the full device capacity */ + return get_capacity(bdev->bd_disk); +} + static int vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) { @@ -122,15 +135,11 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift) /* Clear the nowritecache bit, causes vdev_reopen() to try again. */ v->vdev_nowritecache = B_FALSE; - /* Determine the actual size of the device (in bytes) - * - * XXX: SECTOR_SIZE is defined to 512b which may not be true for - * your device, we must use the actual hardware sector size. - */ - *psize = get_capacity(bdev->bd_disk) * SECTOR_SIZE; + /* Physical volume size in bytes */ + *psize = bdev_capacity(bdev) * bdev_hardsect_size(bdev); /* Based on the minimum sector size set the block size */ - *ashift = highbit(MAX(SECTOR_SIZE, SPA_MINBLOCKSIZE)) - 1; + *ashift = highbit(MAX(bdev_hardsect_size(bdev), SPA_MINBLOCKSIZE)) - 1; return 0; } @@ -314,7 +323,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr, uint64_t bio_offset; int i, error = 0, bio_count, bio_size; - ASSERT3S(kbuf_offset % SECTOR_SIZE, ==, 0); + ASSERT3S(kbuf_offset % bdev_hardsect_size(bdev), ==, 0); q = bdev_get_queue(bdev); if (!q) return ENXIO; @@ -558,7 +567,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) if (IS_ERR(bdev)) return -PTR_ERR(bdev); - s = get_capacity(bdev->bd_disk) * SECTOR_SIZE; + s = bdev_capacity(bdev) * bdev_hardsect_size(bdev); if (s == 0) { vdev_bdev_close(bdev, vdev_bdev_mode(FREAD)); return EIO; From 992be351d5733617a22e222284df88ce8f351797 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 14 Oct 2009 16:07:48 -0700 Subject: [PATCH 2/2] Changes required to integrate libefi in to Linux. The major change here is to fix up libefi to be linux aware. For the most part this wasn't too hard but there were a few major issues. First off I needed to handle the DKIOCGMEDIAINFO and DKIOCINFO ioctls. There is no direct equivilant for these ioctls under linux. To handle this I added wrapper functions which under Solaris simple call the ioctls. But under Linux dig around the system a little bit getting the needed info to fill in the requested structures. Secondly the efi_ioctl() call was adapted such that under linux it directly read or writes out the partition table. Under Solaris this work was handed off to the kernel via an ioctl. In the efi_write() case we also ensure we prompt the kernel via BLKRRPART to re-scan the new partition table. The libefi generated partition tables are correct but older versions of ~parted-1.8.1 can not read them without a small patch. The kernel and fdisk are able to read them just fine. Thirdly efi_alloc_and_init() which is used by zpool to determine if a device is a 'wholedisk' was updated to be linux aware. This check is performed by using the partition number for the device, which the partition number is 0 on linux it is a 'wholedisk'. However, certain device type such as the loopback and ram disks needed to be excluded because they do not support partitioning. Forthly the zpool command was made symlink aware so it can correctly resolve udev entries such as /dev/disk/by-*/*. This symlinks are fully expanded ensuring all block devices are recognized. When a when a 'wholedisk' block device is detected we now properly write out an efi label and place zfs in the first partition (0th slice). This partition is created 1MiB in to the disk to ensure it is aligned nicely with all high end block devices I'm aware of. This all works for me now but it did take quite a bit of work to get it all sorted out. It would not surprise me if certain special cases were missed so we should keep any eye of for any odd behavior. --- cmd/zpool/zpool_vdev.c | 24 ++- lib/libefi/rdwr_efi.c | 312 +++++++++++++++++++++++++++++++----- lib/libzfs/include/libzfs.h | 4 + lib/libzfs/libzfs_pool.c | 38 ++++- 4 files changed, 326 insertions(+), 52 deletions(-) diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c index 6d41a6d136..986863ae5d 100644 --- a/cmd/zpool/zpool_vdev.c +++ b/cmd/zpool/zpool_vdev.c @@ -400,17 +400,23 @@ make_leaf_vdev(const char *arg, uint64_t is_log) if (arg[0] == '/') { /* * Complete device or file path. Exact type is determined by - * examining the file descriptor afterwards. + * examining the file descriptor afterwards. Symbolic links + * are resolved to their real paths for the is_whole_disk() + * and S_ISBLK/S_ISREG type checks. */ - wholedisk = is_whole_disk(arg); - if (!wholedisk && (stat64(arg, &statbuf) != 0)) { + if (realpath(arg, path) == NULL) { (void) fprintf(stderr, - gettext("cannot open '%s': %s\n"), - arg, strerror(errno)); + gettext("cannot resolve path '%s'\n"), arg); return (NULL); } - (void) strlcpy(path, arg, sizeof (path)); + wholedisk = is_whole_disk(path); + if (!wholedisk && (stat64(path, &statbuf) != 0)) { + (void) fprintf(stderr, + gettext("cannot open '%s': %s\n"), + path, strerror(errno)); + return (NULL); + } } else { /* * This may be a short path for a device, or it could be total @@ -888,8 +894,10 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) uint64_t wholedisk; int fd; int ret; +#if defined(__sun__) || defined(__sun) ddi_devid_t devid; char *minor = NULL, *devid_str = NULL; +#endif verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); @@ -918,7 +926,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) /* * Fill in the devid, now that we've labeled the disk. */ - (void) snprintf(buf, sizeof (buf), "%ss0", path); + (void) snprintf(buf, sizeof (buf), "%s%s", path, FIRST_SLICE); if ((fd = open(buf, O_RDONLY)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), @@ -926,6 +934,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) return (-1); } +#if defined(__sun__) || defined(__sun) if (devid_get(fd, &devid) == 0) { if (devid_get_minor_name(fd, &minor) == 0 && (devid_str = devid_str_encode(devid, minor)) != @@ -939,6 +948,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv) devid_str_free(minor); devid_free(devid); } +#endif /* * Update the path to refer to the 's0' slice. The presence of diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c index 31eb3d3f61..ae6138db8f 100644 --- a/lib/libefi/rdwr_efi.c +++ b/lib/libefi/rdwr_efi.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -39,7 +40,9 @@ #include #include #include -#include +#if defined(__linux__) +#include +#endif static struct uuid_to_ptag { struct uuid uuid; @@ -50,11 +53,11 @@ static struct uuid_to_ptag { { EFI_SWAP }, { EFI_USR }, { EFI_BACKUP }, - { 0 }, /* STAND is never used */ + { EFI_UNUSED }, /* STAND is never used */ { EFI_VAR }, { EFI_HOME }, { EFI_ALTSCTR }, - { 0 }, /* CACHE (cachefs) is never used */ + { EFI_UNUSED }, /* CACHE (cachefs) is never used */ { EFI_RESERVED }, { EFI_SYSTEM }, { EFI_LEGACY_MBR }, @@ -108,21 +111,115 @@ int efi_debug = 1; int efi_debug = 0; #endif -extern unsigned int efi_crc32(const unsigned char *, unsigned int); static int efi_read(int, struct dk_gpt *); +/* + * Return a 32-bit CRC of the contents of the buffer. Pre-and-post + * one's conditioning will be handled by crc32() internally. + */ +static uint32_t +efi_crc32(const unsigned char *buf, unsigned int size) +{ + uint32_t crc = crc32(0, Z_NULL, 0); + + crc = crc32(crc, buf, size); + + return (crc); +} + static int read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) { - struct dk_minfo disk_info; + int sector_size; + unsigned long long capacity_size; + + if (ioctl(fd, BLKSSZGET, §or_size) < 0) + return (-1); + + if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) + return (-1); + + *lbsize = (uint_t)sector_size; + *capacity = (diskaddr_t)(capacity_size / sector_size); - if ((ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info)) == -1) - return (errno); - *capacity = disk_info.dki_capacity; - *lbsize = disk_info.dki_lbsize; return (0); } +static int +efi_get_info(int fd, struct dk_cinfo *dki_info) +{ +#if defined(__linux__) + char path[PATH_MAX]; + char *dev_path; + int rval; + + /* + * The simplest way to get the partition number under linux is + * to parse it out of the /dev/ block device name. + * The kernel creates this using the partition number when it + * populates /dev/ so it may be trusted. Another issue is that + * that the libefi API only provides the open fd and not the + * file path. To handle this realpath(3) is used to resolve + * the block device name from /proc/self/fd/. Aside from + * the partition number we collect some additional device info. + */ + memset(dki_info, 0, sizeof(*dki_info)); + (void) sprintf(path, "/proc/self/fd/%d", fd); + if ((dev_path = realpath(path, NULL)) == NULL) + goto error; + + rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", + dki_info->dki_dname, + &dki_info->dki_partition); + + switch (rval) { + case 0: + errno = EINVAL; + goto error; + case 1: + dki_info->dki_partition = 0; + } + + if ((strncmp(dki_info->dki_dname, "sd", 2) == 0)) { + strcpy(dki_info->dki_cname, "sd"); + dki_info->dki_ctype = DKC_SCSI_CCS; + } else if ((strncmp(dki_info->dki_dname, "hd", 2) == 0)) { + strcpy(dki_info->dki_cname, "hd"); + dki_info->dki_ctype = DKC_DIRECT; + } else if ((strncmp(dki_info->dki_dname, "md", 2) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_MD; + } else if ((strncmp(dki_info->dki_dname, "ram", 3) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_PCMCIA_MEM; + } else if ((strncmp(dki_info->dki_dname, "loop", 4) == 0)) { + strcpy(dki_info->dki_cname, "pseudo"); + dki_info->dki_ctype = DKC_VBD; + } else { + strcpy(dki_info->dki_cname, "unknown"); + dki_info->dki_ctype = DKC_UNKNOWN; + } + + free(dev_path); +#else + if (ioctl(fd, DKIOCINFO, (caddr_t)dki_info) == -1) + goto error; +#endif + return (0); +error: + if (efi_debug) + (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); + + switch (errno) { + case EIO: + return (VT_EIO); + case EINVAL: + return (VT_EINVAL); + default: + return (VT_ERROR); + } +} + /* * the number of blocks the EFI label takes up (round up to nearest * block) @@ -136,12 +233,13 @@ read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) int efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) { - diskaddr_t capacity; - uint_t lbsize; + diskaddr_t capacity = 0; + uint_t lbsize = 0; uint_t nblocks; size_t length; struct dk_gpt *vptr; struct uuid uuid; + struct dk_cinfo dki_info; if (read_disk_info(fd, &capacity, &lbsize) != 0) { if (efi_debug) @@ -149,6 +247,31 @@ efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) "couldn't read disk information\n"); return (-1); } +#if defined(__linux__) + if (efi_get_info(fd, &dki_info) != 0) { + if (efi_debug) + (void) fprintf(stderr, + "couldn't read disk information\n"); + return (-1); + } + + if (dki_info.dki_partition != 0) { + if (efi_debug) + (void) fprintf(stderr, + "can only partition whole devices\n"); + return (-1); + } + + if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || + (dki_info.dki_ctype == DKC_VBD) || + (dki_info.dki_ctype == DKC_UNKNOWN)) { + if (efi_debug) + (void) fprintf(stderr, + "unpartitionable device type %d\n", + dki_info.dki_ctype); + return (-1); + } +#endif nblocks = NBLOCKS(nparts, lbsize); if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { @@ -244,14 +367,117 @@ efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) { void *data = dk_ioc->dki_data; int error; +#if defined(__linux__) + diskaddr_t capacity; + uint_t lbsize; + /* + * When the IO is not being performed in kernel as an ioctl we need + * to know the sector size so we can seek to the proper byte offset. + */ + if (read_disk_info(fd, &capacity, &lbsize) == -1) { + if (efi_debug) + fprintf(stderr,"unable to read disk info: %d",errno); + + errno = EIO; + return -1; + } + + switch (cmd) { + case DKIOCGETEFI: + if (lbsize == 0) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI assuming " + "LBA %d bytes\n", DEV_BSIZE); + + lbsize = DEV_BSIZE; + } + + error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); + if (error == -1) + return error; + + error = read(fd, data, dk_ioc->dki_length); + if (error == -1) + return error; + + if (error != dk_ioc->dki_length) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCGETEFI short " + "read of %d bytes\n", error); + errno = EIO; + return -1; + } + error = 0; + break; + + case DKIOCSETEFI: + if (lbsize == 0) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI unknown " + "LBA size\n"); + errno = EIO; + return -1; + } + + error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); + if (error == -1) + return error; + + error = write(fd, data, dk_ioc->dki_length); + if (error == -1) + return error; + + if (error != dk_ioc->dki_length) { + if (efi_debug) + (void) fprintf(stderr, "DKIOCSETEFI short " + "write of %d bytes\n", error); + errno = EIO; + return -1; + } + + error = fdatasync(fd); + if (error == -1) + return error; + + error = 0; + break; + + default: + if (efi_debug) + (void) fprintf(stderr, "unsupported ioctl()\n"); + + errno = EIO; + return -1; + } +#else dk_ioc->dki_data_64 = (uint64_t)(uintptr_t)data; error = ioctl(fd, cmd, (void *)dk_ioc); dk_ioc->dki_data = data; - +#endif return (error); } +#if defined(__linux__) +static int +efi_rescan(int fd) +{ + int retry = 5; + int error; + + /* Notify the kernel a devices partition table has been updated */ + while ((error = ioctl(fd, BLKRRPART)) != 0) { + if (--retry == 0) { + (void) fprintf(stderr, "the kernel failed to rescan " + "the partition table: %d\n", errno); + return (-1); + } + } + + return (0); +} +#endif + static int check_label(int fd, dk_efi_t *dk_ioc) { @@ -306,6 +532,8 @@ efi_read(int fd, struct dk_gpt *vtoc) int rval = 0; int md_flag = 0; int vdc_flag = 0; + diskaddr_t capacity = 0; + uint_t lbsize = 0; struct dk_minfo disk_info; dk_efi_t dk_ioc; efi_gpt_t *efi; @@ -317,19 +545,9 @@ efi_read(int fd, struct dk_gpt *vtoc) /* * get the partition number for this file descriptor. */ - if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) { - if (efi_debug) { - (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); - } - switch (errno) { - case EIO: - return (VT_EIO); - case EINVAL: - return (VT_EINVAL); - default: - return (VT_ERROR); - } - } + if ((rval = efi_get_info(fd, &dki_info)) != 0) + return rval; + if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && (strncmp(dki_info.dki_dname, "md", 3) == 0)) { md_flag++; @@ -343,14 +561,18 @@ efi_read(int fd, struct dk_gpt *vtoc) } /* get the LBA size */ - if (ioctl(fd, DKIOCGMEDIAINFO, (caddr_t)&disk_info) == -1) { + if (read_disk_info(fd, &capacity, &lbsize) == -1) { if (efi_debug) { (void) fprintf(stderr, - "assuming LBA 512 bytes %d\n", - errno); + "unable to read disk info: %d", + errno); } - disk_info.dki_lbsize = DEV_BSIZE; + return (VT_EINVAL); } + + disk_info.dki_lbsize = lbsize; + disk_info.dki_capacity = capacity; + if (disk_info.dki_lbsize == 0) { if (efi_debug) { (void) fprintf(stderr, @@ -840,22 +1062,13 @@ efi_write(int fd, struct dk_gpt *vtoc) efi_gpe_t *efi_parts; int i, j; struct dk_cinfo dki_info; + int rval; int md_flag = 0; int nblocks; diskaddr_t lba_backup_gpt_hdr; - if (ioctl(fd, DKIOCINFO, (caddr_t)&dki_info) == -1) { - if (efi_debug) - (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); - switch (errno) { - case EIO: - return (VT_EIO); - case EINVAL: - return (VT_EINVAL); - default: - return (VT_ERROR); - } - } + if ((rval = efi_get_info(fd, &dki_info)) != 0) + return rval; /* check if we are dealing wih a metadevice */ if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && @@ -942,6 +1155,10 @@ efi_write(int fd, struct dk_gpt *vtoc) return (VT_EINVAL); } + /* Zero's should be written for empty partitions */ + if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) + continue; + efi_parts[i].efi_gpe_StartingLBA = LE_64(vtoc->efi_parts[i].p_start); efi_parts[i].efi_gpe_EndingLBA = @@ -1033,6 +1250,13 @@ efi_write(int fd, struct dk_gpt *vtoc) /* write the PMBR */ (void) write_pmbr(fd, vtoc); free(dk_ioc.dki_data); + +#if defined(__linux__) + rval = efi_rescan(fd); + if (rval) + return (VT_ERROR); +#endif + return (0); } @@ -1050,6 +1274,7 @@ efi_free(struct dk_gpt *ptr) int efi_type(int fd) { +#if 0 struct vtoc vtoc; struct extvtoc extvtoc; @@ -1063,6 +1288,9 @@ efi_type(int fd) } } return (0); +#else + return (ENOSYS); +#endif } void @@ -1176,7 +1404,7 @@ efi_auto_sense(int fd, struct dk_gpt **vtoc) return (-1); } - for (i = 0; i < min((*vtoc)->efi_nparts, V_NUMPAR); i++) { + for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) { (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag; (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag; (*vtoc)->efi_parts[i].p_start = 0; diff --git a/lib/libzfs/include/libzfs.h b/lib/libzfs/include/libzfs.h index 8379970733..fc32086ae5 100644 --- a/lib/libzfs/include/libzfs.h +++ b/lib/libzfs/include/libzfs.h @@ -56,11 +56,15 @@ extern "C" { #if defined(__sun__) || defined(__sun) #define DISK_ROOT "/dev/dsk" #define RDISK_ROOT "/dev/rdsk" +#define FIRST_SLICE "s0" #define BACKUP_SLICE "s2" #endif #ifdef __linux__ #define DISK_ROOT "/dev" +#define RDISK_ROOT DISK_ROOT +#define FIRST_SLICE "1" +#define BACKUP_SLICE "" #endif /* diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index 496883ae89..a18155aabb 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -634,9 +634,12 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) /* * Don't start the slice at the default block of 34; many storage - * devices will use a stripe width of 128k, so start there instead. + * devices will use a stripe width of 128k, other vendors prefer a 1m + * alignment. It is best to play it safe and ensure a 1m alignment + * give 512b blocks. When the block size is larger by a power of 2 + * we will still be 1m aligned. */ -#define NEW_START_BLOCK 256 +#define NEW_START_BLOCK 2048 /* * Validate the given pool name, optionally putting an extended error message in @@ -1758,6 +1761,7 @@ is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type) static int zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) { +#if 0 char path[MAXPATHLEN]; char errbuf[1024]; int fd, error; @@ -1788,6 +1792,12 @@ zpool_relabel_disk(libzfs_handle_t *hdl, const char *name) return (zfs_error(hdl, EZFS_NOCAP, errbuf)); } return (0); +#else + char errbuf[1024]; + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " + "relabel '%s/%s': libefi is unsupported"), DISK_ROOT, name); + return (zfs_error(hdl, EZFS_NOTSUP, errbuf)); +#endif } /* @@ -3117,7 +3127,10 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) uint64_t slice_size; diskaddr_t start_block; char errbuf[1024]; - +#if defined(__linux__) + struct stat64 statbuf; + int i; +#endif /* prepare an error message just in case */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); @@ -3153,6 +3166,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) * This shouldn't happen. We've long since verified that this * is a valid device. */ + printf("errno =%d\n", errno); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "unable to open device")); return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); @@ -3214,6 +3228,24 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) (void) close(fd); efi_free(vtoc); + +#if defined(__linux__) + /* + * The efi partition table has been successfully written and the + * kernel notified. However, it still may take a moment for udev + * to notice the devfs update and properly populate /dev/. We will + * wait up to 3 seconds which is far far far longer than needed. + */ + (void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name, + FIRST_SLICE); + for (i = 0; i < 3000; i++) { + if (stat64(path, &statbuf) == 0 || errno != ENOENT) + break; + + usleep(1000); + } +#endif + return (0); }