Command 'zpool create' needs to wait on correct partition names.

When creating partition tables we always need to wait for more than
just the /dev/<disk><part> device to appear.  Just as importantly,
if we were originally given a udev path we need to wait for the
/dev/disk/*/<name>-part<part> symlink to be created.  However,
since the partition naming convention differs between /dev/ and
/dev/disk we determine based on the path which convention to
expect and then wait (for a few seconds) for the device to be
created.  Based on my experience with udev on my test nodes it
takes about 300ms for the devices to be created after being
prompted by the kernel.  This time will vary somewhat based
on how complicated your udev rules are, so for safety I threw
in a factor of 10.  We wait 3 seconds for the devices to appear
before erroring out with a failure.

An additional minor fix includes checking the force flag in the
EFI_GPT_PRIMARY_CORRUPT case.  This allows you to force the
update even in the corrupt partition case.

Finally, since these are Linux only changes I've dropped the
devid code entirely here because I still can't think of why we
would need or want it on a Linux system.
This commit is contained in:
Brian Behlendorf 2009-10-21 11:50:42 -07:00
parent 5be28776fb
commit aec988734b
3 changed files with 55 additions and 59 deletions

View File

@ -279,7 +279,7 @@ check_disk(const char *path, blkid_cache cache, int force,
* label at the end of the device is intact. Rather than use this * label at the end of the device is intact. Rather than use this
* label we should play it safe and treat this as a non efi device. * label we should play it safe and treat this as a non efi device.
*/ */
if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { if (!force && vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
vdev_error(gettext( vdev_error(gettext(
"%s contains a corrupt primary efi partition table. " "%s contains a corrupt primary efi partition table. "
"If you are\nsure you want to use this device use " "If you are\nsure you want to use this device use "
@ -938,12 +938,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
char *type, *path, *diskname; char *type, *path, *diskname;
char buf[MAXPATHLEN]; char buf[MAXPATHLEN];
uint64_t wholedisk; uint64_t wholedisk;
int fd;
int ret; int ret;
#if defined(__sun__) || defined(__sun)
ddi_devid_t devid;
char *minor = NULL, *devid_str = NULL;
#endif
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
@ -983,50 +978,34 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
return (-1); return (-1);
/* /*
* Fill in the devid, now that we've labeled the disk. We * Now the we've labeled the disk and the partitions have
* attempt to open the new zfs slice first by appending the * been created. We still need to wait for udev to create
* slice number. If that fails this may be a Linux udev * the symlinks to those partitions. If we are accessing
* path in which case the -part# convention is tried. * the devices via a udev disk path, /dev/disk, then wait
* for *-part# to be created. Otherwise just use the normal
* syntax for devices in /dev.
*/ */
(void) snprintf(buf, sizeof (buf), "%s%s", path, FIRST_SLICE); if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
if ((fd = open(buf, O_RDONLY)) < 0) { (void) snprintf(buf, sizeof (buf),
"%s%s%s", path, "-part", FIRST_SLICE);
else
(void) snprintf(buf, sizeof (buf),
"%s%s", path, FIRST_SLICE);
(void) snprintf(buf, sizeof (buf), "%s%s%s", if ((ret = zpool_label_disk_wait(buf, 1000)) != 0) {
path, "-part", FIRST_SLICE); (void) fprintf(stderr,
if ((fd = open(buf, O_RDONLY)) < 0) { gettext( "cannot resolve path '%s'\n"), buf);
(void) fprintf(stderr, return (-1);
gettext("cannot open '%s': %s\n"),
buf, strerror(errno));
return (-1);
}
} }
#if defined(__sun__) || defined(__sun)
if (devid_get(fd, &devid) == 0) {
if (devid_get_minor_name(fd, &minor) == 0 &&
(devid_str = devid_str_encode(devid, minor)) !=
NULL) {
verify(nvlist_add_string(nv,
ZPOOL_CONFIG_DEVID, devid_str) == 0);
}
if (devid_str != NULL)
devid_str_free(devid_str);
if (minor != NULL)
devid_str_free(minor);
devid_free(devid);
}
#endif
/* /*
* Update the path to refer to the 's0' slice. The presence of * Update the path to refer to FIRST_SLICE. The presence of
* the 'whole_disk' field indicates to the CLI that we should * the 'whole_disk' field indicates to the CLI that we should
* chop off the slice number when displaying the device in * chop off the slice number when displaying the device in
* future output. * future output.
*/ */
verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0); verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
(void) close(fd);
/* Just in case this partition already existed. */ /* Just in case this partition already existed. */
(void) zero_label(buf); (void) zero_label(buf);

View File

@ -56,6 +56,7 @@ extern "C" {
#if defined(__sun__) || defined(__sun) #if defined(__sun__) || defined(__sun)
#define DISK_ROOT "/dev/dsk" #define DISK_ROOT "/dev/dsk"
#define RDISK_ROOT "/dev/rdsk" #define RDISK_ROOT "/dev/rdsk"
#define UDISK_ROOT RDISK_ROOT
#define FIRST_SLICE "s0" #define FIRST_SLICE "s0"
#define BACKUP_SLICE "s2" #define BACKUP_SLICE "s2"
#endif #endif
@ -63,6 +64,7 @@ extern "C" {
#ifdef __linux__ #ifdef __linux__
#define DISK_ROOT "/dev" #define DISK_ROOT "/dev"
#define RDISK_ROOT DISK_ROOT #define RDISK_ROOT DISK_ROOT
#define UDISK_ROOT "/dev/disk"
#define FIRST_SLICE "1" #define FIRST_SLICE "1"
#define BACKUP_SLICE "" #define BACKUP_SLICE ""
#endif #endif
@ -253,6 +255,7 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *,
boolean_t *, boolean_t *); boolean_t *, boolean_t *);
extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
boolean_t *, boolean_t *, boolean_t *); boolean_t *, boolean_t *, boolean_t *);
extern int zpool_label_disk_wait(char *, int);
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
/* /*

View File

@ -3116,6 +3116,33 @@ find_start_block(nvlist_t *config)
return (MAXOFFSET_T); return (MAXOFFSET_T);
} }
/*
 * Poll for a newly created device to become visible at 'path'.
 *
 * On Linux the path is probed with stat64() at most 'timeout' times, with
 * a one millisecond pause ahead of every probe, so the overall wait is
 * roughly 'timeout' milliseconds.  A /dev/ device may exist for a short
 * while before udev has created the corresponding symlink, so the wait is
 * performed on the exact path handed in.  Depending on the udev rules this
 * may take a few seconds.
 *
 * Returns 0 as soon as the path can be stat'ed, or ENOENT when every
 * attempt has been used up.  On non-Linux builds nothing is waited for
 * and 0 is returned immediately.
 */
int
zpool_label_disk_wait(char *path, int timeout)
{
#if !defined(__linux__)
	return (0);
#else
	struct stat64 sb;
	int attempt = 0;

	while (attempt < timeout) {
		attempt++;

		/* Pause first; the probe always follows the sleep. */
		usleep(1000);

		errno = 0;
		if (stat64(path, &sb) != 0)
			continue;
		if (errno != 0)
			continue;

		return (0);
	}

	return (ENOENT);
#endif
}
/* /*
* Label an individual disk. The name provided is the short name, * Label an individual disk. The name provided is the short name,
* stripped of any leading /dev path. * stripped of any leading /dev path.
@ -3130,10 +3157,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
uint64_t slice_size; uint64_t slice_size;
diskaddr_t start_block; diskaddr_t start_block;
char errbuf[1024]; char errbuf[1024];
#if defined(__linux__)
struct stat64 statbuf;
int i;
#endif
/* prepare an error message just in case */ /* prepare an error message just in case */
(void) snprintf(errbuf, sizeof (errbuf), (void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
@ -3233,23 +3257,13 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
efi_free(vtoc); efi_free(vtoc);
#if defined(__linux__) #if defined(__linux__)
/* /* Wait for the first expected slice to appear */
* The efi partition table has been successfully written and the (void) snprintf(path, sizeof (path), "%s/%s%s",
* kernel notified. However, it still may take a moment for udev DISK_ROOT, name, FIRST_SLICE);
* to notice the devfs update and properly populate /dev/. We will return zpool_label_disk_wait(path, 3000);
* wait up to 3 seconds which is far far far longer than needed. #else
*/
(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
FIRST_SLICE);
for (i = 0; i < 3000; i++) {
if (stat64(path, &statbuf) == 0 || errno != ENOENT)
break;
usleep(1000);
}
#endif
return (0); return (0);
#endif
} }
static boolean_t static boolean_t