libzutil: optimize zpool_read_label with AIO

Read all labels in parallel with a single lio_listio() call instead of
issuing one pread64() per label sequentially.

Originally committed as
https://cgit.freebsd.org/src/commit/?id=b49e9abcf44cafaf5cfad7029c9a6adbb28346e8

Obtained from: FreeBSD
Sponsored by: Spectra Logic, Axcient
Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alek Pinchuk <apinchuk@axcient.com>
Signed-off-by: Alan Somers <asomers@gmail.com>
Closes #11467
This commit is contained in:
Alan Somers 2021-01-13 10:00:12 -07:00 committed by Brian Behlendorf
parent ec40ce8405
commit e50b5217e7
2 changed files with 45 additions and 11 deletions

View File

@ -45,7 +45,8 @@ libzutil_la_LIBADD = \
if BUILD_LINUX if BUILD_LINUX
libzutil_la_LIBADD += \ libzutil_la_LIBADD += \
$(abs_top_builddir)/lib/libefi/libefi.la $(abs_top_builddir)/lib/libefi/libefi.la \
-lrt
endif endif
libzutil_la_LIBADD += -lm $(LIBBLKID_LIBS) $(LIBUDEV_LIBS) libzutil_la_LIBADD += -lm $(LIBBLKID_LIBS) $(LIBUDEV_LIBS)

View File

@ -46,6 +46,7 @@
* using our derived config, and record the results. * using our derived config, and record the results.
*/ */
#include <aio.h>
#include <ctype.h> #include <ctype.h>
#include <dirent.h> #include <dirent.h>
#include <errno.h> #include <errno.h>
@ -887,11 +888,12 @@ int
zpool_read_label(int fd, nvlist_t **config, int *num_labels) zpool_read_label(int fd, nvlist_t **config, int *num_labels)
{ {
struct stat64 statbuf; struct stat64 statbuf;
int l, count = 0; struct aiocb aiocbs[VDEV_LABELS];
vdev_phys_t *label; struct aiocb *aiocbps[VDEV_LABELS];
vdev_phys_t *labels;
nvlist_t *expected_config = NULL; nvlist_t *expected_config = NULL;
uint64_t expected_guid = 0, size; uint64_t expected_guid = 0, size;
int error; int error, l, count = 0;
*config = NULL; *config = NULL;
@ -899,20 +901,51 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels)
return (0); return (0);
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
error = posix_memalign((void **)&label, PAGESIZE, sizeof (*label)); error = posix_memalign((void **)&labels, PAGESIZE,
VDEV_LABELS * sizeof (*labels));
if (error) if (error)
return (-1); return (-1);
memset(aiocbs, 0, sizeof (aiocbs));
for (l = 0; l < VDEV_LABELS; l++) { for (l = 0; l < VDEV_LABELS; l++) {
uint64_t state, guid, txg;
off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE; off_t offset = label_offset(size, l) + VDEV_SKIP_SIZE;
if (pread64(fd, label, sizeof (vdev_phys_t), aiocbs[l].aio_fildes = fd;
offset) != sizeof (vdev_phys_t)) aiocbs[l].aio_offset = offset;
aiocbs[l].aio_buf = &labels[l];
aiocbs[l].aio_nbytes = sizeof (vdev_phys_t);
aiocbs[l].aio_lio_opcode = LIO_READ;
aiocbps[l] = &aiocbs[l];
}
if (lio_listio(LIO_WAIT, aiocbps, VDEV_LABELS, NULL) != 0) {
int saved_errno = errno;
if (errno == EAGAIN || errno == EINTR || errno == EIO) {
/*
* A portion of the requests may have been submitted.
* Clean them up.
*/
for (l = 0; l < VDEV_LABELS; l++) {
errno = 0;
int r = aio_error(&aiocbs[l]);
if (r != EINVAL)
(void) aio_return(&aiocbs[l]);
}
}
free(labels);
errno = saved_errno;
return (-1);
}
for (l = 0; l < VDEV_LABELS; l++) {
uint64_t state, guid, txg;
if (aio_return(&aiocbs[l]) != sizeof (vdev_phys_t))
continue; continue;
if (nvlist_unpack(label->vp_nvlist, if (nvlist_unpack(labels[l].vp_nvlist,
sizeof (label->vp_nvlist), config, 0) != 0) sizeof (labels[l].vp_nvlist), config, 0) != 0)
continue; continue;
if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID, if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_GUID,
@ -949,7 +982,7 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels)
if (num_labels != NULL) if (num_labels != NULL)
*num_labels = count; *num_labels = count;
free(label); free(labels);
*config = expected_config; *config = expected_config;
return (0); return (0);