zfs_ioc_send: use a dedicated taskq thread for send

When stack space is tight, the stream is written to its target on a
separate taskq thread to make sure there's enough stack space to
complete it.

This has always used an IO taskq, but that doesn't really make sense for
it, and moving it onto a regular taskq lets us get rid of
spa_taskq_dispatch_sync(), which is not used anywhere else.

Stream writes may block for a long time depending on what the target is,
and we have no way of discovering this, so we can't risk using the
system taskq, as there may be many tens of sends in progress. Instead,
we create a dedicated taskq thread for each send writer to run on, and
clean it up when it's done.

Sponsored-by: Klara, Inc.
Sponsored-by: Wasabi Technology, Inc.
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #16151
This commit is contained in:
Rob Norris 2024-05-02 11:57:23 +10:00 committed by Brian Behlendorf
parent b64afa41d5
commit cc38691534
1 changed files with 70 additions and 33 deletions

View File

@ -38,7 +38,7 @@
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
* Copyright (c) 2019, 2021, Klara Inc.
* Copyright (c) 2019, 2021, 2024, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright 2024 Oxide Computer Company
*/
@ -5514,6 +5514,14 @@ out:
return (error);
}
/*
* When stack space is limited, we write replication stream data to the target
* on a separate taskq thread, to make sure there's enough stack space.
*/
#ifndef HAVE_LARGE_STACKS
#define USE_SEND_TASKQ 1
#endif
typedef struct dump_bytes_io {
zfs_file_t *dbi_fp;
caddr_t dbi_buf;
@ -5534,31 +5542,65 @@ dump_bytes_cb(void *arg)
dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
}
typedef struct dump_bytes_arg {
zfs_file_t *dba_fp;
#ifdef USE_SEND_TASKQ
taskq_t *dba_tq;
taskq_ent_t dba_tqent;
#endif
} dump_bytes_arg_t;
static int
dump_bytes(objset_t *os, void *buf, int len, void *arg)
{
dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
dump_bytes_io_t dbi;
dbi.dbi_fp = arg;
dbi.dbi_fp = dba->dba_fp;
dbi.dbi_buf = buf;
dbi.dbi_len = len;
#if defined(HAVE_LARGE_STACKS)
dump_bytes_cb(&dbi);
#ifdef USE_SEND_TASKQ
taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
&dba->dba_tqent);
taskq_wait(dba->dba_tq);
#else
/*
* The vn_rdwr() call is performed in a taskq to ensure that there is
* always enough stack space to write safely to the target filesystem.
* The ZIO_TYPE_FREE threads are used because there can be a lot of
* them and they are used in vdev_file.c for a similar purpose.
*/
spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP);
#endif /* HAVE_LARGE_STACKS */
dump_bytes_cb(&dbi);
#endif
return (dbi.dbi_err);
}
static int
dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
{
zfs_file_t *fp = zfs_file_get(fd);
if (fp == NULL)
return (SET_ERROR(EBADF));
dba->dba_fp = fp;
#ifdef USE_SEND_TASKQ
dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
taskq_init_ent(&dba->dba_tqent);
#endif
memset(out, 0, sizeof (dmu_send_outparams_t));
out->dso_outfunc = dump_bytes;
out->dso_arg = dba;
out->dso_dryrun = B_FALSE;
return (0);
}
static void
dump_bytes_fini(dump_bytes_arg_t *dba)
{
zfs_file_put(dba->dba_fp);
#ifdef USE_SEND_TASKQ
taskq_destroy(dba->dba_tq);
#endif
}
/*
* inputs:
* zc_name name of snapshot to send
@ -5643,21 +5685,18 @@ zfs_ioc_send(zfs_cmd_t *zc)
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
} else {
zfs_file_t *fp;
dmu_send_outparams_t out = {0};
dump_bytes_arg_t dba;
dmu_send_outparams_t out;
error = dump_bytes_init(&dba, zc->zc_cookie, &out);
if (error)
return (error);
if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
out.dso_outfunc = dump_bytes;
out.dso_arg = fp;
out.dso_dryrun = B_FALSE;
off = zfs_file_off(dba.dba_fp);
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
zc->zc_fromobj, embedok, large_block_ok, compressok,
rawok, savedok, zc->zc_cookie, &off, &out);
zfs_file_put(fp);
dump_bytes_fini(&dba);
}
return (error);
}
@ -6604,7 +6643,6 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
offset_t off;
const char *fromname = NULL;
int fd;
zfs_file_t *fp;
boolean_t largeblockok;
boolean_t embedok;
boolean_t compressok;
@ -6629,20 +6667,19 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
if ((fp = zfs_file_get(fd)) == NULL)
return (SET_ERROR(EBADF));
dump_bytes_arg_t dba;
dmu_send_outparams_t out;
error = dump_bytes_init(&dba, fd, &out);
if (error)
return (error);
off = zfs_file_off(fp);
dmu_send_outparams_t out = {0};
out.dso_outfunc = dump_bytes;
out.dso_arg = fp;
out.dso_dryrun = B_FALSE;
off = zfs_file_off(dba.dba_fp);
error = dmu_send(snapname, fromname, embedok, largeblockok,
compressok, rawok, savedok, resumeobj, resumeoff,
redactbook, fd, &off, &out);
zfs_file_put(fp);
dump_bytes_fini(&dba);
return (error);
}