From 37abac6d559a1da8ab8e5379442f491b73998f6a Mon Sep 17 00:00:00 2001 From: Bill Pijewski Date: Wed, 9 May 2012 15:05:14 -0700 Subject: [PATCH] Illumos #2703: add mechanism to report ZFS send progress Reviewed by: Matt Ahrens Reviewed by: Robert Mustacchi Reviewed by: Richard Lowe Approved by: Eric Schrock References: https://www.illumos.org/issues/2703 Ported by: Martin Matuska Signed-off-by: Brian Behlendorf --- cmd/zfs/zfs_main.c | 2 + include/libzfs.h | 4 + include/sys/dmu.h | 5 +- include/sys/dmu_impl.h | 28 ++++ include/sys/dsl_dataset.h | 4 + include/sys/fs/zfs.h | 2 + include/sys/zfs_context.h | 2 + lib/libzfs/libzfs_sendrecv.c | 82 +++++++++++- man/man8/zfs.8 | 4 +- module/zfs/dmu_send.c | 247 ++++++++++++++++++----------------- module/zfs/dsl_dataset.c | 7 + module/zfs/zfs_ioctl.c | 52 +++++++- 12 files changed, 312 insertions(+), 127 deletions(-) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 1cb2ac9587..797c7a591a 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -3551,6 +3552,7 @@ zfs_do_send(int argc, char **argv) if (flags.verbose) extraverbose = B_TRUE; flags.verbose = B_TRUE; + flags.progress = B_TRUE; break; case 'D': flags.dedup = B_TRUE; diff --git a/include/libzfs.h b/include/libzfs.h index 9d0e8ce434..75e149334a 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _LIBZFS_H @@ -580,6 +581,9 @@ typedef struct sendflags { /* parsable verbose output (ie. -P) */ boolean_t parsable; + + /* show progress (ie. -v) */ + boolean_t progress; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 5b2e25b78c..fe317c835b 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -705,8 +706,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - struct vnode *vp, offset_t *off); +int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + int outfd, struct vnode *vp, offset_t *off); int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, uint64_t *sizep); diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h index 0b8748df10..f13a2a37ce 100644 --- a/include/sys/dmu_impl.h +++ b/include/sys/dmu_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DMU_IMPL_H @@ -30,6 +31,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { @@ -239,6 +241,32 @@ typedef struct dmu_xuio { iovec_t *iovp; } dmu_xuio_t; +/* + * The list of data whose inclusion in a send stream can be pending from + * one call to backup_cb to another. Multiple calls to dump_free() and + * dump_freeobjects() can be aggregated into a single DRR_FREE or + * DRR_FREEOBJECTS replay record. + */ +typedef enum { + PENDING_NONE, + PENDING_FREE, + PENDING_FREEOBJECTS +} dmu_pendop_t; + +typedef struct dmu_sendarg { + list_node_t dsa_link; + dmu_replay_record_t *dsa_drr; + vnode_t *dsa_vp; + int dsa_outfd; + proc_t *dsa_proc; + offset_t *dsa_off; + objset_t *dsa_os; + zio_cksum_t dsa_zc; + uint64_t dsa_toguid; + int dsa_err; + dmu_pendop_t dsa_pending_op; +} dmu_sendarg_t; + #ifdef __cplusplus } #endif diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 948abb020d..7cff7e3a05 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H @@ -150,6 +151,9 @@ typedef struct dsl_dataset { uint64_t ds_reserved; /* cached refreservation */ uint64_t ds_quota; /* cached refquota */ + kmutex_t ds_sendstream_lock; + list_t ds_sendstreams; + /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 5238379920..f72c74fc93 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ @@ -803,6 +804,7 @@ typedef enum zfs_ioc { ZFS_IOC_SPACE_WRITTEN, ZFS_IOC_SPACE_SNAPS, ZFS_IOC_POOL_REOPEN, + ZFS_IOC_SEND_PROGRESS, } zfs_ioc_t; /* diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 8982c000b1..de8b943be5 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -24,6 +24,7 @@ */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_ZFS_CONTEXT_H @@ -201,6 +202,7 @@ typedef struct proc { } proc_t; extern struct proc p0; +#define curproc (&p0) typedef void (*thread_func_t)(void *); typedef void (*thread_func_arg_t)(void *); diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index c5d963a324..9dbfb1641a 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . + * Copyright (c) 2012, Joyent, Inc. All rights reserved. * All rights reserved */ @@ -44,6 +45,7 @@ #include #include #include +#include #include @@ -69,6 +71,12 @@ typedef struct dedup_arg { libzfs_handle_t *dedup_hdl; } dedup_arg_t; +typedef struct progress_arg { + zfs_handle_t *pa_zhp; + int pa_fd; + boolean_t pa_parsable; +} progress_arg_t; + typedef struct dataref { uint64_t ref_guid; uint64_t ref_object; @@ -787,7 +795,7 @@ typedef struct send_dump_data { char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; - boolean_t verbose, dryrun, parsable; + boolean_t verbose, dryrun, parsable, progress; int outfd; boolean_t err; nvlist_t *fss; @@ -979,10 +987,60 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) return (error); } +static void * +send_progress_thread(void *arg) +{ + progress_arg_t *pa = arg; + + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_handle_t *zhp = pa->pa_zhp; + libzfs_handle_t *hdl = zhp->zfs_hdl; + unsigned long long bytes; + char buf[16]; + + time_t t; + struct tm *tm; + + assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); + (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); + + if (!pa->pa_parsable) + (void) fprintf(stderr, "TIME SENT SNAPSHOT\n"); + + /* + * Print the progress from ZFS_IOC_SEND_PROGRESS every second. + */ + for (;;) { + (void) sleep(1); + + zc.zc_cookie = pa->pa_fd; + if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) + return ((void *)-1); + + (void) time(&t); + tm = localtime(&t); + bytes = zc.zc_cookie; + + if (pa->pa_parsable) { + (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + bytes, zhp->zfs_name); + } else { + zfs_nicenum(bytes, buf, sizeof (buf)); + (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", + tm->tm_hour, tm->tm_min, tm->tm_sec, + buf, zhp->zfs_name); + } + } +} + static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; + progress_arg_t pa = { 0 }; + pthread_t tid; + char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; @@ -1100,8 +1158,29 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) } if (!sdd->dryrun) { + /* + * If progress reporting is requested, spawn a new thread to + * poll ZFS_IOC_SEND_PROGRESS at a regular interval. + */ + if (sdd->progress) { + pa.pa_zhp = zhp; + pa.pa_fd = sdd->outfd; + pa.pa_parsable = sdd->parsable; + + if ((err = pthread_create(&tid, NULL, + send_progress_thread, &pa))) { + zfs_close(zhp); + return (err); + } + } + err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, fromorigin, sdd->outfd, sdd->debugnv); + + if (sdd->progress) { + (void) pthread_cancel(tid); + (void) pthread_join(tid, NULL); + } } (void) strcpy(sdd->prevsnap, thissnap); @@ -1445,6 +1524,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sdd.fsavl = fsavl; sdd.verbose = flags->verbose; sdd.parsable = flags->parsable; + sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index 508270634f..7b355e0836 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -2,6 +2,7 @@ .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2012 by Delphix. All rights reserved. .\" Copyright (c) 2012 Nexenta Systems, Inc. All Rights Reserved. +.\" Copyright (c) 2012, Joyent, Inc. All rights reserved. .\" Copyright 2011 Joshua M. Clulow .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions and limitations under the License. When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with @@ -2306,7 +2307,8 @@ Generate a stream package that sends all intermediary snapshots from the first s .ad .sp .6 .RS 4n -Print verbose information about the stream package generated. +Print verbose information about the stream package generated. This information +includes a per-second report of how much data has been sent. .RE .sp diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index 9f9003744b..949f4d7737 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -23,6 +23,7 @@ * Copyright (c) 2011 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -52,48 +53,30 @@ int zfs_send_corrupt_data = B_FALSE; static char *dmu_recv_tag = "dmu_recv_tag"; -/* - * The list of data whose inclusion in a send stream can be pending from - * one call to backup_cb to another. Multiple calls to dump_free() and - * dump_freeobjects() can be aggregated into a single DRR_FREE or - * DRR_FREEOBJECTS replay record. - */ -typedef enum { - PENDING_NONE, - PENDING_FREE, - PENDING_FREEOBJECTS -} pendop_t; - -struct backuparg { - dmu_replay_record_t *drr; - vnode_t *vp; - offset_t *off; - objset_t *os; - zio_cksum_t zc; - uint64_t toguid; - int err; - pendop_t pending_op; -}; - static int -dump_bytes(struct backuparg *ba, void *buf, int len) +dump_bytes(dmu_sendarg_t *dsp, void *buf, int len) { + dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset; ssize_t resid; /* have to get resid to get detailed errno */ ASSERT3U(len % 8, ==, 0); - fletcher_4_incremental_native(buf, len, &ba->zc); - ba->err = vn_rdwr(UIO_WRITE, ba->vp, + fletcher_4_incremental_native(buf, len, &dsp->dsa_zc); + dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp, (caddr_t)buf, len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); - *ba->off += len; - return (ba->err); + + mutex_enter(&ds->ds_sendstream_lock); + *dsp->dsa_off += len; + mutex_exit(&ds->ds_sendstream_lock); + + return (dsp->dsa_err); } static int -dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, +dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, uint64_t length) { - struct drr_free *drrf = &(ba->drr->drr_u.drr_free); + struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free); if (length != -1ULL && offset + length < offset) length = -1ULL; @@ -105,13 +88,15 @@ dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, * other DRR_FREE records. DRR_FREEOBJECTS records can only be * aggregated with other DRR_FREEOBJECTS records. */ - if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE && + dsp->dsa_pending_op != PENDING_FREE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } - if (ba->pending_op == PENDING_FREE) { + if (dsp->dsa_pending_op == PENDING_FREE) { /* * There should never be a PENDING_FREE if length is -1 * (because dump_dnode is the only place where this @@ -129,34 +114,35 @@ dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, return (0); } else { /* not a continuation. Push out pending record */ - if (dump_bytes(ba, ba->drr, + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } } /* create a FREE record and make it pending */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_FREE; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_FREE; drrf->drr_object = object; drrf->drr_offset = offset; drrf->drr_length = length; - drrf->drr_toguid = ba->toguid; + drrf->drr_toguid = dsp->dsa_toguid; if (length == -1ULL) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); } else { - ba->pending_op = PENDING_FREE; + dsp->dsa_pending_op = PENDING_FREE; } return (0); } static int -dump_data(struct backuparg *ba, dmu_object_type_t type, +dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type, uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data) { - struct drr_write *drrw = &(ba->drr->drr_u.drr_write); + struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write); /* @@ -165,19 +151,20 @@ dump_data(struct backuparg *ba, dmu_object_type_t type, * the stream, since aggregation can't be done across operations * of different types. */ - if (ba->pending_op != PENDING_NONE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } /* write a DATA record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_WRITE; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_WRITE; drrw->drr_object = object; drrw->drr_type = type; drrw->drr_offset = offset; drrw->drr_length = blksz; - drrw->drr_toguid = ba->toguid; + drrw->drr_toguid = dsp->dsa_toguid; drrw->drr_checksumtype = BP_GET_CHECKSUM(bp); if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup) drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP; @@ -186,42 +173,43 @@ dump_data(struct backuparg *ba, dmu_object_type_t type, DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp)); drrw->drr_key.ddk_cksum = bp->blk_cksum; - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - if (dump_bytes(ba, data, blksz) != 0) + if (dump_bytes(dsp, data, blksz) != 0) return (EINTR); return (0); } static int -dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data) +dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data) { - struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill); + struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill); - if (ba->pending_op != PENDING_NONE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } /* write a SPILL record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_SPILL; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_SPILL; drrs->drr_object = object; drrs->drr_length = blksz; - drrs->drr_toguid = ba->toguid; + drrs->drr_toguid = dsp->dsa_toguid; - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t))) return (EINTR); - if (dump_bytes(ba, data, blksz)) + if (dump_bytes(dsp, data, blksz)) return (EINTR); return (0); } static int -dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) +dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs) { - struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects); + struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects); /* * If there is a pending op, but it's not PENDING_FREEOBJECTS, @@ -230,13 +218,14 @@ dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records * can only be aggregated with other DRR_FREEOBJECTS records. */ - if (ba->pending_op != PENDING_NONE && - ba->pending_op != PENDING_FREEOBJECTS) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE && + dsp->dsa_pending_op != PENDING_FREEOBJECTS) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } - if (ba->pending_op == PENDING_FREEOBJECTS) { + if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) { /* * See whether this free object array can be aggregated * with pending one @@ -246,42 +235,43 @@ dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) return (0); } else { /* can't be aggregated. Push out pending record */ - if (dump_bytes(ba, ba->drr, + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } } /* write a FREEOBJECTS record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_FREEOBJECTS; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_FREEOBJECTS; drrfo->drr_firstobj = firstobj; drrfo->drr_numobjs = numobjs; - drrfo->drr_toguid = ba->toguid; + drrfo->drr_toguid = dsp->dsa_toguid; - ba->pending_op = PENDING_FREEOBJECTS; + dsp->dsa_pending_op = PENDING_FREEOBJECTS; return (0); } static int -dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) +dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp) { - struct drr_object *drro = &(ba->drr->drr_u.drr_object); + struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object); if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) - return (dump_freeobjects(ba, object, 1)); + return (dump_freeobjects(dsp, object, 1)); - if (ba->pending_op != PENDING_NONE) { - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) { + if (dump_bytes(dsp, dsp->dsa_drr, + sizeof (dmu_replay_record_t)) != 0) return (EINTR); - ba->pending_op = PENDING_NONE; + dsp->dsa_pending_op = PENDING_NONE; } /* write an OBJECT record */ - bzero(ba->drr, sizeof (dmu_replay_record_t)); - ba->drr->drr_type = DRR_OBJECT; + bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t)); + dsp->dsa_drr->drr_type = DRR_OBJECT; drro->drr_object = object; drro->drr_type = dnp->dn_type; drro->drr_bonustype = dnp->dn_bonustype; @@ -289,19 +279,19 @@ dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) drro->drr_bonuslen = dnp->dn_bonuslen; drro->drr_checksumtype = dnp->dn_checksum; drro->drr_compress = dnp->dn_compress; - drro->drr_toguid = ba->toguid; + drro->drr_toguid = dsp->dsa_toguid; - if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0) + if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) return (EINTR); - if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) + if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) return (EINTR); /* free anything past the end of the file */ - if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * + if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) return (EINTR); - if (ba->err) + if (dsp->dsa_err) return (EINTR); return (0); } @@ -315,7 +305,7 @@ static int backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { - struct backuparg *ba = arg; + dmu_sendarg_t *dsp = arg; dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; int err = 0; @@ -328,10 +318,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) { uint64_t span = BP_SPAN(dnp, zb->zb_level); uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT; - err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); + err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT); } else if (bp == NULL) { uint64_t span = BP_SPAN(dnp, zb->zb_level); - err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span); + err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span); } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { @@ -350,7 +340,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, for (i = 0; i < blksz >> DNODE_SHIFT; i++) { uint64_t dnobj = (zb->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; - err = dump_dnode(ba, dnobj, blk+i); + err = dump_dnode(dsp, dnobj, blk+i); if (err) break; } @@ -365,7 +355,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, ZIO_FLAG_CANFAIL, &aflags, zb) != 0) return (EIO); - err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data); + err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } else { /* it's a level-0 block of a regular object */ uint32_t aflags = ARC_WAIT; @@ -389,7 +379,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } } - err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz, + err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz, blksz, bp, abuf->b_data); (void) arc_buf_remove_ref(abuf, &abuf); } @@ -399,13 +389,13 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, } int -dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, - vnode_t *vp, offset_t *off) +dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, + int outfd, vnode_t *vp, offset_t *off) { dsl_dataset_t *ds = tosnap->os_dsl_dataset; dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL; dmu_replay_record_t *drr; - struct backuparg ba; + dmu_sendarg_t *dsp; int err; uint64_t fromtxg = 0; @@ -446,8 +436,10 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, #ifdef _KERNEL if (dmu_objset_type(tosnap) == DMU_OST_ZFS) { uint64_t version; - if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) + if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) { + kmem_free(drr, sizeof (dmu_replay_record_t)); return (EINVAL); + } if (version == ZPL_VERSION_SA) { DMU_SET_FEATUREFLAGS( drr->drr_u.drr_begin.drr_versioninfo, @@ -474,46 +466,59 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, if (fromorigin) dsl_dataset_rele(fromds, FTAG); - ba.drr = drr; - ba.vp = vp; - ba.os = tosnap; - ba.off = off; - ba.toguid = ds->ds_phys->ds_guid; - ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); - ba.pending_op = PENDING_NONE; + dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP); - if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { - kmem_free(drr, sizeof (dmu_replay_record_t)); - return (ba.err); + dsp->dsa_drr = drr; + dsp->dsa_vp = vp; + dsp->dsa_outfd = outfd; + dsp->dsa_proc = curproc; + dsp->dsa_os = tosnap; + dsp->dsa_off = off; + dsp->dsa_toguid = ds->ds_phys->ds_guid; + ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0); + dsp->dsa_pending_op = PENDING_NONE; + + mutex_enter(&ds->ds_sendstream_lock); + list_insert_head(&ds->ds_sendstreams, dsp); + mutex_exit(&ds->ds_sendstream_lock); + + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + err = dsp->dsa_err; + goto out; } err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH, - backup_cb, &ba); + backup_cb, dsp); - if (ba.pending_op != PENDING_NONE) - if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) + if (dsp->dsa_pending_op != PENDING_NONE) + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) err = EINTR; if (err) { - if (err == EINTR && ba.err) - err = ba.err; - kmem_free(drr, sizeof (dmu_replay_record_t)); - return (err); + if (err == EINTR && dsp->dsa_err) + err = dsp->dsa_err; + goto out; } bzero(drr, sizeof (dmu_replay_record_t)); drr->drr_type = DRR_END; - drr->drr_u.drr_end.drr_checksum = ba.zc; - drr->drr_u.drr_end.drr_toguid = ba.toguid; + drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc; + drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid; - if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) { - kmem_free(drr, sizeof (dmu_replay_record_t)); - return (ba.err); + if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) { + err = dsp->dsa_err; + goto out; } - kmem_free(drr, sizeof (dmu_replay_record_t)); +out: + mutex_enter(&ds->ds_sendstream_lock); + list_remove(&ds->ds_sendstreams, dsp); + mutex_exit(&ds->ds_sendstream_lock); - return (0); + kmem_free(drr, sizeof (dmu_replay_record_t)); + kmem_free(dsp, sizeof (dmu_sendarg_t)); + + return (err); } int diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index e7cbc51380..10e9b19d6e 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +401,8 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); + rw_init(&ds->ds_rwlock, NULL, RW_DEFAULT, NULL); cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); @@ -406,6 +410,9 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); + list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), + offsetof(dmu_sendarg_t, dsa_link)); + if (err == 0) { err = dsl_dir_open_obj(dp, ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 7a27f70bbc..aeac01496a 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -25,6 +25,7 @@ * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include @@ -53,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -3890,8 +3892,8 @@ zfs_ioc_send(zfs_cmd_t *zc) } off = fp->f_offset; - error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, - fp->f_vnode, &off); + error = dmu_send(tosnap, fromsnap, zc->zc_obj, + zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; @@ -3903,6 +3905,50 @@ zfs_ioc_send(zfs_cmd_t *zc) return (error); } +/* + * inputs: + * zc_name name of snapshot on which to report progress + * zc_cookie file descriptor of send stream + * + * outputs: + * zc_cookie number of bytes written in send stream thus far + */ +static int +zfs_ioc_send_progress(zfs_cmd_t *zc) +{ + dsl_dataset_t *ds; + dmu_sendarg_t *dsp = NULL; + int error; + + if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) + return (error); + + mutex_enter(&ds->ds_sendstream_lock); + + /* + * Iterate over all the send streams currently active on this dataset. + * If there's one which matches the specified file descriptor _and_ the + * stream was started by the current process, return the progress of + * that stream. + */ + + for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL; + dsp = list_next(&ds->ds_sendstreams, dsp)) { + if (dsp->dsa_outfd == zc->zc_cookie && + dsp->dsa_proc->group_leader == curproc->group_leader) + break; + } + + if (dsp != NULL) + zc->zc_cookie = *(dsp->dsa_off); + else + error = ENOENT; + + mutex_exit(&ds->ds_sendstream_lock); + dsl_dataset_rele(ds, FTAG); + return (error); +} + static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { @@ -4849,6 +4895,8 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = { POOL_CHECK_SUSPENDED }, { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED }, + { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, + POOL_CHECK_NONE } }; int