Illumos 5056 - ZFS deadlock on db_mtx and dn_holds

5056 ZFS deadlock on db_mtx and dn_holds
Author: Justin Gibbs <justing@spectralogic.com>
Reviewed by: Will Andrews <willa@spectralogic.com>
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/5056
  https://github.com/illumos/illumos-gate/commit/bc9014e

Porting Notes:

sa_handle_get_from_db():
  - the original patch includes an otherwise unmentioned fix for a
    possible usage of an uninitialised variable

dmu_objset_open_impl():
  - Under Illumos list_link_init() is the same as filling a list_node_t
    with NULLs, so they don't notice if they miss doing list_link_init()
    on a zero'd containing structure (e.g. allocated with kmem_zalloc as
    here). Under Linux, not so much: an uninitialised list_node_t goes
    "Boom!" some time later when it's used or destroyed.

dmu_objset_evict_dbufs():
  - reduce stack usage using kmem_alloc()

Ported-by: Chris Dunlop <chris@onthe.net.au>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Justin T. Gibbs 2015-04-02 14:44:32 +11:00 committed by Brian Behlendorf
parent d683ddbb72
commit 0c66c32d1d
35 changed files with 645 additions and 316 deletions

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_DBUF_H #ifndef _SYS_DBUF_H
@ -226,9 +227,8 @@ typedef struct dmu_buf_impl {
/* Data which is unique to data (leaf) blocks: */ /* Data which is unique to data (leaf) blocks: */
/* stuff we store for the user (see dmu_buf_set_user) */ /* User callback information. */
void *db_user_ptr; dmu_buf_user_t *db_user;
dmu_buf_evict_func_t *db_evict_func;
uint8_t db_immediate_evict; uint8_t db_immediate_evict;
uint8_t db_freed_in_flight; uint8_t db_freed_in_flight;

View File

@ -24,6 +24,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved. * Copyright 2014 HybridCluster. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
/* Portions Copyright 2010 Robert Milkowski */ /* Portions Copyright 2010 Robert Milkowski */
@ -39,11 +40,9 @@
* dmu_spa.h. * dmu_spa.h.
*/ */
#include <sys/zfs_context.h>
#include <sys/inttypes.h> #include <sys/inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h> #include <sys/cred.h>
#include <sys/time.h>
#include <sys/fs/zfs.h> #include <sys/fs/zfs.h>
#include <sys/uio.h> #include <sys/uio.h>
@ -288,8 +287,6 @@ typedef struct dmu_buf {
void *db_data; /* data in buffer */ void *db_data; /* data in buffer */
} dmu_buf_t; } dmu_buf_t;
typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
/* /*
* The names of zap entries in the DIRECTORY_OBJECT of the MOS. * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
*/ */
@ -475,36 +472,126 @@ int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp); uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag); void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
/* typedef void dmu_buf_evict_func_t(void *user_ptr);
* Returns NULL on success, or the existing user ptr if it's already
* been set.
*
* user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
*
* If non-NULL, pageout func will be called when this buffer is being
* excised from the cache, so that you can clean up the data structure
* pointed to by user_ptr.
*
* dmu_evict_user() will call the pageout func for all buffers in a
* objset with a given pageout func.
*/
void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr,
dmu_buf_evict_func_t *pageout_func);
/*
* set_user_ie is the same as set_user, but request immediate eviction
* when hold count goes to zero.
*/
void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
dmu_buf_evict_func_t *pageout_func);
void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
void *user_ptr, dmu_buf_evict_func_t *pageout_func);
void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
/* /*
* Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set. * A DMU buffer user object may be associated with a dbuf for the
* duration of its lifetime. This allows the user of a dbuf (client)
* to attach private data to a dbuf (e.g. in-core only data such as a
* dnode_children_t, zap_t, or zap_leaf_t) and be optionally notified
* when that dbuf has been evicted. Clients typically respond to the
* eviction notification by freeing their private data, thus ensuring
* the same lifetime for both dbuf and private data.
*
* The mapping from a dmu_buf_user_t to any client private data is the
* client's responsibility. All current consumers of the API with private
* data embed a dmu_buf_user_t as the first member of the structure for
* their private data. This allows conversions between the two types
* with a simple cast. Since the DMU buf user API never needs access
* to the private data, other strategies can be employed if necessary
* or convenient for the client (e.g. using container_of() to do the
* conversion for private data that cannot have the dmu_buf_user_t as
* its first member).
*
* Eviction callbacks are executed without the dbuf mutex held or any
* other type of mechanism to guarantee that the dbuf is still available.
* For this reason, users must assume the dbuf has already been freed
* and not reference the dbuf from the callback context.
*
* Users requesting "immediate eviction" are notified as soon as the dbuf
* is only referenced by dirty records (dirties == holds). Otherwise the
* notification occurs after eviction processing for the dbuf begins.
*/
typedef struct dmu_buf_user {
/*
 * Asynchronous user eviction callback state: the taskq entry that
 * dbuf_evict_user() hands to taskq_dispatch_ent() when it queues
 * dbu_evict_func on the eviction taskq.
 */
taskq_ent_t dbu_tqent;
/*
 * This instance's eviction function pointer.  Set once by
 * dmu_buf_init_user(), which asserts that it was previously NULL and
 * that the new value is non-NULL.
 */
dmu_buf_evict_func_t *dbu_evict_func;
#ifdef ZFS_DEBUG
/*
 * Pointer to the user's dbuf pointer.  NULL for clients that do
 * not associate a dbuf with their user data.
 *
 * When non-NULL, the pointed-to dbuf pointer is set to NULL upon
 * eviction so as to catch use-after-evict bugs in clients.  This
 * member exists only in ZFS_DEBUG builds.
 */
dmu_buf_t **dbu_clear_on_evict_dbufp;
#endif
} dmu_buf_user_t;
/*
 * Prepare a dmu_buf_user_t for use by recording evict_func, the callback
 * to run when the user is evicted.
 *
 * clear_on_evict_dbufp is stored only in ZFS_DEBUG builds; other builds
 * ignore it.
 *
 * NOTE: Call this at most once for a given dmu_buf_user_t.  The structure
 * must be zeroed on entry so that a second initialization trips the
 * assertion below.
 */
#ifdef __lint
/*
 * Lint is given a plain prototype in place of the inline definition.
 * Ugly, but preferable to adding suppression directives to many Makefiles.
 */
extern void
dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
    dmu_buf_t **clear_on_evict_dbufp);
#else /* __lint */
static inline void
dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
    dmu_buf_t **clear_on_evict_dbufp)
{
	/* A callback already present means dbu was initialized before. */
	ASSERT(dbu->dbu_evict_func == NULL);
	/* Every initialized user must carry an eviction callback. */
	ASSERT(evict_func != NULL);

	dbu->dbu_evict_func = evict_func;
#ifdef ZFS_DEBUG
	dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
#endif
}
#endif /* __lint */
/*
* Attach user data to a dbuf and mark it for normal (when the dbuf's
* data is cleared or its reference count goes to zero) eviction processing.
*
* Returns NULL on success, or the existing user if another user currently
* owns the buffer.
*/
void *dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *user);
/*
* Attach user data to a dbuf and mark it for immediate (its dirty and
* reference counts are equal) eviction processing.
*
* Returns NULL on success, or the existing user if another user currently
* owns the buffer.
*/
void *dmu_buf_set_user_ie(dmu_buf_t *db, dmu_buf_user_t *user);
/*
* Replace the current user of a dbuf.
*
* If given the current user of a dbuf, replaces the dbuf's user with
* "new_user" and returns the user data pointer that was replaced.
* Otherwise returns the current, and unmodified, dbuf user pointer.
*/
void *dmu_buf_replace_user(dmu_buf_t *db,
dmu_buf_user_t *old_user, dmu_buf_user_t *new_user);
/*
* Remove the specified user data for a DMU buffer.
*
* Returns the user that was removed on success, or the current user if
* another user currently owns the buffer.
*/
void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
/*
* Returns the user data (dmu_buf_user_t *) associated with this dbuf.
*/ */
void *dmu_buf_get_user(dmu_buf_t *db); void *dmu_buf_get_user(dmu_buf_t *db);
/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);
/* /*
* Returns the blkptr associated with this dbuf, or NULL if not set. * Returns the blkptr associated with this dbuf, or NULL if not set.
*/ */

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
/* Portions Copyright 2010 Robert Milkowski */ /* Portions Copyright 2010 Robert Milkowski */
@ -74,22 +75,25 @@ struct objset {
arc_buf_t *os_phys_buf; arc_buf_t *os_phys_buf;
objset_phys_t *os_phys; objset_phys_t *os_phys;
/* /*
* The following "special" dnodes have no parent and are exempt from * The following "special" dnodes have no parent, are exempt
* dnode_move(), but they root their descendents in this objset using * from dnode_move(), and are not recorded in os_dnodes, but they
* handles anyway, so that all access to dnodes from dbufs consistently * root their descendents in this objset using handles anyway, so
* uses handles. * that all access to dnodes from dbufs consistently uses handles.
*/ */
dnode_handle_t os_meta_dnode; dnode_handle_t os_meta_dnode;
dnode_handle_t os_userused_dnode; dnode_handle_t os_userused_dnode;
dnode_handle_t os_groupused_dnode; dnode_handle_t os_groupused_dnode;
zilog_t *os_zil; zilog_t *os_zil;
list_node_t os_evicting_node;
/* can change, under dsl_dir's locks: */ /* can change, under dsl_dir's locks: */
enum zio_checksum os_checksum; enum zio_checksum os_checksum;
enum zio_compress os_compress; enum zio_compress os_compress;
uint8_t os_copies; uint8_t os_copies;
enum zio_checksum os_dedup_checksum; enum zio_checksum os_dedup_checksum;
boolean_t os_dedup_verify; boolean_t os_dedup_verify;
boolean_t os_evicting;
zfs_logbias_op_t os_logbias; zfs_logbias_op_t os_logbias;
zfs_cache_type_t os_primary_cache; zfs_cache_type_t os_primary_cache;
zfs_cache_type_t os_secondary_cache; zfs_cache_type_t os_secondary_cache;
@ -168,6 +172,8 @@ int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os); boolean_t dmu_objset_userspace_present(objset_t *os);
int dmu_fsname(const char *snapname, char *buf); int dmu_fsname(const char *snapname, char *buf);
void dmu_objset_evict_done(objset_t *os);
void dmu_objset_init(void); void dmu_objset_init(void);
void dmu_objset_fini(void); void dmu_objset_fini(void);

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_DNODE_H #ifndef _SYS_DNODE_H
@ -277,6 +278,7 @@ typedef struct dnode_handle {
} dnode_handle_t; } dnode_handle_t;
typedef struct dnode_children { typedef struct dnode_children {
dmu_buf_user_t dnc_dbu; /* User evict data */
size_t dnc_count; /* number of children */ size_t dnc_count; /* number of children */
dnode_handle_t dnc_children[]; /* sized dynamically */ dnode_handle_t dnc_children[]; /* sized dynamically */
} dnode_children_t; } dnode_children_t;
@ -287,7 +289,7 @@ typedef struct free_range {
uint64_t fr_nblks; uint64_t fr_nblks;
} free_range_t; } free_range_t;
dnode_t *dnode_special_open(struct objset *dd, dnode_phys_t *dnp, void dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
uint64_t object, dnode_handle_t *dnh); uint64_t object, dnode_handle_t *dnh);
void dnode_special_close(dnode_handle_t *dnh); void dnode_special_close(dnode_handle_t *dnh);

View File

@ -23,6 +23,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_DSL_DATASET_H #ifndef _SYS_DSL_DATASET_H
@ -125,11 +126,14 @@ typedef struct dsl_dataset_phys {
} dsl_dataset_phys_t; } dsl_dataset_phys_t;
typedef struct dsl_dataset { typedef struct dsl_dataset {
dmu_buf_user_t ds_dbu;
/* Immutable: */ /* Immutable: */
struct dsl_dir *ds_dir; struct dsl_dir *ds_dir;
dmu_buf_t *ds_dbuf; dmu_buf_t *ds_dbuf;
uint64_t ds_object; uint64_t ds_object;
uint64_t ds_fsid_guid; uint64_t ds_fsid_guid;
boolean_t ds_is_snapshot;
/* only used in syncing context, only valid for non-snapshots: */ /* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev; struct dsl_dataset *ds_prev;
@ -188,12 +192,6 @@ dsl_dataset_phys(dsl_dataset_t *ds)
*/ */
#define MAX_TAG_PREFIX_LEN 17 #define MAX_TAG_PREFIX_LEN 17
static inline boolean_t
dsl_dataset_is_snapshot(dsl_dataset_t *ds)
{
return (dsl_dataset_phys(ds)->ds_num_children != 0);
}
#define DS_UNIQUE_IS_ACCURATE(ds) \ #define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) ((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_DSL_DIR_H #ifndef _SYS_DSL_DIR_H
@ -84,6 +85,8 @@ typedef struct dsl_dir_phys {
} dsl_dir_phys_t; } dsl_dir_phys_t;
struct dsl_dir { struct dsl_dir {
dmu_buf_user_t dd_dbu;
/* These are immutable; no lock needed: */ /* These are immutable; no lock needed: */
uint64_t dd_object; uint64_t dd_object;
dsl_pool_t *dd_pool; dsl_pool_t *dd_pool;
@ -119,6 +122,7 @@ dsl_dir_phys(dsl_dir_t *dd)
} }
void dsl_dir_rele(dsl_dir_t *dd, void *tag); void dsl_dir_rele(dsl_dir_t *dd, void *tag);
void dsl_dir_async_rele(dsl_dir_t *dd, void *tag);
int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **, const char **tail); dsl_dir_t **, const char **tail);
int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,

View File

@ -133,7 +133,6 @@ int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *); uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
void sa_object_info(sa_handle_t *, dmu_object_info_t *); void sa_object_info(sa_handle_t *, dmu_object_info_t *);
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *); void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
void sa_update_user(sa_handle_t *, sa_handle_t *);
void *sa_get_userdata(sa_handle_t *); void *sa_get_userdata(sa_handle_t *);
void sa_set_userp(sa_handle_t *, void *); void sa_set_userp(sa_handle_t *, void *);
dmu_buf_t *sa_get_db(sa_handle_t *); dmu_buf_t *sa_get_db(sa_handle_t *);

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_SA_IMPL_H #ifndef _SYS_SA_IMPL_H
@ -208,6 +209,7 @@ typedef enum sa_data_op {
*/ */
struct sa_handle { struct sa_handle {
dmu_buf_user_t sa_dbu;
kmutex_t sa_lock; kmutex_t sa_lock;
dmu_buf_t *sa_bonus; dmu_buf_t *sa_bonus;
dmu_buf_t *sa_spill; dmu_buf_t *sa_spill;

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_SPA_H #ifndef _SYS_SPA_H
@ -680,6 +681,7 @@ extern spa_t *spa_next(spa_t *prev);
/* Refcount functions */ /* Refcount functions */
extern void spa_open_ref(spa_t *spa, void *tag); extern void spa_open_ref(spa_t *spa, void *tag);
extern void spa_close(spa_t *spa, void *tag); extern void spa_close(spa_t *spa, void *tag);
extern void spa_async_close(spa_t *spa, void *tag);
extern boolean_t spa_refcount_zero(spa_t *spa); extern boolean_t spa_refcount_zero(spa_t *spa);
#define SCL_NONE 0x00 #define SCL_NONE 0x00
@ -789,6 +791,9 @@ extern uint64_t spa_version(spa_t *spa);
extern boolean_t spa_deflate(spa_t *spa); extern boolean_t spa_deflate(spa_t *spa);
extern metaslab_class_t *spa_normal_class(spa_t *spa); extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa); extern metaslab_class_t *spa_log_class(spa_t *spa);
extern void spa_evicting_os_register(spa_t *, objset_t *os);
extern void spa_evicting_os_deregister(spa_t *, objset_t *os);
extern void spa_evicting_os_wait(spa_t *spa);
extern int spa_max_replication(spa_t *spa); extern int spa_max_replication(spa_t *spa);
extern int spa_prev_software_version(spa_t *spa); extern int spa_prev_software_version(spa_t *spa);
extern uint8_t spa_get_failmode(spa_t *spa); extern uint8_t spa_get_failmode(spa_t *spa);

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_SPA_IMPL_H #ifndef _SYS_SPA_IMPL_H
@ -144,6 +145,9 @@ struct spa {
uint64_t spa_claim_max_txg; /* highest claimed birth txg */ uint64_t spa_claim_max_txg; /* highest claimed birth txg */
timespec_t spa_loaded_ts; /* 1st successful open time */ timespec_t spa_loaded_ts; /* 1st successful open time */
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */ objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */
list_t spa_evicting_os_list; /* Objsets being evicted. */
kcondvar_t spa_evicting_os_cv; /* Objset Eviction Completion */
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */ txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
vdev_t *spa_root_vdev; /* top-level vdev container */ vdev_t *spa_root_vdev; /* top-level vdev container */
uint64_t spa_config_guid; /* config pool guid */ uint64_t spa_config_guid; /* config pool guid */

View File

@ -20,6 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_ZAP_IMPL_H #ifndef _SYS_ZAP_IMPL_H
@ -140,6 +141,7 @@ typedef struct zap_phys {
typedef struct zap_table_phys zap_table_phys_t; typedef struct zap_table_phys zap_table_phys_t;
typedef struct zap { typedef struct zap {
dmu_buf_user_t zap_dbu;
objset_t *zap_objset; objset_t *zap_objset;
uint64_t zap_object; uint64_t zap_object;
struct dmu_buf *zap_dbuf; struct dmu_buf *zap_dbuf;
@ -196,7 +198,7 @@ boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp); krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
void zap_unlockdir(zap_t *zap); void zap_unlockdir(zap_t *zap);
void zap_evict(dmu_buf_t *db, void *vmzap); void zap_evict(void *dbu);
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt); zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn); void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap); int zap_hashbits(zap_t *zap);

View File

@ -20,6 +20,7 @@
*/ */
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#ifndef _SYS_ZAP_LEAF_H #ifndef _SYS_ZAP_LEAF_H
@ -152,6 +153,7 @@ typedef union zap_leaf_chunk {
} zap_leaf_chunk_t; } zap_leaf_chunk_t;
typedef struct zap_leaf { typedef struct zap_leaf {
dmu_buf_user_t l_dbu;
krwlock_t l_rwlock; krwlock_t l_rwlock;
uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */ uint64_t l_blkid; /* 1<<ZAP_BLOCK_SHIFT byte block off */
int l_bs; /* block size shift */ int l_bs; /* block size shift */

View File

@ -23,6 +23,7 @@
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -78,10 +79,16 @@ static void dbuf_destroy(dmu_buf_impl_t *db);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
#ifndef __lint
extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
#endif /* ! __lint */
/* /*
* Global data structures and functions for the dbuf cache. * Global data structures and functions for the dbuf cache.
*/ */
static kmem_cache_t *dbuf_cache; static kmem_cache_t *dbuf_cache;
static taskq_t *dbu_evict_taskq;
/* ARGSUSED */ /* ARGSUSED */
static int static int
@ -247,17 +254,72 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
static arc_evict_func_t dbuf_do_evict; static arc_evict_func_t dbuf_do_evict;
/*
 * Selects which hold-count invariant dbuf_verify_user() asserts.
 */
typedef enum {
/* The user is being evicted: only holds >= db_dirtycnt is asserted. */
DBVU_EVICTING,
/*
 * The user is not being evicted: holds must be > 0, or >= db_dirtycnt
 * when the dbuf has db_immediate_evict set.
 */
DBVU_NOT_EVICTING
} dbvu_verify_type_t;
/*
 * Debug-only sanity checks on a dbuf that has user data attached.
 *
 * Compiles to an empty function unless ZFS_DEBUG is defined.  Does nothing
 * when no user is attached.  verify_type says whether the caller is in the
 * middle of evicting the user, which selects the hold-count invariant that
 * is asserted.
 */
static void
dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
{
#ifdef ZFS_DEBUG
	int64_t holds;

	/* Nothing to check when no user is attached. */
	if (db->db_user == NULL)
		return;

	/* User data may only be attached to data (level 0) blocks. */
	ASSERT(db->db_level == 0);

	/* A dbuf must be resolved before a client attaches user data. */
	ASSERT(db->db.db_data != NULL);
	ASSERT3U(db->db_state, ==, DB_CACHED);

	holds = refcount_count(&db->db_holds);

	/*
	 * Two cases share the generic invariant holds >= dirtycnt:
	 *
	 *  - Eviction in progress.  Immediate eviction occurs when
	 *    holds == dirtycnt.  Normal-eviction buffers have zero holds
	 *    on eviction, except when dbuf_fix_old_data() calls
	 *    dbuf_clear_data().  The hold count can nevertheless grow
	 *    during eviction even though db_mtx is held (see
	 *    dmu_bonus_hold() for an example), so nothing tighter than
	 *    the generic invariant can be tested.
	 *
	 *  - No eviction in progress, on an immediate-evict buffer.
	 *
	 * Any other buffer with a user attached must be held.
	 */
	if (verify_type == DBVU_EVICTING || db->db_immediate_evict == TRUE) {
		ASSERT3U(holds, >=, db->db_dirtycnt);
	} else {
		ASSERT3U(holds, >, 0);
	}
#endif
}
static void static void
dbuf_evict_user(dmu_buf_impl_t *db) dbuf_evict_user(dmu_buf_impl_t *db)
{ {
dmu_buf_user_t *dbu = db->db_user;
ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(MUTEX_HELD(&db->db_mtx));
if (db->db_level != 0 || db->db_evict_func == NULL) if (dbu == NULL)
return; return;
db->db_evict_func(&db->db, db->db_user_ptr); dbuf_verify_user(db, DBVU_EVICTING);
db->db_user_ptr = NULL; db->db_user = NULL;
db->db_evict_func = NULL;
#ifdef ZFS_DEBUG
if (dbu->dbu_clear_on_evict_dbufp != NULL)
*dbu->dbu_clear_on_evict_dbufp = NULL;
#endif
/*
* Invoke the callback from a taskq to avoid lock order reversals
* and limit stack depth.
*/
taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
&dbu->dbu_tqent);
} }
boolean_t boolean_t
@ -331,6 +393,12 @@ retry:
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
dbuf_stats_init(h); dbuf_stats_init(h);
/*
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc
* configuration is not required.
*/
dbu_evict_taskq = taskq_create("dbu_evict", 1, minclsyspri, 0, 0, 0);
} }
void void
@ -353,6 +421,7 @@ dbuf_fini(void)
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
#endif #endif
kmem_cache_destroy(dbuf_cache); kmem_cache_destroy(dbuf_cache);
taskq_destroy(dbu_evict_taskq);
} }
/* /*
@ -471,21 +540,27 @@ dbuf_verify(dmu_buf_impl_t *db)
#endif #endif
static void static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) dbuf_clear_data(dmu_buf_impl_t *db)
{ {
ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(MUTEX_HELD(&db->db_mtx));
db->db_buf = buf;
if (buf != NULL) {
ASSERT(buf->b_data != NULL);
db->db.db_data = buf->b_data;
if (!arc_released(buf))
arc_set_callback(buf, dbuf_do_evict, db);
} else {
dbuf_evict_user(db); dbuf_evict_user(db);
db->db_buf = NULL;
db->db.db_data = NULL; db->db.db_data = NULL;
if (db->db_state != DB_NOFILL) if (db->db_state != DB_NOFILL)
db->db_state = DB_UNCACHED; db->db_state = DB_UNCACHED;
} }
static void
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(buf != NULL);
db->db_buf = buf;
ASSERT(buf->b_data != NULL);
db->db.db_data = buf->b_data;
if (!arc_released(buf))
arc_set_callback(buf, dbuf_do_evict, db);
} }
/* /*
@ -507,7 +582,7 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
} else { } else {
abuf = db->db_buf; abuf = db->db_buf;
arc_loan_inuse_buf(abuf, db); arc_loan_inuse_buf(abuf, db);
dbuf_set_data(db, NULL); dbuf_clear_data(db);
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
} }
return (abuf); return (abuf);
@ -747,7 +822,7 @@ dbuf_noread(dmu_buf_impl_t *db)
dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
db->db_state = DB_FILL; db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) { } else if (db->db_state == DB_NOFILL) {
dbuf_set_data(db, NULL); dbuf_clear_data(db);
} else { } else {
ASSERT3U(db->db_state, ==, DB_CACHED); ASSERT3U(db->db_state, ==, DB_CACHED);
} }
@ -803,7 +878,7 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else { } else {
dbuf_set_data(db, NULL); dbuf_clear_data(db);
} }
} }
@ -854,7 +929,8 @@ void
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_tx_t *tx) dmu_tx_t *tx)
{ {
dmu_buf_impl_t *db, *db_next, *db_search; dmu_buf_impl_t *db_search;
dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg; uint64_t txg = tx->tx_txg;
avl_index_t where; avl_index_t where;
boolean_t freespill = boolean_t freespill =
@ -864,7 +940,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
end_blkid = dn->dn_maxblkid; end_blkid = dn->dn_maxblkid;
dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid); dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid);
db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP); db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
db_search->db_level = 0; db_search->db_level = 0;
db_search->db_blkid = start_blkid; db_search->db_blkid = start_blkid;
db_search->db_state = DB_SEARCH; db_search->db_state = DB_SEARCH;
@ -1436,7 +1512,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
arc_buf_t *buf = db->db_buf; arc_buf_t *buf = db->db_buf;
ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
dbuf_set_data(db, NULL); dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db)); VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db); dbuf_evict(db);
return (B_TRUE); return (B_TRUE);
@ -1785,8 +1861,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_parent = parent; db->db_parent = parent;
db->db_blkptr = blkptr; db->db_blkptr = blkptr;
db->db_user_ptr = NULL; db->db_user = NULL;
db->db_evict_func = NULL;
db->db_immediate_evict = 0; db->db_immediate_evict = 0;
db->db_freed_in_flight = 0; db->db_freed_in_flight = 0;
@ -2273,7 +2348,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
/* /*
* This dbuf has anonymous data associated with it. * This dbuf has anonymous data associated with it.
*/ */
dbuf_set_data(db, NULL); dbuf_clear_data(db);
VERIFY(arc_buf_remove_ref(buf, db)); VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db); dbuf_evict(db);
} else { } else {
@ -2306,7 +2381,8 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
} else { } else {
dbuf_clear(db); dbuf_clear(db);
} }
} else if (arc_buf_eviction_needed(db->db_buf)) { } else if (db->db_objset->os_evicting ||
arc_buf_eviction_needed(db->db_buf)) {
dbuf_clear(db); dbuf_clear(db);
} else { } else {
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
@ -2325,51 +2401,57 @@ dbuf_refcount(dmu_buf_impl_t *db)
} }
void * void *
dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
dmu_buf_evict_func_t *evict_func) dmu_buf_user_t *new_user)
{ {
return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
mutex_enter(&db->db_mtx);
dbuf_verify_user(db, DBVU_NOT_EVICTING);
if (db->db_user == old_user)
db->db_user = new_user;
else
old_user = db->db_user;
dbuf_verify_user(db, DBVU_NOT_EVICTING);
mutex_exit(&db->db_mtx);
return (old_user);
} }
void * void *
dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
dmu_buf_evict_func_t *evict_func) {
return (dmu_buf_replace_user(db_fake, NULL, user));
}
void *
dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
{ {
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
db->db_immediate_evict = TRUE; db->db_immediate_evict = TRUE;
return (dmu_buf_update_user(db_fake, NULL, user_ptr, evict_func)); return (dmu_buf_set_user(db_fake, user));
} }
void * void *
dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
dmu_buf_evict_func_t *evict_func)
{ {
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; return (dmu_buf_replace_user(db_fake, user, NULL));
ASSERT(db->db_level == 0);
ASSERT((user_ptr == NULL) == (evict_func == NULL));
mutex_enter(&db->db_mtx);
if (db->db_user_ptr == old_user_ptr) {
db->db_user_ptr = user_ptr;
db->db_evict_func = evict_func;
} else {
old_user_ptr = db->db_user_ptr;
}
mutex_exit(&db->db_mtx);
return (old_user_ptr);
} }
void * void *
dmu_buf_get_user(dmu_buf_t *db_fake) dmu_buf_get_user(dmu_buf_t *db_fake)
{ {
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
ASSERT(!refcount_is_zero(&db->db_holds));
return (db->db_user_ptr); dbuf_verify_user(db, DBVU_NOT_EVICTING);
return (db->db_user);
}
void
dmu_buf_user_evict_wait()
{
taskq_wait(dbu_evict_taskq);
} }
boolean_t boolean_t
@ -3037,7 +3119,6 @@ EXPORT_SYMBOL(dbuf_refcount);
EXPORT_SYMBOL(dbuf_sync_list); EXPORT_SYMBOL(dbuf_sync_list);
EXPORT_SYMBOL(dmu_buf_set_user); EXPORT_SYMBOL(dmu_buf_set_user);
EXPORT_SYMBOL(dmu_buf_set_user_ie); EXPORT_SYMBOL(dmu_buf_set_user_ie);
EXPORT_SYMBOL(dmu_buf_update_user);
EXPORT_SYMBOL(dmu_buf_get_user); EXPORT_SYMBOL(dmu_buf_get_user);
EXPORT_SYMBOL(dmu_buf_freeable); EXPORT_SYMBOL(dmu_buf_freeable);
EXPORT_SYMBOL(dmu_buf_get_blkptr); EXPORT_SYMBOL(dmu_buf_get_blkptr);

View File

@ -23,6 +23,7 @@
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
/* Portions Copyright 2010 Robert Milkowski */ /* Portions Copyright 2010 Robert Milkowski */
@ -347,7 +348,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
secondary_cache_changed_cb, os); secondary_cache_changed_cb, os);
} }
if (!dsl_dataset_is_snapshot(ds)) { if (!ds->ds_is_snapshot) {
if (err == 0) { if (err == 0) {
err = dsl_prop_register(ds, err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_CHECKSUM), zfs_prop_to_name(ZFS_PROP_CHECKSUM),
@ -404,7 +405,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_secondary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL;
} }
if (ds == NULL || !dsl_dataset_is_snapshot(ds)) if (ds == NULL || !ds->ds_is_snapshot)
os->os_zil_header = os->os_phys->os_zil_header; os->os_zil_header = os->os_phys->os_zil_header;
os->os_zil = zil_alloc(os, &os->os_zil_header); os->os_zil = zil_alloc(os, &os->os_zil_header);
@ -419,20 +420,19 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_link)); offsetof(dmu_buf_impl_t, db_link));
list_link_init(&os->os_evicting_node);
mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
DMU_META_DNODE(os) = dnode_special_open(os, dnode_special_open(os, &os->os_phys->os_meta_dnode,
&os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
&os->os_meta_dnode);
if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
DMU_USERUSED_DNODE(os) = dnode_special_open(os, dnode_special_open(os, &os->os_phys->os_userused_dnode,
&os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, DMU_USERUSED_OBJECT, &os->os_userused_dnode);
&os->os_userused_dnode); dnode_special_open(os, &os->os_phys->os_groupused_dnode,
DMU_GROUPUSED_DNODE(os) = dnode_special_open(os, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
&os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
&os->os_groupused_dnode);
} }
*osp = os; *osp = os;
@ -520,7 +520,7 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) { } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
dsl_dataset_disown(ds, tag); dsl_dataset_disown(ds, tag);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} else if (!readonly && dsl_dataset_is_snapshot(ds)) { } else if (!readonly && ds->ds_is_snapshot) {
dsl_dataset_disown(ds, tag); dsl_dataset_disown(ds, tag);
return (SET_ERROR(EROFS)); return (SET_ERROR(EROFS));
} }
@ -576,41 +576,57 @@ dmu_objset_disown(objset_t *os, void *tag)
void void
dmu_objset_evict_dbufs(objset_t *os) dmu_objset_evict_dbufs(objset_t *os)
{ {
dnode_t *dn_marker;
dnode_t *dn; dnode_t *dn;
dn_marker = kmem_alloc(sizeof (dnode_t), KM_SLEEP);
mutex_enter(&os->os_lock); mutex_enter(&os->os_lock);
dn = list_head(&os->os_dnodes);
/* process the mdn last, since the other dnodes have holds on it */ while (dn != NULL) {
list_remove(&os->os_dnodes, DMU_META_DNODE(os));
list_insert_tail(&os->os_dnodes, DMU_META_DNODE(os));
/* /*
* Find the first dnode with holds. We have to do this dance * Skip dnodes without holds. We have to do this dance
* because dnode_add_ref() only works if you already have a * because dnode_add_ref() only works if there is already a
* hold. If there are no holds then it has no dbufs so OK to * hold. If the dnode has no holds, then it has no dbufs.
* skip.
*/ */
for (dn = list_head(&os->os_dnodes); if (dnode_add_ref(dn, FTAG)) {
dn && !dnode_add_ref(dn, FTAG); list_insert_after(&os->os_dnodes, dn, dn_marker);
dn = list_next(&os->os_dnodes, dn))
continue;
while (dn) {
dnode_t *next_dn = dn;
do {
next_dn = list_next(&os->os_dnodes, next_dn);
} while (next_dn && !dnode_add_ref(next_dn, FTAG));
mutex_exit(&os->os_lock); mutex_exit(&os->os_lock);
dnode_evict_dbufs(dn); dnode_evict_dbufs(dn);
dnode_rele(dn, FTAG); dnode_rele(dn, FTAG);
mutex_enter(&os->os_lock); mutex_enter(&os->os_lock);
dn = next_dn; dn = list_next(&os->os_dnodes, dn_marker);
list_remove(&os->os_dnodes, dn_marker);
} else {
dn = list_next(&os->os_dnodes, dn);
}
} }
mutex_exit(&os->os_lock); mutex_exit(&os->os_lock);
kmem_free(dn_marker, sizeof (dnode_t));
if (DMU_USERUSED_DNODE(os) != NULL) {
dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
}
dnode_evict_dbufs(DMU_META_DNODE(os));
} }
/*
* Objset eviction processing is split into two pieces.
* The first marks the objset as evicting, evicts any dbufs that
* have a refcount of zero, and then queues up the objset for the
* second phase of eviction. Once os->os_dnodes has been cleared by
* dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
* The second phase closes the special dnodes, dequeues the objset from
* the list of those undergoing eviction, and finally frees the objset.
*
* NOTE: Due to asynchronous eviction processing (invocation of
* dnode_buf_pageout()), it is possible for the meta dnode for the
* objset to have no holds even though os->os_dnodes is not empty.
*/
void void
dmu_objset_evict(objset_t *os) dmu_objset_evict(objset_t *os)
{ {
@ -622,7 +638,7 @@ dmu_objset_evict(objset_t *os)
ASSERT(!dmu_objset_is_dirty(os, t)); ASSERT(!dmu_objset_is_dirty(os, t));
if (ds) { if (ds) {
if (!dsl_dataset_is_snapshot(ds)) { if (!ds->ds_is_snapshot) {
VERIFY0(dsl_prop_unregister(ds, VERIFY0(dsl_prop_unregister(ds,
zfs_prop_to_name(ZFS_PROP_CHECKSUM), zfs_prop_to_name(ZFS_PROP_CHECKSUM),
checksum_changed_cb, os)); checksum_changed_cb, os));
@ -656,8 +672,24 @@ dmu_objset_evict(objset_t *os)
if (os->os_sa) if (os->os_sa)
sa_tear_down(os); sa_tear_down(os);
os->os_evicting = B_TRUE;
dmu_objset_evict_dbufs(os); dmu_objset_evict_dbufs(os);
mutex_enter(&os->os_lock);
spa_evicting_os_register(os->os_spa, os);
if (list_is_empty(&os->os_dnodes)) {
mutex_exit(&os->os_lock);
dmu_objset_evict_done(os);
} else {
mutex_exit(&os->os_lock);
}
}
void
dmu_objset_evict_done(objset_t *os)
{
ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
dnode_special_close(&os->os_meta_dnode); dnode_special_close(&os->os_meta_dnode);
if (DMU_USERUSED_DNODE(os)) { if (DMU_USERUSED_DNODE(os)) {
dnode_special_close(&os->os_userused_dnode); dnode_special_close(&os->os_userused_dnode);
@ -665,8 +697,6 @@ dmu_objset_evict(objset_t *os)
} }
zil_free(os->os_zil); zil_free(os->os_zil);
ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
/* /*
@ -681,6 +711,7 @@ dmu_objset_evict(objset_t *os)
mutex_destroy(&os->os_lock); mutex_destroy(&os->os_lock);
mutex_destroy(&os->os_obj_lock); mutex_destroy(&os->os_obj_lock);
mutex_destroy(&os->os_user_ptr_lock); mutex_destroy(&os->os_user_ptr_lock);
spa_evicting_os_deregister(os->os_spa, os);
kmem_free(os, sizeof (objset_t)); kmem_free(os, sizeof (objset_t));
} }
@ -888,7 +919,7 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
} }
/* You can only clone snapshots, not the head datasets. */ /* You can only clone snapshots, not the head datasets. */
if (!dsl_dataset_is_snapshot(origin)) { if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(origin, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -1453,7 +1484,7 @@ int
dmu_objset_is_snapshot(objset_t *os) dmu_objset_is_snapshot(objset_t *os)
{ {
if (os->os_dsl_dataset != NULL) if (os->os_dsl_dataset != NULL)
return (dsl_dataset_is_snapshot(os->os_dsl_dataset)); return (os->os_dsl_dataset->ds_is_snapshot);
else else
return (B_FALSE); return (B_FALSE);
} }

View File

@ -615,7 +615,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
fromtxg = fromzb->zbm_creation_txg; fromtxg = fromzb->zbm_creation_txg;
} }
dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
if (!dsl_dataset_is_snapshot(ds)) { if (!ds->ds_is_snapshot) {
(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--", (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
sizeof (drr->drr_u.drr_begin.drr_toname)); sizeof (drr->drr_u.drr_begin.drr_toname));
} }
@ -820,7 +820,7 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
ASSERT(dsl_pool_config_held(dp)); ASSERT(dsl_pool_config_held(dp));
/* tosnap must be a snapshot */ /* tosnap must be a snapshot */
if (!dsl_dataset_is_snapshot(ds)) if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
/* /*
@ -1057,7 +1057,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (error); return (error);
} }
if (!dsl_dataset_is_snapshot(origin)) { if (!origin->ds_is_snapshot) {
dsl_dataset_rele(origin, FTAG); dsl_dataset_rele(origin, FTAG);
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));

View File

@ -541,7 +541,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
/* See comment on ZIL traversal in dsl_scan_visitds. */ /* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) { if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
uint32_t flags = ARC_WAIT; uint32_t flags = ARC_WAIT;
objset_phys_t *osp; objset_phys_t *osp;
arc_buf_t *buf; arc_buf_t *buf;

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -405,8 +406,9 @@ static dnode_t *
dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
uint64_t object, dnode_handle_t *dnh) uint64_t object, dnode_handle_t *dnh)
{ {
dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP); dnode_t *dn;
dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
ASSERT(!POINTER_IS_VALID(dn->dn_objset)); ASSERT(!POINTER_IS_VALID(dn->dn_objset));
dn->dn_moved = 0; dn->dn_moved = 0;
@ -443,13 +445,31 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type)); ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
mutex_enter(&os->os_lock); mutex_enter(&os->os_lock);
if (dnh->dnh_dnode != NULL) {
/* Lost the allocation race. */
mutex_exit(&os->os_lock);
kmem_cache_free(dnode_cache, dn);
return (dnh->dnh_dnode);
}
/*
* Exclude special dnodes from os_dnodes so an empty os_dnodes
* signifies that the special dnodes have no references from
* their children (the entries in os_dnodes). This allows
* dnode_destroy() to easily determine if the last child has
* been removed and then complete eviction of the objset.
*/
if (!DMU_OBJECT_IS_SPECIAL(object))
list_insert_head(&os->os_dnodes, dn); list_insert_head(&os->os_dnodes, dn);
membar_producer(); membar_producer();
/* /*
* Everything else must be valid before assigning dn_objset makes the * Everything else must be valid before assigning dn_objset
* dnode eligible for dnode_move(). * makes the dnode eligible for dnode_move().
*/ */
dn->dn_objset = os; dn->dn_objset = os;
dnh->dnh_dnode = dn;
mutex_exit(&os->os_lock); mutex_exit(&os->os_lock);
arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER); arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
@ -463,12 +483,18 @@ static void
dnode_destroy(dnode_t *dn) dnode_destroy(dnode_t *dn)
{ {
objset_t *os = dn->dn_objset; objset_t *os = dn->dn_objset;
boolean_t complete_os_eviction = B_FALSE;
ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0); ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
mutex_enter(&os->os_lock); mutex_enter(&os->os_lock);
POINTER_INVALIDATE(&dn->dn_objset); POINTER_INVALIDATE(&dn->dn_objset);
if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
list_remove(&os->os_dnodes, dn); list_remove(&os->os_dnodes, dn);
complete_os_eviction =
list_is_empty(&os->os_dnodes) &&
list_link_active(&os->os_evicting_node);
}
mutex_exit(&os->os_lock); mutex_exit(&os->os_lock);
/* the dnode can no longer move, so we can release the handle */ /* the dnode can no longer move, so we can release the handle */
@ -503,6 +529,9 @@ dnode_destroy(dnode_t *dn)
dmu_zfetch_rele(&dn->dn_zfetch); dmu_zfetch_rele(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn); kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER); arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
if (complete_os_eviction)
dmu_objset_evict_done(os);
} }
void void
@ -968,33 +997,32 @@ dnode_special_close(dnode_handle_t *dnh)
*/ */
while (refcount_count(&dn->dn_holds) > 0) while (refcount_count(&dn->dn_holds) > 0)
delay(1); delay(1);
ASSERT(dn->dn_dbuf == NULL ||
dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
zrl_add(&dnh->dnh_zrlock); zrl_add(&dnh->dnh_zrlock);
dnode_destroy(dn); /* implicit zrl_remove() */ dnode_destroy(dn); /* implicit zrl_remove() */
zrl_destroy(&dnh->dnh_zrlock); zrl_destroy(&dnh->dnh_zrlock);
dnh->dnh_dnode = NULL; dnh->dnh_dnode = NULL;
} }
dnode_t * void
dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object, dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
dnode_handle_t *dnh) dnode_handle_t *dnh)
{ {
dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh); dnode_t *dn;
dnh->dnh_dnode = dn;
dn = dnode_create(os, dnp, NULL, object, dnh);
zrl_init(&dnh->dnh_zrlock); zrl_init(&dnh->dnh_zrlock);
DNODE_VERIFY(dn); DNODE_VERIFY(dn);
return (dn);
} }
static void static void
dnode_buf_pageout(dmu_buf_t *db, void *arg) dnode_buf_pageout(void *dbu)
{ {
dnode_children_t *children_dnodes = arg; dnode_children_t *children_dnodes = dbu;
int i; int i;
int epb = db->db_size >> DNODE_SHIFT;
ASSERT(epb == children_dnodes->dnc_count); for (i = 0; i < children_dnodes->dnc_count; i++) {
for (i = 0; i < epb; i++) {
dnode_handle_t *dnh = &children_dnodes->dnc_children[i]; dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
dnode_t *dn; dnode_t *dn;
@ -1024,7 +1052,7 @@ dnode_buf_pageout(dmu_buf_t *db, void *arg)
dnh->dnh_dnode = NULL; dnh->dnh_dnode = NULL;
} }
kmem_free(children_dnodes, sizeof (dnode_children_t) + kmem_free(children_dnodes, sizeof (dnode_children_t) +
epb * sizeof (dnode_handle_t)); children_dnodes->dnc_count * sizeof (dnode_handle_t));
} }
/* /*
@ -1108,16 +1136,17 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
if (children_dnodes == NULL) { if (children_dnodes == NULL) {
int i; int i;
dnode_children_t *winner; dnode_children_t *winner;
children_dnodes = kmem_alloc(sizeof (dnode_children_t) + children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
epb * sizeof (dnode_handle_t), KM_SLEEP); epb * sizeof (dnode_handle_t), KM_SLEEP);
children_dnodes->dnc_count = epb; children_dnodes->dnc_count = epb;
dnh = &children_dnodes->dnc_children[0]; dnh = &children_dnodes->dnc_children[0];
for (i = 0; i < epb; i++) { for (i = 0; i < epb; i++) {
zrl_init(&dnh[i].dnh_zrlock); zrl_init(&dnh[i].dnh_zrlock);
dnh[i].dnh_dnode = NULL;
} }
if ((winner = dmu_buf_set_user(&db->db, children_dnodes, dmu_buf_init_user(&children_dnodes->dnc_dbu,
dnode_buf_pageout))) { dnode_buf_pageout, NULL);
winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
if (winner != NULL) {
for (i = 0; i < epb; i++) { for (i = 0; i < epb; i++) {
zrl_destroy(&dnh[i].dnh_zrlock); zrl_destroy(&dnh[i].dnh_zrlock);
@ -1132,17 +1161,11 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dnh = &children_dnodes->dnc_children[idx]; dnh = &children_dnodes->dnc_children[idx];
zrl_add(&dnh->dnh_zrlock); zrl_add(&dnh->dnh_zrlock);
if ((dn = dnh->dnh_dnode) == NULL) { dn = dnh->dnh_dnode;
if (dn == NULL) {
dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx; dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
dnode_t *winner;
dn = dnode_create(os, phys, db, object, dnh); dn = dnode_create(os, phys, db, object, dnh);
winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
if (winner != NULL) {
zrl_add(&dnh->dnh_zrlock);
dnode_destroy(dn); /* implicit zrl_remove() */
dn = winner;
}
} }
mutex_enter(&dn->dn_mtx); mutex_enter(&dn->dn_mtx);
@ -1156,10 +1179,10 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
dbuf_rele(db, FTAG); dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST); return (type == DMU_OT_NONE ? ENOENT : EEXIST);
} }
mutex_exit(&dn->dn_mtx);
if (refcount_add(&dn->dn_holds, tag) == 1) if (refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh); dbuf_add_ref(db, dnh);
mutex_exit(&dn->dn_mtx);
/* Now we can rely on the hold to prevent the dnode from moving. */ /* Now we can rely on the hold to prevent the dnode from moving. */
zrl_remove(&dnh->dnh_zrlock); zrl_remove(&dnh->dnh_zrlock);

View File

@ -22,6 +22,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -402,17 +403,14 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
void void
dnode_evict_dbufs(dnode_t *dn) dnode_evict_dbufs(dnode_t *dn)
{ {
int progress; dmu_buf_impl_t *db_marker;
int pass = 0;
do {
dmu_buf_impl_t *db, *db_next; dmu_buf_impl_t *db, *db_next;
int evicting = FALSE;
progress = FALSE; db_marker = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
mutex_enter(&dn->dn_dbufs_mtx); mutex_enter(&dn->dn_dbufs_mtx);
for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) { for (db = avl_first(&dn->dn_dbufs); db != NULL; db = db_next) {
db_next = AVL_NEXT(&dn->dn_dbufs, db);
#ifdef DEBUG #ifdef DEBUG
DB_DNODE_ENTER(db); DB_DNODE_ENTER(db);
ASSERT3P(DB_DNODE(db), ==, dn); ASSERT3P(DB_DNODE(db), ==, dn);
@ -420,35 +418,26 @@ dnode_evict_dbufs(dnode_t *dn)
#endif /* DEBUG */ #endif /* DEBUG */
mutex_enter(&db->db_mtx); mutex_enter(&db->db_mtx);
if (db->db_state == DB_EVICTING) { if (db->db_state != DB_EVICTING &&
progress = TRUE; refcount_is_zero(&db->db_holds)) {
evicting = TRUE; db_marker->db_level = db->db_level;
mutex_exit(&db->db_mtx); db_marker->db_blkid = db->db_blkid;
} else if (refcount_is_zero(&db->db_holds)) { db_marker->db_state = DB_SEARCH;
progress = TRUE; avl_insert_here(&dn->dn_dbufs, db_marker, db,
dbuf_clear(db); /* exits db_mtx for us */ AVL_BEFORE);
dbuf_clear(db);
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
avl_remove(&dn->dn_dbufs, db_marker);
} else { } else {
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
db_next = AVL_NEXT(&dn->dn_dbufs, db);
} }
} }
/*
* NB: we need to drop dn_dbufs_mtx between passes so
* that any DB_EVICTING dbufs can make progress.
* Ideally, we would have some cv we could wait on, but
* since we don't, just wait a bit to give the other
* thread a chance to run.
*/
mutex_exit(&dn->dn_dbufs_mtx); mutex_exit(&dn->dn_dbufs_mtx);
if (evicting)
delay(1);
pass++;
if ((pass % 100) == 0)
dprintf("Exceeded %d passes evicting dbufs\n", pass);
} while (progress);
if (pass >= 100) kmem_free(db_marker, sizeof (dmu_buf_impl_t));
dprintf("Required %d passes to evict dbufs\n", pass);
dnode_evict_bonus(dn); dnode_evict_bonus(dn);
} }
@ -513,7 +502,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn); dnode_evict_dbufs(dn);
ASSERT(avl_is_empty(&dn->dn_dbufs)); ASSERT(avl_is_empty(&dn->dn_dbufs));
ASSERT3P(dn->dn_bonus, ==, NULL);
/* /*
* XXX - It would be nice to assert this, but we may still * XXX - It would be nice to assert this, but we may still

View File

@ -120,7 +120,7 @@ dsl_bookmark_create_check_impl(dsl_dataset_t *snapds, const char *bookmark_name,
int error; int error;
zfs_bookmark_phys_t bmark_phys; zfs_bookmark_phys_t bmark_phys;
if (!dsl_dataset_is_snapshot(snapds)) if (!snapds->ds_is_snapshot)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
error = dsl_bookmark_hold_ds(dp, bookmark_name, error = dsl_bookmark_hold_ds(dp, bookmark_name,

View File

@ -23,6 +23,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems. * Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/dmu_objset.h> #include <sys/dmu_objset.h>
@ -62,7 +63,6 @@
#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
extern inline boolean_t dsl_dataset_is_snapshot(dsl_dataset_t *ds);
/* /*
* Figure out how much of this delta should be propagated to the dsl_dir * Figure out how much of this delta should be propagated to the dsl_dir
@ -146,7 +146,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
} }
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
ASSERT(!dsl_dataset_is_snapshot(ds)); ASSERT(!ds->ds_is_snapshot);
dmu_buf_will_dirty(ds->ds_dbuf, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx);
if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
@ -244,14 +244,15 @@ dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
return (B_TRUE); return (B_TRUE);
} }
/* ARGSUSED */
static void static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv) dsl_dataset_evict(void *dbu)
{ {
dsl_dataset_t *ds = dsv; dsl_dataset_t *ds = dbu;
ASSERT(ds->ds_owner == NULL); ASSERT(ds->ds_owner == NULL);
ds->ds_dbuf = NULL;
unique_remove(ds->ds_fsid_guid); unique_remove(ds->ds_fsid_guid);
if (ds->ds_objset != NULL) if (ds->ds_objset != NULL)
@ -263,10 +264,10 @@ dsl_dataset_evict(dmu_buf_t *db, void *dsv)
} }
bplist_destroy(&ds->ds_pending_deadlist); bplist_destroy(&ds->ds_pending_deadlist);
if (dsl_dataset_phys(ds)->ds_deadlist_obj != 0) if (ds->ds_deadlist.dl_os != NULL)
dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_dir) if (ds->ds_dir)
dsl_dir_rele(ds->ds_dir, ds); dsl_dir_async_rele(ds->ds_dir, ds);
ASSERT(!list_link_active(&ds->ds_synced_link)); ASSERT(!list_link_active(&ds->ds_synced_link));
@ -380,6 +381,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
ds->ds_dbuf = dbuf; ds->ds_dbuf = dbuf;
ds->ds_object = dsobj; ds->ds_object = dsobj;
ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0;
list_link_init(&ds->ds_synced_link); list_link_init(&ds->ds_synced_link);
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
@ -411,7 +413,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
return (err); return (err);
} }
if (!dsl_dataset_is_snapshot(ds)) { if (!ds->ds_is_snapshot) {
ds->ds_snapname[0] = '\0'; ds->ds_snapname[0] = '\0';
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
err = dsl_dataset_hold_obj(dp, err = dsl_dataset_hold_obj(dp,
@ -438,7 +440,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
} }
} }
if (err == 0 && !dsl_dataset_is_snapshot(ds)) { if (err == 0 && !ds->ds_is_snapshot) {
err = dsl_prop_get_int_ds(ds, err = dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
&ds->ds_reserved); &ds->ds_reserved);
@ -451,8 +453,11 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds->ds_reserved = ds->ds_quota = 0; ds->ds_reserved = ds->ds_quota = 0;
} }
if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
dsl_dataset_evict)) != NULL) { if (err == 0)
winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
if (err != 0 || winner != NULL) {
bplist_destroy(&ds->ds_pending_deadlist); bplist_destroy(&ds->ds_pending_deadlist);
dsl_deadlist_close(&ds->ds_deadlist); dsl_deadlist_close(&ds->ds_deadlist);
if (ds->ds_prev) if (ds->ds_prev)
@ -829,7 +834,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
uint64_t mrs_used; uint64_t mrs_used;
uint64_t dlused, dlcomp, dluncomp; uint64_t dlused, dlcomp, dluncomp;
ASSERT(!dsl_dataset_is_snapshot(ds)); ASSERT(!ds->ds_is_snapshot);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0)
mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes;
@ -1579,7 +1584,7 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
dsl_dataset_phys(ds)->ds_uncompressed_bytes); dsl_dataset_phys(ds)->ds_uncompressed_bytes);
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
dsl_dataset_phys(ds)->ds_unique_bytes); dsl_dataset_phys(ds)->ds_unique_bytes);
@ -1643,7 +1648,7 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT;
stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; stat->dds_guid = dsl_dataset_phys(ds)->ds_guid;
stat->dds_origin[0] = '\0'; stat->dds_origin[0] = '\0';
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
stat->dds_is_snapshot = B_TRUE; stat->dds_is_snapshot = B_TRUE;
stat->dds_num_clones = stat->dds_num_clones =
dsl_dataset_phys(ds)->ds_num_children - 1; dsl_dataset_phys(ds)->ds_num_children - 1;
@ -1920,7 +1925,7 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
return (error); return (error);
/* must not be a snapshot */ /* must not be a snapshot */
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -2491,7 +2496,7 @@ promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag)
return (error); return (error);
dd = ddpa->ddpa_clone->ds_dir; dd = ddpa->ddpa_clone->ds_dir;
if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || if (ddpa->ddpa_clone->ds_is_snapshot ||
!dsl_dir_is_clone(dd)) { !dsl_dir_is_clone(dd)) {
dsl_dataset_rele(ddpa->ddpa_clone, tag); dsl_dataset_rele(ddpa->ddpa_clone, tag);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
@ -2582,8 +2587,8 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
int64_t unused_refres_delta; int64_t unused_refres_delta;
/* they should both be heads */ /* they should both be heads */
if (dsl_dataset_is_snapshot(clone) || if (clone->ds_is_snapshot ||
dsl_dataset_is_snapshot(origin_head)) origin_head->ds_is_snapshot)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
/* if we are not forcing, the branch point should be just before them */ /* if we are not forcing, the branch point should be just before them */
@ -2862,7 +2867,7 @@ dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx)
if (error != 0) if (error != 0)
return (error); return (error);
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -2945,7 +2950,7 @@ dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx)
if (error != 0) if (error != 0)
return (error); return (error);
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
} }
@ -3158,8 +3163,8 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
uint64_t snapobj; uint64_t snapobj;
dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
ASSERT(dsl_dataset_is_snapshot(firstsnap)); ASSERT(firstsnap->ds_is_snapshot);
ASSERT(dsl_dataset_is_snapshot(lastsnap)); ASSERT(lastsnap->ds_is_snapshot);
/* /*
* Check that the snapshots are in the same dsl_dir, and firstsnap * Check that the snapshots are in the same dsl_dir, and firstsnap
@ -3214,12 +3219,12 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
dsl_dataset_t *origin; dsl_dataset_t *origin;
ASSERT(dsl_pool_config_held(dp)); ASSERT(dsl_pool_config_held(dp));
ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); ASSERT(earlier->ds_is_snapshot || earlier_txg != 0);
if (earlier_txg == 0) if (earlier_txg == 0)
earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg;
if (dsl_dataset_is_snapshot(later) && if (later->ds_is_snapshot &&
earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg)
return (B_FALSE); return (B_FALSE);

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/dsl_dataset.h> #include <sys/dsl_dataset.h>
@ -121,6 +122,8 @@ dsl_deadlist_close(dsl_deadlist_t *dl)
void *cookie = NULL; void *cookie = NULL;
dsl_deadlist_entry_t *dle; dsl_deadlist_entry_t *dle;
dl->dl_os = NULL;
if (dl->dl_oldfmt) { if (dl->dl_oldfmt) {
dl->dl_oldfmt = B_FALSE; dl->dl_oldfmt = B_FALSE;
bpobj_close(&dl->dl_bpobj); bpobj_close(&dl->dl_bpobj);

View File

@ -570,7 +570,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
SPA_VERSION_DELEGATED_PERMS) SPA_VERSION_DELEGATED_PERMS)
return (SET_ERROR(EPERM)); return (SET_ERROR(EPERM));
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
/* /*
* Snapshots are treated as descendents only, * Snapshots are treated as descendents only,
* local permissions do not apply. * local permissions do not apply.

View File

@ -51,7 +51,7 @@ typedef struct dmu_snapshots_destroy_arg {
int int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{ {
if (!dsl_dataset_is_snapshot(ds)) if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
if (dsl_dataset_long_held(ds)) if (dsl_dataset_long_held(ds))
@ -356,7 +356,7 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
dsl_dataset_remove_clones_key(ds, dsl_dataset_remove_clones_key(ds,
dsl_dataset_phys(ds)->ds_creation_txg, tx); dsl_dataset_phys(ds)->ds_creation_txg, tx);
if (dsl_dataset_is_snapshot(ds_next)) { if (ds_next->ds_is_snapshot) {
dsl_dataset_t *ds_nextnext; dsl_dataset_t *ds_nextnext;
/* /*
@ -605,8 +605,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
uint64_t count; uint64_t count;
objset_t *mos; objset_t *mos;
ASSERT(!dsl_dataset_is_snapshot(ds)); ASSERT(!ds->ds_is_snapshot);
if (dsl_dataset_is_snapshot(ds)) if (ds->ds_is_snapshot)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
if (refcount_count(&ds->ds_longholds) != expected_holds) if (refcount_count(&ds->ds_longholds) != expected_holds)

View File

@ -23,6 +23,7 @@
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved. * Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/dmu.h> #include <sys/dmu.h>
@ -126,14 +127,15 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
/* ARGSUSED */
static void static void
dsl_dir_evict(dmu_buf_t *db, void *arg) dsl_dir_evict(void *dbu)
{ {
dsl_dir_t *dd = arg; dsl_dir_t *dd = dbu;
int t; int t;
ASSERTV(dsl_pool_t *dp = dd->dd_pool); ASSERTV(dsl_pool_t *dp = dd->dd_pool);
dd->dd_dbuf = NULL;
for (t = 0; t < TXG_SIZE; t++) { for (t = 0; t < TXG_SIZE; t++) {
ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
ASSERT(dd->dd_tempreserved[t] == 0); ASSERT(dd->dd_tempreserved[t] == 0);
@ -141,9 +143,9 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
} }
if (dd->dd_parent) if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd); dsl_dir_async_rele(dd->dd_parent, dd);
spa_close(dd->dd_pool->dp_spa, dd); spa_async_close(dd->dd_pool->dp_spa, dd);
/* /*
* The props callback list should have been cleaned up by * The props callback list should have been cleaned up by
@ -239,8 +241,9 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dmu_buf_rele(origin_bonus, FTAG); dmu_buf_rele(origin_bonus, FTAG);
} }
winner = dmu_buf_set_user_ie(dbuf, dd, dsl_dir_evict); dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
if (winner) { winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
if (winner != NULL) {
if (dd->dd_parent) if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd); dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock); mutex_destroy(&dd->dd_lock);
@ -284,6 +287,21 @@ dsl_dir_rele(dsl_dir_t *dd, void *tag)
dmu_buf_rele(dd->dd_dbuf, tag); dmu_buf_rele(dd->dd_dbuf, tag);
} }
/*
* Remove a reference to the given dsl dir that is being asynchronously
* released. Async releases occur from a taskq performing eviction of
* dsl datasets and dirs. This process is identical to a normal release
* with the exception of using the async API for releasing the reference on
* the spa.
*
* dd:  directory whose hold is being dropped.
* tag: hold tag; the same value is handed to both spa_async_close() and
*      dmu_buf_rele() below.
*/
void
dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
{
dprintf_dd(dd, "%s\n", "");
/* The spa reference is dropped while dd->dd_pool is still being read. */
spa_async_close(dd->dd_pool->dp_spa, tag);
/*
* NOTE(review): presumably dd must not be dereferenced after this call,
* since dropping the dbuf hold may let the dbuf user eviction callback
* run -- confirm against dmu_buf_rele().
*/
dmu_buf_rele(dd->dd_dbuf, tag);
}
/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
void void
dsl_dir_name(dsl_dir_t *dd, char *buf) dsl_dir_name(dsl_dir_t *dd, char *buf)
@ -417,7 +435,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
} }
while (next != NULL) { while (next != NULL) {
dsl_dir_t *child_ds; dsl_dir_t *child_dd;
err = getcomponent(next, buf, &nextnext); err = getcomponent(next, buf, &nextnext);
if (err != 0) if (err != 0)
break; break;
@ -436,11 +454,11 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
break; break;
} }
err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds); err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
if (err != 0) if (err != 0)
break; break;
dsl_dir_rele(dd, tag); dsl_dir_rele(dd, tag);
dd = child_ds; dd = child_dd;
next = nextnext; next = nextnext;
} }

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/dsl_pool.h> #include <sys/dsl_pool.h>
@ -319,6 +320,8 @@ dsl_pool_close(dsl_pool_t *dp)
arc_flush(dp->dp_spa); arc_flush(dp->dp_spa);
txg_fini(dp); txg_fini(dp);
dsl_scan_fini(dp); dsl_scan_fini(dp);
dmu_buf_user_evict_wait();
rrw_destroy(&dp->dp_config_rwlock); rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock); mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_iput_taskq); taskq_destroy(dp->dp_iput_taskq);

View File

@ -163,19 +163,17 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
{ {
zfs_prop_t prop = zfs_name_to_prop(propname); zfs_prop_t prop = zfs_name_to_prop(propname);
boolean_t inheritable; boolean_t inheritable;
boolean_t snapshot;
uint64_t zapobj; uint64_t zapobj;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop)); inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
snapshot = dsl_dataset_is_snapshot(ds);
zapobj = dsl_dataset_phys(ds)->ds_props_obj; zapobj = dsl_dataset_phys(ds)->ds_props_obj;
if (zapobj != 0) { if (zapobj != 0) {
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
int err; int err;
ASSERT(snapshot); ASSERT(ds->ds_is_snapshot);
/* Check for a local value. */ /* Check for a local value. */
err = zap_lookup(mos, zapobj, propname, intsz, numints, buf); err = zap_lookup(mos, zapobj, propname, intsz, numints, buf);
@ -215,7 +213,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
} }
return (dsl_prop_get_dd(ds->ds_dir, propname, return (dsl_prop_get_dd(ds->ds_dir, propname,
intsz, numints, buf, setpoint, snapshot)); intsz, numints, buf, setpoint, ds->ds_is_snapshot));
} }
/* /*
@ -545,7 +543,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
isint = (dodefault(propname, 8, 1, &intval) == 0); isint = (dodefault(propname, 8, 1, &intval) == 0);
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
ASSERT(version >= SPA_VERSION_SNAP_PROPS); ASSERT(version >= SPA_VERSION_SNAP_PROPS);
if (dsl_dataset_phys(ds)->ds_props_obj == 0) { if (dsl_dataset_phys(ds)->ds_props_obj == 0) {
dmu_buf_will_dirty(ds->ds_dbuf, tx); dmu_buf_will_dirty(ds->ds_dbuf, tx);
@ -642,7 +640,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
if (isint) { if (isint) {
VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval)); VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
dsl_prop_cb_record_t *cbr; dsl_prop_cb_record_t *cbr;
/* /*
* It's a snapshot; nothing can inherit this * It's a snapshot; nothing can inherit this
@ -760,7 +758,7 @@ dsl_props_set_check(void *arg, dmu_tx_t *tx)
} }
} }
if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) { if (ds->ds_is_snapshot && version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG); dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(ENOTSUP)); return (SET_ERROR(ENOTSUP));
} }
@ -983,7 +981,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
if (dsl_dataset_is_snapshot(ds)) if (ds->ds_is_snapshot)
flags |= DSL_PROP_GET_SNAPSHOT; flags |= DSL_PROP_GET_SNAPSHOT;
ASSERT(dsl_pool_config_held(dp)); ASSERT(dsl_pool_config_held(dp));

View File

@ -415,7 +415,7 @@ static uint64_t
dsl_scan_ds_maxtxg(dsl_dataset_t *ds) dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
{ {
uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg; uint64_t smt = ds->ds_dir->dd_pool->dp_scan->scn_phys.scn_max_txg;
if (dsl_dataset_is_snapshot(ds)) if (ds->ds_is_snapshot)
return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg)); return (MIN(smt, dsl_dataset_phys(ds)->ds_creation_txg));
return (smt); return (smt);
} }
@ -846,7 +846,7 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
return; return;
if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) { if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
/* Note, scn_cur_{min,max}_txg stays the same. */ /* Note, scn_cur_{min,max}_txg stays the same. */
scn->scn_phys.scn_bookmark.zb_objset = scn->scn_phys.scn_bookmark.zb_objset =
dsl_dataset_phys(ds)->ds_next_snap_obj; dsl_dataset_phys(ds)->ds_next_snap_obj;
@ -868,7 +868,7 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object, tx)); scn->scn_phys.scn_queue_obj, ds->ds_object, tx));
if (dsl_dataset_is_snapshot(ds)) { if (ds->ds_is_snapshot) {
/* /*
* We keep the same mintxg; it could be > * We keep the same mintxg; it could be >
* ds_creation_txg if the previous snapshot was * ds_creation_txg if the previous snapshot was
@ -1053,7 +1053,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
* ZIL here, rather than in scan_recurse(), because the regular * ZIL here, rather than in scan_recurse(), because the regular
* snapshot block-sharing rules don't apply to it. * snapshot block-sharing rules don't apply to it.
*/ */
if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !dsl_dataset_is_snapshot(ds)) if (DSL_SCAN_IS_SCRUB_RESILVER(scn) && !ds->ds_is_snapshot)
dsl_scan_zil(dp, &os->os_zil_header); dsl_scan_zil(dp, &os->os_zil_header);
/* /*

View File

@ -354,7 +354,7 @@ dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
objset_t *mos; objset_t *mos;
int numholds; int numholds;
if (!dsl_dataset_is_snapshot(ds)) if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL)); return (SET_ERROR(EINVAL));
if (nvlist_empty(holds)) if (nvlist_empty(holds))

View File

@ -22,6 +22,7 @@
/* /*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -209,6 +210,7 @@ sa_cache_constructor(void *buf, void *unused, int kmflag)
{ {
sa_handle_t *hdl = buf; sa_handle_t *hdl = buf;
hdl->sa_dbu.dbu_evict_func = NULL;
hdl->sa_bonus_tab = NULL; hdl->sa_bonus_tab = NULL;
hdl->sa_spill_tab = NULL; hdl->sa_spill_tab = NULL;
hdl->sa_os = NULL; hdl->sa_os = NULL;
@ -224,6 +226,7 @@ static void
sa_cache_destructor(void *buf, void *unused) sa_cache_destructor(void *buf, void *unused)
{ {
sa_handle_t *hdl = buf; sa_handle_t *hdl = buf;
hdl->sa_dbu.dbu_evict_func = NULL;
mutex_destroy(&hdl->sa_lock); mutex_destroy(&hdl->sa_lock);
} }
@ -1306,10 +1309,10 @@ sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
} }
/*ARGSUSED*/ /*ARGSUSED*/
void static void
sa_evict(dmu_buf_t *db, void *sap) sa_evict(void *dbu)
{ {
panic("evicting sa dbuf %p\n", (void *)db); panic("evicting sa dbuf\n");
} }
static void static void
@ -1361,9 +1364,10 @@ sa_spill_rele(sa_handle_t *hdl)
void void
sa_handle_destroy(sa_handle_t *hdl) sa_handle_destroy(sa_handle_t *hdl)
{ {
dmu_buf_t *db = hdl->sa_bonus;
mutex_enter(&hdl->sa_lock); mutex_enter(&hdl->sa_lock);
(void) dmu_buf_update_user((dmu_buf_t *)hdl->sa_bonus, hdl, (void) dmu_buf_remove_user(db, &hdl->sa_dbu);
NULL, NULL);
if (hdl->sa_bonus_tab) { if (hdl->sa_bonus_tab) {
sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab); sa_idx_tab_rele(hdl->sa_os, hdl->sa_bonus_tab);
@ -1388,7 +1392,7 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
sa_handle_type_t hdl_type, sa_handle_t **handlepp) sa_handle_type_t hdl_type, sa_handle_t **handlepp)
{ {
int error = 0; int error = 0;
sa_handle_t *handle; sa_handle_t *handle = NULL;
#ifdef ZFS_DEBUG #ifdef ZFS_DEBUG
dmu_object_info_t doi; dmu_object_info_t doi;
@ -1399,9 +1403,12 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
/* find handle, if it exists */ /* find handle, if it exists */
/* if one doesn't exist then create a new one, and initialize it */ /* if one doesn't exist then create a new one, and initialize it */
handle = (hdl_type == SA_HDL_SHARED) ? dmu_buf_get_user(db) : NULL; if (hdl_type == SA_HDL_SHARED)
handle = dmu_buf_get_user(db);
if (handle == NULL) { if (handle == NULL) {
sa_handle_t *newhandle; sa_handle_t *winner = NULL;
handle = kmem_cache_alloc(sa_cache, KM_SLEEP); handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
handle->sa_userp = userp; handle->sa_userp = userp;
handle->sa_bonus = db; handle->sa_bonus = db;
@ -1409,12 +1416,15 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
handle->sa_spill = NULL; handle->sa_spill = NULL;
error = sa_build_index(handle, SA_BONUS); error = sa_build_index(handle, SA_BONUS);
newhandle = (hdl_type == SA_HDL_SHARED) ?
dmu_buf_set_user_ie(db, handle, sa_evict) : NULL;
if (newhandle != NULL) { if (hdl_type == SA_HDL_SHARED) {
dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
}
if (winner != NULL) {
kmem_cache_free(sa_cache, handle); kmem_cache_free(sa_cache, handle);
handle = newhandle; handle = winner;
} }
} }
*handlepp = handle; *handlepp = handle;
@ -1946,14 +1956,6 @@ sa_object_size(sa_handle_t *hdl, uint32_t *blksize, u_longlong_t *nblocks)
blksize, nblocks); blksize, nblocks);
} }
void
sa_update_user(sa_handle_t *newhdl, sa_handle_t *oldhdl)
{
(void) dmu_buf_update_user((dmu_buf_t *)newhdl->sa_bonus,
oldhdl, newhdl, sa_evict);
oldhdl->sa_bonus = NULL;
}
void void
sa_set_userp(sa_handle_t *hdl, void *ptr) sa_set_userp(sa_handle_t *hdl, void *ptr)
{ {
@ -2052,7 +2054,6 @@ EXPORT_SYMBOL(sa_size);
EXPORT_SYMBOL(sa_update_from_cb); EXPORT_SYMBOL(sa_update_from_cb);
EXPORT_SYMBOL(sa_object_info); EXPORT_SYMBOL(sa_object_info);
EXPORT_SYMBOL(sa_object_size); EXPORT_SYMBOL(sa_object_size);
EXPORT_SYMBOL(sa_update_user);
EXPORT_SYMBOL(sa_get_userdata); EXPORT_SYMBOL(sa_get_userdata);
EXPORT_SYMBOL(sa_set_userp); EXPORT_SYMBOL(sa_set_userp);
EXPORT_SYMBOL(sa_get_db); EXPORT_SYMBOL(sa_get_db);

View File

@ -23,6 +23,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
/* /*
@ -1095,6 +1096,8 @@ spa_activate(spa_t *spa, int mode)
list_create(&spa->spa_config_dirty_list, sizeof (vdev_t), list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_config_dirty_node)); offsetof(vdev_t, vdev_config_dirty_node));
list_create(&spa->spa_evicting_os_list, sizeof (objset_t),
offsetof(objset_t, os_evicting_node));
list_create(&spa->spa_state_dirty_list, sizeof (vdev_t), list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_state_dirty_node)); offsetof(vdev_t, vdev_state_dirty_node));
@ -1123,9 +1126,12 @@ spa_deactivate(spa_t *spa)
ASSERT(spa->spa_async_zio_root == NULL); ASSERT(spa->spa_async_zio_root == NULL);
ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED); ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);
spa_evicting_os_wait(spa);
txg_list_destroy(&spa->spa_vdev_txg_list); txg_list_destroy(&spa->spa_vdev_txg_list);
list_destroy(&spa->spa_config_dirty_list); list_destroy(&spa->spa_config_dirty_list);
list_destroy(&spa->spa_evicting_os_list);
list_destroy(&spa->spa_state_dirty_list); list_destroy(&spa->spa_state_dirty_list);
taskq_cancel_id(system_taskq, spa->spa_deadman_tqid); taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
@ -2136,6 +2142,11 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
mosconfig, &ereport); mosconfig, &ereport);
} }
/*
* Don't count references from objsets that are already closed
* and are making their way through the eviction process.
*/
spa_evicting_os_wait(spa);
spa->spa_minref = refcount_count(&spa->spa_refcount); spa->spa_minref = refcount_count(&spa->spa_refcount);
if (error) { if (error) {
if (error != EEXIST) { if (error != EEXIST) {
@ -3775,6 +3786,11 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa_history_log_version(spa, "create"); spa_history_log_version(spa, "create");
/*
* Don't count references from objsets that are already closed
* and are making their way through the eviction process.
*/
spa_evicting_os_wait(spa);
spa->spa_minref = refcount_count(&spa->spa_refcount); spa->spa_minref = refcount_count(&spa->spa_refcount);
mutex_exit(&spa_namespace_lock); mutex_exit(&spa_namespace_lock);
@ -4314,8 +4330,10 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
* modify its state. Objsets may be open only because they're dirty, * modify its state. Objsets may be open only because they're dirty,
* so we have to force it to sync before checking spa_refcnt. * so we have to force it to sync before checking spa_refcnt.
*/ */
if (spa->spa_sync_on) if (spa->spa_sync_on) {
txg_wait_synced(spa->spa_dsl_pool, 0); txg_wait_synced(spa->spa_dsl_pool, 0);
spa_evicting_os_wait(spa);
}
/* /*
* A pool cannot be exported or destroyed if there are active * A pool cannot be exported or destroyed if there are active

View File

@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/zfs_context.h> #include <sys/zfs_context.h>
@ -524,6 +525,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
@ -533,6 +535,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL);
@ -619,6 +622,7 @@ spa_remove(spa_t *spa)
ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(MUTEX_HELD(&spa_namespace_lock));
ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED); ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
ASSERT3U(refcount_count(&spa->spa_refcount), ==, 0);
nvlist_free(spa->spa_config_splitting); nvlist_free(spa->spa_config_splitting);
@ -651,6 +655,7 @@ spa_remove(spa_t *spa)
bplist_destroy(&spa->spa_free_bplist[t]); bplist_destroy(&spa->spa_free_bplist[t]);
cv_destroy(&spa->spa_async_cv); cv_destroy(&spa->spa_async_cv);
cv_destroy(&spa->spa_evicting_os_cv);
cv_destroy(&spa->spa_proc_cv); cv_destroy(&spa->spa_proc_cv);
cv_destroy(&spa->spa_scrub_io_cv); cv_destroy(&spa->spa_scrub_io_cv);
cv_destroy(&spa->spa_suspend_cv); cv_destroy(&spa->spa_suspend_cv);
@ -658,6 +663,7 @@ spa_remove(spa_t *spa)
mutex_destroy(&spa->spa_async_lock); mutex_destroy(&spa->spa_async_lock);
mutex_destroy(&spa->spa_errlist_lock); mutex_destroy(&spa->spa_errlist_lock);
mutex_destroy(&spa->spa_errlog_lock); mutex_destroy(&spa->spa_errlog_lock);
mutex_destroy(&spa->spa_evicting_os_lock);
mutex_destroy(&spa->spa_history_lock); mutex_destroy(&spa->spa_history_lock);
mutex_destroy(&spa->spa_proc_lock); mutex_destroy(&spa->spa_proc_lock);
mutex_destroy(&spa->spa_props_lock); mutex_destroy(&spa->spa_props_lock);
@ -714,6 +720,20 @@ spa_close(spa_t *spa, void *tag)
(void) refcount_remove(&spa->spa_refcount, tag); (void) refcount_remove(&spa->spa_refcount, tag);
} }
/*
* Remove a reference to the given spa_t held by a dsl dir that is
* being asynchronously released. Async releases occur from a taskq
* performing eviction of dsl datasets and dirs. The namespace lock
* isn't held and the hold by the object being evicted may contribute to
* spa_minref (e.g. dataset or directory released during pool export),
* so the asserts in spa_close() do not apply.
*
* spa: pool whose spa_refcount is decremented.
* tag: holder tag passed through to refcount_remove().
*/
void
spa_async_close(spa_t *spa, void *tag)
{
/* Only the reference is removed; the return value is deliberately ignored. */
(void) refcount_remove(&spa->spa_refcount, tag);
}
/* /*
* Check to see if the spa refcount is zero. Must be called with * Check to see if the spa refcount is zero. Must be called with
* spa_namespace_lock held. We really compare against spa_minref, which is the * spa_namespace_lock held. We really compare against spa_minref, which is the
@ -1597,6 +1617,34 @@ spa_log_class(spa_t *spa)
return (spa->spa_log_class); return (spa->spa_log_class);
} }
/*
* Insert os at the head of spa->spa_evicting_os_list while holding
* spa->spa_evicting_os_lock.
*/
void
spa_evicting_os_register(spa_t *spa, objset_t *os)
{
mutex_enter(&spa->spa_evicting_os_lock);
list_insert_head(&spa->spa_evicting_os_list, os);
mutex_exit(&spa->spa_evicting_os_lock);
}
/*
* Remove os from spa->spa_evicting_os_list and wake every thread blocked
* on spa->spa_evicting_os_cv. Both steps happen with
* spa->spa_evicting_os_lock held.
*/
void
spa_evicting_os_deregister(spa_t *spa, objset_t *os)
{
mutex_enter(&spa->spa_evicting_os_lock);
list_remove(&spa->spa_evicting_os_list, os);
/* Broadcast so that all waiters in spa_evicting_os_wait() recheck the list. */
cv_broadcast(&spa->spa_evicting_os_cv);
mutex_exit(&spa->spa_evicting_os_lock);
}
/*
* Block until spa->spa_evicting_os_list is empty, then call
* dmu_buf_user_evict_wait().
*/
void
spa_evicting_os_wait(spa_t *spa)
{
mutex_enter(&spa->spa_evicting_os_lock);
/* The emptiness test is repeated after every wakeup on the cv. */
while (!list_is_empty(&spa->spa_evicting_os_list))
cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock);
mutex_exit(&spa->spa_evicting_os_lock);
/* Called only after spa_evicting_os_lock has been dropped. */
dmu_buf_user_evict_wait();
}
int int
spa_max_replication(spa_t *spa) spa_max_replication(spa_t *spa)
{ {

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
/* /*
@ -52,7 +53,6 @@ int fzap_default_block_shift = 14; /* 16k blocksize */
extern inline zap_phys_t *zap_f_phys(zap_t *zap); extern inline zap_phys_t *zap_f_phys(zap_t *zap);
static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks); static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
void void
@ -81,7 +81,7 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
zap->zap_ismicro = FALSE; zap->zap_ismicro = FALSE;
(void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, zap_evict); zap->zap_dbu.dbu_evict_func = zap_evict;
mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1; zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
@ -387,11 +387,20 @@ zap_allocate_blocks(zap_t *zap, int nblocks)
return (newblk); return (newblk);
} }
/*
* Tear down a zap_leaf_t: destroy its l_rwlock and free the structure.
* Installed as the eviction function via dmu_buf_init_user() and also
* called directly when another thread's leaf won the dmu_buf_set_user()
* race.
*
* dbu: pointer that is used directly as the zap_leaf_t to free.
*/
static void
zap_leaf_pageout(void *dbu)
{
/*
* NOTE(review): callers pass &l->l_dbu, so this assignment presumably
* relies on l_dbu being the first member of zap_leaf_t -- confirm in
* the struct definition.
*/
zap_leaf_t *l = dbu;
rw_destroy(&l->l_rwlock);
kmem_free(l, sizeof (zap_leaf_t));
}
static zap_leaf_t * static zap_leaf_t *
zap_create_leaf(zap_t *zap, dmu_tx_t *tx) zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
{ {
void *winner; void *winner;
zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); zap_leaf_t *l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
@ -403,7 +412,8 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf, l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
DMU_READ_NO_PREFETCH)); DMU_READ_NO_PREFETCH));
winner = dmu_buf_set_user(l->l_dbuf, l, zap_leaf_pageout); dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
ASSERT(winner == NULL); ASSERT(winner == NULL);
dmu_buf_will_dirty(l->l_dbuf, tx); dmu_buf_will_dirty(l->l_dbuf, tx);
@ -435,16 +445,6 @@ zap_put_leaf(zap_leaf_t *l)
dmu_buf_rele(l->l_dbuf, NULL); dmu_buf_rele(l->l_dbuf, NULL);
} }
_NOTE(ARGSUSED(0))
static void
zap_leaf_pageout(dmu_buf_t *db, void *vl)
{
zap_leaf_t *l = vl;
rw_destroy(&l->l_rwlock);
kmem_free(l, sizeof (zap_leaf_t));
}
static zap_leaf_t * static zap_leaf_t *
zap_open_leaf(uint64_t blkid, dmu_buf_t *db) zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
{ {
@ -452,19 +452,20 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
ASSERT(blkid != 0); ASSERT(blkid != 0);
l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL); rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL);
rw_enter(&l->l_rwlock, RW_WRITER); rw_enter(&l->l_rwlock, RW_WRITER);
l->l_blkid = blkid; l->l_blkid = blkid;
l->l_bs = highbit64(db->db_size) - 1; l->l_bs = highbit64(db->db_size) - 1;
l->l_dbuf = db; l->l_dbuf = db;
winner = dmu_buf_set_user(db, l, zap_leaf_pageout); dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
winner = dmu_buf_set_user(db, &l->l_dbu);
rw_exit(&l->l_rwlock); rw_exit(&l->l_rwlock);
if (winner != NULL) { if (winner != NULL) {
/* someone else set it first */ /* someone else set it first */
zap_leaf_pageout(NULL, l); zap_leaf_pageout(&l->l_dbu);
l = winner; l = winner;
} }

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/ */
#include <sys/zio.h> #include <sys/zio.h>
@ -386,7 +387,8 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
* it, because zap_lockdir() checks zap_ismicro without the lock * it, because zap_lockdir() checks zap_ismicro without the lock
* held. * held.
*/ */
winner = dmu_buf_set_user(db, zap, zap_evict); dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
winner = dmu_buf_set_user(db, &zap->zap_dbu);
if (winner != NULL) { if (winner != NULL) {
rw_exit(&zap->zap_rwlock); rw_exit(&zap->zap_rwlock);
@ -675,11 +677,10 @@ zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
return (dmu_object_free(os, zapobj, tx)); return (dmu_object_free(os, zapobj, tx));
} }
_NOTE(ARGSUSED(0))
void void
zap_evict(dmu_buf_t *db, void *vzap) zap_evict(void *dbu)
{ {
zap_t *zap = vzap; zap_t *zap = dbu;
rw_destroy(&zap->zap_rwlock); rw_destroy(&zap->zap_rwlock);

View File

@ -22,8 +22,7 @@
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
*/ */
#include <sys/types.h> #include <sys/zfs_context.h>
#include <sys/param.h>
#include <sys/vnode.h> #include <sys/vnode.h>
#include <sys/sa.h> #include <sys/sa.h>
#include <sys/zfs_acl.h> #include <sys/zfs_acl.h>

View File

@ -497,7 +497,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
dsl_pool_t *dp = zilog->zl_dmu_pool; dsl_pool_t *dp = zilog->zl_dmu_pool;
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
if (dsl_dataset_is_snapshot(ds)) if (ds->ds_is_snapshot)
panic("dirtying snapshot!"); panic("dirtying snapshot!");
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) { if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {