From 3a0400ad88f88077c37959ca8a829ffbe26f0d75 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 15:57:09 -0700 Subject: [PATCH 1/7] Update linux-events Minor updates to linux-events to handle some upstream changes. Luckily, this was done flexibly enough the first time it wasn't much of a problem. --- lib/libzfs/libzfs_pool.c | 4 ++-- module/zfs/dsl_scan.c | 9 ++++----- module/zfs/fm.c | 2 +- module/zfs/include/sys/fm/util.h | 4 ++-- module/zfs/spa.c | 2 +- module/zfs/zfs_fm.c | 10 +++++----- 6 files changed, 15 insertions(+), 16 deletions(-) diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index cdc2d29438..e1e7d57804 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -3431,7 +3431,7 @@ zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int *dropped, int block) { - zfs_cmd_t zc = { "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int error = 0; *nvp = NULL; @@ -3489,7 +3489,7 @@ out: int zpool_events_clear(libzfs_handle_t *hdl, int *count) { - zfs_cmd_t zc = { "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 0eb1b1f782..18e2fa1690 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -50,9 +50,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *); -static scan_cb_t dsl_scan_defrag_cb; static scan_cb_t dsl_scan_scrub_cb; -static scan_cb_t dsl_scan_remove_cb; static dsl_syncfunc_t dsl_scan_cancel_sync; static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx); @@ -189,9 +187,9 @@ dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx) if (vdev_resilver_needed(spa->spa_root_vdev, &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) { - spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START); + spa_event_notify(spa, NULL, 
FM_EREPORT_ZFS_RESILVER_START); } else { - spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START); + spa_event_notify(spa, NULL, FM_EREPORT_ZFS_SCRUB_START); } spa->spa_scrub_started = B_TRUE; @@ -292,7 +290,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE); if (complete) { spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ? - ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH); + FM_EREPORT_ZFS_RESILVER_FINISH : + FM_EREPORT_ZFS_SCRUB_FINISH); } spa_errlog_rotate(spa); diff --git a/module/zfs/fm.c b/module/zfs/fm.c index 5b06d49d01..0c94ade288 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -432,7 +432,7 @@ static void fm_event_free(zevent_t *ev) { /* Run provided cleanup callback */ - ev->ev_cb(ev->ev_nvl); + ev->ev_cb(ev->ev_nvl, ev->ev_detector); list_destroy(&ev->ev_zpd_list); kmem_free(ev, sizeof(zevent_t)); diff --git a/module/zfs/include/sys/fm/util.h b/module/zfs/include/sys/fm/util.h index 959931939a..03eb18ea29 100644 --- a/module/zfs/include/sys/fm/util.h +++ b/module/zfs/include/sys/fm/util.h @@ -79,7 +79,7 @@ typedef struct erpt_dump { #define ZEVENT_SHUTDOWN 0x1 -typedef void zevent_cb_t(nvlist_t *); +typedef void zevent_cb_t(nvlist_t *, nvlist_t *); typedef struct zevent_s { nvlist_t *ev_nvl; /* protected by the zevent_lock */ @@ -100,7 +100,7 @@ extern void fm_fini(void); extern void fm_nvprint(nvlist_t *); extern void fm_zevent_init(zfs_private_data_t *); extern void fm_zevent_fini(zfs_private_data_t *); -extern void fm_zevent_post(nvlist_t *, zevent_cb_t *); +extern void fm_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *); extern void fm_zevent_drain_all(int *); extern int fm_zevent_next(zfs_private_data_t *, zfs_cmd_t *); extern int fm_zevent_wait(zfs_private_data_t *); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 081a215844..74e6747da3 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -4038,7 +4038,7 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t 
pguid, int replace_done) vd->vdev_detached = B_TRUE; vdev_dirty(tvd, VDD_DTL, vd, txg); - spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE); + spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE); error = spa_vdev_exit(spa, vd, txg, 0); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 91d2104003..5138f49e52 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -235,7 +235,6 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, vd != NULL ? vd->vdev_guid : 0); fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); - fm_nvlist_destroy(detector, FM_NVA_FREE); /* * Construct the per-ereport payload, depending on which parameters are @@ -440,12 +439,13 @@ shrink_ranges(zfs_ecksum_info_t *eip) uint32_t end = r[idx].zr_end; while (idx < max - 1) { + uint32_t nstart, nend, gap; + idx++; + nstart = r[idx].zr_start; + nend = r[idx].zr_end; - uint32_t nstart = r[idx].zr_start; - uint32_t nend = r[idx].zr_end; - - uint32_t gap = nstart - end; + gap = nstart - end; if (gap < new_allowed_gap) { end = nend; continue; From e92d6d861f646e33ae899a5c9189db96d15f8bf5 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 15:58:15 -0700 Subject: [PATCH 2/7] Update linux-kernel-module with refreshed EXPORT_SYMBOLs After such a large update many of the symbols which were previously exported are no longer available, and several new symbols have been added and are needed. Refresh the topic branch to reflect this.
--- module/nvpair/nvpair.c | 5 +++- module/zcommon/zfs_comutil.c | 4 +++ module/zcommon/zprop_common.c | 11 ++++---- module/zfs/dmu_objset.c | 51 +++++++++++++++++------------------ module/zfs/dsl_dataset.c | 11 +++----- module/zfs/spa.c | 15 +++++++---- module/zfs/spa_history.c | 3 ++- module/zfs/spa_misc.c | 22 ++++++++++++--- module/zfs/txg.c | 2 -- 9 files changed, 73 insertions(+), 51 deletions(-) diff --git a/module/nvpair/nvpair.c b/module/nvpair/nvpair.c index d98ca4d941..4edc5a4dc9 100644 --- a/module/nvpair/nvpair.c +++ b/module/nvpair/nvpair.c @@ -3348,9 +3348,13 @@ EXPORT_SYMBOL(nvlist_add_int64_array); EXPORT_SYMBOL(nvlist_add_uint64_array); EXPORT_SYMBOL(nvlist_add_string_array); EXPORT_SYMBOL(nvlist_add_nvlist_array); +EXPORT_SYMBOL(nvlist_next_nvpair); +EXPORT_SYMBOL(nvlist_prev_nvpair); +EXPORT_SYMBOL(nvlist_empty); EXPORT_SYMBOL(nvlist_add_hrtime); EXPORT_SYMBOL(nvlist_remove); +EXPORT_SYMBOL(nvlist_remove_nvpair); EXPORT_SYMBOL(nvlist_remove_all); EXPORT_SYMBOL(nvlist_lookup_boolean); @@ -3385,7 +3389,6 @@ EXPORT_SYMBOL(nvlist_lookup_nvpair); EXPORT_SYMBOL(nvlist_exists); /* processing nvpair */ -EXPORT_SYMBOL(nvlist_next_nvpair); EXPORT_SYMBOL(nvpair_name); EXPORT_SYMBOL(nvpair_type); EXPORT_SYMBOL(nvpair_value_boolean_value); diff --git a/module/zcommon/zfs_comutil.c b/module/zcommon/zfs_comutil.c index 797cab7a14..ccf169be6d 100644 --- a/module/zcommon/zfs_comutil.c +++ b/module/zcommon/zfs_comutil.c @@ -203,4 +203,8 @@ const char *zfs_history_event_names[LOG_END] = { #if defined(_KERNEL) && defined(HAVE_SPL) EXPORT_SYMBOL(zfs_allocatable_devs); +EXPORT_SYMBOL(zpool_get_rewind_policy); +EXPORT_SYMBOL(zfs_zpl_version_map); +EXPORT_SYMBOL(zfs_spa_version_map); +EXPORT_SYMBOL(zfs_history_event_names); #endif diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c index 9d2e1c82ff..ab5b4662b7 100644 --- a/module/zcommon/zprop_common.c +++ b/module/zcommon/zprop_common.c @@ -427,17 +427,18 @@ zprop_width(int prop, boolean_t 
*fixed, zfs_type_t type) #if defined(_KERNEL) && defined(HAVE_SPL) /* Common routines to initialize property tables */ -EXPORT_SYMBOL(register_impl); -EXPORT_SYMBOL(register_string); -EXPORT_SYMBOL(register_number); -EXPORT_SYMBOL(register_index); -EXPORT_SYMBOL(register_hidden); +EXPORT_SYMBOL(zprop_register_impl); +EXPORT_SYMBOL(zprop_register_string); +EXPORT_SYMBOL(zprop_register_number); +EXPORT_SYMBOL(zprop_register_index); +EXPORT_SYMBOL(zprop_register_hidden); /* Common routines for zfs and zpool property management */ EXPORT_SYMBOL(zprop_iter_common); EXPORT_SYMBOL(zprop_name_to_prop); EXPORT_SYMBOL(zprop_string_to_index); EXPORT_SYMBOL(zprop_index_to_string); +EXPORT_SYMBOL(zprop_random_value); EXPORT_SYMBOL(zprop_values); EXPORT_SYMBOL(zprop_valid_for_type); #endif diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 3ced910049..629cba64a6 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -1686,35 +1686,34 @@ dmu_objset_get_user(objset_t *os) } #if defined(_KERNEL) && defined(HAVE_SPL) -EXPORT_SYMBOL(dmu_objset_spa); -EXPORT_SYMBOL(dmu_objset_zil); -EXPORT_SYMBOL(dmu_objset_pool); -EXPORT_SYMBOL(dmu_objset_ds); -EXPORT_SYMBOL(dmu_objset_name); -EXPORT_SYMBOL(dmu_objset_type); -EXPORT_SYMBOL(dmu_objset_id); -EXPORT_SYMBOL(dmu_snapshot_list_next); -EXPORT_SYMBOL(dmu_dir_list_next); -EXPORT_SYMBOL(dmu_objset_set_user); -EXPORT_SYMBOL(dmu_objset_get_user); - -/* Public routines to create, destroy, open, and close objsets. 
*/ -EXPORT_SYMBOL(dmu_objset_open); -EXPORT_SYMBOL(dmu_objset_open_ds); -EXPORT_SYMBOL(dmu_objset_close); -EXPORT_SYMBOL(dmu_objset_evict_dbufs); +EXPORT_SYMBOL(dmu_objset_hold); +EXPORT_SYMBOL(dmu_objset_own); +EXPORT_SYMBOL(dmu_objset_rele); +EXPORT_SYMBOL(dmu_objset_disown); +EXPORT_SYMBOL(dmu_objset_from_ds); EXPORT_SYMBOL(dmu_objset_create); -EXPORT_SYMBOL(dmu_objset_create_impl); +EXPORT_SYMBOL(dmu_objset_clone); EXPORT_SYMBOL(dmu_objset_destroy); -EXPORT_SYMBOL(dmu_snapshots_destroy); -EXPORT_SYMBOL(dmu_objset_rollback); EXPORT_SYMBOL(dmu_objset_snapshot); -EXPORT_SYMBOL(dmu_objset_rename); -EXPORT_SYMBOL(dmu_objset_find); -EXPORT_SYMBOL(dmu_objset_byteswap); - -/* Get stats on a dataset. */ -EXPORT_SYMBOL(dmu_objset_fast_stat); EXPORT_SYMBOL(dmu_objset_stats); +EXPORT_SYMBOL(dmu_objset_fast_stat); EXPORT_SYMBOL(dmu_objset_space); +EXPORT_SYMBOL(dmu_objset_fsid_guid); +EXPORT_SYMBOL(dmu_objset_find); +EXPORT_SYMBOL(dmu_objset_find_spa); +EXPORT_SYMBOL(dmu_objset_prefetch); +EXPORT_SYMBOL(dmu_objset_byteswap); +EXPORT_SYMBOL(dmu_objset_evict_dbufs); +EXPORT_SYMBOL(dmu_objset_snap_cmtime); + +EXPORT_SYMBOL(dmu_objset_sync); +EXPORT_SYMBOL(dmu_objset_is_dirty); +EXPORT_SYMBOL(dmu_objset_create_impl); +EXPORT_SYMBOL(dmu_objset_open_impl); +EXPORT_SYMBOL(dmu_objset_evict); +EXPORT_SYMBOL(dmu_objset_do_userquota_updates); +EXPORT_SYMBOL(dmu_objset_userquota_get_ids); +EXPORT_SYMBOL(dmu_objset_userused_enabled); +EXPORT_SYMBOL(dmu_objset_userspace_upgrade); +EXPORT_SYMBOL(dmu_objset_userspace_present); #endif diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 8bbbb0815c..8cde8537f4 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -3935,12 +3935,13 @@ EXPORT_SYMBOL(dsl_dataset_destroy_check); EXPORT_SYMBOL(dsl_dataset_destroy_sync); EXPORT_SYMBOL(dsl_dataset_snapshot_check); EXPORT_SYMBOL(dsl_dataset_snapshot_sync); -EXPORT_SYMBOL(dsl_dataset_rollback); EXPORT_SYMBOL(dsl_dataset_rename); EXPORT_SYMBOL(dsl_dataset_promote); 
EXPORT_SYMBOL(dsl_dataset_clone_swap); -EXPORT_SYMBOL(dsl_dataset_set_user_ptr); -EXPORT_SYMBOL(dsl_dataset_get_user_ptr); +EXPORT_SYMBOL(dsl_dataset_user_hold); +EXPORT_SYMBOL(dsl_dataset_user_release); +EXPORT_SYMBOL(dsl_dataset_user_release_tmp); +EXPORT_SYMBOL(dsl_dataset_get_holds); EXPORT_SYMBOL(dsl_dataset_get_blkptr); EXPORT_SYMBOL(dsl_dataset_set_blkptr); EXPORT_SYMBOL(dsl_dataset_get_spa); @@ -3960,9 +3961,5 @@ EXPORT_SYMBOL(dsl_dataset_check_quota); EXPORT_SYMBOL(dsl_dataset_set_quota); EXPORT_SYMBOL(dsl_dataset_set_quota_sync); EXPORT_SYMBOL(dsl_dataset_set_reservation); -EXPORT_SYMBOL(dsl_dataset_user_hold); -EXPORT_SYMBOL(dsl_dataset_user_release); -EXPORT_SYMBOL(dsl_dataset_user_release_tmp); -EXPORT_SYMBOL(dsl_dataset_get_holds); EXPORT_SYMBOL(dsl_destroy_inconsistent); #endif diff --git a/module/zfs/spa.c b/module/zfs/spa.c index f81fd50bd6..7aa4667548 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -5703,9 +5703,12 @@ done: #if defined(_KERNEL) && defined(HAVE_SPL) /* state manipulation functions */ EXPORT_SYMBOL(spa_open); +EXPORT_SYMBOL(spa_open_rewind); EXPORT_SYMBOL(spa_get_stats); EXPORT_SYMBOL(spa_create); +EXPORT_SYMBOL(spa_import_rootpool); EXPORT_SYMBOL(spa_import); +EXPORT_SYMBOL(spa_import_verbatim); EXPORT_SYMBOL(spa_tryimport); EXPORT_SYMBOL(spa_destroy); EXPORT_SYMBOL(spa_export); @@ -5715,6 +5718,8 @@ EXPORT_SYMBOL(spa_async_suspend); EXPORT_SYMBOL(spa_async_resume); EXPORT_SYMBOL(spa_inject_addref); EXPORT_SYMBOL(spa_inject_delref); +EXPORT_SYMBOL(spa_scan_stat_init); +EXPORT_SYMBOL(spa_scan_get_stats); /* device maniion */ EXPORT_SYMBOL(spa_vdev_add); @@ -5722,6 +5727,8 @@ EXPORT_SYMBOL(spa_vdev_attach); EXPORT_SYMBOL(spa_vdev_detach); EXPORT_SYMBOL(spa_vdev_remove); EXPORT_SYMBOL(spa_vdev_setpath); +EXPORT_SYMBOL(spa_vdev_setfru); +EXPORT_SYMBOL(spa_vdev_split_mirror); /* spare statech is global across all pools) */ EXPORT_SYMBOL(spa_spare_add); @@ -5735,10 +5742,10 @@ EXPORT_SYMBOL(spa_l2cache_remove); 
EXPORT_SYMBOL(spa_l2cache_exists); EXPORT_SYMBOL(spa_l2cache_activate); EXPORT_SYMBOL(spa_l2cache_drop); -EXPORT_SYMBOL(spa_l2cache_space_update); -/* scrubbing */ -EXPORT_SYMBOL(spa_scrub); +/* scanning */ +EXPORT_SYMBOL(spa_scan); +EXPORT_SYMBOL(spa_scan_stop); /* spa syncing */ EXPORT_SYMBOL(spa_sync); /* only for DMU use */ @@ -5749,8 +5756,6 @@ EXPORT_SYMBOL(spa_prop_set); EXPORT_SYMBOL(spa_prop_get); EXPORT_SYMBOL(spa_prop_clear_bootfs); -#if defined(HAVE_SYSEVENT) /* asynchronous event notification */ EXPORT_SYMBOL(spa_event_notify); #endif -#endif diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 7eb928519f..99e2eaa739 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -509,5 +509,6 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event) EXPORT_SYMBOL(spa_history_create_obj); EXPORT_SYMBOL(spa_history_get); EXPORT_SYMBOL(spa_history_log); -EXPORT_SYMBOL(spa_history_internal_log); +EXPORT_SYMBOL(spa_history_log_internal); +EXPORT_SYMBOL(spa_history_log_version); #endif diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 724b0a17a5..3472dacb1b 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1711,16 +1711,24 @@ EXPORT_SYMBOL(spa_name); EXPORT_SYMBOL(spa_guid); EXPORT_SYMBOL(spa_last_synced_txg); EXPORT_SYMBOL(spa_first_txg); +EXPORT_SYMBOL(spa_syncing_txg); EXPORT_SYMBOL(spa_version); EXPORT_SYMBOL(spa_state); +EXPORT_SYMBOL(spa_load_state); EXPORT_SYMBOL(spa_freeze_txg); -EXPORT_SYMBOL(spa_get_alloc); -EXPORT_SYMBOL(spa_get_space); -EXPORT_SYMBOL(spa_get_dspace); EXPORT_SYMBOL(spa_get_asize); +EXPORT_SYMBOL(spa_get_dspace); +EXPORT_SYMBOL(spa_update_dspace); +EXPORT_SYMBOL(spa_deflate); +EXPORT_SYMBOL(spa_normal_class); +EXPORT_SYMBOL(spa_log_class); EXPORT_SYMBOL(spa_max_replication); +EXPORT_SYMBOL(spa_prev_software_version); EXPORT_SYMBOL(spa_get_failmode); EXPORT_SYMBOL(spa_suspended); +EXPORT_SYMBOL(spa_bootfs); +EXPORT_SYMBOL(spa_delegation); 
+EXPORT_SYMBOL(spa_meta_objset); /* Miscellaneous support routines */ EXPORT_SYMBOL(spa_rename); @@ -1728,15 +1736,21 @@ EXPORT_SYMBOL(spa_guid_exists); EXPORT_SYMBOL(spa_strdup); EXPORT_SYMBOL(spa_strfree); EXPORT_SYMBOL(spa_get_random); +EXPORT_SYMBOL(spa_generate_guid); EXPORT_SYMBOL(sprintf_blkptr); EXPORT_SYMBOL(spa_freeze); EXPORT_SYMBOL(spa_upgrade); EXPORT_SYMBOL(spa_evict_all); EXPORT_SYMBOL(spa_lookup_by_guid); EXPORT_SYMBOL(spa_has_spare); -EXPORT_SYMBOL(bp_get_dasize); +EXPORT_SYMBOL(dva_get_dsize_sync); +EXPORT_SYMBOL(bp_get_dsize_sync); +EXPORT_SYMBOL(bp_get_dsize); EXPORT_SYMBOL(spa_has_slogs); EXPORT_SYMBOL(spa_is_root); +EXPORT_SYMBOL(spa_writeable); +EXPORT_SYMBOL(spa_rewind_data_to_nvlist); +EXPORT_SYMBOL(spa_mode); EXPORT_SYMBOL(spa_namespace_lock); #endif diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 7fc90f91bd..3382629b00 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -732,8 +732,6 @@ EXPORT_SYMBOL(txg_hold_open); EXPORT_SYMBOL(txg_rele_to_quiesce); EXPORT_SYMBOL(txg_rele_to_sync); EXPORT_SYMBOL(txg_register_callbacks); -EXPORT_SYMBOL(txg_suspend); -EXPORT_SYMBOL(txg_resume); EXPORT_SYMBOL(txg_delay); EXPORT_SYMBOL(txg_wait_synced); EXPORT_SYMBOL(txg_wait_open); From 1b55fad32f43a17504d2dd4b434923dff0f3f811 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 15:59:11 -0700 Subject: [PATCH 3/7] Updates to linux-user-disk The major change is removing the thread pool when importing devices. This may be reintroduced at some point if needed, but it is added complexity which has already been handled by blkid on modern Linux systems. We only need to fall back to probing everything in /dev/ if your config file is toast, and even then it only takes a few seconds.
--- cmd/zpool/zpool_main.c | 8 +------- lib/libzfs/libzfs_import.c | 42 ++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index c5eed7780c..65bca8033f 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -1590,7 +1590,7 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts, * -c Read pool information from a cachefile instead of searching * devices. * - * -d Scan in a specific directory, other than /dev/dsk. More than + * -d Scan in a specific directory, other than /dev/. More than * one directory can be specified using multiple '-d' options. * * -D Scan for previously destroyed pools or import all or only @@ -1749,12 +1749,6 @@ zpool_do_import(int argc, char **argv) nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0) goto error; - if (searchdirs == NULL) { - searchdirs = safe_malloc(sizeof (char *)); - searchdirs[0] = "/dev/dsk"; - nsearch = 1; - } - /* check argument count */ if (do_all) { if (argc != 0) { diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 19cdbee255..f528dba27b 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #ifdef HAVE_LIBBLKID @@ -1004,13 +1003,10 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; - avl_tree_t slice_cache; - rdsk_node_t *slice; - void *cookie; - verify(poolname == NULL || guid == 0); + verify(iarg->poolname == NULL || iarg->guid == 0); - if (argc == 0) { + if (dirs == 0) { #ifdef HAVE_LIBBLKID /* Use libblkid to scan all device for their type */ if (zpool_find_import_blkid(hdl, &pools) == 0) @@ -1020,8 +1016,8 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) dgettext(TEXT_DOMAIN, "blkid failure falling back " "to manual probing")); #endif /* 
HAVE_LIBBLKID */ - argc = 1; - argv = &default_dir; + dirs = 1; + dir = &default_dir; } /* @@ -1030,7 +1026,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) * and toplevel GUID. */ for (i = 0; i < dirs; i++) { - tpool_t *t; char *rdsk; int dfd; @@ -1064,8 +1059,6 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) goto error; } - avl_create(&slice_cache, slice_cache_compare, - sizeof (rdsk_node_t), offsetof(rdsk_node_t, rn_node)); /* * This is not MT-safe, but we have no MT consumers of libzfs */ @@ -1076,11 +1069,23 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) continue; /* - * Do not open /dev/watchdog to stat it because - * it requires a special close or the watchdog - * with be triggered and the system reset. + * Skip checking devices with well known prefixes: + * watchdog - A special close is required to avoid + * triggering it and resetting the system. + * fuse - Fuse control device. + * ppp - Generic PPP driver. + * tty* - Generic serial interface. + * vcs* - Virtual console memory. + * parport* - Parallel port interface. + * lp* - Printer interface. 
*/ - if (strcmp(name, "watchdog") == 0) + if ((strncmp(name, "watchdog", 8) == 0) || + (strncmp(name, "fuse", 4) == 0) || + (strncmp(name, "ppp", 3) == 0) || + (strncmp(name, "tty", 3) == 0) || + (strncmp(name, "vcs", 3) == 0) || + (strncmp(name, "parport", 7) == 0) || + (strncmp(name, "lp", 2) == 0)) continue; if ((fd = openat64(dfd, name, O_RDONLY)) < 0) @@ -1129,14 +1134,11 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) continue; } /* use the non-raw path for the config */ - (void) strlcpy(end, slice->rn_name, pathleft); + (void) strlcpy(end, name, pathleft); if (add_config(hdl, &pools, path, config) != 0) goto error; } - free(slice->rn_name); - free(slice); } - avl_destroy(&slice_cache); (void) closedir(dirp); dirp = NULL; @@ -1145,7 +1147,7 @@ zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) #ifdef HAVE_LIBBLKID skip_scanning: #endif - ret = get_configs(hdl, &pools, active_ok); + ret = get_configs(hdl, &pools, iarg->can_be_active); error: for (pe = pools.pools; pe != NULL; pe = penext) { From 800b7a03e11d4f36f9b0e4af955c049e57220866 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:02:03 -0700 Subject: [PATCH 4/7] Update linux-kernel-disk ZVOL implementation The ZVOL interfaces changed significantly with the latest update. I've updated the Linux version of the code to handle this and it looks like the net result has been a simpler implementation which is good! Plus, I'm relatively sure the ZIL integration is right this time although it needs some serious crash testing to verify that. Also minor additions to vdev_disk for .hold and .rele callbacks. Currently, they do nothing and I may be able to simply stub them out with NULLs for Linux since opening the device in Linux should have much the same effect. More investigation is needed though since the ZFS interface may make some demands here I'm overlooking.
--- module/zfs/include/sys/spa_impl.h | 2 +- module/zfs/include/sys/zfs_fuid.h | 1 + module/zfs/vdev.c | 7 + module/zfs/vdev_disk.c | 31 +++ module/zfs/zfs_ioctl.c | 14 +- module/zfs/zvol.c | 368 ++++++++++++++++-------------- 6 files changed, 244 insertions(+), 179 deletions(-) diff --git a/module/zfs/include/sys/spa_impl.h b/module/zfs/include/sys/spa_impl.h index e2e1851ecc..44443e1722 100644 --- a/module/zfs/include/sys/spa_impl.h +++ b/module/zfs/include/sys/spa_impl.h @@ -208,7 +208,7 @@ struct spa { kmutex_t spa_proc_lock; /* protects spa_proc* */ kcondvar_t spa_proc_cv; /* spa_proc_state transitions */ spa_proc_state_t spa_proc_state; /* see definition */ - struct proc *spa_proc; /* "zpool-poolname" process */ + proc_t *spa_proc; /* "zpool-poolname" process */ uint64_t spa_did; /* if procp != p0, did of t1 */ boolean_t spa_autoreplace; /* autoreplace set in open */ int spa_vdev_locks; /* locks grabbed */ diff --git a/module/zfs/include/sys/zfs_fuid.h b/module/zfs/include/sys/zfs_fuid.h index 6cdebe8aea..d2989c8522 100644 --- a/module/zfs/include/sys/zfs_fuid.h +++ b/module/zfs/include/sys/zfs_fuid.h @@ -33,6 +33,7 @@ #include #endif #include +#include #ifdef __cplusplus extern "C" { diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index e1018ccc6a..14de500fa7 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1072,6 +1072,12 @@ vdev_open_child(void *arg) boolean_t vdev_uses_zvols(vdev_t *vd) { +/* + * NOTE: Disabled because under Linux I've chosen not to put all the zvols + * in their own directory. This could be changed or this code can be updated + * to perhaps run an ioctl() on the vdev path to determine if it is a zvol.
+ */ +#if 0 int c; if (vd->vdev_path && strncmp(vd->vdev_path, ZVOL_DIR, @@ -1080,6 +1086,7 @@ vdev_uses_zvols(vdev_t *vd) for (c = 0; c < vd->vdev_children; c++) if (vdev_uses_zvols(vd->vdev_child[c])) return (B_TRUE); +#endif return (B_FALSE); } diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index 625fc482b0..799587a809 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -550,6 +550,35 @@ vdev_disk_io_done(zio_t *zio) } } +static void +vdev_disk_hold(vdev_t *vd) +{ + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + /* We must have a pathname, and it must be absolute. */ + if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') + return; + + /* + * Only prefetch path and devid info if the device has + * never been opened. + */ + if (vd->vdev_tsd != NULL) + return; + + /* XXX: Implement me as a vnode lookup for the device */ + vd->vdev_name_vp = NULL; + vd->vdev_devid_vp = NULL; +} + +static void +vdev_disk_rele(vdev_t *vd) +{ + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER)); + + /* XXX: Implement me as a vnode rele for the device */ +} + vdev_ops_t vdev_disk_ops = { vdev_disk_open, vdev_disk_close, @@ -557,6 +586,8 @@ vdev_ops_t vdev_disk_ops = { vdev_disk_io_start, vdev_disk_io_done, NULL, + vdev_disk_hold, + vdev_disk_rele, VDEV_TYPE_DISK, /* name of this vdev type */ B_TRUE /* leaf vdev */ }; diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index acb6777883..fb1dc03637 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1965,8 +1965,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, err = dsl_dataset_set_reservation(dsname, source, intval); break; case ZFS_PROP_VOLSIZE: - err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip), - intval); + err = zvol_set_volsize(dsname, intval); break; case ZFS_PROP_VERSION: { @@ -2832,9 +2831,18 @@ zfs_ioc_create(zfs_cmd_t *zc) if (error == 0) { error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, nvprops, NULL); - if 
(error != 0) + if (error != 0) { (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + goto out; + } + + if (type == DMU_OST_ZVOL) { + error = zvol_create_minor(zc->zc_name); + if (error != 0) + (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + } } +out: nvlist_free(nvprops); return (error); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 4443e2d46d..333b3fb9d1 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -51,6 +51,7 @@ unsigned int zvol_threads = 0; static taskq_t *zvol_taskq; static kmutex_t zvol_state_lock; static list_t zvol_state_list; +static char *zvol_tag = "zvol_tag"; /* * The in-core state of each volume. @@ -59,11 +60,12 @@ typedef struct zvol_state { uint64_t zv_volsize; /* advertised space */ uint64_t zv_volblocksize;/* volume block size */ objset_t *zv_objset; /* objset handle */ - uint32_t zv_mode; /* DS_MODE_* at open time */ + uint32_t zv_flags; /* ZVOL_* flags */ uint32_t zv_open_count; /* open counts */ uint32_t zv_changed; /* disk changed */ zilog_t *zv_zilog; /* ZIL handle */ znode_t zv_znode; /* for range locking */ + dmu_buf_t *zv_dbuf; /* bonus handle */ dev_t zv_dev; /* device id */ struct gendisk *zv_disk; /* generic disk */ struct request_queue *zv_queue; /* request queue */ @@ -71,6 +73,8 @@ typedef struct zvol_state { list_node_t zv_next; /* next zvol_state_t linkage */ } zvol_state_t; +#define ZVOL_RDONLY 0x1 + /* * Find the next available range of ZVOL_MINORS minor numbers. The * zvol_state_list is kept in ascending minor order so we simply need @@ -197,15 +201,6 @@ zvol_get_stats(objset_t *os, nvlist_t *nv) return (error); } -/* - * Notification handler for objset readonly property changes. - */ -static void -zvol_readonly_changed_cb(void *arg, uint64_t value) -{ - set_disk_ro(((zvol_state_t *)arg)->zv_disk, !!value); -} - /* * Sanity check volume size. 
*/ @@ -278,40 +273,44 @@ int zvol_set_volsize(const char *name, uint64_t volsize) { zvol_state_t *zv; - int error; dmu_object_info_t doi; - uint64_t old_volsize = 0ULL; + objset_t *os = NULL; zvol_state_t state = { 0 }; + uint64_t old_volsize = 0ULL; + uint64_t readonly; + int error; mutex_enter(&zvol_state_lock); zv = zvol_find_by_name(name); if (zv == NULL) { - /* - * If we are doing a "zfs clone -o volsize=", then the - * minor node won't exist yet. - */ - error = dmu_objset_open(name, DMU_OST_ZVOL, DS_MODE_OWNER, - &state.zv_objset); - if (error != 0) + error = dmu_objset_hold(name, FTAG, &os); + if (error) goto out; zv = &state; } + + VERIFY(dsl_prop_get_integer(name, "readonly", &readonly, NULL) == 0); + if (readonly) { + error = EROFS; + goto out; + } + old_volsize = zv->zv_volsize; - if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 || + if ((error = dmu_object_info(os, ZVOL_OBJ, &doi)) != 0 || (error = zvol_check_volsize(volsize,doi.doi_data_block_size)) != 0) goto out; - if (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY)) { + if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) { error = EROFS; goto out; } error = zvol_update_volsize(zv, volsize); out: - if (state.zv_objset) - dmu_objset_close(state.zv_objset); + if (os) + dmu_objset_rele(os, FTAG); mutex_exit(&zvol_state_lock); @@ -348,7 +347,7 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize) if (zv == NULL) return (ENXIO); - if (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY)) + if (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY)) return (EROFS); tx = dmu_tx_create(zv->zv_objset); @@ -441,16 +440,9 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, zilog_t *zilog = zv->zv_zilog; boolean_t slogging; - if (zil_disable) + if (zil_replaying(zilog, tx)) return; - if (zilog->zl_replay) { - dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); - zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = - zilog->zl_replaying_seq; - return; 
- } - slogging = spa_has_slogs(zilog->zl_spa); while (size) { @@ -480,8 +472,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, lr = (lr_write_t *)&itx->itx_lr; if (write_state == WR_COPIED && dmu_read(zv->zv_objset, ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) { - kmem_free(itx, offsetof(itx_t, itx_lr) + - itx->itx_lr.lrc_reclen); + zil_itx_destroy(itx); itx = zil_itx_create(TX_WRITE, sizeof (*lr)); lr = (lr_write_t *)&itx->itx_lr; write_state = WR_NEED_COPY; @@ -493,8 +484,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, lr->lr_foid = ZVOL_OBJ; lr->lr_offset = offset; lr->lr_length = len; - lr->lr_blkoff = offset - - P2ALIGN_TYPED(offset, blocksize, uint64_t); + lr->lr_blkoff = 0; BP_ZERO(&lr->lr_blkptr); itx->itx_private = zv; @@ -520,13 +510,10 @@ zvol_write(void *arg) zvol_state_t *zv = q->queuedata; uint64_t offset = blk_rq_pos(req) << 9; uint64_t size = blk_rq_bytes(req); - int sync = 0, error = 0; + int error = 0; dmu_tx_t *tx; rl_t *rl; - if (rq_is_sync(req) && !zil_disable) - sync = 1; - rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_WRITER); tx = dmu_tx_create(zv->zv_objset); @@ -541,13 +528,14 @@ zvol_write(void *arg) return; } - dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx); - zvol_log_write(zv, tx, offset, size, sync); + error = dmu_write_req(zv->zv_objset, ZVOL_OBJ, req, tx); + if (error == 0) + zvol_log_write(zv, tx, offset, size, rq_is_sync(req)); dmu_tx_commit(tx); zfs_range_unlock(rl); - if (sync) + if (rq_is_sync(req)) zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ); blk_end_request(req, -error, size); @@ -643,7 +631,7 @@ zvol_request(struct request_queue *q) break; case WRITE: if (unlikely(get_disk_ro(zv->zv_disk)) || - unlikely(zv->zv_mode & DS_MODE_READONLY)) { + unlikely(zv->zv_flags & ZVOL_RDONLY)) { __blk_end_request(req, -EROFS, size); break; } @@ -659,6 +647,77 @@ zvol_request(struct request_queue *q) } } +static void +zvol_get_done(zgd_t *zgd, int error) +{ + if (zgd->zgd_db) + dmu_buf_rele(zgd->zgd_db, zgd); + + 
zfs_range_unlock(zgd->zgd_rl); + + if (error == 0 && zgd->zgd_bp) + zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); + + kmem_free(zgd, sizeof (zgd_t)); +} + +/* + * Get data to generate a TX_WRITE intent log record. + */ +static int +zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) +{ + zvol_state_t *zv = arg; + objset_t *os = zv->zv_objset; + uint64_t offset = lr->lr_offset; + uint64_t size = lr->lr_length; + dmu_buf_t *db; + zgd_t *zgd; + int error; + + ASSERT(zio != NULL); + ASSERT(size != 0); + + zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); + zgd->zgd_zilog = zv->zv_zilog; + zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); + + /* + * Write records come in two flavors: immediate and indirect. + * For small writes it's cheaper to store the data with the + * log record (immediate); for large writes it's cheaper to + * sync the data and get a pointer to it (indirect) so that + * we don't have to write the data twice. + */ + if (buf != NULL) { /* immediate write */ + error = dmu_read(os, ZVOL_OBJ, offset, size, buf, + DMU_READ_NO_PREFETCH); + } else { + size = zv->zv_volblocksize; + offset = P2ALIGN_TYPED(offset, size, uint64_t); + error = dmu_buf_hold(os, ZVOL_OBJ, offset, zgd, &db, + DMU_READ_NO_PREFETCH); + if (error == 0) { + zgd->zgd_db = db; + zgd->zgd_bp = &lr->lr_blkptr; + + ASSERT(db != NULL); + ASSERT(db->db_offset == offset); + ASSERT(db->db_size == size); + + error = dmu_sync(zio, lr->lr_common.lrc_txg, + zvol_get_done, zgd); + + if (error == 0) + return (0); + } + } + + zvol_get_done(zgd, error); + + return (error); +} + /* * The zvol_state_t's are inserted in increasing MINOR(dev_t) order. 
*/ @@ -688,27 +747,94 @@ zvol_remove(zvol_state_t *zv_remove) list_remove(&zvol_state_list, zv_remove); } +static int +zvol_first_open(zvol_state_t *zv) +{ + objset_t *os; + uint64_t volsize; + int error; + uint64_t readonly; + + /* lie and say we're read-only */ + error = dmu_objset_own(zv->zv_disk->disk_name, + DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); + if (error) + return (-error); + + error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); + if (error) { + dmu_objset_disown(os, zvol_tag); + return (-error); + } + + zv->zv_objset = os; + error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf); + if (error) { + dmu_objset_disown(os, zvol_tag); + return (-error); + } + + set_capacity(zv->zv_disk, volsize >> 9); + zv->zv_volsize = volsize; + zv->zv_zilog = zil_open(os, zvol_get_data); + + VERIFY(dsl_prop_get_integer(zv->zv_disk->disk_name, + "readonly", &readonly, NULL) == 0); + if (readonly || dmu_objset_is_snapshot(os)) { + set_disk_ro(zv->zv_disk, 1); + zv->zv_flags |= ZVOL_RDONLY; + } else { + set_disk_ro(zv->zv_disk, 0); + zv->zv_flags &= ~ZVOL_RDONLY; + } + + return (-error); +} + +static void +zvol_last_close(zvol_state_t *zv) +{ + zil_close(zv->zv_zilog); + zv->zv_zilog = NULL; + dmu_buf_rele(zv->zv_dbuf, zvol_tag); + zv->zv_dbuf = NULL; + dmu_objset_disown(zv->zv_objset, zvol_tag); + zv->zv_objset = NULL; +} + static int zvol_open(struct block_device *bdev, fmode_t flag) { zvol_state_t *zv = bdev->bd_disk->private_data; + int error = 0; mutex_enter(&zvol_state_lock); ASSERT3P(zv, !=, NULL); - ASSERT3P(zv->zv_objset, !=, NULL); + + if (zv->zv_open_count == 0) { + error = zvol_first_open(zv); + if (error) + goto out_mutex; + } if ((flag & FMODE_WRITE) && - (get_disk_ro(zv->zv_disk) || (zv->zv_mode & DS_MODE_READONLY))) { - mutex_exit(&zvol_state_lock); - return (-EROFS); + (get_disk_ro(zv->zv_disk) || (zv->zv_flags & ZVOL_RDONLY))) { + error = -EROFS; + goto out_open_count; } zv->zv_open_count++; + +out_open_count: + if (zv->zv_open_count == 0) + 
zvol_last_close(zv); + +out_mutex: mutex_exit(&zvol_state_lock); check_disk_change(bdev); - return (0); + return (error); } static int @@ -720,88 +846,14 @@ zvol_release(struct gendisk *disk, fmode_t mode) ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); zv->zv_open_count--; + if (zv->zv_open_count == 0) + zvol_last_close(zv); + mutex_exit(&zvol_state_lock); return (0); } -static void -zvol_get_done(dmu_buf_t *db, void *vzgd) -{ - zgd_t *zgd = (zgd_t *)vzgd; - rl_t *rl = zgd->zgd_rl; - - dmu_buf_rele(db, vzgd); - zfs_range_unlock(rl); - zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); - kmem_free(zgd, sizeof (zgd_t)); -} - -/* - * Get data to generate a TX_WRITE intent log record. - */ -static int -zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) -{ - zvol_state_t *zv = arg; - objset_t *os = zv->zv_objset; - dmu_buf_t *db; - rl_t *rl; - zgd_t *zgd; - uint64_t boff; /* block starting offset */ - int dlen = lr->lr_length; /* length of user data */ - int error; - - ASSERT(zio); - ASSERT(dlen != 0); - - /* - * Write records come in two flavors: immediate and indirect. - * For small writes it's cheaper to store the data with the - * log record (immediate); for large writes it's cheaper to - * sync the data and get a pointer to it (indirect) so that - * we don't have to write the data twice. - */ - if (buf != NULL) /* immediate write */ - return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf, - DMU_READ_NO_PREFETCH)); - - zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP); - zgd->zgd_zilog = zv->zv_zilog; - zgd->zgd_bp = &lr->lr_blkptr; - - /* - * Lock the range of the block to ensure that when the data is - * written out and its checksum is being calculated that no other - * thread can change the block. 
- */ - boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t); - rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize, - RL_READER); - zgd->zgd_rl = rl; - - VERIFY3S(dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db), ==, 0); - error = dmu_sync(zio, db, &lr->lr_blkptr, - lr->lr_common.lrc_txg, zvol_get_done, zgd); - if (error == 0) - zil_add_block(zv->zv_zilog, &lr->lr_blkptr); - /* - * If we get EINPROGRESS, then we need to wait for a - * write IO initiated by dmu_sync() to complete before - * we can release this dbuf. We will finish everything - * up in the zvol_get_done() callback. - */ - if (error == EINPROGRESS) - return (0); - - dmu_buf_rele(db, zgd); - zfs_range_unlock(rl); - kmem_free(zgd, sizeof (zgd_t)); - - return (error); -} - - static int zvol_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) @@ -1026,9 +1078,7 @@ zvol_create_minor(const char *name) zvol_state_t *zv; objset_t *os; dmu_object_info_t doi; - uint64_t volsize; unsigned minor = 0; - int ds_mode = DS_MODE_OWNER; int error = 0; mutex_enter(&zvol_state_lock); @@ -1039,57 +1089,40 @@ zvol_create_minor(const char *name) goto out; } - /* Snapshot may only be read-only */ - if (strchr(name, '@') != 0) - ds_mode |= DS_MODE_READONLY; - - error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os); + error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) goto out; error = dmu_object_info(os, ZVOL_OBJ, &doi); if (error) - goto out_dmu_objset_close; - - error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); - if (error) - goto out_dmu_objset_close; + goto out_dmu_objset_disown; error = zvol_find_minor(&minor); if (error) - goto out_dmu_objset_close; + goto out_dmu_objset_disown; zv = zvol_alloc(MKDEV(zvol_major, minor), name); if (zv == NULL) { error = EAGAIN; - goto out_dmu_objset_close; + goto out_dmu_objset_disown; } - set_disk_ro(zv->zv_disk, !!(ds_mode & DS_MODE_READONLY)); - set_capacity(zv->zv_disk, volsize >> 9); 
+ if (dmu_objset_is_snapshot(os)) + zv->zv_flags |= ZVOL_RDONLY; - zv->zv_volsize = volsize; zv->zv_volblocksize = doi.doi_data_block_size; - zv->zv_objset = os; - zv->zv_mode = ds_mode; - zv->zv_zilog = zil_open(os, zvol_get_data); - zil_replay(os, zv, zvol_replay_vector); - error = dsl_prop_register(dmu_objset_ds(zv->zv_objset), "readonly", - zvol_readonly_changed_cb, zv); - if (error) - goto out_zvol_alloc; + if (zil_replay_disable) + zil_destroy(dmu_objset_zil(os), B_FALSE); + else + zil_replay(os, zv, zvol_replay_vector); zvol_insert(zv); - mutex_exit(&zvol_state_lock); add_disk(zv->zv_disk); + error = 0; - return 0; - -out_zvol_alloc: - zvol_free(zv); -out_dmu_objset_close: - dmu_objset_close(os); +out_dmu_objset_disown: + dmu_objset_disown(os, zvol_tag); out: mutex_exit(&zvol_state_lock); @@ -1137,14 +1170,6 @@ zvol_remove_minor(const char *name) goto out; } - error = dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), - "readonly", zvol_readonly_changed_cb, zv); - if (error) - goto out; - - zil_close(zv->zv_zilog); - dmu_objset_close(zv->zv_objset); - zvol_remove(zv); zvol_free(zv); out: @@ -1158,20 +1183,14 @@ out: * zvol_fini() which means the module reference count must have * dropped to zero and none of the zvol devices may be open. 
*/ -static void -zvol_remove_minors(void) +void +zvol_remove_minors(const char *name) { zvol_state_t *zv; mutex_enter(&zvol_state_lock); while ((zv = list_head(&zvol_state_list)) != NULL) { ASSERT3U(zv->zv_open_count, ==, 0); - - (void)dsl_prop_unregister(dmu_objset_ds(zv->zv_objset), - "readonly", zvol_readonly_changed_cb, zv); - zil_close(zv->zv_zilog); - dmu_objset_close(zv->zv_objset); - zvol_remove(zv); zvol_free(zv); } @@ -1215,7 +1234,6 @@ zvol_init(void) void zvol_fini(void) { - zvol_remove_minors(); blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS); unregister_blkdev(zvol_major, ZVOL_DRIVER); taskq_destroy(zvol_taskq); From 302238d84fe8dc81cc7d0176cf27c06378d865f7 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:03:02 -0700 Subject: [PATCH 5/7] Update linux-user-disk Minor updates to handle changes in the user side of ZFS, utils and such. --- cmd/zdb/zdb.c | 1 - cmd/zfs/zfs_main.c | 5 +++++ cmd/ztest/ztest.c | 3 ++- module/zfs/sa.c | 5 ++++- module/zfs/zfs_sa.c | 3 ++- 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index d39178fae9..4216145919 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -55,7 +55,6 @@ #include #include #undef ZFS_MAXNAMELEN -#undef verify #include #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? 
\ diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 3a1a02d7da..f9c0ac17a8 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -311,6 +311,7 @@ safe_malloc(size_t size) return (data); } +#ifdef HAVE_ZPL static char * safe_strdup(char *str) { @@ -321,6 +322,7 @@ safe_strdup(char *str) return (dupstr); } +#endif /* HAVE_ZPL */ /* * Callback routine that will print out information for each of @@ -488,6 +490,7 @@ parse_depth(char *opt, int *flags) #define PROGRESS_DELAY 2 /* seconds */ +#ifdef HAVE_ZPL static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; static time_t pt_begin; static char *pt_header = NULL; @@ -539,6 +542,8 @@ finish_progress(char *done) free(pt_header); pt_header = NULL; } +#endif /* HAVE_ZPL */ + /* * zfs clone [-p] [-o prop=value] ... * diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 97211e01e7..015b1b10c2 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -1042,6 +1042,7 @@ ztest_pattern_set(void *buf, uint64_t size, uint64_t value) *ip++ = value; } +#ifndef NDEBUG static boolean_t ztest_pattern_match(void *buf, uint64_t size, uint64_t value) { @@ -1054,6 +1055,7 @@ ztest_pattern_match(void *buf, uint64_t size, uint64_t value) return (diff == 0); } +#endif static void ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, @@ -4779,7 +4781,6 @@ ztest_run_zdb(char *pool) isa, zopt_verbose >= 3 ? "s" : "", zopt_verbose >= 4 ? 
"v" : "", - spa_config_path, pool); free(isa); diff --git a/module/zfs/sa.c b/module/zfs/sa.c index ccbc620494..600b3db92a 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1353,6 +1353,7 @@ sa_lookup(sa_handle_t *hdl, sa_attr_type_t attr, void *buf, uint32_t buflen) int sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) { +#ifdef HAVE_ZPL int error; sa_bulk_attr_t bulk; @@ -1371,7 +1372,9 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) } mutex_exit(&hdl->sa_lock); return (error); - +#else + return ENOSYS; +#endif /* HAVE_ZPL */ } #endif diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index 73a40aa4fe..1a217285c2 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -67,7 +67,7 @@ sa_attr_reg_t zfs_attr_table[ZPL_END+1] = { }; #ifdef _KERNEL - +#ifdef HAVE_ZPL int zfs_sa_readlink(znode_t *zp, uio_t *uio) { @@ -309,4 +309,5 @@ zfs_sa_upgrade_txholds(dmu_tx_t *tx, znode_t *zp) } } +#endif /* HAVE_ZPL */ #endif From 67e8f48f1bf5d18f6afc4c8623e0ce7160b999e9 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:04:00 -0700 Subject: [PATCH 6/7] Update linux-kernel-device to handle ioc changes Upstream they modified the ioctl code so we need to make similar updates since we modify the API ourselves to always pass a pointer to file pointer around. This allows us to track per file handle state which is used by the zevent code.
--- module/zfs/zfs_ioctl.c | 52 ++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index fcee7ffac2..ffb4c6515a 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -104,8 +104,8 @@ static const char *userquota_perms[] = { static int zfs_ioc_userspace_upgrade(struct file *filp, zfs_cmd_t *zc); static int zfs_check_settable(struct file *filp, const char *name, nvpair_t *property, cred_t *cr); -static int zfs_check_clearable(char *dataset, nvlist_t *props, - nvlist_t **errors); +static int zfs_check_clearable(struct file *filp, char *dataset, + nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); int zfs_set_prop_nvlist(struct file *filp, const char *, zprop_source_t, @@ -339,6 +339,7 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) static int zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) { +#if 0 char ds_hexsl[MAXNAMELEN]; bslabel_t ds_sl, new_sl; boolean_t new_default = FALSE; @@ -426,6 +427,9 @@ out_check: if (needed_priv != -1) return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); return (0); +#else + return EPERM; +#endif } static int @@ -958,7 +962,7 @@ fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) return (0); } -static int +int put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) { char *packed = NULL; @@ -1009,6 +1013,7 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) dmu_objset_rele(os, FTAG); return (error); } +#endif /* * Find a zfsvfs_t for a mounted filesystem, or create our own, in which @@ -1017,6 +1022,7 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) static int zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp) { +#ifdef HAVE_ZPL int error = 0; if (getzfsvfs(name, zfvp) != 0) @@ -1034,11 +1040,15 @@ zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp) } } return (error); +#else + return ENOTSUP; +#endif } 
static void zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) { +#ifdef HAVE_ZPL rrw_exit(&zfsvfs->z_teardown_lock, tag); if (zfsvfs->z_vfs) { @@ -1047,8 +1057,8 @@ zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag) dmu_objset_disown(zfsvfs->z_os, zfsvfs); zfsvfs_free(zfsvfs); } +#endif } -#endif /* HAVE_ZPL */ static int zfs_ioc_pool_create(struct file *filp, zfs_cmd_t *zc) @@ -1559,8 +1569,7 @@ zfs_ioc_vdev_split(struct file *filp, zfs_cmd_t *zc) } static int -zfs_ioc_vdev_setpath(zfs_cmd_t *zc) ->>>>>>> refs/top-bases/linux-kernel-device +zfs_ioc_vdev_setpath(struct file *filp, zfs_cmd_t *zc) { spa_t *spa; char *path = zc->zc_value; @@ -1888,6 +1897,7 @@ top: static int zfs_prop_set_userquota(struct file *filp, const char *dsname, nvpair_t *pair) { +#ifdef HAVE_ZPL const char *propname = nvpair_name(pair); uint64_t *valary; unsigned int vallen; @@ -1928,6 +1938,9 @@ zfs_prop_set_userquota(struct file *filp, const char *dsname, nvpair_t *pair) } return (err); +#else + return ENOTSUP; +#endif } /* @@ -1949,7 +1962,7 @@ zfs_prop_set_special(struct file *filp, const char *dsname, if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) - return (zfs_prop_set_userquota(dsname, pair)); + return (zfs_prop_set_userquota(filp, dsname, pair)); return (-1); } @@ -1989,7 +2002,9 @@ zfs_prop_set_special(struct file *filp, const char *dsname, if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs)) != 0) break; +#ifdef HAVE_ZPL err = zfs_set_version(zfsvfs, intval); +#endif zfsvfs_rele(zfsvfs, FTAG); if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { @@ -1997,7 +2012,7 @@ zfs_prop_set_special(struct file *filp, const char *dsname, zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strcpy(zc->zc_name, dsname); - (void) zfs_ioc_userspace_upgrade(zc); + (void) zfs_ioc_userspace_upgrade(filp, zc); kmem_free(zc, sizeof (zfs_cmd_t)); } break; @@ -2097,10 +2112,10 @@ retry: /* Validate permissions */ if (err == 0) - err = zfs_check_settable(dsname, pair, CRED()); + err = zfs_check_settable(filp, dsname, 
pair, CRED()); if (err == 0) { - err = zfs_prop_set_special(dsname, source, pair); + err = zfs_prop_set_special(filp, dsname, source, pair); if (err == -1) { /* * For better performance we build up a list of @@ -2229,8 +2244,8 @@ props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) } static int -clear_received_props(objset_t *os, const char *fs, nvlist_t *props, - nvlist_t *skipped) +clear_received_props(struct file *filp, objset_t *os, + const char *fs, nvlist_t *props, nvlist_t *skipped) { int err = 0; nvlist_t *cleared_props = NULL; @@ -2242,7 +2257,7 @@ clear_received_props(objset_t *os, const char *fs, nvlist_t *props, */ zprop_source_t flags = (ZPROP_SRC_NONE | (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0)); - err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); + err = zfs_set_prop_nvlist(filp, fs, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); @@ -2355,7 +2370,7 @@ zfs_ioc_inherit_prop(struct file *filp, zfs_cmd_t *zc) } pair = nvlist_next_nvpair(dummy, NULL); - err = zfs_prop_set_special(zc->zc_name, source, pair); + err = zfs_prop_set_special(filp, zc->zc_name, source, pair); nvlist_free(dummy); if (err != -1) return (err); /* special property already handled */ @@ -2521,6 +2536,7 @@ zfs_ioc_get_fsacl(struct file *filp, zfs_cmd_t *zc) return (error); } +#ifdef HAVE_ZPL /* * Search the vfs list for a specified resource. Returns a pointer to it * or NULL if no suitable entry is found. 
The caller of this routine @@ -3379,9 +3395,7 @@ static int zfs_ioc_recv(struct file *filp, zfs_cmd_t *zc) { file_t *fp; -#ifdef HAVE_ZPL objset_t *os; -#endif /* HAVE_ZPL */ dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int fd; @@ -3417,7 +3431,6 @@ zfs_ioc_recv(struct file *filp, zfs_cmd_t *zc) return (EBADF); } -#ifdef HAVE_ZPL VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { @@ -3443,7 +3456,7 @@ zfs_ioc_recv(struct file *filp, zfs_cmd_t *zc) */ if (!first_recvd_props) props_reduce(props, origprops); - if (zfs_check_clearable(tofs, origprops, + if (zfs_check_clearable(filp, tofs, origprops, &errlist) != 0) (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); @@ -3451,7 +3464,6 @@ zfs_ioc_recv(struct file *filp, zfs_cmd_t *zc) dmu_objset_rele(os, FTAG); } -#endif /* HAVE_ZPL */ if (zc->zc_string[0]) { error = dmu_objset_hold(zc->zc_string, FTAG, &origin); @@ -3480,7 +3492,7 @@ zfs_ioc_recv(struct file *filp, zfs_cmd_t *zc) SPA_VERSION_RECVD_PROPS) first_recvd_props = B_TRUE; } else if (origprops != NULL) { - if (clear_received_props(os, tofs, origprops, + if (clear_received_props(filp,os,tofs,origprops, first_recvd_props ? NULL : props) != 0) zc->zc_obj |= ZPROP_ERR_NOCLEAR; } else { From e7e0311ce48bfb318f507cddb7a8dec8b904bdbd Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 14 Jun 2010 16:08:15 -0700 Subject: [PATCH 7/7] Update linux-kernel-mem Additional minor memory related tweak to move certain large allocations to virtual memory and in one case to simply suppress the warning message since it is not that far over the warning limit. 
--- module/zfs/ddt.c | 5 ++++- module/zfs/spa_history.c | 6 +----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 7420173cc9..abf371ce6d 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -499,6 +499,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) { ddt_histogram_t *ddh_total; + /* XXX: Move to a slab */ ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddt_get_dedup_histogram(spa, ddh_total); ddt_histogram_stat(dds_total, ddh_total); @@ -647,6 +648,7 @@ ddt_alloc(const ddt_key_t *ddk) { ddt_entry_t *dde; + /* XXX: Move to a slab */ dde = kmem_zalloc(sizeof (ddt_entry_t), KM_SLEEP); cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); @@ -795,7 +797,8 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c) { ddt_t *ddt; - ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP); + /* XXX: Move to a slab */ + ddt = kmem_zalloc(sizeof (*ddt), KM_SLEEP | KM_NODEBUG); mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&ddt->ddt_tree, ddt_entry_compare, diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index a65f16bccd..b7b5e32271 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -437,11 +437,7 @@ log_internal(history_internal_events_t event, spa_t *spa, return; ha = kmem_alloc(sizeof (history_arg_t), KM_SLEEP); - ha->ha_history_str = kmem_alloc(vsnprintf(NULL, 0, fmt, adx) + 1, - KM_SLEEP); - - (void) vsprintf(ha->ha_history_str, fmt, adx); - + ha->ha_history_str = kmem_asprintf(fmt, adx); ha->ha_log_type = LOG_INTERNAL; ha->ha_event = event; ha->ha_zone = NULL;