From 8510c572f34a04745bbe3b26a8eab710b07d1dbf Mon Sep 17 00:00:00 2001 From: Richard Elling Date: Sun, 23 Feb 2020 21:19:44 -0800 Subject: [PATCH] updated to include many tunables planned for v2. also merged in the icp and spl tunables --- ZFS-on-Linux-Module-Parameters.md | 1920 ++++++++++++++++++++++++++++- 1 file changed, 1881 insertions(+), 39 deletions(-) diff --git a/ZFS-on-Linux-Module-Parameters.md b/ZFS-on-Linux-Module-Parameters.md index 5b5de27..a75d663 100644 --- a/ZFS-on-Linux-Module-Parameters.md +++ b/ZFS-on-Linux-Module-Parameters.md @@ -1,6 +1,6 @@ # ZFS on Linux Module Parameters -The ZFS kernel module parameters are accessible in the SysFS +Most of the ZFS kernel module parameters are accessible in the SysFS `/sys/module/zfs/paramaters` directory. Current value can be observed by ```shell @@ -20,9 +20,22 @@ PARAMETER file in SysFS. In some cases, the parameter must be set prior to loading the kernel modules or it is desired to have the parameters set automatically at boot time. For many distros, this can be accomplished by creating a file named -`/etc/modprobe.d/zfs.conf` containing text lines of the format +`/etc/modprobe.d/zfs.conf` containing a text line for each module parameter +using the format: -```options zfs PARAMETER=VALUE``` +``` +# change PARAMETER for workload XZY to solve problem PROBLEM_DESCRIPTION +# changed by YOUR_NAME on DATE +options zfs PARAMETER=VALUE +``` + +Some parameters related to ZFS operations are located in module parameters +other than in the `zfs` kernel module. These are documented in the individual +parameter description. +Unless otherwise noted, the tunable applies to the `zfs` kernel module. +For example, the `icp` kernel module parameters are visible +in the `/sys/module/icp/parameters` directory and can be set by default at boot +time by changing the `/etc/modprobe.d/icp.conf` file. See the man page for _modprobe.d_ for more information. @@ -56,6 +69,7 @@ has a "Tags" row with keywords for frequent searches. * [metaslab_bias_enabled](#metaslab_bias_enabled) * [metaslab_debug_load](#metaslab_debug_load) * [metaslab_debug_unload](#metaslab_debug_unload) + * [metaslab_force_ganging](#metaslab_force_ganging) * [metaslab_fragmentation_factor_enabled](#metaslab_fragmentation_factor_enabled) * [zfs_metaslab_fragmentation_threshold](#zfs_metaslab_fragmentation_threshold) * [metaslab_lba_weighting_enabled](#metaslab_lba_weighting_enabled) @@ -68,7 +82,9 @@ has a "Tags" row with keywords for frequent searches. * [spa_asize_inflation](#spa_asize_inflation) * [spa_load_verify_data](#spa_load_verify_data) * [spa_slop_shift](#spa_slop_shift) + * [zfs_vdev_default_ms_count](#zfs_vdev_default_ms_count) #### ARC + * [zfs_abd_scatter_min_size](#zfs_abd_scatter_min_size) * [zfs_arc_average_blocksize](#zfs_arc_average_blocksize) * [zfs_arc_dnode_limit](#zfs_arc_dnode_limit) * [zfs_arc_dnode_limit_percent](#zfs_arc_dnode_limit_percent) @@ -84,6 +100,7 @@ has a "Tags" row with keywords for frequent searches. * [zfs_arc_meta_prune](#zfs_arc_meta_prune) * [zfs_arc_meta_strategy](#zfs_arc_meta_strategy) * [zfs_arc_min](#zfs_arc_min) + * [zfs_arc_min_prefetch_lifespan](#zfs_arc_min_prefetch_lifespan) * [zfs_arc_min_prefetch_ms](#zfs_arc_min_prefetch_ms) * [zfs_arc_min_prescient_prefetch_ms](#zfs_arc_min_prescient_prefetch_ms) * [zfs_arc_overflow_shift](#zfs_arc_overflow_shift) @@ -92,6 +109,9 @@ has a "Tags" row with keywords for frequent searches. 
* [zfs_arc_pc_percent](#zfs_arc_pc_percent) * [zfs_arc_shrink_shift](#zfs_arc_shrink_shift) * [zfs_arc_sys_free](#zfs_arc_sys_free) + * [dbuf_cache_max_bytes](#dbuf_cache_max_bytes) + * [dbuf_cache_shift](#dbuf_cache_shift) + * [dbuf_metadata_cache_shift](#dbuf_metadata_cache_shift) * [zfs_disable_dup_eviction](#zfs_disable_dup_eviction) * [l2arc_feed_again](#l2arc_feed_again) * [l2arc_feed_min_ms](#l2arc_feed_min_ms) @@ -104,6 +124,12 @@ has a "Tags" row with keywords for frequent searches. * [l2arc_write_boost](#l2arc_write_boost) * [l2arc_write_max](#l2arc_write_max) * [zfs_multilist_num_sublists](#zfs_multilist_num_sublists) + * [spa_load_verify_shift](#spa_load_verify_shift) +#### channel_programs + * [zfs_lua_max_instrlimit](#zfs_lua_max_instrlimit) + * [zfs_lua_max_memlimit](#zfs_lua_max_memlimit) +#### checkpoint + * [zfs_spa_discard_memory_limit](#zfs_spa_discard_memory_limit) #### checksum * [zfs_checksums_per_second](#zfs_checksums_per_second) * [zfs_fletcher_4_impl](#zfs_fletcher_4_impl) @@ -116,12 +142,23 @@ has a "Tags" row with keywords for frequent searches. #### CPU * [zfs_fletcher_4_impl](#zfs_fletcher_4_impl) * [zfs_mdcomp_disable](#zfs_mdcomp_disable) + * [spl_kmem_cache_kmem_threads](#spl_kmem_cache_kmem_threads) + * [spl_kmem_cache_magazine_size](#spl_kmem_cache_magazine_size) + * [spl_taskq_thread_bind](#spl_taskq_thread_bind) + * [spl_taskq_thread_priority](#spl_taskq_thread_priority) + * [spl_taskq_thread_sequential](#spl_taskq_thread_sequential) * [zfs_vdev_raidz_impl](#zfs_vdev_raidz_impl) +#### dataset + * [zfs_max_dataset_nesting](#zfs_max_dataset_nesting) #### dbuf_cache * [dbuf_cache_hiwater_pct](#dbuf_cache_hiwater_pct) * [dbuf_cache_lowater_pct](#dbuf_cache_lowater_pct) * [dbuf_cache_max_bytes](#dbuf_cache_max_bytes) + * [dbuf_cache_max_bytes](#dbuf_cache_max_bytes) * [dbuf_cache_max_shift](#dbuf_cache_max_shift) + * [dbuf_cache_shift](#dbuf_cache_shift) + * [dbuf_metadata_cache_max_bytes](#dbuf_metadata_cache_max_bytes) + * [dbuf_metadata_cache_shift](#dbuf_metadata_cache_shift) #### debug * [zfs_dbgmsg_enable](#zfs_dbgmsg_enable) * [zfs_dbgmsg_maxsize](#zfs_dbgmsg_maxsize) @@ -137,30 +174,40 @@ has a "Tags" row with keywords for frequent searches. 
* [zfs_object_mutex_size](#zfs_object_mutex_size) * [zfs_read_history](#zfs_read_history) * [zfs_read_history_hits](#zfs_read_history_hits) + * [spl_panic_halt](#spl_panic_halt) * [zfs_txg_history](#zfs_txg_history) * [zfs_zevent_cols](#zfs_zevent_cols) * [zfs_zevent_console](#zfs_zevent_console) * [zfs_zevent_len_max](#zfs_zevent_len_max) * [zil_replay_disable](#zil_replay_disable) + * [zio_deadman_log_all](#zio_deadman_log_all) + * [zio_decompress_fail_fraction](#zio_decompress_fail_fraction) * [zio_delay_max](#zio_delay_max) #### dedup + * [zfs_ddt_data_is_special](#zfs_ddt_data_is_special) * [zfs_disable_dup_eviction](#zfs_disable_dup_eviction) #### delay * [zfs_delays_per_second](#zfs_delays_per_second) #### delete + * [zfs_async_block_max_blocks](#zfs_async_block_max_blocks) * [zfs_delete_blocks](#zfs_delete_blocks) * [zfs_free_bpobj_enabled](#zfs_free_bpobj_enabled) * [zfs_free_max_blocks](#zfs_free_max_blocks) * [zfs_free_min_time_ms](#zfs_free_min_time_ms) + * [zfs_obsolete_min_time_ms](#zfs_obsolete_min_time_ms) * [zfs_per_txg_dirty_frees_percent](#zfs_per_txg_dirty_frees_percent) #### discard * [zvol_max_discard_blocks](#zvol_max_discard_blocks) #### disks * [zfs_nocacheflush](#zfs_nocacheflush) + * [zil_nocacheflush](#zil_nocacheflush) #### DMU + * [zfs_async_block_max_blocks](#zfs_async_block_max_blocks) * [dmu_object_alloc_chunk_shift](#dmu_object_alloc_chunk_shift) * [zfs_dmu_offset_next_sync](#zfs_dmu_offset_next_sync) #### encryption + * [icp_aes_impl](#icp_aes_impl) + * [icp_gcm_impl](#icp_gcm_impl) * [zfs_key_max_salt_uses](#zfs_key_max_salt_uses) * [zfs_qat_encrypt_disable](#zfs_qat_encrypt_disable) #### filesystem @@ -175,19 +222,26 @@ has a "Tags" row with keywords for frequent searches. * [zfs_mg_fragmentation_threshold](#zfs_mg_fragmentation_threshold) * [zfs_mg_noalloc_threshold](#zfs_mg_noalloc_threshold) #### HDD + * [metaslab_lba_weighting_enabled](#metaslab_lba_weighting_enabled) * [zfs_vdev_mirror_rotating_inc](#zfs_vdev_mirror_rotating_inc) * [zfs_vdev_mirror_rotating_seek_inc](#zfs_vdev_mirror_rotating_seek_inc) * [zfs_vdev_mirror_rotating_seek_offset](#zfs_vdev_mirror_rotating_seek_offset) +#### hostid + * [spl_hostid](#spl_hostid) + * [spl_hostid_path](#spl_hostid_path) #### import * [zfs_autoimport_disable](#zfs_autoimport_disable) + * [zfs_max_missing_tvds](#zfs_max_missing_tvds) * [zfs_multihost_fail_intervals](#zfs_multihost_fail_intervals) * [zfs_multihost_history](#zfs_multihost_history) * [zfs_multihost_import_intervals](#zfs_multihost_import_intervals) * [zfs_multihost_interval](#zfs_multihost_interval) * [zfs_recover](#zfs_recover) * [spa_config_path](#spa_config_path) + * [spa_load_print_vdev_tree](#spa_load_print_vdev_tree) * [spa_load_verify_maxinflight](#spa_load_verify_maxinflight) * [spa_load_verify_metadata](#spa_load_verify_metadata) + * [spa_load_verify_shift](#spa_load_verify_shift) * [zvol_inhibit_dev](#zvol_inhibit_dev) #### L2ARC * [l2arc_feed_again](#l2arc_feed_again) @@ -216,6 +270,17 @@ has a "Tags" row with keywords for frequent searches. 
* [metaslab_debug_unload](#metaslab_debug_unload) * [zfs_scan_mem_lim_fact](#zfs_scan_mem_lim_fact) * [zfs_scan_strict_mem_lim](#zfs_scan_strict_mem_lim) + * [spl_kmem_alloc_max](#spl_kmem_alloc_max) + * [spl_kmem_alloc_warn](#spl_kmem_alloc_warn) + * [spl_kmem_cache_expire](#spl_kmem_cache_expire) + * [spl_kmem_cache_kmem_limit](#spl_kmem_cache_kmem_limit) + * [spl_kmem_cache_kmem_threads](#spl_kmem_cache_kmem_threads) + * [spl_kmem_cache_magazine_size](#spl_kmem_cache_magazine_size) + * [spl_kmem_cache_max_size](#spl_kmem_cache_max_size) + * [spl_kmem_cache_obj_per_slab](#spl_kmem_cache_obj_per_slab) + * [spl_kmem_cache_obj_per_slab_min](#spl_kmem_cache_obj_per_slab_min) + * [spl_kmem_cache_reclaim](#spl_kmem_cache_reclaim) + * [spl_kmem_cache_slab_limit](#spl_kmem_cache_slab_limit) #### metadata * [zfs_mdcomp_disable](#zfs_mdcomp_disable) #### metaslab @@ -229,6 +294,8 @@ has a "Tags" row with keywords for frequent searches. * [zfs_metaslab_segment_weight_enabled](#zfs_metaslab_segment_weight_enabled) * [zfs_metaslab_switch_threshold](#zfs_metaslab_switch_threshold) * [metaslabs_per_vdev](#metaslabs_per_vdev) + * [zfs_vdev_min_ms_count](#zfs_vdev_min_ms_count) + * [zfs_vdev_ms_count_limit](#zfs_vdev_ms_count_limit) #### mirror * [zfs_vdev_mirror_non_rotating_inc](#zfs_vdev_mirror_non_rotating_inc) * [zfs_vdev_mirror_non_rotating_seek_inc](#zfs_vdev_mirror_non_rotating_seek_inc) @@ -240,6 +307,10 @@ has a "Tags" row with keywords for frequent searches. * [zfs_multihost_history](#zfs_multihost_history) * [zfs_multihost_import_intervals](#zfs_multihost_import_intervals) * [zfs_multihost_interval](#zfs_multihost_interval) + * [spl_hostid](#spl_hostid) + * [spl_hostid_path](#spl_hostid_path) +#### panic + * [spl_panic_halt](#spl_panic_halt) #### prefetch * [zfs_arc_min_prefetch_ms](#zfs_arc_min_prefetch_ms) * [zfs_arc_min_prescient_prefetch_ms](#zfs_arc_min_prescient_prefetch_ms) @@ -254,23 +325,35 @@ has a "Tags" row with keywords for frequent searches. 
* [zfetch_min_sec_reap](#zfetch_min_sec_reap) * [zvol_prefetch_bytes](#zvol_prefetch_bytes) #### QAT - * [zfs_qat_disable](#zfs_qat_disable) * [zfs_qat_checksum_disable](#zfs_qat_checksum_disable) * [zfs_qat_compress_disable](#zfs_qat_compress_disable) + * [zfs_qat_disable](#zfs_qat_disable) * [zfs_qat_encrypt_disable](#zfs_qat_encrypt_disable) #### raidz * [zfs_vdev_raidz_impl](#zfs_vdev_raidz_impl) +#### receive + * [zfs_disable_ivset_guid_check](#zfs_disable_ivset_guid_check) + * [zfs_recv_queue_length](#zfs_recv_queue_length) +#### remove + * [zfs_obsolete_min_time_ms](#zfs_obsolete_min_time_ms) + * [zfs_remove_max_segment](#zfs_remove_max_segment) #### resilver + * [zfs_resilver_delay](#zfs_resilver_delay) + * [zfs_resilver_disable_defer](#zfs_resilver_disable_defer) * [zfs_resilver_min_time_ms](#zfs_resilver_min_time_ms) * [zfs_scan_checkpoint_intval](#zfs_scan_checkpoint_intval) * [zfs_scan_fill_weight](#zfs_scan_fill_weight) + * [zfs_scan_idle](#zfs_scan_idle) + * [zfs_scan_ignore_errors](#zfs_scan_ignore_errors) * [zfs_scan_issue_strategy](#zfs_scan_issue_strategy) * [zfs_scan_legacy](#zfs_scan_legacy) * [zfs_scan_max_ext_gap](#zfs_scan_max_ext_gap) * [zfs_scan_mem_lim_fact](#zfs_scan_mem_lim_fact) * [zfs_scan_mem_lim_soft_fact](#zfs_scan_mem_lim_soft_fact) * [zfs_scan_strict_mem_lim](#zfs_scan_strict_mem_lim) + * [zfs_scan_suspend_progress](#zfs_scan_suspend_progress) * [zfs_scan_vdev_limit](#zfs_scan_vdev_limit) + * [zfs_top_maxinflight](#zfs_top_maxinflight) * [zfs_vdev_scrub_max_active](#zfs_vdev_scrub_max_active) * [zfs_vdev_scrub_min_active](#zfs_vdev_scrub_min_active) #### scrub @@ -278,36 +361,68 @@ has a "Tags" row with keywords for frequent searches. * [zfs_no_scrub_prefetch](#zfs_no_scrub_prefetch) * [zfs_scan_checkpoint_intval](#zfs_scan_checkpoint_intval) * [zfs_scan_fill_weight](#zfs_scan_fill_weight) + * [zfs_scan_idle](#zfs_scan_idle) * [zfs_scan_issue_strategy](#zfs_scan_issue_strategy) * [zfs_scan_legacy](#zfs_scan_legacy) * [zfs_scan_max_ext_gap](#zfs_scan_max_ext_gap) * [zfs_scan_mem_lim_fact](#zfs_scan_mem_lim_fact) * [zfs_scan_mem_lim_soft_fact](#zfs_scan_mem_lim_soft_fact) + * [zfs_scan_min_time_ms](#zfs_scan_min_time_ms) * [zfs_scan_strict_mem_lim](#zfs_scan_strict_mem_lim) + * [zfs_scan_suspend_progress](#zfs_scan_suspend_progress) * [zfs_scan_vdev_limit](#zfs_scan_vdev_limit) + * [zfs_scrub_delay](#zfs_scrub_delay) * [zfs_scrub_min_time_ms](#zfs_scrub_min_time_ms) + * [zfs_top_maxinflight](#zfs_top_maxinflight) * [zfs_vdev_scrub_max_active](#zfs_vdev_scrub_max_active) * [zfs_vdev_scrub_min_active](#zfs_vdev_scrub_min_active) #### send * [ignore_hole_birth](#ignore_hole_birth) + * [zfs_override_estimate_recordsize](#zfs_override_estimate_recordsize) * [zfs_pd_bytes_max](#zfs_pd_bytes_max) * [zfs_send_corrupt_data](#zfs_send_corrupt_data) + * [zfs_send_queue_length](#zfs_send_queue_length) + * [zfs_send_unmodified_spill_blocks](#zfs_send_unmodified_spill_blocks) #### snapshot * [zfs_admin_snapshot](#zfs_admin_snapshot) * [zfs_expire_snapshot](#zfs_expire_snapshot) #### SPA * [spa_asize_inflation](#spa_asize_inflation) + * [spa_load_print_vdev_tree](#spa_load_print_vdev_tree) * [spa_load_verify_data](#spa_load_verify_data) + * [spa_load_verify_shift](#spa_load_verify_shift) * [spa_slop_shift](#spa_slop_shift) * [zfs_sync_pass_deferred_free](#zfs_sync_pass_deferred_free) * [zfs_sync_pass_dont_compress](#zfs_sync_pass_dont_compress) * [zfs_sync_pass_rewrite](#zfs_sync_pass_rewrite) * [zfs_sync_taskq_batch_pct](#zfs_sync_taskq_batch_pct) * 
[zfs_txg_timeout](#zfs_txg_timeout) +#### special_vdev + * [zfs_ddt_data_is_special](#zfs_ddt_data_is_special) + * [zfs_special_class_metadata_reserve_pct](#zfs_special_class_metadata_reserve_pct) + * [zfs_user_indirect_is_special](#zfs_user_indirect_is_special) #### SSD + * [metaslab_lba_weighting_enabled](#metaslab_lba_weighting_enabled) * [zfs_vdev_mirror_non_rotating_inc](#zfs_vdev_mirror_non_rotating_inc) * [zfs_vdev_mirror_non_rotating_seek_inc](#zfs_vdev_mirror_non_rotating_seek_inc) +#### taskq + * [spl_max_show_tasks](#spl_max_show_tasks) + * [spl_taskq_kick](#spl_taskq_kick) + * [spl_taskq_thread_bind](#spl_taskq_thread_bind) + * [spl_taskq_thread_dynamic](#spl_taskq_thread_dynamic) + * [spl_taskq_thread_priority](#spl_taskq_thread_priority) + * [spl_taskq_thread_sequential](#spl_taskq_thread_sequential) + * [zfs_zil_clean_taskq_nthr_pct](#zfs_zil_clean_taskq_nthr_pct) + * [zio_taskq_batch_pct](#zio_taskq_batch_pct) +#### trim + * [zfs_trim_extent_bytes_max](#zfs_trim_extent_bytes_max) + * [zfs_trim_extent_bytes_min](#zfs_trim_extent_bytes_min) + * [zfs_trim_metaslab_skip](#zfs_trim_metaslab_skip) + * [zfs_trim_queue_limit](#zfs_trim_queue_limit) + * [zfs_trim_txg_batch](#zfs_trim_txg_batch) + * [zfs_vdev_aggregate_trim](#zfs_vdev_aggregate_trim) #### vdev + * [zfs_checksum_events_per_second](#zfs_checksum_events_per_second) * [metaslab_aliquot](#metaslab_aliquot) * [metaslab_bias_enabled](#metaslab_bias_enabled) * [zfs_metaslab_fragmentation_threshold](#zfs_metaslab_fragmentation_threshold) @@ -316,7 +431,10 @@ has a "Tags" row with keywords for frequent searches. * [zfs_mg_noalloc_threshold](#zfs_mg_noalloc_threshold) * [zfs_multihost_interval](#zfs_multihost_interval) * [zfs_scan_vdev_limit](#zfs_scan_vdev_limit) + * [zfs_slow_io_events_per_second](#zfs_slow_io_events_per_second) + * [zfs_vdev_aggregate_trim](#zfs_vdev_aggregate_trim) * [zfs_vdev_aggregation_limit](#zfs_vdev_aggregation_limit) + * [zfs_vdev_aggregation_limit_non_rotating](#zfs_vdev_aggregation_limit_non_rotating) * [zfs_vdev_async_read_max_active](#zfs_vdev_async_read_max_active) * [zfs_vdev_async_read_min_active](#zfs_vdev_async_read_min_active) * [zfs_vdev_async_write_active_max_dirty_percent](#zfs_vdev_async_write_active_max_dirty_percent) @@ -326,15 +444,21 @@ has a "Tags" row with keywords for frequent searches. 
* [zfs_vdev_cache_bshift](#zfs_vdev_cache_bshift) * [zfs_vdev_cache_max](#zfs_vdev_cache_max) * [zfs_vdev_cache_size](#zfs_vdev_cache_size) + * [zfs_vdev_initializing_max_active](#zfs_vdev_initializing_max_active) + * [zfs_vdev_initializing_min_active](#zfs_vdev_initializing_min_active) * [zfs_vdev_max_active](#zfs_vdev_max_active) + * [zfs_vdev_min_ms_count](#zfs_vdev_min_ms_count) * [zfs_vdev_mirror_non_rotating_inc](#zfs_vdev_mirror_non_rotating_inc) * [zfs_vdev_mirror_non_rotating_seek_inc](#zfs_vdev_mirror_non_rotating_seek_inc) * [zfs_vdev_mirror_rotating_inc](#zfs_vdev_mirror_rotating_inc) * [zfs_vdev_mirror_rotating_seek_inc](#zfs_vdev_mirror_rotating_seek_inc) * [zfs_vdev_mirror_rotating_seek_offset](#zfs_vdev_mirror_rotating_seek_offset) + * [zfs_vdev_ms_count_limit](#zfs_vdev_ms_count_limit) * [zfs_vdev_queue_depth_pct](#zfs_vdev_queue_depth_pct) * [zfs_vdev_raidz_impl](#zfs_vdev_raidz_impl) * [zfs_vdev_read_gap_limit](#zfs_vdev_read_gap_limit) + * [zfs_vdev_removal_max_active](#zfs_vdev_removal_max_active) + * [zfs_vdev_removal_min_active](#zfs_vdev_removal_min_active) * [zfs_vdev_scheduler](#zfs_vdev_scheduler) * [zfs_vdev_scrub_max_active](#zfs_vdev_scrub_max_active) * [zfs_vdev_scrub_min_active](#zfs_vdev_scrub_min_active) @@ -342,12 +466,27 @@ has a "Tags" row with keywords for frequent searches. * [zfs_vdev_sync_read_min_active](#zfs_vdev_sync_read_min_active) * [zfs_vdev_sync_write_max_active](#zfs_vdev_sync_write_max_active) * [zfs_vdev_sync_write_min_active](#zfs_vdev_sync_write_min_active) + * [zfs_vdev_trim_max_active](#zfs_vdev_trim_max_active) + * [zfs_vdev_trim_min_active](#zfs_vdev_trim_min_active) + * [vdev_validate_skip](#vdev_validate_skip) * [zfs_vdev_write_gap_limit](#zfs_vdev_write_gap_limit) * [zio_dva_throttle_enabled](#zio_dva_throttle_enabled) + * [zio_slow_io_ms](#zio_slow_io_ms) #### vdev_cache * [zfs_vdev_cache_bshift](#zfs_vdev_cache_bshift) * [zfs_vdev_cache_max](#zfs_vdev_cache_max) * [zfs_vdev_cache_size](#zfs_vdev_cache_size) +#### vdev_initialize + * [zfs_initialize_value](#zfs_initialize_value) +#### vdev_removal + * [zfs_condense_indirect_commit_entry_delay_ms](#zfs_condense_indirect_commit_entry_delay_ms) + * [zfs_condense_indirect_vdevs_enable](#zfs_condense_indirect_vdevs_enable) + * [zfs_condense_max_obsolete_bytes](#zfs_condense_max_obsolete_bytes) + * [zfs_condense_min_mapping_bytes](#zfs_condense_min_mapping_bytes) + * [zfs_reconstruct_indirect_combinations_max](#zfs_reconstruct_indirect_combinations_max) + * [zfs_removal_ignore_errors](#zfs_removal_ignore_errors) + * [zfs_removal_suspend_progress](#zfs_removal_suspend_progress) + * [vdev_removal_max_span](#vdev_removal_max_span) #### volume * [zfs_max_recordsize](#zfs_max_recordsize) * [zvol_inhibit_dev](#zvol_inhibit_dev) @@ -365,29 +504,44 @@ has a "Tags" row with keywords for frequent searches. 
* [zfs_dirty_data_max_max_percent](#zfs_dirty_data_max_max_percent) * [zfs_dirty_data_max_percent](#zfs_dirty_data_max_percent) * [zfs_dirty_data_sync](#zfs_dirty_data_sync) + * [zfs_dirty_data_sync_percent](#zfs_dirty_data_sync_percent) #### zed * [zfs_checksums_per_second](#zfs_checksums_per_second) * [zfs_delays_per_second](#zfs_delays_per_second) + * [zio_slow_io_ms](#zio_slow_io_ms) #### ZIL * [zfs_commit_timeout_pct](#zfs_commit_timeout_pct) * [zfs_immediate_write_sz](#zfs_immediate_write_sz) * [zfs_zil_clean_taskq_maxalloc](#zfs_zil_clean_taskq_maxalloc) * [zfs_zil_clean_taskq_minalloc](#zfs_zil_clean_taskq_minalloc) * [zfs_zil_clean_taskq_nthr_pct](#zfs_zil_clean_taskq_nthr_pct) + * [zil_nocacheflush](#zil_nocacheflush) * [zil_replay_disable](#zil_replay_disable) * [zil_slog_bulk](#zil_slog_bulk) #### ZIO_scheduler + * [zfs_dirty_data_sync](#zfs_dirty_data_sync) + * [zfs_dirty_data_sync_percent](#zfs_dirty_data_sync_percent) + * [zfs_resilver_delay](#zfs_resilver_delay) + * [zfs_scan_idle](#zfs_scan_idle) + * [zfs_scrub_delay](#zfs_scrub_delay) + * [zfs_top_maxinflight](#zfs_top_maxinflight) * [zfs_txg_timeout](#zfs_txg_timeout) + * [zfs_vdev_aggregate_trim](#zfs_vdev_aggregate_trim) * [zfs_vdev_aggregation_limit](#zfs_vdev_aggregation_limit) + * [zfs_vdev_aggregation_limit_non_rotating](#zfs_vdev_aggregation_limit_non_rotating) * [zfs_vdev_async_read_max_active](#zfs_vdev_async_read_max_active) * [zfs_vdev_async_read_min_active](#zfs_vdev_async_read_min_active) * [zfs_vdev_async_write_active_max_dirty_percent](#zfs_vdev_async_write_active_max_dirty_percent) * [zfs_vdev_async_write_active_min_dirty_percent](#zfs_vdev_async_write_active_min_dirty_percent) * [zfs_vdev_async_write_max_active](#zfs_vdev_async_write_max_active) * [zfs_vdev_async_write_min_active](#zfs_vdev_async_write_min_active) + * [zfs_vdev_initializing_max_active](#zfs_vdev_initializing_max_active) + * [zfs_vdev_initializing_min_active](#zfs_vdev_initializing_min_active) * [zfs_vdev_max_active](#zfs_vdev_max_active) * [zfs_vdev_queue_depth_pct](#zfs_vdev_queue_depth_pct) * [zfs_vdev_read_gap_limit](#zfs_vdev_read_gap_limit) + * [zfs_vdev_removal_max_active](#zfs_vdev_removal_max_active) + * [zfs_vdev_removal_min_active](#zfs_vdev_removal_min_active) * [zfs_vdev_scheduler](#zfs_vdev_scheduler) * [zfs_vdev_scrub_max_active](#zfs_vdev_scrub_max_active) * [zfs_vdev_scrub_min_active](#zfs_vdev_scrub_min_active) @@ -395,13 +549,17 @@ has a "Tags" row with keywords for frequent searches. * [zfs_vdev_sync_read_min_active](#zfs_vdev_sync_read_min_active) * [zfs_vdev_sync_write_max_active](#zfs_vdev_sync_write_max_active) * [zfs_vdev_sync_write_min_active](#zfs_vdev_sync_write_min_active) + * [zfs_vdev_trim_max_active](#zfs_vdev_trim_max_active) + * [zfs_vdev_trim_min_active](#zfs_vdev_trim_min_active) * [zfs_vdev_write_gap_limit](#zfs_vdev_write_gap_limit) * [zio_dva_throttle_enabled](#zio_dva_throttle_enabled) * [zio_requeue_io_start_cut_in_line](#zio_requeue_io_start_cut_in_line) * [zio_taskq_batch_pct](#zio_taskq_batch_pct) + ## Index * [zfs_abd_scatter_enabled](#zfs_abd_scatter_enabled) * [zfs_abd_scatter_max_order](#zfs_abd_scatter_max_order) + * [zfs_abd_scatter_min_size](#zfs_abd_scatter_min_size) * [zfs_admin_snapshot](#zfs_admin_snapshot) * [zfs_arc_average_blocksize](#zfs_arc_average_blocksize) * [zfs_arc_dnode_limit](#zfs_arc_dnode_limit) @@ -418,6 +576,7 @@ has a "Tags" row with keywords for frequent searches. 
* [zfs_arc_meta_prune](#zfs_arc_meta_prune) * [zfs_arc_meta_strategy](#zfs_arc_meta_strategy) * [zfs_arc_min](#zfs_arc_min) + * [zfs_arc_min_prefetch_lifespan](#zfs_arc_min_prefetch_lifespan) * [zfs_arc_min_prefetch_ms](#zfs_arc_min_prefetch_ms) * [zfs_arc_min_prescient_prefetch_ms](#zfs_arc_min_prescient_prefetch_ms) * [zfs_arc_overflow_shift](#zfs_arc_overflow_shift) @@ -426,17 +585,27 @@ has a "Tags" row with keywords for frequent searches. * [zfs_arc_pc_percent](#zfs_arc_pc_percent) * [zfs_arc_shrink_shift](#zfs_arc_shrink_shift) * [zfs_arc_sys_free](#zfs_arc_sys_free) + * [zfs_async_block_max_blocks](#zfs_async_block_max_blocks) * [zfs_autoimport_disable](#zfs_autoimport_disable) + * [zfs_checksum_events_per_second](#zfs_checksum_events_per_second) * [zfs_checksums_per_second](#zfs_checksums_per_second) * [zfs_commit_timeout_pct](#zfs_commit_timeout_pct) * [zfs_compressed_arc_enabled](#zfs_compressed_arc_enabled) + * [zfs_condense_indirect_commit_entry_delay_ms](#zfs_condense_indirect_commit_entry_delay_ms) + * [zfs_condense_indirect_vdevs_enable](#zfs_condense_indirect_vdevs_enable) + * [zfs_condense_max_obsolete_bytes](#zfs_condense_max_obsolete_bytes) + * [zfs_condense_min_mapping_bytes](#zfs_condense_min_mapping_bytes) * [zfs_dbgmsg_enable](#zfs_dbgmsg_enable) * [zfs_dbgmsg_maxsize](#zfs_dbgmsg_maxsize) * [dbuf_cache_hiwater_pct](#dbuf_cache_hiwater_pct) * [dbuf_cache_lowater_pct](#dbuf_cache_lowater_pct) * [dbuf_cache_max_bytes](#dbuf_cache_max_bytes) * [dbuf_cache_max_shift](#dbuf_cache_max_shift) + * [dbuf_cache_shift](#dbuf_cache_shift) + * [dbuf_metadata_cache_max_bytes](#dbuf_metadata_cache_max_bytes) + * [dbuf_metadata_cache_shift](#dbuf_metadata_cache_shift) * [zfs_dbuf_state_index](#zfs_dbuf_state_index) + * [zfs_ddt_data_is_special](#zfs_ddt_data_is_special) * [zfs_deadman_checktime_ms](#zfs_deadman_checktime_ms) * [zfs_deadman_enabled](#zfs_deadman_enabled) * [zfs_deadman_failmode](#zfs_deadman_failmode) @@ -452,7 +621,9 @@ has a "Tags" row with keywords for frequent searches. * [zfs_dirty_data_max_max_percent](#zfs_dirty_data_max_max_percent) * [zfs_dirty_data_max_percent](#zfs_dirty_data_max_percent) * [zfs_dirty_data_sync](#zfs_dirty_data_sync) + * [zfs_dirty_data_sync_percent](#zfs_dirty_data_sync_percent) * [zfs_disable_dup_eviction](#zfs_disable_dup_eviction) + * [zfs_disable_ivset_guid_check](#zfs_disable_ivset_guid_check) * [dmu_object_alloc_chunk_shift](#dmu_object_alloc_chunk_shift) * [zfs_dmu_offset_next_sync](#zfs_dmu_offset_next_sync) * [zfs_expire_snapshot](#zfs_expire_snapshot) @@ -462,8 +633,11 @@ has a "Tags" row with keywords for frequent searches. * [zfs_free_leak_on_eio](#zfs_free_leak_on_eio) * [zfs_free_max_blocks](#zfs_free_max_blocks) * [zfs_free_min_time_ms](#zfs_free_min_time_ms) + * [icp_aes_impl](#icp_aes_impl) + * [icp_gcm_impl](#icp_gcm_impl) * [ignore_hole_birth](#ignore_hole_birth) * [zfs_immediate_write_sz](#zfs_immediate_write_sz) + * [zfs_initialize_value](#zfs_initialize_value) * [zfs_key_max_salt_uses](#zfs_key_max_salt_uses) * [l2arc_feed_again](#l2arc_feed_again) * [l2arc_feed_min_ms](#l2arc_feed_min_ms) @@ -475,12 +649,17 @@ has a "Tags" row with keywords for frequent searches. 
* [l2arc_norw](#l2arc_norw) * [l2arc_write_boost](#l2arc_write_boost) * [l2arc_write_max](#l2arc_write_max) + * [zfs_lua_max_instrlimit](#zfs_lua_max_instrlimit) + * [zfs_lua_max_memlimit](#zfs_lua_max_memlimit) + * [zfs_max_dataset_nesting](#zfs_max_dataset_nesting) + * [zfs_max_missing_tvds](#zfs_max_missing_tvds) * [zfs_max_recordsize](#zfs_max_recordsize) * [zfs_mdcomp_disable](#zfs_mdcomp_disable) * [metaslab_aliquot](#metaslab_aliquot) * [metaslab_bias_enabled](#metaslab_bias_enabled) * [metaslab_debug_load](#metaslab_debug_load) * [metaslab_debug_unload](#metaslab_debug_unload) + * [metaslab_force_ganging](#metaslab_force_ganging) * [metaslab_fragmentation_factor_enabled](#metaslab_fragmentation_factor_enabled) * [zfs_metaslab_fragmentation_threshold](#zfs_metaslab_fragmentation_threshold) * [metaslab_lba_weighting_enabled](#metaslab_lba_weighting_enabled) @@ -500,6 +679,8 @@ has a "Tags" row with keywords for frequent searches. * [zfs_nocacheflush](#zfs_nocacheflush) * [zfs_nopwrite_enabled](#zfs_nopwrite_enabled) * [zfs_object_mutex_size](#zfs_object_mutex_size) + * [zfs_obsolete_min_time_ms](#zfs_obsolete_min_time_ms) + * [zfs_override_estimate_recordsize](#zfs_override_estimate_recordsize) * [zfs_pd_bytes_max](#zfs_pd_bytes_max) * [zfs_per_txg_dirty_frees_percent](#zfs_per_txg_dirty_frees_percent) * [zfs_prefetch_disable](#zfs_prefetch_disable) @@ -510,33 +691,82 @@ has a "Tags" row with keywords for frequent searches. * [zfs_read_chunk_size](#zfs_read_chunk_size) * [zfs_read_history](#zfs_read_history) * [zfs_read_history_hits](#zfs_read_history_hits) + * [zfs_reconstruct_indirect_combinations_max](#zfs_reconstruct_indirect_combinations_max) * [zfs_recover](#zfs_recover) + * [zfs_recv_queue_length](#zfs_recv_queue_length) + * [zfs_removal_ignore_errors](#zfs_removal_ignore_errors) + * [zfs_removal_suspend_progress](#zfs_removal_suspend_progress) + * [zfs_remove_max_segment](#zfs_remove_max_segment) + * [zfs_resilver_delay](#zfs_resilver_delay) + * [zfs_resilver_disable_defer](#zfs_resilver_disable_defer) * [zfs_resilver_min_time_ms](#zfs_resilver_min_time_ms) * [zfs_scan_checkpoint_intval](#zfs_scan_checkpoint_intval) * [zfs_scan_fill_weight](#zfs_scan_fill_weight) + * [zfs_scan_idle](#zfs_scan_idle) + * [zfs_scan_ignore_errors](#zfs_scan_ignore_errors) * [zfs_scan_issue_strategy](#zfs_scan_issue_strategy) * [zfs_scan_legacy](#zfs_scan_legacy) * [zfs_scan_max_ext_gap](#zfs_scan_max_ext_gap) * [zfs_scan_mem_lim_fact](#zfs_scan_mem_lim_fact) * [zfs_scan_mem_lim_soft_fact](#zfs_scan_mem_lim_soft_fact) + * [zfs_scan_min_time_ms](#zfs_scan_min_time_ms) * [zfs_scan_strict_mem_lim](#zfs_scan_strict_mem_lim) + * [zfs_scan_suspend_progress](#zfs_scan_suspend_progress) * [zfs_scan_vdev_limit](#zfs_scan_vdev_limit) + * [zfs_scrub_delay](#zfs_scrub_delay) * [zfs_scrub_min_time_ms](#zfs_scrub_min_time_ms) * [zfs_send_corrupt_data](#zfs_send_corrupt_data) * [send_holes_without_birth_time](#send_holes_without_birth_time) + * [zfs_send_queue_length](#zfs_send_queue_length) + * [zfs_send_unmodified_spill_blocks](#zfs_send_unmodified_spill_blocks) + * [zfs_slow_io_events_per_second](#zfs_slow_io_events_per_second) * [spa_asize_inflation](#spa_asize_inflation) * [spa_config_path](#spa_config_path) + * [zfs_spa_discard_memory_limit](#zfs_spa_discard_memory_limit) + * [spa_load_print_vdev_tree](#spa_load_print_vdev_tree) * [spa_load_verify_data](#spa_load_verify_data) * [spa_load_verify_maxinflight](#spa_load_verify_maxinflight) * [spa_load_verify_metadata](#spa_load_verify_metadata) + * 
[spa_load_verify_shift](#spa_load_verify_shift) * [spa_slop_shift](#spa_slop_shift) + * [zfs_special_class_metadata_reserve_pct](#zfs_special_class_metadata_reserve_pct) + * [spl_hostid](#spl_hostid) + * [spl_hostid_path](#spl_hostid_path) + * [spl_kmem_alloc_max](#spl_kmem_alloc_max) + * [spl_kmem_alloc_warn](#spl_kmem_alloc_warn) + * [spl_kmem_cache_expire](#spl_kmem_cache_expire) + * [spl_kmem_cache_kmem_limit](#spl_kmem_cache_kmem_limit) + * [spl_kmem_cache_kmem_threads](#spl_kmem_cache_kmem_threads) + * [spl_kmem_cache_magazine_size](#spl_kmem_cache_magazine_size) + * [spl_kmem_cache_max_size](#spl_kmem_cache_max_size) + * [spl_kmem_cache_obj_per_slab](#spl_kmem_cache_obj_per_slab) + * [spl_kmem_cache_obj_per_slab_min](#spl_kmem_cache_obj_per_slab_min) + * [spl_kmem_cache_reclaim](#spl_kmem_cache_reclaim) + * [spl_kmem_cache_slab_limit](#spl_kmem_cache_slab_limit) + * [spl_max_show_tasks](#spl_max_show_tasks) + * [spl_panic_halt](#spl_panic_halt) + * [spl_taskq_kick](#spl_taskq_kick) + * [spl_taskq_thread_bind](#spl_taskq_thread_bind) + * [spl_taskq_thread_dynamic](#spl_taskq_thread_dynamic) + * [spl_taskq_thread_priority](#spl_taskq_thread_priority) + * [spl_taskq_thread_sequential](#spl_taskq_thread_sequential) * [zfs_sync_pass_deferred_free](#zfs_sync_pass_deferred_free) * [zfs_sync_pass_dont_compress](#zfs_sync_pass_dont_compress) * [zfs_sync_pass_rewrite](#zfs_sync_pass_rewrite) * [zfs_sync_taskq_batch_pct](#zfs_sync_taskq_batch_pct) + * [zfs_top_maxinflight](#zfs_top_maxinflight) + * [zfs_trim_extent_bytes_max](#zfs_trim_extent_bytes_max) + * [zfs_trim_extent_bytes_min](#zfs_trim_extent_bytes_min) + * [zfs_trim_metaslab_skip](#zfs_trim_metaslab_skip) + * [zfs_trim_queue_limit](#zfs_trim_queue_limit) + * [zfs_trim_txg_batch](#zfs_trim_txg_batch) * [zfs_txg_history](#zfs_txg_history) * [zfs_txg_timeout](#zfs_txg_timeout) + * [zfs_unlink_suspend_progress](#zfs_unlink_suspend_progress) + * [zfs_user_indirect_is_special](#zfs_user_indirect_is_special) + * [zfs_vdev_aggregate_trim](#zfs_vdev_aggregate_trim) * [zfs_vdev_aggregation_limit](#zfs_vdev_aggregation_limit) + * [zfs_vdev_aggregation_limit_non_rotating](#zfs_vdev_aggregation_limit_non_rotating) * [zfs_vdev_async_read_max_active](#zfs_vdev_async_read_max_active) * [zfs_vdev_async_read_min_active](#zfs_vdev_async_read_min_active) * [zfs_vdev_async_write_active_max_dirty_percent](#zfs_vdev_async_write_active_max_dirty_percent) @@ -546,15 +776,23 @@ has a "Tags" row with keywords for frequent searches. 
* [zfs_vdev_cache_bshift](#zfs_vdev_cache_bshift) * [zfs_vdev_cache_max](#zfs_vdev_cache_max) * [zfs_vdev_cache_size](#zfs_vdev_cache_size) + * [zfs_vdev_default_ms_count](#zfs_vdev_default_ms_count) + * [zfs_vdev_initializing_max_active](#zfs_vdev_initializing_max_active) + * [zfs_vdev_initializing_min_active](#zfs_vdev_initializing_min_active) * [zfs_vdev_max_active](#zfs_vdev_max_active) + * [zfs_vdev_min_ms_count](#zfs_vdev_min_ms_count) * [zfs_vdev_mirror_non_rotating_inc](#zfs_vdev_mirror_non_rotating_inc) * [zfs_vdev_mirror_non_rotating_seek_inc](#zfs_vdev_mirror_non_rotating_seek_inc) * [zfs_vdev_mirror_rotating_inc](#zfs_vdev_mirror_rotating_inc) * [zfs_vdev_mirror_rotating_seek_inc](#zfs_vdev_mirror_rotating_seek_inc) * [zfs_vdev_mirror_rotating_seek_offset](#zfs_vdev_mirror_rotating_seek_offset) + * [zfs_vdev_ms_count_limit](#zfs_vdev_ms_count_limit) * [zfs_vdev_queue_depth_pct](#zfs_vdev_queue_depth_pct) * [zfs_vdev_raidz_impl](#zfs_vdev_raidz_impl) * [zfs_vdev_read_gap_limit](#zfs_vdev_read_gap_limit) + * [zfs_vdev_removal_max_active](#zfs_vdev_removal_max_active) + * [vdev_removal_max_span](#vdev_removal_max_span) + * [zfs_vdev_removal_min_active](#zfs_vdev_removal_min_active) * [zfs_vdev_scheduler](#zfs_vdev_scheduler) * [zfs_vdev_scrub_max_active](#zfs_vdev_scrub_max_active) * [zfs_vdev_scrub_min_active](#zfs_vdev_scrub_min_active) @@ -562,6 +800,9 @@ has a "Tags" row with keywords for frequent searches. * [zfs_vdev_sync_read_min_active](#zfs_vdev_sync_read_min_active) * [zfs_vdev_sync_write_max_active](#zfs_vdev_sync_write_max_active) * [zfs_vdev_sync_write_min_active](#zfs_vdev_sync_write_min_active) + * [zfs_vdev_trim_max_active](#zfs_vdev_trim_max_active) + * [zfs_vdev_trim_min_active](#zfs_vdev_trim_min_active) + * [vdev_validate_skip](#vdev_validate_skip) * [zfs_vdev_write_gap_limit](#zfs_vdev_write_gap_limit) * [zfs_zevent_cols](#zfs_zevent_cols) * [zfs_zevent_console](#zfs_zevent_console) @@ -573,11 +814,15 @@ has a "Tags" row with keywords for frequent searches. * [zfs_zil_clean_taskq_maxalloc](#zfs_zil_clean_taskq_maxalloc) * [zfs_zil_clean_taskq_minalloc](#zfs_zil_clean_taskq_minalloc) * [zfs_zil_clean_taskq_nthr_pct](#zfs_zil_clean_taskq_nthr_pct) + * [zil_nocacheflush](#zil_nocacheflush) * [zil_replay_disable](#zil_replay_disable) * [zil_slog_bulk](#zil_slog_bulk) + * [zio_deadman_log_all](#zio_deadman_log_all) + * [zio_decompress_fail_fraction](#zio_decompress_fail_fraction) * [zio_delay_max](#zio_delay_max) * [zio_dva_throttle_enabled](#zio_dva_throttle_enabled) * [zio_requeue_io_start_cut_in_line](#zio_requeue_io_start_cut_in_line) + * [zio_slow_io_ms](#zio_slow_io_ms) * [zio_taskq_batch_pct](#zio_taskq_batch_pct) * [zvol_inhibit_dev](#zvol_inhibit_dev) * [zvol_major](#zvol_major) @@ -1910,14 +2155,32 @@ when there is dirty data to be written. | zfs_dirty_data_sync | Notes |---|--- -| Tags | [write_throttle](#write_throttle) +| Tags | [write_throttle](#write_throttle), [ZIO_scheduler](#ZIO_scheduler) | When to change | TBD | Data Type | ulong | Units | bytes | Range | 1 to ULONG_MAX | Default | 67,108,864 (64 MiB) | Change | Dynamic -| Versions Affected | v0.6.4 and later +| Versions Affected | v0.6.4 through v0.8.x, deprecation planned for v2 + +### zfs_dirty_data_sync_percent +When there is at least `zfs_dirty_data_sync_percent` of [zfs_dirty_data_max](#zfs_dirty_data_max) +dirty data, a transaction group sync is started. 
+This allows a transaction group sync to occur more frequently +than the transaction group timeout interval (see [zfs_txg_timeout](#zfs_txg_timeout)) +when there is dirty data to be written. + +| zfs_dirty_data_sync_percent | Notes +|---|--- +| Tags | [write_throttle](#write_throttle), [ZIO_scheduler](#ZIO_scheduler) +| When to change | TBD +| Data Type | int +| Units | percent +| Range | 1 to [zfs_vdev_async_write_active_min_dirty_percent](#zfs_vdev_async_write_active_min_dirty_percent) +| Default | 20 +| Change | Dynamic +| Versions Affected | planned for v2, deprecates [zfs_dirty_data_sync](#zfs_dirty_data_sync) ### zfs_fletcher_4_impl Fletcher-4 is the default checksum algorithm for metadata and data. @@ -1983,7 +2246,7 @@ Maximum asynchronous read I/Os active to each device. | zfs_vdev_async_read_max_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active) @@ -1997,7 +2260,7 @@ Minimum asynchronous read I/Os active to each device. | zfs_vdev_async_read_min_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to ([zfs_vdev_async_read_max_active](#zfs_vdev_async_read_max_active) - 1) @@ -2019,7 +2282,7 @@ and [zfs_vdev_async_write_max_active](#zfs_vdev_async_write_max_active) | zfs_vdev_async_write_active_max_dirty_percent | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | int | Units | percent of [zfs_dirty_data_max](#zfs_dirty_data_max) | Range | 0 to 100 @@ -2039,7 +2302,7 @@ the active I/O limit is linearly interpolated between | zfs_vdev_async_write_active_min_dirty_percent | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | int | Units | percent of zfs_dirty_data_max | Range | 0 to ([zfs_vdev_async_write_active_max_dirty_percent](#zfs_vdev_async_write_active_max_dirty_percent) - 1) @@ -2054,7 +2317,7 @@ write I/Os active to each device. | zfs_vdev_async_write_max_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active) @@ -2073,7 +2336,7 @@ further increasing latency. | zfs_vdev_async_write_min_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_async_write_max_active](#zfs_vdev_async_write_max_active) @@ -2094,7 +2357,7 @@ the intervening device driver layers. 
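+
+As a rough way to see how the per-vdev queues behave relative to this limit, the
+queue statistics reported by `zpool iostat` can be watched alongside the current
+setting; a minimal sketch (the pool name `tank` is hypothetical):
+
+```shell
+# current per-vdev limit on concurrently active I/Os
+cat /sys/module/zfs/parameters/zfs_vdev_max_active
+
+# report pending and active queue depths per vdev every 5 seconds
+zpool iostat -q -v tank 5
+```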
| zfs_vdev_max_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | sum of each queue's min_active to UINT32_MAX @@ -2109,7 +2372,7 @@ read I/Os active to each device. | zfs_vdev_scrub_max_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler), [scrub](#scrub), [resilver](#resilver) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active) @@ -2124,7 +2387,7 @@ to each device. | zfs_vdev_scrub_min_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler), [scrub](#scrub), [resilver](#resilver) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_scrub_max_active](#zfs_vdev_scrub_max_active) @@ -2138,7 +2401,7 @@ Maximum synchronous read I/Os active to each device. | zfs_vdev_sync_read_max_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active) @@ -2153,7 +2416,7 @@ active to each device. | zfs_vdev_sync_read_min_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_sync_read_max_active](#zfs_vdev_sync_read_max_active) @@ -2168,7 +2431,7 @@ to each device. | zfs_vdev_sync_write_max_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active) @@ -2183,7 +2446,7 @@ active to each device. | zfs_vdev_sync_write_min_active | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to [zfs_vdev_sync_write_max_active](#zfs_vdev_sync_write_max_active) @@ -2207,7 +2470,7 @@ See also [zio_dva_throttle_enabled](#zio_dva_throttle_enabled) | zfs_vdev_queue_depth_pct | Notes |---|--- | Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | uint32 | Units | I/O operations | Range | 1 to UINT32_MAX @@ -2658,7 +2921,7 @@ To ensure on-media consistency, keep cache flush enabled. 
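+
+As a minimal illustration of the general pattern described in the introduction,
+the current value can be checked at runtime and a persistent override recorded
+in `/etc/modprobe.d/zfs.conf` (the reason, name, and date below are placeholders):
+
+```shell
+# observe the current setting (0 = cache flush commands are sent)
+cat /sys/module/zfs/parameters/zfs_nocacheflush
+
+# disable cache flushes for the running kernel module (illustrative only;
+# do this only when every pool device has a nonvolatile write cache)
+echo 1 > /sys/module/zfs/parameters/zfs_nocacheflush
+
+# make the change persistent across reboots
+cat >> /etc/modprobe.d/zfs.conf <<'EOF'
+# disable cache flush commands: all vdevs have nonvolatile write caches
+# changed by YOUR_NAME on DATE
+options zfs zfs_nocacheflush=1
+EOF
+```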
| zfs_nocacheflush | Notes |---|--- | Tags | [disks](#disks) -| When to change | If the storage device has nonvolatile cache, then disabling cache flush can save the cost of occasional cache flush comamnds. +| When to change | If the storage device has nonvolatile cache, then disabling cache flush can save the cost of occasional cache flush comamnds | Data Type | boolean | Range | 0=send cache flush commands, 1=do not send cache flush commands | Default | 0 @@ -2735,7 +2998,7 @@ A value of zero will disable this throttle. | zfs_per_txg_dirty_frees_percent | Notes |---|--- | Tags | [delete](#delete) -| When to change | For `zfs receive` workloads, consider increasing or disabling. See section "ZFS I/O SCHEDULER" +| When to change | For `zfs receive` workloads, consider increasing or disabling. See section [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | ulong | Units | percent | Range | 0 to 100 @@ -2827,7 +3090,13 @@ Resilvers are processed by the sync thread in syncing context. While resilvering, ZFS spends at least `zfs_resilver_min_time_ms` time working on a resilver between txg commits. -See also zfs_txg_timeout. +The [zfs_txg_timeout](#zfs_txg_timeout) tunable sets a nominal timeout value +for the txg commits. By default, this timeout is 5 seconds and the `zfs_resilver_min_time_ms` +is 3 seconds. However, many variables contribute to changing the actual txg times. +The measured txg interval is observed as the `otime` column (in nanoseconds) in +the `/proc/spl/kstat/zfs/POOL_NAME/txgs` file. + +See also [zfs_txg_timeout](#zfs_txg_timeout) and [zfs_scan_min_time_ms](#zfs_scan_min_time_ms) | zfs_resilver_min_time_ms | Notes |---|--- @@ -2840,17 +3109,17 @@ See also zfs_txg_timeout. | Change | Dynamic | Versions Affected | all -### zfs_scrub_min_time_ms +### zfs_scan_min_time_ms Scrubs are processed by the sync thread in syncing context. While -scrubbing, ZFS spends at least `zfs_scrub_min_time_ms` time working on a -resilver between txg commits. +scrubbing, ZFS spends at least `zfs_scan_min_time_ms` time working on a +scrub between txg commits. -See also zfs_txg_timeout. +See also [zfs_txg_timeout](#zfs_txg_timeout) and [zfs_resilver_min_time_ms](#zfs_resilver_min_time_ms) -| zfs_scrub_min_time_ms | Notes +| zfs_scan_min_time_ms | Notes |---|--- | Tags | [scrub](#scrub) -| When to change | In some scrub cases, increasing `zfs_scrub_min_time_ms` can result in faster completion +| When to change | In some scrub cases, increasing `zfs_scan_min_time_ms` can result in faster completion | Data Type | int | Units | milliseconds | Range | 1 to [zfs_txg_timeout](#zfs_txg_timeout) converted to milliseconds @@ -2919,9 +3188,8 @@ instead of the newer sequential behavior. 
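+
+For example, to fall back to the legacy algorithm before starting a scrub on a
+memory-constrained system (the pool name `tank` is hypothetical):
+
+```shell
+# 0 = sequential scan (gather and sort metadata first), 1 = legacy scan
+cat /sys/module/zfs/parameters/zfs_scan_legacy
+
+# switch to the legacy algorithm, then start the scrub
+echo 1 > /sys/module/zfs/parameters/zfs_scan_legacy
+zpool scrub tank
+```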
| zfs_scan_legacy | Notes |---|--- | Tags | [resilver](#resilver), [scrub](#scrub) -| When to change | TBD -| Data Type | TBD -| Units | TBD +| When to change | In some cases, the new scan mode can consumer more memory as it collects and sorts I/Os; using the legacy algorithm can be more memory efficient at the expense of HDD read efficiency +| Data Type | boolean | Range | 0=use new method: scrubs and resilvers will gather metadata in memory before issuing sequential I/O, 1=use legacy algorithm will be used where I/O is initiated as soon as it is discovered | Default | 0 | Change | Dynamic, however changing to 0 does not affect in-progress scrubs or resilvers @@ -3084,7 +3352,7 @@ DSL pool sync taskq, `dp_sync_taskq` | zfs_sync_taskq_batch_pct | Notes |---|--- | Tags | [SPA](#spa) -| When to change | To adjust the number of `dp_sync_taskq` threads +| When to change | to adjust the number of `dp_sync_taskq` threads | Data Type | int | Units | percent of number of online CPUs | Range | 1 to 100 @@ -3119,8 +3387,13 @@ txg commits can occur more frequently and a rapid rate of txg commits often indicates a busy write workload, quota limits reached, or the free space is critically low. +Many variables contribute to changing the actual txg times. txg commits can also take longer than `zfs_txg_timeout` if the ZFS write throttle -is not properly tuned or the time to sync is otherwise delayed (eg slow device) +is not properly tuned or the time to sync is otherwise delayed (eg slow device). +Shorter txg commit intervals can occur due to [zfs_dirty_data_sync](#zfs_dirty_data_sync) +for write-intensive workloads. +The measured txg interval is observed as the `otime` column (in nanoseconds) in +the `/proc/spl/kstat/zfs/POOL_NAME/txgs` file. See also [zfs_dirty_data_sync](#zfs_dirty_data_sync) and [zfs_txg_history](#zfs_txg_history) @@ -3128,7 +3401,7 @@ See also [zfs_dirty_data_sync](#zfs_dirty_data_sync) and | zfs_txg_timeout | Notes |---|--- | Tags | [SPA](#spa), [ZIO_scheduler](#zio_scheduler) -| When to change | To optimize the work done by txg commit relative to the pool requirements. See also section "ZFS I/O SCHEDULER" +| When to change | To optimize the work done by txg commit relative to the pool requirements. See also section [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | int | Units | seconds | Range | 1 to INT_MAX @@ -3151,7 +3424,9 @@ devices, such as modern HDDs, contain schedulers that can aggregate I/Os. In general, I/O aggregation can improve performance for devices, such as HDDs, where ordering I/O operations for contiguous LBAs is a benefit. For random access devices, such as SSDs, aggregation might not improve performance relative to the -CPU cycles needed to aggregate. +CPU cycles needed to aggregate. For devices that represent themselves as having +no rotation, the [zfs_vdev_aggregation_limit_non_rotating](#zfs_vdev_aggregation_limit_non_rotating) +parameter is used instead of `zfs_vdev_aggregation_limit` | zfs_vdev_aggregation_limit | Notes |---|--- @@ -3522,7 +3797,7 @@ The default value of 100% will create a maximum of one thread per cpu. | zfs_zil_clean_taskq_nthr_pct | Notes |---|--- -| Tags | [ZIL](#zil) +| Tags | [taskq](#taskq), [ZIL](#zil) | When to change | Testing ZIL clean and SPA sync performance | Data Type | int | Units | percent of number of CPUs @@ -3556,7 +3831,7 @@ to reduct potential log device abuse by a single active ZIL writer. 
| zil_slog_bulk | Notes |---|--- | Tags | [ZIL](#zil) -| When to change | See the section "ZFS I/O SCHEDULER" +| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler) | Data Type | ulong | Units | bytes | Range | 0 to ULONG_MAX @@ -3633,7 +3908,7 @@ require recompiling the kernel module to adjust. | zio_taskq_batch_pct | Notes |---|--- -| Tags | [ZIO_scheduler](#zio_scheduler) +| Tags | [taskq](#taskq), [ZIO_scheduler](#zio_scheduler) | When to change | To tune parallelism in multiprocessor systems | Data Type | int | Units | percent of number of CPUs @@ -4057,3 +4332,1570 @@ set to 1 to enable checking before scanning each block. | Change | Dynamic | Versions Affected | v0.8.0 +### zfs_send_queue_length +`zfs_send_queue_length` is the maximum number of bytes allowed in the zfs send queue. + +| zfs_send_queue_length| Notes +|---|--- +| Tags | [send](#send) +| When to change | When using the largest recordsize or volblocksize (16 MiB), increasing can improve send efficiency +| Data Type | int +| Units | bytes +| Range | Must be at least twice the maximum recordsize or volblocksize in use +| Default | 16,777,216 bytes (16 MiB) +| Change | Dynamic +| Versions Affected | v0.8.1 + +### zfs_recv_queue_length +`zfs_recv_queue_length` is the maximum number of bytes allowed in the zfs receive queue. + + +| zfs_recv_queue_length | Notes +|---|--- +| Tags | [receive](#receive) +| When to change | When using the largest recordsize or volblocksize (16 MiB), increasing can improve receive efficiency +| Data Type | int +| Units | bytes +| Range | Must be at least twice the maximum recordsize or volblocksize in use +| Default | 16,777,216 bytes (16 MiB) +| Change | Dynamic +| Versions Affected | v0.8.1 + +### zfs_arc_min_prefetch_lifespan +`arc_min_prefetch_lifespan` is the minimum time for a prefetched block to remain in ARC before +it is eligible for eviction. + +| zfs_arc_min_prefetch_lifespan | Notes +|---|--- +| Tags | [ARC](#ARC) +| When to change | TBD +| Data Type | int +| Units | clock ticks +| Range | 0 = use default value +| Default | 1 second (as expressed in clock ticks) +| Change | Dynamic +| Versions Affected | v0.7.0 + +### zfs_scan_ignore_errors +`zfs_scan_ignore_errors` allows errors discovered during scrub or resilver to be +ignored. This can be tuned as a workaround to remove the dirty time list (DTL) +when completing a pool scan. It is intended to be used during pool repair or +recovery to prevent resilvering when the pool is imported. + +| zfs_scan_ignore_errors | Notes +|---|--- +| Tags | [resilver](#resilver) +| When to change | See description above +| Data Type | boolean +| Range | 0 = do not ignore errors, 1 = ignore errors during pool scrub or resilver +| Default | 0 +| Change | Dynamic +| Versions Affected | v0.8.1 + +### zfs_top_maxinflight +`zfs_top_maxinflight` is used to limit the maximum number of I/Os queued to top-level +vdevs during scrub or resilver operations. 
The actual top-level vdev limit is calculated +by multiplying the number of child vdevs by `zfs_top_maxinflight` This limit is an +additional cap over and above the scan limits + +| zfs_top_maxinflight | Notes +|---|--- +| Tags | [resilver](#resilver), [scrub](#scrub), [ZIO_scheduler](#zio_scheduler) +| When to change | for modern ZFS versions, the ZIO scheduler limits usually take precedence +| Data Type | int +| Units | I/O operations +| Range | 1 to MAX_INT +| Default | 32 +| Change | Dynamic +| Versions Affected | v0.6.0 + +### zfs_resilver_delay +`zfs_resilver_delay` sets a time-based delay for resilver I/Os. This delay is +in addition to the ZIO scheduler's treatement of scrub workloads. See also +[zfs_scan_idle](#zfs_scan_idle) + +| zfs_resilver_delay | Notes +|---|--- +| Tags | [resilver](#resilver), [ZIO_scheduler](#zio_scheduler) +| When to change | increasing can reduce impact of resilver workload on dynamic workloads +| Data Type | int +| Units | clock ticks +| Range | 0 to MAX_INT +| Default | 2 +| Change | Dynamic +| Versions Affected | v0.6.0 + +### zfs_scrub_delay +`zfs_scrub_delay` sets a time-based delay for scrub I/Os. This delay is +in addition to the ZIO scheduler's treatment of scrub workloads. See also +[zfs_scan_idle](#zfs_scan_idle) + +| zfs_scrub_delay | Notes +|---|--- +| Tags | [scrub](#scrub), [ZIO_scheduler](#zio_scheduler) +| When to change | increasing can reduce impact of scrub workload on dynamic workloads +| Data Type | int +| Units | clock ticks +| Range | 0 to MAX_INT +| Default | 4 +| Change | Dynamic +| Versions Affected | v0.6.0 + +### zfs_scan_idle +When a non-scan I/O has occurred in the past `zfs_scan_idle` clock ticks, then +[zfs_resilver_delay](#zfs_resilver_delay) or [zfs_scrub_delay](#zfs_scrub_delay) +are enabled. + +| zfs_scan_idle | Notes +|---|--- +| Tags | [resilver](#resilver), [scrub](#scrub), [ZIO_scheduler](#zio_scheduler) +| When to change | as part of a resilver/scrub tuning effort +| Data Type | int +| Units | clock ticks +| Range | 0 to MAX_INT +| Default | 50 +| Change | Dynamic +| Versions Affected | v0.6.0 + +### icp_aes_impl +By default, ZFS will choose the highest performance, hardware-optimized implementation of the +AES encryption algorithm. The `icp_aes_impl` tunable overrides this automatic choice. + +Note: `icp_aes_impl` is set in the `icp` kernel module, not the `zfs` kernel module. + +To observe the available options `cat /sys/module/icp/parameters/icp_aes_impl` +The default option is shown in brackets '[]' + +| icp_aes_impl | Notes +|---|--- +| Tags | [encryption](#encryption) +| Kernel module | icp +| When to change | debugging ZFS encryption on hardware +| Data Type | string +| Range | varies by hardware +| Default | automatic, depends on the hardware +| Change | dynamic +| Versions Affected | planned for v2 + +### icp_gcm_impl +By default, ZFS will choose the highest performance, hardware-optimized implementation of the +GCM encryption algorithm. The `icp_gcm_impl` tunable overrides this automatic choice. + +Note: `icp_gcm_impl` is set in the `icp` kernel module, not the `zfs` kernel module. 
+ +To observe the available options `cat /sys/module/icp/parameters/icp_gcm_impl` +The default option is shown in brackets '[]' + +| icp_gcm_impl | Notes +|---|--- +| Tags | [encryption](#encryption) +| Kernel module | icp +| When to change | debugging ZFS encryption on hardware +| Data Type | string +| Range | varies by hardware +| Default | automatic, depends on the hardware +| Change | Dynamic +| Versions Affected | planned for v2 + +### zfs_abd_scatter_min_size +`zfs_abd_scatter_min_size` changes the ARC buffer data (ABD) allocator's threshold +for using linear or page-based scatter buffers. Allocations smaller than `zfs_abd_scatter_min_size` +use linear ABDs. + +Scatter ABD's use at least one page each, so sub-page allocations waste some space +when allocated as scatter allocations. For example, 2KB scatter allocation wastes +half of each page. +Using linear ABD's for small allocations results in slabs containing many allocations. +This can improve memory efficiency, at the expense of more work for ARC evictions +attempting to free pages, because all the buffers on one slab +need to be freed in order to free the slab and its underlying pages. + +Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's possible +for them to actually waste more memory than scatter allocations: +* one page per buf = wasting 3/4 or 7/8 +* one buf per slab = wasting 15/16 + +Spill blocks are typically 512B and are heavily used on systems running _selinux_ +with the default dnode size and the `xattr=sa` property set. + +By default, linear allocations for 512B and 1KB, and scatter allocations for +larger (>= 1.5KB) allocation requests. + +| zfs_abd_scatter_min_size | Notes +|---|--- +| Tags | [ARC](#ARC) +| When to change | debugging memory allocation, especially for large pages +| Data Type | int +| Units | bytes +| Range | 0 to MAX_INT +| Default | 1536 (512B and 1KB allocations will be linear) +| Change | Dynamic +| Versions Affected | planned for v2 + +### zfs_unlink_suspend_progress +`zfs_unlink_suspend_progress` changes the policy for removing pending unlinks. +When enabled, files will not be asynchronously removed from the list of pending +unlinks and the space they consume will be leaked. Once this option has been +disabled and the dataset is remounted, the pending unlinks will be processed +and the freed space returned to the pool. + +| zfs_unlink_suspend_progress | Notes +|---|--- +| Tags | +| When to change | used by the ZFS test suite (ZTS) to facilitate testing +| Data Type | boolean +| Range | 0 = use async unlink removal, 1 = do not async unlink thus leaking space +| Default | 0 +| Change | prior to dataset mount +| Versions Affected | planned for v2 + +### spa_load_verify_shift +`spa_load_verify_shift` sets the fraction of ARC that can be used by inflight I/Os when +verifying the pool during import. This value is a "shift" representing the fraction +of ARC target size (`grep -w c /proc/spl/kstat/zfs/arcstats`). The ARC target size is +shifted to the right. Thus a value of '2' results in the fraction = 1/4, while a value +of '4' results in the fraction = 1/8. + +For large memory machines, pool import can consume large amounts of ARC: much larger than +the value of maxinflight. This can result in [spa_load_verify_maxinflight](#spa_load_verify_maxinflight) +having a value of 0 causing the system to hang. +Setting `spa_load_verify_shift` can reduce this limit and allow importing without hanging. 
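+
+A minimal sketch of the arithmetic, reading the ARC target size from `arcstats`
+and applying the default shift of 4 (1/16 of the ARC target size):
+
+```shell
+# ARC target size in bytes (the "c" row of arcstats)
+arc_c=$(awk '$1 == "c" { print $3 }' /proc/spl/kstat/zfs/arcstats)
+
+# bytes of ARC that in-flight verification I/O may consume during import
+echo $(( arc_c >> 4 ))
+```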
+
+| spa_load_verify_shift | Notes
+|---|---
+| Tags | [import](#import), [ARC](#ARC), [SPA](#SPA)
+| When to change | troubleshooting pool import on large memory machines
+| Data Type | int
+| Units | shift
+| Range | 1 to MAX_INT
+| Default | 4
+| Change | prior to importing a pool
+| Versions Affected | planned for v2
+
+### spa_load_print_vdev_tree
+`spa_load_print_vdev_tree` enables printing of the attempted pool import's vdev tree
+to the ZFS debug message log `/proc/spl/kstat/zfs/dbgmsg`.
+Both the provided vdev tree and MOS vdev tree are printed, which can be useful
+for debugging problems with the zpool `cachefile`
+
+| spa_load_print_vdev_tree | Notes
+|---|---
+| Tags | [import](#import), [SPA](#SPA)
+| When to change | troubleshooting pool import failures
+| Data Type | boolean
+| Range | 0 = do not print pool configuration in logs, 1 = print pool configuration in logs
+| Default | 0
+| Change | prior to pool import
+| Versions Affected | planned for v2
+
+### zfs_max_missing_tvds
+When importing a pool in readonly mode (`zpool import -o readonly=on ...`),
+up to `zfs_max_missing_tvds` top-level vdevs can be missing and the
+import can still be attempted.
+
+Note: This is strictly intended for advanced pool recovery cases since
+missing data is almost inevitable. Pools with missing devices can only be imported
+read-only for safety reasons, and the pool's `failmode` property is automatically
+set to `continue`
+
+The expected use case is to recover pool data immediately after accidentally adding a
+non-protected vdev to a protected pool.
+
+* With 1 missing top-level vdev, ZFS should be able to import the pool and mount all
+ datasets. User data that was not modified after the missing device has been
+ added should be recoverable. Thus snapshots created prior to the
+ addition of that device should be completely intact.
+
+* With 2 missing top-level vdevs, some datasets may fail to mount since there are
+ dataset statistics that are stored as regular metadata. Some data might be
+ recoverable if those vdevs were added recently.
+
+* With 3 or more missing top-level vdevs, the pool is severely damaged and MOS entries
+ may be missing entirely. Chances of data recovery are very low. Note that
+ there are also risks of performing an inadvertent rewind as we might be
+ missing all the vdevs with the latest uberblocks.
+
+| zfs_max_missing_tvds | Notes
+|---|---
+| Tags | [import](#import)
+| When to change | troubleshooting pools with missing devices
+| Data Type | int
+| Units | missing top-level vdevs
+| Range | 0 to MAX_INT
+| Default | 0
+| Change | prior to pool import
+| Versions Affected | planned for v2
+
+### dbuf_metadata_cache_shift
+`dbuf_metadata_cache_shift` sets the size of the dbuf metadata cache
+as a fraction of ARC target size. This is an alternative to setting the dbuf metadata
+cache size with [dbuf_metadata_cache_max_bytes](#dbuf_metadata_cache_max_bytes).
+
+[dbuf_metadata_cache_max_bytes](#dbuf_metadata_cache_max_bytes) overrides `dbuf_metadata_cache_shift`
+
+This value is a "shift" representing the fraction
+of ARC target size (`grep -w c /proc/spl/kstat/zfs/arcstats`). The ARC target size is
+shifted to the right. Thus a value of '2' results in the fraction = 1/4, while a value
+of '6' results in the fraction = 1/64.
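+
+As a sketch only, the ceiling implied by the default shift of 6 can be estimated from the
+ARC target size; the `dbufstats` kstat entry names referenced below are assumptions and may
+vary by version:
+
+```shell
+# estimate the dbuf metadata cache limit: ARC target size shifted right by 6 (1/64)
+arc_c=$(awk '$1 == "c" {print $3}' /proc/spl/kstat/zfs/arcstats)
+echo "dbuf metadata cache limit: $((arc_c >> 6)) bytes"
+
+# the dbufstats kstat reports current metadata cache usage (entry names assumed)
+grep metadata_cache /proc/spl/kstat/zfs/dbufstats
+```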
+
+| dbuf_metadata_cache_shift | Notes
+|---|---
+| Tags | [ARC](#ARC), [dbuf_cache](#dbuf_cache)
+| When to change |
+| Data Type | int
+| Units | shift
+| Range | practical range is ([dbuf_cache_shift](#dbuf_cache_shift) + 1) to MAX_INT
+| Default | 6
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### dbuf_metadata_cache_max_bytes
+`dbuf_metadata_cache_max_bytes` sets the size of the dbuf metadata cache
+as a number of bytes. This is an alternative to setting the dbuf metadata
+cache size with [dbuf_metadata_cache_shift](#dbuf_metadata_cache_shift)
+
+[dbuf_metadata_cache_max_bytes](#dbuf_metadata_cache_max_bytes) overrides `dbuf_metadata_cache_shift`
+
+| dbuf_metadata_cache_max_bytes | Notes
+|---|---
+| Tags | [dbuf_cache](#dbuf_cache)
+| When to change |
+| Data Type | int
+| Units | bytes
+| Range | 0 = use [dbuf_metadata_cache_shift](#dbuf_metadata_cache_shift) to ARC `c_max`
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### dbuf_cache_shift
+`dbuf_cache_shift` sets the size of the dbuf cache as a fraction of ARC target size.
+This is an alternative to setting the dbuf
+cache size with [dbuf_cache_max_bytes](#dbuf_cache_max_bytes).
+
+[dbuf_cache_max_bytes](#dbuf_cache_max_bytes) overrides `dbuf_cache_shift`
+
+This value is a "shift" representing the fraction
+of ARC target size (`grep -w c /proc/spl/kstat/zfs/arcstats`). The ARC target size is
+shifted to the right. Thus a value of '2' results in the fraction = 1/4, while a value
+of '5' results in the fraction = 1/32.
+
+Performance tuning of dbuf cache can be monitored using:
+ * `dbufstat` command
+ * [node_exporter](https://github.com/prometheus/node_exporter) ZFS module for Prometheus environments
+ * [telegraf](https://github.com/influxdata/telegraf) ZFS plugin for general-purpose metric collection
+ * `/proc/spl/kstat/zfs/dbufstats` kstat
+
+| dbuf_cache_shift | Notes
+|---|---
+| Tags | [ARC](#ARC), [dbuf_cache](#dbuf_cache)
+| When to change | to improve performance of read-intensive channel programs
+| Data Type | int
+| Units | shift
+| Range | 5 to MAX_INT
+| Default | 5
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### dbuf_cache_max_bytes
+`dbuf_cache_max_bytes` sets the size of the dbuf cache in bytes.
+This is an alternative to setting the dbuf cache size with
+[dbuf_cache_shift](#dbuf_cache_shift)
+
+Performance tuning of dbuf cache can be monitored using:
+ * `dbufstat` command
+ * [node_exporter](https://github.com/prometheus/node_exporter) ZFS module for Prometheus environments
+ * [telegraf](https://github.com/influxdata/telegraf) ZFS plugin for general-purpose metric collection
+ * `/proc/spl/kstat/zfs/dbufstats` kstat
+
+| dbuf_cache_max_bytes | Notes
+|---|---
+| Tags | [ARC](#ARC), [dbuf_cache](#dbuf_cache)
+| When to change |
+| Data Type | int
+| Units | bytes
+| Range | 0 = use [dbuf_cache_shift](#dbuf_cache_shift) to ARC `c_max`
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### metaslab_force_ganging
+When testing allocation code, `metaslab_force_ganging` forces blocks above the specified size to be ganged.
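+
+A minimal sketch for development testing, using the default of SPA_MAXBLOCKSIZE + 1 to
+effectively disable forced ganging afterwards; the 128 KiB threshold is only an example:
+
+```shell
+# force allocations larger than 128 KiB to gang (development testing only)
+echo 131072 > /sys/module/zfs/parameters/metaslab_force_ganging
+
+# restore the default (SPA_MAXBLOCKSIZE + 1 = 16,777,217 bytes) to stop forcing gang blocks
+echo 16777217 > /sys/module/zfs/parameters/metaslab_force_ganging
+```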
+
+| metaslab_force_ganging | Notes
+|---|---
+| Tags | [allocation](#allocation)
+| When to change | for development testing purposes only
+| Data Type | ulong
+| Units | bytes
+| Range | SPA_MINBLOCKSIZE to (SPA_MAXBLOCKSIZE + 1)
+| Default | SPA_MAXBLOCKSIZE + 1 (16,777,217 bytes)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_default_ms_count
+When adding a top-level vdev, `zfs_vdev_default_ms_count` is the target number of metaslabs.
+
+| zfs_vdev_default_ms_count | Notes
+|---|---
+| Tags | [allocation](#allocation)
+| When to change | for development testing purposes only
+| Data Type | int
+| Range | 16 to MAX_INT
+| Default | 200
+| Change | prior to creating a pool or adding a top-level vdev
+| Versions Affected | planned for v2
+
+### vdev_removal_max_span
+During top-level vdev removal, chunks of data are copied from the vdev
+which may include free space in order to trade bandwidth for IOPS.
+`vdev_removal_max_span` sets the maximum span of free space
+included as unnecessary data in a chunk of copied data.
+
+| vdev_removal_max_span | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | TBD
+| Data Type | int
+| Units | bytes
+| Range | 0 to MAX_INT
+| Default | 32,768 (32 KiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_removal_ignore_errors
+When removing a device, `zfs_removal_ignore_errors` controls the process for
+handling hard I/O errors. When set, if a device encounters
+a hard I/O error during the removal process, the removal will not be cancelled.
+This can result in a normally recoverable block becoming permanently damaged
+and is not recommended. This should only be used as a last resort when the
+pool cannot be returned to a healthy state prior to removing the device.
+
+| zfs_removal_ignore_errors | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | See description for caveat
+| Data Type | boolean
+| Range | during device removal: 0 = hard errors are not ignored, 1 = hard errors are ignored
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_removal_suspend_progress
+`zfs_removal_suspend_progress` is used during automated testing of the ZFS code to
+increase test coverage.
+
+| zfs_removal_suspend_progress | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | do not change
+| Data Type | boolean
+| Range | 0 = do not suspend during vdev removal, 1 = suspend during vdev removal
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_condense_indirect_commit_entry_delay_ms
+During vdev removal, the vdev indirection layer sleeps for `zfs_condense_indirect_commit_entry_delay_ms`
+milliseconds during mapping generation. This parameter is used during automated testing of the
+ZFS code to improve test coverage.
+
+| zfs_condense_indirect_commit_entry_delay_ms | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | do not change
+| Data Type | int
+| Units | milliseconds
+| Range | 0 to MAX_INT
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_condense_indirect_vdevs_enable
+During vdev removal, the condensing process attempts to save memory by removing obsolete mappings.
+`zfs_condense_indirect_vdevs_enable` enables condensing indirect vdev mappings.
+When set, ZFS attempts to condense indirect vdev mappings if the mapping uses more than
+[zfs_condense_min_mapping_bytes](#zfs_condense_min_mapping_bytes) bytes of memory and
+if the obsolete space map object uses more than
+[zfs_condense_max_obsolete_bytes](#zfs_condense_max_obsolete_bytes) bytes on disk.
+
+| zfs_condense_indirect_vdevs_enable | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | TBD
+| Data Type | boolean
+| Range | 0 = do not save memory, 1 = save memory by condensing obsolete mapping after vdev removal
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_condense_max_obsolete_bytes
+After vdev removal, `zfs_condense_max_obsolete_bytes` sets the limit for beginning the condensing
+process. Condensing begins if the obsolete space map takes up more than `zfs_condense_max_obsolete_bytes`
+of space on disk (logically). The default of 1 GiB is small enough relative to a typical pool that the
+space consumed by the obsolete space map is minimal.
+
+See also [zfs_condense_indirect_vdevs_enable](#zfs_condense_indirect_vdevs_enable)
+
+| zfs_condense_max_obsolete_bytes | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | do not change
+| Data Type | ulong
+| Units | bytes
+| Range | 0 to MAX_ULONG
+| Default | 1,073,741,824 (1 GiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_condense_min_mapping_bytes
+After vdev removal, `zfs_condense_min_mapping_bytes` is the lower limit for determining when
+to condense the in-memory obsolete space map. The condensing process will not continue unless a minimum of
+`zfs_condense_min_mapping_bytes` of memory can be freed.
+
+See also [zfs_condense_indirect_vdevs_enable](#zfs_condense_indirect_vdevs_enable)
+
+| zfs_condense_min_mapping_bytes | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | do not change
+| Data Type | ulong
+| Units | bytes
+| Range | 0 to MAX_ULONG
+| Default | 131,072 (128 KiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_initializing_max_active
+`zfs_vdev_initializing_max_active` sets the maximum initializing I/Os active to each device.
+
+| zfs_vdev_initializing_max_active | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler)
+| Data Type | uint32
+| Units | I/O operations
+| Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active)
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_initializing_min_active
+`zfs_vdev_initializing_min_active` sets the minimum initializing I/Os active to each device.
+
+| zfs_vdev_initializing_min_active | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler)
+| Data Type | uint32
+| Units | I/O operations
+| Range | 1 to [zfs_vdev_initializing_max_active](#zfs_vdev_initializing_max_active)
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_removal_max_active
+`zfs_vdev_removal_max_active` sets the maximum top-level vdev removal I/Os active to each device.
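+
+Like the other ZIO scheduler limits, this can be observed and adjusted through SysFS;
+the value below is illustrative only:
+
+```shell
+# observe the current removal I/O limits
+cat /sys/module/zfs/parameters/zfs_vdev_removal_min_active
+cat /sys/module/zfs/parameters/zfs_vdev_removal_max_active
+
+# example only: allow more concurrent removal I/Os per device during an evacuation
+echo 4 > /sys/module/zfs/parameters/zfs_vdev_removal_max_active
+```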
+
+| zfs_vdev_removal_max_active | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler)
+| Data Type | uint32
+| Units | I/O operations
+| Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active)
+| Default | 2
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_removal_min_active
+`zfs_vdev_removal_min_active` sets the minimum top-level vdev removal I/Os active to each device.
+
+| zfs_vdev_removal_min_active | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler)
+| Data Type | uint32
+| Units | I/O operations
+| Range | 1 to [zfs_vdev_removal_max_active](#zfs_vdev_removal_max_active)
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_trim_max_active
+`zfs_vdev_trim_max_active` sets the maximum trim I/Os active to each device.
+
+| zfs_vdev_trim_max_active | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler)
+| Data Type | uint32
+| Units | I/O operations
+| Range | 1 to [zfs_vdev_max_active](#zfs_vdev_max_active)
+| Default | 2
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_trim_min_active
+`zfs_vdev_trim_min_active` sets the minimum trim I/Os active to each device.
+
+| zfs_vdev_trim_min_active | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | See [ZFS I/O Scheduler](https://github.com/zfsonlinux/zfs/wiki/ZIO-Scheduler)
+| Data Type | uint32
+| Units | I/O operations
+| Range | 1 to [zfs_vdev_trim_max_active](#zfs_vdev_trim_max_active)
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_initialize_value
+When initializing a vdev, ZFS writes the `zfs_initialize_value` pattern to the device.
+
+| zfs_initialize_value | Notes
+|---|---
+| Tags | [vdev_initialize](#vdev_initialize)
+| When to change | when debugging initialization code
+| Data Type | uint32 or uint64
+| Default | 0xdeadbeef for 32-bit systems, 0xdeadbeefdeadbeee for 64-bit systems
+| Change | prior to running `zpool initialize`
+| Versions Affected | planned for v2
+
+### zfs_lua_max_instrlimit
+`zfs_lua_max_instrlimit` limits the number of Lua instructions a ZFS channel program can execute,
+which bounds its run time.
+
+| zfs_lua_max_instrlimit | Notes
+|---|---
+| Tags | [channel_programs](#channel_programs)
+| When to change | to enforce a CPU usage limit on ZFS channel programs
+| Data Type | ulong
+| Units | LUA instructions
+| Range | 0 to MAX_ULONG
+| Default | 100,000,000
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_lua_max_memlimit
+`zfs_lua_max_memlimit` is the maximum memory limit for a ZFS channel program.
+
+| zfs_lua_max_memlimit | Notes
+|---|---
+| Tags | [channel_programs](#channel_programs)
+| When to change |
+| Data Type | ulong
+| Units | bytes
+| Range | 0 to MAX_ULONG
+| Default | 104,857,600 (100 MiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_max_dataset_nesting
+`zfs_max_dataset_nesting` limits the depth of nested datasets.
+Deeply nested datasets can overflow the stack. The maximum stack depth depends on kernel compilation options,
+so it is impractical to predict the possible limits.
For kernels compiled with small stack sizes,
+`zfs_max_dataset_nesting` may require changes.
+
+| zfs_max_dataset_nesting | Notes
+|---|---
+| Tags | [dataset](#dataset)
+| When to change | can be tuned temporarily to fix existing datasets that exceed the predefined limit
+| Data Type | int
+| Units | datasets
+| Range | 0 to MAX_INT
+| Default | 50
+| Change | Dynamic, though once on-disk the value for the pool is set
+| Versions Affected | planned for v2
+
+### zfs_ddt_data_is_special
+`zfs_ddt_data_is_special` enables the deduplication table (DDT) to reside on a special top-level vdev.
+
+| zfs_ddt_data_is_special | Notes
+|---|---
+| Tags | [dedup](#dedup), [special_vdev](#special_vdev)
+| When to change | when using a special top-level vdev and no dedup top-level vdev and it is desired to store the DDT in the main pool top-level vdevs
+| Data Type | boolean
+| Range | 0=do not use special vdevs to store DDT, 1=store DDT in special vdevs
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_user_indirect_is_special
+If special vdevs are in use, `zfs_user_indirect_is_special` enables user data indirect blocks (a form of metadata)
+to be written to the special vdevs.
+
+| zfs_user_indirect_is_special | Notes
+|---|---
+| Tags | [special_vdev](#special_vdev)
+| When to change | to force user data indirect blocks to remain in the main pool top-level vdevs
+| Data Type | boolean
+| Range | 0=do not write user indirect blocks to a special vdev, 1=write user indirect blocks to a special vdev
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_reconstruct_indirect_combinations_max
+After device removal, if an indirect split block contains more than `zfs_reconstruct_indirect_combinations_max`
+possible unique combinations when being reconstructed, it is considered too computationally
+expensive to check them all. Instead, at most `zfs_reconstruct_indirect_combinations_max` randomly-selected
+combinations are attempted each time the block is accessed. This allows all segment
+copies to participate fairly in the reconstruction when all combinations
+cannot be checked and prevents repeated use of one bad copy.
+
+| zfs_reconstruct_indirect_combinations_max | Notes
+|---|---
+| Tags | [vdev_removal](#vdev_removal)
+| When to change | TBD
+| Data Type | int
+| Units | attempts
+| Range | 0=do not limit attempts, 1 to MAX_INT = limit for attempts
+| Default | 4096
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_send_unmodified_spill_blocks
+`zfs_send_unmodified_spill_blocks` enables sending of unmodified spill blocks in the send stream.
+Under certain circumstances, previous versions of ZFS could incorrectly remove the spill block from an
+existing object. Including unmodified copies of the spill blocks creates a
+backwards compatible stream which will recreate a spill block if it was incorrectly removed.
+
+| zfs_send_unmodified_spill_blocks | Notes
+|---|---
+| Tags | [send](#send)
+| When to change | TBD
+| Data Type | boolean
+| Range | 0=do not send unmodified spill blocks, 1=send unmodified spill blocks
+| Default | 1
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_spa_discard_memory_limit
+`zfs_spa_discard_memory_limit` sets the limit for maximum memory used for prefetching a
+pool's checkpoint space map on each vdev while discarding a pool checkpoint.
+
+| zfs_spa_discard_memory_limit | Notes
+|---|---
+| Tags | [checkpoint](#checkpoint)
+| When to change | TBD
+| Data Type | int
+| Units | bytes
+| Range | 0 to MAX_INT
+| Default | 16,777,216 (16 MiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_special_class_metadata_reserve_pct
+`zfs_special_class_metadata_reserve_pct` sets a threshold for space in special vdevs to be reserved exclusively
+for metadata. This prevents small file blocks or the dedup table from completely consuming a special vdev.
+
+| zfs_special_class_metadata_reserve_pct | Notes
+|---|---
+| Tags | [special_vdev](#special_vdev)
+| When to change | TBD
+| Data Type | int
+| Units | percent
+| Range | 0 to 100
+| Default | 25
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_trim_extent_bytes_max
+`zfs_trim_extent_bytes_max` sets the maximum size of a trim (aka discard, scsi unmap) command.
+Ranges larger than `zfs_trim_extent_bytes_max` are split into chunks no larger than `zfs_trim_extent_bytes_max`
+bytes prior to being issued to the device.
+Use `zpool iostat -w` to observe the latency of trim commands.
+
+| zfs_trim_extent_bytes_max | Notes
+|---|---
+| Tags | [trim](#trim)
+| When to change | if the device can efficiently handle larger trim requests
+| Data Type | uint
+| Units | bytes
+| Range | [zfs_trim_extent_bytes_min](#zfs_trim_extent_bytes_min) to MAX_UINT
+| Default | 134,217,728 (128 MiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_trim_extent_bytes_min
+`zfs_trim_extent_bytes_min` sets the minimum size of trim (aka discard, scsi unmap) commands.
+Trim ranges smaller than `zfs_trim_extent_bytes_min` are skipped unless they're part of a larger
+range which was broken into chunks. Some devices have performance degradation during trim operations,
+so using a larger `zfs_trim_extent_bytes_min` can reduce the total amount of space trimmed.
+Use `zpool iostat -w` to observe the latency of trim commands.
+
+| zfs_trim_extent_bytes_min | Notes
+|---|---
+| Tags | [trim](#trim)
+| When to change | when trim is in use and device performance suffers from trimming small allocations
+| Data Type | uint
+| Units | bytes
+| Range | 0=trim all unallocated space, otherwise minimum physical block size to MAX_UINT
+| Default | 32,768 (32 KiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_trim_metaslab_skip
+`zfs_trim_metaslab_skip` enables uninitialized metaslabs to be skipped during the trim (aka discard, scsi unmap)
+process. `zfs_trim_metaslab_skip` can be useful for pools constructed from large thinly-provisioned devices where trim
+operations perform slowly.
+As a pool ages, an increasing fraction of the pool's metaslabs are initialized, progressively degrading the
+usefulness of this option.
+This setting is stored when starting a manual trim and persists for the duration of the requested trim.
+Use `zpool iostat -w` to observe the latency of trim commands.
+
+| zfs_trim_metaslab_skip | Notes
+|---|---
+| Tags | [trim](#trim)
+| When to change |
+| Data Type | boolean
+| Range | 0=do not skip uninitialized metaslabs during trim, 1=skip uninitialized metaslabs during trim
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_trim_queue_limit
+`zfs_trim_queue_limit` sets the maximum queue depth for leaf vdevs.
+See also [zfs_vdev_trim_max_active](#zfs_vdev_trim_max_active) and
+[zfs_trim_extent_bytes_max](#zfs_trim_extent_bytes_max)
+Use `zpool iostat -q` to observe trim queue depth.
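+
+For example, trim queue depth and latency can be watched while a manual trim runs
+(`POOL_NAME` is a placeholder; column names vary by ZFS version):
+
+```shell
+# start a manual trim, then watch queue depths and latency histograms at 5 second intervals
+zpool trim POOL_NAME
+zpool iostat -q 5
+zpool iostat -w 5
+```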
+
+| zfs_trim_queue_limit | Notes
+|---|---
+| Tags | [trim](#trim)
+| When to change | to restrict the number of trim commands in the queue
+| Data Type | uint
+| Units | I/O operations
+| Range | 1 to MAX_UINT
+| Default | 10
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_trim_txg_batch
+`zfs_trim_txg_batch` sets the number of transaction groups worth of frees which should be aggregated
+before trim (aka discard, scsi unmap) commands are issued to a device. This setting represents a
+trade-off between issuing larger, more efficient trim commands and the
+delay before the recently trimmed space is available for use by the device.
+
+Increasing this value will allow frees to be aggregated for a longer time.
+This will result in larger trim operations and potentially increased memory
+usage. Decreasing this value will have the opposite effect. The default
+value of 32 was empirically determined to be a reasonable compromise.
+
+| zfs_trim_txg_batch | Notes
+|---|---
+| Tags | [trim](#trim)
+| When to change | TBD
+| Data Type | uint
+| Units | transaction groups (txgs)
+| Range | 1 to MAX_UINT
+| Default | 32
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_aggregate_trim
+`zfs_vdev_aggregate_trim` allows trim I/Os to be aggregated. This is normally not helpful because
+the extents to be trimmed will already have been aggregated by the metaslab.
+
+
+| zfs_vdev_aggregate_trim | Notes
+|---|---
+| Tags | [trim](#trim), [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | when debugging trim code or trim performance issues
+| Data Type | boolean
+| Range | 0=do not attempt to aggregate trim commands, 1=attempt to aggregate trim commands
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_aggregation_limit_non_rotating
+`zfs_vdev_aggregation_limit_non_rotating` is the equivalent of
+[zfs_vdev_aggregation_limit](#zfs_vdev_aggregation_limit) for devices
+which represent themselves as non-rotating to the Linux blkdev interfaces.
+Such devices have a value of 0 in `/sys/block/DEVICE/queue/rotational` and are expected to be SSDs.
+
+| zfs_vdev_aggregation_limit_non_rotating | Notes
+|---|---
+| Tags | [vdev](#vdev), [ZIO_scheduler](#zio_scheduler)
+| When to change | see [zfs_vdev_aggregation_limit](#zfs_vdev_aggregation_limit)
+| Data Type | int
+| Units | bytes
+| Range | 0 to MAX_INT
+| Default | 131,072 bytes (128 KiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zil_nocacheflush
+ZFS uses barriers (volatile cache flush commands) to ensure data is committed to
+permanent media by devices. This ensures consistent on-media state for devices
+where caches are volatile (e.g. HDDs).
+
+`zil_nocacheflush` disables the cache flush commands that are normally sent to devices by
+the ZIL after a log write has completed.
+
+The difference between `zil_nocacheflush` and [zfs_nocacheflush](#zfs_nocacheflush) is
+`zil_nocacheflush` applies to ZIL writes while [zfs_nocacheflush](#zfs_nocacheflush) disables
+barrier writes to the pool devices at the end of transaction group syncs.
+
+WARNING: setting this can cause ZIL corruption on power loss if the device has a volatile write cache.
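+
+If, after weighing the warning above, the setting is still wanted for devices with
+nonvolatile write caches, a sketch of making it persistent via `/etc/modprobe.d/zfs.conf`
+(the comment text is illustrative):
+
+```shell
+# disable ZIL cache flush commands; log devices have nonvolatile write caches
+cat >> /etc/modprobe.d/zfs.conf <<'EOF'
+options zfs zil_nocacheflush=1
+EOF
+```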
+
+
+| zil_nocacheflush | Notes
+|---|---
+| Tags | [disks](#disks), [ZIL](#ZIL)
+| When to change | If the storage device has nonvolatile cache, then disabling cache flush can save the cost of occasional cache flush commands
+| Data Type | boolean
+| Range | 0=send cache flush commands, 1=do not send cache flush commands
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zio_deadman_log_all
+`zio_deadman_log_all` enables debugging messages for all ZFS I/Os, rather than only for leaf
+ZFS I/Os for a vdev. This is meant to be used by developers to gain diagnostic information for hang
+conditions which don't involve a mutex or other locking primitive. Typically these are conditions where a thread in
+the zio pipeline is looping indefinitely.
+
+See also [zfs_dbgmsg_enable](#zfs_dbgmsg_enable)
+
+| zio_deadman_log_all | Notes
+|---|---
+| Tags | [debug](#debug)
+| When to change | when debugging ZFS I/O pipeline
+| Data Type | boolean
+| Range | 0=do not log all deadman events, 1=log all deadman events
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zio_decompress_fail_fraction
+If non-zero, `zio_decompress_fail_fraction` represents the denominator of the probability that ZFS
+should induce a decompression failure. For instance, for a 5% decompression failure rate, this value
+should be set to 20.
+
+| zio_decompress_fail_fraction | Notes
+|---|---
+| Tags | [debug](#debug)
+| When to change | when debugging ZFS internal compressed buffer code
+| Data Type | ulong
+| Units | probability of induced decompression failure is 1/`zio_decompress_fail_fraction`
+| Range | 0 = do not induce failures, or 1 to MAX_ULONG
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zio_slow_io_ms
+An I/O operation taking more than `zio_slow_io_ms` milliseconds to complete is marked as a slow I/O.
+Slow I/O counters can be observed with `zpool status -s`.
+Each slow I/O causes a delay zevent, observable using `zpool events`.
+See also `zfs-events(5)`.
+
+| zio_slow_io_ms | Notes
+|---|---
+| Tags | [vdev](#vdev), [zed](#zed)
+| When to change | when debugging slow devices and the default value is inappropriate
+| Data Type | int
+| Units | milliseconds
+| Range | 0 to MAX_INT
+| Default | 30,000 (30 seconds)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### vdev_validate_skip
+`vdev_validate_skip` disables label validation steps during pool import.
+Changing is not recommended unless you know what you are doing and are recovering a damaged label.
+
+| vdev_validate_skip | Notes
+|---|---
+| Tags | [vdev](#vdev)
+| When to change | do not change
+| Data Type | boolean
+| Range | 0=validate labels during pool import, 1=do not validate vdev labels during pool import
+| Default | 0
+| Change | prior to pool import
+| Versions Affected | planned for v2
+
+### zfs_async_block_max_blocks
+`zfs_async_block_max_blocks` limits the number of blocks freed in a single transaction group commit.
+During deletes of large objects, such as snapshots, the number of freed blocks can cause the DMU
+to extend txg sync times well beyond [zfs_txg_timeout](#zfs_txg_timeout). `zfs_async_block_max_blocks`
+is used to limit these effects.
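+
+A hedged sketch of capping the free rate when snapshot deletion extends txg sync times;
+the value, the `POOL_NAME` placeholder, and the per-pool `txgs` kstat path are illustrative
+assumptions:
+
+```shell
+# example only: cap the number of blocks freed per txg commit
+echo 100000 > /sys/module/zfs/parameters/zfs_async_block_max_blocks
+
+# watch per-txg sync times to judge the effect
+cat /proc/spl/kstat/zfs/POOL_NAME/txgs
+```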
+
+| zfs_async_block_max_blocks | Notes
+|---|---
+| Tags | [delete](#delete), [DMU](#DMU)
+| When to change | TBD
+| Data Type | ulong
+| Units | blocks
+| Range | 1 to MAX_ULONG
+| Default | MAX_ULONG (do not limit)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_checksum_events_per_second
+`zfs_checksum_events_per_second` is a rate limit for checksum events.
+Note that this should not be set below the `zed` thresholds (currently 10 checksums over 10 sec)
+or else `zed` may not trigger any action.
+
+| zfs_checksum_events_per_second | Notes
+|---|---
+| Tags | [vdev](#vdev)
+| When to change | TBD
+| Data Type | uint
+| Units | checksum events
+| Range | `zed` threshold to MAX_UINT
+| Default | 20
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_disable_ivset_guid_check
+`zfs_disable_ivset_guid_check` disables the requirement that IVset guids be present and match when doing a raw
+receive of encrypted datasets. Intended for users whose pools were created with
+ZFS on Linux pre-release versions and now have compatibility issues.
+
+For a ZFS raw receive, from a send stream created by `zfs send --raw`, the crypt_keydata nvlist includes
+a to_ivset_guid to be set on the new snapshot. This value will override the value generated by the snapshot code.
+However, this value may not be present, because older implementations of
+the raw send code did not include this value.
+When `zfs_disable_ivset_guid_check` is enabled, the receive proceeds and a newly-generated value is used.
+
+| zfs_disable_ivset_guid_check | Notes
+|---|---
+| Tags | [receive](#receive)
+| When to change | debugging pre-release ZFS raw sends
+| Data Type | boolean
+| Range | 0=check IVset guid, 1=do not check IVset guid
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_obsolete_min_time_ms
+`zfs_obsolete_min_time_ms` is similar to [zfs_free_min_time_ms](#zfs_free_min_time_ms)
+and is used for cleanup of old indirection records for vdevs removed using the `zpool remove` command.
+
+| zfs_obsolete_min_time_ms | Notes
+|---|---
+| Tags | [delete](#delete), [remove](#remove)
+| When to change | TBD
+| Data Type | int
+| Units | milliseconds
+| Range | 0 to MAX_INT
+| Default | 500
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_override_estimate_recordsize
+`zfs_override_estimate_recordsize` overrides the default logic for estimating block
+sizes when doing a zfs send. The default heuristic is that the average block size will be the current recordsize.
+
+| zfs_override_estimate_recordsize | Notes
+|---|---
+| Tags | [send](#send)
+| When to change | if most data in your dataset is not of the current recordsize and you require accurate zfs send size estimates
+| Data Type | ulong
+| Units | bytes
+| Range | 0=do not override, 1 to MAX_ULONG
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_remove_max_segment
+`zfs_remove_max_segment` sets the largest contiguous segment that ZFS attempts to allocate when removing a vdev.
+This can be no larger than 16MB. If there is a performance problem with attempting to allocate large blocks, consider decreasing this.
+The value is rounded up to a power-of-2.
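+
+For example (value illustrative), the segment size can be reduced if large allocations
+stall during a removal:
+
+```shell
+# limit removal copy segments to 1 MiB; the value is rounded up to a power of 2
+echo 1048576 > /sys/module/zfs/parameters/zfs_remove_max_segment
+```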
+
+| zfs_remove_max_segment | Notes
+|---|---
+| Tags | [remove](#remove)
+| When to change | after removing a top-level vdev, consider decreasing if there is a performance degradation when attempting to allocate large blocks
+| Data Type | int
+| Units | bytes
+| Range | maximum of the physical block size of all vdevs in the pool to 16,777,216 bytes (16 MiB)
+| Default | 16,777,216 bytes (16 MiB)
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_resilver_disable_defer
+`zfs_resilver_disable_defer` disables the `resilver_defer` pool feature.
+The `resilver_defer` feature allows ZFS to postpone new resilvers if an existing resilver is in progress.
+
+| zfs_resilver_disable_defer | Notes
+|---|---
+| Tags | [resilver](#resilver)
+| When to change | if resilver postponement is not desired due to overall resilver time constraints
+| Data Type | boolean
+| Range | 0=allow `resilver_defer` to postpone new resilver operations, 1=immediately restart resilver when needed
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_scan_suspend_progress
+`zfs_scan_suspend_progress` causes a scrub or resilver scan to freeze without actually pausing.
+
+| zfs_scan_suspend_progress | Notes
+|---|---
+| Tags | [resilver](#resilver), [scrub](#scrub)
+| When to change | testing or debugging scan code
+| Data Type | boolean
+| Range | 0=do not freeze scans, 1=freeze scans
+| Default | 0
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_scrub_min_time_ms
+Scrubs are processed by the sync thread. While scrubbing, at least `zfs_scrub_min_time_ms` milliseconds
+are spent working on the scrub between txg syncs.
+
+| zfs_scrub_min_time_ms | Notes
+|---|---
+| Tags | [scrub](#scrub)
+| When to change |
+| Data Type | int
+| Units | milliseconds
+| Range | 1 to ([zfs_txg_timeout](#zfs_txg_timeout) - 1)
+| Default | 1,000
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_slow_io_events_per_second
+`zfs_slow_io_events_per_second` is a rate limit for slow I/O events.
+Note that this should not be set below the `zed` thresholds (currently 10 events over 10 sec)
+or else `zed` may not trigger any action.
+
+| zfs_slow_io_events_per_second | Notes
+|---|---
+| Tags | [vdev](#vdev)
+| When to change | TBD
+| Data Type | uint
+| Units | slow I/O events
+| Range | `zed` threshold to MAX_UINT
+| Default | 20
+| Change | Dynamic
+| Versions Affected | planned for v2
+
+### zfs_vdev_min_ms_count
+`zfs_vdev_min_ms_count` is the minimum number of metaslabs to create in a top-level vdev.
+
+| zfs_vdev_min_ms_count | Notes
+|---|---
+| Tags | [metaslab](#metaslab), [vdev](#vdev)
+| When to change | TBD
+| Data Type | int
+| Units | metaslabs
+| Range | 16 to [zfs_vdev_ms_count_limit](#zfs_vdev_ms_count_limit)
+| Default | 16
+| Change | prior to creating a pool or adding a top-level vdev
+| Versions Affected | planned for v2
+
+### zfs_vdev_ms_count_limit
+`zfs_vdev_ms_count_limit` is the practical upper limit for the number of metaslabs per top-level vdev.
+
+| zfs_vdev_ms_count_limit | Notes
+|---|---
+| Tags | [metaslab](#metaslab), [vdev](#vdev)
+| When to change | TBD
+| Data Type | int
+| Units | metaslabs
+| Range | [zfs_vdev_min_ms_count](#zfs_vdev_min_ms_count) to 131,072
+| Default | 131,072
+| Change | prior to creating a pool or adding a top-level vdev
+| Versions Affected | planned for v2
+
+### spl_hostid
+`spl_hostid` is a unique system id number. It originated in Sun's products where most systems had a
+unique id assigned at the factory.
This assignment does not exist in modern hardware.
+In ZFS, the hostid is stored in the vdev label and can be used to determine if another system had
+imported the pool.
+When set, `spl_hostid` can be used to uniquely identify a system.
+By default this value is set to zero which indicates the hostid is disabled.
+It can be explicitly enabled by placing a unique non-zero value in the file shown in
+[spl_hostid_path](#spl_hostid_path)
+
+| spl_hostid | Notes
+|---|---
+| Tags | [hostid](#hostid), [MMP](#MMP)
+| Kernel module | spl
+| When to change | to uniquely identify a system when vdevs can be shared across multiple systems
+| Data Type | ulong
+| Range | 0=ignore hostid, 1 to 4,294,967,295 (32-bits or 0xffffffff)
+| Default | 0
+| Change | prior to importing pool
+| Versions Affected | v0.6.1
+
+### spl_hostid_path
+`spl_hostid_path` is the path name for a file that can contain a unique hostid.
+For testing purposes, `spl_hostid_path` can be overridden by the ZFS_HOSTID environment variable.
+
+| spl_hostid_path | Notes
+|---|---
+| Tags | [hostid](#hostid), [MMP](#MMP)
+| Kernel module | spl
+| When to change | when creating a new ZFS distribution where the default value is inappropriate
+| Data Type | string
+| Default | "/etc/hostid"
+| Change | read-only, can only be changed prior to spl module load
+| Versions Affected | v0.6.1
+
+### spl_kmem_alloc_max
+Large `kmem_alloc()` allocations fail if they exceed KMALLOC_MAX_SIZE, as determined by the kernel source.
+Allocations which are marginally smaller than this limit may succeed but
+should still be avoided due to the expense of locating a contiguous range
+of free pages. Therefore, a maximum kmem size with a reasonable safety
+margin of 4x is set. `kmem_alloc()` allocations larger than this maximum
+will quickly fail. `vmem_alloc()` allocations less than or equal to this
+value will use `kmalloc()`, but shift to `vmalloc()` when exceeding this value.
+
+| spl_kmem_alloc_max | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | uint
+| Units | bytes
+| Range | TBD
+| Default | KMALLOC_MAX_SIZE / 4
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_alloc_warn
+As a general rule `kmem_alloc()` allocations should be small, preferably
+just a few pages, since they must be physically contiguous. Therefore, a
+rate limited warning is printed to the console for any `kmem_alloc()`
+which exceeds the threshold `spl_kmem_alloc_warn`
+
+The default warning threshold is set to eight pages but capped at 32K to
+accommodate systems using large pages. This value was selected to be small
+enough to ensure the largest allocations are quickly noticed and fixed,
+but large enough to avoid logging any warnings when an allocation size is
+larger than optimal but not a serious concern. Since this value is tunable,
+developers are encouraged to set it lower when testing so any new largish
+allocations are quickly caught. These warnings may be disabled by setting
+the threshold to zero.
+
+| spl_kmem_alloc_warn | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | developers are encouraged to set it lower when testing so any new, large allocations are quickly caught
+| Data Type | uint
+| Units | bytes
+| Range | 0=disable the warnings, 1 to MAX_UINT
+| Default | 32,768 (32 KiB)
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_expire
+Cache expiration is part of default illumos cache behavior.
The idea is
+that objects in magazines which have not been recently accessed should be
+returned to the slabs periodically. This is known as cache aging and,
+when enabled, objects are typically returned after 15 seconds.
+
+On the other hand, Linux slabs are designed to never move objects back to
+the slabs unless there is memory pressure. This is possible because under
+Linux the cache will be notified when memory is low and objects can be
+released.
+
+By default only the Linux method is enabled. It has been shown to improve
+responsiveness on low memory systems and not negatively impact the performance
+of systems with more memory. This policy may be changed by setting the
+`spl_kmem_cache_expire` bit mask as follows; both policies may be enabled
+concurrently.
+
+| spl_kmem_cache_expire | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | bitmask
+| Range | 0x01 - Aging (illumos), 0x02 - Low memory (Linux)
+| Default | 0x02
+| Change | Dynamic
+| Versions Affected | v0.6.1
+
+### spl_kmem_cache_kmem_limit
+Depending on the size of a memory cache object it may be backed by `kmalloc()`
+or `vmalloc()` memory. This is because the size of the required allocation
+greatly impacts the best way to allocate the memory.
+
+When objects are small and only a small number of memory pages need to be
+allocated, ideally just one, then `kmalloc()` is very efficient. However,
+allocating multiple pages with `kmalloc()` gets increasingly expensive
+because the pages must be physically contiguous.
+
+For this reason we shift to `vmalloc()` for slabs of large objects, which
+removes the need for contiguous pages. `vmalloc()` cannot be used in
+all cases because there is significant locking overhead involved. This
+function takes a single global lock over the entire virtual address range
+which serializes all allocations. Using slightly different allocation
+functions for small and large objects allows us to handle a wide range of
+object sizes.
+
+The `spl_kmem_cache_kmem_limit` value is used to determine this cutoff
+size. One quarter of the kernel's compiled PAGE_SIZE is used as the default value because
+[spl_kmem_cache_obj_per_slab](#spl_kmem_cache_obj_per_slab) defaults to 16.
+With these default values, at most four contiguous pages are allocated.
+
+| spl_kmem_cache_kmem_limit | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | uint
+| Units | pages
+| Range | TBD
+| Default | PAGE_SIZE / 4
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_max_size
+`spl_kmem_cache_max_size` is the maximum size of a kmem cache slab in MiB.
+This effectively limits the maximum cache object size to
+`spl_kmem_cache_max_size` / [spl_kmem_cache_obj_per_slab](#spl_kmem_cache_obj_per_slab)
+Kmem caches may not be created with objects sized larger than this limit.
+
+| spl_kmem_cache_max_size | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | uint
+| Units | MiB
+| Range | TBD
+| Default | 4 for 32-bit kernel, 32 for 64-bit kernel
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_obj_per_slab
+`spl_kmem_cache_obj_per_slab` is the preferred number of objects per slab in the kmem cache.
+In general, a larger value will increase the cache's memory footprint while decreasing the time
+required to perform an allocation.
Conversely, a smaller value will minimize the footprint and
+improve cache reclaim time, but individual allocations may take longer.
+
+| spl_kmem_cache_obj_per_slab | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | uint
+| Units | kmem cache objects
+| Range | TBD
+| Default | 8
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_obj_per_slab_min
+`spl_kmem_cache_obj_per_slab_min` is the minimum number of objects allowed per slab.
+Normally slabs will contain [spl_kmem_cache_obj_per_slab](#spl_kmem_cache_obj_per_slab) objects but
+for caches that contain very large objects it's desirable to only have a few, or even just one, object per slab.
+
+| spl_kmem_cache_obj_per_slab_min | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | debugging kmem cache operations
+| Data Type | uint
+| Units | kmem cache objects
+| Range | TBD
+| Default | 1
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_reclaim
+`spl_kmem_cache_reclaim` prevents Linux from being able to rapidly reclaim all the memory held by the kmem caches.
+This may be useful in circumstances where it's preferable that Linux reclaim memory from some other subsystem first.
+Setting `spl_kmem_cache_reclaim` increases the likelihood of out-of-memory events on a memory-constrained system.
+
+| spl_kmem_cache_reclaim | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | boolean
+| Range | 0=enable rapid memory reclaim from kmem caches, 1=disable rapid memory reclaim from kmem caches
+| Default | 0
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_slab_limit
+For small objects the Linux slab allocator should be used to make the most efficient use of the memory.
+However, large objects are not supported by the Linux slab allocator and therefore the SPL implementation is preferred.
+`spl_kmem_cache_slab_limit` is used to determine the cutoff between a small and large object.
+
+Objects of `spl_kmem_cache_slab_limit` or smaller will be allocated using the Linux slab allocator,
+large objects use the SPL allocator. A cutoff of 16 KiB was determined to be optimal for architectures
+using 4 KiB pages.
+
+| spl_kmem_cache_slab_limit | Notes
+|---|---
+| Tags | [memory](#memory)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | uint
+| Units | bytes
+| Range | TBD
+| Default | 16,384 (16 KiB) when kernel PAGE_SIZE = 4KiB, 0 for other PAGE_SIZE values
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_max_show_tasks
+`spl_max_show_tasks` is the limit of tasks per pending list in each taskq shown in
+ `/proc/spl/taskq` and `/proc/spl/taskq-all`.
+Reading the ProcFS files walks the lists with the lock held and could cause a lockup if the list
+grows too large. If the list is larger than the limit, the string `"(truncated)"` is printed.
+
+| spl_max_show_tasks | Notes
+|---|---
+| Tags | [taskq](#taskq)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | uint
+| Units | tasks reported
+| Range | 0 disables the limit, 1 to MAX_UINT
+| Default | 512
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_panic_halt
+`spl_panic_halt` enables kernel panic upon assertion failures.
+When not enabled, the asserting thread is halted to facilitate further debugging.
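+
+The spl module parameters follow the same SysFS and modprobe.d pattern as the zfs module;
+the `/etc/modprobe.d/spl.conf` file name below is an assumption:
+
+```shell
+# observe and enable panic-on-assertion for the running kernel
+cat /sys/module/spl/parameters/spl_panic_halt
+echo 1 > /sys/module/spl/parameters/spl_panic_halt
+
+# make the setting persistent across reboots
+echo "options spl spl_panic_halt=1" >> /etc/modprobe.d/spl.conf
+```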
+
+
+| spl_panic_halt | Notes
+|---|---
+| Tags | [debug](#debug), [panic](#panic)
+| Kernel module | spl
+| When to change | when debugging assertions and kernel core dumps are desired
+| Data Type | boolean
+| Range | 0=halt thread upon assertion, 1=panic kernel upon assertion
+| Default | 0
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_taskq_kick
+Upon writing a non-zero value to `spl_taskq_kick`, all taskqs are scanned.
+If any taskq has a pending task more than 5 seconds old, the taskq spawns more threads.
+This can be useful in rare deadlock situations caused by one or more taskqs not spawning a thread when it should.
+
+| spl_taskq_kick | Notes
+|---|---
+| Tags | [taskq](#taskq)
+| Kernel module | spl
+| When to change | See description above
+| Data Type | uint
+| Units | N/A
+| Default | 0
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_taskq_thread_bind
+`spl_taskq_thread_bind` enables binding taskq threads to specific CPUs, distributed evenly over the available CPUs.
+By default, this behavior is disabled to allow the Linux scheduler the maximum flexibility to determine
+where a thread should run.
+
+| spl_taskq_thread_bind | Notes
+|---|---
+| Tags | [CPU](#CPU), [taskq](#taskq)
+| Kernel module | spl
+| When to change | when debugging CPU scheduling options
+| Data Type | boolean
+| Range | 0=taskqs are not bound to specific CPUs, 1=taskqs are bound to CPUs
+| Default | 0
+| Change | prior to loading spl kernel module
+| Versions Affected | v0.7.0
+
+### spl_taskq_thread_dynamic
+`spl_taskq_thread_dynamic` enables dynamic taskqs. When enabled, taskqs created with
+the TASKQ_DYNAMIC flag will by default create only a single thread.
+New threads will be created on demand up to a maximum allowed number to facilitate the completion of
+outstanding tasks. Threads which are no longer needed are promptly destroyed.
+By default this behavior is enabled but it can be disabled.
+
+See also [zfs_zil_clean_taskq_nthr_pct](#zfs_zil_clean_taskq_nthr_pct), [zio_taskq_batch_pct](#zio_taskq_batch_pct)
+
+| spl_taskq_thread_dynamic | Notes
+|---|---
+| Tags | [taskq](#taskq)
+| Kernel module | spl
+| When to change | disable for performance analysis or troubleshooting
+| Data Type | boolean
+| Range | 0=taskq threads are not dynamic, 1=taskq threads are dynamically created and destroyed
+| Default | 1
+| Change | prior to loading spl kernel module
+| Versions Affected | v0.7.0
+
+### spl_taskq_thread_priority
+`spl_taskq_thread_priority` allows newly created taskq threads to set a non-default scheduler priority.
+When enabled, the priority specified when a taskq is created will be applied
+to all threads created by that taskq.
+When disabled, all threads will use the default Linux kernel thread priority.
+
+| spl_taskq_thread_priority | Notes
+|---|---
+| Tags | [CPU](#CPU), [taskq](#taskq)
+| Kernel module | spl
+| When to change | when troubleshooting CPU scheduling-related performance issues
+| Data Type | boolean
+| Range | 0=taskq threads use the default Linux kernel thread priority, 1=taskq threads use the priority specified when the taskq was created
+| Default | 1
+| Change | prior to loading spl kernel module
+| Versions Affected | v0.7.0
+
+### spl_taskq_thread_sequential
+`spl_taskq_thread_sequential` is the number of items a taskq worker thread must handle without interruption
+before requesting a new worker thread be spawned. `spl_taskq_thread_sequential` controls
+how quickly taskqs ramp up the number of threads processing the queue.
+Because Linux thread creation and destruction are relatively inexpensive, a
+small default value has been selected.
Thus threads are created aggressively, which is typically desirable.
+Increasing this value results in a slower thread creation rate, which may be preferable for some configurations.
+
+| spl_taskq_thread_sequential | Notes
+|---|---
+| Tags | [CPU](#CPU), [taskq](#taskq)
+| Kernel module | spl
+| When to change | TBD
+| Data Type | int
+| Units | taskq items
+| Range | 1 to MAX_INT
+| Default | 4
+| Change | Dynamic
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_kmem_threads
+`spl_kmem_cache_kmem_threads` shows the current number of `spl_kmem_cache` threads.
+This task queue is responsible for allocating new slabs for use by the kmem caches.
+For the majority of systems and workloads only a small number of threads are required.
+
+| spl_kmem_cache_kmem_threads | Notes
+|---|---
+| Tags | [CPU](#CPU), [memory](#memory)
+| Kernel module | spl
+| When to change | read-only
+| Data Type | int
+| Range | 1 to MAX_INT
+| Units | threads
+| Default | 4
+| Change | read-only, can only be changed prior to spl module load
+| Versions Affected | v0.7.0
+
+### spl_kmem_cache_magazine_size
+`spl_kmem_cache_magazine_size` sets the maximum size of kmem cache magazines.
+Cache magazines are an optimization designed to minimize the cost of
+allocating memory. They do this by keeping a per-cpu cache of recently
+freed objects, which can then be reallocated without taking a lock. This
+can improve performance on highly contended caches. However, because
+objects in magazines will prevent otherwise empty slabs from being
+immediately released, this may not be ideal for low memory machines.
+
+For this reason `spl_kmem_cache_magazine_size` can be used to set a maximum
+magazine size. When this value is set to 0 the magazine size will be
+automatically determined based on the object size. Otherwise magazines
+will be limited to 2-256 objects per magazine (per CPU).
+Magazines cannot be disabled entirely in this implementation.
+
+| spl_kmem_cache_magazine_size | Notes
+|---|---
+| Tags | [CPU](#CPU), [memory](#memory)
+| Kernel module | spl
+| When to change |
+| Data Type | int
+| Units | objects per magazine
+| Range | 0=automatically scale magazine size, otherwise 2 to 256
+| Default | 0
+| Change | read-only, can only be changed prior to spl module load
+| Versions Affected | v0.7.0
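+
+Because both `spl_kmem_cache_kmem_threads` and `spl_kmem_cache_magazine_size` are read-only
+once the spl module is loaded, a sketch of setting them for the next module load follows;
+the values and the `/etc/modprobe.d/spl.conf` file name are illustrative assumptions:
+
+```shell
+# inspect the current values
+cat /sys/module/spl/parameters/spl_kmem_cache_kmem_threads
+cat /sys/module/spl/parameters/spl_kmem_cache_magazine_size
+
+# set them for the next module load
+cat >> /etc/modprobe.d/spl.conf <<'EOF'
+options spl spl_kmem_cache_kmem_threads=8 spl_kmem_cache_magazine_size=64
+EOF
+```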