Fix ARC behavior on 32-bit systems
With the addition of the ABD changes, consumption of the virtual address space has been greatly reduced. This exposed an issue on CONFIG_HIGHMEM systems where free memory was being calculated incorrectly. Functionally, this didn't cause any major problems prior to ABD because a lack of available virtual address space was used as an indicator of low memory.

This patch makes the following changes to address the issue and in the process realigns the code further with OpenZFS. There are no substantive changes in behavior for 64-bit systems.

* Added CONFIG_HIGHMEM case to the arc_all_memory() and arc_free_memory() functions to only consider low memory pages on CONFIG_HIGHMEM systems.
* The arc_free_memory() function was updated to return bytes instead of pages to be consistent with the other helper functions. In user space we make up some reasonable values since currently only testing is performed in this context.
* Added three new values to the arcstats kstat to provide visibility into the ARC's assessment of the memory situation: memory_all_bytes, memory_free_bytes, and memory_available_bytes.
* Added kmem_reap() call to arc_available_memory() for 32-bit builds to realign code with OpenZFS.
* Reduced size of test file in async_destroy_001_pos.ksh to speed up test case. Multiple txgs are still required.
* Moved vdevs used by zpool_clear_001_pos and zpool_upgrade_002_pos to TEST_BASE_DIR location to speed up test cases.

Reviewed-by: David Quigley <david.quigley@intel.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #5352
Closes #6734
This commit is contained in:
parent
851a7cd833
commit
91b2f6ab1c
|
@ -632,6 +632,9 @@ typedef struct arc_stats {
|
||||||
kstat_named_t arcstat_memory_throttle_count;
|
kstat_named_t arcstat_memory_throttle_count;
|
||||||
kstat_named_t arcstat_memory_direct_count;
|
kstat_named_t arcstat_memory_direct_count;
|
||||||
kstat_named_t arcstat_memory_indirect_count;
|
kstat_named_t arcstat_memory_indirect_count;
|
||||||
|
kstat_named_t arcstat_memory_all_bytes;
|
||||||
|
kstat_named_t arcstat_memory_free_bytes;
|
||||||
|
kstat_named_t arcstat_memory_available_bytes;
|
||||||
kstat_named_t arcstat_no_grow;
|
kstat_named_t arcstat_no_grow;
|
||||||
kstat_named_t arcstat_tempreserve;
|
kstat_named_t arcstat_tempreserve;
|
||||||
kstat_named_t arcstat_loaned_bytes;
|
kstat_named_t arcstat_loaned_bytes;
|
||||||
|
@ -727,6 +730,9 @@ static arc_stats_t arc_stats = {
|
||||||
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
|
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
|
||||||
{ "memory_direct_count", KSTAT_DATA_UINT64 },
|
{ "memory_direct_count", KSTAT_DATA_UINT64 },
|
||||||
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
|
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
|
||||||
|
{ "memory_all_bytes", KSTAT_DATA_UINT64 },
|
||||||
|
{ "memory_free_bytes", KSTAT_DATA_UINT64 },
|
||||||
|
{ "memory_available_bytes", KSTAT_DATA_INT64 },
|
||||||
{ "arc_no_grow", KSTAT_DATA_UINT64 },
|
{ "arc_no_grow", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_tempreserve", KSTAT_DATA_UINT64 },
|
{ "arc_tempreserve", KSTAT_DATA_UINT64 },
|
||||||
{ "arc_loaned_bytes", KSTAT_DATA_UINT64 },
|
{ "arc_loaned_bytes", KSTAT_DATA_UINT64 },
|
||||||
|
@ -3981,30 +3987,46 @@ static uint64_t
|
||||||
arc_all_memory(void)
|
arc_all_memory(void)
|
||||||
{
|
{
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
return (MIN(ptob(physmem),
|
#ifdef CONFIG_HIGHMEM
|
||||||
vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)));
|
return (ptob(totalram_pages - totalhigh_pages));
|
||||||
|
#else
|
||||||
|
return (ptob(totalram_pages));
|
||||||
|
#endif /* CONFIG_HIGHMEM */
|
||||||
#else
|
#else
|
||||||
return (ptob(physmem) / 2);
|
return (ptob(physmem) / 2);
|
||||||
#endif
|
#endif /* _KERNEL */
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _KERNEL
|
/*
|
||||||
|
* Return the amount of memory that is considered free. In user space
|
||||||
|
* which is primarily used for testing we pretend that free memory ranges
|
||||||
|
* from 0-20% of all memory.
|
||||||
|
*/
|
||||||
static uint64_t
|
static uint64_t
|
||||||
arc_free_memory(void)
|
arc_free_memory(void)
|
||||||
{
|
{
|
||||||
|
#ifdef _KERNEL
|
||||||
|
#ifdef CONFIG_HIGHMEM
|
||||||
|
struct sysinfo si;
|
||||||
|
si_meminfo(&si);
|
||||||
|
return (ptob(si.freeram - si.freehigh));
|
||||||
|
#else
|
||||||
#ifdef ZFS_GLOBAL_NODE_PAGE_STATE
|
#ifdef ZFS_GLOBAL_NODE_PAGE_STATE
|
||||||
return (nr_free_pages() +
|
return (ptob(nr_free_pages() +
|
||||||
global_node_page_state(NR_INACTIVE_FILE) +
|
global_node_page_state(NR_INACTIVE_FILE) +
|
||||||
global_node_page_state(NR_INACTIVE_ANON) +
|
global_node_page_state(NR_INACTIVE_ANON) +
|
||||||
global_node_page_state(NR_SLAB_RECLAIMABLE));
|
global_node_page_state(NR_SLAB_RECLAIMABLE)));
|
||||||
#else
|
#else
|
||||||
return (nr_free_pages() +
|
return (ptob(nr_free_pages() +
|
||||||
global_page_state(NR_INACTIVE_FILE) +
|
global_page_state(NR_INACTIVE_FILE) +
|
||||||
global_page_state(NR_INACTIVE_ANON) +
|
global_page_state(NR_INACTIVE_ANON) +
|
||||||
global_page_state(NR_SLAB_RECLAIMABLE));
|
global_page_state(NR_SLAB_RECLAIMABLE)));
|
||||||
#endif
|
#endif /* ZFS_GLOBAL_NODE_PAGE_STATE */
|
||||||
|
#endif /* CONFIG_HIGHMEM */
|
||||||
|
#else
|
||||||
|
return (spa_get_random(arc_all_memory() * 20 / 100));
|
||||||
|
#endif /* _KERNEL */
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef enum free_memory_reason_t {
|
typedef enum free_memory_reason_t {
|
||||||
FMR_UNKNOWN,
|
FMR_UNKNOWN,
|
||||||
|
@ -4042,17 +4064,15 @@ arc_available_memory(void)
|
||||||
int64_t lowest = INT64_MAX;
|
int64_t lowest = INT64_MAX;
|
||||||
free_memory_reason_t r = FMR_UNKNOWN;
|
free_memory_reason_t r = FMR_UNKNOWN;
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
uint64_t available_memory = ptob(arc_free_memory());
|
|
||||||
int64_t n;
|
int64_t n;
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
|
#ifdef freemem
|
||||||
|
#undef freemem
|
||||||
|
#endif
|
||||||
pgcnt_t needfree = btop(arc_need_free);
|
pgcnt_t needfree = btop(arc_need_free);
|
||||||
pgcnt_t lotsfree = btop(arc_sys_free);
|
pgcnt_t lotsfree = btop(arc_sys_free);
|
||||||
pgcnt_t desfree = 0;
|
pgcnt_t desfree = 0;
|
||||||
#endif
|
pgcnt_t freemem = btop(arc_free_memory());
|
||||||
|
|
||||||
#if defined(__i386)
|
|
||||||
available_memory =
|
|
||||||
MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (needfree > 0) {
|
if (needfree > 0) {
|
||||||
|
@ -4070,7 +4090,7 @@ arc_available_memory(void)
|
||||||
* number of needed free pages. We add extra pages here to make sure
|
* number of needed free pages. We add extra pages here to make sure
|
||||||
* the scanner doesn't start up while we're freeing memory.
|
* the scanner doesn't start up while we're freeing memory.
|
||||||
*/
|
*/
|
||||||
n = PAGESIZE * (btop(available_memory) - lotsfree - needfree - desfree);
|
n = PAGESIZE * (freemem - lotsfree - needfree - desfree);
|
||||||
if (n < lowest) {
|
if (n < lowest) {
|
||||||
lowest = n;
|
lowest = n;
|
||||||
r = FMR_LOTSFREE;
|
r = FMR_LOTSFREE;
|
||||||
|
@ -4091,7 +4111,6 @@ arc_available_memory(void)
|
||||||
r = FMR_SWAPFS_MINFREE;
|
r = FMR_SWAPFS_MINFREE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that we have enough availrmem that memory locking (e.g., via
|
* Check that we have enough availrmem that memory locking (e.g., via
|
||||||
* mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum
|
* mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum
|
||||||
|
@ -4107,9 +4126,9 @@ arc_available_memory(void)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__i386)
|
#if defined(_ILP32)
|
||||||
/*
|
/*
|
||||||
* If we're on an i386 platform, it's possible that we'll exhaust the
|
* If we're on a 32-bit platform, it's possible that we'll exhaust the
|
||||||
* kernel heap space before we ever run out of available physical
|
* kernel heap space before we ever run out of available physical
|
||||||
* memory. Most checks of the size of the heap_area compare against
|
* memory. Most checks of the size of the heap_area compare against
|
||||||
* tune.t_minarmem, which is the minimum available real memory that we
|
* tune.t_minarmem, which is the minimum available real memory that we
|
||||||
|
@ -4178,6 +4197,7 @@ arc_kmem_reap_now(void)
|
||||||
extern kmem_cache_t *zio_data_buf_cache[];
|
extern kmem_cache_t *zio_data_buf_cache[];
|
||||||
extern kmem_cache_t *range_seg_cache;
|
extern kmem_cache_t *range_seg_cache;
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
if ((arc_meta_used >= arc_meta_limit) && zfs_arc_meta_prune) {
|
if ((arc_meta_used >= arc_meta_limit) && zfs_arc_meta_prune) {
|
||||||
/*
|
/*
|
||||||
* We are exceeding our meta-data cache limit.
|
* We are exceeding our meta-data cache limit.
|
||||||
|
@ -4185,9 +4205,16 @@ arc_kmem_reap_now(void)
|
||||||
*/
|
*/
|
||||||
arc_prune_async(zfs_arc_meta_prune);
|
arc_prune_async(zfs_arc_meta_prune);
|
||||||
}
|
}
|
||||||
|
#if defined(_ILP32)
|
||||||
|
/*
|
||||||
|
* Reclaim unused memory from all kmem caches.
|
||||||
|
*/
|
||||||
|
kmem_reap();
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
|
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
|
||||||
#ifdef _ILP32
|
#if defined(_ILP32)
|
||||||
/* reach upper limit of cache size on 32-bit */
|
/* reach upper limit of cache size on 32-bit */
|
||||||
if (zio_buf_cache[i] == NULL)
|
if (zio_buf_cache[i] == NULL)
|
||||||
break;
|
break;
|
||||||
|
@ -6077,14 +6104,11 @@ static int
|
||||||
arc_memory_throttle(uint64_t reserve, uint64_t txg)
|
arc_memory_throttle(uint64_t reserve, uint64_t txg)
|
||||||
{
|
{
|
||||||
#ifdef _KERNEL
|
#ifdef _KERNEL
|
||||||
uint64_t available_memory = ptob(arc_free_memory());
|
uint64_t available_memory = arc_free_memory();
|
||||||
static uint64_t page_load = 0;
|
static uint64_t page_load = 0;
|
||||||
static uint64_t last_txg = 0;
|
static uint64_t last_txg = 0;
|
||||||
#ifdef __linux__
|
|
||||||
pgcnt_t minfree = btop(arc_sys_free / 4);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__i386)
|
#if defined(_ILP32)
|
||||||
available_memory =
|
available_memory =
|
||||||
MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
|
MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
|
||||||
#endif
|
#endif
|
||||||
|
@ -6102,7 +6126,7 @@ arc_memory_throttle(uint64_t reserve, uint64_t txg)
|
||||||
* continue to let page writes occur as quickly as possible.
|
* continue to let page writes occur as quickly as possible.
|
||||||
*/
|
*/
|
||||||
if (current_is_kswapd()) {
|
if (current_is_kswapd()) {
|
||||||
if (page_load > MAX(ptob(minfree), available_memory) / 4) {
|
if (page_load > MAX(arc_sys_free / 4, available_memory) / 4) {
|
||||||
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
|
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
|
||||||
return (SET_ERROR(ERESTART));
|
return (SET_ERROR(ERESTART));
|
||||||
}
|
}
|
||||||
|
@ -6232,6 +6256,13 @@ arc_kstat_update(kstat_t *ksp, int rw)
|
||||||
&as->arcstat_mfu_ghost_size,
|
&as->arcstat_mfu_ghost_size,
|
||||||
&as->arcstat_mfu_ghost_evictable_data,
|
&as->arcstat_mfu_ghost_evictable_data,
|
||||||
&as->arcstat_mfu_ghost_evictable_metadata);
|
&as->arcstat_mfu_ghost_evictable_metadata);
|
||||||
|
|
||||||
|
as->arcstat_memory_all_bytes.value.ui64 =
|
||||||
|
arc_all_memory();
|
||||||
|
as->arcstat_memory_free_bytes.value.ui64 =
|
||||||
|
arc_free_memory();
|
||||||
|
as->arcstat_memory_available_bytes.value.i64 =
|
||||||
|
arc_available_memory();
|
||||||
}
|
}
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
|
|
|
@ -50,7 +50,7 @@ function cleanup
|
||||||
poolexists $TESTPOOL1 && \
|
poolexists $TESTPOOL1 && \
|
||||||
log_must zpool destroy -f $TESTPOOL1
|
log_must zpool destroy -f $TESTPOOL1
|
||||||
|
|
||||||
for file in `ls $TESTDIR/file.*`; do
|
for file in `ls $TEST_BASE_DIR/file.*`; do
|
||||||
log_must rm -f $file
|
log_must rm -f $file
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
@ -62,12 +62,12 @@ log_onexit cleanup
|
||||||
#make raw files to create various configuration pools
|
#make raw files to create various configuration pools
|
||||||
typeset -i i=0
|
typeset -i i=0
|
||||||
while (( i < 3 )); do
|
while (( i < 3 )); do
|
||||||
log_must mkfile $FILESIZE $TESTDIR/file.$i
|
log_must truncate -s $FILESIZE $TEST_BASE_DIR/file.$i
|
||||||
|
|
||||||
(( i = i + 1 ))
|
(( i = i + 1 ))
|
||||||
done
|
done
|
||||||
|
|
||||||
fbase=$TESTDIR/file
|
fbase=$TEST_BASE_DIR/file
|
||||||
set -A poolconf "mirror $fbase.0 $fbase.1 $fbase.2" \
|
set -A poolconf "mirror $fbase.0 $fbase.1 $fbase.2" \
|
||||||
"raidz1 $fbase.0 $fbase.1 $fbase.2" \
|
"raidz1 $fbase.0 $fbase.1 $fbase.2" \
|
||||||
"raidz2 $fbase.0 $fbase.1 $fbase.2"
|
"raidz2 $fbase.0 $fbase.1 $fbase.2"
|
||||||
|
|
|
@ -50,9 +50,9 @@ function create_old_pool
|
||||||
for pool_file in $pool_files; do
|
for pool_file in $pool_files; do
|
||||||
log_must bzcat \
|
log_must bzcat \
|
||||||
$STF_SUITE/tests/functional/cli_root/zpool_upgrade/$pool_file.bz2 \
|
$STF_SUITE/tests/functional/cli_root/zpool_upgrade/$pool_file.bz2 \
|
||||||
>/$TESTPOOL/$pool_file
|
>$TEST_BASE_DIR/$pool_file
|
||||||
done
|
done
|
||||||
log_must zpool import -d /$TESTPOOL $pool_name
|
log_must zpool import -d $TEST_BASE_DIR $pool_name
|
||||||
|
|
||||||
# Put some random contents into the pool
|
# Put some random contents into the pool
|
||||||
for i in {1..1024} ; do
|
for i in {1..1024} ; do
|
||||||
|
@ -97,7 +97,7 @@ function destroy_upgraded_pool
|
||||||
log_must zpool destroy $pool_name
|
log_must zpool destroy $pool_name
|
||||||
fi
|
fi
|
||||||
for file in $pool_files; do
|
for file in $pool_files; do
|
||||||
rm -f /$TESTPOOL/$file
|
rm -f $TEST_BASE_DIR/$file
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,8 +112,8 @@ function check_pool
|
||||||
typeset pool=$1
|
typeset pool=$1
|
||||||
typeset flag=$2
|
typeset flag=$2
|
||||||
find /$pool -type f -exec cksum {} + > \
|
find /$pool -type f -exec cksum {} + > \
|
||||||
/$TESTPOOL/pool-checksums.$pool.$flag
|
$TEST_BASE_DIR/pool-checksums.$pool.$flag
|
||||||
echo /$TESTPOOL/pool-checksums.$pool.$flag
|
echo $TEST_BASE_DIR/pool-checksums.$pool.$flag
|
||||||
}
|
}
|
||||||
|
|
||||||
# This function simply checks that a pool has a particular version number
|
# This function simply checks that a pool has a particular version number
|
||||||
|
|
|
@ -78,10 +78,10 @@ for config in $CONFIGS ; do
|
||||||
typeset -n pool_name=ZPOOL_VERSION_${config}_NAME
|
typeset -n pool_name=ZPOOL_VERSION_${config}_NAME
|
||||||
|
|
||||||
check_pool $pool_name post > /dev/null
|
check_pool $pool_name post > /dev/null
|
||||||
log_must diff /$TESTPOOL/pool-checksums.$pool_name.pre \
|
log_must diff $TEST_BASE_DIR/pool-checksums.$pool_name.pre \
|
||||||
/$TESTPOOL/pool-checksums.$pool_name.post
|
$TEST_BASE_DIR/pool-checksums.$pool_name.post
|
||||||
rm /$TESTPOOL/pool-checksums.$pool_name.pre \
|
rm $TEST_BASE_DIR/pool-checksums.$pool_name.pre \
|
||||||
/$TESTPOOL/pool-checksums.$pool_name.post
|
$TEST_BASE_DIR/pool-checksums.$pool_name.post
|
||||||
destroy_upgraded_pool $config
|
destroy_upgraded_pool $config
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
|
@ -56,12 +56,9 @@ log_assert "async_destroy can suspend and resume traversal"
|
||||||
|
|
||||||
log_must zfs create -o recordsize=512 -o compression=off $TEST_FS
|
log_must zfs create -o recordsize=512 -o compression=off $TEST_FS
|
||||||
|
|
||||||
# Create enough blocks that it will take 4 TXGs to free them all.
|
# Create enough blocks that it will take multiple TXGs to free them all.
|
||||||
typeset zfs_free_max_blocks=100000
|
log_must dd bs=1024k count=128 if=/dev/zero of=/$TEST_FS/file
|
||||||
typeset blocks=$((zfs_free_max_blocks * 4 * 512 / 1024 / 1024))
|
log_must sync
|
||||||
|
|
||||||
log_must dd bs=1024k count=$blocks if=/dev/zero of=/$TEST_FS/file
|
|
||||||
|
|
||||||
log_must zfs destroy $TEST_FS
|
log_must zfs destroy $TEST_FS
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue