# zfs/tests/zfs-tests/tests/perf/perf.shlib
#
# 602 lines
# 15 KiB
# Plaintext

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2015, 2021 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#
. $STF_SUITE/include/libtest.shlib
# Defaults shared by every test in the regression group. Any of these may
# be overridden from the environment before the tests are started.
: "${PERF_RUNTIME:=180}"
: "${PERF_RANDSEED:=1234}"
: "${PERF_COMPPERCENT:=66}"
: "${PERF_COMPCHUNK:=4096}"
# fio output is JSON unless requested otherwise.
: "${PERF_FIO_FORMAT:=json}"
# Options passed when the performance filesystems are created.
: "${PERF_FS_OPTS:=-o recsize=8k -o compress=lz4  -o checksum=sha256 -o redundant_metadata=most}"
export PERF_RUNTIME PERF_RANDSEED PERF_COMPPERCENT PERF_COMPCHUNK
export PERF_FIO_FORMAT PERF_FS_OPTS
#
# Translate the numeric sync flag into the label used in log messages and
# output file names: 0 prints "async", 1 prints "sync", and any other
# number prints an empty line.
#
function get_sync_str
{
	typeset flag=$1
	typeset label=''

	if [[ $flag -eq 0 ]]; then
		label='async'
	elif [[ $flag -eq 1 ]]; then
		label='sync'
	fi
	echo $label
}
#
# Build the suffix that identifies one run in output file names.
#
# $1 - number of threads
# $2 - sync flag, translated by get_sync_str
# $3 - I/O size
#
# Prints "<sync>.<iosize>-ios.<threads>-threads.<n>-filesystems", where
# <n> is the value reported by get_nfilesystems.
#
function get_suffix
{
	typeset nthreads=$1
	typeset sync_flag=$2
	typeset blocksize=$3
	typeset mode nfilesystems result

	mode=$(get_sync_str $sync_flag)
	nfilesystems=$(get_nfilesystems)
	result="$mode.$blocksize-ios.$nthreads-threads"
	result="$result.$nfilesystems-filesystems"
	echo $result
}
#
# Run one fio invocation for a single combination of parameters.
#
# $1 script         - fio job file, relative to $FIO_SCRIPTS
# $2 do_recreate    - command ("true"/"false"); when it succeeds the pool
#                     and its filesystems are recreated first
# $3 clear_cache    - command ("true"/"false"); when it succeeds
#                     "zinject -a" is run to clear the ARC
# $4 threads        - number of fio jobs
# $5 threads_per_fs - threads per filesystem; 0 means one filesystem
# $6 sync           - sync flag exported to fio as SYNC_TYPE
# $7 iosize         - block size exported to fio as BLOCKSIZE
#
# The fio output is written to "<perf output dir>/<command>.fio.<suffix>".
#
function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		#
		# Spreading the threads over several filesystems only
		# happens when the filesystems are recreated below, so
		# the flag itself is executed here. "test $do_recreate"
		# would succeed for any non-empty string, including
		# "false".
		#
		$do_recreate || log_fail \
		    "threads_per_fs=$threads_per_fs requires do_recreate"
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		log_must zinject -a
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: " $DIRECTORY

	# Parameters consumed by the fio job file.
	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		# Run fio on the NFS client, then fetch its report.
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}
#
# Run fio once for every combination of thread count, threads per
# filesystem, sync type and I/O size, using the .fio file passed in. The
# following variables can be set before launching zfstest to override the
# defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_NTHREADS_PER_FS: A list of threads-per-filesystem values.
# PERF_SYNC_TYPES: Whether to use (O_SYNC) or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
# pairs that will be added to the scripts specified in each test.
#
# $1 - fio job file, $2 - recreate flag, $3 - clear-cache flag; all three
# are handed unchanged to do_fio_run_impl.
#
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset nthreads per_fs sync_type blocksize

	for nthreads in $PERF_NTHREADS; do
		for per_fs in $PERF_NTHREADS_PER_FS; do
			for sync_type in $PERF_SYNC_TYPES; do
				for blocksize in $PERF_IOSIZES; do
					do_fio_run_impl $script $do_recreate \
					    $clear_cache $nthreads $per_fs \
					    $sync_type $blocksize
				done
			done
		done
	done
}
#
# Share $TESTFS over NFS, mount it on the client at $NFS_MOUNT, and copy an
# expanded version of the fio job file to /tmp/test.fio on the client.
#
# $1 - fio job file name, relative to $FIO_SCRIPTS
#
function do_setup_nfs
{
typeset script=$1
# Export the filesystem and open up its permissions for the client.
zfs set sharenfs=on $TESTFS
log_must chmod -R 777 /$TESTFS
# Neither of these two commands is checked; only the mount that follows
# is wrapped in log_must.
ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
log_must ssh -t $NFS_USER@$NFS_CLIENT "
sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
"
#
# The variables in the fio script are only available in our current
# shell session, so we have to evaluate them here before copying
# the resulting script over to the target machine.
#
# jobnum is set to the literal text '$jobnum' so that the eval below
# leaves that placeholder in the generated file.
export jobnum='$jobnum'
while read line; do
eval echo "$line"
done < $FIO_SCRIPTS/$script > /tmp/test.fio
# Point the job at the NFS mount instead of the local directory.
log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
log_must rm /tmp/test.fio
}
#
# Launch the data collection scripts in the background.
#
# The global array $collect_scripts holds value pairs: the command at
# index N is launched with its output redirected to a logfile containing
# the tag at index N + 1. Pairs from the comma delimited
# $PERF_COLLECT_SCRIPTS are added for this call only.
#
# $1 - suffix appended to every logfile name
#
function do_collect_scripts
{
	typeset suffix=$1
	typeset -a scripts
	typeset item trimmed logbase outfile oIFS
	typeset idx=0

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	#
	# Work on a private copy. Appending to the global array would add
	# the user supplied scripts again on every call, launching them
	# once more for each run that came before.
	#
	scripts=("${collect_scripts[@]}")

	# Add in user supplied scripts and logfiles, if any.
	oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		trimmed=$(echo "$item" | sed 's/^ *//')
		[[ -n $trimmed ]] && scripts+=("$trimmed")
	done
	IFS=$oIFS

	logbase="$(get_perf_output_dir)/$(basename $SUDO_COMMAND)"
	while [[ $idx -lt "${#scripts[@]}" ]]; do
		outfile="$logbase.${scripts[$idx + 1]}.$suffix"
		# The command is left unquoted so its arguments are split.
		timeout $PERF_RUNTIME ${scripts[$idx]} >"$outfile" 2>&1 &
		((idx += 2))
	done

	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	return 0
}
#
# Find a place to deposit performance data collected while under load.
#
# Prints the path of "perf_data" under the current working directory,
# creating the directory first if it does not exist. The path is quoted
# throughout so a working directory containing whitespace is handled.
#
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"

	[[ -d $dir ]] || mkdir -p "$dir"
	echo "$dir"
}
#
# Apply every entry of the $ZINJECT_DELAYS array to every disk in $DISKS
# by running "zinject -d <disk> -D <delay> $PERFPOOL". An empty entry is
# reported through log_fail.
#
function apply_zinject_delays
{
	typeset idx=0
	typeset disk

	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		# log_fail reports the problem; log_must would try to
		# execute the message as a command.
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}
#
# Run "zinject -c all" under log_must, so a failure of the command is
# reported by the test framework. Presumably this cancels every
# registered injection handler, including delays -- see zinject(8).
#
function clear_zinject_delays
{
log_must zinject -c all
}
#
# Destroy and create the pool used for performance tests, named by
# $PERFPOOL and built on $DISKS.
#
function recreate_perf_pool
{
	if [[ -z $PERFPOOL ]]; then
		log_fail "The \$PERFPOOL variable isn't set."
	fi

	#
	# Delays that "leaked" from an earlier run, or that the test
	# injected itself, each place a hold on the pool. The destroy
	# would fail while any are outstanding, so clear them first.
	#
	clear_zinject_delays

	#
	# create_pool copes with a pool that already exists: the previous
	# pool is destroyed and a new one is created.
	#
	create_pool $PERFPOOL $DISKS
}
#
# Check that a thread count and a threads-per-filesystem value can be
# used together. Every check goes through log_must.
#
# $1 - total number of threads
# $2 - threads per filesystem; 0 means a single filesystem
#
function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	# Quoted on purpose: "test -n" with no operand is always true,
	# so an unquoted empty value would slip through.
	log_must test -n "$threads"
	log_must test -n "$threads_per_fs"

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# Zero was handled above, so any remaining valid value must be
	# greater than zero.
	#
	log_must test $threads_per_fs -gt 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
#
# Create the datasets $PERFPOOL/fs1 .. $PERFPOOL/fsN with $PERF_FS_OPTS
# and export their names, separated by spaces, in $TESTFS.
#
# $1 - number of filesystems to create (defaults to 1)
#
function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}
	typeset dataset
	# Kept local so a caller's own "i" is not clobbered.
	typeset i

	export TESTFS=""
	for ((i = 1; i <= nfilesystems; i++)); do
		dataset="$PERFPOOL/fs$i"
		# $PERF_FS_OPTS is unquoted so it splits into options.
		create_dataset $dataset $PERF_FS_OPTS
		TESTFS="${TESTFS:+$TESTFS }$dataset"
	done
}
#
# Print the number of whitespace separated names in $TESTFS.
#
function get_nfilesystems
{
	# Split $TESTFS into this function's own positional parameters
	# and report how many there are.
	set -- $TESTFS
	echo $#
}
#
# Print the mountpoints of every filesystem named in $TESTFS, joined with
# ":". Each mountpoint is looked up with get_prop.
#
function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=
	# Declared local so a caller's "mountpoint" variable is untouched.
	typeset mountpoint fs

	for fs in "${filesystems[@]}"; do
		mountpoint=$(get_prop mountpoint "$fs")
		directory="${directory:+$directory:}$mountpoint"
	done

	echo "$directory"
}
#
# Print the ARC "c_min" statistic, read with sysctl on FreeBSD, dtrace on
# illumos, or from /proc/spl/kstat/zfs/arcstats on Linux. Calls log_fail
# when the lookup fails or produces no value, which also covers a
# platform none of the branches recognise.
#
function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		min_arc_size=$(awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi
	# $? is still the status of the branch taken above (0 if none was).
	if [[ $? -ne 0 || -z $min_arc_size ]]; then
		log_fail "get_min_arc_size failed"
	fi

	echo $min_arc_size
}
#
# Print the ARC "c_max" statistic, read with sysctl on FreeBSD, dtrace on
# illumos, or from /proc/spl/kstat/zfs/arcstats on Linux. Calls log_fail
# when the lookup fails or produces no value, which also covers a
# platform none of the branches recognise.
#
function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		max_arc_size=$(awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi
	# $? is still the status of the branch taken above (0 if none was).
	if [[ $? -ne 0 || -z $max_arc_size ]]; then
		log_fail "get_max_arc_size failed"
	fi

	echo $max_arc_size
}
#
# Print the ARC "c" statistic, read with sysctl on FreeBSD, dtrace on
# illumos, or from /proc/spl/kstat/zfs/arcstats on Linux. Calls log_fail
# when the lookup fails or produces no value, which also covers a
# platform none of the branches recognise.
#
function get_arc_target
{
	typeset -l arc_c

	if is_freebsd; then
		arc_c=$(sysctl -n kstat.zfs.misc.arcstats.c)
	elif is_illumos; then
		arc_c=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c.value.ui64);
		    exit(0);
		}')
	elif is_linux; then
		arc_c=$(awk '$1 == "c" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats)
	fi
	# $? is still the status of the branch taken above (0 if none was).
	if [[ $? -ne 0 || -z $arc_c ]]; then
		log_fail "get_arc_target failed"
	fi

	echo $arc_c
}
#
# Print the dbuf cache size. On illumos the value of dbuf_cache_max_bytes
# is read with dtrace; elsewhere it is computed as the ARC target divided
# by 2 raised to the DBUF_CACHE_SHIFT tunable.
#
function get_dbuf_cache_size
{
	typeset -l cache_bytes shift_bits

	if ! is_illumos; then
		shift_bits=$(get_tunable DBUF_CACHE_SHIFT)
		cache_bytes=$(($(get_arc_target) / 2**shift_bits))
	else
		cache_bytes=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	fi
	# $? still holds the status of the last assignment above.
	if [[ $? -ne 0 ]]; then
		log_fail "get_dbuf_cache_size failed"
	fi

	echo $cache_bytes
}
#
# Create a file with some information about how this system is configured.
#
# $1 - file name, relative to $PERF_DATA_DIR
#
# The output is a JSON object built up piece by piece. Every write appends
# (>>), so an existing file is extended rather than replaced.
#
function get_system_config
{
typeset config=$PERF_DATA_DIR/$1
echo "{" >>$config
# CPU count, physical memory, ARC c_max, hostname and kernel version.
if is_linux; then
echo " \"ncpus\": \"$(nproc --all)\"," >>$config
echo " \"physmem\": \"$(free -b | \
awk '$1 == "Mem:" { print $2 }')\"," >>$config
echo " \"c_max\": \"$(get_max_arc_size)\"," >>$config
echo " \"hostname\": \"$(uname -n)\"," >>$config
echo " \"kernel version\": \"$(uname -sr)\"," >>$config
else
# Every non-Linux platform reads these values, plus kmem_flags,
# from kernel variables through dtrace.
dtrace -qn 'BEGIN{
printf(" \"ncpus\": %d,\n", `ncpus);
printf(" \"physmem\": %u,\n", `physmem * `_pagesize);
printf(" \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
printf(" \"kmem_flags\": \"0x%x\",", `kmem_flags);
exit(0)}' >>$config
echo " \"hostname\": \"$(uname -n)\"," >>$config
echo " \"kernel version\": \"$(uname -v)\"," >>$config
fi
if is_linux; then
# "disks": one name/size entry per line reported by lsblk.
lsblk -dino NAME,SIZE | awk 'BEGIN {
printf(" \"disks\": {\n"); first = 1}
{disk = $1} {size = $2;
if (first != 1) {printf(",\n")} else {first = 0}
printf(" \"%s\": \"%s\"", disk, size)}
END {printf("\n },\n")}' >>$config
# "tunables": current values of selected zfs module parameters.
zfs_tunables="/sys/module/zfs/parameters"
printf " \"tunables\": {\n" >>$config
for tunable in \
zfs_arc_max \
zfs_arc_meta_limit \
zfs_arc_sys_free \
zfs_dirty_data_max \
zfs_flags \
zfs_prefetch_disable \
zfs_txg_timeout \
zfs_vdev_aggregation_limit \
zfs_vdev_async_read_max_active \
zfs_vdev_async_write_max_active \
zfs_vdev_sync_read_max_active \
zfs_vdev_sync_write_max_active \
zio_slow_io_ms
do
# zfs_arc_max is the first entry in the list; every later entry
# is preceded by the comma that separates it from the previous one.
if [ "$tunable" != "zfs_arc_max" ]
then
printf ",\n" >>$config
fi
printf " \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
>>$config
done
printf "\n }\n" >>$config
else
# "disks": device names and sizes parsed from "iostat -En"; entries
# whose size starts with 0 are skipped by the /^Size: [^0]/ pattern.
iostat -En | awk 'BEGIN {
printf(" \"disks\": {\n"); first = 1}
/^c/ {disk = $1}
/^Size: [^0]/ {size = $2;
if (first != 1) {printf(",\n")} else {first = 0}
printf(" \"%s\": \"%s\"", disk, size)}
END {printf("\n },\n")}' >>$config
# "system": the "set name = value" lines found in /etc/system.
sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
awk -F= 'BEGIN {printf(" \"system\": {\n"); first = 1}
{if (first != 1) {printf(",\n")} else {first = 0};
printf(" \"%s\": %s", $1, $2)}
END {printf("\n }\n")}' >>$config
fi
echo "}" >>$config
}
#
# Print the number of fio jobs to run for this machine's CPU count: the
# CPU count itself up to 8 CPUs, three quarters of it (rounded down)
# above that.
#
# The CPU count comes from "nproc --all" on Linux and from counting the
# lines of psrinfo output elsewhere. $NPROC and $WC are used as the
# commands when set; otherwise plain nproc and wc are run.
#
function num_jobs_by_cpu
{
	typeset ncpu num_jobs

	if is_linux; then
		ncpu=$(${NPROC:-nproc} --all)
	else
		ncpu=$(psrinfo | ${WC:-wc} -l)
	fi

	num_jobs=$ncpu
	[[ $ncpu -gt 8 ]] && num_jobs=$((ncpu * 3 / 4))
	echo $num_jobs
}
#
# Print a ":" delimited list of the devices backing a pool, with a
# leading and a trailing ":".
#
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
# $1 - pool name
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		#
		# The action must open on the same line as its pattern.
		# With the brace on the following line awk treats it as
		# a separate rule that runs for every input line, and
		# prints each matching line in full.
		#
		lun_list+=$(zpool list -HLv $pool | awk '
		    /a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/ {
			printf "%s:", $1
		    }')
	elif is_linux; then
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}
#
# Print the lists that drive do_fio_run, one "NAME: value" line each.
#
function print_perf_settings
{
	printf '%s\n' \
	    "PERF_NTHREADS: $PERF_NTHREADS" \
	    "PERF_NTHREADS_PER_FS: $PERF_NTHREADS_PER_FS" \
	    "PERF_SYNC_TYPES: $PERF_SYNC_TYPES" \
	    "PERF_IOSIZES: $PERF_IOSIZES"
}
# Make sure the perf_data directory for performance statistics exists, and
# write the system configuration into it unless config.json is already
# there.
PERF_DATA_DIR=$(get_perf_output_dir)
export PERF_DATA_DIR
if [[ ! -f $PERF_DATA_DIR/config.json ]]; then
	get_system_config config.json
fi