Refresh linux-zpios

This commit is contained in:
Brian Behlendorf 2008-12-05 11:32:34 -08:00
parent 9baaa468ac
commit 9ed7c9b6ad
21 changed files with 3618 additions and 18 deletions

View File

@ -1,3 +1 @@
gcc-branch zfs-branch
fix-branch
feature-branch

17
.topmsg
View File

@ -1,19 +1,6 @@
From: Brian Behlendorf <behlendorf1@llnl.gov> From: Brian Behlendorf <behlendorf1@llnl.gov>
Subject: [PATCH] zfs branch Subject: [PATCH] linux zpios
Merged result of all changes which are relevant to both Solaris Linux kernel implementation of PIOS test app.
and Linux builds of the ZFS code. These are changes where there
is a reasonable chance they will be accepted upstream.
Additionally, since this is effectively the root of the linux
ZFS tree the core linux build system is added here. This
includes autogen.sh, configure.ac, m4 macros, some scripts/*,
and makefiles for all the core ZFS components. Linux-only
features which require tweaks to the build system should appear
on the relevant topic branches. All autotools products which
result from autogen.sh are commited to the linux-configure-branch.
This branch also contains the META, ChangeLog, AUTHORS,
README, and GIT files.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>

View File

@ -130,6 +130,7 @@ AC_CONFIG_FILES([ Makefile
zfs/lib/libspl/include/sys/Makefile zfs/lib/libspl/include/sys/Makefile
zfs/lib/libspl/include/Makefile zfs/lib/libspl/include/Makefile
zfs/lib/libspl/Makefile zfs/lib/libspl/Makefile
zfs/lib/libzpios/Makefile
zfs/zcmd/ztest/Makefile zfs/zcmd/ztest/Makefile
zfs/zcmd/Makefile zfs/zcmd/Makefile
zfs/zcmd/zfs/Makefile zfs/zcmd/zfs/Makefile
@ -137,5 +138,6 @@ AC_CONFIG_FILES([ Makefile
zfs/zcmd/zinject/Makefile zfs/zcmd/zinject/Makefile
zfs/zcmd/zdump/Makefile zfs/zcmd/zdump/Makefile
zfs/zcmd/zpool/Makefile zfs/zcmd/zpool/Makefile
zfs/zcmd/zpios/Makefile
]) ])
AC_OUTPUT AC_OUTPUT

View File

@ -0,0 +1,128 @@
#!/bin/bash
# profile-zpios-disk.sh
#
# /proc/diskstats <after skipping major/minor>
# Field 1 -- device name
# Field 2 -- # of reads issued
# Field 3 -- # of reads merged
# Field 4 -- # of sectors read
# Field 5 -- # of milliseconds spent reading
# Field 6 -- # of writes completed
# Field 7 -- # of writes merged
# Field 8 -- # of sectors written
# Field 9 -- # of milliseconds spent writing
# Field 10 -- # of I/Os currently in progress
# Field 11 -- # of milliseconds spent doing I/Os
# Field 12 -- weighted # of milliseconds spent doing I/Os
# Positional arguments: $0 is this script's own path, $1 the log
# directory and $2 the run id whose disk-* snapshots are summarized.
RUN_PIDS=$0
RUN_LOG_DIR=$1
RUN_ID=$2
# create_table FIELD
#
# Print a CSV table of per-device deltas for one column of the disk-*
# snapshots found under ${RUN_LOG_DIR}/${RUN_ID}/.  Snapshots are visited
# oldest first (ls -r reverses the newest-first ctime sort).  The first
# snapshot only produces the header row; every later snapshot produces one
# row holding the difference to the previous snapshot for each "sd" device
# followed by the sum of those differences.
create_table() {
    local FIELD=$1
    local PREV=()
    local CURR=()
    local NEED_HEADER=1
    local STEP=1

    for DISK_FILE in $(ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*); do
        # Remember the previous snapshot before loading the next one.
        PREV=( ${CURR[@]} )
        CURR=( $(cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t") )

        # Device names are taken from the first snapshot seen.
        if [ ${NEED_HEADER} -eq 1 ]; then
            printf "step, "
            cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", "
            echo "total"
            NEED_HEADER=0
        fi

        # Nothing to diff against yet.
        [ ${#PREV[@]} -eq 0 ] && continue

        # The device set must not change between snapshots.
        if [ ${#PREV[@]} -ne ${#CURR[@]} ]; then
            echo "Badly formatted profile data in ${DISK_FILE}"
            break
        fi

        TOTAL=0
        printf "%s, " "${STEP}"
        for (( i=0; i<${#CURR[@]}; i++ )); do
            DELTA=$(echo "${CURR[${i}]}-${PREV[${i}]}" | bc)
            let TOTAL=${TOTAL}+${DELTA}
            printf "%s, " "${DELTA}"
        done
        echo "${TOTAL}, "

        STEP=$((STEP + 1))
    done
}
# create_table_mbs FIELD TIME
#
# Same traversal as create_table, but each per-device sector delta is
# converted to MB/s: sectors are multiplied by 512, divided by TIME and
# then by 1024*1024, with two decimal places (bc scale=2).  The final
# column is the sum of the per-device rates.
create_table_mbs() {
    local FIELD=$1
    local TIME=$2
    local PREV=()
    local CURR=()
    local NEED_HEADER=1
    local STEP=1

    for DISK_FILE in $(ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/disk-[0-9]*); do
        # Remember the previous snapshot before loading the next one.
        PREV=( ${CURR[@]} )
        CURR=( $(cat ${DISK_FILE} | grep sd | cut -c11- | cut -f${FIELD} -d' ' | tr "\n" "\t") )

        # Device names are taken from the first snapshot seen.
        if [ ${NEED_HEADER} -eq 1 ]; then
            printf "step, "
            cat ${DISK_FILE} | grep sd | cut -c11- | cut -f1 -d' ' | tr "\n" ", "
            echo "total"
            NEED_HEADER=0
        fi

        # Nothing to diff against yet.
        [ ${#PREV[@]} -eq 0 ] && continue

        # The device set must not change between snapshots.
        if [ ${#PREV[@]} -ne ${#CURR[@]} ]; then
            echo "Badly formatted profile data in ${DISK_FILE}"
            break
        fi

        TOTAL=0
        printf "%s, " "${STEP}"
        for (( i=0; i<${#CURR[@]}; i++ )); do
            DELTA=$(echo "${CURR[${i}]}-${PREV[${i}]}" | bc)
            MBS=$(echo "scale=2; ((${DELTA}*512)/${TIME})/(1024*1024)" | bc)
            TOTAL=$(echo "scale=2; ${TOTAL}+${MBS}" | bc)
            printf "%s, " "${MBS}"
        done
        echo "${TOTAL}, "

        STEP=$((STEP + 1))
    done
}
# emit_counts TITLE FIELD: blank line, title, then the raw delta table.
emit_counts() {
    echo
    echo "$1"
    create_table $2
}

# emit_rate FIELD: throughput table for a sector column, using a divisor
# of 3 for the time between snapshots.
emit_rate() {
    echo "MB/s per device"
    create_table_mbs $1 3
}

# Read side: columns 2-4 of the per-device statistics.
emit_counts "Reads issued per device" 2
emit_counts "Reads merged per device" 3
emit_counts "Sectors read per device" 4
emit_rate 4

# Write side: columns 6-8 of the per-device statistics.
emit_counts "Writes issued per device" 6
emit_counts "Writes merged per device" 7
emit_counts "Sectors written per device" 8
emit_rate 8

exit 0

View File

@ -0,0 +1,130 @@
#!/bin/bash
# profile-zpios-pids.sh
# Summarize the pids-* snapshots of one run as a CSV table with one column
# per thread type and one row per snapshot interval.
#
# Positional arguments: ${0} is this script's path, ${1} the log directory
# and ${2} the run id; snapshots are read from ${1}/${2}/pids-[0-9]*.
RUN_PIDS=${0}
RUN_LOG_DIR=${1}
RUN_ID=${2}
# ROW_M holds the per-type totals of the previous snapshot, ROW_N those of
# the snapshot currently being processed.
ROW_M=()
ROW_N=()
ROW_N_SCHED=()
ROW_N_WAIT=()
HEADER=1
STEP=1
# Visit snapshots oldest first (ls -r reverses the newest-first ctime sort).
for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
ROW_M=( ${ROW_N[@]} )
# One accumulator slot per thread type handled by the case statement below.
ROW_N=( 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 )
# Space-separated fields 15 and 17 of every line in the snapshot.
ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
# Field 2 with the surrounding parentheses and any "/<n>" suffix removed.
ROW_N_NAMES=( `cat ${PID_FILE} | cut -f2 -d' ' | cut -f2 -d'(' |
cut -f1 -d')' | cut -f1 -d'/' | tr "\n" "\t"` )
# Fold every line into the slot of its thread type; unknown names are skipped.
for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
SUM=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
case ${ROW_N_NAMES[${i}]} in
zio_taskq) IDX=0;;
zio_req_nul) IDX=1;;
zio_irq_nul) IDX=2;;
zio_req_rd) IDX=3;;
zio_irq_rd) IDX=4;;
zio_req_wr) IDX=5;;
zio_irq_wr) IDX=6;;
zio_req_fr) IDX=7;;
zio_irq_fr) IDX=8;;
zio_req_cm) IDX=9;;
zio_irq_cm) IDX=10;;
zio_req_ctl) IDX=11;;
zio_irq_ctl) IDX=12;;
txg_quiesce) IDX=13;;
txg_sync) IDX=14;;
txg_timelimit) IDX=15;;
arc_reclaim) IDX=16;;
l2arc_feed) IDX=17;;
kpios_io) IDX=18;;
*) continue;;
esac
let ROW_N[${IDX}]=${ROW_N[${IDX}]}+${SUM}
done
# The header is printed once, in the same order as the slots above.
if [ $HEADER -eq 1 ]; then
echo "step, zio_taskq, zio_req_nul, zio_irq_nul, " \
"zio_req_rd, zio_irq_rd, zio_req_wr, zio_irq_wr, " \
"zio_req_fr, zio_irq_fr, zio_req_cm, zio_irq_cm, " \
"zio_req_ctl, zio_irq_ctl, txg_quiesce, txg_sync, " \
"txg_timelimit, arc_reclaim, l2arc_feed, kpios_io, " \
"idle"
HEADER=0
fi
# The first snapshot has nothing to be compared against.
if [ ${#ROW_M[@]} -eq 0 ]; then
continue
fi
if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
echo "Badly formatted profile data in ${PID_FILE}"
break
fi
# Original values are in jiffies and we expect HZ to be 1000
# on most 2.6 systems thus we divide by 10 to get a percentage.
# IDLE starts at 1000 and has every per-type delta subtracted from it.
IDLE=1000
echo -n "${STEP}, "
for (( i=0; i<${#ROW_N[@]}; i++ )); do
DELTA=`echo "${ROW_N[${i}]}-${ROW_M[${i}]}" | bc`
DELTA_PERCENT=`echo "scale=1; ${DELTA}/10" | bc`
let IDLE=${IDLE}-${DELTA}
echo -n "${DELTA_PERCENT}, "
done
ILDE_PERCENT=`echo "scale=1; ${IDLE}/10" | bc`
echo "${ILDE_PERCENT}"
let STEP=${STEP}+1
done
# The script terminates here unconditionally; everything after this line
# is never executed.
exit
# NOTE(review): this whole section is unreachable because of the bare
# "exit" directly above it.  It would print one column per line of the
# snapshot (per pid) instead of one column per thread type.  If it is ever
# re-enabled, HEADER, STEP, ROW_M and ROW_N still carry the values left by
# the preceding loop and would presumably need to be reset -- confirm.
echo
echo "Percent of total system time per pid"
for PID_FILE in `ls -r --sort=time --time=ctime ${RUN_LOG_DIR}/${RUN_ID}/pids-[0-9]*`; do
ROW_M=( ${ROW_N[@]} )
# Space-separated fields 15 and 17 of every line in the snapshot.
ROW_N_SCHED=( `cat ${PID_FILE} | cut -f15 -d' ' | tr "\n" "\t"` )
ROW_N_WAIT=( `cat ${PID_FILE} | cut -f17 -d' ' | tr "\n" "\t"` )
# One entry per snapshot line: the sum of the two fields.
for (( i=0; i<${#ROW_N_SCHED[@]}; i++ )); do
ROW_N[${i}]=`echo "${ROW_N_WAIT[${i}]}+${ROW_N_SCHED[${i}]}" | bc`
done
# Header columns are field 2 of each snapshot line.
if [ $HEADER -eq 1 ]; then
echo -n "step, "
cat ${PID_FILE} | cut -f2 -d' ' | tr "\n" ", "
echo
HEADER=0
fi
if [ ${#ROW_M[@]} -eq 0 ]; then
continue
fi
if [ ${#ROW_M[@]} -ne ${#ROW_N[@]} ]; then
echo "Badly formatted profile data in ${PID_FILE}"
break
fi
# Original values are in jiffies and we expect HZ to be 1000
# on most 2.6 systems thus we divide by 10 to get a percentage.
echo -n "${STEP}, "
for (( i=0; i<${#ROW_N[@]}; i++ )); do
DELTA=`echo "scale=1; (${ROW_N[${i}]}-${ROW_M[${i}]})/10" | bc`
echo -n "${DELTA}, "
done
echo
let STEP=${STEP}+1
done
exit 0

View File

@ -0,0 +1,67 @@
#!/bin/bash
prog=profile-zpios-post.sh
. ../.script-config

# Post-run hook.  Arguments, in order: phase, log directory, run id, pool,
# chunk size, region size, thread count, region count, offset, region
# noise, chunk noise, thread delay, flags and result.
RUN_POST=$0
RUN_PHASE=$1

# Only the "post" phase is handled by this script.
[ "${RUN_PHASE}" = "post" ] || exit 1

RUN_LOG_DIR=$2
RUN_ID=$3
RUN_POOL=$4
RUN_CHUNK_SIZE=$5
RUN_REGION_SIZE=$6
RUN_THREAD_COUNT=$7
RUN_REGION_COUNT=$8
RUN_OFFSET=$9
RUN_REGION_NOISE=${10}
RUN_CHUNK_NOISE=${11}
RUN_THREAD_DELAY=${12}
RUN_FLAGS=${13}
RUN_RESULT=${14}

# Summary scripts and the files they (and the kstat dumps) are written to.
PROFILE_ZPIOS_PIDS_BIN=/home/behlendo/src/zfs/scripts/profile-zpios-pids.sh
PROFILE_ZPIOS_DISK_BIN=/home/behlendo/src/zfs/scripts/profile-zpios-disk.sh
PROFILE_ZPIOS_PIDS_LOG=${RUN_LOG_DIR}/${RUN_ID}/pids-summary.csv
PROFILE_ZPIOS_DISK_LOG=${RUN_LOG_DIR}/${RUN_ID}/disk-summary.csv
PROFILE_ZPIOS_ARC_LOG=${RUN_LOG_DIR}/${RUN_ID}/arcstats
PROFILE_ZPIOS_VDEV_LOG=${RUN_LOG_DIR}/${RUN_ID}/vdev_cache_stats

# Oprofile settings; only referenced by the commented-out commands below.
KERNEL_BIN="/lib/modules/$(uname -r)/kernel/"
SPL_BIN="${SPLBUILD}/modules/spl/"
ZFS_BIN="${ZFSBUILD}/lib/"
OPROFILE_SHORT_ARGS="-a -g -l -p ${KERNEL_BIN},${SPL_BIN},${ZFS_BIN}"
OPROFILE_LONG_ARGS="-d -a -g -l -p ${KERNEL_BIN},${SPL_BIN},${ZFS_BIN}"
OPROFILE_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile.txt
OPROFILE_SHORT_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile-short.txt
OPROFILE_LONG_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile-long.txt

# File holding the pid of the background profile script.
PROFILE_PID=${RUN_LOG_DIR}/${RUN_ID}/pid

# opcontrol --stop >>${OPROFILE_LOG} 2>&1
# opcontrol --dump >>${OPROFILE_LOG} 2>&1

# Send SIGHUP to the profile script recorded in the pid file, then drop
# the pid file.
kill -s SIGHUP $(cat ${PROFILE_PID})
rm -f ${PROFILE_PID}

# opreport ${OPROFILE_SHORT_ARGS} >${OPROFILE_SHORT_LOG} 2>&1
# opreport ${OPROFILE_LONG_ARGS} >${OPROFILE_LONG_LOG} 2>&1
# opcontrol --deinit >>${OPROFILE_LOG} 2>&1

# Keep a copy of the ARC and vdev cache kstats next to the run's logs.
cat /proc/spl/kstat/zfs/arcstats >${PROFILE_ZPIOS_ARC_LOG}
cat /proc/spl/kstat/zfs/vdev_cache_stats >${PROFILE_ZPIOS_VDEV_LOG}

# Summarize system time per pid
${PROFILE_ZPIOS_PIDS_BIN} ${RUN_LOG_DIR} ${RUN_ID} >${PROFILE_ZPIOS_PIDS_LOG}

# Summarize per device performance
${PROFILE_ZPIOS_DISK_BIN} ${RUN_LOG_DIR} ${RUN_ID} >${PROFILE_ZPIOS_DISK_LOG}

exit 0

View File

@ -0,0 +1,69 @@
#!/bin/bash
# profile-zpios-pre.sh
# Pre-run hook.  Recreates the per-run log directory, records the run
# arguments, starts the background profile script and waits until that
# script signals (SIGHUP) that it is ready.
#
# Arguments, in order: phase, log directory, run id, pool, chunk size,
# region size, thread count, region count, offset, region noise, chunk
# noise, thread delay, flags and result.
#
# Exit status: 0 on success, 1 when the phase is not "pre" or when the log
# directory / run id is missing.

# The profile script sends SIGHUP to its parent once it is ready.
trap "PROFILE_ZPIOS_READY=1" SIGHUP

RUN_PRE=${0}
RUN_PHASE=${1}
RUN_LOG_DIR=${2}
RUN_ID=${3}
RUN_POOL=${4}
RUN_CHUNK_SIZE=${5}
RUN_REGION_SIZE=${6}
RUN_THREAD_COUNT=${7}
RUN_REGION_COUNT=${8}
RUN_OFFSET=${9}
RUN_REGION_NOISE=${10}
RUN_CHUNK_NOISE=${11}
RUN_THREAD_DELAY=${12}
RUN_FLAGS=${13}
RUN_RESULT=${14}

PROFILE_ZPIOS_BIN=/home/behlendo/src/zfs/scripts/profile-zpios.sh
PROFILE_ZPIOS_READY=0

OPROFILE_LOG=${RUN_LOG_DIR}/${RUN_ID}/oprofile.txt
PROFILE_PID=${RUN_LOG_DIR}/${RUN_ID}/pid
RUN_ARGS=${RUN_LOG_DIR}/${RUN_ID}/args

if [ "${RUN_PHASE}" != "pre" ]; then
    exit 1
fi

# Refuse to continue without both path components: with empty values the
# recursive delete below would be aimed at "//".
if [ -z "${RUN_LOG_DIR}" ] || [ -z "${RUN_ID}" ]; then
    echo "${RUN_PRE}: missing log directory or run id" >&2
    exit 1
fi

# Start from an empty per-run directory.  The paths are quoted so that
# whitespace cannot split them into several rm/mkdir arguments, and :?
# aborts should either variable ever be empty at this point.
rm -Rf "${RUN_LOG_DIR:?}/${RUN_ID:?}/"
mkdir -p "${RUN_LOG_DIR}/${RUN_ID}/"

# Record every argument of this run as KEY=value lines.
{
    echo "PHASE=${RUN_PHASE}"
    echo "LOG_DIR=${RUN_LOG_DIR}"
    echo "ID=${RUN_ID}"
    echo "POOL=${RUN_POOL}"
    echo "CHUNK_SIZE=${RUN_CHUNK_SIZE}"
    echo "REGION_SIZE=${RUN_REGION_SIZE}"
    echo "THREAD_COUNT=${RUN_THREAD_COUNT}"
    echo "REGION_COUNT=${RUN_REGION_COUNT}"
    echo "OFFSET=${RUN_OFFSET}"
    echo "REGION_NOISE=${RUN_REGION_NOISE}"
    echo "CHUNK_NOISE=${RUN_CHUNK_NOISE}"
    echo "THREAD_DELAY=${RUN_THREAD_DELAY}"
    echo "FLAGS=${RUN_FLAGS}"
    echo "RESULT=${RUN_RESULT}"
} >>"${RUN_ARGS}"

# XXX: Oprofile support seems to be broken when I try and start
# it via a user mode helper script, I suspect the setup is failing.
# opcontrol --init >>${OPROFILE_LOG} 2>&1
# opcontrol --setup --vmlinux=/boot/vmlinux >>${OPROFILE_LOG} 2>&1

# Start the profile script in the background and remember its pid so the
# post-run hook can signal it.
${PROFILE_ZPIOS_BIN} ${RUN_PHASE} "${RUN_LOG_DIR}" "${RUN_ID}" &
echo "$!" >"${PROFILE_PID}"

# Sleep waiting for profile script to be ready, it will
# signal us via SIGHUP when it is ready to start profiling.
while [ ${PROFILE_ZPIOS_READY} -eq 0 ]; do
    sleep 0.1
done

# opcontrol --start-daemon >>${OPROFILE_LOG} 2>&1
# opcontrol --start >>${OPROFILE_LOG} 2>&1

exit 0

222
scripts/profile-zpios.sh Normal file
View File

@ -0,0 +1,222 @@
#!/bin/bash
# profile-zpios.sh
# A SIGHUP sets RUN_DONE, which ends the polling loops further down.
trap "RUN_DONE=1" SIGHUP

# Positional arguments: phase, log directory and run id.
RUN_PHASE=$1
RUN_LOG_DIR=$2
RUN_ID=$3
RUN_DONE=0

# Target time, in seconds, between two snapshots.
POLL_INTERVAL=2.99

# The pids of interest differ from system to system, so they are
# harvested from /proc/<pid>/ for the following kinds of threads:
#
# zio_taskq/#
# spa_zio_issue/#
# spa_zio_intr/#
# txg_quiesce_thr
# txg_sync_thread
# txg_timelimit_t
# arc_reclaim_thr
# l2arc_feed_thre
# kpios_io/#
#
# One pid list per tracked thread type, all starting out empty.
declare -a ZIO_TASKQ_PIDS=()
declare -a ZIO_REQ_NUL_PIDS=() ZIO_IRQ_NUL_PIDS=()
declare -a ZIO_REQ_RD_PIDS=() ZIO_IRQ_RD_PIDS=()
declare -a ZIO_REQ_WR_PIDS=() ZIO_IRQ_WR_PIDS=()
declare -a ZIO_REQ_FR_PIDS=() ZIO_IRQ_FR_PIDS=()
declare -a ZIO_REQ_CM_PIDS=() ZIO_IRQ_CM_PIDS=()
declare -a ZIO_REQ_CTL_PIDS=() ZIO_IRQ_CTL_PIDS=()
declare -a TXG_QUIESCE_PIDS=() TXG_SYNC_PIDS=() TXG_TIMELIMIT_PIDS=()
declare -a ARC_RECLAIM_PIDS=() L2ARC_FEED_PIDS=()
declare -a KPIOS_IO_PIDS=()
# show_pids
#
# Print one line per tracked thread type in the form
# "* <type>: { <pids...> } = <count>", reading the global *_PIDS arrays.
show_pids() {
    local ENTRY LABEL REF
    local LIST

    # Each entry pairs the printed label with the array that backs it.
    for ENTRY in \
        "zio_taskq ZIO_TASKQ_PIDS" \
        "zio_req_nul ZIO_REQ_NUL_PIDS" \
        "zio_irq_nul ZIO_IRQ_NUL_PIDS" \
        "zio_req_rd ZIO_REQ_RD_PIDS" \
        "zio_irq_rd ZIO_IRQ_RD_PIDS" \
        "zio_req_wr ZIO_REQ_WR_PIDS" \
        "zio_irq_wr ZIO_IRQ_WR_PIDS" \
        "zio_req_fr ZIO_REQ_FR_PIDS" \
        "zio_irq_fr ZIO_IRQ_FR_PIDS" \
        "zio_req_cm ZIO_REQ_CM_PIDS" \
        "zio_irq_cm ZIO_IRQ_CM_PIDS" \
        "zio_req_ctl ZIO_REQ_CTL_PIDS" \
        "zio_irq_ctl ZIO_IRQ_CTL_PIDS" \
        "txg_quiesce TXG_QUIESCE_PIDS" \
        "txg_sync TXG_SYNC_PIDS" \
        "txg_timelimit TXG_TIMELIMIT_PIDS" \
        "arc_reclaim ARC_RECLAIM_PIDS" \
        "l2arc_feed L2ARC_FEED_PIDS" \
        "kpios_io KPIOS_IO_PIDS"
    do
        LABEL=${ENTRY% *}
        # Indirect expansion of "<ARRAY>[@]" yields the array's elements.
        REF="${ENTRY#* }[@]"
        LIST=( "${!REF}" )
        echo "* ${LABEL}: { ${LIST[@]} } = ${#LIST[@]}"
    done
}
# check_pid PID NAME TYPE "PID LIST"
#
# Print the space-separated pid list given as the fourth argument, with
# PID added when NAME belongs to thread type TYPE.  NAME is split at the
# first '/': the part before it must equal TYPE.  When the part after it
# is a decimal number the pid is stored at that index, otherwise (no '/'
# or a non-numeric suffix) the pid is appended to the list.
#
# The list is split into one array element per pid so that storing by
# index replaces at most a single entry.  A name without '/' used to be
# treated as its own "number" (cut prints the whole line when the
# delimiter is absent), which made the append branch unreachable.
check_pid() {
    local PID=$1
    local NAME=$2
    local TYPE=$3
    local PIDS=( $4 )
    local NAME_STRING=${NAME%%/*}
    local NAME_NUMBER=""
    local REST

    # Only names that really contain a '/' carry a thread number.
    if [ "${NAME}" != "${NAME_STRING}" ]; then
        REST=${NAME#*/}
        NAME_NUMBER=${REST%%/*}
    fi

    if [ "${NAME_STRING}" == "${TYPE}" ]; then
        case "${NAME_NUMBER}" in
        ''|*[!0-9]*)
            # No usable thread number, append to the end of the list.
            PIDS[${#PIDS[@]}]=${PID}
            ;;
        *)
            # Force base 10 so a suffix such as "08" is not read as octal.
            PIDS[$((10#${NAME_NUMBER}))]=${PID}
            ;;
        esac
    fi

    echo "${PIDS[@]}"
}
# aquire_pids
#
# Scan /proc for the threads of every tracked type and store their pids in
# the global *_PIDS arrays, then send SIGHUP to the parent process and
# wait for at least one kpios_io thread to show up (or for RUN_DONE to be
# set).  The result of show_pids is written to
# ${RUN_LOG_DIR}/${RUN_ID}/pids.txt.
#
# The process name is passed to check_pid quoted: an empty name (process
# exited while being inspected) or a name containing whitespace would
# otherwise shift check_pid's arguments and overwrite every pid list.
#
# NOTE: This whole process is crazy slow but it will do for now
aquire_pids() {
    echo "--- Aquiring ZFS pids ---"

    # The pattern is quoted so the shell cannot expand it as a glob.
    for PID in `ls /proc/ | grep '[0-9]' | sort -n -u`; do
        if [ ! -e /proc/${PID}/status ]; then
            continue
        fi

        # Second tab-separated field of the first line of the status file.
        NAME=`cat /proc/${PID}/status | head -n1 | cut -f2`
        if [ -z "${NAME}" ]; then
            continue
        fi

        ZIO_TASKQ_PIDS=( `check_pid ${PID} "${NAME}" "zio_taskq" \
            "${ZIO_TASKQ_PIDS[*]}"` )
        ZIO_REQ_NUL_PIDS=( `check_pid ${PID} "${NAME}" "zio_req_nul" \
            "${ZIO_REQ_NUL_PIDS[*]}"` )
        ZIO_IRQ_NUL_PIDS=( `check_pid ${PID} "${NAME}" "zio_irq_nul" \
            "${ZIO_IRQ_NUL_PIDS[*]}"` )
        ZIO_REQ_RD_PIDS=( `check_pid ${PID} "${NAME}" "zio_req_rd" \
            "${ZIO_REQ_RD_PIDS[*]}"` )
        ZIO_IRQ_RD_PIDS=( `check_pid ${PID} "${NAME}" "zio_irq_rd" \
            "${ZIO_IRQ_RD_PIDS[*]}"` )
        ZIO_REQ_WR_PIDS=( `check_pid ${PID} "${NAME}" "zio_req_wr" \
            "${ZIO_REQ_WR_PIDS[*]}"` )
        ZIO_IRQ_WR_PIDS=( `check_pid ${PID} "${NAME}" "zio_irq_wr" \
            "${ZIO_IRQ_WR_PIDS[*]}"` )
        ZIO_REQ_FR_PIDS=( `check_pid ${PID} "${NAME}" "zio_req_fr" \
            "${ZIO_REQ_FR_PIDS[*]}"` )
        ZIO_IRQ_FR_PIDS=( `check_pid ${PID} "${NAME}" "zio_irq_fr" \
            "${ZIO_IRQ_FR_PIDS[*]}"` )
        ZIO_REQ_CM_PIDS=( `check_pid ${PID} "${NAME}" "zio_req_cm" \
            "${ZIO_REQ_CM_PIDS[*]}"` )
        ZIO_IRQ_CM_PIDS=( `check_pid ${PID} "${NAME}" "zio_irq_cm" \
            "${ZIO_IRQ_CM_PIDS[*]}"` )
        ZIO_REQ_CTL_PIDS=( `check_pid ${PID} "${NAME}" "zio_req_ctl" \
            "${ZIO_REQ_CTL_PIDS[*]}"` )
        ZIO_IRQ_CTL_PIDS=( `check_pid ${PID} "${NAME}" "zio_irq_ctl" \
            "${ZIO_IRQ_CTL_PIDS[*]}"` )
        TXG_QUIESCE_PIDS=( `check_pid ${PID} "${NAME}" "txg_quiesce" \
            "${TXG_QUIESCE_PIDS[*]}"` )
        TXG_SYNC_PIDS=( `check_pid ${PID} "${NAME}" "txg_sync" \
            "${TXG_SYNC_PIDS[*]}"` )
        TXG_TIMELIMIT_PIDS=( `check_pid ${PID} "${NAME}" "txg_timelimit" \
            "${TXG_TIMELIMIT_PIDS[*]}"` )
        ARC_RECLAIM_PIDS=( `check_pid ${PID} "${NAME}" "arc_reclaim" \
            "${ARC_RECLAIM_PIDS[*]}"` )
        L2ARC_FEED_PIDS=( `check_pid ${PID} "${NAME}" "l2arc_feed" \
            "${L2ARC_FEED_PIDS[*]}"` )
    done

    # Wait for kpios_io threads to start
    kill -s SIGHUP ${PPID}
    echo "* Waiting for kpios_io threads to start"
    while [ ${RUN_DONE} -eq 0 ]; do
        KPIOS_IO_PIDS=( `ps ax | grep kpios_io | grep -v grep | \
            sed 's/^ *//g' | cut -f1 -d' '` )
        if [ ${#KPIOS_IO_PIDS[@]} -gt 0 ]; then
            break;
        fi
        sleep 0.1
    done

    echo "`show_pids`" >${RUN_LOG_DIR}/${RUN_ID}/pids.txt
}
# log_pids
#
# Until RUN_DONE is set, take a snapshot roughly every POLL_INTERVAL
# seconds: the first line of /proc/<pid>/stat of every harvested pid is
# appended to ${RUN_LOG_DIR}/${RUN_ID}/pids-<timestamp> (or a
# "<pid exited>" marker when the process is gone) and /proc/diskstats is
# copied to ${RUN_LOG_DIR}/${RUN_ID}/disk-<timestamp>.
log_pids() {
    echo "--- Logging ZFS profile to ${RUN_LOG_DIR}/${RUN_ID}/ ---"

    # Every list filled in by aquire_pids.  ZIO_REQ_RD_PIDS was previously
    # misspelled here, so those threads never reached the log.
    ALL_PIDS=( ${ZIO_TASKQ_PIDS[@]} \
        ${ZIO_REQ_NUL_PIDS[@]} \
        ${ZIO_IRQ_NUL_PIDS[@]} \
        ${ZIO_REQ_RD_PIDS[@]} \
        ${ZIO_IRQ_RD_PIDS[@]} \
        ${ZIO_REQ_WR_PIDS[@]} \
        ${ZIO_IRQ_WR_PIDS[@]} \
        ${ZIO_REQ_FR_PIDS[@]} \
        ${ZIO_IRQ_FR_PIDS[@]} \
        ${ZIO_REQ_CM_PIDS[@]} \
        ${ZIO_IRQ_CM_PIDS[@]} \
        ${ZIO_REQ_CTL_PIDS[@]} \
        ${ZIO_IRQ_CTL_PIDS[@]} \
        ${TXG_QUIESCE_PIDS[@]} \
        ${TXG_SYNC_PIDS[@]} \
        ${TXG_TIMELIMIT_PIDS[@]} \
        ${ARC_RECLAIM_PIDS[@]} \
        ${L2ARC_FEED_PIDS[@]} \
        ${KPIOS_IO_PIDS[@]} )

    while [ ${RUN_DONE} -eq 0 ]; do
        NOW=`date +%s.%N`
        LOG_PIDS="${RUN_LOG_DIR}/${RUN_ID}/pids-${NOW}"
        LOG_DISK="${RUN_LOG_DIR}/${RUN_ID}/disk-${NOW}"

        for PID in "${ALL_PIDS[@]}"; do
            if [ -z "${PID}" ]; then
                continue;
            fi

            if [ -e /proc/${PID}/stat ]; then
                cat /proc/${PID}/stat | head -n1 >>${LOG_PIDS}
            else
                echo "<${PID} exited>" >>${LOG_PIDS}
            fi
        done

        cat /proc/diskstats >${LOG_DISK}

        # Sleep for whatever is left of the poll interval.  When the
        # snapshot itself took longer than the interval the remainder is
        # not positive and the sleep is skipped instead of handing sleep
        # an invalid negative duration.
        NOW2=`date +%s.%N`
        DELTA=`echo "${POLL_INTERVAL}-(${NOW2}-${NOW})" | bc`
        if [ "`echo "${DELTA} > 0" | bc`" = "1" ]; then
            sleep ${DELTA}
        fi
    done
}
# Entry point: harvest the pids first, then log snapshots until a SIGHUP
# sets RUN_DONE.
aquire_pids
log_pids
exit 0

102
scripts/survey.sh Normal file
View File

@ -0,0 +1,102 @@
#!/bin/bash
prog=survey.sh
. ../.script-config

# All results of one survey land in a per-user, per-kernel, per-day
# directory.
LOG=/home/$(whoami)/zpios-logs/$(uname -r)/zpios-$(date +%Y%m%d)/
mkdir -p ${LOG}

# run_survey NAME SPL_OPTIONS ZPOOL_OPTIONS ZPIOS_OPTIONS [PRE_COMMAND]
#
# Print a banner for NAME and run ./zpios.sh with the given option strings
# and ${LOG}/NAME/ as its log directory.  The optional PRE_COMMAND is
# passed on as a fifth argument.  Output is shown and also saved to
# ${LOG}/NAME.txt.
run_survey() {
    NAME=$1
    echo "----------------------- ${NAME} ------------------------------"
    if [ $# -gt 4 ]; then
        ./zpios.sh "$2" "$3" "$4" ${LOG}/${NAME}/ "$5" | \
            tee ${LOG}/${NAME}.txt
    else
        ./zpios.sh "$2" "$3" "$4" ${LOG}/${NAME}/ | \
            tee ${LOG}/${NAME}.txt
    fi
}

# Apply all tunings described below to generate some best case
# numbers for what is achievable with some more elbow grease.
run_survey "prefetch+zerocopy+checksum+pending1024+kmem" \
    "" \
    "zfs_prefetch_disable=1 zfs_vdev_max_pending=1024 zio_bulk_flags=0x100" \
    "--zerocopy" \
    "${CMDDIR}/zfs/zfs set checksum=off lustre"

# Baseline number for an out of the box config with no manual tuning.
# Ideally, we will want things to be automatically tuned and for this
# number to approach the tweaked out results above.
run_survey "baseline" "" "" ""

# Disable ZFS's prefetching.  For some reason that is still not clear to
# me the current prefetching policy is quite bad for a random workload.
# Allowing the algorithm to detect a random workload and not do anything
# may be the way to address this issue.
run_survey "prefetch" "" "zfs_prefetch_disable=1" ""

# As expected, simulating a zerocopy IO path improves performance
# by freeing up lots of CPU which is wasted moving data between buffers.
run_survey "zerocopy" "" "" "--zerocopy"

# Disabling checksumming should show some (if small) improvement
# simply due to freeing up a modest amount of CPU.
run_survey "checksum" "" "" "" \
    "${CMDDIR}/zfs/zfs set checksum=off lustre"

# Increasing the pending IO depth also seems to improve things, likely
# at the expense of latency.  This should be explored more because I'm
# seeing a much bigger impact there than I would have expected.  There
# may be some low hanging fruit to be found here.
run_survey "pending" "" "zfs_vdev_max_pending=1024" ""

# To avoid memory fragmentation issues our slab implementation can be
# based on a virtual address space.  Interestingly, we take a pretty
# substantial performance penalty for this somewhere in the low level
# IO drivers.  If we back the slab with kmem pages we see far better
# read performance numbers at the cost of memory fragmentation and general
# system instability due to large allocations.  This may be because of
# an optimization in the low level drivers due to the contiguous kmem
# based memory.  This needs to be explained.  The good news here is that
# with zerocopy interfaces added at the DMU layer we could guarantee
# kmem based memory for a pool of pages.
#
# 0x100 = KMC_KMEM - Force kmem_* based slab
# 0x200 = KMC_VMEM - Force vmem_* based slab
run_survey "kmem" "" "zio_bulk_flags=0x100" ""

133
scripts/zpios.sh Normal file
View File

@ -0,0 +1,133 @@
#!/bin/bash
# Set-up half of the zpios test driver: load the modules, create the
# "lustre" pool and wait for the zpios character device.
prog=zpios.sh
. ../.script-config
# Positional arguments: $1 extra SPL module options, $2 zpool module
# options, $3 extra zpios command line options, $4 log directory handed to
# zpios via --log, $5 / $6 optional commands run before / after zpios.
SPL_OPTIONS="spl_debug_mask=0 spl_debug_subsys=0 ${1}"
ZPOOL_OPTIONS=$2
ZPIOS_OPTIONS=$3
PROFILE_ZPIOS_LOGS=$4
ZPIOS_PRE=$5
ZPIOS_POST=$6
# Only referenced by the commented-out --prerun/--postrun options further
# down in this script.
PROFILE_ZPIOS_PRE=/home/behlendo/src/zfs/scripts/profile-zpios-pre.sh
PROFILE_ZPIOS_POST=/home/behlendo/src/zfs/scripts/profile-zpios-post.sh
# Device(s) the test pool is created on.
DEVICES="/dev/hda"
echo ------------------------- ZFS TEST LOG ---------------------------------
echo -n "Date = "; date
echo -n "Kernel = "; uname -r
echo ------------------------------------------------------------------------
echo
./load-zfs.sh "${SPL_OPTIONS}" "${ZPOOL_OPTIONS}"
echo ---------------------- SPL Sysctl Tunings ------------------------------
sysctl -A | grep spl
echo
echo ------------------- SPL/ZPOOL Module Tunings ---------------------------
# Module parameters are read from the parameters/ subdirectory when it
# exists and from the module directory itself otherwise.
# NOTE(review): the grep pattern [0-9] is unquoted and so subject to
# pathname expansion in the current directory.
if [ -d /sys/module/spl/parameters ]; then
grep [0-9] /sys/module/spl/parameters/*
grep [0-9] /sys/module/zpool/parameters/*
else
grep [0-9] /sys/module/spl/*
grep [0-9] /sys/module/zpool/*
fi
echo
echo "${CMDDIR}/zpool/zpool create -f lustre ${DEVICES}"
${CMDDIR}/zpool/zpool create -f lustre ${DEVICES}
echo "${CMDDIR}/zpool/zpool status lustre"
${CMDDIR}/zpool/zpool status lustre
echo "Waiting for /dev/zpios to come up..."
# Polls once per second with no timeout until the character device exists.
while [ ! -c /dev/zpios ]; do
sleep 1
done
# Optional caller-supplied command, run before the test.
if [ -n "${ZPIOS_PRE}" ]; then
${ZPIOS_PRE}
fi
# Usage: zpios
# --chunksize -c =values
# --chunksize_low -a =value
# --chunksize_high -b =value
# --chunksize_incr -g =value
# --offset -o =values
# --offset_low -m =value
# --offset_high -q =value
# --offset_incr -r =value
# --regioncount -n =values
# --regioncount_low -i =value
# --regioncount_high -j =value
# --regioncount_incr -k =value
# --threadcount -t =values
# --threadcount_low -l =value
# --threadcount_high -h =value
# --threadcount_incr -e =value
# --regionsize -s =values
# --regionsize_low -A =value
# --regionsize_high -B =value
# --regionsize_incr -C =value
# --cleanup -x
# --verify -V
# --zerocopy -z
# --threaddelay -T =jiffies
# --regionnoise -I =shift
# --chunknoise -N =bytes
# --prerun -P =pre-command
# --postrun -R =post-command
# --log -G =log directory
# --pool | --path -p =pool name
# --load -L =dmuio
# --help -? =this help
# --verbose -v =increase verbosity
# --prerun=${PROFILE_ZPIOS_PRE} \
# --postrun=${PROFILE_ZPIOS_POST} \
# Run half of the zpios test driver: build the zpios command line, run it,
# then destroy the pool, dump statistics and unload the modules.
#
# The caller's ${ZPIOS_OPTIONS} are placed after the fixed options and
# before --log.
CMD="${CMDDIR}/zpios/zpios \
--load=dmuio \
--path=lustre \
--chunksize=1M \
--regionsize=4M \
--regioncount=64 \
--threadcount=4 \
--offset=4M \
--cleanup \
--verbose \
--human-readable \
${ZPIOS_OPTIONS} \
--log=${PROFILE_ZPIOS_LOGS}"
echo
date
echo ${CMD}
# Expanded unquoted on purpose so the string is split into arguments.
$CMD
date
# Optional caller-supplied command, run after the test.
if [ -n "${ZPIOS_POST}" ]; then
${ZPIOS_POST}
fi
${CMDDIR}/zpool/zpool destroy lustre
echo ---------------------- SPL Sysctl Tunings ------------------------------
sysctl -A | grep spl
echo
echo ------------------------ KSTAT Statistics ------------------------------
echo ARCSTATS
cat /proc/spl/kstat/zfs/arcstats
echo
echo VDEV_CACHE_STATS
cat /proc/spl/kstat/zfs/vdev_cache_stats
echo
echo SLAB
cat /proc/spl/kmem/slab
echo
./unload-zfs.sh

View File

@ -8,3 +8,5 @@ subdir-m += libzpool # Kernel | User SPA/DMU/ZVOL/ZPL
subdir-m += libavl # Kernel &| User space AVL tree support subdir-m += libavl # Kernel &| User space AVL tree support
subdir-m += libnvpair # Kernel &| User space name/value support subdir-m += libnvpair # Kernel &| User space name/value support
subdir-m += libzcommon # Kernel &| User space common support subdir-m += libzcommon # Kernel &| User space common support
subdir-m += libzpios # Kernel DMU test app

View File

@ -0,0 +1,17 @@
# Build rules for the zpios kernel module (single object: kpios.o).
DISTFILES = Makefile.in *.c
MODULE := zpios
# Compile as kernel module. Needed symlinks created for all
# k* objects created by top level configure script.
# @KERNELCPPFLAGS@ and @LIBDIR@ are substituted when this Makefile.in is
# turned into a Makefile.
EXTRA_CFLAGS = @KERNELCPPFLAGS@
EXTRA_CFLAGS += -I@LIBDIR@/libzpios/include
EXTRA_CFLAGS += -I@LIBDIR@/libzcommon/include
EXTRA_CFLAGS += -I@LIBDIR@/libport/include
EXTRA_CFLAGS += -I@LIBDIR@/libavl/include
EXTRA_CFLAGS += -I@LIBDIR@/libnvpair/include
obj-m := ${MODULE}.o
${MODULE}-objs += kpios.o # Kernel PIOS test case

View File

@ -0,0 +1 @@
# Headers listed for distribution from this directory.
DISTFILES = kpios-ctl.h kpios-internal.h

View File

@ -0,0 +1,120 @@
/*
* This file is part of the ZFS Linux port.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* LLNL-CODE-403049
*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#ifndef _KPIOS_CTL_H
#define _KPIOS_CTL_H
/* Contains shared definitions which both the userspace
* and kernelspace portions of kpios must agree on.
*/
#ifndef _KERNEL
#include <stdint.h>
#endif
/* Device node constants: major number, minor count and device path. */
#define KPIOS_MAJOR 232 /* XXX - Arbitrary */
#define KPIOS_MINORS 1
#define KPIOS_DEV "/dev/kpios"
/*
 * NOTE(review): DMU_IO has the same value as DMU_WRITE below; presumably
 * it belongs to a different namespace (load type rather than test flag)
 * -- confirm against the users of these constants.
 */
#define DMU_IO 0x01
/* Test flags; each one is a distinct single bit. */
#define DMU_WRITE 0x01
#define DMU_READ 0x02
#define DMU_VERIFY 0x04
#define DMU_REMOVE 0x08
#define DMU_FPP 0x10
#define DMU_WRITE_ZC 0x20 /* Incompatible with DMU_VERIFY */
#define DMU_READ_ZC 0x40 /* Incompatible with DMU_VERIFY */
/* Sizes, in bytes, of the fixed name and path buffers in kpios_cmd_t. */
#define KPIOS_NAME_SIZE 16
#define KPIOS_PATH_SIZE 128
/* Phase names. */
#define PHASE_PRE "pre"
#define PHASE_POST "post"
#define PHASE_WRITE "write"
#define PHASE_READ "read"
/* Magic number for configuration requests (presumably cfg_magic). */
#define KPIOS_CFG_MAGIC 0x87237190U

/* Configuration request: a command, one argument and one response slot. */
struct kpios_cfg {
	uint32_t cfg_magic;	/* Unique magic */
	int32_t cfg_cmd;	/* Config command */
	int32_t cfg_arg1;	/* Config command arg 1 */
	int32_t cfg_rc1;	/* Config response 1 */
};

typedef struct kpios_cfg kpios_cfg_t;
/* One timed interval: start and stop timestamps plus a delta slot. */
struct kpios_time {
	struct timespec start;
	struct timespec stop;
	struct timespec delta;
};

typedef struct kpios_time kpios_time_t;
/*
 * Timing and volume counters: five timed intervals followed by
 * data/chunk counters for the write and read sides.
 */
struct kpios_stats {
	kpios_time_t total_time;
	kpios_time_t cr_time;
	kpios_time_t rm_time;
	kpios_time_t wr_time;
	kpios_time_t rd_time;
	uint64_t wr_data;
	uint64_t wr_chunks;
	uint64_t rd_data;
	uint64_t rd_chunks;
};

typedef struct kpios_stats kpios_stats_t;
/* Magic number for test commands (presumably cmd_magic). */
#define KPIOS_CMD_MAGIC 0x49715385U

/*
 * Description of one test run.  The fixed part is followed by a trailing
 * opaque data region of cmd_data_size bytes.
 */
struct kpios_cmd {
	uint32_t cmd_magic;		/* Unique magic */
	uint32_t cmd_id;		/* Run ID */
	char cmd_pool[KPIOS_NAME_SIZE];	/* Pool name */
	uint64_t cmd_chunk_size;	/* Chunk size */
	uint32_t cmd_thread_count;	/* Thread count */
	uint32_t cmd_region_count;	/* Region count */
	uint64_t cmd_region_size;	/* Region size */
	uint64_t cmd_offset;		/* Region offset */
	uint32_t cmd_region_noise;	/* Region noise */
	uint32_t cmd_chunk_noise;	/* Chunk noise */
	uint32_t cmd_thread_delay;	/* Thread delay */
	uint32_t cmd_flags;		/* Test flags */
	char cmd_pre[KPIOS_PATH_SIZE];	/* Pre-exec hook */
	char cmd_post[KPIOS_PATH_SIZE];	/* Post-exec hook */
	char cmd_log[KPIOS_PATH_SIZE];	/* Requested log dir */
	uint64_t cmd_data_size;		/* Opaque data size */
	char cmd_data_str[0];		/* Opaque data region */
};

typedef struct kpios_cmd kpios_cmd_t;
/*
 * Valid ioctls: read/write requests of type 'f', numbers 101 and 102.
 * NOTE(review): the size argument is `long`, not kpios_cfg_t/kpios_cmd_t
 * -- confirm this is intended.
 */
#define KPIOS_CFG _IOWR('f', 101, long)
#define KPIOS_CMD _IOWR('f', 102, long)
/* Valid configuration commands (presumably values of cfg_cmd) */
#define KPIOS_CFG_BUFFER_CLEAR 0x001 /* Clear text buffer */
#define KPIOS_CFG_BUFFER_SIZE 0x002 /* Resize text buffer */
#endif /* _KPIOS_CTL_H */

View File

@ -0,0 +1,137 @@
/*
* This file is part of the ZFS Linux port.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* LLNL-CODE-403049
*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#ifndef _KPIOS_INTERNAL_H
#define _KPIOS_INTERNAL_H
#include "kpios-ctl.h"
#define OBJ_SIZE 64

/* Declared early so thread_data_t can point at it before its definition. */
struct run_args;

/* An object set together with an object number. */
struct dmu_obj {
	objset_t *os;
	uint64_t obj;
};

typedef struct dmu_obj dmu_obj_t;
/*
 * State of one IO thread: the run it belongs to, its thread number, a
 * result code, its own statistics and a lock.
 */
struct thread_data {
	struct run_args *run_args;
	int thread_no;
	int rc;
	kpios_stats_t stats;
	kmutex_t lock;
};

typedef struct thread_data thread_data_t;
/*
 * One IO region: write/read/initial/maximum offsets, the object backing
 * it, its own statistics and a lock.
 */
struct kpios_region {
	__u64 wr_offset;
	__u64 rd_offset;
	__u64 init_offset;
	__u64 max_offset;
	dmu_obj_t obj;
	kpios_stats_t stats;
	kmutex_t lock;
};

typedef struct kpios_region kpios_region_t;
/*
 * Everything belonging to one run: the requested configuration, the
 * control state shared by the threads, and the results.  The region array
 * is allocated together with the structure and therefore has to stay the
 * last member.
 */
struct run_args {
	/* Configuration; the names mirror the cmd_* members of kpios_cmd_t */
	int id;
	char pool[KPIOS_NAME_SIZE];
	__u64 chunk_size;
	__u32 thread_count;
	__u32 region_count;
	__u64 region_size;
	__u64 offset;
	__u32 region_noise;
	__u32 chunk_noise;
	__u32 thread_delay;
	__u32 flags;
	char pre[KPIOS_PATH_SIZE];
	char post[KPIOS_PATH_SIZE];
	char log[KPIOS_PATH_SIZE];

	/* Control state */
	objset_t *os;
	wait_queue_head_t waitq;
	volatile uint64_t threads_done;
	kmutex_t lock_work;
	kmutex_t lock_ctl;
	__u32 region_next;

	/* Results */
	struct file *file;
	kpios_stats_t stats;
	thread_data_t **threads;
	kpios_region_t regions[0];	/* Must be last element */
};

typedef struct run_args run_args_t;
/* Text buffer size, and the tail area in which no new write may start. */
#define KPIOS_INFO_BUFFER_SIZE 65536
#define KPIOS_INFO_BUFFER_REDZONE 1024

/*
 * Text buffer that kpios_print() appends to: info_buffer is the start of
 * the buffer, info_size its size and info_head the position of the next
 * write.  info_lock is taken around every append.
 */
struct kpios_info {
	spinlock_t info_lock;
	int info_size;
	char *info_buffer;
	char *info_head;	/* Internal kernel use only */
};

typedef struct kpios_info kpios_info_t;
/*
 * kpios_print(file, format, args...)
 *
 * Append a printf-style formatted message to the text buffer reached
 * through file->private_data (a kpios_info_t) and advance info_head past
 * the text that was written.  The buffer lock is held for the duration.
 *
 * Evaluates to the number of characters appended, or to -EOVERFLOW when
 * the write position already lies inside the red zone at the end of the
 * buffer.  The write is bounded by the space left in the buffer: a
 * message that does not fit is truncated (and still NUL terminated)
 * instead of running past the end, and the truncated length is returned.
 *
 * `file` may be any pointer expression, and the macro may be used with a
 * format string only (no further arguments).
 */
#define kpios_print(file, format, args...) \
({ kpios_info_t *_info_ = (kpios_info_t *)(file)->private_data; \
	int _rc_; \
	int _used_; \
	int _left_; \
 \
	ASSERT(_info_); \
	ASSERT(_info_->info_buffer); \
 \
	spin_lock(&_info_->info_lock); \
 \
	_used_ = (int)(_info_->info_head - _info_->info_buffer); \
 \
	/* Don't allow the kernel to start a write in the red zone */ \
	if (_used_ > (_info_->info_size - KPIOS_INFO_BUFFER_REDZONE)) { \
		_rc_ = -EOVERFLOW; \
	} else { \
		/* At least the red zone is still free at this point */ \
		_left_ = _info_->info_size - _used_; \
		_rc_ = snprintf(_info_->info_head, _left_, format, ##args); \
		/* snprintf reports the untruncated length, clamp it */ \
		if (_rc_ >= _left_) \
			_rc_ = _left_ - 1; \
		if (_rc_ >= 0) \
			_info_->info_head += _rc_; \
	} \
 \
	spin_unlock(&_info_->info_lock); \
	_rc_; \
})
/*
 * kpios_vprint(file, test, format, args...)
 *
 * Like kpios_print(), but the message is prefixed with `test` printed in
 * a field KPIOS_NAME_SIZE characters wide, followed by ": ".  `format`
 * must be a string literal because it is concatenated with the prefix.
 * The variadic part may be empty.
 */
#define kpios_vprint(file, test, format, args...) \
	kpios_print(file, "%*s: " format, KPIOS_NAME_SIZE, test, ##args)
#endif /* _KPIOS_INTERNAL_H */

1295
zfs/lib/libzpios/kpios.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -4,3 +4,4 @@ subdir-m += zpool
#subdir-m += ztest #subdir-m += ztest
#subdir-m += zdump #subdir-m += zdump
#subdir-m += zinject #subdir-m += zinject
subdir-m += zpios

View File

@ -0,0 +1,13 @@
# Build description for the zpios host program.
DISTFILES = Makefile.in *.c zpios.h
CMD := zpios
# Host compiler flags: configure-provided flags plus include search paths.
HOSTCFLAGS += @HOSTCFLAGS@
# NOTE(review): "lzpios" may be a typo for "zpios" - confirm the directory exists.
HOSTCFLAGS += -I@CMDDIR@/lzpios
HOSTCFLAGS += -I@LIBDIR@/libzpios/include
hostprogs-y := ${CMD}
always := $(hostprogs-y)
# Objects linked into the program.
${CMD}-objs += zpios_main.o
${CMD}-objs += zpios_util.o

117
zfs/zcmd/zpios/zpios.h Normal file
View File

@ -0,0 +1,117 @@
/*
* This file is part of the ZFS Linux port.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* LLNL-CODE-403049
*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
#ifndef _ZPIOS_H
#define _ZPIOS_H
#include <kpios-ctl.h>
#define VERSION_SIZE		64

/*
 * Regular expressions used to validate option arguments. regex_match()
 * compiles them with REG_EXTENDED | REG_ICASE, so the unit suffix letters
 * match in either case.
 *
 *   REGEX_NUMBERS        one plain decimal number
 *   REGEX_NUMBERS_COMMA  comma separated list of plain decimal numbers
 *   REGEX_SIZE           one number followed by exactly one k/m/g/t suffix
 *   REGEX_SIZE_COMMA     comma separated list of REGEX_SIZE items
 *
 * Every item of REGEX_SIZE_COMMA takes exactly one suffix letter, the same
 * as REGEX_SIZE; a repeated suffix such as "1kk" is rejected.
 */
#define REGEX_NUMBERS		"^[0-9]*[0-9]$"
#define REGEX_NUMBERS_COMMA	"^([0-9]+,)*[0-9]+$"
#define REGEX_SIZE		"^[0-9][0-9]*[kmgt]$"
#define REGEX_SIZE_COMMA	"^([0-9][0-9]*[kmgt],)*[0-9][0-9]*[kmgt]$"
/*
 * Bits recording which form of an option was seen on the command line:
 * FLAG_SET is raised by set_count() for the value/list form, and
 * FLAG_LOW, FLAG_HIGH and FLAG_INCR by set_lhi() for the range form.
 */
#define FLAG_SET 0x01
#define FLAG_LOW 0x02
#define FLAG_HIGH 0x04
#define FLAG_INCR 0x08
/* Boolean constants */
#define TRUE 1
#define FALSE 0
/*
 * Binary size multipliers. KB, MB and GB fit in an int and keep that type.
 * TB is 2^40 and would overflow int arithmetic, so its last multiplication
 * is done in unsigned long long.
 */
#define KB			(1024)
#define MB			(KB * 1024)
#define GB			(MB * 1024)
#define TB			(GB * 1024ULL)
/* All offsets, sizes and counts can be passed to the application in
 * multiple ways.
 * 1. a single value (stored in val[0], val_count will be 1)
 * 2. a comma separated list of values (stored in val[], using val_count)
 * 3. a range given as low, high and increment (val_count must be 0);
 *    get_next() applies the increment as a percentage of val_low
 */
typedef struct pios_range_repeat {
uint64_t val[32]; /* Explicit value(s): single value or comma list */
uint64_t val_count; /* Num of valid entries in val[], 0 for a range */
uint64_t val_low; /* Range form: first value */
uint64_t val_high; /* Range form: iteration stops once a value exceeds this */
uint64_t val_inc_perc; /* Range form: step, in percent of val_low */
uint64_t next_val; /* Used for multiple runs in get_next() */
} range_repeat_t;
/*
 * Parsed command line plus the iteration state used while stepping through
 * every combination of thread count, chunk size, region size, region count
 * and offset.
 */
typedef struct cmd_args {
range_repeat_t T; /* Thread count */
range_repeat_t N; /* Region count */
range_repeat_t O; /* Offset count */
range_repeat_t C; /* Chunksize */
range_repeat_t S; /* Regionsize */
const char *pool; /* Pool */
uint32_t flags; /* Flags */
uint32_t io_type; /* DMUIO only */
uint32_t verbose; /* Verbose */
uint32_t human_readable; /* Human readable output */
uint64_t regionnoise; /* Region noise */
uint64_t chunknoise; /* Chunk noise */
uint64_t thread_delay; /* Thread delay */
char pre[KPIOS_PATH_SIZE]; /* Pre-exec hook */
char post[KPIOS_PATH_SIZE]; /* Post-exec hook */
char log[KPIOS_PATH_SIZE]; /* Requested log dir */
/* Control */
int current_id; /* Run id, incremented after every run */
uint64_t current_T; /* Values selected for the run in progress */
uint64_t current_N;
uint64_t current_C;
uint64_t current_S;
uint64_t current_O;
uint32_t rc; /* errno saved when the run ioctl fails */
} cmd_args_t;
/*
 * Helpers implemented in zpios_util.c. The set_*() functions return 0 on
 * success and a non-zero error code on failure.
 */
int set_count(char *pattern1, char *pattern2, range_repeat_t *range,
char *optarg, uint32_t *flags, char *arg);
int set_lhi(char *pattern, range_repeat_t *range, char *optarg,
int flag, uint32_t *flag_thread, char *arg);
int set_noise(uint64_t *noise, char *optarg, char *arg);
int set_load_params(cmd_args_t *args, char *optarg);
/* Returns 1 when the combination of flags is acceptable, 0 otherwise. */
int check_mutual_exclusive_command_lines(uint32_t flag, char *arg);
/* Result table output: header line, then one row per run. */
void print_stats_header(void);
void print_stats(cmd_args_t *args, kpios_cmd_t *cmd);
#endif /* _ZPIOS_H */

619
zfs/zcmd/zpios/zpios_main.c Normal file
View File

@ -0,0 +1,619 @@
/*
* This file is part of the ZFS Linux port.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* LLNL-CODE-403049
*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Kernel PIOS DMU implementation originally derived from PIOS test code.
* Character control interface derived from SPL code.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <getopt.h>
#include <assert.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include "zpios.h"
/*
 * Short option string handed to getopt_long(). A trailing ':' marks an
 * option that takes an argument. "N:" (--chunknoise) and "H"
 * (--human-readable) are listed because args_init() handles them and
 * usage() advertises them.
 */
static const char short_opt[] = "t:l:h:e:n:i:j:k:c:u:a:b:g:L:P:R:I:N:"
				"G:T:VzHs:A:B:C:o:m:q:r:fwxdp:v?";
/*
 * Long option table for getopt_long(). Every entry has a NULL flag pointer,
 * so getopt_long() returns the short option character given in the last
 * column. "path" and "pool" both map to 'p'.
 */
static const struct option long_opt[] = {
	/* Chunk size: list form, then range form */
	{ "chunksize",		required_argument,	NULL, 'c' },
	{ "chunksize_low",	required_argument,	NULL, 'a' },
	{ "chunksize_high",	required_argument,	NULL, 'b' },
	{ "chunksize_incr",	required_argument,	NULL, 'g' },
	/* Offset */
	{ "offset",		required_argument,	NULL, 'o' },
	{ "offset_low",		required_argument,	NULL, 'm' },
	{ "offset_high",	required_argument,	NULL, 'q' },
	{ "offset_incr",	required_argument,	NULL, 'r' },
	/* Region count */
	{ "regioncount",	required_argument,	NULL, 'n' },
	{ "regioncount_low",	required_argument,	NULL, 'i' },
	{ "regioncount_high",	required_argument,	NULL, 'j' },
	{ "regioncount_incr",	required_argument,	NULL, 'k' },
	/* Thread count */
	{ "threadcount",	required_argument,	NULL, 't' },
	{ "threadcount_low",	required_argument,	NULL, 'l' },
	{ "threadcount_high",	required_argument,	NULL, 'h' },
	{ "threadcount_incr",	required_argument,	NULL, 'e' },
	/* Region size */
	{ "regionsize",		required_argument,	NULL, 's' },
	{ "regionsize_low",	required_argument,	NULL, 'A' },
	{ "regionsize_high",	required_argument,	NULL, 'B' },
	{ "regionsize_incr",	required_argument,	NULL, 'C' },
	/* Behaviour switches */
	{ "cleanup",		no_argument,		NULL, 'x' },
	{ "verify",		no_argument,		NULL, 'V' },
	{ "zerocopy",		no_argument,		NULL, 'z' },
	/* Noise and delay */
	{ "threaddelay",	required_argument,	NULL, 'T' },
	{ "regionnoise",	required_argument,	NULL, 'I' },
	{ "chunknoise",		required_argument,	NULL, 'N' },
	/* Hooks, logging, target and load */
	{ "prerun",		required_argument,	NULL, 'P' },
	{ "postrun",		required_argument,	NULL, 'R' },
	{ "log",		required_argument,	NULL, 'G' },
	{ "path",		required_argument,	NULL, 'p' },
	{ "pool",		required_argument,	NULL, 'p' },
	{ "load",		required_argument,	NULL, 'L' },
	/* Output control */
	{ "human-readable",	no_argument,		NULL, 'H' },
	{ "help",		no_argument,		NULL, '?' },
	{ "verbose",		no_argument,		NULL, 'v' },
	/* Terminator */
	{ NULL,			0,			NULL, 0 },
};
/*
 * Control file descriptor. It starts at -1, the value dev_fini() treats as
 * "not open"; a zero-initialised descriptor would make dev_fini() close
 * fd 0 whenever the control device was never opened.
 */
static int zpiosctl_fd = -1;
/*
 * NOTE(review): nothing in this file writes zpios_version, so main() prints
 * an empty string for it in verbose mode - confirm whether a version query
 * is missing.
 */
static char zpios_version[VERSION_SIZE]; /* Kernel version string */
/* Allocated by dev_init() with the size reported by dev_size(0). */
static char *zpios_buffer = NULL; /* Scratch space area */
static int zpios_buffer_size = 0; /* Scratch space size */
/*
 * Write the option summary to stderr.
 *
 * Returns 0 in all cases.
 */
static int
usage(void)
{
	static const char banner[] = "Usage: zpios\n";
	static const char options[] =
	    " --chunksize -c =values\n"
	    " --chunksize_low -a =value\n"
	    " --chunksize_high -b =value\n"
	    " --chunksize_incr -g =value\n"
	    " --offset -o =values\n"
	    " --offset_low -m =value\n"
	    " --offset_high -q =value\n"
	    " --offset_incr -r =value\n"
	    " --regioncount -n =values\n"
	    " --regioncount_low -i =value\n"
	    " --regioncount_high -j =value\n"
	    " --regioncount_incr -k =value\n"
	    " --threadcount -t =values\n"
	    " --threadcount_low -l =value\n"
	    " --threadcount_high -h =value\n"
	    " --threadcount_incr -e =value\n"
	    " --regionsize -s =values\n"
	    " --regionsize_low -A =value\n"
	    " --regionsize_high -B =value\n"
	    " --regionsize_incr -C =value\n"
	    " --cleanup -x\n"
	    " --verify -V\n"
	    " --zerocopy -z\n"
	    " --threaddelay -T =jiffies\n"
	    " --regionnoise -I =shift\n"
	    " --chunknoise -N =bytes\n"
	    " --prerun -P =pre-command\n"
	    " --postrun -R =post-command\n"
	    " --log -G =log directory\n"
	    " --pool | --path -p =pool name\n"
	    " --load -L =dmuio\n"
	    " --human-readable -H\n"
	    " --help -? =this help\n"
	    " --verbose -v =increase verbosity\n\n";

	/* Neither string contains a conversion, so fputs() emits it verbatim. */
	fputs(banner, stderr);
	fputs(options, stderr);

	return 0;
}
/*
 * Release an argument block returned by args_init().
 * The pointer must not be NULL; this is enforced with assert().
 */
static void
args_fini(cmd_args_t *args)
{
	assert(args != NULL);

	free(args);
}
/*
 * Parse the command line into a newly allocated cmd_args_t.
 *
 * Returns the populated structure, which the caller releases with
 * args_fini(), or NULL when no arguments were given, allocation failed, an
 * option was invalid, the list and range forms of an option were mixed, the
 * pool is missing, or --zerocopy was combined with --verify. usage() is
 * printed on every failure except allocation failure.
 */
static cmd_args_t *
args_init(int argc, char **argv)
{
	cmd_args_t *args;
	uint32_t fl_th = 0;
	uint32_t fl_rc = 0;
	uint32_t fl_of = 0;
	uint32_t fl_rs = 0;
	uint32_t fl_cs = 0;
	int c, rc, ok;

	if (argc == 1) {
		usage();
		return (cmd_args_t *)NULL;
	}

	/*
	 * Zeroed allocation: the bounded strncpy() calls below copy at most
	 * KPIOS_PATH_SIZE - 1 bytes and rely on the trailing zero byte for
	 * termination.
	 */
	args = calloc(1, sizeof(*args));
	if (args == NULL)
		return NULL;

	while ((c = getopt_long(argc, argv, short_opt, long_opt, NULL)) != -1) {
		rc = 0;

		switch (c) {
		case 'v': /* --verbose */
			args->verbose++;
			break;
		case 't': /* --threadcount */
			rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA,
			    &args->T, optarg, &fl_th, "threadcount");
			break;
		case 'l': /* --threadcount_low */
			rc = set_lhi(REGEX_NUMBERS, &args->T, optarg,
			    FLAG_LOW, &fl_th, "threadcount_low");
			break;
		case 'h': /* --threadcount_high */
			rc = set_lhi(REGEX_NUMBERS, &args->T, optarg,
			    FLAG_HIGH, &fl_th, "threadcount_high");
			break;
		case 'e': /* --threadcount_incr */
			rc = set_lhi(REGEX_NUMBERS, &args->T, optarg,
			    FLAG_INCR, &fl_th, "threadcount_incr");
			break;
		case 'n': /* --regioncount */
			rc = set_count(REGEX_NUMBERS, REGEX_NUMBERS_COMMA,
			    &args->N, optarg, &fl_rc, "regioncount");
			break;
		case 'i': /* --regioncount_low */
			rc = set_lhi(REGEX_NUMBERS, &args->N, optarg,
			    FLAG_LOW, &fl_rc, "regioncount_low");
			break;
		case 'j': /* --regioncount_high */
			rc = set_lhi(REGEX_NUMBERS, &args->N, optarg,
			    FLAG_HIGH, &fl_rc, "regioncount_high");
			break;
		case 'k': /* --regioncount_incr */
			rc = set_lhi(REGEX_NUMBERS, &args->N, optarg,
			    FLAG_INCR, &fl_rc, "regioncount_incr");
			break;
		case 'o': /* --offset */
			rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
			    &args->O, optarg, &fl_of, "offset");
			break;
		case 'm': /* --offset_low */
			rc = set_lhi(REGEX_SIZE, &args->O, optarg,
			    FLAG_LOW, &fl_of, "offset_low");
			break;
		case 'q': /* --offset_high */
			rc = set_lhi(REGEX_SIZE, &args->O, optarg,
			    FLAG_HIGH, &fl_of, "offset_high");
			break;
		case 'r': /* --offset_incr */
			rc = set_lhi(REGEX_NUMBERS, &args->O, optarg,
			    FLAG_INCR, &fl_of, "offset_incr");
			break;
		case 'c': /* --chunksize */
			rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
			    &args->C, optarg, &fl_cs, "chunksize");
			break;
		case 'a': /* --chunksize_low */
			rc = set_lhi(REGEX_SIZE, &args->C, optarg,
			    FLAG_LOW, &fl_cs, "chunksize_low");
			break;
		case 'b': /* --chunksize_high */
			rc = set_lhi(REGEX_SIZE, &args->C, optarg,
			    FLAG_HIGH, &fl_cs, "chunksize_high");
			break;
		case 'g': /* --chunksize_incr */
			rc = set_lhi(REGEX_NUMBERS, &args->C, optarg,
			    FLAG_INCR, &fl_cs, "chunksize_incr");
			break;
		case 's': /* --regionsize */
			rc = set_count(REGEX_SIZE, REGEX_SIZE_COMMA,
			    &args->S, optarg, &fl_rs, "regionsize");
			break;
		case 'A': /* --regionsize_low */
			rc = set_lhi(REGEX_SIZE, &args->S, optarg,
			    FLAG_LOW, &fl_rs, "regionsize_low");
			break;
		case 'B': /* --regionsize_high */
			rc = set_lhi(REGEX_SIZE, &args->S, optarg,
			    FLAG_HIGH, &fl_rs, "regionsize_high");
			break;
		case 'C': /* --regionsize_incr */
			rc = set_lhi(REGEX_NUMBERS, &args->S, optarg,
			    FLAG_INCR, &fl_rs, "regionsize_incr");
			break;
		case 'L': /* --load */
			rc = set_load_params(args, optarg);
			break;
		case 'p': /* --pool */
			args->pool = optarg;
			break;
		case 'x': /* --cleanup */
			args->flags |= DMU_REMOVE;
			break;
		case 'P': /* --prerun */
			strncpy(args->pre, optarg, KPIOS_PATH_SIZE - 1);
			break;
		case 'R': /* --postrun */
			strncpy(args->post, optarg, KPIOS_PATH_SIZE - 1);
			break;
		case 'G': /* --log */
			strncpy(args->log, optarg, KPIOS_PATH_SIZE - 1);
			break;
		case 'I': /* --regionnoise */
			rc = set_noise(&args->regionnoise, optarg,
			    "regionnoise");
			break;
		case 'N': /* --chunknoise */
			rc = set_noise(&args->chunknoise, optarg,
			    "chunknoise");
			break;
		case 'T': /* --threaddelay */
			rc = set_noise(&args->thread_delay, optarg,
			    "threaddelay");
			break;
		case 'V': /* --verify */
			args->flags |= DMU_VERIFY;
			break;
		case 'z': /* --zerocopy */
			args->flags |= (DMU_WRITE_ZC | DMU_READ_ZC);
			break;
		case 'H': /* --human-readable */
			args->human_readable = 1;
			break;
		case '?': /* --help, or an option getopt_long() rejected */
			rc = 1;
			break;
		default:
			fprintf(stderr,"Unknown option '%s'\n",argv[optind-1]);
			rc = EINVAL;
			break;
		}

		if (rc) {
			usage();
			args_fini(args);
			return NULL;
		}
	}

	/*
	 * Run all five checks so every problem is reported, then refuse to
	 * continue if any of them failed. Each check returns 1 when the
	 * option combination is acceptable and 0 otherwise.
	 */
	ok = check_mutual_exclusive_command_lines(fl_th, "threadcount");
	ok &= check_mutual_exclusive_command_lines(fl_rc, "regioncount");
	ok &= check_mutual_exclusive_command_lines(fl_of, "offset");
	ok &= check_mutual_exclusive_command_lines(fl_rs, "regionsize");
	ok &= check_mutual_exclusive_command_lines(fl_cs, "chunksize");
	if (!ok) {
		usage();
		args_fini(args);
		return NULL;
	}

	if (args->pool == NULL) {
		fprintf(stderr, "Error: Pool not specificed\n");
		usage();
		args_fini(args);
		return NULL;
	}

	if ((args->flags & (DMU_WRITE_ZC | DMU_READ_ZC)) &&
	    (args->flags & DMU_VERIFY)) {
		fprintf(stderr, "Error, --zerocopy incompatible --verify, "
		    "used for performance analysis only\n");
		usage();
		args_fini(args);
		return NULL;
	}

	return args;
}
/*
 * Send KPIOS_CFG_BUFFER_CLEAR to the control device and seek the control
 * descriptor back to offset 0.
 *
 * Returns the ioctl() result; a failure is also reported on stderr.
 */
static int
dev_clear(void)
{
	kpios_cfg_t request;
	int status;

	memset(&request, 0, sizeof(request));
	request.cfg_magic = KPIOS_CFG_MAGIC;
	request.cfg_cmd   = KPIOS_CFG_BUFFER_CLEAR;
	request.cfg_arg1  = 0;

	status = ioctl(zpiosctl_fd, KPIOS_CFG, &request);
	if (status != 0) {
		fprintf(stderr, "Ioctl() error %lu / %d: %d\n",
		    (unsigned long) KPIOS_CFG, request.cfg_cmd, errno);
	}

	/* The seek happens whether or not the ioctl succeeded. */
	lseek(zpiosctl_fd, 0, SEEK_SET);

	return status;
}
/*
 * Send KPIOS_CFG_BUFFER_SIZE with the requested size. A size of zero only
 * queries the current size.
 *
 * Returns cfg_rc1 from the reply, or the ioctl() result after reporting the
 * failure on stderr.
 */
static int
dev_size(int size)
{
	kpios_cfg_t request;
	int status;

	memset(&request, 0, sizeof(request));
	request.cfg_magic = KPIOS_CFG_MAGIC;
	request.cfg_cmd   = KPIOS_CFG_BUFFER_SIZE;
	request.cfg_arg1  = size;

	status = ioctl(zpiosctl_fd, KPIOS_CFG, &request);
	if (status == 0)
		return request.cfg_rc1;

	fprintf(stderr, "Ioctl() error %lu / %d: %d\n",
	    (unsigned long) KPIOS_CFG, request.cfg_cmd, errno);

	return status;
}
/*
 * Free the scratch buffer and close the control descriptor unless it is
 * -1. A close() failure is reported on stderr.
 */
static void
dev_fini(void)
{
	/* free(NULL) does nothing, so no guard is needed. */
	free(zpios_buffer);

	if (zpiosctl_fd == -1)
		return;

	if (close(zpiosctl_fd) == -1)
		fprintf(stderr, "Unable to close %s: %d\n",
		    KPIOS_DEV, errno);
}
/*
 * Open the control device, clear its buffer, query the buffer size and
 * allocate a zeroed scratch buffer of that size.
 *
 * Returns 0 on success. On failure the descriptor is closed and reset to
 * -1, so a later dev_fini() does not close it a second time, and a non-zero
 * value is returned: the saved errno from open(), the dev_clear() or
 * dev_size() result, or ENOMEM.
 */
static int
dev_init(void)
{
	int rc;

	zpiosctl_fd = open(KPIOS_DEV, O_RDONLY);
	if (zpiosctl_fd == -1) {
		/* Save errno first; fprintf() is allowed to change it. */
		rc = errno;
		fprintf(stderr, "Unable to open %s: %d\n"
		    "Is the zpios module loaded?\n", KPIOS_DEV, rc);
		goto error;
	}

	if ((rc = dev_clear()))
		goto error;

	if ((rc = dev_size(0)) < 0)
		goto error;

	zpios_buffer_size = rc;
	zpios_buffer = calloc(1, zpios_buffer_size);
	if (zpios_buffer == NULL) {
		rc = ENOMEM;
		goto error;
	}

	return 0;

error:
	if (zpiosctl_fd != -1) {
		if (close(zpiosctl_fd) == -1) {
			fprintf(stderr, "Unable to close %s: %d\n",
			    KPIOS_DEV, errno);
		}
		/* Mark the descriptor as gone whatever close() returned. */
		zpiosctl_fd = -1;
	}

	return rc;
}
/*
 * Produce the next value of a range_repeat_t.
 *
 * val   - receives the value when 1 is returned
 * range - source; range->next_val is the iteration cursor, and callers
 *         reset it to 0 to iterate again
 *
 * Returns 1 when a value was stored in *val, 0 when the sequence is
 * exhausted.
 *
 * In range form (val_count == 0) the n-th value is
 * val_low + val_low * (n * val_inc_perc) / 100, and iteration stops once
 * that exceeds val_high. With a zero increment or a zero val_low the
 * formula never grows and would repeat for ever, so such a range yields
 * val_low exactly once; next_val == UINT64_MAX marks it as consumed.
 */
static int
get_next(uint64_t *val, range_repeat_t *range)
{
	/* if low, incr, high is given */
	if (range->val_count == 0) {
		if (range->next_val == UINT64_MAX)
			return 0; /* Degenerate range already delivered */

		*val = (range->val_low) +
		    (range->val_low * range->next_val / 100);

		if (*val > range->val_high)
			return 0; /* No more values, limit exceeded */

		if (range->val_inc_perc == 0 || range->val_low == 0)
			range->next_val = UINT64_MAX;
		else
			range->next_val += range->val_inc_perc;

		return 1; /* more values to come */
	}

	/* if only one val is given */
	if (range->val_count == 1) {
		if (range->next_val)
			return 0; /* No more values, we only have one */

		*val = range->val[0];
		range->next_val = 1;
		return 1; /* more values to come */
	}

	/* if comma separated values are given */
	if (range->next_val > range->val_count - 1)
		return 0; /* No more values, limit exceeded */

	*val = range->val[range->next_val];
	range->next_val++;
	return 1; /* more values to come */
}
/*
 * Execute one run with the given parameters and print its result row.
 *
 * The command is followed by room for T + N + 1 kpios_stats_t records.
 * When the ioctl fails, errno is saved in args->rc before print_stats() is
 * called. In verbose mode the text available on the control descriptor is
 * read and echoed to stdout.
 *
 * Returns ENOMEM when the command cannot be allocated, otherwise the
 * ioctl() result.
 */
static int
run_one(cmd_args_t *args, uint32_t id, uint32_t T, uint32_t N,
	uint64_t C, uint64_t S, uint64_t O)
{
	kpios_cmd_t *cmd;
	size_t data_size, cmd_size;
	ssize_t nread;
	int rc;

	dev_clear();

	/* Widen before adding so the record count cannot wrap in 32 bits. */
	data_size = ((size_t)T + (size_t)N + 1) * sizeof(kpios_stats_t);
	cmd_size = sizeof(kpios_cmd_t) + data_size;

	/* Zeroed, so the bounded string copies below stay terminated. */
	cmd = calloc(1, cmd_size);
	if (cmd == NULL)
		return ENOMEM;

	cmd->cmd_magic = KPIOS_CMD_MAGIC;
	strncpy(cmd->cmd_pool, args->pool, KPIOS_NAME_SIZE - 1);
	strncpy(cmd->cmd_pre, args->pre, KPIOS_PATH_SIZE - 1);
	strncpy(cmd->cmd_post, args->post, KPIOS_PATH_SIZE - 1);
	strncpy(cmd->cmd_log, args->log, KPIOS_PATH_SIZE - 1);
	cmd->cmd_id = id;
	cmd->cmd_chunk_size = C;
	cmd->cmd_thread_count = T;
	cmd->cmd_region_count = N;
	cmd->cmd_region_size = S;
	cmd->cmd_offset = O;
	cmd->cmd_region_noise = args->regionnoise;
	cmd->cmd_chunk_noise = args->chunknoise;
	cmd->cmd_thread_delay = args->thread_delay;
	cmd->cmd_flags = args->flags;
	cmd->cmd_data_size = data_size;

	rc = ioctl(zpiosctl_fd, KPIOS_CMD, cmd);
	if (rc)
		args->rc = errno;

	print_stats(args, cmd);

	if (args->verbose) {
		nread = read(zpiosctl_fd, zpios_buffer, zpios_buffer_size - 1);
		if (nread < 0) {
			fprintf(stdout, "Error reading results: %d\n",
			    (int)nread);
		} else {
			/*
			 * Terminate at the end of the new data, otherwise
			 * the tail of a longer earlier read is printed too.
			 */
			zpios_buffer[nread] = '\0';
			if ((nread > 0) && (strlen(zpios_buffer) > 0)) {
				fprintf(stdout, "\n%s\n", zpios_buffer);
				fflush(stdout);
			}
		}
	}

	free(cmd);

	return rc;
}
/*
 * Innermost iteration level: run once for every offset value, bumping
 * current_id after each run. Stops at the first non-zero run_one() result.
 * The offset cursor is rewound before returning.
 */
static int
run_offsets(cmd_args_t *args)
{
	int status = 0;

	for (;;) {
		if (status != 0)
			break;

		if (!get_next(&args->current_O, &args->O))
			break;

		status = run_one(args, args->current_id,
		    args->current_T, args->current_N, args->current_C,
		    args->current_S, args->current_O);
		args->current_id++;
	}

	args->O.next_val = 0;

	return status;
}
/*
 * Iterate over the region count values, running every offset for each one.
 * Stops at the first non-zero result and rewinds the region count cursor.
 */
static int
run_region_counts(cmd_args_t *args)
{
	int status = 0;

	for (;;) {
		if (status != 0)
			break;

		if (!get_next(&args->current_N, &args->N))
			break;

		status = run_offsets(args);
	}

	args->N.next_val = 0;

	return status;
}
/*
 * Iterate over the region size values, running every region count for each
 * one. A region size smaller than the current chunk size is rejected with
 * EINVAL. Otherwise the loop stops at the first non-zero result and the
 * region size cursor is rewound.
 */
static int
run_region_sizes(cmd_args_t *args)
{
	int rc = 0;

	while (rc == 0 && get_next(&args->current_S, &args->S)) {
		if (args->current_S < args->current_C) {
			/* The message names the condition tested above. */
			fprintf(stderr, "Error: in any run regionsize can "
			    "not be smaller than chunksize.\n");
			return EINVAL;
		}

		rc = run_region_counts(args);
	}

	args->S.next_val = 0;

	return rc;
}
/*
 * Iterate over the chunk size values, running every region size for each
 * one. Stops at the first non-zero result and rewinds the chunk size
 * cursor.
 */
static int
run_chunk_sizes(cmd_args_t *args)
{
	int status = 0;

	for (;;) {
		if (status != 0)
			break;

		if (!get_next(&args->current_C, &args->C))
			break;

		status = run_region_sizes(args);
	}

	args->C.next_val = 0;

	return status;
}
/*
 * Outermost iteration level: iterate over the thread count values, running
 * every chunk size for each one. Stops at the first non-zero result. The
 * thread count cursor is not rewound here.
 */
static int
run_thread_counts(cmd_args_t *args)
{
	int status = 0;

	for (;;) {
		if (status != 0)
			break;

		if (!get_next(&args->current_T, &args->T))
			break;

		status = run_chunk_sizes(args);
	}

	return status;
}
/*
 * Program entry point: parse the arguments, open the control device, print
 * the table header and run every requested parameter combination.
 *
 * Returns -1 when argument parsing fails, the dev_init() error when the
 * device cannot be set up, otherwise the result of the runs. Teardown only
 * undoes what was set up: dev_fini() is called only after dev_init()
 * succeeded, because dev_init() already closes the descriptor on its own
 * failure path.
 */
int
main(int argc, char **argv)
{
	cmd_args_t *args;
	int rc;

	/* Argument init and parsing */
	args = args_init(argc, argv);
	if (args == NULL)
		return -1;

	/* Device specific init */
	rc = dev_init();
	if (rc)
		goto out_args;

	/* Generic kernel version string */
	if (args->verbose)
		fprintf(stdout, "%s", zpios_version);

	print_stats_header();
	rc = run_thread_counts(args);

	dev_fini();
out_args:
	args_fini(args);
	return rc;
}

440
zfs/zcmd/zpios/zpios_util.c Normal file
View File

@ -0,0 +1,440 @@
/*
* This file is part of the ZFS Linux port.
*
* Copyright (c) 2008 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory
* Written by:
* Brian Behlendorf <behlendorf1@llnl.gov>,
* Herb Wartens <wartens2@llnl.gov>,
* Jim Garlick <garlick@llnl.gov>
* LLNL-CODE-403049
*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*
* Kernel PIOS DMU implementation originally derived from PIOS test code.
* Character control interface derived from SPL code.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <regex.h>
#include "zpios.h"
/*
 * Parse a non-negative integer with an optional single k/m/g/t suffix
 * (either case) and store the value, scaled by 2^10, 2^20, 2^30 or 2^40,
 * in *val.
 *
 * The number is parsed with base 0, so strtoll() also accepts a 0x prefix
 * (hexadecimal) and treats a leading 0 as octal.
 *
 * Returns 0 on success. Returns EINVAL when there are no digits, the value
 * is negative, the suffix is unknown, or anything follows the suffix.
 * Returns ERANGE when the number or the scaled result does not fit.
 * On a suffix error *val holds the unscaled number; on the other errors it
 * is set to 0.
 */
static int
kmgt_to_uint64(const char *str, uint64_t *val)
{
	char *endptr;
	long long num;
	unsigned int shift;

	errno = 0;
	num = strtoll(str, &endptr, 0);
	if (endptr == str) {
		*val = 0;
		return EINVAL;
	}

	if (errno == ERANGE) {
		*val = 0;
		return ERANGE;
	}

	if (num < 0) {
		*val = 0;
		return EINVAL;
	}

	switch (endptr[0]) {
	case 'k': case 'K':
		shift = 10;
		break;
	case 'm': case 'M':
		shift = 20;
		break;
	case 'g': case 'G':
		shift = 30;
		break;
	case 't': case 'T':
		shift = 40;
		break;
	case '\0':
		shift = 0;
		break;
	default:
		*val = (uint64_t)num;
		return EINVAL;
	}

	/* Nothing may follow the unit suffix. */
	if (shift != 0 && endptr[1] != '\0') {
		*val = (uint64_t)num;
		return EINVAL;
	}

	/* Refuse a shift that would push set bits off the top. */
	if ((uint64_t)num > (UINT64_MAX >> shift)) {
		*val = 0;
		return ERANGE;
	}

	*val = (uint64_t)num << shift;

	return 0;
}
/*
 * Format val into str as an integer with a k/m/g/t suffix, dividing by
 * 1024 for as long as the value is at least KB. A value still that large
 * after the 't' step is written as "inf". Values below KB get no suffix.
 *
 * Returns str.
 */
static char *
uint64_to_kmgt(char *str, uint64_t val)
{
	char units[] = "kmgt";
	int unit;
	char suffix;

	for (unit = -1; (val >= KB) && (unit < 4); unit++)
		val = (val >> 10);

	if (unit >= 4) {
		sprintf(str, "inf");
		return str;
	}

	suffix = (unit == -1) ? '\0' : units[unit];
	sprintf(str, "%lu%c", (unsigned long)val, suffix);

	return str;
}
/*
 * Format the rate v / t into str with two decimals and a k/m/g/t suffix,
 * dividing by KB for as long as the rate is at least KB. A rate still that
 * large after the 't' step is written as "inf".
 *
 * Returns str.
 */
static char *
kmgt_per_sec(char *str, uint64_t v, double t)
{
	char units[] = "kmgt";
	double rate = ((double)v) / t;
	int unit;
	char suffix;

	for (unit = -1; (rate >= (double)KB) && (unit < 4); unit++)
		rate /= (double)KB;

	if (unit >= 4) {
		sprintf(str, "inf");
		return str;
	}

	suffix = (unit == -1) ? '\0' : units[unit];
	sprintf(str, "%.2f%c", rate, suffix);

	return str;
}
/*
 * Render the run flags as a six character string, one position per flag:
 * 'w' write, 'r' read, 'v' verify, 'c' remove, 'p' or 's' depending on
 * DMU_FPP, and 'z' when either zero-copy flag is set. A flag that is not
 * set shows as '-'. str needs room for seven bytes.
 *
 * Returns str.
 */
static char *
print_flags(char *str, uint32_t flags)
{
	char *out = str;

	*out++ = (flags & DMU_WRITE) ? 'w' : '-';
	*out++ = (flags & DMU_READ) ? 'r' : '-';
	*out++ = (flags & DMU_VERIFY) ? 'v' : '-';
	*out++ = (flags & DMU_REMOVE) ? 'c' : '-';
	*out++ = (flags & DMU_FPP) ? 'p' : 's';
	*out++ = (flags & (DMU_WRITE_ZC | DMU_READ_ZC)) ? 'z' : '-';
	*out = '\0';

	return str;
}
/* Convert a timespec to seconds as a double: tv_sec + tv_nsec / 1e9. */
static double
timespec_to_double(struct timespec t)
{
	double whole = (double)(t.tv_sec);
	double fraction = (double)(t.tv_nsec) / (double)(1000*1000*1000);

	return (whole + fraction);
}
/*
 * Match string against an extended, case-insensitive regular expression.
 *
 * Returns 0 on a match, the regexec() result when there is no match, or
 * the regcomp() error code, also reported on stderr, when the pattern does
 * not compile.
 */
static int
regex_match(const char *string, char *pattern)
{
	regex_t compiled;
	int status;

	status = regcomp(&compiled, pattern,
	    REG_EXTENDED | REG_NOSUB | REG_ICASE);
	if (status != 0) {
		fprintf(stderr, "Error: Couldn't do regcomp, %d\n", status);
		return status;
	}

	status = regexec(&compiled, string, (size_t) 0, NULL, 0);
	regfree(&compiled);

	return status;
}
/*
 * Fill range->val[] and range->val_count from a comma separated list.
 *
 * optarg is first validated against pattern. Each token is then converted
 * with kmgt_to_uint64(). At most as many values as range->val[] can hold
 * are stored; further tokens are ignored.
 *
 * Returns 0 on success, the non-zero regex_match() result when optarg does
 * not match, or ENOMEM. All of these are non-negative.
 */
static int
split_string(const char *optarg, char *pattern, range_repeat_t *range)
{
	const char comma[] = ",";
	const size_t max_vals = sizeof(range->val) / sizeof(range->val[0]);
	char *buf, *tok;
	size_t count = 0;
	int rc;

	if ((rc = regex_match(optarg, pattern)))
		return rc;

	/* strtok() modifies its input, so work on a private copy. */
	buf = strdup(optarg);
	if (buf == NULL)
		return ENOMEM;

	for (tok = strtok(buf, comma); tok != NULL && count < max_vals;
	    tok = strtok(NULL, comma))
		kmgt_to_uint64(tok, &range->val[count++]);

	range->val_count = count;

	/* Free the copy itself, not the cursor handed to strtok(). */
	free(buf);

	return 0;
}
/*
 * Handle the value/list form of an option.
 *
 * pattern1 - regular expression for a single value
 * pattern2 - regular expression for a comma separated list
 * range    - destination; next_val is reset to 0
 * optarg   - the option argument
 * flags    - optional; FLAG_SET is raised in it
 * arg      - option name used in the error message
 *
 * Returns 0 on success, EINVAL when optarg matches neither pattern.
 * split_string() reports failure with positive codes, so its result is
 * compared against zero.
 */
int
set_count(char *pattern1, char *pattern2, range_repeat_t *range,
	  char *optarg, uint32_t *flags, char *arg)
{
	if (flags)
		*flags |= FLAG_SET;

	range->next_val = 0;

	if (regex_match(optarg, pattern1) == 0) {
		kmgt_to_uint64(optarg, &range->val[0]);
		range->val_count = 1;
	} else if (split_string(optarg, pattern2, range) != 0) {
		fprintf(stderr, "Error: Incorrect pattern for %s, '%s'\n",
		    arg, optarg);
		return EINVAL;
	}

	return 0;
}
/*
 * Handle one part of the range form of an option. optarg is validated
 * against pattern, converted, and stored in the low, high or increment
 * field of range, selected by flag (FLAG_LOW, FLAG_HIGH or FLAG_INCR).
 * The flag is then raised in *flag_thread.
 *
 * Returns 0 on success or the non-zero regex_match() result, after
 * reporting the bad value on stderr. Any other flag trips assert(0).
 */
int
set_lhi(char *pattern, range_repeat_t *range, char *optarg,
	int flag, uint32_t *flag_thread, char *arg)
{
	uint64_t *field = NULL;
	int status;

	status = regex_match(optarg, pattern);
	if (status != 0) {
		fprintf(stderr, "Error: Wrong pattern in %s, '%s'\n",
		    arg, optarg);
		return status;
	}

	if (flag == FLAG_LOW)
		field = &range->val_low;
	else if (flag == FLAG_HIGH)
		field = &range->val_high;
	else if (flag == FLAG_INCR)
		field = &range->val_inc_perc;
	else
		assert(0);

	/* field stays NULL only when assert() is compiled out. */
	if (field != NULL)
		kmgt_to_uint64(optarg, field);

	*flag_thread |= flag;

	return 0;
}
/*
 * Parse a plain number (REGEX_NUMBERS) into *noise.
 *
 * Returns 0 on success, EINVAL when optarg is not a plain number, after
 * naming the option in an error message on stderr.
 */
int
set_noise(uint64_t *noise, char *optarg, char *arg)
{
	if (regex_match(optarg, REGEX_NUMBERS) != 0) {
		fprintf(stderr, "Error: Incorrect pattern for %s\n", arg);
		return EINVAL;
	}

	kmgt_to_uint64(optarg, noise);

	return 0;
}
/*
 * Apply the comma separated --load parameters to args:
 *   "fpp"   sets DMU_FPP
 *   "sff"   clears DMU_FPP
 *   "dmuio" selects DMU_IO and enables DMU_WRITE and DMU_READ
 *
 * Returns 0 on success, ENOMEM when the working copy cannot be allocated,
 * or EINVAL when an unknown parameter was seen. Parsing continues past an
 * unknown parameter, so every invalid name is reported.
 */
int
set_load_params(cmd_args_t *args, char *optarg)
{
	char *buf, *param, comma[] = ",";
	int rc = 0;

	/* strtok() modifies its input, so work on a private copy. */
	buf = strdup(optarg);
	if (buf == NULL)
		return ENOMEM;

	for (param = strtok(buf, comma); param != NULL;
	    param = strtok(NULL, comma)) {
		if (strcmp("fpp", param) == 0) {
			args->flags |= DMU_FPP; /* File Per Process/Thread */
		} else if (strcmp("sff", param) == 0) {
			args->flags &= ~DMU_FPP; /* Shared file */
		} else if (strcmp("dmuio", param) == 0) {
			args->io_type |= DMU_IO;
			args->flags |= (DMU_WRITE | DMU_READ);
		} else {
			fprintf(stderr, "Invalid load: %s\n", param);
			rc = EINVAL;
		}
	}

	/* Free the copy itself, not the cursor handed to strtok(). */
	free(buf);

	return rc;
}
/*
 * Validate how one option was given. The value/list form (FLAG_SET) may
 * not be combined with any of the _low, _high and _incr forms, and the
 * range form needs all three of them, e.g. threadcount is mutually
 * exclusive with threadcount_low, ..._high and ..._incr.
 *
 * Returns 1 when the combination is acceptable, 0 after printing an error.
 */
int
check_mutual_exclusive_command_lines(uint32_t flag, char *arg)
{
	const uint32_t range_bits = FLAG_LOW | FLAG_HIGH | FLAG_INCR;

	if (flag & FLAG_SET) {
		if (!(flag & range_bits))
			return 1;

		fprintf(stderr, "Error: --%s can not be given with --%s_low, "
		    "--%s_high or --%s_incr.\n", arg, arg, arg, arg);
		return 0;
	}

	/* Range form: either none of the three parts or all of them. */
	if ((flag & range_bits) && (flag != range_bits)) {
		fprintf(stderr, "Error: One or more values missing "
		    "from --%s_low, --%s_high, --%s_incr.\n",
		    arg, arg, arg);
		return 0;
	}

	return 1;
}
/* Print the column titles of the result table and a separator line. */
void
print_stats_header(void)
{
	static const char titles[] =
	    "ret-code id\tth-cnt\trg-cnt\trg-sz\tch-sz\toffset\trg-no\tch-no\t"
	    "th-dly\tflags\ttime\tcr-time\trm-time\twr-time\t"
	    "rd-time\twr-data\twr-ch\twr-bw\trd-data\trd-ch\trd-bw\n";
	static const char rule[] =
	    "------------------------------------------------------------"
	    "------------------------------------------------------------"
	    "-----------------------------------------------------------\n";

	/* Neither string contains a conversion, so fputs() emits it verbatim. */
	fputs(titles, stdout);
	fputs(rule, stdout);
}
/*
 * Print one result row with sizes and rates scaled to k/m/g/t units.
 *
 * The row starts with PASSED or FAILED and args->rc, followed by the run
 * parameters. For a failed run the row ends there. Otherwise the timing
 * and throughput columns follow, taken from the first kpios_stats_t in
 * cmd->cmd_data_str.
 */
static void
print_stats_human_readable(cmd_args_t *args, kpios_cmd_t *cmd)
{
	kpios_stats_t *summary;
	double total, wr, rd, cr, rm;
	char buf[16];

	printf("%s: %3d ", args->rc ? "FAILED" : "PASSED", args->rc);

	printf("%u\t%u\t%u\t", cmd->cmd_id, cmd->cmd_thread_count,
	    cmd->cmd_region_count);

	/* buf is reused, so each scaled value gets its own printf(). */
	printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_region_size));
	printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_chunk_size));
	printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_offset));
	printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_region_noise));
	printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_chunk_noise));
	printf("%s\t", uint64_to_kmgt(buf, cmd->cmd_thread_delay));
	printf("%s\t", print_flags(buf, cmd->cmd_flags));

	if (args->rc) {
		printf("\n");
		return;
	}

	summary = (kpios_stats_t *)cmd->cmd_data_str;
	total = timespec_to_double(summary->total_time.delta);
	wr = timespec_to_double(summary->wr_time.delta);
	rd = timespec_to_double(summary->rd_time.delta);
	cr = timespec_to_double(summary->cr_time.delta);
	rm = timespec_to_double(summary->rm_time.delta);

	printf("%.2f\t%.3f\t%.3f\t%.2f\t%.2f\t", total, cr, rm, wr, rd);

	printf("%s\t", uint64_to_kmgt(buf, summary->wr_data));
	printf("%s\t", uint64_to_kmgt(buf, summary->wr_chunks));
	printf("%s\t", kmgt_per_sec(buf, summary->wr_data, wr));

	printf("%s\t", uint64_to_kmgt(buf, summary->rd_data));
	printf("%s\t", uint64_to_kmgt(buf, summary->rd_chunks));
	printf("%s\n", kmgt_per_sec(buf, summary->rd_data, rd));

	fflush(stdout);
}
/*
 * Print a timespec as "seconds.nanoseconds" followed by a tab. The
 * nanosecond part is zero padded to nine digits so that it reads as a
 * decimal fraction of a second.
 */
static void
print_timespec(struct timespec ts)
{
	printf("%ld.%09ld\t", (long)ts.tv_sec, (long)ts.tv_nsec);
}

/*
 * Print one result row with raw, unscaled numbers.
 *
 * The row starts with PASSED or FAILED and args->rc, followed by the run
 * parameters. For a failed run the row ends there. Otherwise the timing
 * and throughput columns follow, taken from the first kpios_stats_t in
 * cmd->cmd_data_str.
 */
static void
print_stats_table(cmd_args_t *args, kpios_cmd_t *cmd)
{
	kpios_stats_t *summary_stats;
	double wr_time, rd_time;

	if (args->rc)
		printf("FAILED: %3d ", args->rc);
	else
		printf("PASSED: %3d ", args->rc);

	printf("%u\t", cmd->cmd_id);
	printf("%u\t", cmd->cmd_thread_count);
	printf("%u\t", cmd->cmd_region_count);
	printf("%llu\t", (long long unsigned)cmd->cmd_region_size);
	printf("%llu\t", (long long unsigned)cmd->cmd_chunk_size);
	printf("%llu\t", (long long unsigned)cmd->cmd_offset);
	printf("%u\t", cmd->cmd_region_noise);
	printf("%u\t", cmd->cmd_chunk_noise);
	printf("%u\t", cmd->cmd_thread_delay);
	printf("0x%x\t", cmd->cmd_flags);

	if (args->rc) {
		printf("\n");
		return;
	}

	summary_stats = (kpios_stats_t *)cmd->cmd_data_str;
	wr_time = timespec_to_double(summary_stats->wr_time.delta);
	rd_time = timespec_to_double(summary_stats->rd_time.delta);

	print_timespec(summary_stats->total_time.delta);
	print_timespec(summary_stats->cr_time.delta);
	print_timespec(summary_stats->rm_time.delta);
	print_timespec(summary_stats->wr_time.delta);
	print_timespec(summary_stats->rd_time.delta);

	/* The arguments are cast to unsigned, so they are printed with %llu. */
	printf("%llu\t", (long long unsigned)summary_stats->wr_data);
	printf("%llu\t", (long long unsigned)summary_stats->wr_chunks);
	printf("%.4f\t", (double)summary_stats->wr_data / wr_time);

	printf("%llu\t", (long long unsigned)summary_stats->rd_data);
	printf("%llu\t", (long long unsigned)summary_stats->rd_chunks);
	printf("%.4f\n", (double)summary_stats->rd_data / rd_time);

	fflush(stdout);
}
/*
 * Print the result row for one run, in human readable form when
 * args->human_readable is set and as raw numbers otherwise.
 */
void
print_stats(cmd_args_t *args, kpios_cmd_t *cmd)
{
	if (!args->human_readable) {
		print_stats_table(args, cmd);
		return;
	}

	print_stats_human_readable(args, cmd);
}