From 5ce3b77ef8855f266d885a683af0b6ae0737680b Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 9 Aug 2010 11:06:00 -0700 Subject: [PATCH 1/5] Remove /zvol/ path component for zvol devices As part of commit f162433deb029769f76db8cc216cad9b4f7356fb the /zvol/ path component was added for zvol devices. This ensured all zvol devices would be created by udev in /dev/zvol//, as opposed to the previous /dev// path. Logically, it was nice to organize them in a directory much like Solaris does. However, while initial testing showed this to work fine with modern kernels it does not appear to be supported under RHEL5. The extra path component triggers a NULL deref in create_dir(). Anyway, to avoid having different zvol path names based on your kernel version its more consistent simply to revert to the original naming convention. If you really want the zvol component you can always add custom udev rules to do exactly this. We can revisiting this change again once we are willing to drop support for RHEL5 and similar older distros. --- module/zcommon/include/sys/fs/zfs.h | 2 +- module/zfs/vdev.c | 10 ++++++++++ module/zfs/zvol.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/module/zcommon/include/sys/fs/zfs.h b/module/zcommon/include/sys/fs/zfs.h index 2f3c48b6e8..b5f0dd2fa9 100644 --- a/module/zcommon/include/sys/fs/zfs.h +++ b/module/zcommon/include/sys/fs/zfs.h @@ -691,7 +691,7 @@ typedef struct ddt_histogram { #define ZFS_DEV "/dev/zfs" /* general zvol path */ -#define ZVOL_DIR "/dev/zvol" +#define ZVOL_DIR "/dev" #define ZVOL_MAJOR 230 #define ZVOL_MINOR_BITS 4 diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index e1018ccc6a..0e57148323 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1072,6 +1072,15 @@ vdev_open_child(void *arg) boolean_t vdev_uses_zvols(vdev_t *vd) { +/* + * Stacking zpools on top of zvols is unsupported until we implement a method + * for determining if an arbitrary block device is a zvol without using the + * path. Solaris would check the 'zvol' path component but this does not + * exist in the Linux port, so we really should do something like stat the + * file and check the major number. This is complicated by the fact that + * we need to do this portably in user or kernel space. + */ +#if 0 int c; if (vd->vdev_path && strncmp(vd->vdev_path, ZVOL_DIR, @@ -1080,6 +1089,7 @@ vdev_uses_zvols(vdev_t *vd) for (c = 0; c < vd->vdev_children; c++) if (vdev_uses_zvols(vd->vdev_child[c])) return (B_TRUE); +#endif return (B_FALSE); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 15716aba3c..4e31733f04 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1066,7 +1066,7 @@ zvol_alloc(dev_t dev, const char *name) zv->zv_disk->fops = &zvol_ops; zv->zv_disk->private_data = zv; zv->zv_disk->queue = zv->zv_queue; - snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "zvol/%s", name); + snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "%s", name); return zv; From 6478a7f847d8fd392d04afb12a57b09d2d9417b5 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 9 Aug 2010 11:14:09 -0700 Subject: [PATCH 2/5] Remove /zvol/ path component from zconfig.sh See previous commit for details. But the gist is with the removal of the zvol path component the regression tests must be updated to use the correct path name. --- scripts/zconfig.sh | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/zconfig.sh b/scripts/zconfig.sh index c671206186..2215548929 100755 --- a/scripts/zconfig.sh +++ b/scripts/zconfig.sh @@ -154,14 +154,14 @@ zconfig_test3() { # Partition the volume, for a 400M volume there will be # 812 cylinders, 16 heads, and 63 sectors per track. - zconfig_partition /dev/zvol/${FULL_NAME} 0 812 + zconfig_partition /dev/${FULL_NAME} 0 812 # Format the partition with ext3. - /sbin/mkfs.ext3 -q /dev/zvol/${FULL_NAME}1 || fail 5 + /sbin/mkfs.ext3 -q /dev/${FULL_NAME}1 || fail 5 # Mount the ext3 filesystem and copy some data to it. mkdir -p /tmp/${ZVOL_NAME} || fail 6 - mount /dev/zvol/${FULL_NAME}1 /tmp/${ZVOL_NAME} || fail 7 + mount /dev/${FULL_NAME}1 /tmp/${ZVOL_NAME} || fail 7 cp -RL ${SRC_DIR} /tmp/${ZVOL_NAME} || fail 8 # Verify the copied files match the original files. @@ -180,10 +180,10 @@ zconfig_test3 zconfig_zvol_device_stat() { local EXPECT=$1 - local POOL_NAME=/dev/zvol/$2 - local ZVOL_NAME=/dev/zvol/$3 - local SNAP_NAME=/dev/zvol/$4 - local CLONE_NAME=/dev/zvol/$5 + local POOL_NAME=/dev/$2 + local ZVOL_NAME=/dev/$3 + local SNAP_NAME=/dev/$4 + local CLONE_NAME=/dev/$5 local COUNT=0 # Pool exists @@ -216,7 +216,7 @@ zconfig_zvol_device_stat() { zconfig_test4() { local POOL_NAME=tank local ZVOL_NAME=volume - local SNAP_NAME=snapshot + local SNAP_NAME=snap local CLONE_NAME=clone local FULL_ZVOL_NAME=${POOL_NAME}/${ZVOL_NAME} local FULL_SNAP_NAME=${POOL_NAME}/${ZVOL_NAME}@${SNAP_NAME} @@ -229,7 +229,7 @@ zconfig_test4() { ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 100M ${FULL_ZVOL_NAME} || fail 3 - zconfig_partition /dev/zvol/${FULL_ZVOL_NAME} 0 64 || fail 4 + zconfig_partition /dev/${FULL_ZVOL_NAME} 0 64 || fail 4 ${ZFS} snapshot ${FULL_SNAP_NAME} || fail 5 ${ZFS} clone ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 6 @@ -268,7 +268,7 @@ zconfig_test4 zconfig_test5() { POOL_NAME=tank ZVOL_NAME=volume - SNAP_NAME=snapshot + SNAP_NAME=snap CLONE_NAME=clone FULL_ZVOL_NAME=${POOL_NAME}/${ZVOL_NAME} FULL_SNAP_NAME=${POOL_NAME}/${ZVOL_NAME}@${SNAP_NAME} @@ -281,7 +281,7 @@ zconfig_test5() { ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 1 ${ZPOOL_CREATE_SH} -p ${POOL_NAME} -c lo-raidz2 || fail 2 ${ZFS} create -V 100M ${FULL_ZVOL_NAME} || fail 3 - zconfig_partition /dev/zvol/${FULL_ZVOL_NAME} 0 64 || fail 4 + zconfig_partition /dev/${FULL_ZVOL_NAME} 0 64 || fail 4 ${ZFS} snapshot ${FULL_SNAP_NAME} || fail 5 ${ZFS} clone ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 6 From d5168aa0895814433164d838e8ae0554a7206b77 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 9 Aug 2010 16:01:44 -0700 Subject: [PATCH 3/5] Insert small delay for udev While the zfs utilities do block until the expected device appears they can only do this for full devices, not partitions. This means that once as device appears it still may take a little bit of time before the kernel rescans the partition table, updates sysfs, udev is notified and the partition devices are created. The test case itself could block briefly waiting for the partition beause it knows what to expect. But for now the simpler thing to do is just delay. --- scripts/zconfig.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/zconfig.sh b/scripts/zconfig.sh index 2215548929..dc11b9ca60 100755 --- a/scripts/zconfig.sh +++ b/scripts/zconfig.sh @@ -186,6 +186,9 @@ zconfig_zvol_device_stat() { local CLONE_NAME=/dev/$5 local COUNT=0 + # Briefly delay for udev + sleep 1 + # Pool exists stat ${POOL_NAME} &>/dev/null && let COUNT=$COUNT+1 @@ -245,7 +248,7 @@ zconfig_test4() { ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 9 # Import the pool, wait 1 second for udev - ${ZPOOL} import ${POOL_NAME} && sleep 1 || fail 10 + ${ZPOOL} import ${POOL_NAME} || fail 10 # Verify the devices were created zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \ @@ -297,7 +300,7 @@ zconfig_test5() { ${FULL_SNAP_NAME} ${FULL_CLONE_NAME} || fail 9 # Load the modules, wait 1 second for udev - ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" && sleep 1 || fail 10 + ${ZFS_SH} zfs="spa_config_path=${TMP_CACHE}" || fail 10 # Verify the devices were created zconfig_zvol_device_stat 10 ${POOL_NAME} ${FULL_ZVOL_NAME} \ From dfc166d174c1550dab8b4528232449e3e0d8f9e1 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 9 Aug 2010 14:48:12 -0700 Subject: [PATCH 4/5] Limit sysfs name to KOBJ_NAME_LEN It appears that in earlier kernels the maximum name length of a kobject was KOBJ_NAME_LEN (20) bytes. This was later extended to dynamically allocate enough memory if it was over KOBJ_NAME_LEN, and finally it was always made dynamic. Unfortunately, util this last step happened it doesn't look like it always safe to use names larger than KOBJ_NAME_LEN. For example, under the RHEL5 2.6.18 kernel if the kobject name length exceeds KOBJ_NAME_LEN a NULL dereference is tripped. To avoid this issue the build system has been update to check to see if KOBJ_NAME_LEN is defined. If it is we have to assume the maximum kobject name length is only 20 bytes. This 20 byte name must minimally include the following components. /[@snapshot[partition]] --- config/kernel-kobj-name-len.m4 | 21 +++++++++++++++++++++ config/kernel.m4 | 1 + 2 files changed, 22 insertions(+) create mode 100644 config/kernel-kobj-name-len.m4 diff --git a/config/kernel-kobj-name-len.m4 b/config/kernel-kobj-name-len.m4 new file mode 100644 index 0000000000..5363a41ca2 --- /dev/null +++ b/config/kernel-kobj-name-len.m4 @@ -0,0 +1,21 @@ +dnl # +dnl # 2.6.27 API change, +dnl # kobject KOBJ_NAME_LEN static limit removed. All users of this +dnl # constant were removed prior to 2.6.27, but to be on the safe +dnl # side this check ensures the constant is undefined. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_KOBJ_NAME_LEN], [ + AC_MSG_CHECKING([whether kernel defines KOBJ_NAME_LEN]) + ZFS_LINUX_TRY_COMPILE([ + #include + ],[ + int val; + val = KOBJ_NAME_LEN; + ],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_KOBJ_NAME_LEN, 1, + [kernel defines KOBJ_NAME_LEN]) + ],[ + AC_MSG_RESULT([no]) + ]) +]) diff --git a/config/kernel.m4 b/config/kernel.m4 index ca61dfd174..4c06981dcc 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -7,6 +7,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ ZFS_AC_KERNEL_CONFIG ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS ZFS_AC_KERNEL_TYPE_FMODE_T + ZFS_AC_KERNEL_KOBJ_NAME_LEN ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE ZFS_AC_KERNEL_INVALIDATE_BDEV_ARGS ZFS_AC_KERNEL_BDEV_LOGICAL_BLOCK_SIZE From dd11e3f7f02ad1cd88aec6fc42ceab4ed9442e80 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 9 Aug 2010 14:48:12 -0700 Subject: [PATCH 5/5] Limit sysfs name to KOBJ_NAME_LEN See commit dfc166d174c1550dab8b4528232449e3e0d8f9e1 for details. --- module/zcommon/zfs_namecheck.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/module/zcommon/zfs_namecheck.c b/module/zcommon/zfs_namecheck.c index 5cfafea471..2398c2755c 100644 --- a/module/zcommon/zfs_namecheck.c +++ b/module/zcommon/zfs_namecheck.c @@ -142,9 +142,22 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what) * which is the same as MAXNAMELEN used in the kernel. * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all * places using MAXNAMELEN. + * + * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name + * length is 20 bytes. This 20 bytes is broken down as follows to + * provide a maximum safe /[@snapshot] length of only + * 18 bytes. To ensure bytes are left for [@snapshot] the + * portition is futher limited to 9 bytes. For 2.6.27 and + * newer kernels this limit is set to MAXNAMELEN. + * + * / + + + * (18) + (1) + (1) */ - +#ifdef HAVE_KOBJ_NAME_LEN + if (strlen(path) > 18) { +#else if (strlen(path) >= MAXNAMELEN) { +#endif /* HAVE_KOBJ_NAME_LEN */ if (why) *why = NAME_ERR_TOOLONG; return (-1); @@ -303,8 +316,22 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what) * which is the same as MAXNAMELEN used in the kernel. * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all * places using MAXNAMELEN. + * + * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name + * length is 20 bytes. This 20 bytes is broken down as follows to + * provide a maximum safe /[@snapshot] length of only + * 18 bytes. To ensure bytes are left for [@snapshot] the + * portition is futher limited to 8 bytes. For 2.6.27 and + * newer kernels this limit is set to MAXNAMELEN. + * + * / + + + * (18) + (1) + (1) */ +#ifdef HAVE_KOBJ_NAME_LEN + if (strlen(pool) > 8) { +#else if (strlen(pool) >= MAXNAMELEN) { +#endif /* HAVE_KOBJ_NAME_LEN */ if (why) *why = NAME_ERR_TOOLONG; return (-1);