From 9a2e90c9fc469d377c14eb863952261f9ec12d2c Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Mon, 9 Apr 2018 14:24:46 -0700 Subject: [PATCH] Revert "Handle zap_add() failures in mixed ... " This reverts commit cc63068e95ee725cce03b1b7ce50179825a6cda5. Under certain circumstances this change can result in an ENOSPC error when adding new files to a directory. See #7401 for full details. Reviewed-by: Brian Behlendorf Signed-off-by: Tony Hutter Issue #7401 Closes #7416 --- include/sys/zap_leaf.h | 15 +- module/zfs/zap.c | 25 +--- module/zfs/zap_leaf.c | 2 +- module/zfs/zap_micro.c | 38 +---- module/zfs/zfs_dir.c | 29 +--- module/zfs/zfs_vnops.c | 73 +++------- tests/runfiles/linux.run | 2 +- .../tests/functional/casenorm/Makefile.am | 1 - .../casenorm/mixed_create_failure.ksh | 136 ------------------ 9 files changed, 32 insertions(+), 289 deletions(-) delete mode 100755 tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh diff --git a/include/sys/zap_leaf.h b/include/sys/zap_leaf.h index a3da1036a5..e784c5963b 100644 --- a/include/sys/zap_leaf.h +++ b/include/sys/zap_leaf.h @@ -46,15 +46,10 @@ struct zap_stats; * block size (1<l_bs) - hash entry size (2) * number of hash * entries - header space (2*chunksize) */ -#define ZAP_LEAF_NUMCHUNKS_BS(bs) \ - (((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \ +#define ZAP_LEAF_NUMCHUNKS(l) \ + (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \ ZAP_LEAF_CHUNKSIZE - 2) -#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs))) - -#define ZAP_LEAF_NUMCHUNKS_DEF \ - (ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift)) - /* * The amount of space within the chunk available for the array is: * chunk size - space for type (1) - space for next pointer (2) @@ -79,10 +74,8 @@ struct zap_stats; * which is less than block size / CHUNKSIZE (24) / minimum number of * chunks per entry (3). */ -#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5) -#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs)) -#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs))) -#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs))) +#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5) +#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l)) /* * The chunks start immediately after the hash table. The end of the diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 9843d8c500..ee9962bff3 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -819,19 +819,15 @@ fzap_lookup(zap_name_t *zn, return (err); } -#define MAX_EXPAND_RETRIES 2 - int fzap_add_cd(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers, const void *val, uint32_t cd, void *tag, dmu_tx_t *tx) { zap_leaf_t *l; - zap_leaf_t *prev_l = NULL; int err; zap_entry_handle_t zeh; zap_t *zap = zn->zn_zap; - int expand_retries = 0; ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); ASSERT(!zap->zap_ismicro); @@ -855,29 +851,10 @@ retry: if (err == 0) { zap_increment_num_entries(zap, 1, tx); } else if (err == EAGAIN) { - /* - * If the last two expansions did not help, there is no point - * trying to expand again - */ - if (expand_retries > MAX_EXPAND_RETRIES && prev_l == l) { - err = SET_ERROR(ENOSPC); - goto out; - } - err = zap_expand_leaf(zn, l, tag, tx, &l); zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ - if (err == 0) { - prev_l = l; - expand_retries++; + if (err == 0) goto retry; - } else if (err == ENOSPC) { - /* - * If we failed to expand the leaf, then bailout - * as there is no point trying - * zap_put_leaf_maybe_grow_ptrtbl(). - */ - return (err); - } } out: diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c index 526e466065..c342695c7f 100644 --- a/module/zfs/zap_leaf.c +++ b/module/zfs/zap_leaf.c @@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry); ((h) >> \ (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) -#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) +#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)]) extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 34bef3e63d..3ebf995c67 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -363,41 +363,6 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash) return (cd); } -/* - * Each mzap entry requires at max : 4 chunks - * 3 chunks for names + 1 chunk for value. - */ -#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \ - ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t))) - -/* - * Check if the current entry keeps the colliding entries under the fatzap leaf - * size. - */ -static boolean_t -mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) -{ - zap_t *zap = zn->zn_zap; - mzap_ent_t mze_tofind; - mzap_ent_t *mze; - avl_index_t idx; - avl_tree_t *avl = &zap->zap_m.zap_avl; - uint32_t mzap_ents = 0; - - mze_tofind.mze_hash = hash; - mze_tofind.mze_cd = 0; - - for (mze = avl_find(avl, &mze_tofind, &idx); - mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { - mzap_ents++; - } - - /* Include the new entry being added */ - mzap_ents++; - - return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS)); -} - static void mze_remove(zap_t *zap, mzap_ent_t *mze) { @@ -1226,8 +1191,7 @@ zap_add_impl(zap_t *zap, const char *key, err = fzap_add(zn, integer_size, num_integers, val, tag, tx); zap = zn->zn_zap; /* fzap_add() may change zap */ } else if (integer_size != 8 || num_integers != 1 || - strlen(key) >= MZAP_NAME_LEN || - !mze_canfit_fzap_leaf(zn, zn->zn_hash)) { + strlen(key) >= MZAP_NAME_LEN) { err = mzap_upgrade(&zn->zn_zap, tag, tx, 0); if (err == 0) { err = fzap_add(zn, integer_size, num_integers, val, diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 6398a1d155..9a8bbccd92 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -742,11 +742,7 @@ zfs_dirent(znode_t *zp, uint64_t mode) } /* - * Link zp into dl. Can fail in the following cases : - * - if zp has been unlinked. - * - if the number of entries with the same hash (aka. colliding entries) - * exceed the capacity of a leaf-block of fatzap and splitting of the - * leaf-block does not help. + * Link zp into dl. Can only fail if zp has been unlinked. */ int zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) @@ -780,24 +776,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) NULL, &links, sizeof (links)); } } - - value = zfs_dirent(zp, zp->z_mode); - error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1, - &value, tx); - - /* - * zap_add could fail to add the entry if it exceeds the capacity of the - * leaf-block and zap_leaf_split() failed to help. - * The caller of this routine is responsible for failing the transaction - * which will rollback the SA updates done above. - */ - if (error != 0) { - if (!(flag & ZRENAMING) && !(flag & ZNEW)) - drop_nlink(ZTOI(zp)); - mutex_exit(&zp->z_lock); - return (error); - } - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &dzp->z_id, sizeof (dzp->z_id)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, @@ -835,6 +813,11 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) ASSERT(error == 0); mutex_exit(&dzp->z_lock); + value = zfs_dirent(zp, zp->z_mode); + error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, + 8, 1, &value, tx); + ASSERT(error == 0); + return (0); } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 8a7ad702ca..6f6ce79db2 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1443,22 +1443,10 @@ top: } zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); - error = zfs_link_create(dl, zp, tx, ZNEW); - if (error != 0) { - /* - * Since, we failed to add the directory entry for it, - * delete the newly created dnode. - */ - zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); - zfs_acl_ids_free(&acl_ids); - dmu_tx_commit(tx); - goto out; - } - if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); + (void) zfs_link_create(dl, zp, tx, ZNEW); txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); if (flag & FIGNORECASE) txtype |= TX_CI; @@ -2049,18 +2037,13 @@ top: */ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); + if (fuid_dirtied) + zfs_fuid_sync(zfsvfs, tx); + /* * Now put new name in parent dir. */ - error = zfs_link_create(dl, zp, tx, ZNEW); - if (error != 0) { - zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); - goto out; - } - - if (fuid_dirtied) - zfs_fuid_sync(zfsvfs, tx); + (void) zfs_link_create(dl, zp, tx, ZNEW); *ipp = ZTOI(zp); @@ -2070,7 +2053,6 @@ top: zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, acl_ids.z_fuidp, vap); -out: zfs_acl_ids_free(&acl_ids); dmu_tx_commit(tx); @@ -2080,14 +2062,10 @@ out: if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, 0); - if (error != 0) { - iput(ZTOI(zp)); - } else { - zfs_inode_update(dzp); - zfs_inode_update(zp); - } + zfs_inode_update(dzp); + zfs_inode_update(zp); ZFS_EXIT(zfsvfs); - return (error); + return (0); } /* @@ -3705,13 +3683,6 @@ top: VERIFY3U(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL), ==, 0); } - } else { - /* - * If we had removed the existing target, subsequent - * call to zfs_link_create() to add back the same entry - * but, the new dnode (szp) should not fail. - */ - ASSERT(tzp == NULL); } } @@ -3882,18 +3853,14 @@ top: /* * Insert the new object into the directory. */ - error = zfs_link_create(dl, zp, tx, ZNEW); - if (error != 0) { - zfs_znode_delete(zp, tx); - remove_inode_hash(ZTOI(zp)); - } else { - if (flags & FIGNORECASE) - txtype |= TX_CI; - zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + (void) zfs_link_create(dl, zp, tx, ZNEW); - zfs_inode_update(dzp); - zfs_inode_update(zp); - } + if (flags & FIGNORECASE) + txtype |= TX_CI; + zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link); + + zfs_inode_update(dzp); + zfs_inode_update(zp); zfs_acl_ids_free(&acl_ids); @@ -3901,14 +3868,10 @@ top: zfs_dirent_unlock(dl); - if (error == 0) { - *ipp = ZTOI(zp); + *ipp = ZTOI(zp); - if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) - zil_commit(zilog, 0); - } else { - iput(ZTOI(zp)); - } + if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) + zil_commit(zilog, 0); ZFS_EXIT(zfsvfs); return (error); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 25ae3fe5e9..4b2694202c 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -55,7 +55,7 @@ tags = ['functional', 'cachefile'] # 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete', # 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete'] [tests/functional/casenorm] -tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure'] +tests = ['case_all_values', 'norm_all_values'] tags = ['functional', 'casenorm'] [tests/functional/chattr] diff --git a/tests/zfs-tests/tests/functional/casenorm/Makefile.am b/tests/zfs-tests/tests/functional/casenorm/Makefile.am index 00cb59074e..00a19c7ff7 100644 --- a/tests/zfs-tests/tests/functional/casenorm/Makefile.am +++ b/tests/zfs-tests/tests/functional/casenorm/Makefile.am @@ -9,7 +9,6 @@ dist_pkgdata_SCRIPTS = \ insensitive_formd_lookup.ksh \ insensitive_none_delete.ksh \ insensitive_none_lookup.ksh \ - mixed_create_failure.ksh \ mixed_formd_delete.ksh \ mixed_formd_lookup_ci.ksh \ mixed_formd_lookup.ksh \ diff --git a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh deleted file mode 100755 index 51b5bb3f65..0000000000 --- a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh +++ /dev/null @@ -1,136 +0,0 @@ -#!/bin/ksh -p -# -# -# This file and its contents are supplied under the terms of the -# Common Development and Distribution License ("CDDL"), version 1.0. -# You may only use this file in accordance with the terms of version -# 1.0 of the CDDL. -# -# A full copy of the text of the CDDL should have accompanied this -# source. A copy of the CDDL is also available via the Internet at -# http://www.illumos.org/license/CDDL. -# -# -# Copyright 2018 Nutanix Inc. All rights reserved. -# - -. $STF_SUITE/tests/functional/casenorm/casenorm.kshlib - -# DESCRIPTION: -# For the filesystem with casesensitivity=mixed, normalization=none, -# when multiple files with the same name (differing only in case) are created, -# the number of files is limited to what can fit in a fatzap leaf-block. -# And beyond that, it fails with ENOSPC. -# -# Ensure that the create/rename operations fail gracefully and not trigger an -# ASSERT. -# -# STRATEGY: -# Repeat the below steps for objects: files, directories, symlinks and hardlinks -# 1. Create objects with same name but varying in case. -# E.g. 'abcdefghijklmnop', 'Abcdefghijklmnop', 'ABcdefghijklmnop' etc. -# The create should fail with ENOSPC. -# 2. Create an object with name 'tmp_obj' and try to rename it to name that we -# failed to add in step 1 above. -# This should fail as well. - -verify_runnable "global" - -function cleanup -{ - destroy_testfs -} - -log_onexit cleanup -log_assert "With mixed mode: ensure create fails with ENOSPC beyond a certain limit" - -create_testfs "-o casesensitivity=mixed -o normalization=none" - -# Different object types -obj_type=('file' 'dir' 'symlink' 'hardlink') - -# Commands to create different object types -typeset -A ops -ops['file']='touch' -ops['dir']='mkdir' -ops['symlink']='ln -s' -ops['hardlink']='ln' - -# This function tests the following for a give object type : -# - Create multiple objects with the same name (varying only in case). -# Ensure that it eventually fails once the leaf-block limit is exceeded. -# - Create another object with a different name. And attempt rename it to the -# name (for which the create had failed in the previous step). -# This should fail as well. -# Args : -# $1 - object type (file/dir/symlink/hardlink) -# $2 - test directory -# -function test_ops -{ - typeset obj_type=$1 - typeset testdir=$2 - - target_obj='target-file' - - op="${ops[$obj_type]}" - - log_note "The op : $op" - log_note "testdir=$testdir obj_type=$obj_type" - - test_path="$testdir/$obj_type" - mkdir $test_path - log_note "Created test dir $test_path" - - if [[ $obj_type = "symlink" || $obj_type = "hardlink" ]]; then - touch $test_path/$target_obj - log_note "Created target: $test_path/$target_obj" - op="$op $test_path/$target_obj" - fi - - log_note "op : $op" - names='{a,A}{b,B}{c,C}{d,D}{e,E}{f,F}{g,G}{h,H}{i,I}{j,J}{k,K}{l,L}' - for name in $names; do - cmd="$op $test_path/$name" - out=$($cmd 2>&1) - ret=$? - log_note "cmd: $cmd ret: $ret out=$out" - if (($ret != 0)); then - if [[ $out = *@(No space left on device)* ]]; then - save_name="$test_path/$name" - break; - else - log_err "$cmd failed with unexpected error : $out" - fi - fi - done - - log_note 'Test rename \"sample_name\" rename' - TMP_OBJ="$test_path/tmp_obj" - cmd="$op $TMP_OBJ" - out=$($cmd 2>&1) - ret=$? - if (($ret != 0)); then - log_err "cmd:$cmd failed out:$out" - fi - - # Now, try to rename the tmp_obj to the name which we failed to add earlier. - # This should fail as well. - out=$(mv $TMP_OBJ $save_name 2>&1) - ret=$? - if (($ret != 0)); then - if [[ $out = *@(No space left on device)* ]]; then - log_note "$cmd failed as expected : $out" - else - log_err "$cmd failed with : $out" - fi - fi -} - -for obj_type in ${obj_type[*]}; -do - log_note "Testing create of $obj_type" - test_ops $obj_type $TESTDIR -done - -log_pass "Mixed mode FS: Ops on large number of colliding names fail gracefully"