Use zfs_mknode() to create dataset root

Long, long, long ago when the effort to port ZFS was begun
the zfs_create_fs() function was heavily modified to remove
all of its VFS dependencies.  This allowed Lustre to use
the dataset without us having to spend the time porting all
the required VFS code.

Fast-forward several years and we now have all the VFS code
in place but are still relying on the modified zfs_create_fs().
This isn't required anymore and we can now use zfs_mknode()
to create the root znode for the filesystem.

This commit reverts the contents of zfs_create_fs() to largely
match the upstream OpenSolaris code.  There have been minor
modifications to accomidate the Linux VFS but that is all.

This code fixes issue #116 by bootstraping enough of the VFS
data structures so we can rely on zfs_mknode() to create the
root directory.  This ensures it is created properly with
support for system attributes.  Previously it wasn't which
is why it behaved differently that all other directories
when modified.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #116
This commit is contained in:
Brian Behlendorf 2011-07-20 16:50:22 -07:00
parent 9fd91daeef
commit 22872ff5da
1 changed files with 71 additions and 34 deletions

View File

@ -1397,13 +1397,18 @@ log:
void void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{ {
struct super_block *sb;
zfs_sb_t *zsb;
uint64_t moid, obj, sa_obj, version; uint64_t moid, obj, sa_obj, version;
uint64_t sense = ZFS_CASE_SENSITIVE;
uint64_t norm = 0; uint64_t norm = 0;
nvpair_t *elem; nvpair_t *elem;
int error; int error;
timestruc_t now; int i;
dmu_buf_t *db; znode_t *rootzp = NULL;
znode_phys_t *pzp; vattr_t vattr;
znode_t *zp;
zfs_acl_ids_t acl_ids;
/* /*
* First attempt to create master node. * First attempt to create master node.
@ -1439,6 +1444,8 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
ASSERT(error == 0); ASSERT(error == 0);
if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
norm = val; norm = val;
else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
sense = val;
} }
ASSERT(version != 0); ASSERT(version != 0);
error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
@ -1464,47 +1471,77 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
ASSERT(error == 0); ASSERT(error == 0);
/* /*
* Create root znode with code free of VFS dependencies. This * Create root znode. Create minimal znode/inode/zsb/sb
* is important because without a registered filesystem and super * to allow zfs_mknode to work.
* block all the required VFS hooks will be missing. The critical
* thing is to just crete the required root znode.
*/ */
obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS, vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
DMU_OT_ZNODE, sizeof (znode_phys_t), tx); vattr.va_mode = S_IFDIR|0755;
vattr.va_uid = crgetuid(cr);
vattr.va_gid = crgetgid(cr);
VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db)); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
dmu_buf_will_dirty(db, tx); rootzp->z_moved = 0;
rootzp->z_unlinked = 0;
rootzp->z_atime_dirty = 0;
rootzp->z_is_sa = USE_SA(version, os);
/* zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
* Initialize the znode physical data to zero. zsb->z_os = os;
*/ zsb->z_parent = zsb;
ASSERT(db->db_size >= sizeof (znode_phys_t)); zsb->z_version = version;
bzero(db->db_data, db->db_size); zsb->z_use_fuids = USE_FUIDS(version, os);
pzp = db->db_data; zsb->z_use_sa = USE_SA(version, os);
zsb->z_norm = norm;
if (USE_FUIDS(version, os)) sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; sb->s_fs_info = zsb;
pzp->zp_size = 2; /* "." and ".." */ ZTOI(rootzp)->i_sb = sb;
pzp->zp_links = 2;
pzp->zp_parent = obj;
pzp->zp_gen = dmu_tx_get_txg(tx);
pzp->zp_mode = S_IFDIR | 0755;
pzp->zp_flags = ZFS_ACL_TRIVIAL;
gethrestime(&now); error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
&zsb->z_attr_table);
ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
ZFS_TIME_ENCODE(&now, pzp->zp_ctime);
ZFS_TIME_ENCODE(&now, pzp->zp_atime);
ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx);
ASSERT(error == 0); ASSERT(error == 0);
dmu_buf_rele(db, FTAG); /*
} * Fold case on file systems that are always or sometimes case
* insensitive.
*/
if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
zsb->z_norm |= U8_TEXTPREP_TOUPPER;
mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zsb->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
cr, NULL, &acl_ids));
zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
ASSERT3P(zp, ==, rootzp);
error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
ASSERT(error == 0);
zfs_acl_ids_free(&acl_ids);
atomic_set(&ZTOI(rootzp)->i_count, 0);
sa_handle_destroy(rootzp->z_sa_hdl);
kmem_free(sb, sizeof (struct super_block));
kmem_free(zsb, sizeof (zfs_sb_t));
kmem_cache_free(znode_cache, rootzp);
/*
* Create shares directory
*/
error = zfs_create_share_dir(zsb, tx);
ASSERT(error == 0);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zsb->z_hold_mtx[i]);
}
#endif /* _KERNEL */ #endif /* _KERNEL */
static int static int