From e2e7aa2df88708cdcae87688a69ba3f5d1cc43ed Mon Sep 17 00:00:00 2001
From: Brian Behlendorf <behlendorf1@llnl.gov>
Date: Fri, 1 Jul 2011 11:25:07 -0700
Subject: [PATCH] Add ZFS specific mmap() checks

Under Linux the VFS handles virtually all of the mmap() access
checks.  Filesystem specific checks are left to be handled in
the .mmap() hook and normally there aren't any.

However, ZFS provides a few attributes which can influence the
mmap behavior and should be honored.  Note, currently the code
to modify these attributes has not been implemented under Linux.

* ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY: when any of these
  attributes are set a file may not be mmaped with write access.

* ZFS_AV_QUARANTINED: when set a file may not be mmaped with
  read or exec access.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
---
 include/sys/zfs_vnops.h |   2 +
 module/zfs/zfs_vnops.c  | 136 +++++++---------------------------------
 module/zfs/zpl_file.c   |   8 ++-
 3 files changed, 30 insertions(+), 116 deletions(-)

diff --git a/include/sys/zfs_vnops.h b/include/sys/zfs_vnops.h
index 23218a45ba..acc617b414 100644
--- a/include/sys/zfs_vnops.h
+++ b/include/sys/zfs_vnops.h
@@ -74,6 +74,8 @@ extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag,
 extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
 extern int zfs_putpage(struct page *page, struct writeback_control *wbc,
     void *data);
+extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
+    size_t len, unsigned long vm_flags);
 
 #ifdef	__cplusplus
 }
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 53d3194788..2139713102 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -3995,144 +3995,50 @@ zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
 }
 EXPORT_SYMBOL(zfs_getpage);
 
-#ifdef HAVE_MMAP
 /*
- * Request a memory map for a section of a file.  This code interacts
- * with common code and the VM system as follows:
+ * Check ZFS specific permissions to memory map a section of a file.
  *
- *	common code calls mmap(), which ends up in smmap_common()
+ *	IN:	ip	- inode of the file to mmap
+ *		off	- file offset
+ *		addrp	- start address in memory region
+ *		len	- length of memory region
+ *		vm_flags- VM_* flags of the requested mapping
  *
- *	this calls VOP_MAP(), which takes you into (say) zfs
- *
- *	zfs_map() calls as_map(), passing segvn_create() as the callback
- *
- *	segvn_create() creates the new segment and calls VOP_ADDMAP()
- *
- *	zfs_addmap() updates z_mapcnt
+ *	RETURN:	0 if success
+ *		error code if failure
  */
 /*ARGSUSED*/
-static int
-zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
-    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr)
+int
+zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
+    unsigned long vm_flags)
 {
-	znode_t *zp = VTOZ(vp);
-	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-	segvn_crargs_t	vn_a;
-	int		error;
+	znode_t  *zp = ITOZ(ip);
+	zfs_sb_t *zsb = ITOZSB(ip);
 
-	ZFS_ENTER(zfsvfs);
+	ZFS_ENTER(zsb);
 	ZFS_VERIFY_ZP(zp);
 
-	if ((prot & PROT_WRITE) && (zp->z_pflags &
+	if ((vm_flags & VM_WRITE) && (zp->z_pflags &
 	    (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
-		ZFS_EXIT(zfsvfs);
+		ZFS_EXIT(zsb);
 		return (EPERM);
 	}
 
-	if ((prot & (PROT_READ | PROT_EXEC)) &&
+	if ((vm_flags & (VM_READ | VM_EXEC)) &&
 	    (zp->z_pflags & ZFS_AV_QUARANTINED)) {
-		ZFS_EXIT(zfsvfs);
+		ZFS_EXIT(zsb);
 		return (EACCES);
 	}
 
-	if (vp->v_flag & VNOMAP) {
-		ZFS_EXIT(zfsvfs);
-		return (ENOSYS);
-	}
-
 	if (off < 0 || len > MAXOFFSET_T - off) {
-		ZFS_EXIT(zfsvfs);
+		ZFS_EXIT(zsb);
 		return (ENXIO);
 	}
 
-	if (vp->v_type != VREG) {
-		ZFS_EXIT(zfsvfs);
-		return (ENODEV);
-	}
-
-	/*
-	 * If file is locked, disallow mapping.
-	 */
-	if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) {
-		ZFS_EXIT(zfsvfs);
-		return (EAGAIN);
-	}
-
-	as_rangelock(as);
-	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
-	if (error != 0) {
-		as_rangeunlock(as);
-		ZFS_EXIT(zfsvfs);
-		return (error);
-	}
-
-	vn_a.vp = vp;
-	vn_a.offset = (u_offset_t)off;
-	vn_a.type = flags & MAP_TYPE;
-	vn_a.prot = prot;
-	vn_a.maxprot = maxprot;
-	vn_a.cred = cr;
-	vn_a.amp = NULL;
-	vn_a.flags = flags & ~MAP_TYPE;
-	vn_a.szc = 0;
-	vn_a.lgrp_mem_policy_flags = 0;
-
-	error = as_map(as, *addrp, len, segvn_create, &vn_a);
-
-	as_rangeunlock(as);
-	ZFS_EXIT(zfsvfs);
-	return (error);
-}
-
-/* ARGSUSED */
-static int
-zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
-    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr)
-{
-	uint64_t pages = btopr(len);
-
-	atomic_add_64(&VTOZ(vp)->z_mapcnt, pages);
+	ZFS_EXIT(zsb);
 	return (0);
 }
-
-/*
- * The reason we push dirty pages as part of zfs_delmap() is so that we get a
- * more accurate mtime for the associated file.  Since we don't have a way of
- * detecting when the data was actually modified, we have to resort to
- * heuristics.  If an explicit msync() is done, then we mark the mtime when the
- * last page is pushed.  The problem occurs when the msync() call is omitted,
- * which by far the most common case:
- *
- * 	open()
- * 	mmap()
- * 	<modify memory>
- * 	munmap()
- * 	close()
- * 	<time lapse>
- * 	putpage() via fsflush
- *
- * If we wait until fsflush to come along, we can have a modification time that
- * is some arbitrary point in the future.  In order to prevent this in the
- * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
- * torn down.
- */
-/* ARGSUSED */
-static int
-zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
-    size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
-{
-	uint64_t pages = btopr(len);
-
-	ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
-	atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);
-
-	if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
-	    vn_has_cached_data(vp))
-		(void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);
-
-	return (0);
-}
-#endif /* HAVE_MMAP */
+EXPORT_SYMBOL(zfs_map);
 
 /*
  * convoff - converts the given data (start, whence) to the
diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c
index 0e90b78036..fc2b813294 100644
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@@ -240,9 +240,15 @@ zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 static int
 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-	znode_t *zp = ITOZ(filp->f_mapping->host);
+	struct inode *ip = filp->f_mapping->host;
+	znode_t *zp = ITOZ(ip);
 	int error;
 
+	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
+	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
+	if (error)
+		return (error);
+
 	error = generic_file_mmap(filp, vma);
 	if (error)
 		return (error);