Correctly lock pages for .readpages()

Unlike the .readpage() callback which is passed a single locked page to be populated. The .readpages() callback is passed a list of unlocked pages which are all marked for read-ahead (PG_readahead set). It is the responsibly of .readpages() to ensure to pages are properly locked before being populated. Prior to this change the requested read-ahead pages would be updated outside of the page lock which is unsafe. The unlocked pages would then be unlocked again which is harmless but should have been immediately detected as bug. Unfortunately, newer kernels failed detect this issue because the check is done with a VM_BUG_ON which is disabled by default. Luckily, the old Debian Lenny 2.6.26 kernel caught this because it simply uses a BUG_ON. The straight forward fix for this is to update the .readpages() callback to use the read_cache_pages() helper function. The helper function will ensure that each page in the list is properly locked before it is passed to the .readpage() callback. In addition resolving the bug, this results in a nice simplification of the existing code. The downside to this change is that instead of passing one large read request to the dmu multiple smaller ones are submitted. All of these requests however are marked for readahead so the lower layers should issue a large I/O regardless. Thus most of the request should hit the ARC cache. Futher optimization of this code can be done in the future is a perform analysis determines it to be worthwhile. But for the moment, it is preferable that code be correct and understandable. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #355
2011-08-04 16:25:43 -07:00 · 2011-08-04 16:25:43 -07:00 · f3ab88d646
parent 76659dc110
commit f3ab88d646
1 changed files with 13 additions and 54 deletions
--- a/module/zfs/zpl_file.c
+++ b/module/zfs/zpl_file.c
@ -260,60 +260,6 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 	return (error);
 }

-static struct page **
-pages_vector_from_list(struct list_head *pages, unsigned nr_pages)
-{
-	struct page **pl;
-	struct page *t;
-	unsigned page_idx;
-
-	pl = kmalloc(sizeof(*pl) * nr_pages, GFP_NOFS);
-	if (!pl)
-		return ERR_PTR(-ENOMEM);
-
-	page_idx = 0;
-	list_for_each_entry_reverse(t, pages, lru) {
-		pl[page_idx] = t;
-		page_idx++;
-	}
-
-	return pl;
-}
-
-static int
-zpl_readpages(struct file *file, struct address_space *mapping,
-	struct list_head *pages, unsigned nr_pages)
-{
-	struct inode *ip;
-	struct page  **pl;
-	struct page  *p, *n;
-	int          error;
-
-	ip = mapping->host;
-
-	pl = pages_vector_from_list(pages, nr_pages);
-	if (IS_ERR(pl))
-		return PTR_ERR(pl);
-
-	error = -zfs_getpage(ip, pl, nr_pages);
-	if (error)
-		goto error;
-
-	list_for_each_entry_safe_reverse(p, n, pages, lru) {
-
-		list_del(&p->lru);
-
-		flush_dcache_page(p);
-		SetPageUptodate(p);
-		unlock_page(p);
-		page_cache_release(p);
-	}
-
-error:
-	kfree(pl);
-	return error;
-}
-
 /*
 * Populate a page with data for the Linux page cache.  This function is
 * only used to support mmap(2).  There will be an identical copy of the
@ -349,6 +295,19 @@ zpl_readpage(struct file *filp, struct page *pp)
 	return error;
 }

+/*
+ * Populate a set of pages with data for the Linux page cache.  This
+ * function will only be called for read ahead and never for demand
+ * paging.  For simplicity, the code relies on read_cache_pages() to
+ * correctly lock each page for IO and call zpl_readpage().
+ */
+static int
+zpl_readpages(struct file *filp, struct address_space *mapping,
+	struct list_head *pages, unsigned nr_pages)
+{
+	return (read_cache_pages(mapping, pages, zpl_readpage, filp));
+}
+
 int
 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {