AC_MSG_RESULT([no])
])
- # --------- zap_page_range(vma) --------------------------------
- AC_MSG_CHECKING([if zap_page_range with vma parameter])
- ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
- if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
- AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- fi
-
# ---------- Red Hat 2.4.20 backports some 2.5 bits --------
# This needs to run after we've defined the KCPPFLAGS
#define D_RPCTRACE 0x00100000 /* for distributed debugging */
#define D_VFSTRACE 0x00200000
#define D_READA 0x00400000 /* read-ahead */
-#define D_MMAP 0x00800000
#ifdef __KERNEL__
# include <linux/sched.h> /* THREAD_SIZE */
{"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
"blocks", "net", "warning", "buffs", "other", "dentry", "portals",
"page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
- "reada", "mmap", NULL};
+ "reada", NULL};
struct debug_daemon_cmd {
char *cmd;
#define PageWriteback(page) 0
#define end_page_writeback(page)
-static inline int mapping_mapped(struct address_space *mapping)
-{
- return mapping->i_mmap_shared ? 1 : 0;
-}
-
-#ifdef ZAP_PAGE_RANGE_VMA
-#define ll_zap_page_range(vma, addr, len) zap_page_range(vma, addr, len)
-#else
-#define ll_zap_page_range(vma, addr, len) zap_page_range(vma->vm_mm, addr, len)
-#endif
-
#endif /* end of 2.4 compat macros */
#ifdef HAVE_PAGE_LIST
+++ /dev/null
-Index: linux-2.4.24-l36mmap/mm/memory.c
-===================================================================
---- linux-2.4.24-l36mmap.orig/mm/memory.c 2004-05-27 17:44:13.000000000 -0700
-+++ linux-2.4.24-l36mmap/mm/memory.c 2004-05-27 17:45:07.000000000 -0700
-@@ -411,6 +411,7 @@
- mm->rss = 0;
- spin_unlock(&mm->page_table_lock);
- }
-+EXPORT_SYMBOL_GPL(zap_page_range);
-
- /*
- * Do a quick page-table lookup for a single page.
MODULES := llite
-llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o special.o symlink.o llite_mmap.o
+llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o special.o symlink.o
ifeq ($(PATCHLEVEL),4)
llite-objs += rw24.o super.o
llite-objs += rw26.o super25.o
endif
-@INCLUDE_RULES@
+@INCLUDE_RULES@
\ No newline at end of file
obj-y += llite.o
llite-objs := llite_lib.o dcache.o super.o rw.o \
super25.o file.o dir.o symlink.o namei.o lproc_llite.o \
- rw26.o llite_nfs.o llite_close.o special.o llite_mmap.o
+ rw26.o llite_nfs.o llite_close.o special.o
if (end < tmpex.l_extent.end >> PAGE_CACHE_SHIFT)
end = ~0;
- i = inode->i_size ? (inode->i_size - 1) >> PAGE_CACHE_SHIFT : 0;
+ i = (inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
if (i < end)
end = i;
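(Rounding check with hypothetical numbers, assuming 4096-byte pages: for i_size = 4097 the old expression gives 1, the index of the last page holding data, while the round-up form gives 2, the total page count, so the clamped end can now reach one page further.)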
"count: %lu skip: %lu end: %lu%s\n", start, start % count,
count, skip, end, discard ? " (DISCARDING)" : "");
- /* walk through the vmas on the inode and tear down mmaped pages that
- * intersect with the lock. this stops immediately if there are no
- * mmap()ed regions of the file. This is not efficient at all and
- * should be short lived. We'll associate mmap()ed pages with the lock
- * and will be able to find them directly */
- for (i = start; i <= end; i += (j + skip)) {
- j = min(count - (i % count), end - i + 1);
- LASSERT(inode->i_mapping);
- if (ll_teardown_mmaps(inode->i_mapping, i << PAGE_CACHE_SHIFT,
- ((i+j) << PAGE_CACHE_SHIFT) - 1) )
- break;
- }
-
/* this is the simplistic implementation of page eviction at
* cancelation. It is careful to get races with other page
* lockers handled correctly. fixes from bug 20 will make it
static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
loff_t *ppos)
{
+ struct ll_file_data *fd = filp->private_data;
struct inode *inode = filp->f_dentry->d_inode;
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_stripe_md *lsm = lli->lli_smd;
- struct ll_lock_tree tree;
- struct ll_lock_tree_node *node;
+ struct lustre_handle lockh = { 0 };
+ ldlm_policy_data_t policy;
int rc;
ssize_t retval;
__u64 kms;
if (!lsm)
RETURN(0);
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1,
- LCK_PR);
-
- tree.lt_fd = filp->private_data;
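+ /* ldlm extent locks cover inclusive byte ranges, hence the "count - 1" below */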
+ policy.l_extent.start = *ppos;
+ policy.l_extent.end = *ppos + count - 1;
- rc = ll_tree_lock(&tree, node, inode, buf, count,
- filp->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0);
+ rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, 0);
if (rc != 0)
RETURN(rc);
retval = generic_file_read(filp, buf, count, ppos);
out:
- ll_tree_unlock(&tree, inode);
+ ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
RETURN(retval);
}
static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
loff_t *ppos)
{
+ struct ll_file_data *fd = file->private_data;
struct inode *inode = file->f_dentry->d_inode;
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct ll_lock_tree tree;
- struct ll_lock_tree_node *node;
+ struct lustre_handle lockh = { 0 };
+ ldlm_policy_data_t policy;
loff_t maxbytes = ll_file_maxbytes(inode);
ssize_t retval;
int rc;
LASSERT(lsm);
- if (file->f_flags & O_APPEND)
- node = ll_node_from_inode(inode, 0, OBD_OBJECT_EOF, LCK_PW);
- else
- node = ll_node_from_inode(inode, *ppos, *ppos + count - 1,
- LCK_PW);
- if (IS_ERR(node))
- RETURN(PTR_ERR(node));
-
- tree.lt_fd = file->private_data;
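+ /* an O_APPEND write's offset is only fixed once the lock is held, so cover the whole object */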
+ if (file->f_flags & O_APPEND) {
+ policy.l_extent.start = 0;
+ policy.l_extent.end = OBD_OBJECT_EOF;
+ } else {
+ policy.l_extent.start = *ppos;
+ policy.l_extent.end = *ppos + count - 1;
+ }
- rc = ll_tree_lock(&tree, node, inode, buf, count,
- file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0);
+ rc = ll_extent_lock(fd, inode, lsm, LCK_PW, &policy, &lockh, 0);
if (rc != 0)
RETURN(rc);
retval = generic_file_write(file, buf, count, ppos);
out:
- ll_tree_unlock(&tree, inode);
- /* serialize with mmap/munmap/mremap */
+ ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES,
retval > 0 ? retval : 0);
RETURN(retval);
.ioctl = ll_file_ioctl,
.open = ll_file_open,
.release = ll_file_release,
- .mmap = ll_file_mmap,
+ .mmap = generic_file_mmap,
.llseek = ll_file_seek,
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
.sendfile = generic_file_sendfile,
/* only trust these if the page lock is providing exclusion */
unsigned llap_write_queued:1,
llap_defer_uptodate:1,
- llap_origin:3,
llap_ra_used:1;
struct list_head llap_proc_item;
};
-enum {
- LLAP_ORIGIN_UNKNOWN = 0,
- LLAP_ORIGIN_READPAGE,
- LLAP_ORIGIN_READAHEAD,
- LLAP_ORIGIN_COMMIT_WRITE,
- LLAP_ORIGIN_WRITEPAGE,
- LLAP__ORIGIN_MAX,
-};
-
/* llite/lproc_llite.c */
int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
struct super_block *sb, char *osc, char *mdc);
/* llite/rw.c */
int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
-int ll_writepage(struct page *page);
void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
void ll_removepage(struct page *page);
int ll_readpage(struct file *file, struct page *page);
struct ll_async_page *llap_from_cookie(void *cookie);
-struct ll_async_page *llap_from_page(struct page *page, unsigned origin);
+struct ll_async_page *llap_from_page(struct page *page);
struct ll_async_page *llap_cast_private(struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
void ll_ra_accounting(struct page *page, struct address_space *mapping);
void ll_close_thread_shutdown(struct ll_close_queue *lcq);
int ll_close_thread_start(struct ll_close_queue **lcq_ret);
-/* llite/llite_mmap.c */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-typedef struct rb_root rb_root_t;
-typedef struct rb_node rb_node_t;
-#endif
-
-struct ll_lock_tree_node;
-struct ll_lock_tree {
- rb_root_t lt_root;
- struct list_head lt_locked_list;
- struct ll_file_data *lt_fd;
-};
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first,
- __u64 last);
-int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
-struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode);
-int ll_tree_lock(struct ll_lock_tree *tree,
- struct ll_lock_tree_node *first_node, struct inode *inode,
- const char *buf, size_t count, int ast_flags);
-int ll_tree_unlock(struct ll_lock_tree *tree, struct inode *inode);
-
#define LL_SBI_NOLCK 0x1
#define LL_SBI_READAHEAD 0x2
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- * This file is part of Lustre, http://www.lustre.org.
- *
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/version.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/iobuf.h>
-#endif
-
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/lustre_mds.h>
-#include <linux/lustre_lite.h>
-#include "llite_internal.h"
-#include <linux/lustre_compat25.h>
-
-struct ll_lock_tree_node {
- rb_node_t lt_node;
- struct list_head lt_locked_item;
- __u64 lt_oid;
- ldlm_policy_data_t lt_policy;
- struct lustre_handle lt_lockh;
- ldlm_mode_t lt_mode;
-};
-
-__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
-int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode,
- unsigned long addr, size_t count);
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
- int *type);
-#else
-
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
- int unused);
-#endif
-
-struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
- __u64 end, ldlm_mode_t mode)
-{
- struct ll_lock_tree_node *node;
-
- OBD_ALLOC(node, sizeof(*node));
- if (node == NULL)
- RETURN(ERR_PTR(-ENOMEM));
-
- node->lt_oid = ll_i2info(inode)->lli_smd->lsm_object_id;
- node->lt_policy.l_extent.start = start;
- node->lt_policy.l_extent.end = end;
- memset(&node->lt_lockh, 0, sizeof(node->lt_lockh));
- INIT_LIST_HEAD(&node->lt_locked_item);
- node->lt_mode = mode;
-
- return node;
-}
-
-int lt_compare(struct ll_lock_tree_node *one, struct ll_lock_tree_node *two)
-{
- if ( one->lt_oid < two->lt_oid)
- return -1;
- if ( one->lt_oid > two->lt_oid)
- return 1;
-
- if ( one->lt_policy.l_extent.end < two->lt_policy.l_extent.start )
- return -1;
- if ( one->lt_policy.l_extent.start > two->lt_policy.l_extent.end )
- return 1;
-
- return 0; /* they are the same object and overlap */
-}
-
-static void lt_merge(struct ll_lock_tree_node *dst,
- struct ll_lock_tree_node *src)
-{
- dst->lt_policy.l_extent.start = min(dst->lt_policy.l_extent.start,
- src->lt_policy.l_extent.start);
- dst->lt_policy.l_extent.end = max(dst->lt_policy.l_extent.end,
- src->lt_policy.l_extent.end);
-
- /* XXX could be a real call to the dlm to find superset modes */
- if (src->lt_mode == LCK_PW && dst->lt_mode != LCK_PW)
- dst->lt_mode = LCK_PW;
-}
-
-static void lt_insert(struct ll_lock_tree *tree,
- struct ll_lock_tree_node *node)
-{
- struct ll_lock_tree_node *walk;
- rb_node_t **p, *parent;
- ENTRY;
-
-restart:
- p = &tree->lt_root.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- walk = rb_entry(parent, struct ll_lock_tree_node, lt_node);
- switch (lt_compare(node, walk)) {
- case -1:
- p = &(*p)->rb_left;
- break;
- case 1:
- p = &(*p)->rb_right;
- break;
- case 0:
- lt_merge(node, walk);
- rb_erase(&walk->lt_node, &tree->lt_root);
- OBD_FREE(walk, sizeof(*walk));
- goto restart;
- break;
- default:
- LBUG();
- break;
- }
- }
- rb_link_node(&node->lt_node, parent, p);
- rb_insert_color(&node->lt_node, &tree->lt_root);
- EXIT;
-}
-
-static struct ll_lock_tree_node *lt_least_node(struct ll_lock_tree *tree)
-{
- rb_node_t *rbnode;
- struct ll_lock_tree_node *node = NULL;
-
- for ( rbnode = tree->lt_root.rb_node; rbnode != NULL;
- rbnode = rbnode->rb_left) {
- if (rbnode->rb_left == NULL) {
- node = rb_entry(rbnode, struct ll_lock_tree_node,
- lt_node);
- break;
- }
- }
- RETURN(node);
-}
-
-int ll_tree_unlock(struct ll_lock_tree *tree, struct inode *inode)
-{
- struct ll_lock_tree_node *node;
- struct list_head *pos, *n;
- int rc = 0;
- ENTRY;
-
- list_for_each_safe(pos, n, &tree->lt_locked_list) {
- node = list_entry(pos, struct ll_lock_tree_node,
- lt_locked_item);
-
- rc = ll_extent_unlock(tree->lt_fd, inode,
- ll_i2info(inode)->lli_smd, node->lt_mode,
- &node->lt_lockh);
- if (rc != 0) {
- /* XXX better message */
- CERROR("couldn't unlock %d\n", rc);
- }
- list_del(&node->lt_locked_item);
- OBD_FREE(node, sizeof(*node));
- }
-
- while ((node = lt_least_node(tree))) {
- rb_erase(&node->lt_node, &tree->lt_root);
- OBD_FREE(node, sizeof(*node));
- }
-
- RETURN(rc);
-}
-
-int ll_tree_lock(struct ll_lock_tree *tree,
- struct ll_lock_tree_node *first_node, struct inode *inode,
- const char *buf, size_t count, int ast_flags)
-{
- struct ll_lock_tree_node *node;
- int rc = 0;
- ENTRY;
-
- tree->lt_root.rb_node = NULL;
- INIT_LIST_HEAD(&tree->lt_locked_list);
- if (first_node != NULL)
- lt_insert(tree, first_node);
-
- if (mapping_mapped(inode->i_mapping)) {
- rc = lt_get_mmap_locks(tree, inode, (unsigned long)buf, count);
- if (rc)
- GOTO(out, rc);
- }
-
- while ((node = lt_least_node(tree))) {
- rc = ll_extent_lock(tree->lt_fd, inode,
- ll_i2info(inode)->lli_smd, node->lt_mode,
- &node->lt_policy, &node->lt_lockh,
- ast_flags);
- if (rc != 0)
- GOTO(out, rc);
-
- rb_erase(&node->lt_node, &tree->lt_root);
- list_add_tail(&node->lt_locked_item, &tree->lt_locked_list);
- }
- RETURN(rc);
-out:
- ll_tree_unlock(tree, inode);
- RETURN(rc);
-}
-
-static ldlm_mode_t mode_from_vma(struct vm_area_struct *vma)
-{
- /* we only want to hold PW locks if the mmap() can generate
- * writes back to the file and that only happens in shared
- * writable vmas */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return LCK_PW;
- return LCK_PR;
-}
-
-static void policy_from_vma(ldlm_policy_data_t *policy,
- struct vm_area_struct *vma, unsigned long addr,
- size_t count)
-{
- policy->l_extent.start = ((addr - vma->vm_start) & PAGE_CACHE_MASK) +
- (vma->vm_pgoff << PAGE_CACHE_SHIFT);
- policy->l_extent.end = (policy->l_extent.start + count - 1) |
- (PAGE_CACHE_SIZE - 1);
-}
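
Aside: the extent arithmetic above is easy to sanity-check in isolation. A minimal userspace sketch follows; the 4096-byte page size and the sample vma numbers are illustrative assumptions, not values from the patch.

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)
#define PAGE_CACHE_MASK  (~(PAGE_CACHE_SIZE - 1))

int main(void)
{
        /* hypothetical mapping: file page 16 mapped at 0x40000000 */
        unsigned long vm_start = 0x40000000UL, vm_pgoff = 16;
        unsigned long addr = 0x40001234UL;      /* faulting address */
        unsigned long count = PAGE_CACHE_SIZE;  /* one page, as in ll_nopage */

        unsigned long long start = ((addr - vm_start) & PAGE_CACHE_MASK) +
                        ((unsigned long long)vm_pgoff << PAGE_CACHE_SHIFT);
        unsigned long long end = (start + count - 1) | (PAGE_CACHE_SIZE - 1);

        /* prints start=0x11000 end=0x11fff: exactly the file page
         * backing the faulting address, with an inclusive end offset */
        printf("start=%#llx end=%#llx\n", start, end);
        return 0;
}
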
-
-static struct vm_area_struct * our_vma(unsigned long addr, size_t count)
-{
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *ret = NULL;
- ENTRY;
-
- spin_lock(&mm->page_table_lock);
- for(vma = find_vma(mm, addr);
- vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage) {
- ret = vma;
- break;
- }
- }
- spin_unlock(&mm->page_table_lock);
- RETURN(ret);
-}
-
-int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode,
- unsigned long addr, size_t count)
-{
- struct vm_area_struct *vma;
- struct ll_lock_tree_node *node;
- ldlm_policy_data_t policy;
- ENTRY;
-
- if (count == 0)
- RETURN(0);
-
- /* we need to look up vmas on page aligned addresses */
- count += addr & (PAGE_SIZE - 1);
- addr -= addr & (PAGE_SIZE - 1);
-
- while ((vma = our_vma(addr, count)) != NULL) {
-
- policy_from_vma(&policy, vma, addr, count);
- node = ll_node_from_inode(inode, policy.l_extent.start,
- policy.l_extent.end,
- mode_from_vma(vma));
- if (IS_ERR(node)) {
- CERROR("not enough mem for lock_tree_node!\n");
- RETURN(-ENOMEM);
- }
- lt_insert(tree, node);
-
- if (vma->vm_end - addr >= count)
- break;
- count -= vma->vm_end - addr;
- addr = vma->vm_end;
- }
- RETURN(0);
-}
-
-/* FIXME: there is a pagefault race that goes as follows:
- * 1. A user process on node A accesses a portion of a mapped file,
- * resulting in a page fault. The pagefault handler invokes the
- * ll_nopage function, which reads the page into memory.
- * 2. A user process on node B writes to the same portion of the file
- * (either via mmap or write()), causing node A to cancel the
- * lock and truncate the page.
- * 3. Node A then executes the rest of do_no_page(), entering the
- * now-invalid page into the PTEs.
- *
- * Making the whole of do_no_page a hook that installs both the page
- * cache entry and the page mapping under the dlm lock would
- * eliminate this race.
- */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
- int *type)
-#else
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
- int unused)
-#endif
-{
- struct file *filp = vma->vm_file;
- struct ll_file_data *fd = filp->private_data;
- struct inode *inode = filp->f_dentry->d_inode;
- struct lustre_handle lockh = { 0 };
- ldlm_policy_data_t policy;
- ldlm_mode_t mode;
- struct page *page;
- __u64 kms;
- unsigned long pgoff, size, rand_read, seq_read;
- int rc = 0;
- ENTRY;
-
- if (ll_i2info(inode)->lli_smd == NULL) {
- CERROR("No lsm on fault?\n");
- RETURN(NULL);
- }
-
- /* start and end the lock on the first and last bytes in the page */
- policy_from_vma(&policy, vma, address, PAGE_CACHE_SIZE);
-
- CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
- vma, inode->i_ino, policy.l_extent.start,
- policy.l_extent.end);
-
- mode = mode_from_vma(vma);
-
- rc = ll_extent_lock(fd, inode, ll_i2info(inode)->lli_smd, mode, &policy,
- &lockh, LDLM_FL_CBPENDING);
- if (rc != 0)
- RETURN(NULL);
-
- /* XXX change inode size without i_sem hold! there is a race condition
- * with truncate path. (see ll_extent_lock) */
- kms = lov_merge_size(ll_i2info(inode)->lli_smd, 1);
- pgoff = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
- size = (kms + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
- if (pgoff >= size)
- ll_glimpse_size(inode);
- else
- inode->i_size = kms;
-
- /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that
- * the kernel will not read other pages not covered by ldlm in
- * filemap_nopage. we do our readahead in ll_readpage.
- */
- rand_read = vma->vm_flags & VM_RAND_READ;
- seq_read = vma->vm_flags & VM_SEQ_READ;
- vma->vm_flags &= ~ VM_SEQ_READ;
- vma->vm_flags |= VM_RAND_READ;
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
- page = filemap_nopage(vma, address, type);
-#else
- page = filemap_nopage(vma, address, unused);
-#endif
- vma->vm_flags &= ~VM_RAND_READ;
- vma->vm_flags |= (rand_read | seq_read);
-
- ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
- RETURN(page);
-}
-
-/* return the user space pointer that maps to a file offset via a vma */
-static inline unsigned long file_to_user(struct vm_area_struct *vma,
- __u64 byte)
-{
- return vma->vm_start +
- (byte - ((__u64)vma->vm_pgoff << PAGE_CACHE_SHIFT));
-
-}
-
-#define VMA_DEBUG(vma, fmt, arg...) \
- CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld): " fmt, \
- vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, ## arg);
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-/* [first, last] are the byte offsets affected.
- * vm_{start, end} are user addresses of the first byte of the mapping and
- * the next byte beyond it
- * vm_pgoff is the page index of the first byte in the mapping */
-static void teardown_vmas(struct vm_area_struct *vma, __u64 first,
- __u64 last)
-{
- unsigned long address, len;
- for (; vma ; vma = vma->vm_next_share) {
- if (last >> PAGE_CACHE_SHIFT < vma->vm_pgoff)
- continue;
- if (first >> PAGE_CACHE_SHIFT > (vma->vm_pgoff +
- ((vma->vm_end - vma->vm_start) >> PAGE_CACHE_SHIFT)))
- continue;
-
- address = max((unsigned long)vma->vm_start,
- file_to_user(vma, first));
- len = min((unsigned long)vma->vm_end,
- file_to_user(vma, last) + 1) - address;
-
- VMA_DEBUG(vma, "zapping vma [address=%ld len=%ld]\n",
- address, len);
- LASSERT(vma->vm_mm);
- ll_zap_page_range(vma, address, len);
- }
-}
-#endif
-
-/* XXX put nice comment here. talk about __free_pte -> dirty pages and
- * nopage's reference passing to the pte */
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first,
- __u64 last)
-{
- int rc = -ENOENT;
- ENTRY;
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
- if (mapping_mapped(mapping)) {
- rc = 0;
- unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
- last - first + 1, 1);
- }
-#else
- spin_lock(&mapping->i_shared_lock);
- if (mapping->i_mmap != NULL) {
- rc = 0;
- teardown_vmas(mapping->i_mmap, first, last);
- }
- if (mapping->i_mmap_shared != NULL) {
- rc = 0;
- teardown_vmas(mapping->i_mmap_shared, first, last);
- }
- spin_unlock(&mapping->i_shared_lock);
-#endif
- RETURN(rc);
-}
-
-static struct vm_operations_struct ll_file_vm_ops = {
- .nopage = ll_nopage,
-};
-
-int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
- int rc;
- ENTRY;
-
- rc = generic_file_mmap(file, vma);
- if (rc == 0)
- vma->vm_ops = &ll_file_vm_ops;
-
- RETURN(rc);
-}
/* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
* it in our own state */
if (dummy_llap->llap_magic == 0) {
- seq_printf(seq, "generation | llap .cookie origin | page ");
+ seq_printf(seq, "generation | llap .cookie | page ");
seq_printf(seq, "inode .index [ page flags ]\n");
return 0;
}
if (llap != NULL) {
int has_flags = 0;
struct page *page = llap->llap_page;
- static char *origins[] = {
- [LLAP_ORIGIN_UNKNOWN] = "--",
- [LLAP_ORIGIN_READPAGE] = "rp",
- [LLAP_ORIGIN_READAHEAD] = "ra",
- [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
- [LLAP_ORIGIN_WRITEPAGE] = "wp",
- };
-
- LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
- llap->llap_origin);
-
- seq_printf(seq, "%lu | %p %p %s | %p %p %lu [",
+
+ seq_printf(seq, "%lu | %p %p | %p %p %lu [",
sbi->ll_pglist_gen,
- llap, llap->llap_cookie,
- origins[llap->llap_origin],
+ llap, llap->llap_cookie,
page, page->mapping->host, page->index);
seq_page_flag(seq, page, locked, has_flags);
seq_page_flag(seq, page, error, has_flags);
}
/* XXX have the exp be an argument? */
-struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
+struct ll_async_page *llap_from_page(struct page *page)
{
struct ll_async_page *llap;
struct obd_export *exp;
int rc;
ENTRY;
- LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
-
llap = llap_cast_private(page);
if (llap != NULL)
- GOTO(out, llap);
+ RETURN(llap);
exp = ll_i2obdexp(page->mapping->host);
if (exp == NULL)
RETURN(ERR_PTR(rc));
}
- LL_CDEBUG_PAGE(D_PAGE, page, "obj off "LPU64"\n",
- (obd_off)page->index << PAGE_SHIFT);
CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
page, llap->llap_cookie, (obd_off)page->index << PAGE_SHIFT);
/* also zeroing the PRIVBITS low order bitflags */
list_add_tail(&llap->llap_proc_item, &sbi->ll_pglist);
spin_unlock(&sbi->ll_lock);
-out:
- llap->llap_origin = origin;
RETURN(llap);
}
CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
inode, page, from, to, page->index);
- llap = llap_from_page(page, LLAP_ORIGIN_COMMIT_WRITE);
+ llap = llap_from_page(page);
if (IS_ERR(llap))
RETURN(PTR_ERR(llap));
spin_unlock(&sbi->ll_lock);
}
-int ll_writepage(struct page *page)
-{
- struct inode *inode = page->mapping->host;
- struct obd_export *exp;
- struct ll_async_page *llap;
- int rc = 0;
- ENTRY;
-
- LASSERT(!PageDirty(page));
- LASSERT(PageLocked(page));
-
- exp = ll_i2obdexp(inode);
- if (exp == NULL)
- GOTO(out, rc = -EINVAL);
-
- llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
- if (IS_ERR(llap))
- GOTO(out, rc = PTR_ERR(llap));
-
- page_cache_get(page);
- if (llap->llap_write_queued) {
- LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
- rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
- llap->llap_cookie,
- ASYNC_READY | ASYNC_URGENT);
- } else {
- rc = queue_or_sync_write(exp, inode, llap, PAGE_SIZE,
- ASYNC_READY | ASYNC_URGENT);
- }
- if (rc)
- page_cache_release(page);
-out:
- if (rc)
- unlock_page(page);
- RETURN(rc);
-}
-
/* called for each page in a completed rpc.*/
void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
return;
}
- llap = llap_from_page(page, 0);
+ llap = llap_from_page(page);
if (IS_ERR(llap)) {
CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
page->index, PTR_ERR(llap));
{
struct ll_async_page *llap;
- llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
+ llap = llap_from_page(page);
if (IS_ERR(llap))
return;
/* we do this first so that we can see the page in the /proc
* accounting */
- llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
+ llap = llap_from_page(page);
if (IS_ERR(llap) || llap->llap_defer_uptodate)
goto next_page;
if (exp == NULL)
GOTO(out, rc = -EINVAL);
- llap = llap_from_page(page, LLAP_ORIGIN_READPAGE);
+ llap = llap_from_page(page);
if (IS_ERR(llap))
GOTO(out, rc = PTR_ERR(llap));
}
if (rc == 0) {
+#if 0
CWARN("ino %lu page %lu (%llu) not covered by "
"a lock (mmap?). check debug logs.\n",
inode->i_ino, page->index,
(long long)page->index << PAGE_CACHE_SHIFT);
+#endif
}
rc = ll_issue_page_read(exp, llap, oig, 0);
#include "llite_internal.h"
#include <linux/lustre_compat25.h>
+static int ll_writepage_24(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct obd_export *exp;
+ struct ll_async_page *llap;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(!PageDirty(page));
+ LASSERT(PageLocked(page));
+
+ exp = ll_i2obdexp(inode);
+ if (exp == NULL)
+ GOTO(out, rc = -EINVAL);
+
+ llap = llap_from_page(page);
+ if (IS_ERR(llap))
+ GOTO(out, rc = PTR_ERR(llap));
+
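+ /* extra reference to pin the page while the write is in flight; dropped below if queueing fails */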
+ page_cache_get(page);
+ if (llap->llap_write_queued) {
+ LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
+ rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
+ llap->llap_cookie,
+ ASYNC_READY | ASYNC_URGENT);
+ } else {
+ llap->llap_write_queued = 1;
+ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+ llap->llap_cookie, OBD_BRW_WRITE, 0, 0,
+ 0, ASYNC_READY | ASYNC_URGENT);
+ if (rc == 0)
+ LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n");
+ else
+ llap->llap_write_queued = 0;
+ }
+ if (rc)
+ page_cache_release(page);
+out:
+ if (rc)
+ unlock_page(page);
+ RETURN(rc);
+}
+
static int ll_direct_IO_24(int rw,
#ifdef HAVE_DIO_FILE
struct file *file,
struct address_space_operations ll_aops = {
.readpage = ll_readpage,
.direct_IO = ll_direct_IO_24,
- .writepage = ll_writepage,
+ .writepage = ll_writepage_24,
.prepare_write = ll_prepare_write,
.commit_write = ll_commit_write,
.removepage = ll_removepage,
static int ll_writepage_26(struct page *page, struct writeback_control *wbc)
{
- return ll_writepage(page);
+ struct inode *inode = page->mapping->host;
+ struct obd_export *exp;
+ struct ll_async_page *llap;
+ int rc;
+ ENTRY;
+
+ LASSERT(!PageDirty(page));
+ LASSERT(PageLocked(page));
+
+ exp = ll_i2obdexp(inode);
+ if (exp == NULL)
+ GOTO(out, rc = -EINVAL);
+
+ llap = llap_from_page(page);
+ if (IS_ERR(llap))
+ GOTO(out, rc = PTR_ERR(llap));
+
+ page_cache_get(page);
+ if (llap->llap_write_queued) {
+ LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
+ rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
+ llap->llap_cookie,
+ ASYNC_READY | ASYNC_URGENT);
+ } else {
+ llap->llap_write_queued = 1;
+ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+ llap->llap_cookie, OBD_BRW_WRITE, 0, 0,
+ 0, ASYNC_READY | ASYNC_URGENT);
+ if (rc == 0)
+ LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n");
+ else
+ llap->llap_write_queued = 0;
+ }
+ if (rc)
+ page_cache_release(page);
+out:
+ if (rc)
+ unlock_page(page);
+ else
+ set_page_writeback(page);
+ RETURN(rc);
}
/* It is safe to not check anything in invalidatepage/releasepage below
run_test 15 "test out-of-space with multiple writers ==========="
test_16() {
- fsx -c 50 -p 100 -N 2500 $MOUNT1/fsxfile $MOUNT2/fsxfile
+ fsx -R -W -c 50 -p 100 -N 2500 $MOUNT1/fsxfile $MOUNT2/fsxfile
}
run_test 16 "2500 iterations of dual-mount fsx ================="
./mmap_sanity -d $MOUNT1 -m $MOUNT2
sync; sleep 1; sync
}
-run_test 18 "mmap sanity check ================================="
+#run_test 18 "mmap sanity check ================================="
test_19() { # bug3811
[ -d /proc/fs/lustre/obdfilter ] || return 0
"rpctrace" : (1 << 20),
"vfstrace" : (1 << 21),
"reada" : (1 << 22),
- "mmap" : (1 << 23),
}
subsystem_names = {