back the mmap code out of b1_2
author phil <phil>
Sun, 24 Oct 2004 00:24:40 +0000 (00:24 +0000)
committer phil <phil>
Sun, 24 Oct 2004 00:24:40 +0000 (00:24 +0000)
a complete backout patch will be attached to bug 3918

19 files changed:
lnet/archdep.m4
lnet/include/linux/libcfs.h
lnet/utils/debug.c
lustre/include/linux/lustre_compat25.h
lustre/kernel_patches/patches/export-zap-page-range.patch [deleted file]
lustre/llite/Makefile.in
lustre/llite/Makefile.mk
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_mmap.c [deleted file]
lustre/llite/lproc_llite.c
lustre/llite/rw.c
lustre/llite/rw24.c
lustre/llite/rw26.c
lustre/portals/archdep.m4
lustre/portals/include/linux/libcfs.h
lustre/portals/utils/debug.c
lustre/tests/sanityN.sh
lustre/utils/lconf

diff --git a/lnet/archdep.m4 b/lnet/archdep.m4
index 94fa984..27704bd 100644 (file)
@@ -436,16 +436,6 @@ if test x$enable_modules != xno ; then
                        AC_MSG_RESULT([no])
                ])
 
-       # --------- zap_page_range(vma) --------------------------------
-       AC_MSG_CHECKING([if zap_pag_range with vma parameter])
-       ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
-       if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
-               AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
-               AC_MSG_RESULT([yes])
-       else
-               AC_MSG_RESULT([no])
-       fi
-
        # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
        # This needs to run after we've defined the KCPPFLAGS
 
diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h
index 301dca1..66ee471 100644 (file)
@@ -89,7 +89,6 @@ struct ptldebug_header {
 #define D_RPCTRACE    0x00100000 /* for distributed debugging */
 #define D_VFSTRACE    0x00200000
 #define D_READA       0x00400000 /* read-ahead */
-#define D_MMAP        0x00800000
 
 #ifdef __KERNEL__
 # include <linux/sched.h> /* THREAD_SIZE */
diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c
index 300437f..538af44 100644 (file)
@@ -74,7 +74,7 @@ static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
          "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
-         "reada", "mmap", NULL};
+         "reada", NULL};
 
 struct debug_daemon_cmd {
         char *cmd;
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h
index 6167f2f..95c462f 100644 (file)
@@ -216,17 +216,6 @@ static inline void cond_resched(void)
 #define PageWriteback(page) 0
 #define end_page_writeback(page)
 
-static inline int mapping_mapped(struct address_space *mapping)
-{
-        return mapping->i_mmap_shared ? 1 : 0;
-}
-
-#ifdef ZAP_PAGE_RANGE_VMA
-#define ll_zap_page_range(vma, addr, len)  zap_page_range(vma, addr, len)
-#else
-#define ll_zap_page_range(vma, addr, len)  zap_page_range(vma->vm_mm, addr, len)
-#endif
-
 #endif /* end of 2.4 compat macros */
 
 #ifdef HAVE_PAGE_LIST
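The compat shim deleted above chose the zap_page_range() signature at build
time, keyed off the ZAP_PAGE_RANGE_VMA probe that the archdep.m4 hunk also
removes. A stand-alone sketch of that dispatch pattern, using hypothetical
user-space stubs in place of the real kernel functions:

#include <stdio.h>

struct mm_struct { int dummy; };
struct vm_area_struct { struct mm_struct *vm_mm; };

#ifdef ZAP_PAGE_RANGE_VMA
/* newer signature: takes the vma itself */
static void zap_page_range(struct vm_area_struct *vma, unsigned long addr,
                           unsigned long len)
{ (void)vma; printf("vma variant: addr=%lu len=%lu\n", addr, len); }
#define ll_zap_page_range(vma, addr, len)  zap_page_range(vma, addr, len)
#else
/* older signature: takes the mm, so the wrapper digs it out of the vma */
static void zap_page_range(struct mm_struct *mm, unsigned long addr,
                           unsigned long len)
{ (void)mm; printf("mm variant: addr=%lu len=%lu\n", addr, len); }
#define ll_zap_page_range(vma, addr, len)  zap_page_range((vma)->vm_mm, addr, len)
#endif

int main(void)
{
        struct mm_struct mm = { 0 };
        struct vm_area_struct vma = { &mm };

        ll_zap_page_range(&vma, 0UL, 4096UL);
        return 0;
}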
diff --git a/lustre/kernel_patches/patches/export-zap-page-range.patch b/lustre/kernel_patches/patches/export-zap-page-range.patch
deleted file mode 100644 (file)
index 9b9d48f..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-Index: linux-2.4.24-l36mmap/mm/memory.c
-===================================================================
---- linux-2.4.24-l36mmap.orig/mm/memory.c      2004-05-27 17:44:13.000000000 -0700
-+++ linux-2.4.24-l36mmap/mm/memory.c   2004-05-27 17:45:07.000000000 -0700
-@@ -411,6 +411,7 @@
-               mm->rss = 0;
-       spin_unlock(&mm->page_table_lock);
- }
-+EXPORT_SYMBOL_GPL(zap_page_range);
- /*
-  * Do a quick page-table lookup for a single page. 
diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in
index 4daad42..9492120 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := llite
-llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o special.o symlink.o llite_mmap.o
+llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o special.o symlink.o
 
 ifeq ($(PATCHLEVEL),4)
 llite-objs += rw24.o super.o
@@ -7,4 +7,4 @@ else
 llite-objs += rw26.o super25.o
 endif
 
-@INCLUDE_RULES@
+@INCLUDE_RULES@
\ No newline at end of file
diff --git a/lustre/llite/Makefile.mk b/lustre/llite/Makefile.mk
index dabbd9e..06dd10e 100644 (file)
@@ -8,4 +8,4 @@ include $(src)/../portals/Kernelenv
 obj-y += llite.o
 llite-objs := llite_lib.o dcache.o super.o rw.o \
        super25.o file.o dir.o symlink.o namei.o lproc_llite.o \
-       rw26.o llite_nfs.o llite_close.o special.o llite_mmap.o
+       rw26.o llite_nfs.o llite_close.o special.o
diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index 338353e..ef1f0a5 100644 (file)
@@ -361,7 +361,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
         if (end < tmpex.l_extent.end >> PAGE_CACHE_SHIFT)
                 end = ~0;
 
-        i = inode->i_size ? (inode->i_size - 1) >> PAGE_CACHE_SHIFT : 0;
+        i = (inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
         if (i < end)
                 end = i;
 
@@ -369,19 +369,6 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm,
                "count: %lu skip: %lu end: %lu%s\n", start, start % count,
                count, skip, end, discard ? " (DISCARDING)" : "");
 
-        /* walk through the vmas on the inode and tear down mmaped pages that
-         * intersect with the lock.  this stops immediately if there are no
-         * mmap()ed regions of the file.  This is not efficient at all and
-         * should be short lived. We'll associate mmap()ed pages with the lock
-         * and will be able to find them directly */
-        for (i = start; i <= end; i += (j + skip)) {
-                j = min(count - (i % count), end - i + 1);
-                LASSERT(inode->i_mapping);
-                if (ll_teardown_mmaps(inode->i_mapping, i << PAGE_CACHE_SHIFT,
-                                      ((i+j) << PAGE_CACHE_SHIFT) - 1) )
-                        break;
-        }
-
         /* this is the simplistic implementation of page eviction at
          * cancelation.  It is careful to get races with other page
          * lockers handled correctly.  fixes from bug 20 will make it
@@ -740,11 +727,12 @@ int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
 static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
                             loff_t *ppos)
 {
+        struct ll_file_data *fd = filp->private_data;
         struct inode *inode = filp->f_dentry->d_inode;
         struct ll_inode_info *lli = ll_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
-        struct ll_lock_tree tree;
-        struct ll_lock_tree_node *node;
+        struct lustre_handle lockh = { 0 };
+        ldlm_policy_data_t policy;
         int rc;
         ssize_t retval;
         __u64 kms;
@@ -763,13 +751,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
         if (!lsm)
                 RETURN(0);
 
-        node = ll_node_from_inode(inode, *ppos, *ppos  + count - 1, 
-                                  LCK_PR);
-
-        tree.lt_fd = filp->private_data;
+        policy.l_extent.start = *ppos;
+        policy.l_extent.end = *ppos + count - 1;
 
-        rc = ll_tree_lock(&tree, node, inode, buf, count, 
-                          filp->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0);
+        rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, 0);
         if (rc != 0)
                 RETURN(rc);
 
@@ -796,7 +781,7 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
         retval = generic_file_read(filp, buf, count, ppos);
 
  out:
-        ll_tree_unlock(&tree, inode);
+        ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
         RETURN(retval);
 }
 
@@ -806,10 +791,11 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
 static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
                              loff_t *ppos)
 {
+        struct ll_file_data *fd = file->private_data;
         struct inode *inode = file->f_dentry->d_inode;
         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-        struct ll_lock_tree tree;
-        struct ll_lock_tree_node *node;
+        struct lustre_handle lockh = { 0 };
+        ldlm_policy_data_t policy;
         loff_t maxbytes = ll_file_maxbytes(inode);
         ssize_t retval;
         int rc;
@@ -830,18 +816,15 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
 
         LASSERT(lsm);
 
-        if (file->f_flags & O_APPEND)
-                node = ll_node_from_inode(inode, 0, OBD_OBJECT_EOF, LCK_PW);
-        else
-                node = ll_node_from_inode(inode, *ppos, *ppos  + count - 1, 
-                                          LCK_PW);
-        if (IS_ERR(node))
-                RETURN(PTR_ERR(node));
-
-        tree.lt_fd = file->private_data;
+        if (file->f_flags & O_APPEND) {
+                policy.l_extent.start = 0;
+                policy.l_extent.end = OBD_OBJECT_EOF;
+        } else  {
+                policy.l_extent.start = *ppos;
+                policy.l_extent.end = *ppos + count - 1;
+        }
 
-        rc = ll_tree_lock(&tree, node, inode, buf, count, 
-                          file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0);
+        rc = ll_extent_lock(fd, inode, lsm, LCK_PW, &policy, &lockh, 0);
         if (rc != 0)
                 RETURN(rc);
 
@@ -866,8 +849,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
         retval = generic_file_write(file, buf, count, ppos);
 
 out:
-        ll_tree_unlock(&tree, inode);
-        /* serialize with mmap/munmap/mremap */
+        ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
         lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES,
                             retval > 0 ? retval : 0);
         RETURN(retval);
@@ -1394,7 +1376,7 @@ struct file_operations ll_file_operations = {
         .ioctl          = ll_file_ioctl,
         .open           = ll_file_open,
         .release        = ll_file_release,
-        .mmap           = ll_file_mmap,
+        .mmap           = generic_file_mmap,
         .llseek         = ll_file_seek,
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
         .sendfile       = generic_file_sendfile,
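The file.c hunks above drop the mmap-aware lock tree and take a single DLM
extent lock per read or write, with O_APPEND writes locking the whole object.
A minimal user-space model of the extent computation, with stub types standing
in for Lustre's ldlm_policy_data_t (the EOF sentinel mirrors OBD_OBJECT_EOF):

#include <stdio.h>
#include <stdint.h>

#define OBD_OBJECT_EOF ((uint64_t)-1)   /* "to end of object" sentinel */

struct extent { uint64_t start, end; };

/* O_APPEND must lock [0, EOF]: the final write offset is only known once
 * the lock is held and the current object size is visible. */
static struct extent write_extent(int append, uint64_t pos, uint64_t count)
{
        struct extent e;

        if (append) {
                e.start = 0;
                e.end = OBD_OBJECT_EOF;
        } else {
                e.start = pos;
                e.end = pos + count - 1;        /* inclusive end, as above */
        }
        return e;
}

int main(void)
{
        struct extent e = write_extent(0, 4096, 1024);

        printf("lock [%llu, %llu]\n", (unsigned long long)e.start,
               (unsigned long long)e.end);
        return 0;
}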
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index a3ad73b..e5352ed 100644 (file)
@@ -134,20 +134,10 @@ struct ll_async_page {
          /* only trust these if the page lock is providing exclusion */
         unsigned         llap_write_queued:1,
                          llap_defer_uptodate:1,
-                         llap_origin:3,
                          llap_ra_used:1;
         struct list_head llap_proc_item;
 };
 
-enum {
-        LLAP_ORIGIN_UNKNOWN = 0,
-        LLAP_ORIGIN_READPAGE,
-        LLAP_ORIGIN_READAHEAD,
-        LLAP_ORIGIN_COMMIT_WRITE,
-        LLAP_ORIGIN_WRITEPAGE,
-        LLAP__ORIGIN_MAX,
-};
-
 /* llite/lproc_llite.c */
 int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
                                 struct super_block *sb, char *osc, char *mdc);
@@ -173,13 +163,12 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *,
 /* llite/rw.c */
 int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
 int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
-int ll_writepage(struct page *page);
 void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa);
 void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc);
 void ll_removepage(struct page *page);
 int ll_readpage(struct file *file, struct page *page);
 struct ll_async_page *llap_from_cookie(void *cookie);
-struct ll_async_page *llap_from_page(struct page *page, unsigned origin);
+struct ll_async_page *llap_from_page(struct page *page);
 struct ll_async_page *llap_cast_private(struct page *page);
 void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
 void ll_ra_accounting(struct page *page, struct address_space *mapping);
@@ -277,28 +266,6 @@ void ll_queue_done_writing(struct inode *inode);
 void ll_close_thread_shutdown(struct ll_close_queue *lcq);
 int ll_close_thread_start(struct ll_close_queue **lcq_ret);
 
-/* llite/llite_mmap.c */
-#if  (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-typedef struct rb_root  rb_root_t;
-typedef struct rb_node  rb_node_t;
-#endif
-
-struct ll_lock_tree_node;
-struct ll_lock_tree {
-        rb_root_t                       lt_root;
-        struct list_head                lt_locked_list;
-        struct ll_file_data             *lt_fd;
-};
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, 
-                      __u64 last);
-int ll_file_mmap(struct file * file, struct vm_area_struct * vma);
-struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
-                                              __u64 end, ldlm_mode_t mode);
-int ll_tree_lock(struct ll_lock_tree *tree, 
-                 struct ll_lock_tree_node *first_node, struct inode *inode,
-                 const char *buf, size_t count, int ast_flags);
-int ll_tree_unlock(struct ll_lock_tree *tree, struct inode *inode);
-
 #define LL_SBI_NOLCK            0x1
 #define LL_SBI_READAHEAD        0x2
 
diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c
deleted file mode 100644 (file)
index 9e34556..0000000
+++ /dev/null
@@ -1,482 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/version.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/iobuf.h>
-#endif
-
-
-#define DEBUG_SUBSYSTEM S_LLITE
-
-#include <linux/lustre_mds.h>
-#include <linux/lustre_lite.h>
-#include "llite_internal.h"
-#include <linux/lustre_compat25.h>
-
-struct ll_lock_tree_node {
-        rb_node_t               lt_node;
-        struct list_head        lt_locked_item;
-        __u64                   lt_oid;
-        ldlm_policy_data_t      lt_policy;
-        struct lustre_handle    lt_lockh;
-        ldlm_mode_t             lt_mode;
-};
-
-__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
-int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode,
-                      unsigned long addr, size_t count);
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-                       int *type);
-#else
-
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-                       int unused);
-#endif
-
-struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start,
-                                              __u64 end, ldlm_mode_t mode)
-{
-        struct ll_lock_tree_node *node;
-
-        OBD_ALLOC(node, sizeof(*node));
-        if (node == NULL)
-                RETURN(ERR_PTR(-ENOMEM));
-
-        node->lt_oid = ll_i2info(inode)->lli_smd->lsm_object_id;
-        node->lt_policy.l_extent.start = start;
-        node->lt_policy.l_extent.end = end;
-        memset(&node->lt_lockh, 0, sizeof(node->lt_lockh));
-        INIT_LIST_HEAD(&node->lt_locked_item);
-        node->lt_mode = mode;
-
-        return node;
-}
-
-int lt_compare(struct ll_lock_tree_node *one, struct ll_lock_tree_node *two)
-{
-        if ( one->lt_oid < two->lt_oid)
-                return -1;
-        if ( one->lt_oid > two->lt_oid)
-                return 1;
-
-        if ( one->lt_policy.l_extent.end < two->lt_policy.l_extent.start )
-                return -1;
-        if ( one->lt_policy.l_extent.start > two->lt_policy.l_extent.end )
-                return 1;
-
-        return 0; /* they are the same object and overlap */
-}
-
-static void lt_merge(struct ll_lock_tree_node *dst, 
-                     struct ll_lock_tree_node *src)
-{
-        dst->lt_policy.l_extent.start = min(dst->lt_policy.l_extent.start,
-                                            src->lt_policy.l_extent.start);
-        dst->lt_policy.l_extent.end = max(dst->lt_policy.l_extent.end,
-                                          src->lt_policy.l_extent.end);
-
-        /* XXX could be a real call to the dlm to find superset modes */
-        if (src->lt_mode == LCK_PW && dst->lt_mode != LCK_PW)
-                dst->lt_mode = LCK_PW;
-}
-
-static void lt_insert(struct ll_lock_tree *tree, 
-                      struct ll_lock_tree_node *node)
-{
-        struct ll_lock_tree_node *walk;
-        rb_node_t **p, *parent;
-        ENTRY;
-
-restart:
-        p = &tree->lt_root.rb_node;
-        parent = NULL;
-        while (*p) {
-                parent = *p;
-                walk = rb_entry(parent, struct ll_lock_tree_node, lt_node);
-                switch (lt_compare(node, walk)) {
-                case -1:
-                        p = &(*p)->rb_left;
-                        break;
-                case 1:
-                        p = &(*p)->rb_right;
-                        break;
-                case 0:
-                        lt_merge(node, walk);
-                        rb_erase(&walk->lt_node, &tree->lt_root);
-                        OBD_FREE(walk, sizeof(*walk));
-                        goto restart;
-                        break;
-                default:
-                        LBUG();
-                        break;
-                }
-        }
-        rb_link_node(&node->lt_node, parent, p);
-        rb_insert_color(&node->lt_node, &tree->lt_root);
-        EXIT;
-}
-
-static struct ll_lock_tree_node *lt_least_node(struct ll_lock_tree *tree)
-{
-        rb_node_t *rbnode;
-        struct ll_lock_tree_node *node = NULL;
-
-        for ( rbnode = tree->lt_root.rb_node; rbnode != NULL; 
-              rbnode = rbnode->rb_left) {
-                if (rbnode->rb_left == NULL) {
-                        node = rb_entry(rbnode, struct ll_lock_tree_node, 
-                                        lt_node);
-                        break;
-                }
-        }
-        RETURN(node);
-}
-
-int ll_tree_unlock(struct ll_lock_tree *tree, struct inode *inode)
-{
-        struct ll_lock_tree_node *node;
-        struct list_head *pos, *n;
-        int rc = 0;
-        ENTRY;
-
-        list_for_each_safe(pos, n, &tree->lt_locked_list) {
-                node = list_entry(pos, struct ll_lock_tree_node, 
-                                  lt_locked_item);
-
-                rc = ll_extent_unlock(tree->lt_fd, inode, 
-                                      ll_i2info(inode)->lli_smd, node->lt_mode, 
-                                      &node->lt_lockh);
-                if (rc != 0) {
-                        /* XXX better message */
-                        CERROR("couldn't unlock %d\n", rc);
-                }
-                list_del(&node->lt_locked_item);
-                OBD_FREE(node, sizeof(*node));
-        }
-
-        while ((node = lt_least_node(tree))) {
-                rb_erase(&node->lt_node, &tree->lt_root);
-                OBD_FREE(node, sizeof(*node));
-        }
-
-        RETURN(rc);
-}
-
-int ll_tree_lock(struct ll_lock_tree *tree,
-                 struct ll_lock_tree_node *first_node, struct inode *inode,
-                 const char *buf, size_t count, int ast_flags)
-{
-        struct ll_lock_tree_node *node;
-        int rc = 0;
-        ENTRY;
-
-        tree->lt_root.rb_node = NULL;
-        INIT_LIST_HEAD(&tree->lt_locked_list);
-        if (first_node != NULL)
-                lt_insert(tree, first_node);
-
-        if (mapping_mapped(inode->i_mapping)) {
-                rc = lt_get_mmap_locks(tree, inode, (unsigned long)buf, count);
-                if (rc)
-                        GOTO(out, rc);
-        }
-
-        while ((node = lt_least_node(tree))) {
-                rc = ll_extent_lock(tree->lt_fd, inode, 
-                                    ll_i2info(inode)->lli_smd, node->lt_mode, 
-                                    &node->lt_policy, &node->lt_lockh,
-                                    ast_flags);
-                if (rc != 0)
-                        GOTO(out, rc);
-
-                rb_erase(&node->lt_node, &tree->lt_root);
-                list_add_tail(&node->lt_locked_item, &tree->lt_locked_list);
-        }
-        RETURN(rc);
-out:
-        ll_tree_unlock(tree, inode);
-        RETURN(rc);
-}
-
-static ldlm_mode_t mode_from_vma(struct vm_area_struct *vma)
-{
-        /* we only want to hold PW locks if the mmap() can generate 
-         * writes back to the file and that only happens in shared
-         * writable vmas */
-        if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
-                return LCK_PW;
-        return LCK_PR;
-}
-
-static void policy_from_vma(ldlm_policy_data_t *policy, 
-                            struct vm_area_struct *vma, unsigned long addr,
-                            size_t count)
-{
-        policy->l_extent.start = ((addr - vma->vm_start) & PAGE_CACHE_MASK) +
-                                 (vma->vm_pgoff << PAGE_CACHE_SHIFT);
-        policy->l_extent.end = (policy->l_extent.start + count - 1) | 
-                               (PAGE_CACHE_SIZE - 1);
-}
-
-static struct vm_area_struct * our_vma(unsigned long addr, size_t count)
-{
-        struct mm_struct *mm = current->mm;
-        struct vm_area_struct *vma, *ret = NULL;
-        ENTRY;
-
-        spin_lock(&mm->page_table_lock);
-        for(vma = find_vma(mm, addr); 
-            vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
-                if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage) {
-                        ret = vma;
-                        break;
-                }
-        }
-        spin_unlock(&mm->page_table_lock);
-        RETURN(ret);
-}
-
-int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode, 
-                      unsigned long addr, size_t count)
-{
-        struct vm_area_struct *vma;
-        struct ll_lock_tree_node *node;
-        ldlm_policy_data_t policy;
-        ENTRY;
-
-        if (count == 0)
-                RETURN(0);
-
-        /* we need to look up vmas on page aligned addresses */
-        count += addr & (PAGE_SIZE - 1);
-        addr -= addr & (PAGE_SIZE - 1);
-
-        while ((vma = our_vma(addr, count)) != NULL) {
-
-                policy_from_vma(&policy, vma, addr, count);
-                node = ll_node_from_inode(inode, policy.l_extent.start, 
-                                          policy.l_extent.end, 
-                                          mode_from_vma(vma));
-                if (IS_ERR(node)) {
-                        CERROR("not enough mem for lock_tree_node!\n");
-                        RETURN(-ENOMEM);
-                }
-                lt_insert(tree, node);
-
-                if (vma->vm_end - addr >= count)
-                        break;
-                count -= vma->vm_end - addr;
-                addr = vma->vm_end;
-        }
-        RETURN(0);
-}
-
-/* FIXME: there is a pagefault race goes as follow:
- * 1. A user process on node A accesses a portion of a mapped file, 
- *    resulting in a page fault.  The pagefault handler invokes the 
- *    ll_nopage function, which reads the page into memory.
- * 2. A user process on node B writes to the same portion of the file 
- *    (either via mmap or write()), that cause node A to cancel the
- *    lock and truncate the page.
- * 3. Node A then executes the rest of do_no_page(), entering the 
- *    now-invalid page into the PTEs.
- *
- * Make the whole do_no_page as a hook to cover both the page cache
- * and page mapping installing with dlm lock would eliminate this race.
- */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-                       int *type)
-#else
-struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
-                       int unused)
-#endif
-{
-        struct file *filp = vma->vm_file;
-        struct ll_file_data *fd = filp->private_data;
-        struct inode *inode = filp->f_dentry->d_inode;
-        struct lustre_handle lockh = { 0 };
-        ldlm_policy_data_t policy;
-        ldlm_mode_t mode;
-        struct page *page;
-        __u64 kms;
-        unsigned long pgoff, size, rand_read, seq_read;
-        int rc = 0;
-        ENTRY;
-
-        if (ll_i2info(inode)->lli_smd == NULL) {
-                CERROR("No lsm on fault?\n");
-                RETURN(NULL);
-        }
-
-        /* start and end the lock on the first and last bytes in the page */
-        policy_from_vma(&policy, vma, address, PAGE_CACHE_SIZE);
-
-        CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n",
-               vma, inode->i_ino, policy.l_extent.start, 
-               policy.l_extent.end);
-
-        mode = mode_from_vma(vma);
-
-        rc = ll_extent_lock(fd, inode, ll_i2info(inode)->lli_smd, mode, &policy,
-                            &lockh, LDLM_FL_CBPENDING);
-        if (rc != 0)
-                RETURN(NULL);
-        
-        /* XXX change inode size without i_sem hold! there is a race condition
-         *     with truncate path. (see ll_extent_lock) */
-        kms = lov_merge_size(ll_i2info(inode)->lli_smd, 1);
-        pgoff = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
-        size = (kms + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        
-        if (pgoff >= size)
-                ll_glimpse_size(inode);
-        else
-                inode->i_size = kms;
-        
-        /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that 
-         * the kernel will not read other pages not covered by ldlm in 
-         * filemap_nopage. we do our readahead in ll_readpage. 
-         */
-        rand_read = vma->vm_flags & VM_RAND_READ;
-        seq_read = vma->vm_flags & VM_SEQ_READ;
-        vma->vm_flags &= ~ VM_SEQ_READ;
-        vma->vm_flags |= VM_RAND_READ;
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        page = filemap_nopage(vma, address, type);
-#else       
-        page = filemap_nopage(vma, address, unused);
-#endif  
-        vma->vm_flags &= ~VM_RAND_READ;
-        vma->vm_flags |= (rand_read | seq_read);
-        
-        ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh);
-        RETURN(page);
-}
-
-/* return the user space pointer that maps to a file offset via a vma */
-static inline unsigned long file_to_user(struct vm_area_struct *vma,
-                                         __u64 byte)
-{
-        return vma->vm_start + 
-               (byte - ((__u64)vma->vm_pgoff << PAGE_CACHE_SHIFT));
-
-}
-
-#define VMA_DEBUG(vma, fmt, arg...)                                     \
-        CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld): " fmt,  \
-               vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, ## arg);
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-/* [first, last] are the byte offsets affected.  
- * vm_{start, end} are user addresses of the first byte of the mapping and
- *      the next byte beyond it
- * vm_pgoff is the page index of the first byte in the mapping */
-static void teardown_vmas(struct vm_area_struct *vma, __u64 first,
-                          __u64 last)
-{
-        unsigned long address, len;
-        for (; vma ; vma = vma->vm_next_share) {
-                if (last >> PAGE_CACHE_SHIFT < vma->vm_pgoff)
-                        continue;
-                if (first >> PAGE_CACHE_SHIFT > (vma->vm_pgoff + 
-                    ((vma->vm_end - vma->vm_start) >> PAGE_CACHE_SHIFT)))
-                        continue;
-                
-                address = max((unsigned long)vma->vm_start, 
-                              file_to_user(vma, first));
-                len = min((unsigned long)vma->vm_end, 
-                          file_to_user(vma, last) + 1) - address;
-
-                VMA_DEBUG(vma, "zapping vma [address=%ld len=%ld]\n",
-                          address, len);
-                LASSERT(vma->vm_mm);
-                ll_zap_page_range(vma, address, len);
-        }
-}
-#endif
-
-/* XXX put nice comment here.  talk about __free_pte -> dirty pages and
- * nopage's reference passing to the pte */
-int ll_teardown_mmaps(struct address_space *mapping, __u64 first, 
-                       __u64 last)
-{
-        int rc = -ENOENT;
-        ENTRY;
-        
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-        if (mapping_mapped(mapping)) {
-                rc = 0;
-                unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
-                                    last - first + 1, 1);
-        }
-#else
-        spin_lock(&mapping->i_shared_lock);
-        if (mapping->i_mmap != NULL) {
-                rc = 0;
-                teardown_vmas(mapping->i_mmap, first, last);
-        }
-        if (mapping->i_mmap_shared != NULL) {
-                rc = 0;
-                teardown_vmas(mapping->i_mmap_shared, first, last);
-        }
-        spin_unlock(&mapping->i_shared_lock);
-#endif
-        RETURN(rc);
-}
-
-static struct vm_operations_struct ll_file_vm_ops = {
-        .nopage = ll_nopage,
-};
-
-int ll_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
-        int rc;
-        ENTRY;
-
-        rc = generic_file_mmap(file, vma);
-        if (rc == 0)
-                vma->vm_ops = &ll_file_vm_ops;
-
-        RETURN(rc);
-}
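For reference, the lock tree deleted above kept one rbtree node per
(object, extent) and collapsed overlapping requests before locking:
lt_compare() treats same-object overlaps as equal, and lt_merge() widens the
surviving node and promotes it to PW if either side wrote. A self-contained
user-space model of that merge rule (plain structs, not the kernel rbtree):

#include <stdio.h>
#include <stdint.h>

enum lock_mode { LCK_PR, LCK_PW };

struct node { uint64_t oid, start, end; enum lock_mode mode; };

/* <0 or >0: disjoint, ordered; 0: same object and overlapping extents */
static int lt_compare(const struct node *a, const struct node *b)
{
        if (a->oid != b->oid)
                return a->oid < b->oid ? -1 : 1;
        if (a->end < b->start)
                return -1;
        if (a->start > b->end)
                return 1;
        return 0;
}

/* on overlap: widen dst to the union and keep the stronger mode */
static void lt_merge(struct node *dst, const struct node *src)
{
        if (src->start < dst->start) dst->start = src->start;
        if (src->end > dst->end)     dst->end = src->end;
        if (src->mode == LCK_PW)     dst->mode = LCK_PW;
}

int main(void)
{
        struct node a = { 1, 0,    4095, LCK_PR };
        struct node b = { 1, 4000, 8191, LCK_PW };

        if (lt_compare(&a, &b) == 0)
                lt_merge(&a, &b);
        printf("[%llu, %llu] mode=%s\n",
               (unsigned long long)a.start, (unsigned long long)a.end,
               a.mode == LCK_PW ? "PW" : "PR");
        return 0;
}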
diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c
index 59113dd..d390eab 100644 (file)
@@ -494,7 +494,7 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
         /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
          * it in our own state */
         if (dummy_llap->llap_magic == 0) {
-                seq_printf(seq, "generation | llap .cookie origin | page ");
+                seq_printf(seq, "generation | llap .cookie | page ");
                 seq_printf(seq, "inode .index [ page flags ]\n");
                 return 0;
         }
@@ -505,21 +505,10 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v)
         if (llap != NULL)  {
                 int has_flags = 0;
                 struct page *page = llap->llap_page;
-                static char *origins[] = {
-                        [LLAP_ORIGIN_UNKNOWN] = "--",
-                        [LLAP_ORIGIN_READPAGE] = "rp",
-                        [LLAP_ORIGIN_READAHEAD] = "ra",
-                        [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
-                        [LLAP_ORIGIN_WRITEPAGE] = "wp",
-                };
-
-                LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n", 
-                         llap->llap_origin);
-
-                seq_printf(seq, "%lu | %p %p %s | %p %p %lu [", 
+
+                seq_printf(seq, "%lu | %p %p | %p %p %lu [", 
                                 sbi->ll_pglist_gen, 
-                                llap, llap->llap_cookie, 
-                                        origins[llap->llap_origin],
+                                llap, llap->llap_cookie,
                                 page, page->mapping->host, page->index);
                 seq_page_flag(seq, page, locked, has_flags);
                 seq_page_flag(seq, page, error, has_flags);
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c
index c9f7637..bdc9b10 100644 (file)
@@ -360,7 +360,7 @@ struct ll_async_page *llap_cast_private(struct page *page)
 }
 
 /* XXX have the exp be an argument? */
-struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
+struct ll_async_page *llap_from_page(struct page *page)
 {
         struct ll_async_page *llap;
         struct obd_export *exp;
@@ -369,11 +369,9 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
         int rc;
         ENTRY;
 
-        LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
-
         llap = llap_cast_private(page);
         if (llap != NULL)
-                GOTO(out, llap);
+                RETURN(llap);
 
         exp = ll_i2obdexp(page->mapping->host);
         if (exp == NULL)
@@ -391,8 +389,6 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
                 RETURN(ERR_PTR(rc));
         }
 
-        LL_CDEBUG_PAGE(D_PAGE, page, "obj off "LPU64"\n", 
-                       (obd_off)page->index << PAGE_SHIFT);
         CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap,
                page, llap->llap_cookie, (obd_off)page->index << PAGE_SHIFT);
         /* also zeroing the PRIVBITS low order bitflags */
@@ -404,8 +400,6 @@ struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
         list_add_tail(&llap->llap_proc_item, &sbi->ll_pglist);
         spin_unlock(&sbi->ll_lock);
 
-out:
-        llap->llap_origin = origin;
         RETURN(llap);
 }
 
@@ -498,7 +492,7 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from,
         CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
                inode, page, from, to, page->index);
 
-        llap = llap_from_page(page, LLAP_ORIGIN_COMMIT_WRITE);
+        llap = llap_from_page(page);
         if (IS_ERR(llap))
                 RETURN(PTR_ERR(llap));
 
@@ -566,43 +560,6 @@ static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
         spin_unlock(&sbi->ll_lock);
 }
 
-int ll_writepage(struct page *page)
-{
-        struct inode *inode = page->mapping->host;
-        struct obd_export *exp;
-        struct ll_async_page *llap;
-        int rc = 0;
-        ENTRY;
-
-        LASSERT(!PageDirty(page));
-        LASSERT(PageLocked(page));
-
-        exp = ll_i2obdexp(inode);
-        if (exp == NULL)
-                GOTO(out, rc = -EINVAL);
-
-        llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
-        if (IS_ERR(llap))
-                GOTO(out, rc = PTR_ERR(llap));
-
-        page_cache_get(page);
-        if (llap->llap_write_queued) {
-                LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
-                rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
-                                         llap->llap_cookie,
-                                         ASYNC_READY | ASYNC_URGENT);
-        } else {
-                rc = queue_or_sync_write(exp, inode, llap, PAGE_SIZE,
-                                         ASYNC_READY | ASYNC_URGENT);
-        }
-        if (rc)
-                page_cache_release(page);
-out:
-        if (rc)
-                unlock_page(page);
-        RETURN(rc);
-}
-
 /* called for each page in a completed rpc.*/
 void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
 {
@@ -683,7 +640,7 @@ void ll_removepage(struct page *page)
                 return;
         }
 
-        llap = llap_from_page(page, 0);
+        llap = llap_from_page(page);
         if (IS_ERR(llap)) {
                 CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
                        page->index, PTR_ERR(llap));
@@ -770,7 +727,7 @@ void ll_ra_accounting(struct page *page, struct address_space *mapping)
 {
         struct ll_async_page *llap;
 
-        llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
+        llap = llap_from_page(page);
         if (IS_ERR(llap))
                 return;
 
@@ -847,7 +804,7 @@ static int ll_readahead(struct ll_readahead_state *ras,
 
                 /* we do this first so that we can see the page in the /proc
                  * accounting */
-                llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
+                llap = llap_from_page(page);
                 if (IS_ERR(llap) || llap->llap_defer_uptodate)
                         goto next_page;
 
@@ -1026,7 +983,7 @@ int ll_readpage(struct file *filp, struct page *page)
         if (exp == NULL)
                 GOTO(out, rc = -EINVAL);
 
-        llap = llap_from_page(page, LLAP_ORIGIN_READPAGE);
+        llap = llap_from_page(page);
         if (IS_ERR(llap))
                 GOTO(out, rc = PTR_ERR(llap));
 
@@ -1054,10 +1011,12 @@ int ll_readpage(struct file *filp, struct page *page)
         }
 
         if (rc == 0) {
+#if 0
                 CWARN("ino %lu page %lu (%llu) not covered by "
                       "a lock (mmap?).  check debug logs.\n",
                       inode->i_ino, page->index,
                       (long long)page->index << PAGE_CACHE_SHIFT);
+#endif
         }
 
         rc = ll_issue_page_read(exp, llap, oig, 0);
diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c
index d8c98f0..3993af4 100644 (file)
 #include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
+static int ll_writepage_24(struct page *page)
+{
+        struct inode *inode = page->mapping->host;
+        struct obd_export *exp;
+        struct ll_async_page *llap;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(!PageDirty(page));
+        LASSERT(PageLocked(page));
+
+        exp = ll_i2obdexp(inode);
+        if (exp == NULL)
+                GOTO(out, rc = -EINVAL);
+
+        llap = llap_from_page(page);
+        if (IS_ERR(llap))
+                GOTO(out, rc = PTR_ERR(llap));
+
+        page_cache_get(page);
+        if (llap->llap_write_queued) {
+                LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
+                rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
+                                         llap->llap_cookie,
+                                         ASYNC_READY | ASYNC_URGENT);
+        } else {
+                llap->llap_write_queued = 1;
+                rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+                                        llap->llap_cookie, OBD_BRW_WRITE, 0, 0,
+                                        0, ASYNC_READY | ASYNC_URGENT);
+                if (rc == 0)
+                        LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n");
+                else
+                        llap->llap_write_queued = 0;
+        }
+        if (rc)
+                page_cache_release(page);
+out:
+        if (rc)
+                unlock_page(page);
+        RETURN(rc);
+}
+
 static int ll_direct_IO_24(int rw,
 #ifdef HAVE_DIO_FILE
                            struct file *file,
@@ -137,7 +180,7 @@ static int ll_direct_IO_24(int rw,
 struct address_space_operations ll_aops = {
         .readpage       = ll_readpage,
         .direct_IO      = ll_direct_IO_24,
-        .writepage      = ll_writepage,
+        .writepage      = ll_writepage_24,
         .prepare_write  = ll_prepare_write,
         .commit_write   = ll_commit_write,
         .removepage     = ll_removepage,
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index 07b0d45..71964de 100644 (file)
 
 static int ll_writepage_26(struct page *page, struct writeback_control *wbc)
 {
-        return ll_writepage(page);
+        struct inode *inode = page->mapping->host;
+        struct obd_export *exp;
+        struct ll_async_page *llap;
+        int rc;
+        ENTRY;
+
+        LASSERT(!PageDirty(page));
+        LASSERT(PageLocked(page));
+
+        exp = ll_i2obdexp(inode);
+        if (exp == NULL)
+                GOTO(out, rc = -EINVAL);
+
+        llap = llap_from_page(page);
+        if (IS_ERR(llap))
+                GOTO(out, rc = PTR_ERR(llap));
+
+        page_cache_get(page);
+        if (llap->llap_write_queued) {
+                LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n");
+                rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL,
+                                         llap->llap_cookie,
+                                         ASYNC_READY | ASYNC_URGENT);
+        } else {
+                llap->llap_write_queued = 1;
+                rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+                                        llap->llap_cookie, OBD_BRW_WRITE, 0, 0,
+                                        0, ASYNC_READY | ASYNC_URGENT);
+                if (rc == 0)
+                        LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n");
+                else
+                        llap->llap_write_queued = 0;
+        }
+        if (rc)
+                page_cache_release(page);
+out:
+        if (rc)
+                unlock_page(page);
+        else
+                set_page_writeback(page);
+        RETURN(rc);
 }
 
 /* It is safe to not check anything in invalidatepage/releasepage below
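With ll_writepage() gone from rw.c, the 2.4 and 2.6 hunks above each carry
their own copy of the same decision: a page already queued for async I/O is
just flagged urgent, otherwise it is queued urgently, and the page reference
is held only while I/O is pending (the 2.6 variant additionally calls
set_page_writeback()). A toy model of that branch, with stub state standing
in for the llap and obd calls:

#include <stdio.h>

struct page_state { int write_queued; };

static int set_async_flags(struct page_state *p) { (void)p; return 0; }
static int queue_async_io(struct page_state *p)  { (void)p; return 0; }

static int writepage(struct page_state *p)
{
        int rc;

        if (p->write_queued) {
                rc = set_async_flags(p);        /* escalate queued I/O */
        } else {
                p->write_queued = 1;
                rc = queue_async_io(p);         /* queue new urgent write */
                if (rc != 0)
                        p->write_queued = 0;    /* undo on failure */
        }
        return rc;
}

int main(void)
{
        struct page_state p = { 0 };

        printf("first: rc=%d queued=%d\n", writepage(&p), p.write_queued);
        printf("again: rc=%d queued=%d\n", writepage(&p), p.write_queued);
        return 0;
}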
diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4
index 94fa984..27704bd 100644 (file)
@@ -436,16 +436,6 @@ if test x$enable_modules != xno ; then
                        AC_MSG_RESULT([no])
                ])
 
-       # --------- zap_page_range(vma) --------------------------------
-       AC_MSG_CHECKING([if zap_pag_range with vma parameter])
-       ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
-       if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
-               AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
-               AC_MSG_RESULT([yes])
-       else
-               AC_MSG_RESULT([no])
-       fi
-
        # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
        # This needs to run after we've defined the KCPPFLAGS
 
diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h
index 301dca1..66ee471 100644 (file)
@@ -89,7 +89,6 @@ struct ptldebug_header {
 #define D_RPCTRACE    0x00100000 /* for distributed debugging */
 #define D_VFSTRACE    0x00200000
 #define D_READA       0x00400000 /* read-ahead */
-#define D_MMAP        0x00800000
 
 #ifdef __KERNEL__
 # include <linux/sched.h> /* THREAD_SIZE */
diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c
index 300437f..538af44 100644 (file)
@@ -74,7 +74,7 @@ static const char *portal_debug_masks[] =
         {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
          "blocks", "net", "warning", "buffs", "other", "dentry", "portals",
          "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
-         "reada", "mmap", NULL};
+         "reada", NULL};
 
 struct debug_daemon_cmd {
         char *cmd;
diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh
index e6c797d..7df1454 100644 (file)
@@ -346,7 +346,7 @@ test_15() { # bug 974 - ENOSPC
 run_test 15 "test out-of-space with multiple writers ==========="
 
 test_16() {
-       fsx -c 50 -p 100 -N 2500 $MOUNT1/fsxfile $MOUNT2/fsxfile
+       fsx -R -W -c 50 -p 100 -N 2500 $MOUNT1/fsxfile $MOUNT2/fsxfile
 }
 run_test 16 "2500 iterations of dual-mount fsx ================="
 
@@ -375,7 +375,7 @@ test_18() {
        ./mmap_sanity -d $MOUNT1 -m $MOUNT2
        sync; sleep 1; sync
 }
-run_test 18 "mmap sanity check ================================="
+#run_test 18 "mmap sanity check ================================="
 
 test_19() { # bug3811
        [ -d /proc/fs/lustre/obdfilter ] || return 0
diff --git a/lustre/utils/lconf b/lustre/utils/lconf
index cb8d14a..0a1d5bc 100755 (executable)
@@ -88,7 +88,6 @@ ptldebug_names = {
     "rpctrace" :  (1 << 20),
     "vfstrace" :  (1 << 21),
     "reada" :     (1 << 22),
-    "mmap" :     (1 << 23),
     }
 
 subsystem_names = {
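The three mmap debug-mask removals have to stay consistent: the D_MMAP bit
define (0x00800000), the "mmap" entry in the mask-name array (whose index is
the bit position), and lconf's name-to-bit table. A one-line check that the
dropped values agreed:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        /* D_MMAP 0x00800000 matches "mmap" : (1 << 23) removed from lconf */
        assert(0x00800000 == 1u << 23);
        printf("D_MMAP == 1<<23\n");
        return 0;
}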