From 3a49136ecd0a156bf3aa891e66653d93077f420b Mon Sep 17 00:00:00 2001 From: buffalo-pull Date: Wed, 25 Aug 2004 14:17:11 +0000 Subject: [PATCH] b=3918 revert b1_2_mmap landing. The testing shows a very obvious regression(2973), and this code was not updated from b1_2 before the final testing. --- lnet/archdep.m4 | 10 - lnet/include/linux/libcfs.h | 1 - lnet/utils/debug.c | 2 +- lustre/ChangeLog | 1 - lustre/include/linux/lustre_compat25.h | 11 - .../patches/export-zap-page-range.patch | 12 - lustre/kernel_patches/series/vanilla-2.4.20 | 1 - lustre/llite/Makefile.in | 4 +- lustre/llite/Makefile.mk | 2 +- lustre/llite/file.c | 60 +- lustre/llite/llite_internal.h | 23 - lustre/llite/llite_mmap.c | 482 --------------- lustre/llite/rw.c | 140 ++--- lustre/llite/rw24.c | 45 +- lustre/llite/rw26.c | 47 +- lustre/portals/archdep.m4 | 10 - lustre/portals/include/linux/libcfs.h | 1 - lustre/portals/utils/debug.c | 2 +- lustre/tests/.cvsignore | 2 - lustre/tests/Makefile.am | 2 - lustre/tests/mmap_sanity.c | 643 --------------------- lustre/tests/sanityN.sh | 9 +- lustre/utils/lconf | 1 - 23 files changed, 161 insertions(+), 1350 deletions(-) delete mode 100644 lustre/kernel_patches/patches/export-zap-page-range.patch delete mode 100644 lustre/llite/llite_mmap.c delete mode 100644 lustre/tests/mmap_sanity.c diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 94fa984..27704bd 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -436,16 +436,6 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([no]) ]) - # --------- zap_page_range(vma) -------------------------------- - AC_MSG_CHECKING([if zap_pag_range with vma parameter]) - ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`" - if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then - AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter]) - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - # ---------- Red Hat 2.4.20 backports some 2.5 bits -------- # This needs to run after we've defined the KCPPFLAGS diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h index cad7a69..acf4045 100644 --- a/lnet/include/linux/libcfs.h +++ b/lnet/include/linux/libcfs.h @@ -90,7 +90,6 @@ struct ptldebug_header { #define D_RPCTRACE 0x00100000 /* for distributed debugging */ #define D_VFSTRACE 0x00200000 #define D_READA 0x00400000 /* read-ahead */ -#define D_MMAP 0x00800000 #ifdef __KERNEL__ # include /* THREAD_SIZE */ diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index dce196f..e546aaf 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -74,7 +74,7 @@ static const char *portal_debug_masks[] = {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", "blocks", "net", "warning", "buffs", "other", "dentry", "portals", "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", - "reada", "mmap", NULL}; + "reada", NULL}; struct debug_daemon_cmd { char *cmd; diff --git a/lustre/ChangeLog b/lustre/ChangeLog index f344fea..22dd2fb 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -19,7 +19,6 @@ - replace some LBUG about llog ops with error handling (3841) - don't match INVALID dentries from d_lookup and spin (3784) - hold dcache_lock while marking dentries INVALID and hashing (4255) - - basic mmap support (3918) * miscellania - add libwrap support for the TCP acceptor (3996) - add /proc/sys/portals/routes for non-root route listing (3994) diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 56e36e9..ede6646 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -212,17 +212,6 @@ static inline void cond_resched(void) #define PageWriteback(page) 0 #define end_page_writeback(page) -static inline int mapping_mapped(struct address_space *mapping) -{ - return mapping->i_mmap_shared ? 1 : 0; -} - -#ifdef ZAP_PAGE_RANGE_VMA -#define ll_zap_page_range(vma, addr, len) zap_page_range(vma, addr, len) -#else -#define ll_zap_page_range(vma, addr, len) zap_page_range(vma->vm_mm, addr, len) -#endif - #endif /* end of 2.4 compat macros */ #ifdef HAVE_PAGE_LIST diff --git a/lustre/kernel_patches/patches/export-zap-page-range.patch b/lustre/kernel_patches/patches/export-zap-page-range.patch deleted file mode 100644 index 9b9d48f..0000000 --- a/lustre/kernel_patches/patches/export-zap-page-range.patch +++ /dev/null @@ -1,12 +0,0 @@ -Index: linux-2.4.24-l36mmap/mm/memory.c -=================================================================== ---- linux-2.4.24-l36mmap.orig/mm/memory.c 2004-05-27 17:44:13.000000000 -0700 -+++ linux-2.4.24-l36mmap/mm/memory.c 2004-05-27 17:45:07.000000000 -0700 -@@ -411,6 +411,7 @@ - mm->rss = 0; - spin_unlock(&mm->page_table_lock); - } -+EXPORT_SYMBOL_GPL(zap_page_range); - - /* - * Do a quick page-table lookup for a single page. diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20 index fa7a583..d11bec0 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.20 +++ b/lustre/kernel_patches/series/vanilla-2.4.20 @@ -52,4 +52,3 @@ gfp_memalloc-2.4.22.patch procfs-ndynamic-2.4.patch linux-2.4.20-filemap.patch ext3-truncate-buffer-head.patch -export-zap-page-range.patch diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in index 4daad42..9492120 100644 --- a/lustre/llite/Makefile.in +++ b/lustre/llite/Makefile.in @@ -1,5 +1,5 @@ MODULES := llite -llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o special.o symlink.o llite_mmap.o +llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o rw.o lproc_llite.o namei.o special.o symlink.o ifeq ($(PATCHLEVEL),4) llite-objs += rw24.o super.o @@ -7,4 +7,4 @@ else llite-objs += rw26.o super25.o endif -@INCLUDE_RULES@ +@INCLUDE_RULES@ \ No newline at end of file diff --git a/lustre/llite/Makefile.mk b/lustre/llite/Makefile.mk index dabbd9e..06dd10e 100644 --- a/lustre/llite/Makefile.mk +++ b/lustre/llite/Makefile.mk @@ -8,4 +8,4 @@ include $(src)/../portals/Kernelenv obj-y += llite.o llite-objs := llite_lib.o dcache.o super.o rw.o \ super25.o file.o dir.o symlink.o namei.o lproc_llite.o \ - rw26.o llite_nfs.o llite_close.o special.o llite_mmap.o + rw26.o llite_nfs.o llite_close.o special.o diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 8a497ca..bdac6d1 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -361,7 +361,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, if (end < tmpex.l_extent.end >> PAGE_CACHE_SHIFT) end = ~0; - i = inode->i_size ? (inode->i_size - 1) >> PAGE_CACHE_SHIFT : 0; + i = (inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; if (i < end) end = i; @@ -369,19 +369,6 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, "count: %lu skip: %lu end: %lu%s\n", start, start % count, count, skip, end, discard ? " (DISCARDING)" : ""); - /* walk through the vmas on the inode and tear down mmaped pages that - * intersect with the lock. this stops immediately if there are no - * mmap()ed regions of the file. This is not efficient at all and - * should be short lived. We'll associate mmap()ed pages with the lock - * and will be able to find them directly */ - for (i = start; i <= end; i += (j + skip)) { - j = min(count - (i % count), end - i + 1); - LASSERT(inode->i_mapping); - if (ll_teardown_mmaps(inode->i_mapping, i << PAGE_CACHE_SHIFT, - ((i+j) << PAGE_CACHE_SHIFT) - 1) ) - break; - } - /* this is the simplistic implementation of page eviction at * cancelation. It is careful to get races with other page * lockers handled correctly. fixes from bug 20 will make it @@ -735,11 +722,12 @@ int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { + struct ll_file_data *fd = filp->private_data; struct inode *inode = filp->f_dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct ll_lock_tree tree; - struct ll_lock_tree_node *node; + struct lustre_handle lockh = { 0 }; + ldlm_policy_data_t policy; int rc; ssize_t retval; __u64 kms; @@ -758,13 +746,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, if (!lsm) RETURN(0); - node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, - LCK_PR); - - tree.lt_fd = filp->private_data; + policy.l_extent.start = *ppos; + policy.l_extent.end = *ppos + count - 1; - rc = ll_tree_lock(&tree, node, inode, buf, count, - filp->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0); + rc = ll_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, 0); if (rc != 0) RETURN(rc); @@ -791,7 +776,7 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, retval = generic_file_read(filp, buf, count, ppos); out: - ll_tree_unlock(&tree, inode); + ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); RETURN(retval); } @@ -801,10 +786,11 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { + struct ll_file_data *fd = file->private_data; struct inode *inode = file->f_dentry->d_inode; struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct ll_lock_tree tree; - struct ll_lock_tree_node *node; + struct lustre_handle lockh = { 0 }; + ldlm_policy_data_t policy; loff_t maxbytes = ll_file_maxbytes(inode); ssize_t retval; int rc; @@ -825,18 +811,15 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, LASSERT(lsm); - if (file->f_flags & O_APPEND) - node = ll_node_from_inode(inode, 0, OBD_OBJECT_EOF, LCK_PW); - else - node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, - LCK_PW); - if (IS_ERR(node)) - RETURN(PTR_ERR(node)); - - tree.lt_fd = file->private_data; + if (file->f_flags & O_APPEND) { + policy.l_extent.start = 0; + policy.l_extent.end = OBD_OBJECT_EOF; + } else { + policy.l_extent.start = *ppos; + policy.l_extent.end = *ppos + count - 1; + } - rc = ll_tree_lock(&tree, node, inode, buf, count, - file->f_flags & O_NONBLOCK ? LDLM_FL_BLOCK_NOWAIT :0); + rc = ll_extent_lock(fd, inode, lsm, LCK_PW, &policy, &lockh, 0); if (rc != 0) RETURN(rc); @@ -861,8 +844,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, retval = generic_file_write(file, buf, count, ppos); out: - ll_tree_unlock(&tree, inode); - /* serialize with mmap/munmap/mremap */ + ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES, retval > 0 ? retval : 0); RETURN(retval); @@ -1388,7 +1370,7 @@ struct file_operations ll_file_operations = { .ioctl = ll_file_ioctl, .open = ll_file_open, .release = ll_file_release, - .mmap = ll_file_mmap, + .mmap = generic_file_mmap, .llseek = ll_file_seek, #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) .sendfile = generic_file_sendfile, diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 8fcce14..c4b3f87 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -165,7 +165,6 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *, /* llite/rw.c */ int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to); int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to); -int ll_writepage(struct page *page); void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa); void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); void ll_removepage(struct page *page); @@ -269,28 +268,6 @@ void ll_queue_done_writing(struct inode *inode); void ll_close_thread_shutdown(struct ll_close_queue *lcq); int ll_close_thread_start(struct ll_close_queue **lcq_ret); -/* llite/llite_mmap.c */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -typedef struct rb_root rb_root_t; -typedef struct rb_node rb_node_t; -#endif - -struct ll_lock_tree_node; -struct ll_lock_tree { - rb_root_t lt_root; - struct list_head lt_locked_list; - struct ll_file_data *lt_fd; -}; -int ll_teardown_mmaps(struct address_space *mapping, __u64 first, - __u64 last); -int ll_file_mmap(struct file * file, struct vm_area_struct * vma); -struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start, - __u64 end, ldlm_mode_t mode); -int ll_tree_lock(struct ll_lock_tree *tree, - struct ll_lock_tree_node *first_node, struct inode *inode, - const char *buf, size_t count, int ast_flags); -int ll_tree_unlock(struct ll_lock_tree *tree, struct inode *inode); - #define LL_SBI_NOLCK 0x1 #define LL_SBI_READAHEAD 0x2 diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c deleted file mode 100644 index 9e34556..0000000 --- a/lustre/llite/llite_mmap.c +++ /dev/null @@ -1,482 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#endif - - -#define DEBUG_SUBSYSTEM S_LLITE - -#include -#include -#include "llite_internal.h" -#include - -struct ll_lock_tree_node { - rb_node_t lt_node; - struct list_head lt_locked_item; - __u64 lt_oid; - ldlm_policy_data_t lt_policy; - struct lustre_handle lt_lockh; - ldlm_mode_t lt_mode; -}; - -__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); -int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode, - unsigned long addr, size_t count); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, - int *type); -#else - -struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, - int unused); -#endif - -struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start, - __u64 end, ldlm_mode_t mode) -{ - struct ll_lock_tree_node *node; - - OBD_ALLOC(node, sizeof(*node)); - if (node == NULL) - RETURN(ERR_PTR(-ENOMEM)); - - node->lt_oid = ll_i2info(inode)->lli_smd->lsm_object_id; - node->lt_policy.l_extent.start = start; - node->lt_policy.l_extent.end = end; - memset(&node->lt_lockh, 0, sizeof(node->lt_lockh)); - INIT_LIST_HEAD(&node->lt_locked_item); - node->lt_mode = mode; - - return node; -} - -int lt_compare(struct ll_lock_tree_node *one, struct ll_lock_tree_node *two) -{ - if ( one->lt_oid < two->lt_oid) - return -1; - if ( one->lt_oid > two->lt_oid) - return 1; - - if ( one->lt_policy.l_extent.end < two->lt_policy.l_extent.start ) - return -1; - if ( one->lt_policy.l_extent.start > two->lt_policy.l_extent.end ) - return 1; - - return 0; /* they are the same object and overlap */ -} - -static void lt_merge(struct ll_lock_tree_node *dst, - struct ll_lock_tree_node *src) -{ - dst->lt_policy.l_extent.start = min(dst->lt_policy.l_extent.start, - src->lt_policy.l_extent.start); - dst->lt_policy.l_extent.end = max(dst->lt_policy.l_extent.end, - src->lt_policy.l_extent.end); - - /* XXX could be a real call to the dlm to find superset modes */ - if (src->lt_mode == LCK_PW && dst->lt_mode != LCK_PW) - dst->lt_mode = LCK_PW; -} - -static void lt_insert(struct ll_lock_tree *tree, - struct ll_lock_tree_node *node) -{ - struct ll_lock_tree_node *walk; - rb_node_t **p, *parent; - ENTRY; - -restart: - p = &tree->lt_root.rb_node; - parent = NULL; - while (*p) { - parent = *p; - walk = rb_entry(parent, struct ll_lock_tree_node, lt_node); - switch (lt_compare(node, walk)) { - case -1: - p = &(*p)->rb_left; - break; - case 1: - p = &(*p)->rb_right; - break; - case 0: - lt_merge(node, walk); - rb_erase(&walk->lt_node, &tree->lt_root); - OBD_FREE(walk, sizeof(*walk)); - goto restart; - break; - default: - LBUG(); - break; - } - } - rb_link_node(&node->lt_node, parent, p); - rb_insert_color(&node->lt_node, &tree->lt_root); - EXIT; -} - -static struct ll_lock_tree_node *lt_least_node(struct ll_lock_tree *tree) -{ - rb_node_t *rbnode; - struct ll_lock_tree_node *node = NULL; - - for ( rbnode = tree->lt_root.rb_node; rbnode != NULL; - rbnode = rbnode->rb_left) { - if (rbnode->rb_left == NULL) { - node = rb_entry(rbnode, struct ll_lock_tree_node, - lt_node); - break; - } - } - RETURN(node); -} - -int ll_tree_unlock(struct ll_lock_tree *tree, struct inode *inode) -{ - struct ll_lock_tree_node *node; - struct list_head *pos, *n; - int rc = 0; - ENTRY; - - list_for_each_safe(pos, n, &tree->lt_locked_list) { - node = list_entry(pos, struct ll_lock_tree_node, - lt_locked_item); - - rc = ll_extent_unlock(tree->lt_fd, inode, - ll_i2info(inode)->lli_smd, node->lt_mode, - &node->lt_lockh); - if (rc != 0) { - /* XXX better message */ - CERROR("couldn't unlock %d\n", rc); - } - list_del(&node->lt_locked_item); - OBD_FREE(node, sizeof(*node)); - } - - while ((node = lt_least_node(tree))) { - rb_erase(&node->lt_node, &tree->lt_root); - OBD_FREE(node, sizeof(*node)); - } - - RETURN(rc); -} - -int ll_tree_lock(struct ll_lock_tree *tree, - struct ll_lock_tree_node *first_node, struct inode *inode, - const char *buf, size_t count, int ast_flags) -{ - struct ll_lock_tree_node *node; - int rc = 0; - ENTRY; - - tree->lt_root.rb_node = NULL; - INIT_LIST_HEAD(&tree->lt_locked_list); - if (first_node != NULL) - lt_insert(tree, first_node); - - if (mapping_mapped(inode->i_mapping)) { - rc = lt_get_mmap_locks(tree, inode, (unsigned long)buf, count); - if (rc) - GOTO(out, rc); - } - - while ((node = lt_least_node(tree))) { - rc = ll_extent_lock(tree->lt_fd, inode, - ll_i2info(inode)->lli_smd, node->lt_mode, - &node->lt_policy, &node->lt_lockh, - ast_flags); - if (rc != 0) - GOTO(out, rc); - - rb_erase(&node->lt_node, &tree->lt_root); - list_add_tail(&node->lt_locked_item, &tree->lt_locked_list); - } - RETURN(rc); -out: - ll_tree_unlock(tree, inode); - RETURN(rc); -} - -static ldlm_mode_t mode_from_vma(struct vm_area_struct *vma) -{ - /* we only want to hold PW locks if the mmap() can generate - * writes back to the file and that only happens in shared - * writable vmas */ - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) - return LCK_PW; - return LCK_PR; -} - -static void policy_from_vma(ldlm_policy_data_t *policy, - struct vm_area_struct *vma, unsigned long addr, - size_t count) -{ - policy->l_extent.start = ((addr - vma->vm_start) & PAGE_CACHE_MASK) + - (vma->vm_pgoff << PAGE_CACHE_SHIFT); - policy->l_extent.end = (policy->l_extent.start + count - 1) | - (PAGE_CACHE_SIZE - 1); -} - -static struct vm_area_struct * our_vma(unsigned long addr, size_t count) -{ - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *ret = NULL; - ENTRY; - - spin_lock(&mm->page_table_lock); - for(vma = find_vma(mm, addr); - vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) { - if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage) { - ret = vma; - break; - } - } - spin_unlock(&mm->page_table_lock); - RETURN(ret); -} - -int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode, - unsigned long addr, size_t count) -{ - struct vm_area_struct *vma; - struct ll_lock_tree_node *node; - ldlm_policy_data_t policy; - ENTRY; - - if (count == 0) - RETURN(0); - - /* we need to look up vmas on page aligned addresses */ - count += addr & (PAGE_SIZE - 1); - addr -= addr & (PAGE_SIZE - 1); - - while ((vma = our_vma(addr, count)) != NULL) { - - policy_from_vma(&policy, vma, addr, count); - node = ll_node_from_inode(inode, policy.l_extent.start, - policy.l_extent.end, - mode_from_vma(vma)); - if (IS_ERR(node)) { - CERROR("not enough mem for lock_tree_node!\n"); - RETURN(-ENOMEM); - } - lt_insert(tree, node); - - if (vma->vm_end - addr >= count) - break; - count -= vma->vm_end - addr; - addr = vma->vm_end; - } - RETURN(0); -} - -/* FIXME: there is a pagefault race goes as follow: - * 1. A user process on node A accesses a portion of a mapped file, - * resulting in a page fault. The pagefault handler invokes the - * ll_nopage function, which reads the page into memory. - * 2. A user process on node B writes to the same portion of the file - * (either via mmap or write()), that cause node A to cancel the - * lock and truncate the page. - * 3. Node A then executes the rest of do_no_page(), entering the - * now-invalid page into the PTEs. - * - * Make the whole do_no_page as a hook to cover both the page cache - * and page mapping installing with dlm lock would eliminate this race. - */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, - int *type) -#else -struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, - int unused) -#endif -{ - struct file *filp = vma->vm_file; - struct ll_file_data *fd = filp->private_data; - struct inode *inode = filp->f_dentry->d_inode; - struct lustre_handle lockh = { 0 }; - ldlm_policy_data_t policy; - ldlm_mode_t mode; - struct page *page; - __u64 kms; - unsigned long pgoff, size, rand_read, seq_read; - int rc = 0; - ENTRY; - - if (ll_i2info(inode)->lli_smd == NULL) { - CERROR("No lsm on fault?\n"); - RETURN(NULL); - } - - /* start and end the lock on the first and last bytes in the page */ - policy_from_vma(&policy, vma, address, PAGE_CACHE_SIZE); - - CDEBUG(D_MMAP, "nopage vma %p inode %lu, locking ["LPU64", "LPU64"]\n", - vma, inode->i_ino, policy.l_extent.start, - policy.l_extent.end); - - mode = mode_from_vma(vma); - - rc = ll_extent_lock(fd, inode, ll_i2info(inode)->lli_smd, mode, &policy, - &lockh, LDLM_FL_CBPENDING); - if (rc != 0) - RETURN(NULL); - - /* XXX change inode size without i_sem hold! there is a race condition - * with truncate path. (see ll_extent_lock) */ - kms = lov_merge_size(ll_i2info(inode)->lli_smd, 1); - pgoff = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; - size = (kms + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - if (pgoff >= size) - ll_glimpse_size(inode); - else - inode->i_size = kms; - - /* disable VM_SEQ_READ and use VM_RAND_READ to make sure that - * the kernel will not read other pages not covered by ldlm in - * filemap_nopage. we do our readahead in ll_readpage. - */ - rand_read = vma->vm_flags & VM_RAND_READ; - seq_read = vma->vm_flags & VM_SEQ_READ; - vma->vm_flags &= ~ VM_SEQ_READ; - vma->vm_flags |= VM_RAND_READ; - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - page = filemap_nopage(vma, address, type); -#else - page = filemap_nopage(vma, address, unused); -#endif - vma->vm_flags &= ~VM_RAND_READ; - vma->vm_flags |= (rand_read | seq_read); - - ll_extent_unlock(fd, inode, ll_i2info(inode)->lli_smd, mode, &lockh); - RETURN(page); -} - -/* return the user space pointer that maps to a file offset via a vma */ -static inline unsigned long file_to_user(struct vm_area_struct *vma, - __u64 byte) -{ - return vma->vm_start + - (byte - ((__u64)vma->vm_pgoff << PAGE_CACHE_SHIFT)); - -} - -#define VMA_DEBUG(vma, fmt, arg...) \ - CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld): " fmt, \ - vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, ## arg); - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -/* [first, last] are the byte offsets affected. - * vm_{start, end} are user addresses of the first byte of the mapping and - * the next byte beyond it - * vm_pgoff is the page index of the first byte in the mapping */ -static void teardown_vmas(struct vm_area_struct *vma, __u64 first, - __u64 last) -{ - unsigned long address, len; - for (; vma ; vma = vma->vm_next_share) { - if (last >> PAGE_CACHE_SHIFT < vma->vm_pgoff) - continue; - if (first >> PAGE_CACHE_SHIFT > (vma->vm_pgoff + - ((vma->vm_end - vma->vm_start) >> PAGE_CACHE_SHIFT))) - continue; - - address = max((unsigned long)vma->vm_start, - file_to_user(vma, first)); - len = min((unsigned long)vma->vm_end, - file_to_user(vma, last) + 1) - address; - - VMA_DEBUG(vma, "zapping vma [address=%ld len=%ld]\n", - address, len); - LASSERT(vma->vm_mm); - ll_zap_page_range(vma, address, len); - } -} -#endif - -/* XXX put nice comment here. talk about __free_pte -> dirty pages and - * nopage's reference passing to the pte */ -int ll_teardown_mmaps(struct address_space *mapping, __u64 first, - __u64 last) -{ - int rc = -ENOENT; - ENTRY; - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - if (mapping_mapped(mapping)) { - rc = 0; - unmap_mapping_range(mapping, first + PAGE_SIZE - 1, - last - first + 1, 1); - } -#else - spin_lock(&mapping->i_shared_lock); - if (mapping->i_mmap != NULL) { - rc = 0; - teardown_vmas(mapping->i_mmap, first, last); - } - if (mapping->i_mmap_shared != NULL) { - rc = 0; - teardown_vmas(mapping->i_mmap_shared, first, last); - } - spin_unlock(&mapping->i_shared_lock); -#endif - RETURN(rc); -} - -static struct vm_operations_struct ll_file_vm_ops = { - .nopage = ll_nopage, -}; - -int ll_file_mmap(struct file * file, struct vm_area_struct * vma) -{ - int rc; - ENTRY; - - rc = generic_file_mmap(file, vma); - if (rc == 0) - vma->vm_ops = &ll_file_vm_ops; - - RETURN(rc); -} diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 83252cc..4e09d2f 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -390,57 +390,6 @@ struct ll_async_page *llap_from_page(struct page *page) RETURN(llap); } -static int queue_or_sync_write(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct ll_async_page *llap, - unsigned to, - obd_flag async_flags) -{ - struct obd_io_group *oig; - int rc; - ENTRY; - - /* _make_ready only sees llap once we've unlocked the page */ - llap->llap_write_queued = 1; - rc = obd_queue_async_io(exp, lsm, NULL, llap->llap_cookie, - OBD_BRW_WRITE, 0, 0, 0, async_flags); - if (rc == 0) { - LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n"); - //llap_write_pending(inode, llap); - GOTO(out, 0); - } - - llap->llap_write_queued = 0; - - rc = oig_init(&oig); - if (rc) - GOTO(out, rc); - - rc = obd_queue_group_io(exp, lsm, NULL, oig, llap->llap_cookie, - OBD_BRW_WRITE, 0, to, 0, ASYNC_READY | - ASYNC_URGENT | ASYNC_COUNT_STABLE | - ASYNC_GROUP_SYNC); - if (rc) - GOTO(free_oig, rc); - - rc = obd_trigger_group_io(exp, lsm, NULL, oig); - if (rc) - GOTO(free_oig, rc); - - rc = oig_wait(oig); - - if (!rc && async_flags & ASYNC_READY) - unlock_page(llap->llap_page); - - LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "sync write returned %d\n", - rc); - -free_oig: - oig_release(oig); -out: - RETURN(rc); -} - void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm, obd_off size); /* update our write count to account for i_size increases that may have @@ -480,11 +429,39 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, exp = ll_i2obdexp(inode); if (exp == NULL) RETURN(-EINVAL); - - rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, llap, - to, 0); - if (rc) + + /* _make_ready only sees llap once we've unlocked the page */ + llap->llap_write_queued = 1; + rc = obd_queue_async_io(exp, lsm, NULL, llap->llap_cookie, + OBD_BRW_WRITE, 0, 0, 0, 0); + if (rc != 0) { /* async failed, try sync.. */ + struct obd_io_group *oig; + rc = oig_init(&oig); + if (rc) + GOTO(out, rc); + + llap->llap_write_queued = 0; + rc = obd_queue_group_io(exp, lsm, NULL, oig, + llap->llap_cookie, + OBD_BRW_WRITE, 0, to, 0, + ASYNC_READY | ASYNC_URGENT | + ASYNC_COUNT_STABLE | + ASYNC_GROUP_SYNC); + + if (rc) + GOTO(free_oig, rc); + + rc = obd_trigger_group_io(exp, lsm, NULL, oig); + if (rc) + GOTO(free_oig, rc); + + rc = oig_wait(oig); +free_oig: + oig_release(oig); GOTO(out, rc); + } + LL_CDEBUG_PAGE(D_PAGE, page, "write queued\n"); + //llap_write_pending(inode, llap); } else { lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRTY_HITS); @@ -529,44 +506,6 @@ static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len) spin_unlock(&sbi->ll_lock); } -int ll_writepage(struct page *page) -{ - struct inode *inode = page->mapping->host; - struct obd_export *exp; - struct ll_async_page *llap; - int rc = 0; - ENTRY; - - LASSERT(!PageDirty(page)); - LASSERT(PageLocked(page)); - - exp = ll_i2obdexp(inode); - if (exp == NULL) - GOTO(out, rc = -EINVAL); - - llap = llap_from_page(page); - if (IS_ERR(llap)) - GOTO(out, rc = PTR_ERR(llap)); - - page_cache_get(page); - if (llap->llap_write_queued) { - LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n"); - rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, - llap->llap_cookie, - ASYNC_READY | ASYNC_URGENT); - } else { - rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, llap, - PAGE_SIZE, ASYNC_READY | - ASYNC_URGENT); - } - if (rc) - page_cache_release(page); -out: - if (rc) - unlock_page(page); - RETURN(rc); -} - /* called for each page in a completed rpc.*/ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { @@ -1018,10 +957,17 @@ int ll_readpage(struct file *filp, struct page *page) } if (rc == 0) { - CWARN("ino %lu page %lu (%llu) not covered by " - "a lock (mmap?). check debug logs.\n", - inode->i_ino, page->index, - (long long)page->index << PAGE_CACHE_SHIFT); + static unsigned long next_print; + CDEBUG(D_INODE, "ino %lu page %lu (%llu) didn't match a lock\n", + inode->i_ino, page->index, + (long long)page->index << PAGE_CACHE_SHIFT); + if (0 && time_after(jiffies, next_print)) { + CWARN("ino %lu page %lu (%llu) not covered by " + "a lock (mmap?). check debug logs.\n", + inode->i_ino, page->index, + (long long)page->index << PAGE_CACHE_SHIFT); + next_print = jiffies + 30 * HZ; + } } rc = ll_issue_page_read(exp, llap, oig, 0); diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index fc3cab1..8a3099f 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -49,6 +49,49 @@ #include "llite_internal.h" #include +static int ll_writepage_24(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct obd_export *exp; + struct ll_async_page *llap; + int rc = 0; + ENTRY; + + LASSERT(!PageDirty(page)); + LASSERT(PageLocked(page)); + + exp = ll_i2obdexp(inode); + if (exp == NULL) + GOTO(out, rc = -EINVAL); + + llap = llap_from_page(page); + if (IS_ERR(llap)) + GOTO(out, rc = PTR_ERR(llap)); + + page_cache_get(page); + if (llap->llap_write_queued) { + LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n"); + rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, + ASYNC_READY | ASYNC_URGENT); + } else { + llap->llap_write_queued = 1; + rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, OBD_BRW_WRITE, 0, 0, + 0, ASYNC_READY | ASYNC_URGENT); + if (rc == 0) + LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n"); + else + llap->llap_write_queued = 0; + } + if (rc) + page_cache_release(page); +out: + if (rc) + unlock_page(page); + RETURN(rc); +} + static int ll_direct_IO_24(int rw, #ifdef HAVE_DIO_FILE struct file *file, @@ -136,7 +179,7 @@ static int ll_direct_IO_24(int rw, struct address_space_operations ll_aops = { .readpage = ll_readpage, .direct_IO = ll_direct_IO_24, - .writepage = ll_writepage, + .writepage = ll_writepage_24, .prepare_write = ll_prepare_write, .commit_write = ll_commit_write, .removepage = ll_removepage, diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 53bde80..71964de 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -51,6 +51,51 @@ #include "llite_internal.h" #include +static int ll_writepage_26(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct obd_export *exp; + struct ll_async_page *llap; + int rc; + ENTRY; + + LASSERT(!PageDirty(page)); + LASSERT(PageLocked(page)); + + exp = ll_i2obdexp(inode); + if (exp == NULL) + GOTO(out, rc = -EINVAL); + + llap = llap_from_page(page); + if (IS_ERR(llap)) + GOTO(out, rc = PTR_ERR(llap)); + + page_cache_get(page); + if (llap->llap_write_queued) { + LL_CDEBUG_PAGE(D_PAGE, page, "marking urgent\n"); + rc = obd_set_async_flags(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, + ASYNC_READY | ASYNC_URGENT); + } else { + llap->llap_write_queued = 1; + rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL, + llap->llap_cookie, OBD_BRW_WRITE, 0, 0, + 0, ASYNC_READY | ASYNC_URGENT); + if (rc == 0) + LL_CDEBUG_PAGE(D_PAGE, page, "mmap write queued\n"); + else + llap->llap_write_queued = 0; + } + if (rc) + page_cache_release(page); +out: + if (rc) + unlock_page(page); + else + set_page_writeback(page); + RETURN(rc); +} + /* It is safe to not check anything in invalidatepage/releasepage below because they are run with page locked and all our io is happening with locked page too */ @@ -72,7 +117,7 @@ struct address_space_operations ll_aops = { .readpage = ll_readpage, // .readpages = ll_readpages, // .direct_IO = ll_direct_IO_26, - .writepage = ll_writepage, + .writepage = ll_writepage_26, .writepages = generic_writepages, .set_page_dirty = __set_page_dirty_nobuffers, .sync_page = NULL, diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 94fa984..27704bd 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -436,16 +436,6 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([no]) ]) - # --------- zap_page_range(vma) -------------------------------- - AC_MSG_CHECKING([if zap_pag_range with vma parameter]) - ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`" - if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then - AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter]) - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi - # ---------- Red Hat 2.4.20 backports some 2.5 bits -------- # This needs to run after we've defined the KCPPFLAGS diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h index cad7a69..acf4045 100644 --- a/lustre/portals/include/linux/libcfs.h +++ b/lustre/portals/include/linux/libcfs.h @@ -90,7 +90,6 @@ struct ptldebug_header { #define D_RPCTRACE 0x00100000 /* for distributed debugging */ #define D_VFSTRACE 0x00200000 #define D_READA 0x00400000 /* read-ahead */ -#define D_MMAP 0x00800000 #ifdef __KERNEL__ # include /* THREAD_SIZE */ diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index dce196f..e546aaf 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -74,7 +74,7 @@ static const char *portal_debug_masks[] = {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", "blocks", "net", "warning", "buffs", "other", "dentry", "portals", "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", - "reada", "mmap", NULL}; + "reada", NULL}; struct debug_daemon_cmd { char *cmd; diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index 3eb90ab..bc148be 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -65,5 +65,3 @@ ll_dirstripe_verify openfilleddirunlink copy_attr rename_many -memhog -mmap_sanity diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 166755f..bb3368d 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -24,12 +24,10 @@ noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany rmdirmany noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2 -noinst_PROGRAMS += mmap_sanity # noinst_PROGRAMS += ldaptest copy_attr bin_PROGRAMS = mcreate munlink endif # TESTS -mmap_sanity_SOURCES= mmap_sanity.c stat_SOURCES = stat.c stat_fs.h mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE) #write_append_truncate_CC=mpicc diff --git a/lustre/tests/mmap_sanity.c b/lustre/tests/mmap_sanity.c deleted file mode 100644 index 3fd0b0e..0000000 --- a/lustre/tests/mmap_sanity.c +++ /dev/null @@ -1,643 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -char *dir = NULL, *node = NULL, *dir2 = NULL; -long page_size; -char mmap_sanity[256]; - - -static void usage(void) -{ - printf("Usage: mmap_sanity -d dir [-n node | -m dir2]\n"); - printf(" dir lustre mount point\n"); - printf(" node another client\n"); - printf(" dir2 another mount point\n"); - exit(127); -} - -#define MMAP_NOTIFY_PORT 7676 -static int mmap_notify(char *target, char *str, int delay) -{ - unsigned short port = MMAP_NOTIFY_PORT; - int socket_type = SOCK_DGRAM; - struct sockaddr_in server; - struct hostent *hp; - int len, sockfd, rc = 0; - - if (target == NULL) - return 0; - - sockfd = socket(AF_INET, socket_type, 0); - if (sockfd < 0) { - perror("socket()"); - return errno; - } - - if ((hp = gethostbyname(target)) == NULL) { - perror(target); - rc = errno; - goto out_close; - } - - memset(&server,0,sizeof(server)); - memcpy(&(server.sin_addr), hp->h_addr, hp->h_length); - server.sin_family = AF_INET; - server.sin_port = htons(port); - - len = sizeof(server); - if (delay) - sleep(delay); - - rc = sendto(sockfd, str, strlen(str), 0, - (struct sockaddr *)&server, len); - if (rc < 0) { - perror("sendto()"); - rc = errno; - } else - rc = 0; - -out_close: - close(sockfd); - return rc; -} - -static int mmap_wait(char *str, int timeout) -{ - unsigned short port = MMAP_NOTIFY_PORT; - int socket_type = SOCK_DGRAM; - struct sockaddr_in local, from; - char host[256]; - struct hostent *hp; - fd_set rfds; - struct timeval tv; - int sockfd, rc = 0; - - if (dir2 != NULL) - return 0; - - memset(host, 0, sizeof(host)); - if (gethostname(host, sizeof(host))) { - perror("gethostname()"); - return errno; - } - - if ((hp = gethostbyname(host)) == NULL) { - perror(host); - return errno; - } - - local.sin_family = AF_INET; - memcpy(&(local.sin_addr), hp->h_addr, hp->h_length); - local.sin_port = htons(port); - - sockfd = socket(AF_INET, socket_type, 0); - if (sockfd < 0) { - perror("socket()"); - return errno; - } - - rc = bind(sockfd, (struct sockaddr *)&local, sizeof(local)); - if (rc < 0) { - perror("bind()"); - rc = errno; - goto out_close; - } - - FD_ZERO(&rfds); - FD_SET(sockfd, &rfds); - tv.tv_sec = timeout ? timeout : 5; - tv.tv_usec = 0; - - rc = select(sockfd + 1, &rfds, NULL, NULL, &tv); - if (rc) { /* got data */ - char buffer[1024]; - int fromlen =sizeof(from); - - memset(buffer, 0, sizeof(buffer)); - rc = recvfrom(sockfd, buffer, sizeof(buffer), 0, - (struct sockaddr *)&from, &fromlen); - if (rc <= 0) { - perror("recvfrom()"); - rc = errno; - goto out_close; - } - rc = 0; - - if (strncmp(str, buffer, strlen(str)) != 0) { - fprintf(stderr, "expected string mismatch!\n"); - rc = EINVAL; - } - } else { /* timeout */ - fprintf(stderr, "timeout!\n"); - rc = ETIME; - } - -out_close: - close(sockfd); - return rc; -} - -static int remote_tst(int tc, char *mnt); -static int mmap_run(char *host, int tc) -{ - pid_t child; - char nodearg[256], command[256]; - int rc = 0; - - child = fork(); - if (child < 0) - return errno; - else if (child) - return 0; - - if (dir2 != NULL) { - rc = remote_tst(tc, dir2); - } else { - sprintf(nodearg, "-w %s", node); - sprintf(command, "%s -d %s -n %s -c %d", - mmap_sanity, dir, host, tc); - rc = execlp("pdsh", "pdsh", "-S", nodearg, command, NULL); - if (rc) - perror("execlp()"); - } - _exit(rc); -} - -static int mmap_initialize(char *myself, int tc) -{ - char buf[1024], *file; - int fdr, fdw, count, rc = 0; - - page_size = sysconf(_SC_PAGESIZE); - if (page_size == -1) { - perror("sysconf(_SC_PAGESIZE)"); - return errno; - } - if (tc) - return 0; - - /* copy myself to lustre for another client */ - fdr = open(myself, O_RDONLY); - if (fdr < 0) { - perror(myself); - return EINVAL; - } - file = strrchr(myself, '/'); - if (file == NULL) { - fprintf(stderr, "can't get test filename\n"); - close(fdr); - return EINVAL; - } - file++; - sprintf(mmap_sanity, "%s/%s", dir, file); - - fdw = open(mmap_sanity, O_CREAT|O_WRONLY, 0777); - if (fdw < 0) { - perror(mmap_sanity); - close(fdr); - return EINVAL; - } - while ((count = read(fdr, buf, sizeof(buf))) != 0) { - int writes; - - if (count < 0) { - perror("read()"); - rc = errno; - break; - } - writes = write(fdw, buf, count); - if (writes != count) { - perror("write()"); - rc = errno; - break; - } - } - close(fdr); - close(fdw); - return rc; -} - -static void mmap_finalize(int tc) -{ - if (tc) - return; - unlink(mmap_sanity); -} - -/* basic mmap operation on single node */ -static int mmap_tst1(char *mnt) -{ - char *ptr, mmap_file[256]; - int region, fd, rc = 0; - - region = page_size * 10; - sprintf(mmap_file, "%s/%s", mnt, "mmap_file1"); - - if (unlink(mmap_file) && errno != ENOENT) { - perror("unlink()"); - return errno; - } - - fd = open(mmap_file, O_CREAT|O_RDWR, 0600); - if (fd < 0) { - perror(mmap_file); - return errno; - } - ftruncate(fd, region); - - ptr = mmap(NULL, region, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - perror("mmap()"); - rc = errno; - goto out_close; - } - memset(ptr, 'a', region); - - munmap(ptr, region); -out_close: - close(fd); - unlink(mmap_file); - return rc; -} - -/* MAP_PRIVATE create a copy-on-write mmap */ -static int mmap_tst2(char *mnt) -{ - char *ptr, mmap_file[256], buf[256]; - int fd, rc = 0; - - sprintf(mmap_file, "%s/%s", mnt, "mmap_file2"); - - if (unlink(mmap_file) && errno != ENOENT) { - perror("unlink()"); - return errno; - } - - fd = open(mmap_file, O_CREAT|O_RDWR, 0600); - if (fd < 0) { - perror(mmap_file); - return errno; - } - ftruncate(fd, page_size); - - ptr = mmap(NULL, page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - if (ptr == MAP_FAILED) { - perror("mmap()"); - rc = errno; - goto out_close; - } - memcpy(ptr, "blah", strlen("blah")); - - munmap(ptr, page_size); -out_close: - close(fd); - if (rc) - return rc; - - fd = open(mmap_file, O_RDONLY); - if (fd < 0) { - perror(mmap_file); - return errno; - } - rc = read(fd, buf, sizeof(buf)); - if (rc < 0) { - perror("read()"); - rc = errno; - goto out_close; - } - rc = 0; - - if (strncmp("blah", buf, strlen("blah")) == 0) { - fprintf(stderr, "mmap write back with MAP_PRIVATE!\n"); - rc = EFAULT; - } - close(fd); - unlink(mmap_file); - return rc; -} - -/* cocurrent mmap operations on two nodes */ -static int mmap_tst3(char *mnt) -{ - char *ptr, mmap_file[256], host[256]; - int region, fd, rc = 0; - - region = page_size * 100; - sprintf(mmap_file, "%s/%s", mnt, "mmap_file3"); - - if (unlink(mmap_file) && errno != ENOENT) { - perror("unlink()"); - return errno; - } - - fd = open(mmap_file, O_CREAT|O_RDWR, 0600); - if (fd < 0) { - perror(mmap_file); - return errno; - } - ftruncate(fd, region); - - ptr = mmap(NULL, region, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - perror("mmap()"); - rc = errno; - goto out_close; - } - - if (gethostname(host, sizeof(host))) { - perror("gethostname()"); - rc = errno; - goto out_unmap; - } - - rc = mmap_run(host, 3); - if (rc) - goto out_unmap; - - rc = mmap_wait("mmap done", 10); - memset(ptr, 'a', region); - - sleep(2); /* wait for remote test finish */ -out_unmap: - munmap(ptr, region); -out_close: - close(fd); - unlink(mmap_file); - return rc; -} - -static int remote_tst3(char *mnt) -{ - char *ptr, mmap_file[256]; - int region, fd, rc = 0; - - region = page_size * 100; - sprintf(mmap_file, "%s/%s", mnt, "mmap_file3"); - - fd = open(mmap_file, O_RDWR, 0600); - if (fd < 0) { - perror(mmap_file); - return errno; - } - - ptr = mmap(NULL, region, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (ptr == MAP_FAILED) { - perror("mmap()"); - rc = errno; - goto out_close; - } - memset(ptr, 'b', region); - - rc = mmap_notify(node, "mmap done", 1); - if (rc) - goto out_unmap; - - memset(ptr, 'c', region); - -out_unmap: - munmap(ptr, region); -out_close: - close(fd); - return rc; -} - -/* client1 write to file_4a from mmap()ed file_4b; - * client2 write to file_4b from mmap()ed file_4a. */ -static int mmap_tst4(char *mnt) -{ - char *ptr, filea[256], fileb[256], host[256]; - int region, fdr, fdw, rc = 0; - - region = page_size * 100; - sprintf(filea, "%s/%s", mnt, "mmap_file_4a"); - sprintf(fileb, "%s/%s", mnt, "mmap_file_4b"); - - if (unlink(filea) && errno != ENOENT) { - perror("unlink()"); - return errno; - } - if (unlink(fileb) && errno != ENOENT) { - perror("unlink()"); - return errno; - } - - fdr = fdw = -1; - fdr = open(fileb, O_CREAT|O_RDWR, 0600); - if (fdr < 0) { - perror(fileb); - return errno; - } - ftruncate(fdr, region); - fdw = open(filea, O_CREAT|O_RDWR, 0600); - if (fdw < 0) { - perror(filea); - rc = errno; - goto out_close; - } - ftruncate(fdw, region); - - ptr = mmap(NULL, region, PROT_READ|PROT_WRITE, MAP_SHARED, fdr, 0); - if (ptr == MAP_FAILED) { - perror("mmap()"); - rc = errno; - goto out_close; - } - - if (gethostname(host, sizeof(host))) { - perror("gethostname()"); - rc = errno; - goto out_unmap; - } - - rc = mmap_run(host, 4); - if (rc) - goto out_unmap; - - rc = mmap_wait("mmap done", 10); - if (rc) - goto out_unmap; - - memset(ptr, '1', region); - - rc = write(fdw, ptr, region); - if (rc <= 0) { - perror("write()"); - rc = errno; - } else - rc = 0; - - sleep(2); /* wait for remote test finish */ -out_unmap: - munmap(ptr, region); -out_close: - if (fdr >= 0) - close(fdr); - if (fdw >= 0) - close(fdw); - unlink(filea); - unlink(fileb); - return rc; -} - -static int remote_tst4(char *mnt) -{ - char *ptr, filea[256], fileb[256]; - int region, fdr, fdw, rc = 0; - - region = page_size * 100; - sprintf(filea, "%s/%s", mnt, "mmap_file_4a"); - sprintf(fileb, "%s/%s", mnt, "mmap_file_4b"); - - fdr = fdw = -1; - fdr = open(filea, O_RDWR, 0600); - if (fdr < 0) { - perror(filea); - return errno; - } - fdw = open(fileb, O_RDWR, 0600); - if (fdw < 0) { - perror(fileb); - rc = errno; - goto out_close; - } - - ptr = mmap(NULL, region, PROT_READ|PROT_WRITE, MAP_SHARED, fdr, 0); - if (ptr == MAP_FAILED) { - perror("mmap()"); - rc = errno; - goto out_close; - } - - rc = mmap_notify(node, "mmap done", 1); - if (rc) - goto out_unmap; - - memset(ptr, '2', region); - - rc = write(fdw, ptr, region); - if (rc <= 0) { - perror("write()"); - rc = errno; - } else - rc = 0; - -out_unmap: - munmap(ptr, region); -out_close: - if (fdr >= 0) - close(fdr); - if (fdw >= 0) - close(fdw); - return rc; -} - -static int remote_tst(int tc, char *mnt) -{ - int rc = 0; - switch(tc) { - case 3: - rc = remote_tst3(mnt); - break; - case 4: - rc = remote_tst4(mnt); - break; - case 1: - case 2: - default: - fprintf(stderr, "wrong test case number %d\n", tc); - rc = EINVAL; - break; - } - return rc; -} - -struct test_case { - int tc; /* test case number */ - char *desc; /* test description */ - int (* test_fn)(char *mnt); /* test function */ - int node_cnt; /* node count */ -}; - -struct test_case tests[] = { - { 1, "mmap test1: basic mmap operation", mmap_tst1, 1 }, - { 2, "mmap test2: MAP_PRIVATE not write back", mmap_tst2, 1 }, - { 3, "mmap test3: cocurrent mmap ops on two nodes", mmap_tst3, 2 }, - { 4, "mmap test4: c1 write to f1 from mmaped f2, " - "c2 write to f1 from mmaped f1", mmap_tst4, 2 }, - { 0, NULL, 0, 0 } -}; - -int main(int argc, char **argv) -{ - extern char *optarg; - struct test_case *test; - int c, rc = 0, tc = 0; - - for(;;) { - c = getopt(argc, argv, "d:n:c:m:"); - if ( c == -1 ) - break; - - switch(c) { - case 'd': - dir = optarg; - break; - case 'n': - node = optarg; - break; - case 'c': - tc = atoi(optarg); - break; - case 'm': - dir2 = optarg; - break; - default: - case '?': - usage(); - break; - } - } - - if (dir == NULL) - usage(); - if (dir2 != NULL && node != NULL) - usage(); - - if (mmap_initialize(argv[0], tc) != 0) { - fprintf(stderr, "mmap_initialize failed!\n"); - return EINVAL; - } - - if (tc) { - rc = remote_tst(tc, dir); - goto out; - } - - for (test = tests; test->tc; test++) { - char *rs = "skip"; - rc = 0; - if (test->node_cnt == 1 || node != NULL || dir2 != NULL) { - rc = test->test_fn(dir); - rs = rc ? "fail" : "pass"; - } - fprintf(stderr, "%s (%s)\n", test->desc, rs); - if (rc) - break; - } -out: - mmap_finalize(tc); - return rc; -} diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index cc578b4..c3e0a80 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -4,7 +4,7 @@ set -e ONLY=${ONLY:-"$*"} # bug number for skipped test: 1768 3192 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"} +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! [ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" @@ -334,7 +334,7 @@ test_15() { # bug 974 - ENOSPC run_test 15 "test out-of-space with multiple writers ===========" test_16() { - fsx -c 50 -p 100 -N 2500 $MOUNT1/fsxfile $MOUNT2/fsxfile + fsx -R -W -c 50 -p 100 -N 2500 $MOUNT1/fsxfile $MOUNT2/fsxfile } run_test 16 "2500 iterations of dual-mount fsx =================" @@ -359,11 +359,6 @@ test_17() { # bug 3513, 3667 } run_test 17 "resource creation/LVB creation race ===============" -test_18() { - ./mmap_sanity -d $MOUNT1 -m $MOUNT2 -} -run_test 18 "mmap sanity check =================================" - log "cleanup: ======================================================" rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 3f00da0..049491f7 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -88,7 +88,6 @@ ptldebug_names = { "rpctrace" : (1 << 20), "vfstrace" : (1 << 21), "reada" : (1 << 22), - "mmap" : (1 << 23), } subsystem_names = { -- 1.8.3.1