Whamcloud - gitweb
Merge b_intent into b_md:
author pschwan <pschwan>
Fri, 31 Jan 2003 21:37:51 +0000 (21:37 +0000)
committer pschwan <pschwan>
Fri, 31 Jan 2003 21:37:51 +0000 (21:37 +0000)
- New kernel patch (version 9)
- DLM hooks to revalidate locked data, once the lock is granted (604)
- Further MDS reorganization, particularly of the open and o_creat paths

lustre/kernel_patches/patches/invalidate_show.patch [new file with mode: 0644]
lustre/kernel_patches/patches/iod-rmap-exports.patch [new file with mode: 0644]
lustre/kernel_patches/pc/iod-rmap-exports.pc [new file with mode: 0644]
lustre/mds/mds_open.c [new file with mode: 0644]
lustre/tests/open_delay.c [new file with mode: 0644]

diff --git a/lustre/kernel_patches/patches/invalidate_show.patch b/lustre/kernel_patches/patches/invalidate_show.patch
new file mode 100644 (file)
index 0000000..7e27e3a
--- /dev/null
@@ -0,0 +1,125 @@
+--- lum/fs/inode.c     Sat Oct 19 11:42:42 2002
++++ linux-2.4.18-uml35-ext3online/fs/inode.c   Mon Oct 14 00:41:20 2002
+@@ -606,7 +553,8 @@ static void dispose_list(struct list_hea
+ /*
+  * Invalidate all inodes for a device.
+  */
+-static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
++static int invalidate_list(struct list_head *head, struct super_block * sb,
++                         struct list_head * dispose, int show)
+ {
+       struct list_head *next;
+       int busy = 0, count = 0;
+@@ -631,6 +579,10 @@ static int invalidate_list(struct list_h
+                       count++;
+                       continue;
+               }
++              if (show)
++                      printk(KERN_ERR "inode busy: %s: %lu (count %d)\n",
++                             kdevname(sb->s_dev), inode->i_ino,
++                             atomic_read(&inode->i_count));
+               busy = 1;
+       }
+       /* only unused inodes may be cached with i_count zero */
+@@ -649,22 +601,23 @@ static int invalidate_list(struct list_h
+ /**
+  *    invalidate_inodes       - discard the inodes on a device
+  *    @sb: superblock
++ *    @show: whether we should display any busy inodes found
+  *
+  *    Discard all of the inodes for a given superblock. If the discard
+  *    fails because there are busy inodes then a non zero value is returned.
+  *    If the discard is successful all the inodes have been discarded.
+  */
+  
+-int invalidate_inodes(struct super_block * sb)
++int invalidate_inodes(struct super_block * sb, int show)
+ {
+       int busy;
+       LIST_HEAD(throw_away);
+       spin_lock(&inode_lock);
+-      busy = invalidate_list(&inode_in_use, sb, &throw_away);
+-      busy |= invalidate_list(&inode_unused, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+-      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
++      busy = invalidate_list(&inode_in_use, sb, &throw_away, show);
++      busy |= invalidate_list(&inode_unused, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show);
++      busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show);
+       spin_unlock(&inode_lock);
+       dispose_list(&throw_away);
+@@ -672,7 +625,7 @@ int invalidate_inodes(struct super_block
+       return busy;
+ }
+  
+-int invalidate_device(kdev_t dev, int do_sync)
++int invalidate_device(kdev_t dev, int do_sync, int show)
+ {
+       struct super_block *sb;
+       int res;
+@@ -690,7 +643,7 @@ int invalidate_device(kdev_t dev, int do
+                * hold).
+                */
+               shrink_dcache_sb(sb);
+-              res = invalidate_inodes(sb);
++              res = invalidate_inodes(sb, show);
+               drop_super(sb);
+       }
+       invalidate_buffers(dev);
+--- lum/fs/devfs/base.c.orig   Sat Oct 19 11:42:16 2002
++++ lum/fs/devfs/base.c        Wed Oct 30 17:12:33 2002
+@@ -2448,7 +2448,7 @@
+     retval = 1;
+     printk (KERN_DEBUG "VFS: Disk change detected on device %s\n",
+            kdevname (dev) );
+-    if ( invalidate_device (dev, 0) )
++    if ( invalidate_device (dev, 0, 1) )
+       printk (KERN_WARNING "VFS: busy inodes on changed media..\n");
+     /*  Ugly hack to disable messages about unable to read partition table  */
+     tmp = warn_no_part;
+--- lum/fs/super.c.orig        Sat Oct 19 11:42:42 2002
++++ lum/fs/super.c     Wed Oct 30 17:16:55 2002
+@@ -936,7 +936,7 @@
+       lock_super(sb);
+       lock_kernel();
+       sb->s_flags &= ~MS_ACTIVE;
+-      invalidate_inodes(sb);  /* bad name - it should be evict_inodes() */
++      invalidate_inodes(sb, 0);  /* bad name - it should be evict_inodes() */
+       if (sop) {
+               if (sop->write_super && sb->s_dirt)
+                       sop->write_super(sb);
+@@ -945,7 +945,7 @@
+       }
+       /* Forget any remaining inodes */
+-      if (invalidate_inodes(sb)) {
++      if (invalidate_inodes(sb, 1)) {
+               printk(KERN_ERR "VFS: Busy inodes after unmount. "
+                       "Self-destruct in 5 seconds.  Have a nice day...\n");
+       }
+--- lum/fs/block_dev.c.orig    Sat Oct 19 11:42:16 2002
++++ lum/fs/block_dev.c Wed Oct 30 17:18:15 2002
+@@ -533,7 +533,7 @@
+       if (!bdops->check_media_change(dev))
+               return 0;
+-      if (invalidate_device(dev, 0))
++      if (invalidate_device(dev, 0, 1))
+               printk("VFS: busy inodes on changed media.\n");
+       if (bdops->revalidate)
+--- lum/include/linux/fs.h     Wed Oct 30 17:10:42 2002
++++ lum/include/linux/fs.h.orig        Tue Oct 22 23:15:00 2002
+@@ -1261,8 +1261,8 @@
+ extern void set_buffer_flushtime(struct buffer_head *);
+ extern void balance_dirty(void);
+ extern int check_disk_change(kdev_t);
+-extern int invalidate_inodes(struct super_block *);
+-extern int invalidate_device(kdev_t, int);
++extern int invalidate_inodes(struct super_block *, int);
++extern int invalidate_device(kdev_t, int, int);
+ extern void invalidate_inode_pages(struct inode *);
+ extern void invalidate_inode_pages2(struct address_space *);
+ extern void invalidate_inode_buffers(struct inode *);
diff --git a/lustre/kernel_patches/patches/iod-rmap-exports.patch b/lustre/kernel_patches/patches/iod-rmap-exports.patch
new file mode 100644 (file)
index 0000000..00eba97
--- /dev/null
@@ -0,0 +1,64 @@
+--- linux-chaos/fs/inode.c.b_io_export Wed Jan 29 16:56:15 2003
++++ linux-chaos/fs/inode.c     Wed Jan 29 16:56:27 2003
+@@ -66,7 +66,8 @@
+  * NOTE! You also have to own the lock if you change
+  * the i_state of an inode while it is in use..
+  */
+-static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
++spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
++EXPORT_SYMBOL(inode_lock);
+ /*
+  * Statistics gathering..
+--- linux-chaos/fs/Makefile.b_io_export        Wed Jan 29 16:56:45 2003
++++ linux-chaos/fs/Makefile    Wed Jan 29 16:56:53 2003
+@@ -7,7 +7,7 @@
+ O_TARGET := fs.o
+-export-objs :=        filesystems.o open.o dcache.o buffer.o
++export-objs :=        filesystems.o open.o dcache.o buffer.o inode.o
+ mod-subdirs :=        nls
+ obj-y :=      open.o read_write.o devices.o file_table.o buffer.o \
+--- linux-chaos/mm/filemap.c.b_io_export       Wed Jan 29 16:50:39 2003
++++ linux-chaos/mm/filemap.c   Wed Jan 29 16:51:11 2003
+@@ -65,6 +65,7 @@
+  *                    pagecache_lock
+  */
+ spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
++EXPORT_SYMBOL(pagemap_lru_lock_cacheline);
+ #define CLUSTER_PAGES         (1 << page_cluster)
+ #define CLUSTER_OFFSET(x)     (((x) >> page_cluster) << page_cluster)
+--- linux-chaos/mm/vmscan.c.b_io_export        Wed Jan 29 16:51:58 2003
++++ linux-chaos/mm/vmscan.c    Wed Jan 29 16:55:16 2003
+@@ -839,6 +839,7 @@
+       set_current_state(TASK_RUNNING);
+       remove_wait_queue(&kswapd_done, &wait);
+ }
++EXPORT_SYMBOL(wakeup_kswapd);
+ static void wakeup_memwaiters(void)
+ {
+--- linux-chaos/mm/Makefile.b_io_export        Wed Jan 29 16:52:46 2003
++++ linux-chaos/mm/Makefile    Wed Jan 29 16:54:23 2003
+@@ -9,7 +9,7 @@
+ O_TARGET := mm.o
+-export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.o
+ obj-y  := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+           vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+--- linux-chaos/mm/page_alloc.c.b_io_export    Wed Jan 29 17:00:32 2003
++++ linux-chaos/mm/page_alloc.c        Wed Jan 29 17:01:31 2003
+@@ -31,6 +31,7 @@
+ int nr_inactive_dirty_pages;
+ int nr_inactive_clean_pages;
+ pg_data_t *pgdat_list;
++EXPORT_SYMBOL(pgdat_list);
+ /*
+  * The zone_table array is used to look up the address of the
diff --git a/lustre/kernel_patches/pc/iod-rmap-exports.pc b/lustre/kernel_patches/pc/iod-rmap-exports.pc
new file mode 100644 (file)
index 0000000..1218f55
--- /dev/null
@@ -0,0 +1,6 @@
+fs/inode.c
+fs/Makefile
+mm/filemap.c
+mm/vmscan.c
+mm/Makefile
+mm/page_alloc.c
diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c
new file mode 100644 (file)
index 0000000..f4bac4a
--- /dev/null
@@ -0,0 +1,224 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  lustre/mds/mds_open.c
+ *  Lustre Metadata Server (mds) open request handler
+ *
+ *  Copyright (c) 2001, 2002 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *   Author: Mike Shaver <shaver@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_MDS
+
+#include <linux/module.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_dlm.h>
+#include <linux/init.h>
+#include <linux/obd_class.h>
+#include <linux/random.h>
+#include <linux/locks.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
+#endif
+#include <linux/obd_lov.h>
+#include <linux/lustre_mds.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+
+/*
+ * Symbols used by mds_open() but defined outside this file
+ * (presumably in the main MDS handler -- TODO confirm): the cache that
+ * mds_file_data objects are allocated from, the request-to-mds lookup,
+ * and the helpers that bracket a transaction.
+ */
+extern kmem_cache_t *mds_file_cache;
+extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+extern void mds_start_transno(struct mds_obd *mds);
+extern int mds_finish_transno(struct mds_obd *mds, void *handle,
+                              struct ptlrpc_request *req, int rc);
+
+int mds_open(struct mds_update_record *rec, int offset,
+             struct ptlrpc_request *req)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct ldlm_reply *rep = lustre_msg_buf(req->rq_repmsg, 0);
+        struct obd_ucred uc;
+        struct obd_run_ctxt saved;
+        struct lustre_handle lockh;
+        int lock_mode;
+        struct file *file;
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+        struct dentry *dchild, *parent;
+        struct inode *dir;
+        struct mds_export_data *med;
+        struct mds_file_data *mfd = NULL;
+        struct vfsmount *mnt = mds->mds_vfsmnt;
+        __u32 flags;
+        struct list_head *tmp;
+        int rc = 0;
+        ENTRY;
+
+#warning replay of open needs to be redone
+        /* was this animal open already and the client lost the reply? */
+        /* XXX need some way to detect a reopen, to avoid locked list walks */
+        med = &req->rq_export->exp_mds_data;
+#if 0
+        spin_lock(&med->med_open_lock);
+        list_for_each(tmp, &med->med_open_head) {
+                mfd = list_entry(tmp, typeof(*mfd), mfd_list);
+                if (!memcmp(&mfd->mfd_clienthandle, &body->handle,
+                            sizeof(mfd->mfd_clienthandle)) &&
+                    body->fid1.id == mfd->mfd_file->f_dentry->d_inode->i_ino) {
+                        dchild = mfd->mfd_file->f_dentry;
+                        spin_unlock(&med->med_open_lock);
+                        CERROR("Re opening "LPD64"\n", body->fid1.id);
+                        GOTO(out_pack, rc = 0);
+                }
+        }
+        spin_unlock(&med->med_open_lock);
+#endif
+        rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
+                CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
+                req->rq_status = -ENOMEM;
+                RETURN(-ENOMEM);
+        }
+
+        lock_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
+        parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, lock_mode,
+                                       &lockh);
+        if (IS_ERR(parent)) {
+                rc = PTR_ERR(parent);
+                CERROR("parent lookup error %d\n", rc);
+                LBUG();
+                RETURN(rc);
+        }
+        dir = parent->d_inode;
+
+        down(&dir->i_sem);
+        dchild = lookup_one_len(lustre_msg_buf(req->rq_reqmsg, 3),
+                                parent, req->rq_reqmsg->buflens[3] - 1);
+        if (IS_ERR(dchild)) {
+                up(&dir->i_sem);
+                GOTO(out_unlock, rc = PTR_ERR(dchild));
+        }
+
+        if (dchild->d_inode)
+                rep->lock_policy_res1 |= IT_OPEN_POS;
+        else
+                rep->lock_policy_res1 |= IT_OPEN_NEG;
+
+        /* Negative dentry, just create the file */
+        if (dchild->d_inode) { 
+                up(&dir->i_sem);
+               if ((rec->ur_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) { 
+                        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                        mds_pack_inode2body(body, dchild->d_inode);
+                        if (S_ISREG(dchild->d_inode->i_mode))
+                                rc = mds_pack_md(obd, req->rq_repmsg, 3, body,
+                                                 dchild->d_inode);
+                        if (rc == 0)
+                                rc = -EEXIST;
+                       GOTO(out_ldput, rc);
+                }
+        } else if ((rec->ur_flags & O_CREAT) && !dchild->d_inode) {
+                int err;
+                void *handle;
+                mds_start_transno(mds);
+                rep->lock_policy_res1 |= IT_OPEN_CREATE;
+                handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
+                if (IS_ERR(handle)) {
+                        rc = PTR_ERR(handle);
+                        mds_finish_transno(mds, handle, req, rc);
+                        up(&dir->i_sem);
+                        GOTO(out_ldput, rc);
+                }
+                rc = vfs_create(dir, dchild, rec->ur_mode);
+                up(&dir->i_sem);
+                rc = mds_finish_transno(mds, handle, req, rc);
+                err = fsfilt_commit(obd, dir, handle);
+                if (rc || err) {
+                        CERROR("error on commit: err = %d\n", err);
+                        if (!rc)
+                                rc = err;
+                        GOTO(out_ldput, rc);
+                }
+        } else if (!dchild->d_inode) {
+                up(&dir->i_sem);
+                GOTO(out_ldput, rc = 0);
+        } 
+
+        /*
+         * It already exists.
+         */
+        mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+        mds_pack_inode2body(body, dchild->d_inode);
+
+        if (!S_ISREG(dchild->d_inode->i_mode))
+                GOTO(out_ldput, rc = 0);
+
+        rc = mds_pack_md(obd, req->rq_repmsg, 3, body, dchild->d_inode);
+        if (rc) {
+                CERROR("failure to get EA for %ld\n", dchild->d_inode->i_ino);
+                GOTO(out_ldput, req->rq_status = rc);
+        }
+
+        rep->lock_policy_res1 |= IT_OPEN_OPEN;
+        mfd = kmem_cache_alloc(mds_file_cache, GFP_KERNEL);
+        if (!mfd) {
+                CERROR("mds: out of memory\n");
+                GOTO(out_ldput, req->rq_status = -ENOMEM);
+        }
+
+        flags = rec->ur_flags;
+        /* dentry_open does a dput(de) and mntput(mnt) on error */
+        mntget(mnt);
+        file = dentry_open(dchild, mnt, flags & ~O_DIRECT & ~O_TRUNC);
+        if (IS_ERR(file))
+                GOTO(out_unlock, req->rq_status = PTR_ERR(file));
+
+        file->private_data = mfd;
+        mfd->mfd_file = file;
+        get_random_bytes(&mfd->mfd_servercookie, sizeof(mfd->mfd_servercookie));
+        spin_lock(&med->med_open_lock);
+        list_add(&mfd->mfd_list, &med->med_open_head);
+        spin_unlock(&med->med_open_lock);
+
+ out_unlock:
+        l_dput(parent);
+        ldlm_lock_decref(&lockh, lock_mode);
+        if (rc && rc != -EEXIST && mfd != NULL) {
+                kmem_cache_free(mds_file_cache, mfd);
+                mfd = NULL;
+        }
+        if (rc)
+                RETURN(rc);
+
+ out_pack:
+        if (mfd) {
+                body->handle.addr = (__u64)(unsigned long)mfd;
+                body->handle.cookie = mfd->mfd_servercookie;
+                CDEBUG(D_INODE, "file %p: mfd %p, cookie "LPX64"\n",
+                       mfd->mfd_file, mfd, mfd->mfd_servercookie);
+        }
+        RETURN(0);
+
+ out_ldput:
+        l_dput(dchild);
+        goto out_unlock;
+}
diff --git a/lustre/tests/open_delay.c b/lustre/tests/open_delay.c
new file mode 100644 (file)
index 0000000..2f41884
--- /dev/null
@@ -0,0 +1,25 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_lite.h>
+#include <linux/obd_lov.h>
+
+/*
+ * Open the file named by argv[1] read-only with the O_LOV_DELAY_CREATE
+ * flag set, then close it again.
+ *
+ * Exits with status 0 when the open succeeds, and with status 1 on a
+ * usage error or when the open fails.  Diagnostics go to stderr.
+ */
+int main(int argc, char **argv)
+{
+        int fd;
+
+        if (argc != 2) {
+                fprintf(stderr, "Usage %s <filename>\n", argv[0]);
+                exit(1);
+        }
+
+        fd = open(argv[1], O_RDONLY | O_LOV_DELAY_CREATE);
+        if (fd == -1) {
+                fprintf(stderr, "Error opening %s\n", argv[1]);
+                exit(1);
+        }
+
+        /* Release the descriptor explicitly instead of relying on exit. */
+        close(fd);
+        return 0;
+}