Whamcloud - gitweb
b=11973
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
index 935548e..53147a4 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  lustre/lib/fsfilt_ext3.c
+ *  lustre/lib/lvfs_linux.c
  *  Lustre filesystem abstraction routines
  *
  *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
 #include <linux/version.h>
 #include <linux/fs.h>
 #include <asm/unistd.h>
-#include <linux/jbd.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/version.h>
-#include <linux/kp30.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
+#include <libcfs/kp30.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
+#include <obd_class.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/lustre_compat25.h>
-#include <linux/lvfs.h>
+#include <lvfs.h>
 #include "lvfs_internal.h"
 
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre_quota.h>
 
 atomic_t obd_memory;
 int obd_memmax;
 
-
 /* Debugging check only needed during development */
 #ifdef OBD_CTXT_DEBUG
 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
-# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds()))
-# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds()))
+# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
+                                              msg)
+# define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
 #else
 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
 #endif
 
+static void push_group_info(struct lvfs_run_ctxt *save,
+                            struct upcall_cache_entry *uce)
+{
+        struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
+
+        if (!ginfo) {
+                save->ngroups = current_ngroups;
+                current_ngroups = 0;
+        } else {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+                task_lock(current);
+                save->group_info = current->group_info;
+                current->group_info = ginfo;
+                task_unlock(current);
+#else
+                LASSERT(ginfo->ngroups <= NGROUPS);
+                LASSERT(current->ngroups <= NGROUPS_SMALL);
+                /* save old */
+                save->group_info.ngroups = current->ngroups;
+                if (current->ngroups)
+                        memcpy(save->group_info.small_block, current->groups,
+                               current->ngroups * sizeof(gid_t));
+                /* push new */
+                current->ngroups = ginfo->ngroups;
+                if (ginfo->ngroups)
+                        memcpy(current->groups, ginfo->small_block,
+                               current->ngroups * sizeof(gid_t));
+#endif
+        }
+}
+
+static void pop_group_info(struct lvfs_run_ctxt *save,
+                           struct upcall_cache_entry *uce)
+{
+        struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
+
+        if (!ginfo) {
+                current_ngroups = save->ngroups;
+        } else {
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+                task_lock(current);
+                current->group_info = save->group_info;
+                task_unlock(current);
+#else
+                current->ngroups = save->group_info.ngroups;
+                if (current->ngroups)
+                        memcpy(current->groups, save->group_info.small_block,
+                               current->ngroups * sizeof(gid_t));
+#endif
+        }
+}
+
 /* push / pop to root of obd store */
-void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
-               struct obd_ucred *uc)
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+               struct lvfs_ucred *uc)
 {
         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
         ASSERT_CTXT_MAGIC(new_ctx->magic);
@@ -75,7 +127,7 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
 
         /*
         CDEBUG(D_INFO,
-               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
                save, current, current->fs, current->fs->pwd,
                atomic_read(&current->fs->pwd->d_count),
                atomic_read(&current->fs->pwd->d_inode->i_count),
@@ -89,7 +141,7 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
         LASSERT(atomic_read(&new_ctx->pwd->d_count));
         save->pwd = dget(current->fs->pwd);
         save->pwdmnt = mntget(current->fs->pwdmnt);
-        save->ngroups = current->ngroups;
+        save->luc.luc_umask = current->fs->umask;
 
         LASSERT(save->pwd);
         LASSERT(save->pwdmnt);
@@ -97,28 +149,22 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
         LASSERT(new_ctx->pwdmnt);
 
         if (uc) {
-                save->ouc.ouc_fsuid = current->fsuid;
-                save->ouc.ouc_fsgid = current->fsgid;
-                save->ouc.ouc_cap = current->cap_effective;
-                save->ouc.ouc_suppgid1 = current->groups[0];
-                save->ouc.ouc_suppgid2 = current->groups[1];
-
-                current->fsuid = uc->ouc_fsuid;
-                current->fsgid = uc->ouc_fsgid;
-                current->cap_effective = uc->ouc_cap;
-                current->ngroups = 0;
-
-                if (uc->ouc_suppgid1 != -1)
-                        current->groups[current->ngroups++] = uc->ouc_suppgid1;
-                if (uc->ouc_suppgid2 != -1)
-                        current->groups[current->ngroups++] = uc->ouc_suppgid2;
+                save->luc.luc_fsuid = current->fsuid;
+                save->luc.luc_fsgid = current->fsgid;
+                save->luc.luc_cap = current->cap_effective;
+
+                current->fsuid = uc->luc_fsuid;
+                current->fsgid = uc->luc_fsgid;
+                current->cap_effective = uc->luc_cap;
+                push_group_info(save, uc->luc_uce);
         }
+        current->fs->umask = 0; /* umask already applied on client */
         set_fs(new_ctx->fs);
-        set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
+        ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
 
         /*
         CDEBUG(D_INFO,
-               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
                new_ctx, current, current->fs, current->fs->pwd,
                atomic_read(&current->fs->pwd->d_count),
                atomic_read(&current->fs->pwd->d_inode->i_count),
@@ -129,8 +175,8 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
 }
 EXPORT_SYMBOL(push_ctxt);
 
-void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
-              struct obd_ucred *uc)
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+              struct lvfs_ucred *uc)
 {
         //printk("pc0");
         ASSERT_CTXT_MAGIC(saved->magic);
@@ -139,7 +185,7 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
 
         /*
         CDEBUG(D_INFO,
-               " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+               " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
                new_ctx, current, current->fs, current->fs->pwd,
                atomic_read(&current->fs->pwd->d_count),
                atomic_read(&current->fs->pwd->d_inode->i_count),
@@ -148,26 +194,27 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
                atomic_read(&current->fs->pwdmnt->mnt_count));
         */
 
-        LASSERT(current->fs->pwd == new_ctx->pwd);
-        LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
+        LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n",
+                 current->fs->pwd, new_ctx->pwd);
+        LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n",
+                 current->fs->pwdmnt, new_ctx->pwdmnt);
 
         set_fs(saved->fs);
-        set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
+        ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
 
         dput(saved->pwd);
         mntput(saved->pwdmnt);
+        current->fs->umask = saved->luc.luc_umask;
         if (uc) {
-                current->fsuid = saved->ouc.ouc_fsuid;
-                current->fsgid = saved->ouc.ouc_fsgid;
-                current->cap_effective = saved->ouc.ouc_cap;
-                current->ngroups = saved->ngroups;
-                current->groups[0] = saved->ouc.ouc_suppgid1;
-                current->groups[1] = saved->ouc.ouc_suppgid2;
+                current->fsuid = saved->luc.luc_fsuid;
+                current->fsgid = saved->luc.luc_fsgid;
+                current->cap_effective = saved->luc.luc_cap;
+                pop_group_info(saved, uc->luc_uce);
         }
 
         /*
         CDEBUG(D_INFO,
-               "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+               "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
                saved, current, current->fs, current->fs->pwd,
                atomic_read(&current->fs->pwd->d_count),
                atomic_read(&current->fs->pwd->d_inode->i_count),
@@ -179,23 +226,32 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
 EXPORT_SYMBOL(pop_ctxt);
 
 /* utility to make a file */
-struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
+struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
 {
         struct dentry *dchild;
         int err = 0;
         ENTRY;
 
         ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
-        CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
+        CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
 
         dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
         if (dchild->d_inode) {
-                if (!S_ISREG(dchild->d_inode->i_mode))
+                int old_mode = dchild->d_inode->i_mode;
+                if (!S_ISREG(old_mode))
                         GOTO(out_err, err = -EEXIST);
 
+                /* Fixup file permissions if necessary */
+                if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
+                        CWARN("fixing permissions on %s from %o to %o\n",
+                              name, old_mode, mode);
+                        dchild->d_inode->i_mode = (mode & S_IALLUGO) |
+                                                  (old_mode & ~S_IALLUGO);
+                        mark_inode_dirty(dchild->d_inode);
+                }
                 GOTO(out_up, dchild);
         }
 
@@ -215,22 +271,36 @@ out_up:
 EXPORT_SYMBOL(simple_mknod);
 
 /* utility to make a directory */
-struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
+struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
 {
         struct dentry *dchild;
         int err = 0;
         ENTRY;
 
         ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
-        CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
+        CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
         dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
                 GOTO(out_up, dchild);
 
         if (dchild->d_inode) {
-                if (!S_ISDIR(dchild->d_inode->i_mode))
+                int old_mode = dchild->d_inode->i_mode;
+                if (!S_ISDIR(old_mode)) {
+                        CERROR("found %s (%lu/%u) is mode %o\n", name,
+                               dchild->d_inode->i_ino,
+                               dchild->d_inode->i_generation, old_mode);
                         GOTO(out_err, err = -ENOTDIR);
-
+                }
+
+                /* Fixup directory permissions if necessary */
+                if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
+                        CDEBUG(D_CONFIG, 
+                               "fixing permissions on %s from %o to %o\n",
+                               name, old_mode, mode);
+                        dchild->d_inode->i_mode = (mode & S_IALLUGO) |
+                                                  (old_mode & ~S_IALLUGO);
+                        mark_inode_dirty(dchild->d_inode);
+                }
                 GOTO(out_up, dchild);
         }
 
@@ -248,6 +318,37 @@ out_up:
 }
 EXPORT_SYMBOL(simple_mkdir);
 
+/* utility to rename a file */
+int lustre_rename(struct dentry *dir, char *oldname, char *newname)
+{
+        struct dentry *dchild_old, *dchild_new;
+        int err = 0;
+        ENTRY;
+
+        ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
+        CDEBUG(D_INODE, "renaming file %.*s to %.*s\n", 
+               (int)strlen(oldname), oldname, (int)strlen(newname), newname);
+
+        dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
+        if (IS_ERR(dchild_old))
+                RETURN(PTR_ERR(dchild_old));
+
+        if (!dchild_old->d_inode) 
+                GOTO(put_old, err = -ENOENT);
+
+        dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
+        if (IS_ERR(dchild_new))
+                GOTO(put_old, err = PTR_ERR(dchild_new));
+
+        err = vfs_rename(dir->d_inode, dchild_old, dir->d_inode, dchild_new);
+
+        dput(dchild_new);
+put_old:
+        dput(dchild_old);
+        RETURN(err);
+}
+EXPORT_SYMBOL(lustre_rename);
+
 /*
  * Read a file from within kernel context.  Prior to calling this
  * function we should already have done a push_ctxt().
@@ -299,7 +400,7 @@ int lustre_fsync(struct file *file)
 }
 EXPORT_SYMBOL(lustre_fsync);
 
-struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de,
+struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
                              int flags)
 {
         mntget(ctxt->pwdmnt);
@@ -307,18 +408,26 @@ struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de,
 }
 EXPORT_SYMBOL(l_dentry_open);
 
+#ifdef HAVE_VFS_READDIR_U64_INO
+static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
+                     u64 ino, unsigned int d_type)
+#else
 static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
                      ino_t ino, unsigned int d_type)
+#endif
 {
         struct l_linux_dirent *dirent;
         struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
-        
+
         dirent = buf->lrc_dirent;
         if (dirent)
-               dirent->lld_off = offset; 
+               dirent->lld_off = offset;
 
         OBD_ALLOC(dirent, sizeof(*dirent));
 
+        if (!dirent)
+                return -ENOMEM;
+
         list_add_tail(&dirent->lld_list, buf->lrc_list);
 
         buf->lrc_dirent = dirent;
@@ -352,7 +461,54 @@ EXPORT_SYMBOL(l_readdir);
 EXPORT_SYMBOL(obd_memory);
 EXPORT_SYMBOL(obd_memmax);
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#ifdef LUSTRE_KERNEL_VERSION
+#ifndef HAVE_CLEAR_RDONLY_ON_PUT
+#error rdonly patchset must be updated [cfs bz11248]
+#endif
+
+void dev_set_rdonly(lvfs_sbdev_type dev);
+int dev_check_rdonly(lvfs_sbdev_type dev);
+
+void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
+{
+        lvfs_sbdev_sync(dev);
+        if (jdev && (jdev != dev)) {
+                CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
+                       (long)jdev);
+                dev_set_rdonly(jdev);
+        }
+        CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
+        dev_set_rdonly(dev);
+}
+
+int lvfs_check_rdonly(lvfs_sbdev_type dev)
+{
+        return dev_check_rdonly(dev);
+}
+
+EXPORT_SYMBOL(__lvfs_set_rdonly);
+EXPORT_SYMBOL(lvfs_check_rdonly);
+#endif /* LUSTRE_KERNEL_VERSION */
+
+int lvfs_check_io_health(struct obd_device *obd, struct file *file)
+{
+        char *write_page = NULL;
+        loff_t offset = 0;
+        int rc = 0;
+        ENTRY;
+
+        OBD_ALLOC(write_page, CFS_PAGE_SIZE);
+        if (!write_page)
+                RETURN(-ENOMEM);
+        
+        rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
+       
+        OBD_FREE(write_page, CFS_PAGE_SIZE);
+
+        CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
+        RETURN(rc); 
+}
+EXPORT_SYMBOL(lvfs_check_io_health);
 
 static int __init lvfs_linux_init(void)
 {
@@ -368,6 +524,7 @@ static void __exit lvfs_linux_exit(void)
         CDEBUG(leaked ? D_ERROR : D_INFO,
                "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
 
+        EXIT;
         return;
 }
 
@@ -377,10 +534,3 @@ MODULE_LICENSE("GPL");
 
 module_init(lvfs_linux_init);
 module_exit(lvfs_linux_exit);
-
-#else
-
-#warning "lvfs_linux_init() and fsfilt_ext3_exit() aren't called on 2.6. MUST be fixed"
-
-
-#endif