Whamcloud - gitweb
LU-2158 lvfs: move obdclass related functions to obclass
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
index 2855b52..a4f9340 100644 (file)
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
  *
- *  lustre/lib/fsfilt_ext3.c
- *  Lustre filesystem abstraction routines
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
- *   Author: Andreas Dilger <adilger@clusterfs.com>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/lvfs/lvfs_linux.c
+ *
+ * Author: Andreas Dilger <adilger@clusterfs.com>
  */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/version.h>
 #include <linux/fs.h>
 #include <asm/unistd.h>
-#include <linux/jbd.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
 #include <linux/version.h>
-#include <linux/kp30.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
+#include <libcfs/libcfs.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/lustre_compat25.h>
-#include <linux/lvfs.h>
-#include "lvfs_internal.h"
+#include <lvfs.h>
 
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-
-atomic_t obd_memory;
-int obd_memmax;
+#include <obd.h>
+#include <lustre_lib.h>
 
+struct lprocfs_stats *obd_memory = NULL;
+EXPORT_SYMBOL(obd_memory);
+/* refine later and change to seqlock or similar from libcfs */
 
 /* Debugging check only needed during development */
 #ifdef OBD_CTXT_DEBUG
 # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
-# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds()))
-# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds()))
+# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\
+                                              msg)
+# define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg)
 #else
 # define ASSERT_CTXT_MAGIC(magic) do {} while(0)
 # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
 # define ASSERT_KERNEL_CTXT(msg) do {} while(0)
 #endif
 
+static void push_group_info(struct lvfs_run_ctxt *save,
+                            struct group_info *ginfo)
+{
+        if (!ginfo) {
+                save->ngroups = current_ngroups;
+                current_ngroups = 0;
+        } else {
+                struct cred *cred;
+                task_lock(current);
+                save->group_info = current_cred()->group_info;
+                if ((cred = prepare_creds())) {
+                        cred->group_info = ginfo;
+                        commit_creds(cred);
+                }
+                task_unlock(current);
+        }
+}
+
+static void pop_group_info(struct lvfs_run_ctxt *save,
+                           struct group_info *ginfo)
+{
+        if (!ginfo) {
+                current_ngroups = save->ngroups;
+        } else {
+                struct cred *cred;
+                task_lock(current);
+                if ((cred = prepare_creds())) {
+                        cred->group_info = save->group_info;
+                        commit_creds(cred);
+                }
+                task_unlock(current);
+        }
+}
+
 /* push / pop to root of obd store */
-void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
-               struct obd_ucred *uc)
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+               struct lvfs_ucred *uc)
 {
+       /* if there is an underlying dt_device then push_ctxt is not needed */
+       if (new_ctx->dt != NULL)
+               return;
+
         //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n");
         ASSERT_CTXT_MAGIC(new_ctx->magic);
         OBD_SET_CTXT_MAGIC(save);
 
-        /*
-        CDEBUG(D_INFO,
-               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               save, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
-
         save->fs = get_fs();
-        LASSERT(atomic_read(&current->fs->pwd->d_count));
-        LASSERT(atomic_read(&new_ctx->pwd->d_count));
-        save->pwd = dget(current->fs->pwd);
-        save->pwdmnt = mntget(current->fs->pwdmnt);
-        save->ngroups = current->ngroups;
+       LASSERT(d_refcount(cfs_fs_pwd(current->fs)));
+       LASSERT(d_refcount(new_ctx->pwd));
+        save->pwd = dget(cfs_fs_pwd(current->fs));
+        save->pwdmnt = mntget(cfs_fs_mnt(current->fs));
+        save->luc.luc_umask = cfs_curproc_umask();
+        save->ngroups = current_cred()->group_info->ngroups;
 
         LASSERT(save->pwd);
         LASSERT(save->pwdmnt);
@@ -97,281 +134,159 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
         LASSERT(new_ctx->pwdmnt);
 
         if (uc) {
-                save->ouc.ouc_fsuid = current->fsuid;
-                save->ouc.ouc_fsgid = current->fsgid;
-                save->ouc.ouc_cap = current->cap_effective;
-                save->ouc.ouc_suppgid1 = current->groups[0];
-                save->ouc.ouc_suppgid2 = current->groups[1];
-
-                current->fsuid = uc->ouc_fsuid;
-                current->fsgid = uc->ouc_fsgid;
-                current->cap_effective = uc->ouc_cap;
-                current->ngroups = 0;
-
-                if (uc->ouc_suppgid1 != -1)
-                        current->groups[current->ngroups++] = uc->ouc_suppgid1;
-                if (uc->ouc_suppgid2 != -1)
-                        current->groups[current->ngroups++] = uc->ouc_suppgid2;
+                struct cred *cred;
+                save->luc.luc_uid = current_uid();
+                save->luc.luc_gid = current_gid();
+                save->luc.luc_fsuid = current_fsuid();
+                save->luc.luc_fsgid = current_fsgid();
+                save->luc.luc_cap = current_cap();
+
+                if ((cred = prepare_creds())) {
+                        cred->uid = uc->luc_uid;
+                        cred->gid = uc->luc_gid;
+                        cred->fsuid = uc->luc_fsuid;
+                        cred->fsgid = uc->luc_fsgid;
+                        cred->cap_effective = uc->luc_cap;
+                        commit_creds(cred);
+                }
+
+                push_group_info(save,
+                                uc->luc_ginfo ?:
+                                uc->luc_identity ? uc->luc_identity->mi_ginfo :
+                                                   NULL);
         }
+        current->fs->umask = 0; /* umask already applied on client */
         set_fs(new_ctx->fs);
-        set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
-
-        /*
-        CDEBUG(D_INFO,
-               "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               new_ctx, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
+        ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
 }
 EXPORT_SYMBOL(push_ctxt);
 
-void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx,
-              struct obd_ucred *uc)
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+              struct lvfs_ucred *uc)
 {
-        //printk("pc0");
+       /* if there is an underlying dt_device then pop_ctxt is not needed */
+       if (new_ctx->dt != NULL)
+               return;
+
         ASSERT_CTXT_MAGIC(saved->magic);
-        //printk("pc1");
         ASSERT_KERNEL_CTXT("popping non-kernel context!\n");
 
-        /*
-        CDEBUG(D_INFO,
-               " = pop  %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               new_ctx, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
-
-        LASSERT(current->fs->pwd == new_ctx->pwd);
-        LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt);
+        LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n",
+                 cfs_fs_pwd(current->fs), new_ctx->pwd);
+        LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n",
+                 cfs_fs_mnt(current->fs), new_ctx->pwdmnt);
 
         set_fs(saved->fs);
-        set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
+        ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd);
 
         dput(saved->pwd);
         mntput(saved->pwdmnt);
+        current->fs->umask = saved->luc.luc_umask;
         if (uc) {
-                current->fsuid = saved->ouc.ouc_fsuid;
-                current->fsgid = saved->ouc.ouc_fsgid;
-                current->cap_effective = saved->ouc.ouc_cap;
-                current->ngroups = saved->ngroups;
-                current->groups[0] = saved->ouc.ouc_suppgid1;
-                current->groups[1] = saved->ouc.ouc_suppgid2;
+                struct cred *cred;
+                if ((cred = prepare_creds())) {
+                        cred->uid = saved->luc.luc_uid;
+                        cred->gid = saved->luc.luc_gid;
+                        cred->fsuid = saved->luc.luc_fsuid;
+                        cred->fsgid = saved->luc.luc_fsgid;
+                        cred->cap_effective = saved->luc.luc_cap;
+                        commit_creds(cred);
+                }
+
+                pop_group_info(saved,
+                               uc->luc_ginfo ?:
+                               uc->luc_identity ? uc->luc_identity->mi_ginfo :
+                                                  NULL);
         }
-
-        /*
-        CDEBUG(D_INFO,
-               "= pop  %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
-               saved, current, current->fs, current->fs->pwd,
-               atomic_read(&current->fs->pwd->d_count),
-               atomic_read(&current->fs->pwd->d_inode->i_count),
-               current->fs->pwd->d_name.len, current->fs->pwd->d_name.name,
-               current->fs->pwdmnt,
-               atomic_read(&current->fs->pwdmnt->mnt_count));
-        */
 }
 EXPORT_SYMBOL(pop_ctxt);
 
-/* utility to make a file */
-struct dentry *simple_mknod(struct dentry *dir, char *name, int mode)
-{
-        struct dentry *dchild;
-        int err = 0;
-        ENTRY;
-
-        ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
-        CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
-
-        dchild = ll_lookup_one_len(name, dir, strlen(name));
-        if (IS_ERR(dchild))
-                GOTO(out_up, dchild);
-
-        if (dchild->d_inode) {
-                if (!S_ISREG(dchild->d_inode->i_mode))
-                        GOTO(out_err, err = -EEXIST);
-
-                GOTO(out_up, dchild);
-        }
-
-        err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG,
-                            NULL);
-        if (err)
-                GOTO(out_err, err);
-
-        RETURN(dchild);
-
-out_err:
-        dput(dchild);
-        dchild = ERR_PTR(err);
-out_up:
-        return dchild;
-}
-EXPORT_SYMBOL(simple_mknod);
-
-/* utility to make a directory */
-struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode)
+/* utility to rename a file */
+int lustre_rename(struct dentry *dir, struct vfsmount *mnt,
+                  char *oldname, char *newname)
 {
-        struct dentry *dchild;
+        struct dentry *dchild_old, *dchild_new;
         int err = 0;
         ENTRY;
 
-        ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
-        CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
-        dchild = ll_lookup_one_len(name, dir, strlen(name));
-        if (IS_ERR(dchild))
-                GOTO(out_up, dchild);
-
-        if (dchild->d_inode) {
-                if (!S_ISDIR(dchild->d_inode->i_mode))
-                        GOTO(out_err, err = -ENOTDIR);
-
-                GOTO(out_up, dchild);
-        }
-
-        err = vfs_mkdir(dir->d_inode, dchild, mode);
-        if (err)
-                GOTO(out_err, err);
-
-        RETURN(dchild);
-
-out_err:
-        dput(dchild);
-        dchild = ERR_PTR(err);
-out_up:
-        return dchild;
-}
-EXPORT_SYMBOL(simple_mkdir);
+        ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
+        CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
+               (int)strlen(oldname), oldname, (int)strlen(newname), newname);
 
-/*
- * Read a file from within kernel context.  Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fread(struct file *file, void *buf, int len, loff_t *off)
-{
-        ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n");
-        if (!file || !file->f_op || !file->f_op->read || !off)
-                RETURN(-ENOSYS);
+        dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname));
+        if (IS_ERR(dchild_old))
+                RETURN(PTR_ERR(dchild_old));
 
-        return file->f_op->read(file, buf, len, off);
-}
-EXPORT_SYMBOL(lustre_fread);
+        if (!dchild_old->d_inode)
+                GOTO(put_old, err = -ENOENT);
 
-/*
- * Write a file from within kernel context.  Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off)
-{
-        ENTRY;
-        ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
-        if (!file)
-                RETURN(-ENOENT);
-        if (!file->f_op)
-                RETURN(-ENOSYS);
-        if (!off)
-                RETURN(-EINVAL);
-
-        if (!file->f_op->write)
-                RETURN(-EROFS);
-
-        RETURN(file->f_op->write(file, buf, len, off));
-}
-EXPORT_SYMBOL(lustre_fwrite);
+        dchild_new = ll_lookup_one_len(newname, dir, strlen(newname));
+        if (IS_ERR(dchild_new))
+                GOTO(put_old, err = PTR_ERR(dchild_new));
 
-/*
- * Sync a file from within kernel context.  Prior to calling this
- * function we should already have done a push_ctxt().
- */
-int lustre_fsync(struct file *file)
-{
-        ENTRY;
-        ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
-        if (!file || !file->f_op || !file->f_op->fsync)
-                RETURN(-ENOSYS);
+        err = ll_vfs_rename(dir->d_inode, dchild_old, mnt,
+                            dir->d_inode, dchild_new, mnt);
 
-        RETURN(file->f_op->fsync(file, file->f_dentry, 0));
+        dput(dchild_new);
+put_old:
+        dput(dchild_old);
+        RETURN(err);
 }
-EXPORT_SYMBOL(lustre_fsync);
+EXPORT_SYMBOL(lustre_rename);
 
-struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de,
+/* Note: dput(de) will be called if there is an error */
+struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de,
                              int flags)
 {
         mntget(ctxt->pwdmnt);
-        return dentry_open(de, ctxt->pwdmnt, flags);
+        return ll_dentry_open(de, ctxt->pwdmnt, flags, current_cred());
 }
 EXPORT_SYMBOL(l_dentry_open);
 
-static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset,
-                     ino_t ino, unsigned int d_type)
-{
-        struct l_linux_dirent *dirent;
-        struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf;
-        
-        dirent = buf->lrc_dirent;
-        if (dirent)
-               dirent->lld_off = offset; 
-
-        OBD_ALLOC(dirent, sizeof(*dirent));
-
-        list_add_tail(&dirent->lld_list, buf->lrc_list);
-
-        buf->lrc_dirent = dirent;
-        dirent->lld_ino = ino;
-        LASSERT(sizeof(dirent->lld_name) >= namlen + 1);
-        memcpy(dirent->lld_name, name, namlen);
-
-        return 0;
-}
-
-long l_readdir(struct file *file, struct list_head *dentry_list)
-{
-        struct l_linux_dirent *lastdirent;
-        struct l_readdir_callback buf;
-        int error;
-
-        buf.lrc_dirent = NULL;
-        buf.lrc_list = dentry_list; 
-
-        error = vfs_readdir(file, l_filldir, &buf);
-        if (error < 0)
-                return error;
-
-        lastdirent = buf.lrc_dirent;
-        if (lastdirent)
-                lastdirent->lld_off = file->f_pos;
-
-        return 0; 
-}
-EXPORT_SYMBOL(l_readdir);
-EXPORT_SYMBOL(obd_memory);
-EXPORT_SYMBOL(obd_memmax);
-
-static int __init lvfs_linux_init(void)
-{
-        RETURN(0);
-}
-
-static void __exit lvfs_linux_exit(void)
+#ifdef LPROCFS
+__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+                         struct lprocfs_counter_header *header,
+                         enum lprocfs_stats_flags flags,
+                         enum lprocfs_fields_flags field)
 {
-        int leaked;
-        ENTRY;
-
-        leaked = atomic_read(&obd_memory);
-        CDEBUG(leaked ? D_ERROR : D_INFO,
-               "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
-
-        return;
+       __s64 ret = 0;
+
+       if (lc == NULL || header == NULL)
+               RETURN(0);
+
+       switch (field) {
+               case LPROCFS_FIELDS_FLAGS_CONFIG:
+                       ret = header->lc_config;
+                       break;
+               case LPROCFS_FIELDS_FLAGS_SUM:
+                       ret = lc->lc_sum;
+                       if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0)
+                               ret += lc->lc_sum_irq;
+                       break;
+               case LPROCFS_FIELDS_FLAGS_MIN:
+                       ret = lc->lc_min;
+                       break;
+               case LPROCFS_FIELDS_FLAGS_MAX:
+                       ret = lc->lc_max;
+                       break;
+               case LPROCFS_FIELDS_FLAGS_AVG:
+                       ret = (lc->lc_max - lc->lc_min) / 2;
+                       break;
+               case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
+                       ret = lc->lc_sumsquare;
+                       break;
+               case LPROCFS_FIELDS_FLAGS_COUNT:
+                       ret = lc->lc_count;
+                       break;
+               default:
+                       break;
+       };
+
+       RETURN(ret);
 }
+EXPORT_SYMBOL(lprocfs_read_helper);
+#endif /* LPROCFS */
 
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
 MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1");
 MODULE_LICENSE("GPL");
-
-module_init(lvfs_linux_init);
-module_exit(lvfs_linux_exit);