X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flvfs%2Flvfs_linux.c;h=a4538cc28c1375b38549c84824e44f197143c3f5;hp=935548e763f2c73382454492143b9789fda8e89a;hb=1e149bef8d832aade6c04b65b8308b71c6d523ed;hpb=0343ecb7de2dae4cf8016416bf1af5d34e9a746d diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 935548e..a4538cc 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -1,95 +1,132 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * lustre/lib/fsfilt_ext3.c - * Lustre filesystem abstraction routines + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * This file is part of Lustre, http://www.lustre.org. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/lvfs/lvfs_linux.c + * + * Author: Andreas Dilger */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_FILTER #include #include #include -#include #include #include #include #include -#include -#include -#include -#include +#include +#include +#include #include #include #include -#include -#include "lvfs_internal.h" - -#include -#include +#include -atomic_t obd_memory; -int obd_memmax; +#include +#include +struct lprocfs_stats *obd_memory = NULL; +EXPORT_SYMBOL(obd_memory); +/* refine later and change to seqlock or simlar from libcfs */ /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) -# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds())) -# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds())) +# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\ + msg) +# define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg) #else # define ASSERT_CTXT_MAGIC(magic) do {} while(0) # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0) # define ASSERT_KERNEL_CTXT(msg) do {} while(0) #endif +static void push_group_info(struct lvfs_run_ctxt *save, + struct group_info *ginfo) +{ + if (!ginfo) { + save->ngroups = current_ngroups; + current_ngroups = 0; + } else { + struct cred *cred; + task_lock(current); + save->group_info = current_cred()->group_info; + if ((cred = prepare_creds())) { + cred->group_info = ginfo; + commit_creds(cred); + } + task_unlock(current); + } +} + +static void pop_group_info(struct lvfs_run_ctxt *save, + struct group_info *ginfo) +{ + if (!ginfo) { + current_ngroups = save->ngroups; + } else { + struct cred *cred; + task_lock(current); + if ((cred = prepare_creds())) { + cred->group_info = save->group_info; + commit_creds(cred); + } + task_unlock(current); + } +} + /* push / pop to root of obd store */ -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { + /* if there is underlaying dt_device then push_ctxt is not needed */ + if (new_ctx->dt != NULL) + return; + //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n"); ASSERT_CTXT_MAGIC(new_ctx->magic); OBD_SET_CTXT_MAGIC(save); - /* - CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - save, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ - save->fs = get_fs(); - LASSERT(atomic_read(¤t->fs->pwd->d_count)); - LASSERT(atomic_read(&new_ctx->pwd->d_count)); - save->pwd = dget(current->fs->pwd); - save->pwdmnt = mntget(current->fs->pwdmnt); - save->ngroups = current->ngroups; + LASSERT(d_refcount(cfs_fs_pwd(current->fs))); + LASSERT(d_refcount(new_ctx->pwd)); + save->pwd = dget(cfs_fs_pwd(current->fs)); + save->pwdmnt = mntget(cfs_fs_mnt(current->fs)); + save->luc.luc_umask = cfs_curproc_umask(); + save->ngroups = current_cred()->group_info->ngroups; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -97,290 +134,162 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, LASSERT(new_ctx->pwdmnt); if (uc) { - save->ouc.ouc_fsuid = current->fsuid; - save->ouc.ouc_fsgid = current->fsgid; - save->ouc.ouc_cap = current->cap_effective; - save->ouc.ouc_suppgid1 = current->groups[0]; - save->ouc.ouc_suppgid2 = current->groups[1]; - - current->fsuid = uc->ouc_fsuid; - current->fsgid = uc->ouc_fsgid; - current->cap_effective = uc->ouc_cap; - current->ngroups = 0; - - if (uc->ouc_suppgid1 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid1; - if (uc->ouc_suppgid2 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid2; + struct cred *cred; + save->luc.luc_uid = current_uid(); + save->luc.luc_gid = current_gid(); + save->luc.luc_fsuid = current_fsuid(); + save->luc.luc_fsgid = current_fsgid(); + save->luc.luc_cap = current_cap(); + + if ((cred = prepare_creds())) { + cred->uid = uc->luc_uid; + cred->gid = uc->luc_gid; + cred->fsuid = uc->luc_fsuid; + cred->fsgid = uc->luc_fsgid; + cred->cap_effective = uc->luc_cap; + commit_creds(cred); + } + + push_group_info(save, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } + current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); - set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); - - /* - CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - new_ctx, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ + ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); } EXPORT_SYMBOL(push_ctxt); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { - //printk("pc0"); + /* if there is underlaying dt_device then pop_ctxt is not needed */ + if (new_ctx->dt != NULL) + return; + ASSERT_CTXT_MAGIC(saved->magic); - //printk("pc1"); ASSERT_KERNEL_CTXT("popping non-kernel context!\n"); - /* - CDEBUG(D_INFO, - " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - new_ctx, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ - - LASSERT(current->fs->pwd == new_ctx->pwd); - LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt); + LASSERTF(cfs_fs_pwd(current->fs) == new_ctx->pwd, "%p != %p\n", + cfs_fs_pwd(current->fs), new_ctx->pwd); + LASSERTF(cfs_fs_mnt(current->fs) == new_ctx->pwdmnt, "%p != %p\n", + cfs_fs_mnt(current->fs), new_ctx->pwdmnt); set_fs(saved->fs); - set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd); + ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd); dput(saved->pwd); mntput(saved->pwdmnt); + current->fs->umask = saved->luc.luc_umask; if (uc) { - current->fsuid = saved->ouc.ouc_fsuid; - current->fsgid = saved->ouc.ouc_fsgid; - current->cap_effective = saved->ouc.ouc_cap; - current->ngroups = saved->ngroups; - current->groups[0] = saved->ouc.ouc_suppgid1; - current->groups[1] = saved->ouc.ouc_suppgid2; + struct cred *cred; + if ((cred = prepare_creds())) { + cred->uid = saved->luc.luc_uid; + cred->gid = saved->luc.luc_gid; + cred->fsuid = saved->luc.luc_fsuid; + cred->fsgid = saved->luc.luc_fsgid; + cred->cap_effective = saved->luc.luc_cap; + commit_creds(cred); + } + + pop_group_info(saved, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } - - /* - CDEBUG(D_INFO, - "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - saved, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ } EXPORT_SYMBOL(pop_ctxt); -/* utility to make a file */ -struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) -{ - struct dentry *dchild; - int err = 0; - ENTRY; - - ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); - CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name); - - dchild = ll_lookup_one_len(name, dir, strlen(name)); - if (IS_ERR(dchild)) - GOTO(out_up, dchild); - - if (dchild->d_inode) { - if (!S_ISREG(dchild->d_inode->i_mode)) - GOTO(out_err, err = -EEXIST); - - GOTO(out_up, dchild); - } - - err = ll_vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG, - NULL); - if (err) - GOTO(out_err, err); - - RETURN(dchild); - -out_err: - dput(dchild); - dchild = ERR_PTR(err); -out_up: - return dchild; -} -EXPORT_SYMBOL(simple_mknod); - -/* utility to make a directory */ -struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode) +/* utility to rename a file */ +int lustre_rename(struct dentry *dir, struct vfsmount *mnt, + char *oldname, char *newname) { - struct dentry *dchild; + struct dentry *dchild_old, *dchild_new; int err = 0; ENTRY; - ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); - CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name); - dchild = ll_lookup_one_len(name, dir, strlen(name)); - if (IS_ERR(dchild)) - GOTO(out_up, dchild); + ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n"); + CDEBUG(D_INODE, "renaming file %.*s to %.*s\n", + (int)strlen(oldname), oldname, (int)strlen(newname), newname); - if (dchild->d_inode) { - if (!S_ISDIR(dchild->d_inode->i_mode)) - GOTO(out_err, err = -ENOTDIR); + dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname)); + if (IS_ERR(dchild_old)) + RETURN(PTR_ERR(dchild_old)); - GOTO(out_up, dchild); - } + if (!dchild_old->d_inode) + GOTO(put_old, err = -ENOENT); - err = vfs_mkdir(dir->d_inode, dchild, mode); - if (err) - GOTO(out_err, err); + dchild_new = ll_lookup_one_len(newname, dir, strlen(newname)); + if (IS_ERR(dchild_new)) + GOTO(put_old, err = PTR_ERR(dchild_new)); - RETURN(dchild); + err = ll_vfs_rename(dir->d_inode, dchild_old, mnt, + dir->d_inode, dchild_new, mnt); -out_err: - dput(dchild); - dchild = ERR_PTR(err); -out_up: - return dchild; + dput(dchild_new); +put_old: + dput(dchild_old); + RETURN(err); } -EXPORT_SYMBOL(simple_mkdir); +EXPORT_SYMBOL(lustre_rename); -/* - * Read a file from within kernel context. Prior to calling this - * function we should already have done a push_ctxt(). - */ -int lustre_fread(struct file *file, void *buf, int len, loff_t *off) +/* Note: dput(dchild) will *not* be called if there is an error */ +struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de, + int flags) { - ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n"); - if (!file || !file->f_op || !file->f_op->read || !off) - RETURN(-ENOSYS); - - return file->f_op->read(file, buf, len, off); -} -EXPORT_SYMBOL(lustre_fread); - -/* - * Write a file from within kernel context. Prior to calling this - * function we should already have done a push_ctxt(). - */ -int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off) -{ - ENTRY; - ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n"); - if (!file) - RETURN(-ENOENT); - if (!file->f_op) - RETURN(-ENOSYS); - if (!off) - RETURN(-EINVAL); - - if (!file->f_op->write) - RETURN(-EROFS); - - RETURN(file->f_op->write(file, buf, len, off)); -} -EXPORT_SYMBOL(lustre_fwrite); - -/* - * Sync a file from within kernel context. Prior to calling this - * function we should already have done a push_ctxt(). - */ -int lustre_fsync(struct file *file) -{ - ENTRY; - ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n"); - if (!file || !file->f_op || !file->f_op->fsync) - RETURN(-ENOSYS); - - RETURN(file->f_op->fsync(file, file->f_dentry, 0)); -} -EXPORT_SYMBOL(lustre_fsync); - -struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de, - int flags) -{ - mntget(ctxt->pwdmnt); - return dentry_open(de, ctxt->pwdmnt, flags); + struct path path = { + .dentry = de, + .mnt = ctxt->pwdmnt, + }; + return ll_dentry_open(&path, flags, current_cred()); } EXPORT_SYMBOL(l_dentry_open); -static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset, - ino_t ino, unsigned int d_type) +#ifdef LPROCFS +__s64 lprocfs_read_helper(struct lprocfs_counter *lc, + struct lprocfs_counter_header *header, + enum lprocfs_stats_flags flags, + enum lprocfs_fields_flags field) { - struct l_linux_dirent *dirent; - struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf; - - dirent = buf->lrc_dirent; - if (dirent) - dirent->lld_off = offset; - - OBD_ALLOC(dirent, sizeof(*dirent)); - - list_add_tail(&dirent->lld_list, buf->lrc_list); - - buf->lrc_dirent = dirent; - dirent->lld_ino = ino; - LASSERT(sizeof(dirent->lld_name) >= namlen + 1); - memcpy(dirent->lld_name, name, namlen); - - return 0; + __s64 ret = 0; + + if (lc == NULL || header == NULL) + RETURN(0); + + switch (field) { + case LPROCFS_FIELDS_FLAGS_CONFIG: + ret = header->lc_config; + break; + case LPROCFS_FIELDS_FLAGS_SUM: + ret = lc->lc_sum; + if ((flags & LPROCFS_STATS_FLAG_IRQ_SAFE) != 0) + ret += lc->lc_sum_irq; + break; + case LPROCFS_FIELDS_FLAGS_MIN: + ret = lc->lc_min; + break; + case LPROCFS_FIELDS_FLAGS_MAX: + ret = lc->lc_max; + break; + case LPROCFS_FIELDS_FLAGS_AVG: + ret = (lc->lc_max - lc->lc_min) / 2; + break; + case LPROCFS_FIELDS_FLAGS_SUMSQUARE: + ret = lc->lc_sumsquare; + break; + case LPROCFS_FIELDS_FLAGS_COUNT: + ret = lc->lc_count; + break; + default: + break; + }; + + RETURN(ret); } +EXPORT_SYMBOL(lprocfs_read_helper); +#endif /* LPROCFS */ -long l_readdir(struct file *file, struct list_head *dentry_list) -{ - struct l_linux_dirent *lastdirent; - struct l_readdir_callback buf; - int error; - - buf.lrc_dirent = NULL; - buf.lrc_list = dentry_list; - - error = vfs_readdir(file, l_filldir, &buf); - if (error < 0) - return error; - - lastdirent = buf.lrc_dirent; - if (lastdirent) - lastdirent->lld_off = file->f_pos; - - return 0; -} -EXPORT_SYMBOL(l_readdir); -EXPORT_SYMBOL(obd_memory); -EXPORT_SYMBOL(obd_memmax); - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -static int __init lvfs_linux_init(void) -{ - RETURN(0); -} - -static void __exit lvfs_linux_exit(void) -{ - int leaked; - ENTRY; - - leaked = atomic_read(&obd_memory); - CDEBUG(leaked ? D_ERROR : D_INFO, - "obd mem max: %d leaked: %d\n", obd_memmax, leaked); - - return; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_AUTHOR("Sun Microsystems, Inc. "); MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1"); MODULE_LICENSE("GPL"); - -module_init(lvfs_linux_init); -module_exit(lvfs_linux_exit); - -#else - -#warning "lvfs_linux_init() and fsfilt_ext3_exit() aren't called on 2.6. MUST be fixed" - - -#endif