X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Flvfs%2Flvfs_linux.c;h=8dbf9e194f3c450ed37dbef9db2387fc4aad719b;hb=19d9d3d7b5eb88aa7de74a0ad918419e2acbd5c2;hp=112a1ad6ed4f621bdd6c430ec6617052d50dfcfd;hpb=4161eaece092a47b24fe2da5fcb3b19332ccd2ea;p=fs%2Flustre-release.git diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 112a1ad..8dbf9e1 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -1,26 +1,41 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/lib/lvfs_linux.c - * Lustre filesystem abstraction routines + * GPL HEADER START * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of Lustre, http://www.lustre.org. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/lvfs/lvfs_linux.c + * + * Author: Andreas Dilger */ #ifndef EXPORT_SYMTAB @@ -36,22 +51,24 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include #include #include #include -#include +#include #include "lvfs_internal.h" -#include -#include -#include /* for mds_grp_hash_entry */ +#include +#include +#include -atomic_t obd_memory; -int obd_memmax; +__u64 obd_max_pages = 0; +__u64 obd_max_alloc = 0; +struct lprocfs_stats *obd_memory = NULL; +spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED; +/* refine later and change to seqlock or simlar from libcfs */ /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG @@ -59,7 +76,6 @@ int obd_memmax; # define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\ msg) # define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg) - #else # define ASSERT_CTXT_MAGIC(magic) do {} while(0) # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0) @@ -73,24 +89,10 @@ static void push_group_info(struct lvfs_run_ctxt *save, save->ngroups = current_ngroups; current_ngroups = 0; } else { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) task_lock(current); save->group_info = current->group_info; current->group_info = ginfo; task_unlock(current); -#else - LASSERT(ginfo->ngroups <= NGROUPS); - /* save old */ - save->group_info.ngroups = current->ngroups; - if (current->ngroups) - memcpy(save->group_info.small_block, current->groups, - current->ngroups); - /* push new */ - current->ngroups = ginfo->ngroups; - if (ginfo->ngroups) - memcpy(current->groups, ginfo->small_block, - current->ngroups); -#endif } } @@ -100,16 +102,9 @@ static void pop_group_info(struct lvfs_run_ctxt *save, if (!ginfo) { current_ngroups = save->ngroups; } else { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) task_lock(current); current->group_info = save->group_info; task_unlock(current); -#else - current->ngroups = ginfo->ngroups; - if (current->ngroups) - memcpy(current->groups, save->group_info.small_block, - current->ngroups); -#endif } } @@ -119,9 +114,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, { //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n"); ASSERT_CTXT_MAGIC(new_ctx->magic); - LASSERT(save->magic != OBD_RUN_CTXT_MAGIC || save->pid != current->pid); OBD_SET_CTXT_MAGIC(save); - save->pid = current->pid; /* CDEBUG(D_INFO, @@ -139,8 +132,8 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, LASSERT(atomic_read(&new_ctx->pwd->d_count)); save->pwd = dget(current->fs->pwd); save->pwdmnt = mntget(current->fs->pwdmnt); - save->ngroups = current_ngroups; save->luc.luc_umask = current->fs->umask; + save->ngroups = current->group_info->ngroups; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -160,11 +153,14 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, current->fsgid = uc->luc_fsgid; current->cap_effective = uc->luc_cap; - push_group_info(save, uc->luc_ginfo); + push_group_info(save, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); - set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); + ll_set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); /* CDEBUG(D_INFO, @@ -184,9 +180,6 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, { //printk("pc0"); ASSERT_CTXT_MAGIC(saved->magic); - LASSERT(saved->pid == current->pid); - saved->magic = 0; - saved->pid = 0; //printk("pc1"); ASSERT_KERNEL_CTXT("popping non-kernel context!\n"); @@ -201,11 +194,13 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, atomic_read(¤t->fs->pwdmnt->mnt_count)); */ - LASSERT(current->fs->pwd == new_ctx->pwd); - LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt); + LASSERTF(current->fs->pwd == new_ctx->pwd, "%p != %p\n", + current->fs->pwd, new_ctx->pwd); + LASSERTF(current->fs->pwdmnt == new_ctx->pwdmnt, "%p != %p\n", + current->fs->pwdmnt, new_ctx->pwdmnt); set_fs(saved->fs); - set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd); + ll_set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd); dput(saved->pwd); mntput(saved->pwdmnt); @@ -216,8 +211,10 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, current->fsuid = saved->luc.luc_fsuid; current->fsgid = saved->luc.luc_fsgid; current->cap_effective = saved->luc.luc_cap; - - pop_group_info(saved, uc->luc_ginfo); + pop_group_info(saved, + uc->luc_ginfo ?: + uc->luc_identity ? uc->luc_identity->mi_ginfo : + NULL); } /* @@ -240,7 +237,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix) int err = 0; ENTRY; - ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); + // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); @@ -279,13 +276,14 @@ out_up: EXPORT_SYMBOL(simple_mknod); /* utility to make a directory */ -struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) +struct dentry *simple_mkdir(struct dentry *dir, struct vfsmount *mnt, + char *name, int mode, int fix) { struct dentry *dchild; int err = 0; ENTRY; - ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); + // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) @@ -302,8 +300,9 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) /* Fixup directory permissions if necessary */ if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { - CWARN("fixing permissions on %s from %o to %o\n", - name, old_mode, mode); + CDEBUG(D_CONFIG, + "fixing permissions on %s from %o to %o\n", + name, old_mode, mode); dchild->d_inode->i_mode = (mode & S_IALLUGO) | (old_mode & ~S_IALLUGO); mark_inode_dirty(dchild->d_inode); @@ -311,7 +310,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) GOTO(out_up, dchild); } - err = vfs_mkdir(dir->d_inode, dchild, mode); + err = ll_vfs_mkdir(dir->d_inode, dchild, mnt, mode); if (err) GOTO(out_err, err); @@ -325,6 +324,39 @@ out_up: } EXPORT_SYMBOL(simple_mkdir); +/* utility to rename a file */ +int lustre_rename(struct dentry *dir, struct vfsmount *mnt, + char *oldname, char *newname) +{ + struct dentry *dchild_old, *dchild_new; + int err = 0; + ENTRY; + + ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n"); + CDEBUG(D_INODE, "renaming file %.*s to %.*s\n", + (int)strlen(oldname), oldname, (int)strlen(newname), newname); + + dchild_old = ll_lookup_one_len(oldname, dir, strlen(oldname)); + if (IS_ERR(dchild_old)) + RETURN(PTR_ERR(dchild_old)); + + if (!dchild_old->d_inode) + GOTO(put_old, err = -ENOENT); + + dchild_new = ll_lookup_one_len(newname, dir, strlen(newname)); + if (IS_ERR(dchild_new)) + GOTO(put_old, err = PTR_ERR(dchild_new)); + + err = ll_vfs_rename(dir->d_inode, dchild_old, mnt, + dir->d_inode, dchild_new, mnt); + + dput(dchild_new); +put_old: + dput(dchild_old); + RETURN(err); +} +EXPORT_SYMBOL(lustre_rename); + /* * Read a file from within kernel context. Prior to calling this * function we should already have done a push_ctxt(). @@ -384,18 +416,26 @@ struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de, } EXPORT_SYMBOL(l_dentry_open); +#ifdef HAVE_VFS_READDIR_U64_INO +static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset, + u64 ino, unsigned int d_type) +#else static int l_filldir(void *__buf, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int d_type) +#endif { struct l_linux_dirent *dirent; struct l_readdir_callback *buf = (struct l_readdir_callback *)__buf; - + dirent = buf->lrc_dirent; if (dirent) - dirent->lld_off = offset; + dirent->lld_off = offset; OBD_ALLOC(dirent, sizeof(*dirent)); + if (!dirent) + return -ENOMEM; + list_add_tail(&dirent->lld_list, buf->lrc_list); buf->lrc_dirent = dirent; @@ -413,7 +453,7 @@ long l_readdir(struct file *file, struct list_head *dentry_list) int error; buf.lrc_dirent = NULL; - buf.lrc_list = dentry_list; + buf.lrc_list = dentry_list; error = vfs_readdir(file, l_filldir, &buf); if (error < 0) @@ -423,218 +463,146 @@ long l_readdir(struct file *file, struct list_head *dentry_list) if (lastdirent) lastdirent->lld_off = file->f_pos; - return 0; + return 0; } EXPORT_SYMBOL(l_readdir); -EXPORT_SYMBOL(obd_memory); -EXPORT_SYMBOL(obd_memmax); - -#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) -static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED; -static struct hlist_head *obd_memtable; -static unsigned long obd_memtable_size; - -static int lvfs_memdbg_init(int size) -{ - struct hlist_head *head; - int i; - - LASSERT(size > sizeof(sizeof(struct hlist_head))); - obd_memtable_size = size / sizeof(struct hlist_head); - - CWARN("allocating %lu malloc entries...\n", - (unsigned long)obd_memtable_size); - - obd_memtable = kmalloc(size, GFP_KERNEL); - if (!obd_memtable) - return -ENOMEM; - i = obd_memtable_size; - head = obd_memtable; - do { - INIT_HLIST_HEAD(head); - head++; - i--; - } while(i); - - return 0; -} +#ifdef LUSTRE_KERNEL_VERSION +#ifndef HAVE_CLEAR_RDONLY_ON_PUT +#error rdonly patchset must be updated [cfs bz11248] +#endif +void dev_set_rdonly(lvfs_sbdev_type dev); +int dev_check_rdonly(lvfs_sbdev_type dev); -static int lvfs_memdbg_cleanup(void) +void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev) { - struct hlist_node *node = NULL, *tmp = NULL; - struct hlist_head *head; - struct mem_track *mt; - int i; - - spin_lock(&obd_memlist_lock); - for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { - hlist_for_each_safe(node, tmp, head) { - mt = hlist_entry(node, struct mem_track, m_hash); - hlist_del_init(&mt->m_hash); - kfree(mt); - } + lvfs_sbdev_sync(dev); + if (jdev && (jdev != dev)) { + CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n", + (long)jdev); + dev_set_rdonly(jdev); } - spin_unlock(&obd_memlist_lock); - kfree(obd_memtable); - return 0; + CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev); + dev_set_rdonly(dev); } -static inline unsigned long const hashfn(void *ptr) +int lvfs_check_rdonly(lvfs_sbdev_type dev) { - return (unsigned long)ptr & - (obd_memtable_size - 1); + return dev_check_rdonly(dev); } -static void __lvfs_memdbg_insert(struct mem_track *mt) -{ - struct hlist_head *head = obd_memtable + - hashfn(mt->m_ptr); - hlist_add_head(&mt->m_hash, head); -} +EXPORT_SYMBOL(__lvfs_set_rdonly); +EXPORT_SYMBOL(lvfs_check_rdonly); -void lvfs_memdbg_insert(struct mem_track *mt) +int lvfs_check_io_health(struct obd_device *obd, struct file *file) { - spin_lock(&obd_memlist_lock); - __lvfs_memdbg_insert(mt); - spin_unlock(&obd_memlist_lock); -} -EXPORT_SYMBOL(lvfs_memdbg_insert); - -static void __lvfs_memdbg_remove(struct mem_track *mt) -{ - hlist_del_init(&mt->m_hash); -} + char *write_page = NULL; + loff_t offset = 0; + int rc = 0; + ENTRY; -void lvfs_memdbg_remove(struct mem_track *mt) -{ - spin_lock(&obd_memlist_lock); - __lvfs_memdbg_remove(mt); - spin_unlock(&obd_memlist_lock); -} -EXPORT_SYMBOL(lvfs_memdbg_remove); + OBD_ALLOC(write_page, CFS_PAGE_SIZE); + if (!write_page) + RETURN(-ENOMEM); -static struct mem_track *__lvfs_memdbg_find(void *ptr) -{ - struct hlist_node *node = NULL; - struct mem_track *mt = NULL; - struct hlist_head *head; + rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1); - head = obd_memtable + hashfn(ptr); + OBD_FREE(write_page, CFS_PAGE_SIZE); - hlist_for_each(node, head) { - mt = hlist_entry(node, struct mem_track, m_hash); - if ((unsigned long)mt->m_ptr == (unsigned long)ptr) - break; - mt = NULL; - } - return mt; + CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc); + RETURN(rc); } +EXPORT_SYMBOL(lvfs_check_io_health); +#endif /* LUSTRE_KERNEL_VERSION */ -struct mem_track *lvfs_memdbg_find(void *ptr) +void obd_update_maxusage() { - struct mem_track *mt; + __u64 max1, max2; + + max1 = obd_pages_sum(); + max2 = obd_memory_sum(); - spin_lock(&obd_memlist_lock); - mt = __lvfs_memdbg_find(ptr); - spin_unlock(&obd_memlist_lock); + spin_lock(&obd_updatemax_lock); + if (max1 > obd_max_pages) + obd_max_pages = max1; + if (max2 > obd_max_alloc) + obd_max_alloc = max2; + spin_unlock(&obd_updatemax_lock); - return mt; } -EXPORT_SYMBOL(lvfs_memdbg_find); -int lvfs_memdbg_check_insert(struct mem_track *mt) +__u64 obd_memory_max(void) { - spin_lock(&obd_memlist_lock); - if (!__lvfs_memdbg_find(mt->m_ptr)) { - __lvfs_memdbg_insert(mt); - spin_unlock(&obd_memlist_lock); - return 1; - } - spin_unlock(&obd_memlist_lock); - return 0; -} -EXPORT_SYMBOL(lvfs_memdbg_check_insert); + __u64 ret; -struct mem_track * -lvfs_memdbg_check_remove(void *ptr) -{ - struct mem_track *mt; - - spin_lock(&obd_memlist_lock); - mt = __lvfs_memdbg_find(ptr); - if (mt) { - __lvfs_memdbg_remove(mt); - spin_unlock(&obd_memlist_lock); - return mt; - } - spin_unlock(&obd_memlist_lock); - return NULL; + spin_lock(&obd_updatemax_lock); + ret = obd_max_alloc; + spin_unlock(&obd_updatemax_lock); + + return ret; } -EXPORT_SYMBOL(lvfs_memdbg_check_remove); -#endif -void lvfs_memdbg_show(void) +__u64 obd_pages_max(void) { -#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) - struct hlist_node *node = NULL; - struct hlist_head *head; - struct mem_track *mt; -#endif - int leaked; - -#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) - int i; -#endif - - leaked = atomic_read(&obd_memory); + __u64 ret; - if (leaked > 0) { - CWARN("memory leaks detected (max %d, leaked %d)\n", - obd_memmax, leaked); + spin_lock(&obd_updatemax_lock); + ret = obd_max_pages; + spin_unlock(&obd_updatemax_lock); -#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) - spin_lock(&obd_memlist_lock); - for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { - hlist_for_each(node, head) { - mt = hlist_entry(node, struct mem_track, m_hash); - CWARN(" ptr: 0x%p, size: %d, src at \"%s\"\n", - mt->m_ptr, mt->m_size, mt->m_loc); - } - } - spin_unlock(&obd_memlist_lock); -#endif - } + return ret; } -EXPORT_SYMBOL(lvfs_memdbg_show); -static int __init lvfs_linux_init(void) -{ - ENTRY; -#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) - lvfs_memdbg_init(PAGE_SIZE); -#endif - lvfs_mount_list_init(); - RETURN(0); -} +EXPORT_SYMBOL(obd_update_maxusage); +EXPORT_SYMBOL(obd_pages_max); +EXPORT_SYMBOL(obd_memory_max); +EXPORT_SYMBOL(obd_memory); -static void __exit lvfs_linux_exit(void) +#ifdef LPROCFS +__s64 lprocfs_read_helper(struct lprocfs_counter *lc, + enum lprocfs_fields_flags field) { - ENTRY; - - lvfs_mount_list_cleanup(); - lvfs_memdbg_show(); + __s64 ret = 0; + int centry; -#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) - lvfs_memdbg_cleanup(); -#endif - EXIT; - return; + if (!lc) + RETURN(0); + do { + centry = atomic_read(&lc->lc_cntl.la_entry); + + switch (field) { + case LPROCFS_FIELDS_FLAGS_CONFIG: + ret = lc->lc_config; + break; + case LPROCFS_FIELDS_FLAGS_SUM: + ret = lc->lc_sum; + break; + case LPROCFS_FIELDS_FLAGS_MIN: + ret = lc->lc_min; + break; + case LPROCFS_FIELDS_FLAGS_MAX: + ret = lc->lc_max; + break; + case LPROCFS_FIELDS_FLAGS_AVG: + ret = (lc->lc_max - lc->lc_min)/2; + break; + case LPROCFS_FIELDS_FLAGS_SUMSQUARE: + ret = lc->lc_sumsquare; + break; + case LPROCFS_FIELDS_FLAGS_COUNT: + ret = lc->lc_count; + break; + default: + break; + }; + } while (centry != atomic_read(&lc->lc_cntl.la_entry) && + centry != atomic_read(&lc->lc_cntl.la_exit)); + + RETURN(ret); } +EXPORT_SYMBOL(lprocfs_read_helper); +#endif /* LPROCFS */ -MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_AUTHOR("Sun Microsystems, Inc. "); MODULE_DESCRIPTION("Lustre VFS Filesystem Helper v0.1"); MODULE_LICENSE("GPL"); - -module_init(lvfs_linux_init); -module_exit(lvfs_linux_exit);