X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Flvfs%2Flvfs_linux.c;h=112a1ad6ed4f621bdd6c430ec6617052d50dfcfd;hb=4161eaece092a47b24fe2da5fcb3b19332ccd2ea;hp=7e34fce0081a90948227b6cc092a5363145c5323;hpb=164bf598e3e4f738cb31e8422904de6dc614a2f0;p=fs%2Flustre-release.git diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 7e34fce..112a1ad 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/lib/fsfilt_ext3.c + * lustre/lib/lvfs_linux.c * Lustre filesystem abstraction routines * * Copyright (C) 2002, 2003 Cluster File Systems, Inc. @@ -32,12 +32,11 @@ #include #include #include -#include #include #include #include #include -#include +#include #include #include #include @@ -49,33 +48,84 @@ #include #include +#include /* for mds_grp_hash_entry */ atomic_t obd_memory; int obd_memmax; - /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) -# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds())) -# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds())) +# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERTF(!segment_eq(get_fs(), get_ds()),\ + msg) +# define ASSERT_KERNEL_CTXT(msg) LASSERTF(segment_eq(get_fs(), get_ds()), msg) + #else # define ASSERT_CTXT_MAGIC(magic) do {} while(0) # define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0) # define ASSERT_KERNEL_CTXT(msg) do {} while(0) #endif +static void push_group_info(struct lvfs_run_ctxt *save, + struct group_info *ginfo) +{ + if (!ginfo) { + save->ngroups = current_ngroups; + current_ngroups = 0; + } else { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + task_lock(current); + save->group_info = current->group_info; + current->group_info = ginfo; + task_unlock(current); +#else + LASSERT(ginfo->ngroups <= NGROUPS); + /* save old */ + save->group_info.ngroups = current->ngroups; + if (current->ngroups) + memcpy(save->group_info.small_block, current->groups, + current->ngroups); + /* push new */ + current->ngroups = ginfo->ngroups; + if (ginfo->ngroups) + memcpy(current->groups, ginfo->small_block, + current->ngroups); +#endif + } +} + +static void pop_group_info(struct lvfs_run_ctxt *save, + struct group_info *ginfo) +{ + if (!ginfo) { + current_ngroups = save->ngroups; + } else { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + task_lock(current); + current->group_info = save->group_info; + task_unlock(current); +#else + current->ngroups = ginfo->ngroups; + if (current->ngroups) + memcpy(current->groups, save->group_info.small_block, + current->ngroups); +#endif + } +} + /* push / pop to root of obd store */ -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n"); ASSERT_CTXT_MAGIC(new_ctx->magic); + LASSERT(save->magic != OBD_RUN_CTXT_MAGIC || save->pid != current->pid); OBD_SET_CTXT_MAGIC(save); + save->pid = current->pid; /* CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", save, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -89,7 +139,8 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, LASSERT(atomic_read(&new_ctx->pwd->d_count)); save->pwd = dget(current->fs->pwd); save->pwdmnt = mntget(current->fs->pwdmnt); - save->ngroups = current->ngroups; + save->ngroups = current_ngroups; + save->luc.luc_umask = current->fs->umask; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -97,28 +148,27 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, LASSERT(new_ctx->pwdmnt); if (uc) { - save->ouc.ouc_fsuid = current->fsuid; - save->ouc.ouc_fsgid = current->fsgid; - save->ouc.ouc_cap = current->cap_effective; - save->ouc.ouc_suppgid1 = current->groups[0]; - save->ouc.ouc_suppgid2 = current->groups[1]; - - current->fsuid = uc->ouc_fsuid; - current->fsgid = uc->ouc_fsgid; - current->cap_effective = uc->ouc_cap; - current->ngroups = 0; - - if (uc->ouc_suppgid1 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid1; - if (uc->ouc_suppgid2 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid2; + save->luc.luc_uid = current->uid; + save->luc.luc_gid = current->gid; + save->luc.luc_fsuid = current->fsuid; + save->luc.luc_fsgid = current->fsgid; + save->luc.luc_cap = current->cap_effective; + + current->uid = uc->luc_uid; + current->gid = uc->luc_gid; + current->fsuid = uc->luc_fsuid; + current->fsgid = uc->luc_fsgid; + current->cap_effective = uc->luc_cap; + + push_group_info(save, uc->luc_ginfo); } + current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); /* CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", new_ctx, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -129,17 +179,20 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, } EXPORT_SYMBOL(push_ctxt); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { //printk("pc0"); ASSERT_CTXT_MAGIC(saved->magic); + LASSERT(saved->pid == current->pid); + saved->magic = 0; + saved->pid = 0; //printk("pc1"); ASSERT_KERNEL_CTXT("popping non-kernel context!\n"); /* CDEBUG(D_INFO, - " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", new_ctx, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -156,18 +209,20 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, dput(saved->pwd); mntput(saved->pwdmnt); + current->fs->umask = saved->luc.luc_umask; if (uc) { - current->fsuid = saved->ouc.ouc_fsuid; - current->fsgid = saved->ouc.ouc_fsgid; - current->cap_effective = saved->ouc.ouc_cap; - current->ngroups = saved->ngroups; - current->groups[0] = saved->ouc.ouc_suppgid1; - current->groups[1] = saved->ouc.ouc_suppgid2; + current->uid = saved->luc.luc_uid; + current->gid = saved->luc.luc_gid; + current->fsuid = saved->luc.luc_fsuid; + current->fsgid = saved->luc.luc_fsgid; + current->cap_effective = saved->luc.luc_cap; + + pop_group_info(saved, uc->luc_ginfo); } /* CDEBUG(D_INFO, - "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", saved, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -179,14 +234,14 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, EXPORT_SYMBOL(pop_ctxt); /* utility to make a file */ -struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) +struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix) { struct dentry *dchild; int err = 0; ENTRY; ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); - CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name); + CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) @@ -198,7 +253,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) GOTO(out_err, err = -EEXIST); /* Fixup file permissions if necessary */ - if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { + if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { CWARN("fixing permissions on %s from %o to %o\n", name, old_mode, mode); dchild->d_inode->i_mode = (mode & S_IALLUGO) | @@ -224,25 +279,29 @@ out_up: EXPORT_SYMBOL(simple_mknod); /* utility to make a directory */ -struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode) +struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) { struct dentry *dchild; int err = 0; ENTRY; ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); - CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name); + CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) GOTO(out_up, dchild); if (dchild->d_inode) { int old_mode = dchild->d_inode->i_mode; - if (!S_ISDIR(old_mode)) + if (!S_ISDIR(old_mode)) { + CERROR("found %s (%lu/%u) is mode %o\n", name, + dchild->d_inode->i_ino, + dchild->d_inode->i_generation, old_mode); GOTO(out_err, err = -ENOTDIR); + } /* Fixup directory permissions if necessary */ - if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { + if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { CWARN("fixing permissions on %s from %o to %o\n", name, old_mode, mode); dchild->d_inode->i_mode = (mode & S_IALLUGO) | @@ -317,7 +376,7 @@ int lustre_fsync(struct file *file) } EXPORT_SYMBOL(lustre_fsync); -struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de, +struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de, int flags) { mntget(ctxt->pwdmnt); @@ -370,20 +429,206 @@ EXPORT_SYMBOL(l_readdir); EXPORT_SYMBOL(obd_memory); EXPORT_SYMBOL(obd_memmax); +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) +static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED; +static struct hlist_head *obd_memtable; +static unsigned long obd_memtable_size; + +static int lvfs_memdbg_init(int size) +{ + struct hlist_head *head; + int i; + + LASSERT(size > sizeof(sizeof(struct hlist_head))); + obd_memtable_size = size / sizeof(struct hlist_head); + + CWARN("allocating %lu malloc entries...\n", + (unsigned long)obd_memtable_size); + + obd_memtable = kmalloc(size, GFP_KERNEL); + if (!obd_memtable) + return -ENOMEM; + + i = obd_memtable_size; + head = obd_memtable; + do { + INIT_HLIST_HEAD(head); + head++; + i--; + } while(i); + + return 0; +} + +static int lvfs_memdbg_cleanup(void) +{ + struct hlist_node *node = NULL, *tmp = NULL; + struct hlist_head *head; + struct mem_track *mt; + int i; + + spin_lock(&obd_memlist_lock); + for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { + hlist_for_each_safe(node, tmp, head) { + mt = hlist_entry(node, struct mem_track, m_hash); + hlist_del_init(&mt->m_hash); + kfree(mt); + } + } + spin_unlock(&obd_memlist_lock); + kfree(obd_memtable); + return 0; +} + +static inline unsigned long const hashfn(void *ptr) +{ + return (unsigned long)ptr & + (obd_memtable_size - 1); +} + +static void __lvfs_memdbg_insert(struct mem_track *mt) +{ + struct hlist_head *head = obd_memtable + + hashfn(mt->m_ptr); + hlist_add_head(&mt->m_hash, head); +} + +void lvfs_memdbg_insert(struct mem_track *mt) +{ + spin_lock(&obd_memlist_lock); + __lvfs_memdbg_insert(mt); + spin_unlock(&obd_memlist_lock); +} +EXPORT_SYMBOL(lvfs_memdbg_insert); + +static void __lvfs_memdbg_remove(struct mem_track *mt) +{ + hlist_del_init(&mt->m_hash); +} + +void lvfs_memdbg_remove(struct mem_track *mt) +{ + spin_lock(&obd_memlist_lock); + __lvfs_memdbg_remove(mt); + spin_unlock(&obd_memlist_lock); +} +EXPORT_SYMBOL(lvfs_memdbg_remove); + +static struct mem_track *__lvfs_memdbg_find(void *ptr) +{ + struct hlist_node *node = NULL; + struct mem_track *mt = NULL; + struct hlist_head *head; + + head = obd_memtable + hashfn(ptr); + + hlist_for_each(node, head) { + mt = hlist_entry(node, struct mem_track, m_hash); + if ((unsigned long)mt->m_ptr == (unsigned long)ptr) + break; + mt = NULL; + } + return mt; +} + +struct mem_track *lvfs_memdbg_find(void *ptr) +{ + struct mem_track *mt; + + spin_lock(&obd_memlist_lock); + mt = __lvfs_memdbg_find(ptr); + spin_unlock(&obd_memlist_lock); + + return mt; +} +EXPORT_SYMBOL(lvfs_memdbg_find); + +int lvfs_memdbg_check_insert(struct mem_track *mt) +{ + spin_lock(&obd_memlist_lock); + if (!__lvfs_memdbg_find(mt->m_ptr)) { + __lvfs_memdbg_insert(mt); + spin_unlock(&obd_memlist_lock); + return 1; + } + spin_unlock(&obd_memlist_lock); + return 0; +} +EXPORT_SYMBOL(lvfs_memdbg_check_insert); + +struct mem_track * +lvfs_memdbg_check_remove(void *ptr) +{ + struct mem_track *mt; + + spin_lock(&obd_memlist_lock); + mt = __lvfs_memdbg_find(ptr); + if (mt) { + __lvfs_memdbg_remove(mt); + spin_unlock(&obd_memlist_lock); + return mt; + } + spin_unlock(&obd_memlist_lock); + return NULL; +} +EXPORT_SYMBOL(lvfs_memdbg_check_remove); +#endif + +void lvfs_memdbg_show(void) +{ +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + struct hlist_node *node = NULL; + struct hlist_head *head; + struct mem_track *mt; +#endif + int leaked; + +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + int i; +#endif + + leaked = atomic_read(&obd_memory); + + if (leaked > 0) { + CWARN("memory leaks detected (max %d, leaked %d)\n", + obd_memmax, leaked); + +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + spin_lock(&obd_memlist_lock); + for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { + hlist_for_each(node, head) { + mt = hlist_entry(node, struct mem_track, m_hash); + CWARN(" ptr: 0x%p, size: %d, src at \"%s\"\n", + mt->m_ptr, mt->m_size, mt->m_loc); + } + } + spin_unlock(&obd_memlist_lock); +#endif + } +} +EXPORT_SYMBOL(lvfs_memdbg_show); + static int __init lvfs_linux_init(void) { + ENTRY; +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + lvfs_memdbg_init(PAGE_SIZE); +#endif + lvfs_mount_list_init(); RETURN(0); } static void __exit lvfs_linux_exit(void) { - int leaked; ENTRY; - leaked = atomic_read(&obd_memory); - CDEBUG(leaked ? D_ERROR : D_INFO, - "obd mem max: %d leaked: %d\n", obd_memmax, leaked); + lvfs_mount_list_cleanup(); + lvfs_memdbg_show(); +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + lvfs_memdbg_cleanup(); +#endif + EXIT; return; }