Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / lvfs / lvfs_linux.c
index 21cd56a..fef6996 100644 (file)
@@ -39,7 +39,6 @@
 #include <libcfs/kp30.h>
 #include <lustre_fsfilt.h>
 #include <obd.h>
-#include <obd_class.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/lustre_compat25.h>
 #include <lustre_lib.h>
 #include <lustre_quota.h>
 
-atomic_t obd_memory;
-int obd_memmax;
+__u64 obd_max_pages = 0;
+__u64 obd_max_alloc = 0;
+struct lprocfs_stats *obd_memory = NULL;
+spinlock_t obd_updatemax_lock = SPIN_LOCK_UNLOCKED;
+/* refine later and change to seqlock or similar from libcfs */
 
 /* Debugging check only needed during development */
 #ifdef OBD_CTXT_DEBUG
@@ -66,54 +68,28 @@ int obd_memmax;
 #endif
 
 static void push_group_info(struct lvfs_run_ctxt *save,
-                            struct upcall_cache_entry *uce)
+                            struct group_info *ginfo)
 {
-        struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
-
         if (!ginfo) {
                 save->ngroups = current_ngroups;
                 current_ngroups = 0;
         } else {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                 task_lock(current);
                 save->group_info = current->group_info;
                 current->group_info = ginfo;
                 task_unlock(current);
-#else
-                LASSERT(ginfo->ngroups <= NGROUPS);
-                LASSERT(current->ngroups <= NGROUPS_SMALL);
-                /* save old */
-                save->group_info.ngroups = current->ngroups;
-                if (current->ngroups)
-                        memcpy(save->group_info.small_block, current->groups,
-                               current->ngroups * sizeof(gid_t));
-                /* push new */
-                current->ngroups = ginfo->ngroups;
-                if (ginfo->ngroups)
-                        memcpy(current->groups, ginfo->small_block,
-                               current->ngroups * sizeof(gid_t));
-#endif
         }
 }
 
 static void pop_group_info(struct lvfs_run_ctxt *save,
-                           struct upcall_cache_entry *uce)
+                           struct group_info *ginfo)
 {
-        struct group_info *ginfo = uce ? uce->ue_group_info : NULL;
-
         if (!ginfo) {
                 current_ngroups = save->ngroups;
         } else {
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
                 task_lock(current);
                 current->group_info = save->group_info;
                 task_unlock(current);
-#else
-                current->ngroups = save->group_info.ngroups;
-                if (current->ngroups)
-                        memcpy(current->groups, save->group_info.small_block,
-                               current->ngroups * sizeof(gid_t));
-#endif
         }
 }
 
@@ -142,6 +118,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
         save->pwd = dget(current->fs->pwd);
         save->pwdmnt = mntget(current->fs->pwdmnt);
         save->luc.luc_umask = current->fs->umask;
+        save->ngroups = current->group_info->ngroups;
 
         LASSERT(save->pwd);
         LASSERT(save->pwdmnt);
@@ -149,14 +126,22 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
         LASSERT(new_ctx->pwdmnt);
 
         if (uc) {
+                save->luc.luc_uid = current->uid;
+                save->luc.luc_gid = current->gid;
                 save->luc.luc_fsuid = current->fsuid;
                 save->luc.luc_fsgid = current->fsgid;
                 save->luc.luc_cap = current->cap_effective;
 
+                current->uid = uc->luc_uid;
+                current->gid = uc->luc_gid;
                 current->fsuid = uc->luc_fsuid;
                 current->fsgid = uc->luc_fsgid;
                 current->cap_effective = uc->luc_cap;
-                push_group_info(save, uc->luc_uce);
+
+                push_group_info(save,
+                                uc->luc_ginfo ?:
+                                uc->luc_identity ? uc->luc_identity->mi_ginfo :
+                                                   NULL);
         }
         current->fs->umask = 0; /* umask already applied on client */
         set_fs(new_ctx->fs);
@@ -206,10 +191,15 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
         mntput(saved->pwdmnt);
         current->fs->umask = saved->luc.luc_umask;
         if (uc) {
+                current->uid = saved->luc.luc_uid;
+                current->gid = saved->luc.luc_gid;
                 current->fsuid = saved->luc.luc_fsuid;
                 current->fsgid = saved->luc.luc_fsgid;
                 current->cap_effective = saved->luc.luc_cap;
-                pop_group_info(saved, uc->luc_uce);
+                pop_group_info(saved,
+                               uc->luc_ginfo ?:
+                               uc->luc_identity ? uc->luc_identity->mi_ginfo :
+                                                  NULL);
         }
 
         /*
@@ -232,7 +222,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix)
         int err = 0;
         ENTRY;
 
-        ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
+        // ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
         CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
 
         dchild = ll_lookup_one_len(name, dir, strlen(name));
@@ -277,7 +267,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
         int err = 0;
         ENTRY;
 
-        ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
+        // ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
         CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
         dchild = ll_lookup_one_len(name, dir, strlen(name));
         if (IS_ERR(dchild))
@@ -318,6 +308,37 @@ out_up:
 }
 EXPORT_SYMBOL(simple_mkdir);
 
+/*
+ * Rename the entry called oldname to newname; both names are looked up in
+ * the directory dir.
+ *
+ * Returns the result of vfs_rename(), -ENOENT when oldname has no inode,
+ * or the error encoded in a failed lookup.
+ *
+ * NOTE(review): no directory lock is taken in this function; presumably
+ * callers serialise access to dir -- confirm against the call sites.
+ */
+int lustre_rename(struct dentry *dir, char *oldname, char *newname)
+{
+        struct dentry *src;
+        struct dentry *dst;
+        int oldlen;
+        int newlen;
+        int rc = 0;
+        ENTRY;
+
+        ASSERT_KERNEL_CTXT("kernel doing rename outside kernel context\n");
+
+        /* measure each name once; the lengths feed the log and the lookups */
+        oldlen = strlen(oldname);
+        newlen = strlen(newname);
+        CDEBUG(D_INODE, "renaming file %.*s to %.*s\n",
+               oldlen, oldname, newlen, newname);
+
+        src = ll_lookup_one_len(oldname, dir, oldlen);
+        if (IS_ERR(src))
+                RETURN(PTR_ERR(src));
+
+        /* a dentry without an inode means the source does not exist */
+        if (src->d_inode == NULL)
+                GOTO(out_src, rc = -ENOENT);
+
+        dst = ll_lookup_one_len(newname, dir, newlen);
+        if (IS_ERR(dst))
+                GOTO(out_src, rc = PTR_ERR(dst));
+
+        rc = vfs_rename(dir->d_inode, src, dir->d_inode, dst);
+
+        dput(dst);
+out_src:
+        dput(src);
+        RETURN(rc);
+}
+EXPORT_SYMBOL(lustre_rename);
+
 /*
  * Read a file from within kernel context.  Prior to calling this
  * function we should already have done a push_ctxt().
@@ -414,7 +435,7 @@ long l_readdir(struct file *file, struct list_head *dentry_list)
         int error;
 
         buf.lrc_dirent = NULL;
-        buf.lrc_list = dentry_list; 
+        buf.lrc_list = dentry_list;
 
         error = vfs_readdir(file, l_filldir, &buf);
         if (error < 0)
@@ -424,26 +445,209 @@ long l_readdir(struct file *file, struct list_head *dentry_list)
         if (lastdirent)
                 lastdirent->lld_off = file->f_pos;
 
-        return 0; 
+        return 0;
 }
 EXPORT_SYMBOL(l_readdir);
-EXPORT_SYMBOL(obd_memory);
-EXPORT_SYMBOL(obd_memmax);
 
-#ifdef LUSTRE_KERNEL_VERSION
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED;
+static struct hlist_head *obd_memtable = NULL;
+static unsigned long obd_memtable_size = 0;
 
-#ifndef HAVE_CLEAR_RDONLY_ON_PUT
-#error rdonly patchset must be updated
+/*
+ * Allocate the memdbg hash table and initialise every bucket.
+ *
+ * size is the table budget in bytes and must be large enough for at least
+ * one bucket.  The bucket count is rounded down to a power of two because
+ * hashfn() selects a bucket by masking with (obd_memtable_size - 1).
+ *
+ * Returns 0 on success or -ENOMEM when kmalloc() fails; on failure neither
+ * obd_memtable nor obd_memtable_size is modified.
+ */
+static int lvfs_memdbg_init(int size)
+{
+        struct hlist_head *table;
+        unsigned long nr;
+        unsigned long i;
+
+        /* Check the sign separately: comparing a negative int against a
+         * sizeof result would convert it to a huge unsigned value. */
+        LASSERT(size > 0);
+        LASSERT((unsigned long)size >= sizeof(struct hlist_head));
+
+        /* clear low set bits until a single one (a power of two) is left */
+        nr = size / sizeof(struct hlist_head);
+        while (nr & (nr - 1))
+                nr &= nr - 1;
+
+        CWARN("Allocating %lu memdbg entries.\n", nr);
+
+        LASSERT(obd_memtable == NULL);
+        table = kmalloc(nr * sizeof(*table), GFP_KERNEL);
+        if (table == NULL)
+                return -ENOMEM;
+
+        for (i = 0; i < nr; i++)
+                INIT_HLIST_HEAD(&table[i]);
+
+        /* Publish only after success, so that code looping up to
+         * obd_memtable_size never walks a table that was not allocated. */
+        obd_memtable_size = nr;
+        obd_memtable = table;
+
+        return 0;
+}
+
+/*
+ * Free every tracking record still linked into the memdbg hash table and
+ * then the table itself, leaving obd_memtable NULL and obd_memtable_size 0.
+ *
+ * Always returns 0.
+ */
+static int lvfs_memdbg_cleanup(void)
+{
+        struct hlist_node *node = NULL, *next = NULL;
+        struct hlist_head *table;
+        struct obd_mem_track *mt;
+        unsigned long i;
+
+        spin_lock(&obd_memlist_lock);
+        table = obd_memtable;
+        /* obd_memtable is NULL if lvfs_memdbg_init() failed or never ran;
+         * in that case there are no buckets to walk. */
+        for (i = 0; table != NULL && i < obd_memtable_size; i++) {
+                hlist_for_each_safe(node, next, &table[i]) {
+                        mt = hlist_entry(node, struct obd_mem_track, mt_hash);
+                        hlist_del_init(&mt->mt_hash);
+                        kfree(mt);
+                }
+        }
+        /* forget the table so that no stale pointer to it survives the
+         * kfree() below */
+        obd_memtable = NULL;
+        obd_memtable_size = 0;
+        spin_unlock(&obd_memlist_lock);
+
+        kfree(table);
+        return 0;
+}
+
+/*
+ * Map a tracked pointer to a bucket index in obd_memtable.
+ *
+ * The index is obtained by masking with (obd_memtable_size - 1), so it is
+ * only a valid bucket number when the table size is a power of two.
+ * Higher address bits are folded into the low ones first: masking the raw
+ * address would key on its lowest bits alone, and those are zero for any
+ * aligned allocation, which would leave most buckets unused.
+ */
+static inline unsigned long hashfn(void *ptr)
+{
+        unsigned long addr = (unsigned long)ptr;
+
+        addr ^= addr >> 16;
+        addr ^= addr >> 8;
+        addr ^= addr >> 4;
+
+        return addr & (obd_memtable_size - 1);
+}
+
+/*
+ * Link mt at the head of the bucket selected by hashing mt->mt_ptr.
+ * Takes no lock itself; the callers in this file hold obd_memlist_lock.
+ */
+static void __lvfs_memdbg_insert(struct obd_mem_track *mt)
+{
+        unsigned long bucket = hashfn(mt->mt_ptr);
+
+        hlist_add_head(&mt->mt_hash, &obd_memtable[bucket]);
+}
+
+/*
+ * Locked variant of __lvfs_memdbg_insert(): adds mt to the memdbg hash
+ * table while holding obd_memlist_lock.
+ */
+void lvfs_memdbg_insert(struct obd_mem_track *mt)
+{
+        spin_lock(&obd_memlist_lock);
+        __lvfs_memdbg_insert(mt);
+        spin_unlock(&obd_memlist_lock);
+}
+EXPORT_SYMBOL(lvfs_memdbg_insert);
+
+/*
+ * Unlink mt from whichever bucket it is on and reinitialise its hash node.
+ * Takes no lock itself; the callers in this file hold obd_memlist_lock.
+ */
+static void __lvfs_memdbg_remove(struct obd_mem_track *mt)
+{
+        struct hlist_node *link = &mt->mt_hash;
+
+        hlist_del_init(link);
+}
+
+/*
+ * Locked variant of __lvfs_memdbg_remove(): drops mt from the memdbg hash
+ * table while holding obd_memlist_lock.  The record itself is not freed.
+ */
+void lvfs_memdbg_remove(struct obd_mem_track *mt)
+{
+        spin_lock(&obd_memlist_lock);
+        __lvfs_memdbg_remove(mt);
+        spin_unlock(&obd_memlist_lock);
+}
+EXPORT_SYMBOL(lvfs_memdbg_remove);
+
+/*
+ * Search the bucket that ptr hashes to for a record whose mt_ptr equals
+ * ptr.  Returns the record, or NULL when the bucket holds no match.
+ * Takes no lock itself; the callers in this file hold obd_memlist_lock.
+ */
+static struct obd_mem_track *__lvfs_memdbg_find(void *ptr)
+{
+        struct hlist_node *node = NULL;
+
+        hlist_for_each(node, &obd_memtable[hashfn(ptr)]) {
+                struct obd_mem_track *mt;
+
+                mt = hlist_entry(node, struct obd_mem_track, mt_hash);
+                if ((unsigned long)mt->mt_ptr == (unsigned long)ptr)
+                        return mt;
+        }
+
+        return NULL;
+}
+
+/*
+ * Locked variant of __lvfs_memdbg_find().  The lock is released before
+ * returning, so the record is not protected once the caller has it.
+ */
+struct obd_mem_track *lvfs_memdbg_find(void *ptr)
+{
+        struct obd_mem_track *found;
+
+        spin_lock(&obd_memlist_lock);
+        found = __lvfs_memdbg_find(ptr);
+        spin_unlock(&obd_memlist_lock);
+
+        return found;
+}
+EXPORT_SYMBOL(lvfs_memdbg_find);
+
+/*
+ * Insert mt unless a record with the same mt_ptr is already in the table.
+ * The lookup and the insertion happen under one hold of obd_memlist_lock.
+ *
+ * Returns 1 when mt was inserted, 0 when a matching record already existed
+ * (mt is left untouched in that case).
+ */
+int lvfs_memdbg_check_insert(struct obd_mem_track *mt)
+{
+        int inserted = 0;
+
+        spin_lock(&obd_memlist_lock);
+        if (__lvfs_memdbg_find(mt->mt_ptr) == NULL) {
+                __lvfs_memdbg_insert(mt);
+                inserted = 1;
+        }
+        spin_unlock(&obd_memlist_lock);
+
+        return inserted;
+}
+EXPORT_SYMBOL(lvfs_memdbg_check_insert);
+
+/*
+ * Find the record tracking ptr and unlink it from the table, both under
+ * one hold of obd_memlist_lock.
+ *
+ * Returns the unlinked record (not freed here), or NULL when ptr is not
+ * tracked.
+ */
+struct obd_mem_track *
+lvfs_memdbg_check_remove(void *ptr)
+{
+        struct obd_mem_track *found;
+
+        spin_lock(&obd_memlist_lock);
+        found = __lvfs_memdbg_find(ptr);
+        if (found != NULL)
+                __lvfs_memdbg_remove(found);
+        spin_unlock(&obd_memlist_lock);
+
+        return found;
+}
+EXPORT_SYMBOL(lvfs_memdbg_check_remove);
+#endif
+
+/*
+ * Log every record still present in the memdbg hash table.
+ *
+ * Each record is reported as "wrong size" when OBD_MT_WRONG_SIZE is set in
+ * its flags and as "leaked memory" otherwise; a header line is printed
+ * before the first record only.  The function body is empty unless both
+ * CONFIG_DEBUG_MEMORY and __KERNEL__ are defined.
+ */
+void lvfs_memdbg_show(void)
+{
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+        struct hlist_node *node = NULL;
+        struct obd_mem_track *mt;
+        unsigned long i;
+        int header = 0;
+
+        spin_lock(&obd_memlist_lock);
+        /* obd_memtable is NULL if lvfs_memdbg_init() failed or never ran;
+         * in that case there is nothing to report. */
+        for (i = 0; obd_memtable != NULL && i < obd_memtable_size; i++) {
+                hlist_for_each(node, &obd_memtable[i]) {
+                        if (header == 0) {
+                                CWARN("Abnormal memory activities:\n");
+                                header = 1;
+                        }
+                        mt = hlist_entry(node, struct obd_mem_track, mt_hash);
+                        CWARN("  [%s] ptr: 0x%p, size: %d, src at %s\n",
+                              ((mt->mt_flags & OBD_MT_WRONG_SIZE) ?
+                               "wrong size" : "leaked memory"),
+                              mt->mt_ptr, mt->mt_size, mt->mt_loc);
+                }
+        }
+        spin_unlock(&obd_memlist_lock);
 #endif
+}
 
+EXPORT_SYMBOL(lvfs_memdbg_show);
+
+#ifdef LUSTRE_KERNEL_VERSION
+#ifndef HAVE_CLEAR_RDONLY_ON_PUT
+#error rdonly patchset must be updated [cfs bz11248]
+#endif
 void dev_set_rdonly(lvfs_sbdev_type dev);
-void dev_clear_rdonly(lvfs_sbdev_type dev);
 int dev_check_rdonly(lvfs_sbdev_type dev);
 
-void lvfs_set_rdonly(lvfs_sbdev_type dev)
+void __lvfs_set_rdonly(lvfs_sbdev_type dev, lvfs_sbdev_type jdev)
 {
-        CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
         lvfs_sbdev_sync(dev);
+        if (jdev && (jdev != dev)) {
+                CDEBUG(D_IOCTL | D_HA, "set journal dev %lx rdonly\n",
+                       (long)jdev);
+                dev_set_rdonly(jdev);
+        }
+        CDEBUG(D_IOCTL | D_HA, "set dev %lx rdonly\n", (long)dev);
         dev_set_rdonly(dev);
 }
 
@@ -452,16 +656,8 @@ int lvfs_check_rdonly(lvfs_sbdev_type dev)
         return dev_check_rdonly(dev);
 }
 
-void lvfs_clear_rdonly(lvfs_sbdev_type dev)
-{
-        CDEBUG(D_IOCTL | D_HA, "(will unset dev %lx rdonly on put)\n",
-               (long)dev);
-}
-
-EXPORT_SYMBOL(lvfs_set_rdonly);
+EXPORT_SYMBOL(__lvfs_set_rdonly);
 EXPORT_SYMBOL(lvfs_check_rdonly);
-EXPORT_SYMBOL(lvfs_clear_rdonly);
-#endif
 
 int lvfs_check_io_health(struct obd_device *obd, struct file *file)
 {
@@ -473,32 +669,124 @@ int lvfs_check_io_health(struct obd_device *obd, struct file *file)
         OBD_ALLOC(write_page, CFS_PAGE_SIZE);
         if (!write_page)
                 RETURN(-ENOMEM);
-        
+
         rc = fsfilt_write_record(obd, file, write_page, CFS_PAGE_SIZE, &offset, 1);
-       
+
         OBD_FREE(write_page, CFS_PAGE_SIZE);
 
         CDEBUG(D_INFO, "write 1 page synchronously for checking io rc %d\n",rc);
-        RETURN(rc); 
+        RETURN(rc);
 }
 EXPORT_SYMBOL(lvfs_check_io_health);
+#endif /* LUSTRE_KERNEL_VERSION */
+
+/*
+ * Refresh the recorded usage maxima.
+ *
+ * Samples obd_pages_sum() and obd_memory_sum() and, under
+ * obd_updatemax_lock, raises obd_max_pages / obd_max_alloc when the sample
+ * is larger than the stored value.  The samples are taken before the lock
+ * is acquired.
+ */
+void obd_update_maxusage(void)
+{
+        __u64 pages;
+        __u64 alloc;
+
+        pages = obd_pages_sum();
+        alloc = obd_memory_sum();
+
+        spin_lock(&obd_updatemax_lock);
+        if (pages > obd_max_pages)
+                obd_max_pages = pages;
+        if (alloc > obd_max_alloc)
+                obd_max_alloc = alloc;
+        spin_unlock(&obd_updatemax_lock);
+}
+
+/*
+ * Return the current value of obd_max_alloc, read under
+ * obd_updatemax_lock.
+ */
+__u64 obd_memory_max(void)
+{
+        __u64 peak;
+
+        spin_lock(&obd_updatemax_lock);
+        peak = obd_max_alloc;
+        spin_unlock(&obd_updatemax_lock);
+
+        return peak;
+}
+
+/*
+ * Return the current value of obd_max_pages, read under
+ * obd_updatemax_lock.
+ */
+__u64 obd_pages_max(void)
+{
+        __u64 peak;
+
+        spin_lock(&obd_updatemax_lock);
+        peak = obd_max_pages;
+        spin_unlock(&obd_updatemax_lock);
+
+        return peak;
+}
+
+EXPORT_SYMBOL(obd_update_maxusage);
+EXPORT_SYMBOL(obd_pages_max);
+EXPORT_SYMBOL(obd_memory_max);
+EXPORT_SYMBOL(obd_memory);
+
+#ifdef LPROCFS
+/*
+ * Read one field of the counter lc.
+ *
+ * Returns 0 when lc is NULL or when field is not one of the handled
+ * LPROCFS_FIELDS_FLAGS_* values; otherwise the selected field.  The read
+ * is repeated for as long as the la_entry value sampled before the read
+ * differs from both la_entry and la_exit afterwards.
+ *
+ * NOTE(review): a retry loop of this shape would usually repeat when
+ * EITHER counter differs (||); confirm that && is intended.
+ */
+__s64 lprocfs_read_helper(struct lprocfs_counter *lc,
+                          enum lprocfs_fields_flags field)
+{
+        __s64 value = 0;
+        int seq;
+
+        if (lc == NULL)
+                RETURN(0);
+
+        do {
+                seq = atomic_read(&lc->lc_cntl.la_entry);
+
+                switch (field) {
+                case LPROCFS_FIELDS_FLAGS_CONFIG:
+                        value = lc->lc_config;
+                        break;
+                case LPROCFS_FIELDS_FLAGS_SUM:
+                        value = lc->lc_sum;
+                        break;
+                case LPROCFS_FIELDS_FLAGS_MIN:
+                        value = lc->lc_min;
+                        break;
+                case LPROCFS_FIELDS_FLAGS_MAX:
+                        value = lc->lc_max;
+                        break;
+                case LPROCFS_FIELDS_FLAGS_AVG:
+                        /* NOTE(review): this is half the min..max range,
+                         * not a mean (sum / count) -- confirm intent. */
+                        value = (lc->lc_max - lc->lc_min)/2;
+                        break;
+                case LPROCFS_FIELDS_FLAGS_SUMSQUARE:
+                        value = lc->lc_sumsquare;
+                        break;
+                case LPROCFS_FIELDS_FLAGS_COUNT:
+                        value = lc->lc_count;
+                        break;
+                default:
+                        break;
+                }
+        } while (seq != atomic_read(&lc->lc_cntl.la_entry) &&
+                 seq != atomic_read(&lc->lc_cntl.la_exit));
+
+        RETURN(value);
+}
+EXPORT_SYMBOL(lprocfs_read_helper);
+#endif /* LPROCFS */
 
+/*
+ * Module init.  When memory debugging is compiled in, allocate the memdbg
+ * hash table (one page) and fail the load if that allocation fails.
+ *
+ * Returns 0 on success or the error from lvfs_memdbg_init().
+ */
 static int __init lvfs_linux_init(void)
 {
+        int rc = 0;
+        ENTRY;
+
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+        /* the lvfs_memdbg_*() insert/find helpers index obd_memtable
+         * directly, so the module must not come up without it */
+        rc = lvfs_memdbg_init(PAGE_SIZE);
+#endif
-        RETURN(0);
+        RETURN(rc);
 }
 
 static void __exit lvfs_linux_exit(void)
 {
-        int leaked;
         ENTRY;
 
-        leaked = atomic_read(&obd_memory);
-        CDEBUG(leaked ? D_ERROR : D_INFO,
-               "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
-
+        lvfs_memdbg_show();
+        
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+        lvfs_memdbg_cleanup();
+#endif
         EXIT;
-        return;
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");