From 08fbadcfe1b62ba46e721eca2d0c5309ca95273c Mon Sep 17 00:00:00 2001 From: James Simmons Date: Fri, 20 Sep 2013 12:34:38 -0400 Subject: [PATCH] LU-2757 dynlocks: move into osd-ldiskfs Commit 43603bd1 (LU-1548 osd: move i_htree_lock to iam container) eliminated the use of dynlocks inside ldiskfs. Now that it's only used in the osd-ldiskfs code, we can move it under osd-ldiskfs/, and eliminate a patch we apply during build. Long term, dynlocks will never be merged into the ext4 work so it makes sense to move it to the osd-ldiskfs layer. Also, the mdd layer was carrying technical debt of dynlocks as well so this patch removes it from that layer. This work helps us move forward to a patchless ldiskfs in the future. Signed-off-by: Jeff Mahoney Signed-off-by: James Simmons Change-Id: I3d2ca2317426df3a7f3855d85a834a37091a78bd Reviewed-on: http://review.whamcloud.com/5282 Reviewed-by: Bob Glossman Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger --- ldiskfs/Makefile.in | 7 +- .../patches/rhel6.3/ext4-dynlocks-common.patch | 355 -------------------- .../patches/rhel6.3/ext4-pdir-fix.patch | 8 - .../patches/rhel6.3/ext4_pdirop.patch | 4 +- .../patches/sles11sp2/ext4-dynlocks-common.patch | 370 --------------------- .../patches/sles11sp2/ext4_pdirop.patch | 4 +- .../series/ldiskfs-2.6-rhel6.4.series | 1 - .../kernel_patches/series/ldiskfs-2.6-rhel6.series | 1 - .../series/ldiskfs-2.6-sles11.series | 1 - .../series/ldiskfs-3.0-sles11.series | 1 - .../series/ldiskfs-3.0-sles11sp3.series | 1 - lustre/mdd/Makefile.in | 2 - lustre/mdd/mdd_dir.c | 159 +++------ lustre/mdd/mdd_internal.h | 12 - lustre/mdd/mdd_lock.c | 113 ------- lustre/mdd/mdd_object.c | 1 - lustre/osd-ldiskfs/Makefile.in | 6 +- lustre/osd-ldiskfs/autoMakefile.am | 2 +- lustre/osd-ldiskfs/osd_dynlocks.c | 222 +++++++++++++ lustre/osd-ldiskfs/osd_dynlocks.h | 40 +++ lustre/osd-ldiskfs/osd_handler.c | 33 +- lustre/osd-ldiskfs/osd_iam.h | 5 +- 22 files changed, 350 insertions(+), 998 deletions(-) delete mode 
100644 ldiskfs/kernel_patches/patches/rhel6.3/ext4-dynlocks-common.patch delete mode 100644 ldiskfs/kernel_patches/patches/sles11sp2/ext4-dynlocks-common.patch create mode 100644 lustre/osd-ldiskfs/osd_dynlocks.c create mode 100644 lustre/osd-ldiskfs/osd_dynlocks.h diff --git a/ldiskfs/Makefile.in b/ldiskfs/Makefile.in index af44d1d..bc3f058 100644 --- a/ldiskfs/Makefile.in +++ b/ldiskfs/Makefile.in @@ -7,16 +7,15 @@ backfs_extra := $(wildcard @LINUX@/fs/ext4/Makefile) backfs_headers := $(wildcard @EXT4_SRC_DIR@/*.h) linux_headers := $(wildcard @LINUX@/include/linux/ext4*.h) -linux_new_headers := dynlocks.h -linux_new_headers += htree_lock.h +linux_new_headers := htree_lock.h trace_headers := $(wildcard @LINUX@/include/trace/events/ext4*.h) backfs_sources := $(filter-out %.mod.c,$(wildcard @EXT4_SRC_DIR@/*.c)) -ext3_new_sources := extents.c mballoc.c group.h dynlocks.c fiemap.h +ext3_new_sources := extents.c mballoc.c group.h fiemap.h ext3_new_headers := ext3_extents.h -ext4_new_sources := dynlocks.c fiemap.h mmp.c +ext4_new_sources := fiemap.h mmp.c ext4_new_sources += htree_lock.c ext4_new_headers := diff --git a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-dynlocks-common.patch b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-dynlocks-common.patch deleted file mode 100644 index c91c8bc..0000000 --- a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-dynlocks-common.patch +++ /dev/null @@ -1,355 +0,0 @@ -Index: linux-stage/fs/ext4/dynlocks.c -=================================================================== ---- /dev/null -+++ linux-stage/fs/ext4/dynlocks.c -@@ -0,0 +1,236 @@ -+/* -+ * Dynamic Locks -+ * -+ * struct dynlock is lockspace -+ * one may request lock (exclusive or shared) for some value -+ * in that lockspace -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DYNLOCK_HANDLE_MAGIC 0xd19a10c -+#define DYNLOCK_HANDLE_DEAD 0xd1956ee -+#define DYNLOCK_LIST_MAGIC 0x11ee91e6 -+ -+static struct kmem_cache * dynlock_cachep = NULL; -+ -+struct 
dynlock_handle { -+ unsigned dh_magic; -+ struct list_head dh_list; -+ unsigned long dh_value; /* lock value */ -+ int dh_refcount; /* number of users */ -+ int dh_readers; -+ int dh_writers; -+ int dh_pid; /* holder of the lock */ -+ wait_queue_head_t dh_wait; -+}; -+ -+int __init dynlock_cache_init(void) -+{ -+ int rc = 0; -+ -+ /* printk(KERN_INFO "init dynlocks cache\n"); */ -+ dynlock_cachep = kmem_cache_create("dynlock_cache", -+ sizeof(struct dynlock_handle), -+ 0, -+ SLAB_HWCACHE_ALIGN, -+ NULL); -+ if (dynlock_cachep == NULL) { -+ printk(KERN_ERR "Not able to create dynlock cache"); -+ rc = -ENOMEM; -+ } -+ return rc; -+} -+ -+void dynlock_cache_exit(void) -+{ -+ /* printk(KERN_INFO "exit dynlocks cache\n"); */ -+ kmem_cache_destroy(dynlock_cachep); -+} -+ -+/* -+ * dynlock_init -+ * -+ * initialize lockspace -+ * -+ */ -+void dynlock_init(struct dynlock *dl) -+{ -+ spin_lock_init(&dl->dl_list_lock); -+ INIT_LIST_HEAD(&dl->dl_list); -+ dl->dl_magic = DYNLOCK_LIST_MAGIC; -+} -+EXPORT_SYMBOL(dynlock_init); -+ -+/* -+ * dynlock_lock -+ * -+ * acquires lock (exclusive or shared) in specified lockspace -+ * each lock in lockspace is allocated separately, so user have -+ * to specify GFP flags. -+ * routine returns pointer to lock. 
this pointer is intended to -+ * be passed to dynlock_unlock -+ * -+ */ -+struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, -+ enum dynlock_type lt, gfp_t gfp) -+{ -+ struct dynlock_handle *nhl = NULL; -+ struct dynlock_handle *hl; -+ -+ BUG_ON(dl == NULL); -+ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); -+ -+repeat: -+ /* find requested lock in lockspace */ -+ spin_lock(&dl->dl_list_lock); -+ BUG_ON(dl->dl_list.next == NULL); -+ BUG_ON(dl->dl_list.prev == NULL); -+ list_for_each_entry(hl, &dl->dl_list, dh_list) { -+ BUG_ON(hl->dh_list.next == NULL); -+ BUG_ON(hl->dh_list.prev == NULL); -+ BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); -+ if (hl->dh_value == value) { -+ /* lock is found */ -+ if (nhl) { -+ /* someone else just allocated -+ * lock we didn't find and just created -+ * so, we drop our lock -+ */ -+ kmem_cache_free(dynlock_cachep, nhl); -+ nhl = NULL; -+ } -+ hl->dh_refcount++; -+ goto found; -+ } -+ } -+ /* lock not found */ -+ if (nhl) { -+ /* we already have allocated lock. use it */ -+ hl = nhl; -+ nhl = NULL; -+ list_add(&hl->dh_list, &dl->dl_list); -+ goto found; -+ } -+ spin_unlock(&dl->dl_list_lock); -+ -+ /* lock not found and we haven't allocated lock yet. allocate it */ -+ nhl = kmem_cache_alloc(dynlock_cachep, gfp); -+ if (nhl == NULL) -+ return NULL; -+ nhl->dh_refcount = 1; -+ nhl->dh_value = value; -+ nhl->dh_readers = 0; -+ nhl->dh_writers = 0; -+ nhl->dh_magic = DYNLOCK_HANDLE_MAGIC; -+ init_waitqueue_head(&nhl->dh_wait); -+ -+ /* while lock is being allocated, someone else may allocate it -+ * and put onto to list. 
check this situation -+ */ -+ goto repeat; -+ -+found: -+ if (lt == DLT_WRITE) { -+ /* exclusive lock: user don't want to share lock at all -+ * NOTE: one process may take the same lock several times -+ * this functionaly is useful for rename operations */ -+ while ((hl->dh_writers && hl->dh_pid != current->pid) || -+ hl->dh_readers) { -+ spin_unlock(&dl->dl_list_lock); -+ wait_event(hl->dh_wait, -+ hl->dh_writers == 0 && hl->dh_readers == 0); -+ spin_lock(&dl->dl_list_lock); -+ } -+ hl->dh_writers++; -+ } else { -+ /* shared lock: user do not want to share lock with writer */ -+ while (hl->dh_writers) { -+ spin_unlock(&dl->dl_list_lock); -+ wait_event(hl->dh_wait, hl->dh_writers == 0); -+ spin_lock(&dl->dl_list_lock); -+ } -+ hl->dh_readers++; -+ } -+ hl->dh_pid = current->pid; -+ spin_unlock(&dl->dl_list_lock); -+ -+ return hl; -+} -+EXPORT_SYMBOL(dynlock_lock); -+ -+ -+/* -+ * dynlock_unlock -+ * -+ * user have to specify lockspace (dl) and pointer to lock structure -+ * returned by dynlock_lock() -+ * -+ */ -+void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl) -+{ -+ int wakeup = 0; -+ -+ BUG_ON(dl == NULL); -+ BUG_ON(hl == NULL); -+ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); -+ -+ if (hl->dh_magic != DYNLOCK_HANDLE_MAGIC) -+ printk(KERN_EMERG "wrong lock magic: %#x\n", hl->dh_magic); -+ -+ BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); -+ BUG_ON(hl->dh_writers != 0 && current->pid != hl->dh_pid); -+ -+ spin_lock(&dl->dl_list_lock); -+ if (hl->dh_writers) { -+ BUG_ON(hl->dh_readers != 0); -+ hl->dh_writers--; -+ if (hl->dh_writers == 0) -+ wakeup = 1; -+ } else if (hl->dh_readers) { -+ hl->dh_readers--; -+ if (hl->dh_readers == 0) -+ wakeup = 1; -+ } else { -+ BUG(); -+ } -+ if (wakeup) { -+ hl->dh_pid = 0; -+ wake_up(&hl->dh_wait); -+ } -+ if (--(hl->dh_refcount) == 0) { -+ hl->dh_magic = DYNLOCK_HANDLE_DEAD; -+ list_del(&hl->dh_list); -+ kmem_cache_free(dynlock_cachep, hl); -+ } -+ spin_unlock(&dl->dl_list_lock); -+} 
-+EXPORT_SYMBOL(dynlock_unlock); -+ -+int dynlock_is_locked(struct dynlock *dl, unsigned long value) -+{ -+ struct dynlock_handle *hl; -+ int result = 0; -+ -+ /* find requested lock in lockspace */ -+ spin_lock(&dl->dl_list_lock); -+ BUG_ON(dl->dl_list.next == NULL); -+ BUG_ON(dl->dl_list.prev == NULL); -+ list_for_each_entry(hl, &dl->dl_list, dh_list) { -+ BUG_ON(hl->dh_list.next == NULL); -+ BUG_ON(hl->dh_list.prev == NULL); -+ BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); -+ if (hl->dh_value == value && hl->dh_pid == current->pid) { -+ /* lock is found */ -+ result = 1; -+ break; -+ } -+ } -+ spin_unlock(&dl->dl_list_lock); -+ return result; -+} -+EXPORT_SYMBOL(dynlock_is_locked); -Index: linux-stage/include/linux/dynlocks.h -=================================================================== ---- /dev/null -+++ linux-stage/include/linux/dynlocks.h -@@ -0,0 +1,34 @@ -+#ifndef _LINUX_DYNLOCKS_H -+#define _LINUX_DYNLOCKS_H -+ -+#include -+#include -+ -+struct dynlock_handle; -+ -+/* -+ * lock's namespace: -+ * - list of locks -+ * - lock to protect this list -+ */ -+struct dynlock { -+ unsigned dl_magic; -+ struct list_head dl_list; -+ spinlock_t dl_list_lock; -+}; -+ -+enum dynlock_type { -+ DLT_WRITE, -+ DLT_READ -+}; -+ -+int dynlock_cache_init(void); -+void dynlock_cache_exit(void); -+void dynlock_init(struct dynlock *dl); -+struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, -+ enum dynlock_type lt, gfp_t gfp); -+void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock); -+int dynlock_is_locked(struct dynlock *dl, unsigned long value); -+ -+#endif -+ -Index: linux-stage/fs/ext4/Makefile -=================================================================== ---- linux-stage.orig/fs/ext4/Makefile -+++ linux-stage/fs/ext4/Makefile -@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o - ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o 
migrate.o mballoc.o block_validity.o move_extent.o \ -- mmp.o -+ mmp.o dynlocks.o - - ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o -Index: linux-stage/fs/ext4/super.c -=================================================================== ---- linux-stage.orig/fs/ext4/super.c -+++ linux-stage/fs/ext4/super.c -@@ -4620,20 +4620,23 @@ static int __init init_ext4_fs(void) - return err; - ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); - if (!ext4_kset) -- goto out4; -+ goto out5; - ext4_proc_root = proc_mkdir("fs/ext4", NULL); - - err = ext4_init_feat_adverts(); - - err = init_ext4_mballoc(); - if (err) -- goto out3; -+ goto out4; - - err = init_ext4_xattr(); - if (err) -- goto out2; -+ goto out3; - err = init_inodecache(); - if (err) -+ goto out2; -+ err = dynlock_cache_init(); -+ if (err) - goto out1; - err = register_filesystem(&ext4_fs_type); - if (err) -@@ -4643,16 +4646,18 @@ static int __init init_ext4_fs(void) - mutex_init(&ext4_li_mtx); - return 0; - out: -- destroy_inodecache(); -+ dynlock_cache_exit(); - out1: -- exit_ext4_xattr(); -+ destroy_inodecache(); - out2: -- exit_ext4_mballoc(); -+ exit_ext4_xattr(); - out3: -+ exit_ext4_mballoc(); -+out4: - ext4_exit_feat_adverts(); - remove_proc_entry("fs/ext4", NULL); - kset_unregister(ext4_kset); --out4: -+out5: - exit_ext4_system_zone(); - return err; - } -@@ -4661,6 +4666,7 @@ static void __exit exit_ext4_fs(void) - { - ext4_destroy_lazyinit_thread(); - unregister_filesystem(&ext4_fs_type); -+ dynlock_cache_exit(); - destroy_inodecache(); - exit_ext4_xattr(); - exit_ext4_mballoc(); diff --git a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-pdir-fix.patch b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-pdir-fix.patch index 419f386..0f261d1 100644 --- a/ldiskfs/kernel_patches/patches/rhel6.3/ext4-pdir-fix.patch +++ b/ldiskfs/kernel_patches/patches/rhel6.3/ext4-pdir-fix.patch @@ -2,14 +2,6 @@ Index: linux-stage/fs/ext4/ext4.h 
=================================================================== --- linux-stage.orig/fs/ext4/ext4.h 2012-08-07 11:52:38.994200699 -0700 +++ linux-stage/fs/ext4/ext4.h 2012-08-07 12:28:19.497442862 -0700 -@@ -16,6 +16,7 @@ - #ifndef _EXT4_H - #define _EXT4_H - -+#include - #include - #include - #include @@ -706,6 +707,9 @@ __u32 i_dtime; ext4_fsblk_t i_file_acl; diff --git a/ldiskfs/kernel_patches/patches/rhel6.3/ext4_pdirop.patch b/ldiskfs/kernel_patches/patches/rhel6.3/ext4_pdirop.patch index dbd6a8e..c6e93c3 100644 --- a/ldiskfs/kernel_patches/patches/rhel6.3/ext4_pdirop.patch +++ b/ldiskfs/kernel_patches/patches/rhel6.3/ext4_pdirop.patch @@ -2251,8 +2251,8 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ -- mmp.o dynlocks.o -+ htree_lock.o mmp.o dynlocks.o +- mmp.o ++ htree_lock.o mmp.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/ldiskfs/kernel_patches/patches/sles11sp2/ext4-dynlocks-common.patch b/ldiskfs/kernel_patches/patches/sles11sp2/ext4-dynlocks-common.patch deleted file mode 100644 index db04c62..0000000 --- a/ldiskfs/kernel_patches/patches/sles11sp2/ext4-dynlocks-common.patch +++ /dev/null @@ -1,370 +0,0 @@ ---- /dev/null -+++ b/fs/ext4/dynlocks.c -@@ -0,0 +1,236 @@ -+/* -+ * Dynamic Locks -+ * -+ * struct dynlock is lockspace -+ * one may request lock (exclusive or shared) for some value -+ * in that lockspace -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DYNLOCK_HANDLE_MAGIC 0xd19a10c -+#define DYNLOCK_HANDLE_DEAD 0xd1956ee -+#define DYNLOCK_LIST_MAGIC 0x11ee91e6 -+ -+static struct kmem_cache * dynlock_cachep = NULL; -+ -+struct dynlock_handle { -+ unsigned dh_magic; -+ struct list_head dh_list; -+ unsigned long dh_value; /* lock value */ -+ int dh_refcount; /* number of users */ -+ int dh_readers; 
-+ int dh_writers; -+ int dh_pid; /* holder of the lock */ -+ wait_queue_head_t dh_wait; -+}; -+ -+int __init dynlock_cache_init(void) -+{ -+ int rc = 0; -+ -+ /* printk(KERN_INFO "init dynlocks cache\n"); */ -+ dynlock_cachep = kmem_cache_create("dynlock_cache", -+ sizeof(struct dynlock_handle), -+ 0, -+ SLAB_HWCACHE_ALIGN, -+ NULL); -+ if (dynlock_cachep == NULL) { -+ printk(KERN_ERR "Not able to create dynlock cache"); -+ rc = -ENOMEM; -+ } -+ return rc; -+} -+ -+void dynlock_cache_exit(void) -+{ -+ /* printk(KERN_INFO "exit dynlocks cache\n"); */ -+ kmem_cache_destroy(dynlock_cachep); -+} -+ -+/* -+ * dynlock_init -+ * -+ * initialize lockspace -+ * -+ */ -+void dynlock_init(struct dynlock *dl) -+{ -+ spin_lock_init(&dl->dl_list_lock); -+ INIT_LIST_HEAD(&dl->dl_list); -+ dl->dl_magic = DYNLOCK_LIST_MAGIC; -+} -+EXPORT_SYMBOL(dynlock_init); -+ -+/* -+ * dynlock_lock -+ * -+ * acquires lock (exclusive or shared) in specified lockspace -+ * each lock in lockspace is allocated separately, so user have -+ * to specify GFP flags. -+ * routine returns pointer to lock. 
this pointer is intended to -+ * be passed to dynlock_unlock -+ * -+ */ -+struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, -+ enum dynlock_type lt, gfp_t gfp) -+{ -+ struct dynlock_handle *nhl = NULL; -+ struct dynlock_handle *hl; -+ -+ BUG_ON(dl == NULL); -+ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); -+ -+repeat: -+ /* find requested lock in lockspace */ -+ spin_lock(&dl->dl_list_lock); -+ BUG_ON(dl->dl_list.next == NULL); -+ BUG_ON(dl->dl_list.prev == NULL); -+ list_for_each_entry(hl, &dl->dl_list, dh_list) { -+ BUG_ON(hl->dh_list.next == NULL); -+ BUG_ON(hl->dh_list.prev == NULL); -+ BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); -+ if (hl->dh_value == value) { -+ /* lock is found */ -+ if (nhl) { -+ /* someone else just allocated -+ * lock we didn't find and just created -+ * so, we drop our lock -+ */ -+ kmem_cache_free(dynlock_cachep, nhl); -+ nhl = NULL; -+ } -+ hl->dh_refcount++; -+ goto found; -+ } -+ } -+ /* lock not found */ -+ if (nhl) { -+ /* we already have allocated lock. use it */ -+ hl = nhl; -+ nhl = NULL; -+ list_add(&hl->dh_list, &dl->dl_list); -+ goto found; -+ } -+ spin_unlock(&dl->dl_list_lock); -+ -+ /* lock not found and we haven't allocated lock yet. allocate it */ -+ nhl = kmem_cache_alloc(dynlock_cachep, gfp); -+ if (nhl == NULL) -+ return NULL; -+ nhl->dh_refcount = 1; -+ nhl->dh_value = value; -+ nhl->dh_readers = 0; -+ nhl->dh_writers = 0; -+ nhl->dh_magic = DYNLOCK_HANDLE_MAGIC; -+ init_waitqueue_head(&nhl->dh_wait); -+ -+ /* while lock is being allocated, someone else may allocate it -+ * and put onto to list. 
check this situation -+ */ -+ goto repeat; -+ -+found: -+ if (lt == DLT_WRITE) { -+ /* exclusive lock: user don't want to share lock at all -+ * NOTE: one process may take the same lock several times -+ * this functionaly is useful for rename operations */ -+ while ((hl->dh_writers && hl->dh_pid != current->pid) || -+ hl->dh_readers) { -+ spin_unlock(&dl->dl_list_lock); -+ wait_event(hl->dh_wait, -+ hl->dh_writers == 0 && hl->dh_readers == 0); -+ spin_lock(&dl->dl_list_lock); -+ } -+ hl->dh_writers++; -+ } else { -+ /* shared lock: user do not want to share lock with writer */ -+ while (hl->dh_writers) { -+ spin_unlock(&dl->dl_list_lock); -+ wait_event(hl->dh_wait, hl->dh_writers == 0); -+ spin_lock(&dl->dl_list_lock); -+ } -+ hl->dh_readers++; -+ } -+ hl->dh_pid = current->pid; -+ spin_unlock(&dl->dl_list_lock); -+ -+ return hl; -+} -+EXPORT_SYMBOL(dynlock_lock); -+ -+ -+/* -+ * dynlock_unlock -+ * -+ * user have to specify lockspace (dl) and pointer to lock structure -+ * returned by dynlock_lock() -+ * -+ */ -+void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl) -+{ -+ int wakeup = 0; -+ -+ BUG_ON(dl == NULL); -+ BUG_ON(hl == NULL); -+ BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); -+ -+ if (hl->dh_magic != DYNLOCK_HANDLE_MAGIC) -+ printk(KERN_EMERG "wrong lock magic: %#x\n", hl->dh_magic); -+ -+ BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); -+ BUG_ON(hl->dh_writers != 0 && current->pid != hl->dh_pid); -+ -+ spin_lock(&dl->dl_list_lock); -+ if (hl->dh_writers) { -+ BUG_ON(hl->dh_readers != 0); -+ hl->dh_writers--; -+ if (hl->dh_writers == 0) -+ wakeup = 1; -+ } else if (hl->dh_readers) { -+ hl->dh_readers--; -+ if (hl->dh_readers == 0) -+ wakeup = 1; -+ } else { -+ BUG(); -+ } -+ if (wakeup) { -+ hl->dh_pid = 0; -+ wake_up(&hl->dh_wait); -+ } -+ if (--(hl->dh_refcount) == 0) { -+ hl->dh_magic = DYNLOCK_HANDLE_DEAD; -+ list_del(&hl->dh_list); -+ kmem_cache_free(dynlock_cachep, hl); -+ } -+ spin_unlock(&dl->dl_list_lock); -+} 
-+EXPORT_SYMBOL(dynlock_unlock); -+ -+int dynlock_is_locked(struct dynlock *dl, unsigned long value) -+{ -+ struct dynlock_handle *hl; -+ int result = 0; -+ -+ /* find requested lock in lockspace */ -+ spin_lock(&dl->dl_list_lock); -+ BUG_ON(dl->dl_list.next == NULL); -+ BUG_ON(dl->dl_list.prev == NULL); -+ list_for_each_entry(hl, &dl->dl_list, dh_list) { -+ BUG_ON(hl->dh_list.next == NULL); -+ BUG_ON(hl->dh_list.prev == NULL); -+ BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); -+ if (hl->dh_value == value && hl->dh_pid == current->pid) { -+ /* lock is found */ -+ result = 1; -+ break; -+ } -+ } -+ spin_unlock(&dl->dl_list_lock); -+ return result; -+} -+EXPORT_SYMBOL(dynlock_is_locked); ---- /dev/null -+++ b/include/linux/dynlocks.h -@@ -0,0 +1,34 @@ -+#ifndef _LINUX_DYNLOCKS_H -+#define _LINUX_DYNLOCKS_H -+ -+#include -+#include -+ -+struct dynlock_handle; -+ -+/* -+ * lock's namespace: -+ * - list of locks -+ * - lock to protect this list -+ */ -+struct dynlock { -+ unsigned dl_magic; -+ struct list_head dl_list; -+ spinlock_t dl_list_lock; -+}; -+ -+enum dynlock_type { -+ DLT_WRITE, -+ DLT_READ -+}; -+ -+int dynlock_cache_init(void); -+void dynlock_cache_exit(void); -+void dynlock_init(struct dynlock *dl); -+struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, -+ enum dynlock_type lt, gfp_t gfp); -+void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock); -+int dynlock_is_locked(struct dynlock *dl, unsigned long value); -+ -+#endif -+ ---- a/fs/ext4/super.c -+++ b/fs/ext4/super.c -@@ -5178,30 +5178,33 @@ static int __init ext4_init_fs(void) - return err; - err = ext4_init_system_zone(); - if (err) -- goto out7; -+ goto out8; - ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); - if (!ext4_kset) -- goto out6; -+ goto out7; - ext4_proc_root = proc_mkdir("fs/ext4", NULL); - if (!ext4_proc_root) -- goto out5; -+ goto out6; - - err = ext4_init_feat_adverts(); - if (err) -- goto out4; -+ goto out5; - - err = ext4_init_mballoc(); 
- if (err) -- goto out3; -+ goto out4; - - err = ext4_init_xattr(); - if (err) -- goto out2; -+ goto out3; - err = init_inodecache(); - if (err) -- goto out1; -+ goto out2; - register_as_ext3(); - register_as_ext2(); -+ err = dynlock_cache_init(); -+ if (err) -+ goto out1; - err = register_filesystem(&ext4_fs_type); - if (err) - goto out; -@@ -5210,22 +5213,24 @@ static int __init ext4_init_fs(void) - mutex_init(&ext4_li_mtx); - return 0; - out: -+ dynlock_cache_exit(); -+out1: - unregister_as_ext2(); - unregister_as_ext3(); - destroy_inodecache(); --out1: -- ext4_exit_xattr(); - out2: -- ext4_exit_mballoc(); -+ ext4_exit_xattr(); - out3: -- ext4_exit_feat_adverts(); -+ ext4_exit_mballoc(); - out4: -- remove_proc_entry("fs/ext4", NULL); -+ ext4_exit_feat_adverts(); - out5: -- kset_unregister(ext4_kset); -+ remove_proc_entry("fs/ext4", NULL); - out6: -- ext4_exit_system_zone(); -+ kset_unregister(ext4_kset); - out7: -+ ext4_exit_system_zone(); -+out8: - ext4_exit_pageio(); - return err; - } -@@ -5236,6 +5241,7 @@ static void __exit ext4_exit_fs(void) - unregister_as_ext2(); - unregister_as_ext3(); - unregister_filesystem(&ext4_fs_type); -+ dynlock_cache_exit(); - destroy_inodecache(); - ext4_exit_xattr(); - ext4_exit_mballoc(); ---- a/fs/ext4/Makefile -+++ b/fs/ext4/Makefile -@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o - ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ -- mmp.o -+ mmp.o dynlocks.o - - ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/ldiskfs/kernel_patches/patches/sles11sp2/ext4_pdirop.patch b/ldiskfs/kernel_patches/patches/sles11sp2/ext4_pdirop.patch index 614b4a7..4d2acff 100644 --- a/ldiskfs/kernel_patches/patches/sles11sp2/ext4_pdirop.patch +++ b/ldiskfs/kernel_patches/patches/sles11sp2/ext4_pdirop.patch @@ 
-13,8 +13,8 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ -- mmp.o dynlocks.o -+ htree_lock.o mmp.o dynlocks.o +- mmp.o ++ htree_lock.o mmp.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series index 5e9698d..38a6203 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.4.series @@ -23,7 +23,6 @@ rhel6.3/ext4-pdir-fix.patch rhel6.3/ext4-add-more-error-checks-to-ext4_mkdir.patch rhel6.3/ext4-osd-iop-common.patch rhel6.3/ext4-osd-iam-exports.patch -rhel6.3/ext4-dynlocks-common.patch rhel6.3/ext4-hash-indexed-dir-dotdot-update.patch rhel6.3/ext4-kill-dx_root.patch rhel6.3/ext4-extents-mount-option.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series index 14af33a..a142b09 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series @@ -23,7 +23,6 @@ rhel6.3/ext4-pdir-fix.patch rhel6.3/ext4-add-more-error-checks-to-ext4_mkdir.patch rhel6.3/ext4-osd-iop-common.patch rhel6.3/ext4-osd-iam-exports.patch -rhel6.3/ext4-dynlocks-common.patch rhel6.3/ext4-hash-indexed-dir-dotdot-update.patch rhel6.3/ext4-kill-dx_root.patch rhel6.3/ext4-extents-mount-option.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series index 97ba519..40bbf41 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles11.series @@ -24,7 +24,6 @@ rhel6.3/ext4-pdir-fix.patch 
rhel6.3/ext4-add-more-error-checks-to-ext4_mkdir.patch rhel6.3/ext4-osd-iop-common.patch rhel6.3/ext4-osd-iam-exports.patch -rhel6.3/ext4-dynlocks-common.patch rhel6.3/ext4-hash-indexed-dir-dotdot-update.patch rhel6.3/ext4-kill-dx_root.patch rhel6.3/ext4-extents-mount-option.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series index 179edb7..35d04d2 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11.series @@ -27,7 +27,6 @@ rhel6.3/ext4-alloc-policy-2.6.patch sles11sp2/ext4-force_over_128tb.patch rhel6.3/ext4-pdir-fix.patch sles11sp2/ext4-osd-iop-common.patch -sles11sp2/ext4-dynlocks-common.patch rhel6.3/ext4-osd-iam-exports.patch rhel6.3/ext4-hash-indexed-dir-dotdot-update.patch sles11sp2/ext4-kill-dx_root.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series index 0c62588..b4909a7 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-3.0-sles11sp3.series @@ -26,7 +26,6 @@ rhel6.3/ext4-alloc-policy-2.6.patch sles11sp2/ext4-force_over_128tb.patch rhel6.3/ext4-pdir-fix.patch sles11sp2/ext4-osd-iop-common.patch -sles11sp2/ext4-dynlocks-common.patch rhel6.3/ext4-osd-iam-exports.patch rhel6.3/ext4-hash-indexed-dir-dotdot-update.patch sles11sp2/ext4-kill-dx_root.patch diff --git a/lustre/mdd/Makefile.in b/lustre/mdd/Makefile.in index 5fe2c02..fd3e7e5 100644 --- a/lustre/mdd/Makefile.in +++ b/lustre/mdd/Makefile.in @@ -3,6 +3,4 @@ mdd-objs := mdd_object.o mdd_lov.o mdd_orphans.o mdd_lproc.o mdd_dir.o mdd-objs += mdd_device.o mdd_trans.o mdd_permission.o mdd_lock.o mdd-objs += mdd_compat.o -EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LDISKFS_DIR@ -I@LDISKFS_DIR@/ldiskfs - @INCLUDE_RULES@ diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 8321f36..2919a3b 100644 --- 
a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -57,25 +57,52 @@ static struct lu_name lname_dotdot = { sizeof(dotdot) - 1 }; -static int __mdd_lookup(const struct lu_env *, struct md_object *, - const struct lu_name *, struct lu_fid*, int); - +/* Get FID from name and parent */ static int -__mdd_lookup_locked(const struct lu_env *env, struct md_object *pobj, - const struct lu_name *lname, struct lu_fid* fid, int mask) +__mdd_lookup(const struct lu_env *env, struct md_object *pobj, + const struct lu_name *lname, struct lu_fid* fid, int mask) { - const char *name = lname->ln_name; - struct mdd_object *mdd_obj = md2mdd_obj(pobj); - struct dynlock_handle *dlh; + const char *name = lname->ln_name; + const struct dt_key *key = (const struct dt_key *)name; + struct mdd_object *mdd_obj = md2mdd_obj(pobj); + struct mdd_device *m = mdo2mdd(pobj); + struct dt_object *dir = mdd_object_child(mdd_obj); int rc; + ENTRY; - dlh = mdd_pdo_read_lock(env, mdd_obj, name, MOR_TGT_PARENT); - if (unlikely(dlh == NULL)) - return -ENOMEM; - rc = __mdd_lookup(env, pobj, lname, fid, mask); - mdd_pdo_read_unlock(env, mdd_obj, dlh); + if (unlikely(mdd_is_dead_obj(mdd_obj))) + RETURN(-ESTALE); - return rc; + if (mdd_object_remote(mdd_obj)) { + CDEBUG(D_INFO, "%s: Object "DFID" locates on remote server\n", + mdd2obd_dev(m)->obd_name, PFID(mdo2fid(mdd_obj))); + } else if (!mdd_object_exists(mdd_obj)) { + RETURN(-ESTALE); + } + + /* The common filename length check. 
*/ + if (unlikely(lname->ln_namelen > m->mdd_dt_conf.ddp_max_name_len)) + RETURN(-ENAMETOOLONG); + + rc = mdd_permission_internal_locked(env, mdd_obj, NULL, mask, + MOR_TGT_PARENT); + if (rc) + RETURN(rc); + + if (likely(S_ISDIR(mdd_object_type(mdd_obj)) && + dt_try_as_dir(env, dir))) { + + rc = dir->do_index_ops->dio_lookup(env, dir, + (struct dt_rec *)fid, key, + mdd_object_capa(env, mdd_obj)); + if (rc > 0) + rc = 0; + else if (rc == 0) + rc = -ENOENT; + } else + rc = -ENOTDIR; + + RETURN(rc); } int mdd_lookup(const struct lu_env *env, @@ -84,14 +111,14 @@ int mdd_lookup(const struct lu_env *env, { int rc; ENTRY; - rc = __mdd_lookup_locked(env, pobj, lname, fid, MAY_EXEC); + rc = __mdd_lookup(env, pobj, lname, fid, MAY_EXEC); RETURN(rc); } int mdd_parent_fid(const struct lu_env *env, struct mdd_object *obj, struct lu_fid *fid) { - return __mdd_lookup_locked(env, &obj->mod_obj, &lname_dotdot, fid, 0); + return __mdd_lookup(env, &obj->mod_obj, &lname_dotdot, fid, 0); } /* @@ -1183,7 +1210,6 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, struct mdd_object *mdd_tobj = md2mdd_obj(tgt_obj); struct mdd_object *mdd_sobj = md2mdd_obj(src_obj); struct mdd_device *mdd = mdo2mdd(src_obj); - struct dynlock_handle *dlh; struct thandle *handle; struct linkea_data *ldata = &mdd_env_info(env)->mti_link_data; int rc; @@ -1207,11 +1233,7 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, if (rc) GOTO(stop, rc); - dlh = mdd_pdo_write_lock(env, mdd_tobj, name, MOR_TGT_CHILD); - if (dlh == NULL) - GOTO(out_trans, rc = -ENOMEM); mdd_write_lock(env, mdd_sobj, MOR_TGT_CHILD); - rc = mdd_link_sanity_check(env, mdd_tobj, lname, mdd_sobj); if (rc) GOTO(out_unlock, rc); @@ -1250,8 +1272,6 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, EXIT; out_unlock: mdd_write_unlock(env, mdd_sobj); - mdd_pdo_write_unlock(env, mdd_tobj, dlh); -out_trans: if (rc == 0) rc = mdd_changelog_ns_store(env, mdd, CL_HARDLINK, 0, 
mdd_sobj, mdd_tobj, lname, handle); @@ -1437,7 +1457,6 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, struct mdd_object *mdd_pobj = md2mdd_obj(pobj); struct mdd_object *mdd_cobj = NULL; struct mdd_device *mdd = mdo2mdd(pobj); - struct dynlock_handle *dlh; struct thandle *handle; int rc, is_dir = 0; ENTRY; @@ -1465,10 +1484,6 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, if (rc) GOTO(stop, rc); - dlh = mdd_pdo_write_lock(env, mdd_pobj, name, MOR_TGT_PARENT); - if (dlh == NULL) - GOTO(stop, rc = -ENOMEM); - if (likely(mdd_cobj != NULL)) { mdd_write_lock(env, mdd_cobj, MOR_TGT_CHILD); @@ -1523,10 +1538,8 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, GOTO(cleanup, rc); /* Enough for only unlink the entry */ - if (unlikely(mdd_cobj == NULL)) { - mdd_pdo_write_unlock(env, mdd_pobj, dlh); + if (unlikely(mdd_cobj == NULL)) GOTO(stop, rc); - } if (cattr->la_nlink > 0 || mdd_cobj->mod_count > 0) { /* update ctime of an unlinked file only if it is still @@ -1559,7 +1572,6 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, EXIT; cleanup: mdd_write_unlock(env, mdd_cobj); - mdd_pdo_write_unlock(env, mdd_pobj, dlh); if (rc == 0) { int cl_flags = 0; @@ -1677,54 +1689,6 @@ out_free: RETURN(rc); } -/* Get fid from name and parent */ -static int -__mdd_lookup(const struct lu_env *env, struct md_object *pobj, - const struct lu_name *lname, struct lu_fid* fid, int mask) -{ - const char *name = lname->ln_name; - const struct dt_key *key = (const struct dt_key *)name; - struct mdd_object *mdd_obj = md2mdd_obj(pobj); - struct mdd_device *m = mdo2mdd(pobj); - struct dt_object *dir = mdd_object_child(mdd_obj); - int rc; - ENTRY; - - if (unlikely(mdd_is_dead_obj(mdd_obj))) - RETURN(-ESTALE); - - if (mdd_object_remote(mdd_obj)) { - CDEBUG(D_INFO, "%s: Object "DFID" locates on remote server\n", - mdd2obd_dev(m)->obd_name, PFID(mdo2fid(mdd_obj))); - } else if 
(!mdd_object_exists(mdd_obj)) { - RETURN(-ESTALE); - } - - /* The common filename length check. */ - if (unlikely(lname->ln_namelen > m->mdd_dt_conf.ddp_max_name_len)) - RETURN(-ENAMETOOLONG); - - rc = mdd_permission_internal_locked(env, mdd_obj, NULL, mask, - MOR_TGT_PARENT); - if (rc) - RETURN(rc); - - if (likely(S_ISDIR(mdd_object_type(mdd_obj)) && - dt_try_as_dir(env, dir))) { - - rc = dir->do_index_ops->dio_lookup(env, dir, - (struct dt_rec *)fid, key, - mdd_object_capa(env, mdd_obj)); - if (rc > 0) - rc = 0; - else if (rc == 0) - rc = -ENOENT; - } else - rc = -ENOTDIR; - - RETURN(rc); -} - static int mdd_declare_object_initialize(const struct lu_env *env, struct mdd_object *parent, struct mdd_object *child, @@ -1831,8 +1795,7 @@ static int mdd_create_sanity_check(const struct lu_env *env, * _index_insert also, for avoiding rolling back if exists * _index_insert. */ - rc = __mdd_lookup_locked(env, pobj, lname, fid, - MAY_WRITE | MAY_EXEC); + rc = __mdd_lookup(env, pobj, lname, fid, MAY_WRITE | MAY_EXEC); if (rc != -ENOENT) RETURN(rc ? : -EEXIST); } else { @@ -2030,7 +1993,6 @@ static int mdd_create(const struct lu_env *env, struct md_object *pobj, struct lu_buf acl_buf; struct lu_buf def_acl_buf; struct linkea_data *ldata = &info->mti_link_data; - struct dynlock_handle *dlh; const char *name = lname->ln_name; int rc, created = 0, initialized = 0, inserted = 0; ENTRY; @@ -2110,10 +2072,6 @@ static int mdd_create(const struct lu_env *env, struct md_object *pobj, if (rc) GOTO(out_stop, rc); - dlh = mdd_pdo_write_lock(env, mdd_pobj, name, MOR_TGT_PARENT); - if (dlh == NULL) - GOTO(out_trans, rc = -ENOMEM); - mdd_write_lock(env, son, MOR_TGT_CHILD); rc = mdd_object_create_internal(env, NULL, son, attr, handle, spec); if (rc) { @@ -2257,8 +2215,6 @@ cleanup: mdd_write_unlock(env, son); } - mdd_pdo_write_unlock(env, mdd_pobj, dlh); -out_trans: if (rc == 0 && fid_is_namespace_visible(mdo2fid(son))) rc = mdd_changelog_ns_store(env, mdd, S_ISDIR(attr->la_mode) ? 
CL_MKDIR : @@ -2502,7 +2458,6 @@ static int mdd_rename(const struct lu_env *env, struct mdd_device *mdd = mdo2mdd(src_pobj); struct mdd_object *mdd_sobj = NULL; /* source object */ struct mdd_object *mdd_tobj = NULL; - struct dynlock_handle *sdlh = NULL, *tdlh = NULL; struct thandle *handle; struct linkea_data *ldata = &mdd_env_info(env)->mti_link_data; const struct lu_fid *tpobj_fid = mdo2fid(mdd_tpobj); @@ -2557,26 +2512,6 @@ static int mdd_rename(const struct lu_env *env, if (rc < 0) GOTO(cleanup_unlocked, rc); - /* Get locks in determined order */ - if (rc == MDD_RN_SAME) { - sdlh = mdd_pdo_write_lock(env, mdd_spobj, - sname, MOR_SRC_PARENT); - /* check hashes to determine do we need one lock or two */ - if (mdd_name2hash(sname) != mdd_name2hash(tname)) - tdlh = mdd_pdo_write_lock(env, mdd_tpobj, tname, - MOR_TGT_PARENT); - else - tdlh = sdlh; - } else if (rc == MDD_RN_SRCTGT) { - sdlh = mdd_pdo_write_lock(env, mdd_spobj, sname,MOR_SRC_PARENT); - tdlh = mdd_pdo_write_lock(env, mdd_tpobj, tname,MOR_TGT_PARENT); - } else { - tdlh = mdd_pdo_write_lock(env, mdd_tpobj, tname,MOR_SRC_PARENT); - sdlh = mdd_pdo_write_lock(env, mdd_spobj, sname,MOR_TGT_PARENT); - } - if (sdlh == NULL || tdlh == NULL) - GOTO(cleanup, rc = -ENOMEM); - is_dir = S_ISDIR(so_attr->la_mode); /* Remove source name from source directory */ @@ -2785,10 +2720,6 @@ fixup_spobj2: cleanup: if (tobj_locked) mdd_write_unlock(env, mdd_tobj); - if (likely(tdlh) && sdlh != tdlh) - mdd_pdo_write_unlock(env, mdd_tpobj, tdlh); - if (likely(sdlh)) - mdd_pdo_write_unlock(env, mdd_spobj, sdlh); cleanup_unlocked: if (rc == 0) rc = mdd_changelog_ext_ns_store(env, mdd, CL_RENAME, cl_flags, diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index b3a4bc3..4f5d0a6 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -52,10 +52,6 @@ #include #include -/* PDO lock is unnecessary for current MDT stack because operations - * are already protected by ldlm lock */ -#define 
MDD_DISABLE_PDO_LOCK 1 - /* Changelog flags */ /** changelog is recording */ #define CLM_ON 0x00001 @@ -125,9 +121,6 @@ struct mdd_object { __u32 mod_valid; __u64 mod_cltime; unsigned long mod_flags; -#ifndef MDD_DISABLE_PDO_LOCK - struct dynlock mod_pdlock; -#endif #ifdef CONFIG_LOCKDEP /* "dep_map" name is assumed by lockdep.h macros. */ struct lockdep_map dep_map; @@ -256,16 +249,11 @@ void mdd_write_unlock(const struct lu_env *env, struct mdd_object *obj); void mdd_read_unlock(const struct lu_env *env, struct mdd_object *obj); int mdd_write_locked(const struct lu_env *env, struct mdd_object *obj); -void mdd_pdlock_init(struct mdd_object *obj); unsigned long mdd_name2hash(const char *name); void *mdd_pdo_write_lock(const struct lu_env *env, struct mdd_object *obj, const char *name, enum mdd_object_role role); -void *mdd_pdo_read_lock(const struct lu_env *env, struct mdd_object *obj, - const char *name, enum mdd_object_role role); void mdd_pdo_write_unlock(const struct lu_env *env, struct mdd_object *obj, void *dlh); -void mdd_pdo_read_unlock(const struct lu_env *env, struct mdd_object *obj, - void *dlh); /* mdd_dir.c */ int mdd_parent_fid(const struct lu_env *env, struct mdd_object *obj, struct lu_fid *fid); diff --git a/lustre/mdd/mdd_lock.c b/lustre/mdd/mdd_lock.c index d9921af..2d33a16 100644 --- a/lustre/mdd/mdd_lock.c +++ b/lustre/mdd/mdd_lock.c @@ -87,116 +87,3 @@ unsigned long mdd_name2hash(const char *name) { return full_name_hash((unsigned char*)name, strlen(name)); } - -/* Methods for parallel directory locking */ -#if MDD_DISABLE_PDO_LOCK - -static void *pdo_handle = (void *)0xbabecafe; - -void mdd_pdlock_init(struct mdd_object *obj) -{ -} - -void *mdd_pdo_write_lock(const struct lu_env *env, struct mdd_object *obj, - const char *name, enum mdd_object_role role) -{ - return pdo_handle; -} - -void *mdd_pdo_read_lock(const struct lu_env *env, struct mdd_object *obj, - const char *name, enum mdd_object_role role) -{ - return pdo_handle; -} - -void 
mdd_pdo_write_unlock(const struct lu_env *env, struct mdd_object *obj, - void *dlh) -{ - LASSERT(dlh == pdo_handle); -} - -void mdd_pdo_read_unlock(const struct lu_env *env, struct mdd_object *obj, - void *dlh) -{ - LASSERT(dlh == pdo_handle); -} - -#else /* !MDD_DISABLE_PDO_LOCK */ - -#ifdef CONFIG_LOCKDEP -static struct lock_class_key mdd_pdirop_key; - -#define RETIP ((unsigned long)__builtin_return_address(0)) - -static void mdd_lockdep_init(struct mdd_object *obj) -{ - lockdep_set_class_and_name(obj, &mdd_pdirop_key, "pdir"); -} - -static void mdd_lockdep_pd_acquire(struct mdd_object *obj, - enum mdd_object_role role) -{ - lock_map_acquire(&obj->dep_map); -} - -static void mdd_lockdep_pd_release(struct mdd_object *obj) -{ - lock_map_release(&obj->dep_map); -} - -#else /* !CONFIG_LOCKDEP */ - -static void mdd_lockdep_init(struct mdd_object *obj) -{} -static void mdd_lockdep_pd_acquire(struct mdd_object *obj, - enum mdd_object_role role) -{} -static void mdd_lockdep_pd_release(struct mdd_object *obj) -{} - -#endif /* !CONFIG_LOCKDEP */ - -void mdd_pdlock_init(struct mdd_object *obj) -{ - dynlock_init(&obj->mod_pdlock); - mdd_lockdep_init(obj); -} - -void *mdd_pdo_write_lock(const struct lu_env *env, struct mdd_object *obj, - const char *name, enum mdd_object_role role) -{ - struct dynlock_handle *handle; - unsigned long value = mdd_name2hash(name); - - handle = dynlock_lock(&obj->mod_pdlock, value, DLT_WRITE, GFP_NOFS); - if (handle != NULL) - mdd_lockdep_pd_acquire(obj, role); - return handle; -} - -void *mdd_pdo_read_lock(const struct lu_env *env, struct mdd_object *obj, - const char *name, enum mdd_object_role role) -{ - struct dynlock_handle *handle; - unsigned long value = mdd_name2hash(name); - handle = dynlock_lock(&obj->mod_pdlock, value, DLT_READ, GFP_NOFS); - if (handle != NULL) - mdd_lockdep_pd_acquire(obj, role); - return handle; -} - -void mdd_pdo_write_unlock(const struct lu_env *env, struct mdd_object *obj, - void *dlh) -{ - 
mdd_lockdep_pd_release(obj); - return dynlock_unlock(&obj->mod_pdlock, dlh); -} - -void mdd_pdo_read_unlock(const struct lu_env *env, struct mdd_object *obj, - void *dlh) -{ - mdd_lockdep_pd_release(obj); - return dynlock_unlock(&obj->mod_pdlock, dlh); -} - -#endif /* MDD_DISABLE_PDO_LOCK */ diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index ea03362..2ed976d 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -173,7 +173,6 @@ static int mdd_object_init(const struct lu_env *env, struct lu_object *o, mdd_obj->mod_cltime = 0; under = &d->mdd_child->dd_lu_dev; below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under); - mdd_pdlock_init(mdd_obj); if (IS_ERR(below)) RETURN(PTR_ERR(below)); diff --git a/lustre/osd-ldiskfs/Makefile.in b/lustre/osd-ldiskfs/Makefile.in index dbe23b3..a7ffd31 100644 --- a/lustre/osd-ldiskfs/Makefile.in +++ b/lustre/osd-ldiskfs/Makefile.in @@ -1,7 +1,7 @@ MODULES := osd_ldiskfs -osd_ldiskfs-objs := osd_handler.o osd_oi.o osd_lproc.o osd_iam.o \ - osd_iam_lfix.o osd_iam_lvar.o osd_io.o osd_compat.o \ - osd_scrub.o osd_quota.o osd_quota_fmt.o +osd_ldiskfs-objs = osd_handler.o osd_oi.o osd_lproc.o osd_iam.o \ + osd_iam_lfix.o osd_iam_lvar.o osd_io.o osd_compat.o \ + osd_scrub.o osd_dynlocks.o osd_quota.o osd_quota_fmt.o EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@abs_top_builddir@ -I@abs_top_builddir@/ldiskfs diff --git a/lustre/osd-ldiskfs/autoMakefile.am b/lustre/osd-ldiskfs/autoMakefile.am index 5bfd6ff..89a20c1 100644 --- a/lustre/osd-ldiskfs/autoMakefile.am +++ b/lustre/osd-ldiskfs/autoMakefile.am @@ -44,4 +44,4 @@ endif MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ EXTRA_DIST := $(osd_ldiskfs-objs:%.o=%.c) osd_internal.h osd_oi.h \ - osd_iam.h osd_scrub.h osd_quota_fmt.h + osd_iam.h osd_dynlocks.h osd_scrub.h osd_quota_fmt.h diff --git a/lustre/osd-ldiskfs/osd_dynlocks.c b/lustre/osd-ldiskfs/osd_dynlocks.c new file mode 100644 index 0000000..0e01a67 --- /dev/null +++ b/lustre/osd-ldiskfs/osd_dynlocks.c @@ -0,0 
+1,222 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + * + * Dynamic Locks + * + * struct dynlock is lockspace + * one may request lock (exclusive or shared) for some value + * in that lockspace + * + */ +#ifdef __KERNEL__ + +#include +#include +#include + +#include + +#include +#include "osd_dynlocks.h" + +extern struct kmem_cache *dynlock_cachep; + +#define DYNLOCK_HANDLE_MAGIC 0xd19a10c +#define DYNLOCK_HANDLE_DEAD 0xd1956ee +#define DYNLOCK_LIST_MAGIC 0x11ee91e6 + +/* + * dynlock_init + * + * initialize lockspace + * + */ +void dynlock_init(struct dynlock *dl) +{ + spin_lock_init(&dl->dl_list_lock); + INIT_LIST_HEAD(&dl->dl_list); + dl->dl_magic = DYNLOCK_LIST_MAGIC; +} + +/* + * dynlock_lock + * + * acquires lock (exclusive or shared) in specified lockspace + * each lock in lockspace is allocated separately, so user have + * to specify GFP flags. + * routine returns pointer to lock. 
this pointer is intended to + * be passed to dynlock_unlock + * + */ +struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, + enum dynlock_type lt, gfp_t gfp) +{ + struct dynlock_handle *nhl = NULL; + struct dynlock_handle *hl; + + BUG_ON(dl == NULL); + BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); + +repeat: + /* find requested lock in lockspace */ + spin_lock(&dl->dl_list_lock); + BUG_ON(dl->dl_list.next == NULL); + BUG_ON(dl->dl_list.prev == NULL); + list_for_each_entry(hl, &dl->dl_list, dh_list) { + BUG_ON(hl->dh_list.next == NULL); + BUG_ON(hl->dh_list.prev == NULL); + BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); + if (hl->dh_value == value) { + /* lock is found */ + if (nhl) { + /* someone else just allocated + * lock we didn't find and just created + * so, we drop our lock + */ + OBD_SLAB_FREE(nhl, dynlock_cachep, sizeof(*nhl)); + } + hl->dh_refcount++; + goto found; + } + } + /* lock not found */ + if (nhl) { + /* we already have allocated lock. use it */ + hl = nhl; + nhl = NULL; + list_add(&hl->dh_list, &dl->dl_list); + goto found; + } + spin_unlock(&dl->dl_list_lock); + + /* lock not found and we haven't allocated lock yet. allocate it */ + OBD_SLAB_ALLOC_GFP(nhl, dynlock_cachep, sizeof(*nhl), gfp); + if (nhl == NULL) + return NULL; + nhl->dh_refcount = 1; + nhl->dh_value = value; + nhl->dh_readers = 0; + nhl->dh_writers = 0; + nhl->dh_magic = DYNLOCK_HANDLE_MAGIC; + init_waitqueue_head(&nhl->dh_wait); + + /* while lock is being allocated, someone else may allocate it + * and put it onto the list. 
check this situation + */ + goto repeat; + +found: + if (lt == DLT_WRITE) { + /* exclusive lock: user doesn't want to share lock at all + * NOTE: one process may take the same lock several times + * this functionality is useful for rename operations */ + while ((hl->dh_writers && hl->dh_pid != current->pid) || + hl->dh_readers) { + spin_unlock(&dl->dl_list_lock); + wait_event(hl->dh_wait, + hl->dh_writers == 0 && hl->dh_readers == 0); + spin_lock(&dl->dl_list_lock); + } + hl->dh_writers++; + } else { + /* shared lock: user does not want to share lock with writer */ + while (hl->dh_writers) { + spin_unlock(&dl->dl_list_lock); + wait_event(hl->dh_wait, hl->dh_writers == 0); + spin_lock(&dl->dl_list_lock); + } + hl->dh_readers++; + } + hl->dh_pid = current->pid; + spin_unlock(&dl->dl_list_lock); + + return hl; +} + +/* + * dynlock_unlock + * + * user has to specify lockspace (dl) and pointer to lock structure + * returned by dynlock_lock() + * + */ +void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *hl) +{ + int wakeup = 0; + + BUG_ON(dl == NULL); + BUG_ON(hl == NULL); + BUG_ON(dl->dl_magic != DYNLOCK_LIST_MAGIC); + + if (hl->dh_magic != DYNLOCK_HANDLE_MAGIC) + printk(KERN_EMERG "wrong lock magic: %#x\n", hl->dh_magic); + + BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); + BUG_ON(hl->dh_writers != 0 && current->pid != hl->dh_pid); + + spin_lock(&dl->dl_list_lock); + if (hl->dh_writers) { + BUG_ON(hl->dh_readers != 0); + hl->dh_writers--; + if (hl->dh_writers == 0) + wakeup = 1; + } else if (hl->dh_readers) { + hl->dh_readers--; + if (hl->dh_readers == 0) + wakeup = 1; + } else { + BUG(); + } + if (wakeup) { + hl->dh_pid = 0; + wake_up(&hl->dh_wait); + } + if (--(hl->dh_refcount) == 0) { + hl->dh_magic = DYNLOCK_HANDLE_DEAD; + list_del(&hl->dh_list); + OBD_SLAB_FREE(hl, dynlock_cachep, sizeof(*hl)); + } + spin_unlock(&dl->dl_list_lock); +} + +int dynlock_is_locked(struct dynlock *dl, unsigned long value) +{ + struct dynlock_handle *hl; + int result = 0; + + /* 
find requested lock in lockspace */ + spin_lock(&dl->dl_list_lock); + BUG_ON(dl->dl_list.next == NULL); + BUG_ON(dl->dl_list.prev == NULL); + list_for_each_entry(hl, &dl->dl_list, dh_list) { + BUG_ON(hl->dh_list.next == NULL); + BUG_ON(hl->dh_list.prev == NULL); + BUG_ON(hl->dh_magic != DYNLOCK_HANDLE_MAGIC); + if (hl->dh_value == value && hl->dh_pid == current->pid) { + /* lock is found */ + result = 1; + break; + } + } + spin_unlock(&dl->dl_list_lock); + return result; +} +#endif diff --git a/lustre/osd-ldiskfs/osd_dynlocks.h b/lustre/osd-ldiskfs/osd_dynlocks.h new file mode 100644 index 0000000..3f42918 --- /dev/null +++ b/lustre/osd-ldiskfs/osd_dynlocks.h @@ -0,0 +1,40 @@ +#ifndef _OSD_DYNLOCKS_H +#define _OSD_DYNLOCKS_H + +#include +#include + +/* + * lock's namespace: + * - list of locks + * - lock to protect this list + */ +struct dynlock { + unsigned dl_magic; + struct list_head dl_list; + spinlock_t dl_list_lock; +}; + +enum dynlock_type { + DLT_WRITE, + DLT_READ +}; + +struct dynlock_handle { + unsigned dh_magic; + struct list_head dh_list; + unsigned long dh_value; /* lock value */ + int dh_refcount; /* number of users */ + int dh_readers; + int dh_writers; + int dh_pid; /* holder of the lock */ + wait_queue_head_t dh_wait; +}; + +void dynlock_init(struct dynlock *dl); +struct dynlock_handle *dynlock_lock(struct dynlock *dl, unsigned long value, + enum dynlock_type lt, gfp_t gfp); +void dynlock_unlock(struct dynlock *dl, struct dynlock_handle *lock); +int dynlock_is_locked(struct dynlock *dl, unsigned long value); + +#endif diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 7a6815a..0add02e 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -64,6 +64,7 @@ #include #include "osd_internal.h" +#include "osd_dynlocks.h" /* llo_* api support */ #include @@ -77,6 +78,20 @@ int ldiskfs_track_declares_assert; CFS_MODULE_PARM(ldiskfs_track_declares_assert, "i", int, 0644, "LBUG during 
tracking of declares"); +/* Slab to allocate dynlocks */ +struct kmem_cache *dynlock_cachep; + +static struct lu_kmem_descr ldiskfs_caches[] = { + { + .ckd_cache = &dynlock_cachep, + .ckd_name = "dynlock_cache", + .ckd_size = sizeof(struct dynlock_handle) + }, + { + .ckd_cache = NULL + } +}; + static const char dot[] = "."; static const char dotdot[] = ".."; static const char remote_obj_dir[] = "REM_OBJ_DIR"; @@ -5828,16 +5843,26 @@ static struct obd_ops osd_obd_device_ops = { static int __init osd_mod_init(void) { struct lprocfs_static_vars lvars; + int rc; + + osd_oi_mod_init(); + lprocfs_osd_init_vars(&lvars); - osd_oi_mod_init(); - lprocfs_osd_init_vars(&lvars); - return class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars, - LUSTRE_OSD_LDISKFS_NAME, &osd_device_type); + rc = lu_kmem_init(ldiskfs_caches); + if (rc) + return rc; + + rc = class_register_type(&osd_obd_device_ops, NULL, lvars.module_vars, + LUSTRE_OSD_LDISKFS_NAME, &osd_device_type); + if (rc) + lu_kmem_fini(ldiskfs_caches); + return rc; } static void __exit osd_mod_exit(void) { class_unregister_type(LUSTRE_OSD_LDISKFS_NAME); + lu_kmem_fini(ldiskfs_caches); } MODULE_AUTHOR("Sun Microsystems, Inc. "); diff --git a/lustre/osd-ldiskfs/osd_iam.h b/lustre/osd-ldiskfs/osd_iam.h index a782769..9c03e08 100644 --- a/lustre/osd-ldiskfs/osd_iam.h +++ b/lustre/osd-ldiskfs/osd_iam.h @@ -45,9 +45,10 @@ #include #include -#include + +#include "osd_dynlocks.h" /* - * linux/include/linux/osd_iam.h + * osd_iam.h */ #ifndef CLASSERT #define CLASSERT(cond) do {switch(42) {case (cond): case 0: break;}} while (0) -- 1.8.3.1