--- /dev/null
+ext4: parallel directory locking for htree
+
+Single-directory performance is critical for HPC workloads. In a
+typical use case an application creates a separate output file for
+each node and task in a job. As nodes and tasks increase, hundreds
+of thousands of files may be created in a single directory within
+a short window of time.
+
+Today, both filename lookup and file system modifying operations
+(such as create and unlink) are protected by a single lock for an
+entire ext4 directory. Parallel Directory Operations removes this
+bottleneck by introducing a parallel locking mechanism within one
+ext4 directory, enabling multiple application threads to look up,
+create and unlink entries in parallel.
+
+This patch contains the parallel htree locking for ext4 directories.
+The idea is simple and mirrors how pdirop is implemented at the LDLM
+level:
+
+ - implement an advanced lock with multiple modes (CR, PR, CW, PW, EX);
+   see https://en.wikipedia.org/wiki/Distributed_lock_manager for
+   details, and the compatibility matrix after this list
+ - it's just a resource lock that resides above the ext4 directory, and
+   replaces the current monolithic semaphore locking the whole directory
+ - protect sub-resources by locking N hash keys when holding CR & CW
+ * for Concurrent Read (CR), sub locking is Protected Read (PR)
+ * for Concurrent Write (CW), sub locking is Protected Write (PW)
+ - for !is_dx(dir):
+   * change operations always hold an Exclusive (EX) lock on the dir;
+     lookup & readdir hold a PR lock on the dir
+ - for is_dx(dir):
+   * change operations take the CW lock, then take two sub-locks:
+     + name-hash as the first key
+     + block number as the second key
+   * lookup takes the CR lock, then takes two sub-locks:
+     + name-hash as the first key
+     + block number as the second key
+   * readdir takes the PR lock:
+     + for Lustre it's not necessary to take the PR lock, because we
+       already have a PR lock at the LDLM level, but it is needed for
+       non-Lustre usage
+ - if we need to split any block (name-entry block or index block)
+   while holding the CW lock, we drop the CW lock, take the EX lock
+   and retry (see the calling sketch below)
+ - pdirops can be disabled by always taking the EX lock on change and
+   the PR lock on lookup/readdir
+
+Lustre-bug-id: LU-50" target="_blank">https://jira.whamcloud.com/browse/LU-50
+Lustre-change: http://review.whamcloud.com/375
+Signed-off-by: Liang Zhen <liang@whamcloud.com>
+
+
+Index: linux-3.10.0-229.1.2.fc21.x86_64/include/linux/htree_lock.h
+===================================================================
+--- /dev/null
++++ linux-3.10.0-229.1.2.fc21.x86_64/include/linux/htree_lock.h
+@@ -0,0 +1,187 @@
++/*
++ * include/linux/htree_lock.h
++ *
++ * Copyright (c) 2011, 2012, Intel Corporation.
++ *
++ * Author: Liang Zhen <liang@whamcloud.com>
++ */
++
++/*
++ * htree lock
++ *
++ * htree_lock is an advanced lock that supports five lock modes (the
++ * concept is taken from DLM) and it is a sleeping lock.
++ *
++ * most common use case is:
++ * - create a htree_lock_head for data
++ * - each thread (contender) creates its own htree_lock
++ * - contender needs to call htree_lock(lock_node, mode) to protect data and
++ * call htree_unlock to release lock
++ *
++ * There is also a more complex, advanced use-case: the user can take a
++ * PW/PR lock on a particular key, mostly while holding a shared lock
++ * (CW, CR) on the htree itself
++ *
++ * htree_lock(lock_node, HTREE_LOCK_CR); lock the htree with CR
++ * htree_node_lock(lock_node, HTREE_LOCK_PR, key...); lock @key with PR
++ * ...
++ * htree_node_unlock(lock_node); unlock the key
++ *
++ * Also, we can have N levels of this kind of key; all we need to
++ * do is specify N levels while creating the htree_lock_head, then we can
++ * lock/unlock a specific level by:
++ * htree_node_lock(lock_node, mode1, key1, level1...);
++ * do something;
++ * htree_node_lock(lock_node, mode1, key2, level2...);
++ * do something;
++ * htree_node_unlock(lock_node, level2);
++ * htree_node_unlock(lock_node, level1);
++ *
++ * NB: for multi-level locking, be careful about lock ordering to avoid deadlock
++ */
++
++#ifndef _LINUX_HTREE_LOCK_H
++#define _LINUX_HTREE_LOCK_H
++
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/sched.h>
++
++/*
++ * Lock Modes
++ * more details can be found here:
++ * http://en.wikipedia.org/wiki/Distributed_lock_manager
++ */
++typedef enum {
++ HTREE_LOCK_EX = 0, /* exclusive lock: incompatible with all others */
++ HTREE_LOCK_PW, /* protected write: allows only CR users */
++ HTREE_LOCK_PR, /* protected read: allow PR, CR users */
++ HTREE_LOCK_CW, /* concurrent write: allow CR, CW users */
++ HTREE_LOCK_CR, /* concurrent read: allow all but EX users */
++ HTREE_LOCK_MAX, /* number of lock modes */
++} htree_lock_mode_t;
++
++#define HTREE_LOCK_NL HTREE_LOCK_MAX
++#define HTREE_LOCK_INVAL 0xdead10c
++
++enum {
++ HTREE_HBITS_MIN = 2,
++ HTREE_HBITS_DEF = 14,
++ HTREE_HBITS_MAX = 32,
++};
++
++enum {
++ HTREE_EVENT_DISABLE = (0),
++ HTREE_EVENT_RD = (1 << HTREE_LOCK_PR),
++ HTREE_EVENT_WR = (1 << HTREE_LOCK_PW),
++ HTREE_EVENT_RDWR = (HTREE_EVENT_RD | HTREE_EVENT_WR),
++};
++
++struct htree_lock;
++
++typedef void (*htree_event_cb_t)(void *target, void *event);
++
++struct htree_lock_child {
++ struct list_head lc_list; /* granted list */
++ htree_event_cb_t lc_callback; /* event callback */
++ unsigned lc_events; /* event types */
++};
++
++struct htree_lock_head {
++ unsigned long lh_lock; /* bits lock */
++ /* blocked lock list (htree_lock) */
++ struct list_head lh_blocked_list;
++ /* # key levels */
++ u16 lh_depth;
++ /* hash bits for key and limit number of locks */
++ u16 lh_hbits;
++ /* counters for blocked locks */
++ u16 lh_nblocked[HTREE_LOCK_MAX];
++ /* counters for granted locks */
++ u16 lh_ngranted[HTREE_LOCK_MAX];
++ /* private data */
++ void *lh_private;
++ /* array of children locks */
++ struct htree_lock_child lh_children[0];
++};
++
++/* htree_lock_node_t is child-lock for a specific key (ln_value) */
++struct htree_lock_node {
++ htree_lock_mode_t ln_mode;
++ /* major hash key */
++ u16 ln_major_key;
++ /* minor hash key */
++ u16 ln_minor_key;
++ struct list_head ln_major_list;
++ struct list_head ln_minor_list;
++ /* alive list, all locks (granted, blocked, listening) are on it */
++ struct list_head ln_alive_list;
++ /* blocked list */
++ struct list_head ln_blocked_list;
++ /* granted list */
++ struct list_head ln_granted_list;
++ void *ln_ev_target;
++};
++
++struct htree_lock {
++ struct task_struct *lk_task;
++ struct htree_lock_head *lk_head;
++ void *lk_private;
++ unsigned lk_depth;
++ htree_lock_mode_t lk_mode;
++ struct list_head lk_blocked_list;
++ struct htree_lock_node lk_nodes[0];
++};
++
++/* create a lock head, which stands for a resource */
++struct htree_lock_head *htree_lock_head_alloc(unsigned depth,
++ unsigned hbits, unsigned priv);
++/* free a lock head */
++void htree_lock_head_free(struct htree_lock_head *lhead);
++/* register event callback for child lock at level @depth */
++void htree_lock_event_attach(struct htree_lock_head *lhead, unsigned depth,
++ unsigned events, htree_event_cb_t callback);
++/* create a lock handle, which stands for a thread */
++struct htree_lock *htree_lock_alloc(unsigned depth, unsigned pbytes);
++/* free a lock handle */
++void htree_lock_free(struct htree_lock *lck);
++/* lock htree; when @wait is false, 0 is returned if the lock can't
++ * be granted immediately */
++int htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
++ htree_lock_mode_t mode, int wait);
++/* unlock htree */
++void htree_unlock(struct htree_lock *lck);
++/* unlock and relock htree with @new_mode */
++int htree_change_lock_try(struct htree_lock *lck,
++ htree_lock_mode_t new_mode, int wait);
++void htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode);
++/* acquire the child lock (key) of the htree at level @dep; @event will be
++ * sent to all listeners on this @key while the lock is being granted */
++int htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
++ u32 key, unsigned dep, int wait, void *event);
++/* release the child lock at level @dep; this lock will listen on its key
++ * if @event isn't NULL, and event_cb will be called against @lck while
++ * granting any other lock at level @dep with the same key */
++void htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event);
++/* stop listening on child lock at level @dep */
++void htree_node_stop_listen(struct htree_lock *lck, unsigned dep);
++/* for debug */
++void htree_lock_stat_print(int depth);
++void htree_lock_stat_reset(void);
++
++#define htree_lock(lck, lh, mode) htree_lock_try(lck, lh, mode, 1)
++#define htree_change_lock(lck, mode) htree_change_lock_try(lck, mode, 1)
++
++#define htree_lock_mode(lck) ((lck)->lk_mode)
++
++#define htree_node_lock(lck, mode, key, dep) \
++ htree_node_lock_try(lck, mode, key, dep, 1, NULL)
++/* this is only safe in thread context of lock owner */
++#define htree_node_is_granted(lck, dep) \
++ ((lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_INVAL && \
++ (lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_NL)
++/* this is only safe in thread context of lock owner */
++#define htree_node_is_listening(lck, dep) \
++ ((lck)->lk_nodes[dep].ln_mode == HTREE_LOCK_NL)
++
++#endif
+Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
+===================================================================
+--- /dev/null
++++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
+@@ -0,0 +1,895 @@
++/*
++ * fs/ext4/htree_lock.c
++ *
++ * Copyright (c) 2011, 2012, Intel Corporation.
++ *
++ * Author: Liang Zhen <liang@whamcloud.com>
++ */
++#include <linux/jbd2.h>
++#include <linux/hash.h>
++#include <linux/module.h>
++#include <linux/htree_lock.h>
++
++enum {
++ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
++ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
++ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
++ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
++ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
++};
++
++enum {
++ HTREE_LOCK_COMPAT_EX = 0,
++ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
++ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
++ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
++ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
++ HTREE_LOCK_BIT_PW,
++};
++
++static int htree_lock_compat[] = {
++ [HTREE_LOCK_EX] HTREE_LOCK_COMPAT_EX,
++ [HTREE_LOCK_PW] HTREE_LOCK_COMPAT_PW,
++ [HTREE_LOCK_PR] HTREE_LOCK_COMPAT_PR,
++ [HTREE_LOCK_CW] HTREE_LOCK_COMPAT_CW,
++ [HTREE_LOCK_CR] HTREE_LOCK_COMPAT_CR,
++};
++
++/* max allowed htree-lock depth.
++ * We only need depth=3 for ext4, although users may specify larger values. */
++#define HTREE_LOCK_DEP_MAX 16
++
++#ifdef HTREE_LOCK_DEBUG
++
++static char *hl_name[] = {
++ [HTREE_LOCK_EX] "EX",
++ [HTREE_LOCK_PW] "PW",
++ [HTREE_LOCK_PR] "PR",
++ [HTREE_LOCK_CW] "CW",
++ [HTREE_LOCK_CR] "CR",
++};
++
++/* lock stats */
++struct htree_lock_node_stats {
++ unsigned long long blocked[HTREE_LOCK_MAX];
++ unsigned long long granted[HTREE_LOCK_MAX];
++ unsigned long long retried[HTREE_LOCK_MAX];
++ unsigned long long events;
++};
++
++struct htree_lock_stats {
++ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
++ unsigned long long granted[HTREE_LOCK_MAX];
++ unsigned long long blocked[HTREE_LOCK_MAX];
++};
++
++static struct htree_lock_stats hl_stats;
++
++void htree_lock_stat_reset(void)
++{
++ memset(&hl_stats, 0, sizeof(hl_stats));
++}
++
++void htree_lock_stat_print(int depth)
++{
++ int i;
++ int j;
++
++ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
++ for (i = 0; i < HTREE_LOCK_MAX; i++) {
++ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
++ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
++ }
++ for (i = 0; i < depth; i++) {
++ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
++ for (j = 0; j < HTREE_LOCK_MAX; j++) {
++ printk(KERN_DEBUG
++ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
++ hl_name[j], hl_stats.nodes[i].granted[j],
++ hl_stats.nodes[i].blocked[j],
++ hl_stats.nodes[i].retried[j]);
++ }
++ }
++}
++
++#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
++#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
++#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
++#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
++#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
++#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
++
++#else /* !DEBUG */
++
++void htree_lock_stat_reset(void) {}
++void htree_lock_stat_print(int depth) {}
++
++#define lk_grant_inc(m) do {} while (0)
++#define lk_block_inc(m) do {} while (0)
++#define ln_grant_inc(d, m) do {} while (0)
++#define ln_block_inc(d, m) do {} while (0)
++#define ln_retry_inc(d, m) do {} while (0)
++#define ln_event_inc(d) do {} while (0)
++
++#endif /* DEBUG */
++
++EXPORT_SYMBOL(htree_lock_stat_reset);
++EXPORT_SYMBOL(htree_lock_stat_print);
++
++#define HTREE_DEP_ROOT (-1)
++
++#define htree_spin_lock(lhead, dep) \
++ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
++#define htree_spin_unlock(lhead, dep) \
++ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
++
++#define htree_key_event_ignore(child, ln) \
++ (!((child)->lc_events & (1 << (ln)->ln_mode)))
++
++static int
++htree_key_list_empty(struct htree_lock_node *ln)
++{
++ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
++}
++
++static void
++htree_key_list_del_init(struct htree_lock_node *ln)
++{
++ struct htree_lock_node *tmp = NULL;
++
++ if (!list_empty(&ln->ln_minor_list)) {
++ tmp = list_entry(ln->ln_minor_list.next,
++ struct htree_lock_node, ln_minor_list);
++ list_del_init(&ln->ln_minor_list);
++ }
++
++ if (list_empty(&ln->ln_major_list))
++ return;
++
++ if (tmp == NULL) { /* not on minor key list */
++ list_del_init(&ln->ln_major_list);
++ } else {
++ BUG_ON(!list_empty(&tmp->ln_major_list));
++ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
++ }
++}
++
++static void
++htree_key_list_replace_init(struct htree_lock_node *old,
++ struct htree_lock_node *new)
++{
++ if (!list_empty(&old->ln_major_list))
++ list_replace_init(&old->ln_major_list, &new->ln_major_list);
++
++ if (!list_empty(&old->ln_minor_list))
++ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
++}
++
++static void
++htree_key_event_enqueue(struct htree_lock_child *child,
++ struct htree_lock_node *ln, int dep, void *event)
++{
++ struct htree_lock_node *tmp;
++
++ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
++ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
++ if (event == NULL || htree_key_event_ignore(child, ln))
++ return;
++
++ /* shouldn't be a very long list */
++ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
++ if (tmp->ln_mode == HTREE_LOCK_NL) {
++ ln_event_inc(dep);
++ if (child->lc_callback != NULL)
++ child->lc_callback(tmp->ln_ev_target, event);
++ }
++ }
++}
++
++static int
++htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
++ unsigned dep, int wait, void *event)
++{
++ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
++ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
++ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
++
++ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
++ /* NB: we only expect PR/PW lock modes here; only these two modes are
++ * allowed for htree_node_lock (asserted in htree_node_lock_internal).
++ * NL is only used for listeners; users can't directly request NL mode */
++ if ((curln->ln_mode == HTREE_LOCK_NL) ||
++ (curln->ln_mode != HTREE_LOCK_PW &&
++ newln->ln_mode != HTREE_LOCK_PW)) {
++ /* no conflict, attach it on granted list of @curlk */
++ if (curln->ln_mode != HTREE_LOCK_NL) {
++ list_add(&newln->ln_granted_list,
++ &curln->ln_granted_list);
++ } else {
++ /* replace key owner */
++ htree_key_list_replace_init(curln, newln);
++ }
++
++ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
++ htree_key_event_enqueue(child, newln, dep, event);
++ ln_grant_inc(dep, newln->ln_mode);
++ return 1; /* still hold lh_lock */
++ }
++
++ if (!wait) { /* can't grant and don't want to wait */
++ ln_retry_inc(dep, newln->ln_mode);
++ newln->ln_mode = HTREE_LOCK_INVAL;
++ return -1; /* don't wait and just return -1 */
++ }
++
++ newlk->lk_task = current;
++ /* conflict, attach it on blocked list of curlk */
++ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
++ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
++ ln_block_inc(dep, newln->ln_mode);
++
++retry:
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ htree_spin_unlock(newlk->lk_head, dep);
++ /* wait to be given the lock */
++ if (newlk->lk_task != NULL)
++ schedule();
++ /* granted, no doubt, wake up will set me RUNNING */
++ htree_spin_lock(newlk->lk_head, dep);
++ /* need to check the lock was really granted; the thread may be woken spuriously */
++ if (list_empty(&newln->ln_granted_list) && htree_key_list_empty(newln))
++ goto retry;
++ if (event && !htree_key_event_ignore(child, newln))
++ htree_key_event_enqueue(child, newln, dep, event);
++
++ return 1; /* still hold lh_lock */
++}
++
++/*
++ * get PR/PW access to particular tree-node according to @dep and @key,
++ * it will return -1 if @wait is false and can't immediately grant this lock.
++ * All listeners(HTREE_LOCK_NL) on @dep and with the same @key will get
++ * @event if it's not NULL.
++ * NB: ALWAYS called holding lhead::lh_lock
++ */
++static int
++htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
++ htree_lock_mode_t mode, u32 key, unsigned dep,
++ int wait, void *event)
++{
++ LIST_HEAD(list);
++ struct htree_lock *tmp;
++ struct htree_lock *tmp2;
++ u16 major;
++ u16 minor;
++ u8 reverse;
++ u8 ma_bits;
++ u8 mi_bits;
++
++ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
++ BUG_ON(htree_node_is_granted(lck, dep));
++
++ key = hash_long(key, lhead->lh_hbits);
++
++ mi_bits = lhead->lh_hbits >> 1;
++ ma_bits = lhead->lh_hbits - mi_bits;
++
++ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
++ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
++ lck->lk_nodes[dep].ln_mode = mode;
++
++ /*
++ * The major key list is an ordered list, so searches are started
++ * at the end of the list that is numerically closer to major_key,
++ * so at most half of the list will be walked (for well-distributed
++ * keys). The list traversal aborts early if the expected key
++ * location is passed.
++ */
++ reverse = (major >= (1 << (ma_bits - 1)));
++
++ if (reverse) {
++ list_for_each_entry_reverse(tmp,
++ &lhead->lh_children[dep].lc_list,
++ lk_nodes[dep].ln_major_list) {
++ if (tmp->lk_nodes[dep].ln_major_key == major) {
++ goto search_minor;
++
++ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
++ /* attach _after_ @tmp */
++ list_add(&lck->lk_nodes[dep].ln_major_list,
++ &tmp->lk_nodes[dep].ln_major_list);
++ goto out_grant_major;
++ }
++ }
++
++ list_add(&lck->lk_nodes[dep].ln_major_list,
++ &lhead->lh_children[dep].lc_list);
++ goto out_grant_major;
++
++ } else {
++ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
++ lk_nodes[dep].ln_major_list) {
++ if (tmp->lk_nodes[dep].ln_major_key == major) {
++ goto search_minor;
++
++ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
++ /* insert _before_ @tmp */
++ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
++ &tmp->lk_nodes[dep].ln_major_list);
++ goto out_grant_major;
++ }
++ }
++
++ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
++ &lhead->lh_children[dep].lc_list);
++ goto out_grant_major;
++ }
++
++ search_minor:
++ /*
++ * NB: minor_key list doesn't have a "head", @list is just a
++ * temporary stub for helping list searching, make sure it's removed
++ * after searching.
++ * minor_key list is an ordered list too.
++ */
++ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
++
++ reverse = (minor >= (1 << (mi_bits - 1)));
++
++ if (reverse) {
++ list_for_each_entry_reverse(tmp2, &list,
++ lk_nodes[dep].ln_minor_list) {
++ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
++ goto out_enqueue;
++
++ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
++ /* attach _after_ @tmp2 */
++ list_add(&lck->lk_nodes[dep].ln_minor_list,
++ &tmp2->lk_nodes[dep].ln_minor_list);
++ goto out_grant_minor;
++ }
++ }
++
++ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
++
++ } else {
++ list_for_each_entry(tmp2, &list,
++ lk_nodes[dep].ln_minor_list) {
++ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
++ goto out_enqueue;
++
++ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
++ /* insert _before_ @tmp2 */
++ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
++ &tmp2->lk_nodes[dep].ln_minor_list);
++ goto out_grant_minor;
++ }
++ }
++
++ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
++ }
++
++ out_grant_minor:
++ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
++ /* new lock @lck is the first one on minor_key list, which
++ * means it has the smallest minor_key and it should
++ * replace @tmp as minor_key owner */
++ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
++ &lck->lk_nodes[dep].ln_major_list);
++ }
++ /* remove the temporary head */
++ list_del(&list);
++
++ out_grant_major:
++ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
++ return 1; /* granted with holding lh_lock */
++
++ out_enqueue:
++ list_del(&list); /* remove temporary head */
++ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
++}
++
++/*
++ * release the key of @lck at level @dep, and grant any blocked locks.
++ * caller will still listen on @key if @event is not NULL, which means
++ * caller can see a event (by event_cb) while granting any lock with
++ * the same key at level @dep.
++ * NB: ALWAYS called holding lhead::lh_lock
++ * NB: listener will not block anyone because listening mode is HTREE_LOCK_NL
++ */
++static void
++htree_node_unlock_internal(struct htree_lock_head *lhead,
++ struct htree_lock *curlk, unsigned dep, void *event)
++{
++ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
++ struct htree_lock *grtlk = NULL;
++ struct htree_lock_node *grtln;
++ struct htree_lock *poslk;
++ struct htree_lock *tmplk;
++
++ if (!htree_node_is_granted(curlk, dep))
++ return;
++
++ if (!list_empty(&curln->ln_granted_list)) {
++ /* there is another granted lock */
++ grtlk = list_entry(curln->ln_granted_list.next,
++ struct htree_lock,
++ lk_nodes[dep].ln_granted_list);
++ list_del_init(&curln->ln_granted_list);
++ }
++
++ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
++ /*
++ * @curlk is the only granted lock, so we confirmed:
++ * a) curln is key owner (attached on major/minor_list),
++ * so if there is any blocked lock, it should be attached
++ * on curln->ln_blocked_list
++ * b) we always can grant the first blocked lock
++ */
++ grtlk = list_entry(curln->ln_blocked_list.next,
++ struct htree_lock,
++ lk_nodes[dep].ln_blocked_list);
++ BUG_ON(grtlk->lk_task == NULL);
++ wake_up_process(grtlk->lk_task);
++ }
++
++ if (event != NULL &&
++ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
++ curln->ln_ev_target = event;
++ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
++ } else {
++ curln->ln_mode = HTREE_LOCK_INVAL;
++ }
++
++ if (grtlk == NULL) { /* I must be the only one locking this key */
++ struct htree_lock_node *tmpln;
++
++ BUG_ON(htree_key_list_empty(curln));
++
++ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
++ return;
++
++ /* not listening */
++ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
++ htree_key_list_del_init(curln);
++ return;
++ }
++
++ tmpln = list_entry(curln->ln_alive_list.next,
++ struct htree_lock_node, ln_alive_list);
++
++ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
++
++ htree_key_list_replace_init(curln, tmpln);
++ list_del_init(&curln->ln_alive_list);
++
++ return;
++ }
++
++ /* have a granted lock */
++ grtln = &grtlk->lk_nodes[dep];
++ if (!list_empty(&curln->ln_blocked_list)) {
++ /* only key owner can be on both lists */
++ BUG_ON(htree_key_list_empty(curln));
++
++ if (list_empty(&grtln->ln_blocked_list)) {
++ list_add(&grtln->ln_blocked_list,
++ &curln->ln_blocked_list);
++ }
++ list_del_init(&curln->ln_blocked_list);
++ }
++ /*
++ * NB: this is the tricky part:
++ * We have only two modes for child-lock (PR and PW), also,
++ * only owner of the key (attached on major/minor_list) can be on
++ * both blocked_list and granted_list, so @grtlk must be one
++ * of these two cases:
++ *
++ * a) @grtlk is taken from granted_list, which means we've granted
++ * more than one lock so @grtlk has to be PR, the first blocked
++ * lock must be PW and we can't grant it at all.
++ * So even @grtlk is not owner of the key (empty blocked_list),
++ * we don't care because we can't grant any lock.
++ * b) we just grant a new lock which is taken from head of blocked
++ * list, and it should be the first granted lock, and it should
++ * be the first one linked on blocked_list.
++ *
++ * Either way, we can get correct result by iterating blocked_list
++ * of @grtlk, and don't have to bother on how to find out
++ * owner of current key.
++ */
++ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
++ lk_nodes[dep].ln_blocked_list) {
++ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
++ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
++ break;
++ /* grant all readers */
++ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
++ list_add(&poslk->lk_nodes[dep].ln_granted_list,
++ &grtln->ln_granted_list);
++
++ BUG_ON(poslk->lk_task == NULL);
++ wake_up_process(poslk->lk_task);
++ }
++
++ /* if @curln is the owner of this key, replace it with @grtln */
++ if (!htree_key_list_empty(curln))
++ htree_key_list_replace_init(curln, grtln);
++
++ if (curln->ln_mode == HTREE_LOCK_INVAL)
++ list_del_init(&curln->ln_alive_list);
++}
++
++/*
++ * a wrapper of htree_node_lock_internal; it returns 1 when granted
++ * and 0 only if @wait is false and the lock can't be granted immediately
++ */
++int
++htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
++ u32 key, unsigned dep, int wait, void *event)
++{
++ struct htree_lock_head *lhead = lck->lk_head;
++ int rc;
++
++ BUG_ON(dep >= lck->lk_depth);
++ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
++
++ htree_spin_lock(lhead, dep);
++ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
++ if (rc != 0)
++ htree_spin_unlock(lhead, dep);
++ return rc >= 0;
++}
++EXPORT_SYMBOL(htree_node_lock_try);
++
++/* a wrapper of htree_node_unlock_internal */
++void
++htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
++{
++ struct htree_lock_head *lhead = lck->lk_head;
++
++ BUG_ON(dep >= lck->lk_depth);
++ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
++
++ htree_spin_lock(lhead, dep);
++ htree_node_unlock_internal(lhead, lck, dep, event);
++ htree_spin_unlock(lhead, dep);
++}
++EXPORT_SYMBOL(htree_node_unlock);
++
++/* stop listening on child-lock level @dep */
++void
++htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
++{
++ struct htree_lock_node *ln = &lck->lk_nodes[dep];
++ struct htree_lock_node *tmp;
++
++ BUG_ON(htree_node_is_granted(lck, dep));
++ BUG_ON(!list_empty(&ln->ln_blocked_list));
++ BUG_ON(!list_empty(&ln->ln_granted_list));
++
++ if (!htree_node_is_listening(lck, dep))
++ return;
++
++ htree_spin_lock(lck->lk_head, dep);
++ ln->ln_mode = HTREE_LOCK_INVAL;
++ ln->ln_ev_target = NULL;
++
++ if (htree_key_list_empty(ln)) { /* not owner */
++ list_del_init(&ln->ln_alive_list);
++ goto out;
++ }
++
++ /* I'm the owner... */
++ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
++ htree_key_list_del_init(ln);
++ goto out;
++ }
++
++ tmp = list_entry(ln->ln_alive_list.next,
++ struct htree_lock_node, ln_alive_list);
++
++ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
++ htree_key_list_replace_init(ln, tmp);
++ list_del_init(&ln->ln_alive_list);
++ out:
++ htree_spin_unlock(lck->lk_head, dep);
++}
++EXPORT_SYMBOL(htree_node_stop_listen);
++
++/* release all child-locks if we have any */
++static void
++htree_node_release_all(struct htree_lock *lck)
++{
++ int i;
++
++ for (i = 0; i < lck->lk_depth; i++) {
++ if (htree_node_is_granted(lck, i))
++ htree_node_unlock(lck, i, NULL);
++ else if (htree_node_is_listening(lck, i))
++ htree_node_stop_listen(lck, i);
++ }
++}
++
++/*
++ * obtain htree lock, it could be blocked inside if there's conflict
++ * with any granted or blocked lock and @wait is true.
++ * NB: ALWAYS called holding lhead::lh_lock
++ */
++static int
++htree_lock_internal(struct htree_lock *lck, int wait)
++{
++ struct htree_lock_head *lhead = lck->lk_head;
++ int granted = 0;
++ int blocked = 0;
++ int i;
++
++ for (i = 0; i < HTREE_LOCK_MAX; i++) {
++ if (lhead->lh_ngranted[i] != 0)
++ granted |= 1 << i;
++ if (lhead->lh_nblocked[i] != 0)
++ blocked |= 1 << i;
++ }
++ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
++ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
++ /* block the current lock even if it only conflicts with another
++ * blocked lock, so that locks like EX won't starve */
++ if (!wait)
++ return -1;
++ lhead->lh_nblocked[lck->lk_mode]++;
++ lk_block_inc(lck->lk_mode);
++
++ lck->lk_task = current;
++ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
++
++retry:
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
++ /* wait to be given the lock */
++ if (lck->lk_task != NULL)
++ schedule();
++ /* granted, no doubt; wakeup will set me RUNNING.
++ * Since the thread may be woken up spuriously,
++ * we need to check again whether the lock was granted. */
++ if (!list_empty(&lck->lk_blocked_list)) {
++ htree_spin_lock(lhead, HTREE_DEP_ROOT);
++ if (list_empty(&lck->lk_blocked_list)) {
++ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
++ return 0;
++ }
++ goto retry;
++ }
++ return 0; /* without lh_lock */
++ }
++ lhead->lh_ngranted[lck->lk_mode]++;
++ lk_grant_inc(lck->lk_mode);
++ return 1;
++}
++
++/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
++static void
++htree_unlock_internal(struct htree_lock *lck)
++{
++ struct htree_lock_head *lhead = lck->lk_head;
++ struct htree_lock *tmp;
++ struct htree_lock *tmp2;
++ int granted = 0;
++ int i;
++
++ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
++
++ lhead->lh_ngranted[lck->lk_mode]--;
++ lck->lk_mode = HTREE_LOCK_INVAL;
++
++ for (i = 0; i < HTREE_LOCK_MAX; i++) {
++ if (lhead->lh_ngranted[i] != 0)
++ granted |= 1 << i;
++ }
++ list_for_each_entry_safe(tmp, tmp2,
++ &lhead->lh_blocked_list, lk_blocked_list) {
++ /* conflict with any granted lock? */
++ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
++ break;
++
++ list_del_init(&tmp->lk_blocked_list);
++
++ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
++
++ lhead->lh_nblocked[tmp->lk_mode]--;
++ lhead->lh_ngranted[tmp->lk_mode]++;
++ granted |= 1 << tmp->lk_mode;
++
++ BUG_ON(tmp->lk_task == NULL);
++ wake_up_process(tmp->lk_task);
++ }
++}
++
++/* a wrapper of htree_lock_internal and the exported interface.
++ * It always returns 1 with the lock granted if @wait is true; it can return
++ * 0 if @wait is false and the locking request can't be granted immediately */
++int
++htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
++ htree_lock_mode_t mode, int wait)
++{
++ int rc;
++
++ BUG_ON(lck->lk_depth > lhead->lh_depth);
++ BUG_ON(lck->lk_head != NULL);
++ BUG_ON(lck->lk_task != NULL);
++
++ lck->lk_head = lhead;
++ lck->lk_mode = mode;
++
++ htree_spin_lock(lhead, HTREE_DEP_ROOT);
++ rc = htree_lock_internal(lck, wait);
++ if (rc != 0)
++ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
++ return rc >= 0;
++}
++EXPORT_SYMBOL(htree_lock_try);
++
++/* a wrapper of htree_unlock_internal and the exported interface.
++ * It releases all htree_node_locks as well as the htree_lock itself */
++void
++htree_unlock(struct htree_lock *lck)
++{
++ BUG_ON(lck->lk_head == NULL);
++ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
++
++ htree_node_release_all(lck);
++
++ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
++ htree_unlock_internal(lck);
++ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
++ lck->lk_head = NULL;
++ lck->lk_task = NULL;
++}
++EXPORT_SYMBOL(htree_unlock);
++
++/* change lock mode */
++void
++htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
++{
++ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
++ lck->lk_mode = mode;
++}
++EXPORT_SYMBOL(htree_change_mode);
++
++/* release the htree lock, and lock it again with a new mode.
++ * This function first releases all htree_node_locks and the htree_lock,
++ * then tries to acquire the htree_lock with the new @mode.
++ * It always returns 1 with the lock granted if @wait is true; it can return
++ * 0 if @wait is false and the locking request can't be granted immediately */
++int
++htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
++{
++ struct htree_lock_head *lhead = lck->lk_head;
++ int rc;
++
++ BUG_ON(lhead == NULL);
++ BUG_ON(lck->lk_mode == mode);
++ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
++
++ htree_node_release_all(lck);
++
++ htree_spin_lock(lhead, HTREE_DEP_ROOT);
++ htree_unlock_internal(lck);
++ lck->lk_mode = mode;
++ rc = htree_lock_internal(lck, wait);
++ if (rc != 0)
++ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
++ return rc >= 0;
++}
++EXPORT_SYMBOL(htree_change_lock_try);
++
++/* create a htree_lock head with @depth levels (number of child-locks);
++ * it is a per-resource structure */
++struct htree_lock_head *
++htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
++{
++ struct htree_lock_head *lhead;
++ int i;
++
++ if (depth > HTREE_LOCK_DEP_MAX) {
++ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
++ depth, HTREE_LOCK_DEP_MAX);
++ return NULL;
++ }
++
++ lhead = kzalloc(offsetof(struct htree_lock_head,
++ lh_children[depth]) + priv, GFP_NOFS);
++ if (lhead == NULL)
++ return NULL;
++
++ if (hbits < HTREE_HBITS_MIN)
++ hbits = HTREE_HBITS_MIN;
++ else if (hbits > HTREE_HBITS_MAX)
++ hbits = HTREE_HBITS_MAX;
++
++ lhead->lh_hbits = hbits;
++ lhead->lh_lock = 0;
++ lhead->lh_depth = depth;
++ INIT_LIST_HEAD(&lhead->lh_blocked_list);
++ if (priv > 0) {
++ lhead->lh_private = (void *)lhead +
++ offsetof(struct htree_lock_head, lh_children[depth]);
++ }
++
++ for (i = 0; i < depth; i++) {
++ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
++ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
++ }
++ return lhead;
++}
++EXPORT_SYMBOL(htree_lock_head_alloc);
++
++/* free the htree_lock head */
++void
++htree_lock_head_free(struct htree_lock_head *lhead)
++{
++ int i;
++
++ BUG_ON(!list_empty(&lhead->lh_blocked_list));
++ for (i = 0; i < lhead->lh_depth; i++)
++ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
++ kfree(lhead);
++}
++EXPORT_SYMBOL(htree_lock_head_free);
++
++/* register event callback for @events of child-lock at level @dep */
++void
++htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
++ unsigned events, htree_event_cb_t callback)
++{
++ BUG_ON(lhead->lh_depth <= dep);
++ lhead->lh_children[dep].lc_events = events;
++ lhead->lh_children[dep].lc_callback = callback;
++}
++EXPORT_SYMBOL(htree_lock_event_attach);
++
++/* allocate a htree_lock, which is a per-thread structure; @pbytes is the
++ * number of extra bytes reserved as private data for the caller */
++struct htree_lock *
++htree_lock_alloc(unsigned depth, unsigned pbytes)
++{
++ struct htree_lock *lck;
++ int i = offsetof(struct htree_lock, lk_nodes[depth]);
++
++ if (depth > HTREE_LOCK_DEP_MAX) {
++ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
++ depth, HTREE_LOCK_DEP_MAX);
++ return NULL;
++ }
++ lck = kzalloc(i + pbytes, GFP_NOFS);
++ if (lck == NULL)
++ return NULL;
++
++ if (pbytes != 0)
++ lck->lk_private = (void *)lck + i;
++ lck->lk_mode = HTREE_LOCK_INVAL;
++ lck->lk_depth = depth;
++ INIT_LIST_HEAD(&lck->lk_blocked_list);
++
++ for (i = 0; i < depth; i++) {
++ struct htree_lock_node *node = &lck->lk_nodes[i];
++
++ node->ln_mode = HTREE_LOCK_INVAL;
++ INIT_LIST_HEAD(&node->ln_major_list);
++ INIT_LIST_HEAD(&node->ln_minor_list);
++ INIT_LIST_HEAD(&node->ln_alive_list);
++ INIT_LIST_HEAD(&node->ln_blocked_list);
++ INIT_LIST_HEAD(&node->ln_granted_list);
++ }
++
++ return lck;
++}
++EXPORT_SYMBOL(htree_lock_alloc);
++
++/* free htree_lock node */
++void
++htree_lock_free(struct htree_lock *lck)
++{
++ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
++ kfree(lck);
++}
++EXPORT_SYMBOL(htree_lock_free);
-diff --git a/fs/ext4/htree_lock.c b/fs/ext4/htree_lock.c
-new file mode 100644
-index 0000000..ee407ed
---- /dev/null
-+++ b/fs/ext4/htree_lock.c
-@@ -0,0 +1,891 @@
-+/*
-+ * fs/ext4/htree_lock.c
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+#include <linux/jbd2.h>
-+#include <linux/hash.h>
-+#include <linux/module.h>
-+#include <linux/htree_lock.h>
-+
-+enum {
-+ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
-+ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
-+ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
-+ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
-+ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
-+};
-+
-+enum {
-+ HTREE_LOCK_COMPAT_EX = 0,
-+ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
-+ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
-+ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
-+ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
-+ HTREE_LOCK_BIT_PW,
-+};
-+
-+static int htree_lock_compat[] = {
-+ [HTREE_LOCK_EX] HTREE_LOCK_COMPAT_EX,
-+ [HTREE_LOCK_PW] HTREE_LOCK_COMPAT_PW,
-+ [HTREE_LOCK_PR] HTREE_LOCK_COMPAT_PR,
-+ [HTREE_LOCK_CW] HTREE_LOCK_COMPAT_CW,
-+ [HTREE_LOCK_CR] HTREE_LOCK_COMPAT_CR,
-+};
-+
-+/* max allowed htree-lock depth.
-+ * We only need depth=3 for ext4 although user can have higher value. */
-+#define HTREE_LOCK_DEP_MAX 16
-+
-+#ifdef HTREE_LOCK_DEBUG
-+
-+static char *hl_name[] = {
-+ [HTREE_LOCK_EX] "EX",
-+ [HTREE_LOCK_PW] "PW",
-+ [HTREE_LOCK_PR] "PR",
-+ [HTREE_LOCK_CW] "CW",
-+ [HTREE_LOCK_CR] "CR",
-+};
-+
-+/* lock stats */
-+struct htree_lock_node_stats {
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long retried[HTREE_LOCK_MAX];
-+ unsigned long long events;
-+};
-+
-+struct htree_lock_stats {
-+ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+};
-+
-+static struct htree_lock_stats hl_stats;
-+
-+void htree_lock_stat_reset(void)
-+{
-+ memset(&hl_stats, 0, sizeof(hl_stats));
-+}
-+
-+void htree_lock_stat_print(int depth)
-+{
-+ int i;
-+ int j;
-+
-+ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
-+ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
-+ }
-+ for (i = 0; i < depth; i++) {
-+ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
-+ for (j = 0; j < HTREE_LOCK_MAX; j++) {
-+ printk(KERN_DEBUG
-+ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
-+ hl_name[j], hl_stats.nodes[i].granted[j],
-+ hl_stats.nodes[i].blocked[j],
-+ hl_stats.nodes[i].retried[j]);
-+ }
-+ }
-+}
-+
-+#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
-+#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
-+#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
-+#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
-+#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
-+#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
-+
-+#else /* !DEBUG */
-+
-+void htree_lock_stat_reset(void) {}
-+void htree_lock_stat_print(int depth) {}
-+
-+#define lk_grant_inc(m) do {} while (0)
-+#define lk_block_inc(m) do {} while (0)
-+#define ln_grant_inc(d, m) do {} while (0)
-+#define ln_block_inc(d, m) do {} while (0)
-+#define ln_retry_inc(d, m) do {} while (0)
-+#define ln_event_inc(d) do {} while (0)
-+
-+#endif /* DEBUG */
-+
-+EXPORT_SYMBOL(htree_lock_stat_reset);
-+EXPORT_SYMBOL(htree_lock_stat_print);
-+
-+#define HTREE_DEP_ROOT (-1)
-+
-+#define htree_spin_lock(lhead, dep) \
-+ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
-+#define htree_spin_unlock(lhead, dep) \
-+ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
-+
-+#define htree_key_event_ignore(child, ln) \
-+ (!((child)->lc_events & (1 << (ln)->ln_mode)))
-+
-+static int
-+htree_key_list_empty(struct htree_lock_node *ln)
-+{
-+ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
-+}
-+
-+static void
-+htree_key_list_del_init(struct htree_lock_node *ln)
-+{
-+ struct htree_lock_node *tmp = NULL;
-+
-+ if (!list_empty(&ln->ln_minor_list)) {
-+ tmp = list_entry(ln->ln_minor_list.next,
-+ struct htree_lock_node, ln_minor_list);
-+ list_del_init(&ln->ln_minor_list);
-+ }
-+
-+ if (list_empty(&ln->ln_major_list))
-+ return;
-+
-+ if (tmp == NULL) { /* not on minor key list */
-+ list_del_init(&ln->ln_major_list);
-+ } else {
-+ BUG_ON(!list_empty(&tmp->ln_major_list));
-+ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
-+ }
-+}
-+
-+static void
-+htree_key_list_replace_init(struct htree_lock_node *old,
-+ struct htree_lock_node *new)
-+{
-+ if (!list_empty(&old->ln_major_list))
-+ list_replace_init(&old->ln_major_list, &new->ln_major_list);
-+
-+ if (!list_empty(&old->ln_minor_list))
-+ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
-+}
-+
-+static void
-+htree_key_event_enqueue(struct htree_lock_child *child,
-+ struct htree_lock_node *ln, int dep, void *event)
-+{
-+ struct htree_lock_node *tmp;
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
-+ if (event == NULL || htree_key_event_ignore(child, ln))
-+ return;
-+
-+ /* shouldn't be a very long list */
-+ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
-+ if (tmp->ln_mode == HTREE_LOCK_NL) {
-+ ln_event_inc(dep);
-+ if (child->lc_callback != NULL)
-+ child->lc_callback(tmp->ln_ev_target, event);
-+ }
-+ }
-+}
-+
-+static int
-+htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
-+ unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
-+ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ /* NB: we only expect PR/PW lock mode at here, only these two modes are
-+ * allowed for htree_node_lock(asserted in htree_node_lock_internal),
-+ * NL is only used for listener, user can't directly require NL mode */
-+ if ((curln->ln_mode == HTREE_LOCK_NL) ||
-+ (curln->ln_mode != HTREE_LOCK_PW &&
-+ newln->ln_mode != HTREE_LOCK_PW)) {
-+ /* no conflict, attach it on granted list of @curlk */
-+ if (curln->ln_mode != HTREE_LOCK_NL) {
-+ list_add(&newln->ln_granted_list,
-+ &curln->ln_granted_list);
-+ } else {
-+ /* replace key owner */
-+ htree_key_list_replace_init(curln, newln);
-+ }
-+
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ ln_grant_inc(dep, newln->ln_mode);
-+ return 1; /* still hold lh_lock */
-+ }
-+
-+ if (!wait) { /* can't grant and don't want to wait */
-+ ln_retry_inc(dep, newln->ln_mode);
-+ newln->ln_mode = HTREE_LOCK_INVAL;
-+ return -1; /* don't wait and just return -1 */
-+ }
-+
-+ newlk->lk_task = current;
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ /* conflict, attach it on blocked list of curlk */
-+ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ ln_block_inc(dep, newln->ln_mode);
-+
-+ htree_spin_unlock(newlk->lk_head, dep);
-+ /* wait to be given the lock */
-+ if (newlk->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt, wake up will set me RUNNING */
-+ if (event == NULL || htree_key_event_ignore(child, newln))
-+ return 0; /* granted without lh_lock */
-+
-+ htree_spin_lock(newlk->lk_head, dep);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ return 1; /* still hold lh_lock */
-+}
-+
-+/*
-+ * get PR/PW access to particular tree-node according to @dep and @key,
-+ * it will return -1 if @wait is false and can't immediately grant this lock.
-+ * All listeners(HTREE_LOCK_NL) on @dep and with the same @key will get
-+ * @event if it's not NULL.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
-+ htree_lock_mode_t mode, u32 key, unsigned dep,
-+ int wait, void *event)
-+{
-+ LIST_HEAD(list);
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ u16 major;
-+ u16 minor;
-+ u8 reverse;
-+ u8 ma_bits;
-+ u8 mi_bits;
-+
-+ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+
-+ key = hash_long(key, lhead->lh_hbits);
-+
-+ mi_bits = lhead->lh_hbits >> 1;
-+ ma_bits = lhead->lh_hbits - mi_bits;
-+
-+ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
-+ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
-+ lck->lk_nodes[dep].ln_mode = mode;
-+
-+ /*
-+ * The major key list is an ordered list, so searches are started
-+ * at the end of the list that is numerically closer to major_key,
-+ * so at most half of the list will be walked (for well-distributed
-+ * keys). The list traversal aborts early if the expected key
-+ * location is passed.
-+ */
-+ reverse = (major >= (1 << (ma_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp,
-+ &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
-+ /* attach _after_ @tmp */
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+
-+ } else {
-+ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
-+ /* insert _before_ @tmp */
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+ }
-+
-+ search_minor:
-+ /*
-+ * NB: minor_key list doesn't have a "head", @list is just a
-+ * temporary stub for helping list searching, make sure it's removed
-+ * after searching.
-+ * minor_key list is an ordered list too.
-+ */
-+ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
-+
-+ reverse = (minor >= (1 << (mi_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
-+ /* attach _after_ @tmp2 */
-+ list_add(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
-+
-+ } else {
-+ list_for_each_entry(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
-+ /* insert _before_ @tmp2 */
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
-+ }
-+
-+ out_grant_minor:
-+ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
-+ /* new lock @lck is the first one on minor_key list, which
-+ * means it has the smallest minor_key and it should
-+ * replace @tmp as minor_key owner */
-+ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
-+ &lck->lk_nodes[dep].ln_major_list);
-+ }
-+ /* remove the temporary head */
-+ list_del(&list);
-+
-+ out_grant_major:
-+ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
-+ return 1; /* granted with holding lh_lock */
-+
-+ out_enqueue:
-+ list_del(&list); /* remove temprary head */
-+ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
-+}
-+
-+/*
-+ * release the key of @lck at level @dep, and grant any blocked locks.
-+ * caller will still listen on @key if @event is not NULL, which means
-+ * caller can see a event (by event_cb) while granting any lock with
-+ * the same key at level @dep.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ * NB: listener will not block anyone because listening mode is HTREE_LOCK_NL
-+ */
-+static void
-+htree_node_unlock_internal(struct htree_lock_head *lhead,
-+ struct htree_lock *curlk, unsigned dep, void *event)
-+{
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+ struct htree_lock *grtlk = NULL;
-+ struct htree_lock_node *grtln;
-+ struct htree_lock *poslk;
-+ struct htree_lock *tmplk;
-+
-+ if (!htree_node_is_granted(curlk, dep))
-+ return;
-+
-+ if (!list_empty(&curln->ln_granted_list)) {
-+ /* there is another granted lock */
-+ grtlk = list_entry(curln->ln_granted_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_granted_list);
-+ list_del_init(&curln->ln_granted_list);
-+ }
-+
-+ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
-+ /*
-+ * @curlk is the only granted lock, so we confirmed:
-+ * a) curln is key owner (attached on major/minor_list),
-+ * so if there is any blocked lock, it should be attached
-+ * on curln->ln_blocked_list
-+ * b) we always can grant the first blocked lock
-+ */
-+ grtlk = list_entry(curln->ln_blocked_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_blocked_list);
-+ BUG_ON(grtlk->lk_task == NULL);
-+ wake_up_process(grtlk->lk_task);
-+ }
-+
-+ if (event != NULL &&
-+ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
-+ curln->ln_ev_target = event;
-+ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
-+ } else {
-+ curln->ln_mode = HTREE_LOCK_INVAL;
-+ }
-+
-+ if (grtlk == NULL) { /* I must be the only one locking this key */
-+ struct htree_lock_node *tmpln;
-+
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
-+ return;
-+
-+ /* not listening */
-+ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(curln);
-+ return;
-+ }
-+
-+ tmpln = list_entry(curln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
-+
-+ htree_key_list_replace_init(curln, tmpln);
-+ list_del_init(&curln->ln_alive_list);
-+
-+ return;
-+ }
-+
-+ /* have a granted lock */
-+ grtln = &grtlk->lk_nodes[dep];
-+ if (!list_empty(&curln->ln_blocked_list)) {
-+ /* only key owner can be on both lists */
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (list_empty(&grtln->ln_blocked_list)) {
-+ list_add(&grtln->ln_blocked_list,
-+ &curln->ln_blocked_list);
-+ }
-+ list_del_init(&curln->ln_blocked_list);
-+ }
-+ /*
-+ * NB: this is the tricky part:
-+ * We have only two modes for child-lock (PR and PW), also,
-+ * only owner of the key (attached on major/minor_list) can be on
-+ * both blocked_list and granted_list, so @grtlk must be one
-+ * of these two cases:
-+ *
-+ * a) @grtlk is taken from granted_list, which means we've granted
-+ * more than one lock so @grtlk has to be PR, the first blocked
-+ * lock must be PW and we can't grant it at all.
-+ * So even @grtlk is not owner of the key (empty blocked_list),
-+ * we don't care because we can't grant any lock.
-+ * b) we just grant a new lock which is taken from head of blocked
-+ * list, and it should be the first granted lock, and it should
-+ * be the first one linked on blocked_list.
-+ *
-+ * Either way, we can get correct result by iterating blocked_list
-+ * of @grtlk, and don't have to bother on how to find out
-+ * owner of current key.
-+ */
-+ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
-+ lk_nodes[dep].ln_blocked_list) {
-+ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
-+ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
-+ break;
-+ /* grant all readers */
-+ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
-+ list_add(&poslk->lk_nodes[dep].ln_granted_list,
-+ &grtln->ln_granted_list);
-+
-+ BUG_ON(poslk->lk_task == NULL);
-+ wake_up_process(poslk->lk_task);
-+ }
-+
-+ /* if @curln is the owner of this key, replace it with @grtln */
-+ if (!htree_key_list_empty(curln))
-+ htree_key_list_replace_init(curln, grtln);
-+
-+ if (curln->ln_mode == HTREE_LOCK_INVAL)
-+ list_del_init(&curln->ln_alive_list);
-+}
-+
-+/*
-+ * it's just wrapper of htree_node_lock_internal, it returns 1 on granted
-+ * and 0 only if @wait is false and can't grant it immediately
-+ */
-+int
-+htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, dep);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_node_lock_try);
-+
-+/* it's wrapper of htree_node_unlock_internal */
-+void
-+htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ htree_node_unlock_internal(lhead, lck, dep, event);
-+ htree_spin_unlock(lhead, dep);
-+}
-+EXPORT_SYMBOL(htree_node_unlock);
-+
-+/* stop listening on child-lock level @dep */
-+void
-+htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
-+{
-+ struct htree_lock_node *ln = &lck->lk_nodes[dep];
-+ struct htree_lock_node *tmp;
-+
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+ BUG_ON(!list_empty(&ln->ln_blocked_list));
-+ BUG_ON(!list_empty(&ln->ln_granted_list));
-+
-+ if (!htree_node_is_listening(lck, dep))
-+ return;
-+
-+ htree_spin_lock(lck->lk_head, dep);
-+ ln->ln_mode = HTREE_LOCK_INVAL;
-+ ln->ln_ev_target = NULL;
-+
-+ if (htree_key_list_empty(ln)) { /* not owner */
-+ list_del_init(&ln->ln_alive_list);
-+ goto out;
-+ }
-+
-+ /* I'm the owner... */
-+ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(ln);
-+ goto out;
-+ }
-+
-+ tmp = list_entry(ln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
-+ htree_key_list_replace_init(ln, tmp);
-+ list_del_init(&ln->ln_alive_list);
-+ out:
-+ htree_spin_unlock(lck->lk_head, dep);
-+}
-+EXPORT_SYMBOL(htree_node_stop_listen);
-+
-+/* release all child-locks if we have any */
-+static void
-+htree_node_release_all(struct htree_lock *lck)
-+{
-+ int i;
-+
-+ for (i = 0; i < lck->lk_depth; i++) {
-+ if (htree_node_is_granted(lck, i))
-+ htree_node_unlock(lck, i, NULL);
-+ else if (htree_node_is_listening(lck, i))
-+ htree_node_stop_listen(lck, i);
-+ }
-+}
-+
-+/*
-+ * obtain htree lock, it could be blocked inside if there's conflict
-+ * with any granted or blocked lock and @wait is true.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_lock_internal(struct htree_lock *lck, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int granted = 0;
-+ int blocked = 0;
-+ int i;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ if (lhead->lh_nblocked[i] != 0)
-+ blocked |= 1 << i;
-+ }
-+ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
-+ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
-+ /* will block current lock even it just conflicts with any
-+ * other blocked lock, so lock like EX wouldn't starve */
-+ if (!wait)
-+ return -1;
-+ lhead->lh_nblocked[lck->lk_mode]++;
-+ lk_block_inc(lck->lk_mode);
-+
-+ lck->lk_task = current;
-+ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ /* wait to be given the lock */
-+ if (lck->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt. wake up will set me RUNNING.
-+ * Since thread would be waken up accidentally,
-+ * so we need check lock whether granted or not again. */
-+ if (!list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ if (list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return 0;
-+ }
-+ goto retry;
-+ }
-+ return 0; /* without lh_lock */
-+ }
-+ lhead->lh_ngranted[lck->lk_mode]++;
-+ lk_grant_inc(lck->lk_mode);
-+ return 1;
-+}
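-+
-+/*
-+ * Example: with a single PR lock granted, granted == 1 << HTREE_LOCK_PR.
-+ * A new CR request is admitted because htree_lock_compat[HTREE_LOCK_CR]
-+ * includes the PR bit, while a new EX request conflicts
-+ * (htree_lock_compat[HTREE_LOCK_EX] == 0) and is queued. Since the
-+ * check also covers the lh_nblocked counters, a later CR arriving
-+ * behind that queued EX blocks as well, which is what keeps EX from
-+ * starving.
-+ */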
-+
-+/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
-+static void
-+htree_unlock_internal(struct htree_lock *lck)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ int granted = 0;
-+ int i;
-+
-+ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
-+
-+ lhead->lh_ngranted[lck->lk_mode]--;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ }
-+ list_for_each_entry_safe(tmp, tmp2,
-+ &lhead->lh_blocked_list, lk_blocked_list) {
-+ /* conflict with any granted lock? */
-+ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
-+ break;
-+
-+ list_del_init(&tmp->lk_blocked_list);
-+
-+ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
-+
-+ lhead->lh_nblocked[tmp->lk_mode]--;
-+ lhead->lh_ngranted[tmp->lk_mode]++;
-+ granted |= 1 << tmp->lk_mode;
-+
-+ BUG_ON(tmp->lk_task == NULL);
-+ wake_up_process(tmp->lk_task);
-+ }
-+}
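-+
-+/*
-+ * Example: if the blocked list holds CR, EX, CR in arrival order and
-+ * the last granted lock goes away, the loop grants the first CR, stops
-+ * at EX (it conflicts with the CR just granted) and leaves the trailing
-+ * CR queued behind it, so waiters are granted strictly in FIFO order.
-+ */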
-+
-+/* wrapper around htree_lock_internal and the exported interface.
-+ * It always returns 1 with the lock granted if @wait is true; it returns 0
-+ * if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait)
-+{
-+ int rc;
-+
-+ BUG_ON(lck->lk_depth > lhead->lh_depth);
-+ BUG_ON(lck->lk_head != NULL);
-+ BUG_ON(lck->lk_task != NULL);
-+
-+ lck->lk_head = lhead;
-+ lck->lk_mode = mode;
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_lock_try);
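-+
-+/*
-+ * Illustrative trylock pattern (a sketch, not used by this patch):
-+ *
-+ * if (!htree_lock_try(lck, lhead, HTREE_LOCK_PR, 0))
-+ * return -EBUSY;
-+ *
-+ * With @wait == 0 a conflicting request fails immediately instead of
-+ * sleeping on lh_blocked_list; the caller can back off or retry with
-+ * the blocking htree_lock() instead.
-+ */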
-+
-+/* wrapper around htree_unlock_internal and the exported interface.
-+ * It releases all htree_node_locks as well as the htree_lock itself */
-+void
-+htree_unlock(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_head == NULL);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
-+ lck->lk_head = NULL;
-+ lck->lk_task = NULL;
-+}
-+EXPORT_SYMBOL(htree_unlock);
-+
-+/* change lock mode */
-+void
-+htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
-+{
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+ lck->lk_mode = mode;
-+}
-+EXPORT_SYMBOL(htree_change_mode);
-+
-+/* release the htree lock and take it again with a new mode.
-+ * This function first releases all htree_node_locks and the htree_lock,
-+ * then tries to regain the htree_lock with the new @mode.
-+ * It always returns 1 with the lock granted if @wait is true; it returns 0
-+ * if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(lhead == NULL);
-+ BUG_ON(lck->lk_mode == mode);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ lck->lk_mode = mode;
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_change_lock_try);
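-+
-+/*
-+ * Illustrative pattern (a sketch, not part of this patch): a writer
-+ * holding CW that finds it must split a block can upgrade to EX and
-+ * retry; "need_split" is a placeholder for the caller's own test:
-+ *
-+ * if (need_split) {
-+ * htree_change_lock(lck, HTREE_LOCK_EX);
-+ * ... redo the lookup under EX, then split and insert ...
-+ * }
-+ *
-+ * Every node lock is dropped by the upgrade, so any state derived from
-+ * them must be revalidated afterwards.
-+ */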
-+
-+/* create a htree_lock head with @depth levels (number of child-locks),
-+ * it is a per-resource structure */
-+struct htree_lock_head *
-+htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
-+{
-+ struct htree_lock_head *lhead;
-+ int i;
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%u is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+
-+ lhead = kzalloc(offsetof(struct htree_lock_head,
-+ lh_children[depth]) + priv, GFP_NOFS);
-+ if (lhead == NULL)
-+ return NULL;
-+
-+ if (hbits < HTREE_HBITS_MIN)
-+ lhead->lh_hbits = HTREE_HBITS_MIN;
-+ else if (hbits > HTREE_HBITS_MAX)
-+ lhead->lh_hbits = HTREE_HBITS_MAX;
-+ else
-+ lhead->lh_hbits = hbits;
-+
-+ lhead->lh_lock = 0;
-+ lhead->lh_depth = depth;
-+ INIT_LIST_HEAD(&lhead->lh_blocked_list);
-+ if (priv > 0) {
-+ lhead->lh_private = (void *)lhead +
-+ offsetof(struct htree_lock_head, lh_children[depth]);
-+ }
-+
-+ for (i = 0; i < depth; i++) {
-+ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
-+ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(htree_lock_head_alloc);
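-+
-+/*
-+ * Example (illustrative): a head for a two-level tree with the default
-+ * hash size and no private area:
-+ *
-+ * lhead = htree_lock_head_alloc(2, HTREE_HBITS_DEF, 0);
-+ * if (lhead == NULL)
-+ * return -ENOMEM;
-+ */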
-+
-+/* free the htree_lock head */
-+void
-+htree_lock_head_free(struct htree_lock_head *lhead)
-+{
-+ int i;
-+
-+ BUG_ON(!list_empty(&lhead->lh_blocked_list));
-+ for (i = 0; i < lhead->lh_depth; i++)
-+ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
-+ kfree(lhead);
-+}
-+EXPORT_SYMBOL(htree_lock_head_free);
-+
-+/* register event callback for @events of child-lock at level @dep */
-+void
-+htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
-+ unsigned events, htree_event_cb_t callback)
-+{
-+ BUG_ON(lhead->lh_depth <= dep);
-+ lhead->lh_children[dep].lc_events = events;
-+ lhead->lh_children[dep].lc_callback = callback;
-+}
-+EXPORT_SYMBOL(htree_lock_event_attach);
-+
-+/* allocate a htree_lock, which is a per-thread structure; @pbytes is the
-+ * number of extra bytes reserved as private data for the caller */
-+struct htree_lock *
-+htree_lock_alloc(unsigned depth, unsigned pbytes)
-+{
-+ struct htree_lock *lck;
-+ int i = offsetof(struct htree_lock, lk_nodes[depth]);
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%u is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+ lck = kzalloc(i + pbytes, GFP_NOFS);
-+ if (lck == NULL)
-+ return NULL;
-+
-+ if (pbytes != 0)
-+ lck->lk_private = (void *)lck + i;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+ lck->lk_depth = depth;
-+ INIT_LIST_HEAD(&lck->lk_blocked_list);
-+
-+ for (i = 0; i < depth; i++) {
-+ struct htree_lock_node *node = &lck->lk_nodes[i];
-+
-+ node->ln_mode = HTREE_LOCK_INVAL;
-+ INIT_LIST_HEAD(&node->ln_major_list);
-+ INIT_LIST_HEAD(&node->ln_minor_list);
-+ INIT_LIST_HEAD(&node->ln_alive_list);
-+ INIT_LIST_HEAD(&node->ln_blocked_list);
-+ INIT_LIST_HEAD(&node->ln_granted_list);
-+ }
-+
-+ return lck;
-+}
-+EXPORT_SYMBOL(htree_lock_alloc);
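-+
-+/*
-+ * Example (illustrative): each thread pairs its own handle with the
-+ * shared head:
-+ *
-+ * lck = htree_lock_alloc(lhead->lh_depth, 0);
-+ * htree_lock(lck, lhead, HTREE_LOCK_CR);
-+ * ...
-+ * htree_unlock(lck);
-+ * htree_lock_free(lck);
-+ *
-+ * htree_unlock() resets lk_mode to HTREE_LOCK_INVAL, which
-+ * htree_lock_free() asserts below.
-+ */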
-+
-+/* free the per-thread htree_lock handle */
-+void
-+htree_lock_free(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
-+ kfree(lck);
-+}
-+EXPORT_SYMBOL(htree_lock_free);
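
A minimal usage sketch of the interface above (illustrative only, not part
of the patch; error handling is omitted and "name_hash" stands for whatever
key the caller hashes). It assumes a lock head created with two key levels:

    struct htree_lock_head *lhead;
    struct htree_lock *lck;

    /* one lock head per directory, two key levels */
    lhead = htree_lock_head_alloc(2, HTREE_HBITS_DEF, 0);
    lck = htree_lock_alloc(2, 0);

    /* shared write on the tree, then PW on one hashed key */
    htree_lock(lck, lhead, HTREE_LOCK_CW);
    htree_node_lock(lck, HTREE_LOCK_PW, name_hash, 0);
    /* ... modify the blocks that name_hash maps to ... */
    htree_unlock(lck);    /* drops the node lock as well */

    htree_lock_free(lck);
    htree_lock_head_free(lhead);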
Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
===================================================================
--- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile
- pdirops support for ldiskfs
- integrate with osd-ldiskfs
-Index: linux-3.10.0-229.1.2.fc21.x86_64/include/linux/htree_lock.h
-===================================================================
---- /dev/null
-+++ linux-3.10.0-229.1.2.fc21.x86_64/include/linux/htree_lock.h
-@@ -0,0 +1,187 @@
-+/*
-+ * include/linux/htree_lock.h
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+
-+/*
-+ * htree lock
-+ *
-+ * htree_lock is an advanced lock, it can support five lock modes (concept is
-+ * taken from DLM) and it's a sleeping lock.
-+ *
-+ * most common use case is:
-+ * - create a htree_lock_head for data
-+ * - each thread (contender) creates it's own htree_lock
-+ * - contender needs to call htree_lock(lock_node, mode) to protect data and
-+ * call htree_unlock to release lock
-+ *
-+ * Also, there is advanced use-case which is more complex, user can have
-+ * PW/PR lock on particular key, it's mostly used while user holding shared
-+ * lock on the htree (CW, CR)
-+ *
-+ * htree_lock(lock_node, HTREE_LOCK_CR); lock the htree with CR
-+ * htree_node_lock(lock_node, HTREE_LOCK_PR, key...); lock @key with PR
-+ * ...
-+ * htree_node_unlock(lock_node);; unlock the key
-+ *
-+ * Another tip is, we can have N-levels of this kind of keys, all we need to
-+ * do is specifying N-levels while creating htree_lock_head, then we can
-+ * lock/unlock a specific level by:
-+ * htree_node_lock(lock_node, mode1, key1, level1...);
-+ * do something;
-+ * htree_node_lock(lock_node, mode1, key2, level2...);
-+ * do something;
-+ * htree_node_unlock(lock_node, level2);
-+ * htree_node_unlock(lock_node, level1);
-+ *
-+ * NB: for multi-level, should be careful about locking order to avoid deadlock
-+ */
-+
-+#ifndef _LINUX_HTREE_LOCK_H
-+#define _LINUX_HTREE_LOCK_H
-+
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * Lock Modes
-+ * more details can be found here:
-+ * http://en.wikipedia.org/wiki/Distributed_lock_manager
-+ */
-+typedef enum {
-+ HTREE_LOCK_EX = 0, /* exclusive lock: incompatible with all others */
-+ HTREE_LOCK_PW, /* protected write: allows only CR users */
-+ HTREE_LOCK_PR, /* protected read: allow PR, CR users */
-+ HTREE_LOCK_CW, /* concurrent write: allow CR, CW users */
-+ HTREE_LOCK_CR, /* concurrent read: allow all but EX users */
-+ HTREE_LOCK_MAX, /* number of lock modes */
-+} htree_lock_mode_t;
-+
-+#define HTREE_LOCK_NL HTREE_LOCK_MAX
-+#define HTREE_LOCK_INVAL 0xdead10c
-+
-+enum {
-+ HTREE_HBITS_MIN = 2,
-+ HTREE_HBITS_DEF = 14,
-+ HTREE_HBITS_MAX = 32,
-+};
-+
-+enum {
-+ HTREE_EVENT_DISABLE = (0),
-+ HTREE_EVENT_RD = (1 << HTREE_LOCK_PR),
-+ HTREE_EVENT_WR = (1 << HTREE_LOCK_PW),
-+ HTREE_EVENT_RDWR = (HTREE_EVENT_RD | HTREE_EVENT_WR),
-+};
-+
-+struct htree_lock;
-+
-+typedef void (*htree_event_cb_t)(void *target, void *event);
-+
-+struct htree_lock_child {
-+ struct list_head lc_list; /* granted list */
-+ htree_event_cb_t lc_callback; /* event callback */
-+ unsigned lc_events; /* event types */
-+};
-+
-+struct htree_lock_head {
-+ unsigned long lh_lock; /* bits lock */
-+ /* blocked lock list (htree_lock) */
-+ struct list_head lh_blocked_list;
-+ /* # key levels */
-+ u16 lh_depth;
-+ /* hash bits for key and limit number of locks */
-+ u16 lh_hbits;
-+ /* counters for blocked locks */
-+ u16 lh_nblocked[HTREE_LOCK_MAX];
-+ /* counters for granted locks */
-+ u16 lh_ngranted[HTREE_LOCK_MAX];
-+ /* private data */
-+ void *lh_private;
-+ /* array of children locks */
-+ struct htree_lock_child lh_children[0];
-+};
-+
-+/* htree_lock_node_t is child-lock for a specific key (ln_value) */
-+struct htree_lock_node {
-+ htree_lock_mode_t ln_mode;
-+ /* major hash key */
-+ u16 ln_major_key;
-+ /* minor hash key */
-+ u16 ln_minor_key;
-+ struct list_head ln_major_list;
-+ struct list_head ln_minor_list;
-+ /* alive list, all locks (granted, blocked, listening) are on it */
-+ struct list_head ln_alive_list;
-+ /* blocked list */
-+ struct list_head ln_blocked_list;
-+ /* granted list */
-+ struct list_head ln_granted_list;
-+ void *ln_ev_target;
-+};
-+
-+struct htree_lock {
-+ struct task_struct *lk_task;
-+ struct htree_lock_head *lk_head;
-+ void *lk_private;
-+ unsigned lk_depth;
-+ htree_lock_mode_t lk_mode;
-+ struct list_head lk_blocked_list;
-+ struct htree_lock_node lk_nodes[0];
-+};
-+
-+/* create a lock head, which stands for a resource */
-+struct htree_lock_head *htree_lock_head_alloc(unsigned depth,
-+ unsigned hbits, unsigned priv);
-+/* free a lock head */
-+void htree_lock_head_free(struct htree_lock_head *lhead);
-+/* register event callback for child lock at level @depth */
-+void htree_lock_event_attach(struct htree_lock_head *lhead, unsigned depth,
-+ unsigned events, htree_event_cb_t callback);
-+/* create a lock handle, which stands for a thread */
-+struct htree_lock *htree_lock_alloc(unsigned depth, unsigned pbytes);
-+/* free a lock handle */
-+void htree_lock_free(struct htree_lock *lck);
-+/* lock htree, when @wait is true, 0 is returned if the lock can't
-+ * be granted immediately */
-+int htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait);
-+/* unlock htree */
-+void htree_unlock(struct htree_lock *lck);
-+/* unlock and relock htree with @new_mode */
-+int htree_change_lock_try(struct htree_lock *lck,
-+ htree_lock_mode_t new_mode, int wait);
-+void htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode);
-+/* require child lock (key) of htree at level @dep, @event will be sent to all
-+ * listeners on this @key while lock being granted */
-+int htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event);
-+/* release child lock at level @dep, this lock will listen on it's key
-+ * if @event isn't NULL, event_cb will be called against @lck while granting
-+ * any other lock at level @dep with the same key */
-+void htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event);
-+/* stop listening on child lock at level @dep */
-+void htree_node_stop_listen(struct htree_lock *lck, unsigned dep);
-+/* for debug */
-+void htree_lock_stat_print(int depth);
-+void htree_lock_stat_reset(void);
-+
-+#define htree_lock(lck, lh, mode) htree_lock_try(lck, lh, mode, 1)
-+#define htree_change_lock(lck, mode) htree_change_lock_try(lck, mode, 1)
-+
-+#define htree_lock_mode(lck) ((lck)->lk_mode)
-+
-+#define htree_node_lock(lck, mode, key, dep) \
-+ htree_node_lock_try(lck, mode, key, dep, 1, NULL)
-+/* this is only safe in thread context of lock owner */
-+#define htree_node_is_granted(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_INVAL && \
-+ (lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_NL)
-+/* this is only safe in thread context of lock owner */
-+#define htree_node_is_listening(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode == HTREE_LOCK_NL)
-+
-+#endif
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
-===================================================================
---- /dev/null
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
-@@ -0,0 +1,895 @@
-+/*
-+ * fs/ext4/htree_lock.c
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+#include <linux/jbd2.h>
-+#include <linux/hash.h>
-+#include <linux/module.h>
-+#include <linux/htree_lock.h>
-+
-+enum {
-+ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
-+ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
-+ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
-+ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
-+ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
-+};
-+
-+enum {
-+ HTREE_LOCK_COMPAT_EX = 0,
-+ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
-+ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
-+ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
-+ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
-+ HTREE_LOCK_BIT_PW,
-+};
-+
-+static int htree_lock_compat[] = {
-+ [HTREE_LOCK_EX] HTREE_LOCK_COMPAT_EX,
-+ [HTREE_LOCK_PW] HTREE_LOCK_COMPAT_PW,
-+ [HTREE_LOCK_PR] HTREE_LOCK_COMPAT_PR,
-+ [HTREE_LOCK_CW] HTREE_LOCK_COMPAT_CW,
-+ [HTREE_LOCK_CR] HTREE_LOCK_COMPAT_CR,
-+};
-+
-+/* max allowed htree-lock depth.
-+ * We only need depth=3 for ext4 although user can have higher value. */
-+#define HTREE_LOCK_DEP_MAX 16
-+
-+#ifdef HTREE_LOCK_DEBUG
-+
-+static char *hl_name[] = {
-+ [HTREE_LOCK_EX] "EX",
-+ [HTREE_LOCK_PW] "PW",
-+ [HTREE_LOCK_PR] "PR",
-+ [HTREE_LOCK_CW] "CW",
-+ [HTREE_LOCK_CR] "CR",
-+};
-+
-+/* lock stats */
-+struct htree_lock_node_stats {
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long retried[HTREE_LOCK_MAX];
-+ unsigned long long events;
-+};
-+
-+struct htree_lock_stats {
-+ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+};
-+
-+static struct htree_lock_stats hl_stats;
-+
-+void htree_lock_stat_reset(void)
-+{
-+ memset(&hl_stats, 0, sizeof(hl_stats));
-+}
-+
-+void htree_lock_stat_print(int depth)
-+{
-+ int i;
-+ int j;
-+
-+ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
-+ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
-+ }
-+ for (i = 0; i < depth; i++) {
-+ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
-+ for (j = 0; j < HTREE_LOCK_MAX; j++) {
-+ printk(KERN_DEBUG
-+ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
-+ hl_name[j], hl_stats.nodes[i].granted[j],
-+ hl_stats.nodes[i].blocked[j],
-+ hl_stats.nodes[i].retried[j]);
-+ }
-+ }
-+}
-+
-+#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
-+#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
-+#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
-+#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
-+#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
-+#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
-+
-+#else /* !DEBUG */
-+
-+void htree_lock_stat_reset(void) {}
-+void htree_lock_stat_print(int depth) {}
-+
-+#define lk_grant_inc(m) do {} while (0)
-+#define lk_block_inc(m) do {} while (0)
-+#define ln_grant_inc(d, m) do {} while (0)
-+#define ln_block_inc(d, m) do {} while (0)
-+#define ln_retry_inc(d, m) do {} while (0)
-+#define ln_event_inc(d) do {} while (0)
-+
-+#endif /* DEBUG */
-+
-+EXPORT_SYMBOL(htree_lock_stat_reset);
-+EXPORT_SYMBOL(htree_lock_stat_print);
-+
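
When built with HTREE_LOCK_DEBUG defined, the stat counters above can be
dumped and cleared; a hypothetical debugging call site:

    htree_lock_stat_print(3);   /* ext4 only uses three child-lock levels */
    htree_lock_stat_reset();
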
-+#define HTREE_DEP_ROOT (-1)
-+
-+#define htree_spin_lock(lhead, dep) \
-+ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
-+#define htree_spin_unlock(lhead, dep) \
-+ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
-+
-+#define htree_key_event_ignore(child, ln) \
-+ (!((child)->lc_events & (1 << (ln)->ln_mode)))
-+
-+static int
-+htree_key_list_empty(struct htree_lock_node *ln)
-+{
-+ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
-+}
-+
-+static void
-+htree_key_list_del_init(struct htree_lock_node *ln)
-+{
-+ struct htree_lock_node *tmp = NULL;
-+
-+ if (!list_empty(&ln->ln_minor_list)) {
-+ tmp = list_entry(ln->ln_minor_list.next,
-+ struct htree_lock_node, ln_minor_list);
-+ list_del_init(&ln->ln_minor_list);
-+ }
-+
-+ if (list_empty(&ln->ln_major_list))
-+ return;
-+
-+ if (tmp == NULL) { /* not on minor key list */
-+ list_del_init(&ln->ln_major_list);
-+ } else {
-+ BUG_ON(!list_empty(&tmp->ln_major_list));
-+ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
-+ }
-+}
-+
-+static void
-+htree_key_list_replace_init(struct htree_lock_node *old,
-+ struct htree_lock_node *new)
-+{
-+ if (!list_empty(&old->ln_major_list))
-+ list_replace_init(&old->ln_major_list, &new->ln_major_list);
-+
-+ if (!list_empty(&old->ln_minor_list))
-+ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
-+}
-+
-+static void
-+htree_key_event_enqueue(struct htree_lock_child *child,
-+ struct htree_lock_node *ln, int dep, void *event)
-+{
-+ struct htree_lock_node *tmp;
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
-+ if (event == NULL || htree_key_event_ignore(child, ln))
-+ return;
-+
-+ /* shouldn't be a very long list */
-+ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
-+ if (tmp->ln_mode == HTREE_LOCK_NL) {
-+ ln_event_inc(dep);
-+ if (child->lc_callback != NULL)
-+ child->lc_callback(tmp->ln_ev_target, event);
-+ }
-+ }
-+}
-+
-+static int
-+htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
-+ unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
-+ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ /* NB: we only expect PR/PW lock modes here; only these two modes are
-+ * allowed for htree_node_lock (asserted in htree_node_lock_internal).
-+ * NL is only used for listeners; users can't directly request NL mode */
-+ if ((curln->ln_mode == HTREE_LOCK_NL) ||
-+ (curln->ln_mode != HTREE_LOCK_PW &&
-+ newln->ln_mode != HTREE_LOCK_PW)) {
-+ /* no conflict, attach it on granted list of @curlk */
-+ if (curln->ln_mode != HTREE_LOCK_NL) {
-+ list_add(&newln->ln_granted_list,
-+ &curln->ln_granted_list);
-+ } else {
-+ /* replace key owner */
-+ htree_key_list_replace_init(curln, newln);
-+ }
-+
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ ln_grant_inc(dep, newln->ln_mode);
-+ return 1; /* still hold lh_lock */
-+ }
-+
-+ if (!wait) { /* can't grant and don't want to wait */
-+ ln_retry_inc(dep, newln->ln_mode);
-+ newln->ln_mode = HTREE_LOCK_INVAL;
-+ return -1; /* don't wait and just return -1 */
-+ }
-+
-+ newlk->lk_task = current;
-+ /* conflict, attach it on blocked list of curlk */
-+ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ ln_block_inc(dep, newln->ln_mode);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(newlk->lk_head, dep);
-+ /* wait to be given the lock */
-+ if (newlk->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt, wake up will set me RUNNING */
-+ htree_spin_lock(newlk->lk_head, dep);
-+ /* need to check the lock was really granted; the thread
-+ * may have been woken up spuriously */
-+ if (list_empty(&newln->ln_granted_list) && htree_key_list_empty(newln))
-+ goto retry;
-+ if (event && !htree_key_event_ignore(child, newln))
-+ htree_key_event_enqueue(child, newln, dep, event);
-+
-+ return 1; /* still hold lh_lock */
-+}
-+
-+/*
-+ * get PR/PW access to a particular tree node according to @dep and @key;
-+ * it will return -1 if @wait is false and the lock can't be granted
-+ * immediately.
-+ * All listeners (HTREE_LOCK_NL) on @dep with the same @key will get
-+ * @event if it's not NULL.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
-+ htree_lock_mode_t mode, u32 key, unsigned dep,
-+ int wait, void *event)
-+{
-+ LIST_HEAD(list);
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ u16 major;
-+ u16 minor;
-+ u8 reverse;
-+ u8 ma_bits;
-+ u8 mi_bits;
-+
-+ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+
-+ key = hash_long(key, lhead->lh_hbits);
-+
-+ mi_bits = lhead->lh_hbits >> 1;
-+ ma_bits = lhead->lh_hbits - mi_bits;
-+
-+ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
-+ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
-+ lck->lk_nodes[dep].ln_mode = mode;
-+
-+ /*
-+ * The major key list is an ordered list, so searches are started
-+ * at the end of the list that is numerically closer to major_key,
-+ * so at most half of the list will be walked (for well-distributed
-+ * keys). The list traversal aborts early if the expected key
-+ * location is passed.
-+ */
-+ reverse = (major >= (1 << (ma_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp,
-+ &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
-+ /* attach _after_ @tmp */
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+
-+ } else {
-+ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
-+ /* insert _before_ @tmp */
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+ }
-+
-+ search_minor:
-+ /*
-+ * NB: minor_key list doesn't have a "head", @list is just a
-+ * temporary stub for helping list searching, make sure it's removed
-+ * after searching.
-+ * minor_key list is an ordered list too.
-+ */
-+ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
-+
-+ reverse = (minor >= (1 << (mi_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
-+ /* attach _after_ @tmp2 */
-+ list_add(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
-+
-+ } else {
-+ list_for_each_entry(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
-+ /* insert _before_ @tmp2 */
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
-+ }
-+
-+ out_grant_minor:
-+ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
-+ /* new lock @lck is the first one on minor_key list, which
-+ * means it has the smallest minor_key and it should
-+ * replace @tmp as minor_key owner */
-+ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
-+ &lck->lk_nodes[dep].ln_major_list);
-+ }
-+ /* remove the temporary head */
-+ list_del(&list);
-+
-+ out_grant_major:
-+ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
-+ return 1; /* granted with holding lh_lock */
-+
-+ out_enqueue:
-+ list_del(&list); /* remove temporary head */
-+ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
-+}
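
As a concrete illustration of the key split performed above, assuming the
default lh_hbits of 14 (so ma_bits = mi_bits = 7):

    u32 h = hash_long(key, 14);  /* 14-bit hash of the caller's key */
    u16 major = h & 0x7f;        /* low 7 bits order the major list */
    u16 minor = h >> 7;          /* high 7 bits order the minor list */
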
-+
-+/*
-+ * release the key of @lck at level @dep, and grant any blocked locks.
-+ * caller will still listen on @key if @event is not NULL, which means
-+ * caller can see a event (by event_cb) while granting any lock with
-+ * the same key at level @dep.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ * NB: listener will not block anyone because listening mode is HTREE_LOCK_NL
-+ */
-+static void
-+htree_node_unlock_internal(struct htree_lock_head *lhead,
-+ struct htree_lock *curlk, unsigned dep, void *event)
-+{
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+ struct htree_lock *grtlk = NULL;
-+ struct htree_lock_node *grtln;
-+ struct htree_lock *poslk;
-+ struct htree_lock *tmplk;
-+
-+ if (!htree_node_is_granted(curlk, dep))
-+ return;
-+
-+ if (!list_empty(&curln->ln_granted_list)) {
-+ /* there is another granted lock */
-+ grtlk = list_entry(curln->ln_granted_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_granted_list);
-+ list_del_init(&curln->ln_granted_list);
-+ }
-+
-+ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
-+ /*
-+ * @curlk is the only granted lock, so we confirmed:
-+ * a) curln is key owner (attached on major/minor_list),
-+ * so if there is any blocked lock, it should be attached
-+ * on curln->ln_blocked_list
-+ * b) we always can grant the first blocked lock
-+ */
-+ grtlk = list_entry(curln->ln_blocked_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_blocked_list);
-+ BUG_ON(grtlk->lk_task == NULL);
-+ wake_up_process(grtlk->lk_task);
-+ }
-+
-+ if (event != NULL &&
-+ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
-+ curln->ln_ev_target = event;
-+ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
-+ } else {
-+ curln->ln_mode = HTREE_LOCK_INVAL;
-+ }
-+
-+ if (grtlk == NULL) { /* I must be the only one locking this key */
-+ struct htree_lock_node *tmpln;
-+
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
-+ return;
-+
-+ /* not listening */
-+ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(curln);
-+ return;
-+ }
-+
-+ tmpln = list_entry(curln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
-+
-+ htree_key_list_replace_init(curln, tmpln);
-+ list_del_init(&curln->ln_alive_list);
-+
-+ return;
-+ }
-+
-+ /* have a granted lock */
-+ grtln = &grtlk->lk_nodes[dep];
-+ if (!list_empty(&curln->ln_blocked_list)) {
-+ /* only key owner can be on both lists */
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (list_empty(&grtln->ln_blocked_list)) {
-+ list_add(&grtln->ln_blocked_list,
-+ &curln->ln_blocked_list);
-+ }
-+ list_del_init(&curln->ln_blocked_list);
-+ }
-+ /*
-+ * NB: this is the tricky part:
-+ * We have only two modes for child-lock (PR and PW), also,
-+ * only owner of the key (attached on major/minor_list) can be on
-+ * both blocked_list and granted_list, so @grtlk must be one
-+ * of these two cases:
-+ *
-+ * a) @grtlk is taken from granted_list, which means we've granted
-+ * more than one lock so @grtlk has to be PR, the first blocked
-+ * lock must be PW and we can't grant it at all.
-+ * So even if @grtlk is not the owner of the key (empty blocked_list),
-+ * we don't care, because we can't grant any lock anyway.
-+ * b) we just grant a new lock which is taken from head of blocked
-+ * list, and it should be the first granted lock, and it should
-+ * be the first one linked on blocked_list.
-+ *
-+ * Either way, we can get correct result by iterating blocked_list
-+ * of @grtlk, and don't have to bother with finding the
-+ * owner of the current key.
-+ */
-+ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
-+ lk_nodes[dep].ln_blocked_list) {
-+ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
-+ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
-+ break;
-+ /* grant all readers */
-+ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
-+ list_add(&poslk->lk_nodes[dep].ln_granted_list,
-+ &grtln->ln_granted_list);
-+
-+ BUG_ON(poslk->lk_task == NULL);
-+ wake_up_process(poslk->lk_task);
-+ }
-+
-+ /* if @curln is the owner of this key, replace it with @grtln */
-+ if (!htree_key_list_empty(curln))
-+ htree_key_list_replace_init(curln, grtln);
-+
-+ if (curln->ln_mode == HTREE_LOCK_INVAL)
-+ list_del_init(&curln->ln_alive_list);
-+}
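
The listening mode set up above enables a pattern roughly like the following
hypothetical caller (my_event stands for whatever payload the registered
event_cb consumes):

    htree_node_lock(lck, HTREE_LOCK_PR, key, dep);
    /* ... read the guarded sub-resource ... */
    htree_node_unlock(lck, dep, my_event);  /* keep listening on this key */
    /* a later lock granted on the same key calls event_cb with
     * my_event as @target and the grantee's event as @event */
    htree_node_stop_listen(lck, dep);
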
-+
-+/*
-+ * it's just a wrapper of htree_node_lock_internal; it returns 1 when the
-+ * lock is granted, and 0 only if @wait is false and the lock can't be
-+ * granted immediately
-+ */
-+int
-+htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, dep);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_node_lock_try);
-+
-+/* it's a wrapper of htree_node_unlock_internal */
-+void
-+htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ htree_node_unlock_internal(lhead, lck, dep, event);
-+ htree_spin_unlock(lhead, dep);
-+}
-+EXPORT_SYMBOL(htree_node_unlock);
-+
-+/* stop listening on child-lock level @dep */
-+void
-+htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
-+{
-+ struct htree_lock_node *ln = &lck->lk_nodes[dep];
-+ struct htree_lock_node *tmp;
-+
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+ BUG_ON(!list_empty(&ln->ln_blocked_list));
-+ BUG_ON(!list_empty(&ln->ln_granted_list));
-+
-+ if (!htree_node_is_listening(lck, dep))
-+ return;
-+
-+ htree_spin_lock(lck->lk_head, dep);
-+ ln->ln_mode = HTREE_LOCK_INVAL;
-+ ln->ln_ev_target = NULL;
-+
-+ if (htree_key_list_empty(ln)) { /* not owner */
-+ list_del_init(&ln->ln_alive_list);
-+ goto out;
-+ }
-+
-+ /* I'm the owner... */
-+ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(ln);
-+ goto out;
-+ }
-+
-+ tmp = list_entry(ln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
-+ htree_key_list_replace_init(ln, tmp);
-+ list_del_init(&ln->ln_alive_list);
-+ out:
-+ htree_spin_unlock(lck->lk_head, dep);
-+}
-+EXPORT_SYMBOL(htree_node_stop_listen);
-+
-+/* release all child-locks if we have any */
-+static void
-+htree_node_release_all(struct htree_lock *lck)
-+{
-+ int i;
-+
-+ for (i = 0; i < lck->lk_depth; i++) {
-+ if (htree_node_is_granted(lck, i))
-+ htree_node_unlock(lck, i, NULL);
-+ else if (htree_node_is_listening(lck, i))
-+ htree_node_stop_listen(lck, i);
-+ }
-+}
-+
-+/*
-+ * obtain the htree lock; it may block inside if there's a conflict
-+ * with any granted or blocked lock and @wait is true.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_lock_internal(struct htree_lock *lck, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int granted = 0;
-+ int blocked = 0;
-+ int i;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ if (lhead->lh_nblocked[i] != 0)
-+ blocked |= 1 << i;
-+ }
-+ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
-+ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
-+ /* block the current lock even if it only conflicts with
-+ * another blocked lock, so locks like EX won't starve */
-+ if (!wait)
-+ return -1;
-+ lhead->lh_nblocked[lck->lk_mode]++;
-+ lk_block_inc(lck->lk_mode);
-+
-+ lck->lk_task = current;
-+ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ /* wait to be given the lock */
-+ if (lck->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt; wake up will set me RUNNING.
-+ * Since the thread may have been woken up spuriously,
-+ * we need to check again whether the lock was granted. */
-+ if (!list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ if (list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return 0;
-+ }
-+ goto retry;
-+ }
-+ return 0; /* without lh_lock */
-+ }
-+ lhead->lh_ngranted[lck->lk_mode]++;
-+ lk_grant_inc(lck->lk_mode);
-+ return 1;
-+}
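
To see how the blocked-bitmask test above prevents starvation, consider an
illustrative sequence (not from the patch):

    /* CR granted, then EX arrives: EX conflicts with CR and blocks.
     * A second CR now arrives:
     *   granted = HTREE_LOCK_BIT_CR, blocked = HTREE_LOCK_BIT_EX
     *   (HTREE_LOCK_COMPAT_CR & granted) == granted  -> compatible
     *   (HTREE_LOCK_COMPAT_CR & blocked) != blocked  -> EX bit missing
     * so the new CR queues behind the waiting EX instead of starving it. */
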
-+
-+/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
-+static void
-+htree_unlock_internal(struct htree_lock *lck)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ int granted = 0;
-+ int i;
-+
-+ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
-+
-+ lhead->lh_ngranted[lck->lk_mode]--;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ }
-+ list_for_each_entry_safe(tmp, tmp2,
-+ &lhead->lh_blocked_list, lk_blocked_list) {
-+ /* conflict with any granted lock? */
-+ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
-+ break;
-+
-+ list_del_init(&tmp->lk_blocked_list);
-+
-+ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
-+
-+ lhead->lh_nblocked[tmp->lk_mode]--;
-+ lhead->lh_ngranted[tmp->lk_mode]++;
-+ granted |= 1 << tmp->lk_mode;
-+
-+ BUG_ON(tmp->lk_task == NULL);
-+ wake_up_process(tmp->lk_task);
-+ }
-+}
-+
-+/* wrapper of htree_lock_internal and the exported interface.
-+ * It always returns 1 with the lock granted if @wait is true; it can return
-+ * 0 if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait)
-+{
-+ int rc;
-+
-+ BUG_ON(lck->lk_depth > lhead->lh_depth);
-+ BUG_ON(lck->lk_head != NULL);
-+ BUG_ON(lck->lk_task != NULL);
-+
-+ lck->lk_head = lhead;
-+ lck->lk_mode = mode;
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_lock_try);
-+
-+/* wrapper of htree_unlock_internal and the exported interface.
-+ * It releases all htree_node_locks as well as the htree_lock itself */
-+void
-+htree_unlock(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_head == NULL);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
-+ lck->lk_head = NULL;
-+ lck->lk_task = NULL;
-+}
-+EXPORT_SYMBOL(htree_unlock);
-+
-+/* change lock mode */
-+void
-+htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
-+{
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+ lck->lk_mode = mode;
-+}
-+EXPORT_SYMBOL(htree_change_mode);
-+
-+/* release the htree lock, then lock it again with a new mode.
-+ * This function first releases all htree_node_locks and the htree_lock,
-+ * then tries to regain the htree_lock with the new @mode.
-+ * It always returns 1 with the lock granted if @wait is true; it can return
-+ * 0 if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(lhead == NULL);
-+ BUG_ON(lck->lk_mode == mode);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ lck->lk_mode = mode;
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_change_lock_try);
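
A sketch of the intended upgrade pattern (hypothetical caller; need_split
stands for whatever condition demands the stronger mode, e.g. having to
split a leaf or index block):

    if (need_split) {
        /* drops all node locks and the CW lock, re-locks in EX */
        htree_change_lock(lck, HTREE_LOCK_EX);
        /* the tree may have changed while unlocked: revalidate and retry */
    }
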
-+
-+/* create an htree_lock head with @depth levels (number of child-locks);
-+ * it is a per-resource structure */
-+struct htree_lock_head *
-+htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
-+{
-+ struct htree_lock_head *lhead;
-+ int i;
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+
-+ lhead = kzalloc(offsetof(struct htree_lock_head,
-+ lh_children[depth]) + priv, GFP_NOFS);
-+ if (lhead == NULL)
-+ return NULL;
-+
-+ if (hbits < HTREE_HBITS_MIN)
-+ hbits = HTREE_HBITS_MIN;
-+ else if (hbits > HTREE_HBITS_MAX)
-+ hbits = HTREE_HBITS_MAX;
-+
-+ lhead->lh_hbits = hbits;
-+ lhead->lh_lock = 0;
-+ lhead->lh_depth = depth;
-+ INIT_LIST_HEAD(&lhead->lh_blocked_list);
-+ if (priv > 0) {
-+ lhead->lh_private = (void *)lhead +
-+ offsetof(struct htree_lock_head, lh_children[depth]);
-+ }
-+
-+ for (i = 0; i < depth; i++) {
-+ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
-+ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(htree_lock_head_alloc);
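
The @priv tail allocated above can carry caller data; an illustrative
sketch (struct my_dir_info is hypothetical):

    lhead = htree_lock_head_alloc(3, HTREE_HBITS_DEF,
                                  sizeof(struct my_dir_info));
    if (lhead != NULL) {
        struct my_dir_info *info = lhead->lh_private;
        /* initialize caller-private data stored after lh_children[] */
    }
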
-+
-+/* free the htree_lock head */
-+void
-+htree_lock_head_free(struct htree_lock_head *lhead)
-+{
-+ int i;
-+
-+ BUG_ON(!list_empty(&lhead->lh_blocked_list));
-+ for (i = 0; i < lhead->lh_depth; i++)
-+ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
-+ kfree(lhead);
-+}
-+EXPORT_SYMBOL(htree_lock_head_free);
-+
-+/* register event callback for @events of child-lock at level @dep */
-+void
-+htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
-+ unsigned events, htree_event_cb_t callback)
-+{
-+ BUG_ON(lhead->lh_depth <= dep);
-+ lhead->lh_children[dep].lc_events = events;
-+ lhead->lh_children[dep].lc_callback = callback;
-+}
-+EXPORT_SYMBOL(htree_lock_event_attach);
-+
-+/* allocate an htree_lock, which is a per-thread structure; @pbytes is the
-+ * number of extra bytes reserved as private data for the caller */
-+struct htree_lock *
-+htree_lock_alloc(unsigned depth, unsigned pbytes)
-+{
-+ struct htree_lock *lck;
-+ int i = offsetof(struct htree_lock, lk_nodes[depth]);
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+ lck = kzalloc(i + pbytes, GFP_NOFS);
-+ if (lck == NULL)
-+ return NULL;
-+
-+ if (pbytes != 0)
-+ lck->lk_private = (void *)lck + i;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+ lck->lk_depth = depth;
-+ INIT_LIST_HEAD(&lck->lk_blocked_list);
-+
-+ for (i = 0; i < depth; i++) {
-+ struct htree_lock_node *node = &lck->lk_nodes[i];
-+
-+ node->ln_mode = HTREE_LOCK_INVAL;
-+ INIT_LIST_HEAD(&node->ln_major_list);
-+ INIT_LIST_HEAD(&node->ln_minor_list);
-+ INIT_LIST_HEAD(&node->ln_alive_list);
-+ INIT_LIST_HEAD(&node->ln_blocked_list);
-+ INIT_LIST_HEAD(&node->ln_granted_list);
-+ }
-+
-+ return lck;
-+}
-+EXPORT_SYMBOL(htree_lock_alloc);
-+
-+/* free an htree_lock handle */
-+void
-+htree_lock_free(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
-+ kfree(lck);
-+}
-+EXPORT_SYMBOL(htree_lock_free);
Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
===================================================================
--- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile
}
static const unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-Index: linux-4.18.0-80.1.2.el8_0/fs/ext4/htree_lock.c
-===================================================================
---- /dev/null
-+++ linux-4.18.0-80.1.2.el8_0/fs/ext4/htree_lock.c
-@@ -0,0 +1,891 @@
-+/*
-+ * fs/ext4/htree_lock.c
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+#include <linux/jbd2.h>
-+#include <linux/hash.h>
-+#include <linux/module.h>
-+#include <linux/htree_lock.h>
-+
-+enum {
-+ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
-+ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
-+ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
-+ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
-+ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
-+};
-+
-+enum {
-+ HTREE_LOCK_COMPAT_EX = 0,
-+ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
-+ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
-+ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
-+ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
-+ HTREE_LOCK_BIT_PW,
-+};
-+
-+static int htree_lock_compat[] = {
-+ [HTREE_LOCK_EX] HTREE_LOCK_COMPAT_EX,
-+ [HTREE_LOCK_PW] HTREE_LOCK_COMPAT_PW,
-+ [HTREE_LOCK_PR] HTREE_LOCK_COMPAT_PR,
-+ [HTREE_LOCK_CW] HTREE_LOCK_COMPAT_CW,
-+ [HTREE_LOCK_CR] HTREE_LOCK_COMPAT_CR,
-+};
-+
-+/* max allowed htree-lock depth.
-+ * We only need depth=3 for ext4, although users can specify a higher value. */
-+#define HTREE_LOCK_DEP_MAX 16
-+
-+#ifdef HTREE_LOCK_DEBUG
-+
-+static char *hl_name[] = {
-+ [HTREE_LOCK_EX] "EX",
-+ [HTREE_LOCK_PW] "PW",
-+ [HTREE_LOCK_PR] "PR",
-+ [HTREE_LOCK_CW] "CW",
-+ [HTREE_LOCK_CR] "CR",
-+};
-+
-+/* lock stats */
-+struct htree_lock_node_stats {
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long retried[HTREE_LOCK_MAX];
-+ unsigned long long events;
-+};
-+
-+struct htree_lock_stats {
-+ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+};
-+
-+static struct htree_lock_stats hl_stats;
-+
-+void htree_lock_stat_reset(void)
-+{
-+ memset(&hl_stats, 0, sizeof(hl_stats));
-+}
-+
-+void htree_lock_stat_print(int depth)
-+{
-+ int i;
-+ int j;
-+
-+ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
-+ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
-+ }
-+ for (i = 0; i < depth; i++) {
-+ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
-+ for (j = 0; j < HTREE_LOCK_MAX; j++) {
-+ printk(KERN_DEBUG
-+ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
-+ hl_name[j], hl_stats.nodes[i].granted[j],
-+ hl_stats.nodes[i].blocked[j],
-+ hl_stats.nodes[i].retried[j]);
-+ }
-+ }
-+}
-+
-+#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
-+#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
-+#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
-+#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
-+#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
-+#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
-+
-+#else /* !DEBUG */
-+
-+void htree_lock_stat_reset(void) {}
-+void htree_lock_stat_print(int depth) {}
-+
-+#define lk_grant_inc(m) do {} while (0)
-+#define lk_block_inc(m) do {} while (0)
-+#define ln_grant_inc(d, m) do {} while (0)
-+#define ln_block_inc(d, m) do {} while (0)
-+#define ln_retry_inc(d, m) do {} while (0)
-+#define ln_event_inc(d) do {} while (0)
-+
-+#endif /* DEBUG */
-+
-+EXPORT_SYMBOL(htree_lock_stat_reset);
-+EXPORT_SYMBOL(htree_lock_stat_print);
-+
-+#define HTREE_DEP_ROOT (-1)
-+
-+#define htree_spin_lock(lhead, dep) \
-+ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
-+#define htree_spin_unlock(lhead, dep) \
-+ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
-+
-+#define htree_key_event_ignore(child, ln) \
-+ (!((child)->lc_events & (1 << (ln)->ln_mode)))
-+
-+static int
-+htree_key_list_empty(struct htree_lock_node *ln)
-+{
-+ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
-+}
-+
-+static void
-+htree_key_list_del_init(struct htree_lock_node *ln)
-+{
-+ struct htree_lock_node *tmp = NULL;
-+
-+ if (!list_empty(&ln->ln_minor_list)) {
-+ tmp = list_entry(ln->ln_minor_list.next,
-+ struct htree_lock_node, ln_minor_list);
-+ list_del_init(&ln->ln_minor_list);
-+ }
-+
-+ if (list_empty(&ln->ln_major_list))
-+ return;
-+
-+ if (tmp == NULL) { /* not on minor key list */
-+ list_del_init(&ln->ln_major_list);
-+ } else {
-+ BUG_ON(!list_empty(&tmp->ln_major_list));
-+ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
-+ }
-+}
-+
-+static void
-+htree_key_list_replace_init(struct htree_lock_node *old,
-+ struct htree_lock_node *new)
-+{
-+ if (!list_empty(&old->ln_major_list))
-+ list_replace_init(&old->ln_major_list, &new->ln_major_list);
-+
-+ if (!list_empty(&old->ln_minor_list))
-+ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
-+}
-+
-+static void
-+htree_key_event_enqueue(struct htree_lock_child *child,
-+ struct htree_lock_node *ln, int dep, void *event)
-+{
-+ struct htree_lock_node *tmp;
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
-+ if (event == NULL || htree_key_event_ignore(child, ln))
-+ return;
-+
-+ /* shouldn't be a very long list */
-+ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
-+ if (tmp->ln_mode == HTREE_LOCK_NL) {
-+ ln_event_inc(dep);
-+ if (child->lc_callback != NULL)
-+ child->lc_callback(tmp->ln_ev_target, event);
-+ }
-+ }
-+}
-+
-+static int
-+htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
-+ unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
-+ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ /* NB: we only expect PR/PW lock modes here; only these two modes are
-+ * allowed for htree_node_lock (asserted in htree_node_lock_internal).
-+ * NL is only used for listeners; users can't directly request NL mode */
-+ if ((curln->ln_mode == HTREE_LOCK_NL) ||
-+ (curln->ln_mode != HTREE_LOCK_PW &&
-+ newln->ln_mode != HTREE_LOCK_PW)) {
-+ /* no conflict, attach it on granted list of @curlk */
-+ if (curln->ln_mode != HTREE_LOCK_NL) {
-+ list_add(&newln->ln_granted_list,
-+ &curln->ln_granted_list);
-+ } else {
-+ /* replace key owner */
-+ htree_key_list_replace_init(curln, newln);
-+ }
-+
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ ln_grant_inc(dep, newln->ln_mode);
-+ return 1; /* still hold lh_lock */
-+ }
-+
-+ if (!wait) { /* can't grant and don't want to wait */
-+ ln_retry_inc(dep, newln->ln_mode);
-+ newln->ln_mode = HTREE_LOCK_INVAL;
-+ return -1; /* don't wait and just return -1 */
-+ }
-+
-+ newlk->lk_task = current;
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ /* conflict, attach it on blocked list of curlk */
-+ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ ln_block_inc(dep, newln->ln_mode);
-+
-+ htree_spin_unlock(newlk->lk_head, dep);
-+ /* wait to be given the lock */
-+ if (newlk->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt, wake up will set me RUNNING */
-+ if (event == NULL || htree_key_event_ignore(child, newln))
-+ return 0; /* granted without lh_lock */
-+
-+ htree_spin_lock(newlk->lk_head, dep);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ return 1; /* still hold lh_lock */
-+}
-+
-+/*
-+ * get PR/PW access to a particular tree node according to @dep and @key;
-+ * it will return -1 if @wait is false and the lock can't be granted
-+ * immediately.
-+ * All listeners (HTREE_LOCK_NL) on @dep with the same @key will get
-+ * @event if it's not NULL.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
-+ htree_lock_mode_t mode, u32 key, unsigned dep,
-+ int wait, void *event)
-+{
-+ LIST_HEAD(list);
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ u16 major;
-+ u16 minor;
-+ u8 reverse;
-+ u8 ma_bits;
-+ u8 mi_bits;
-+
-+ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+
-+ key = hash_long(key, lhead->lh_hbits);
-+
-+ mi_bits = lhead->lh_hbits >> 1;
-+ ma_bits = lhead->lh_hbits - mi_bits;
-+
-+ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
-+ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
-+ lck->lk_nodes[dep].ln_mode = mode;
-+
-+ /*
-+ * The major key list is an ordered list, so searches are started
-+ * at the end of the list that is numerically closer to major_key,
-+ * so at most half of the list will be walked (for well-distributed
-+ * keys). The list traversal aborts early if the expected key
-+ * location is passed.
-+ */
-+ reverse = (major >= (1 << (ma_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp,
-+ &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
-+ /* attach _after_ @tmp */
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+
-+ } else {
-+ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
-+ /* insert _before_ @tmp */
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+ }
-+
-+ search_minor:
-+ /*
-+ * NB: minor_key list doesn't have a "head", @list is just a
-+ * temporary stub for helping list searching, make sure it's removed
-+ * after searching.
-+ * minor_key list is an ordered list too.
-+ */
-+ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
-+
-+ reverse = (minor >= (1 << (mi_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
-+ /* attach _after_ @tmp2 */
-+ list_add(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
-+
-+ } else {
-+ list_for_each_entry(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
-+ /* insert _before_ @tmp2 */
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
-+ }
-+
-+ out_grant_minor:
-+ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
-+ /* new lock @lck is the first one on minor_key list, which
-+ * means it has the smallest minor_key and it should
-+ * replace @tmp as minor_key owner */
-+ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
-+ &lck->lk_nodes[dep].ln_major_list);
-+ }
-+ /* remove the temporary head */
-+ list_del(&list);
-+
-+ out_grant_major:
-+ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
-+ return 1; /* granted with holding lh_lock */
-+
-+ out_enqueue:
-+ list_del(&list); /* remove temporary head */
-+ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
-+}
-+
-+/*
-+ * release the key of @lck at level @dep, and grant any blocked locks.
-+ * the caller will still listen on @key if @event is not NULL, which means
-+ * the caller can see an event (via event_cb) while granting any lock with
-+ * the same key at level @dep.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ * NB: listener will not block anyone because listening mode is HTREE_LOCK_NL
-+ */
-+static void
-+htree_node_unlock_internal(struct htree_lock_head *lhead,
-+ struct htree_lock *curlk, unsigned dep, void *event)
-+{
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+ struct htree_lock *grtlk = NULL;
-+ struct htree_lock_node *grtln;
-+ struct htree_lock *poslk;
-+ struct htree_lock *tmplk;
-+
-+ if (!htree_node_is_granted(curlk, dep))
-+ return;
-+
-+ if (!list_empty(&curln->ln_granted_list)) {
-+ /* there is another granted lock */
-+ grtlk = list_entry(curln->ln_granted_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_granted_list);
-+ list_del_init(&curln->ln_granted_list);
-+ }
-+
-+ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
-+ /*
-+ * @curlk is the only granted lock, so we confirmed:
-+ * a) curln is key owner (attached on major/minor_list),
-+ * so if there is any blocked lock, it should be attached
-+ * on curln->ln_blocked_list
-+ * b) we always can grant the first blocked lock
-+ */
-+ grtlk = list_entry(curln->ln_blocked_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_blocked_list);
-+ BUG_ON(grtlk->lk_task == NULL);
-+ wake_up_process(grtlk->lk_task);
-+ }
-+
-+ if (event != NULL &&
-+ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
-+ curln->ln_ev_target = event;
-+ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
-+ } else {
-+ curln->ln_mode = HTREE_LOCK_INVAL;
-+ }
-+
-+ if (grtlk == NULL) { /* I must be the only one locking this key */
-+ struct htree_lock_node *tmpln;
-+
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
-+ return;
-+
-+ /* not listening */
-+ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(curln);
-+ return;
-+ }
-+
-+ tmpln = list_entry(curln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
-+
-+ htree_key_list_replace_init(curln, tmpln);
-+ list_del_init(&curln->ln_alive_list);
-+
-+ return;
-+ }
-+
-+ /* have a granted lock */
-+ grtln = &grtlk->lk_nodes[dep];
-+ if (!list_empty(&curln->ln_blocked_list)) {
-+ /* only key owner can be on both lists */
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (list_empty(&grtln->ln_blocked_list)) {
-+ list_add(&grtln->ln_blocked_list,
-+ &curln->ln_blocked_list);
-+ }
-+ list_del_init(&curln->ln_blocked_list);
-+ }
-+ /*
-+ * NB: this is the tricky part:
-+ * We have only two modes for child-lock (PR and PW), also,
-+ * only owner of the key (attached on major/minor_list) can be on
-+ * both blocked_list and granted_list, so @grtlk must be one
-+ * of these two cases:
-+ *
-+ * a) @grtlk is taken from granted_list, which means we've granted
-+ * more than one lock so @grtlk has to be PR, the first blocked
-+ * lock must be PW and we can't grant it at all.
-+ * So even if @grtlk is not the owner of the key (empty blocked_list),
-+ * we don't care, because we can't grant any lock anyway.
-+ * b) we just grant a new lock which is taken from head of blocked
-+ * list, and it should be the first granted lock, and it should
-+ * be the first one linked on blocked_list.
-+ *
-+ * Either way, we can get correct result by iterating blocked_list
-+ * of @grtlk, and don't have to bother with finding the
-+ * owner of the current key.
-+ */
-+ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
-+ lk_nodes[dep].ln_blocked_list) {
-+ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
-+ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
-+ break;
-+ /* grant all readers */
-+ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
-+ list_add(&poslk->lk_nodes[dep].ln_granted_list,
-+ &grtln->ln_granted_list);
-+
-+ BUG_ON(poslk->lk_task == NULL);
-+ wake_up_process(poslk->lk_task);
-+ }
-+
-+ /* if @curln is the owner of this key, replace it with @grtln */
-+ if (!htree_key_list_empty(curln))
-+ htree_key_list_replace_init(curln, grtln);
-+
-+ if (curln->ln_mode == HTREE_LOCK_INVAL)
-+ list_del_init(&curln->ln_alive_list);
-+}
-+
-+/*
-+ * it's just a wrapper of htree_node_lock_internal; it returns 1 when the
-+ * lock is granted, and 0 only if @wait is false and the lock can't be
-+ * granted immediately
-+ */
-+int
-+htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, dep);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_node_lock_try);
-+
-+/* it's a wrapper of htree_node_unlock_internal */
-+void
-+htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ htree_node_unlock_internal(lhead, lck, dep, event);
-+ htree_spin_unlock(lhead, dep);
-+}
-+EXPORT_SYMBOL(htree_node_unlock);
-+
-+/* stop listening on child-lock level @dep */
-+void
-+htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
-+{
-+ struct htree_lock_node *ln = &lck->lk_nodes[dep];
-+ struct htree_lock_node *tmp;
-+
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+ BUG_ON(!list_empty(&ln->ln_blocked_list));
-+ BUG_ON(!list_empty(&ln->ln_granted_list));
-+
-+ if (!htree_node_is_listening(lck, dep))
-+ return;
-+
-+ htree_spin_lock(lck->lk_head, dep);
-+ ln->ln_mode = HTREE_LOCK_INVAL;
-+ ln->ln_ev_target = NULL;
-+
-+ if (htree_key_list_empty(ln)) { /* not owner */
-+ list_del_init(&ln->ln_alive_list);
-+ goto out;
-+ }
-+
-+ /* I'm the owner... */
-+ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(ln);
-+ goto out;
-+ }
-+
-+ tmp = list_entry(ln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
-+ htree_key_list_replace_init(ln, tmp);
-+ list_del_init(&ln->ln_alive_list);
-+ out:
-+ htree_spin_unlock(lck->lk_head, dep);
-+}
-+EXPORT_SYMBOL(htree_node_stop_listen);
-+
-+/* release all child-locks if we have any */
-+static void
-+htree_node_release_all(struct htree_lock *lck)
-+{
-+ int i;
-+
-+ for (i = 0; i < lck->lk_depth; i++) {
-+ if (htree_node_is_granted(lck, i))
-+ htree_node_unlock(lck, i, NULL);
-+ else if (htree_node_is_listening(lck, i))
-+ htree_node_stop_listen(lck, i);
-+ }
-+}
-+
-+/*
-+ * obtain the htree lock; it may block inside if there's a conflict
-+ * with any granted or blocked lock and @wait is true.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_lock_internal(struct htree_lock *lck, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int granted = 0;
-+ int blocked = 0;
-+ int i;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ if (lhead->lh_nblocked[i] != 0)
-+ blocked |= 1 << i;
-+ }
-+ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
-+ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
-+ /* block the current lock even if it only conflicts with
-+ * another blocked lock, so locks like EX won't starve */
-+ if (!wait)
-+ return -1;
-+ lhead->lh_nblocked[lck->lk_mode]++;
-+ lk_block_inc(lck->lk_mode);
-+
-+ lck->lk_task = current;
-+ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ /* wait to be given the lock */
-+ if (lck->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt; wake up will set me RUNNING.
-+ * Since the thread may have been woken up spuriously,
-+ * we need to check again whether the lock was granted. */
-+ if (!list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ if (list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return 0;
-+ }
-+ goto retry;
-+ }
-+ return 0; /* without lh_lock */
-+ }
-+ lhead->lh_ngranted[lck->lk_mode]++;
-+ lk_grant_inc(lck->lk_mode);
-+ return 1;
-+}
-+
-+/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
-+static void
-+htree_unlock_internal(struct htree_lock *lck)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ int granted = 0;
-+ int i;
-+
-+ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
-+
-+ lhead->lh_ngranted[lck->lk_mode]--;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ }
-+ list_for_each_entry_safe(tmp, tmp2,
-+ &lhead->lh_blocked_list, lk_blocked_list) {
-+ /* conflict with any granted lock? */
-+ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
-+ break;
-+
-+ list_del_init(&tmp->lk_blocked_list);
-+
-+ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
-+
-+ lhead->lh_nblocked[tmp->lk_mode]--;
-+ lhead->lh_ngranted[tmp->lk_mode]++;
-+ granted |= 1 << tmp->lk_mode;
-+
-+ BUG_ON(tmp->lk_task == NULL);
-+ wake_up_process(tmp->lk_task);
-+ }
-+}
-+
-+/* wrapper of htree_lock_internal and the exported interface.
-+ * It always returns 1 with the lock granted if @wait is true; it can return
-+ * 0 if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait)
-+{
-+ int rc;
-+
-+ BUG_ON(lck->lk_depth > lhead->lh_depth);
-+ BUG_ON(lck->lk_head != NULL);
-+ BUG_ON(lck->lk_task != NULL);
-+
-+ lck->lk_head = lhead;
-+ lck->lk_mode = mode;
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_lock_try);
-+
-+/* wrapper of htree_unlock_internal and the exported interface.
-+ * It releases all htree_node_locks as well as the htree_lock itself */
-+void
-+htree_unlock(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_head == NULL);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
-+ lck->lk_head = NULL;
-+ lck->lk_task = NULL;
-+}
-+EXPORT_SYMBOL(htree_unlock);
-+
-+/* change lock mode */
-+void
-+htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
-+{
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+ lck->lk_mode = mode;
-+}
-+EXPORT_SYMBOL(htree_change_mode);
-+
-+/* release the htree lock, then lock it again with a new mode.
-+ * This function first releases all htree_node_locks and the htree_lock,
-+ * then tries to regain the htree_lock with the new @mode.
-+ * It always returns 1 with the lock granted if @wait is true; it can return
-+ * 0 if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(lhead == NULL);
-+ BUG_ON(lck->lk_mode == mode);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ lck->lk_mode = mode;
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_change_lock_try);
-+
-+/* create an htree_lock head with @depth levels (number of child-locks);
-+ * it is a per-resource structure */
-+struct htree_lock_head *
-+htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
-+{
-+ struct htree_lock_head *lhead;
-+ int i;
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+
-+ lhead = kzalloc(offsetof(struct htree_lock_head,
-+ lh_children[depth]) + priv, GFP_NOFS);
-+ if (lhead == NULL)
-+ return NULL;
-+
-+ if (hbits < HTREE_HBITS_MIN)
-+ hbits = HTREE_HBITS_MIN;
-+ else if (hbits > HTREE_HBITS_MAX)
-+ hbits = HTREE_HBITS_MAX;
-+ lhead->lh_hbits = hbits;
-+
-+ lhead->lh_lock = 0;
-+ lhead->lh_depth = depth;
-+ INIT_LIST_HEAD(&lhead->lh_blocked_list);
-+ if (priv > 0) {
-+ lhead->lh_private = (void *)lhead +
-+ offsetof(struct htree_lock_head, lh_children[depth]);
-+ }
-+
-+ for (i = 0; i < depth; i++) {
-+ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
-+ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(htree_lock_head_alloc);
-+
-+/* free the htree_lock head */
-+void
-+htree_lock_head_free(struct htree_lock_head *lhead)
-+{
-+ int i;
-+
-+ BUG_ON(!list_empty(&lhead->lh_blocked_list));
-+ for (i = 0; i < lhead->lh_depth; i++)
-+ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
-+ kfree(lhead);
-+}
-+EXPORT_SYMBOL(htree_lock_head_free);
-+
-+/* register event callback for @events of child-lock at level @dep */
-+void
-+htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
-+ unsigned events, htree_event_cb_t callback)
-+{
-+ BUG_ON(lhead->lh_depth <= dep);
-+ lhead->lh_children[dep].lc_events = events;
-+ lhead->lh_children[dep].lc_callback = callback;
-+}
-+EXPORT_SYMBOL(htree_lock_event_attach);
-+
-+/* allocate an htree_lock, which is a per-thread structure; @pbytes is the
-+ * number of extra bytes reserved as private data for the caller */
-+struct htree_lock *
-+htree_lock_alloc(unsigned depth, unsigned pbytes)
-+{
-+ struct htree_lock *lck;
-+ int i = offsetof(struct htree_lock, lk_nodes[depth]);
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+ lck = kzalloc(i + pbytes, GFP_NOFS);
-+ if (lck == NULL)
-+ return NULL;
-+
-+ if (pbytes != 0)
-+ lck->lk_private = (void *)lck + i;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+ lck->lk_depth = depth;
-+ INIT_LIST_HEAD(&lck->lk_blocked_list);
-+
-+ for (i = 0; i < depth; i++) {
-+ struct htree_lock_node *node = &lck->lk_nodes[i];
-+
-+ node->ln_mode = HTREE_LOCK_INVAL;
-+ INIT_LIST_HEAD(&node->ln_major_list);
-+ INIT_LIST_HEAD(&node->ln_minor_list);
-+ INIT_LIST_HEAD(&node->ln_alive_list);
-+ INIT_LIST_HEAD(&node->ln_blocked_list);
-+ INIT_LIST_HEAD(&node->ln_granted_list);
-+ }
-+
-+ return lck;
-+}
-+EXPORT_SYMBOL(htree_lock_alloc);
-+
-+/* free an htree_lock handle */
-+void
-+htree_lock_free(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
-+ kfree(lck);
-+}
-+EXPORT_SYMBOL(htree_lock_free);
Index: linux-4.18.0-80.1.2.el8_0/fs/ext4/namei.c
===================================================================
--- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/namei.c
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
-Index: linux-4.18.0-80.1.2.el8_0/include/linux/htree_lock.h
-===================================================================
---- /dev/null
-+++ linux-4.18.0-80.1.2.el8_0/include/linux/htree_lock.h
-@@ -0,0 +1,187 @@
-+/*
-+ * include/linux/htree_lock.h
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+
-+/*
-+ * htree lock
-+ *
-+ * htree_lock is an advanced lock; it supports five lock modes (a concept
-+ * taken from DLM) and it's a sleeping lock.
-+ *
-+ * the most common use case is:
-+ * - create an htree_lock_head for the data
-+ * - each thread (contender) creates its own htree_lock
-+ * - a contender calls htree_lock(lock_node, mode) to protect the data and
-+ * calls htree_unlock to release the lock
-+ *
-+ * There is also a more complex, advanced use-case: a user can take a
-+ * PW/PR lock on a particular key, mostly while holding a shared
-+ * lock on the htree (CW, CR)
-+ *
-+ * htree_lock(lock_node, HTREE_LOCK_CR); lock the htree with CR
-+ * htree_node_lock(lock_node, HTREE_LOCK_PR, key...); lock @key with PR
-+ * ...
-+ * htree_node_unlock(lock_node); unlock the key
-+ *
-+ * Another tip: we can have N levels of this kind of key; all we need to
-+ * do is specify N levels while creating the htree_lock_head, then we can
-+ * lock/unlock a specific level by:
-+ * htree_node_lock(lock_node, mode1, key1, level1...);
-+ * do something;
-+ * htree_node_lock(lock_node, mode1, key2, level2...);
-+ * do something;
-+ * htree_node_unlock(lock_node, level2);
-+ * htree_node_unlock(lock_node, level1);
-+ *
-+ * NB: with multiple levels, be careful about locking order to avoid deadlock
-+ */
-+
-+#ifndef _LINUX_HTREE_LOCK_H
-+#define _LINUX_HTREE_LOCK_H
-+
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * Lock Modes
-+ * more details can be found here:
-+ * http://en.wikipedia.org/wiki/Distributed_lock_manager
-+ */
-+typedef enum {
-+ HTREE_LOCK_EX = 0, /* exclusive lock: incompatible with all others */
-+ HTREE_LOCK_PW, /* protected write: allows only CR users */
-+ HTREE_LOCK_PR, /* protected read: allow PR, CR users */
-+ HTREE_LOCK_CW, /* concurrent write: allow CR, CW users */
-+ HTREE_LOCK_CR, /* concurrent read: allow all but EX users */
-+ HTREE_LOCK_MAX, /* number of lock modes */
-+} htree_lock_mode_t;
-+
-+#define HTREE_LOCK_NL HTREE_LOCK_MAX
-+#define HTREE_LOCK_INVAL 0xdead10c
-+
-+enum {
-+ HTREE_HBITS_MIN = 2,
-+ HTREE_HBITS_DEF = 14,
-+ HTREE_HBITS_MAX = 32,
-+};
-+
-+enum {
-+ HTREE_EVENT_DISABLE = (0),
-+ HTREE_EVENT_RD = (1 << HTREE_LOCK_PR),
-+ HTREE_EVENT_WR = (1 << HTREE_LOCK_PW),
-+ HTREE_EVENT_RDWR = (HTREE_EVENT_RD | HTREE_EVENT_WR),
-+};
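
These event bits select which granted lock modes trigger the callback
registered through htree_lock_event_attach(); a hypothetical registration
(assuming an already allocated lhead):

    static void my_event_cb(void *target, void *event)
    {
        /* @target was supplied by the listener via htree_node_unlock(),
         * @event by the thread whose lock was just granted */
    }

    /* notify listeners at level 0 whenever a PW lock touches their key */
    htree_lock_event_attach(lhead, 0, HTREE_EVENT_WR, my_event_cb);
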
-+
-+struct htree_lock;
-+
-+typedef void (*htree_event_cb_t)(void *target, void *event);
-+
-+struct htree_lock_child {
-+ struct list_head lc_list; /* granted list */
-+ htree_event_cb_t lc_callback; /* event callback */
-+ unsigned lc_events; /* event types */
-+};
-+
-+struct htree_lock_head {
-+ unsigned long lh_lock; /* bits lock */
-+ /* blocked lock list (htree_lock) */
-+ struct list_head lh_blocked_list;
-+ /* # key levels */
-+ u16 lh_depth;
-+ /* hash bits for key and limit number of locks */
-+ u16 lh_hbits;
-+ /* counters for blocked locks */
-+ u16 lh_nblocked[HTREE_LOCK_MAX];
-+ /* counters for granted locks */
-+ u16 lh_ngranted[HTREE_LOCK_MAX];
-+ /* private data */
-+ void *lh_private;
-+ /* array of children locks */
-+ struct htree_lock_child lh_children[0];
-+};
-+
-+/* struct htree_lock_node is the child-lock for a specific key (the
-+ * ln_major_key/ln_minor_key fields) */
-+struct htree_lock_node {
-+ htree_lock_mode_t ln_mode;
-+ /* major hash key */
-+ u16 ln_major_key;
-+ /* minor hash key */
-+ u16 ln_minor_key;
-+ struct list_head ln_major_list;
-+ struct list_head ln_minor_list;
-+ /* alive list, all locks (granted, blocked, listening) are on it */
-+ struct list_head ln_alive_list;
-+ /* blocked list */
-+ struct list_head ln_blocked_list;
-+ /* granted list */
-+ struct list_head ln_granted_list;
-+ void *ln_ev_target;
-+};
-+
-+struct htree_lock {
-+ struct task_struct *lk_task;
-+ struct htree_lock_head *lk_head;
-+ void *lk_private;
-+ unsigned lk_depth;
-+ htree_lock_mode_t lk_mode;
-+ struct list_head lk_blocked_list;
-+ struct htree_lock_node lk_nodes[0];
-+};
-+
-+/* create a lock head, which stands for a resource */
-+struct htree_lock_head *htree_lock_head_alloc(unsigned depth,
-+ unsigned hbits, unsigned priv);
-+/* free a lock head */
-+void htree_lock_head_free(struct htree_lock_head *lhead);
-+/* register event callback for child lock at level @depth */
-+void htree_lock_event_attach(struct htree_lock_head *lhead, unsigned depth,
-+ unsigned events, htree_event_cb_t callback);
-+/* create a lock handle, which stands for a thread */
-+struct htree_lock *htree_lock_alloc(unsigned depth, unsigned pbytes);
-+/* free a lock handle */
-+void htree_lock_free(struct htree_lock *lck);
-+/* lock htree; when @wait is false, 0 is returned if the lock can't
-+ * be granted immediately */
-+int htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait);
-+/* unlock htree */
-+void htree_unlock(struct htree_lock *lck);
-+/* unlock and relock htree with @new_mode */
-+int htree_change_lock_try(struct htree_lock *lck,
-+ htree_lock_mode_t new_mode, int wait);
-+void htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode);
-+/* acquire the child lock (key) of the htree at level @dep; @event will be
-+ * sent to all listeners on this @key when the lock is granted */
-+int htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event);
-+/* release the child lock at level @dep; if @event isn't NULL this lock keeps
-+ * listening on its key, and event_cb will be called against @lck when any
-+ * other lock at level @dep with the same key is granted */
-+void htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event);
-+/* stop listening on child lock at level @dep */
-+void htree_node_stop_listen(struct htree_lock *lck, unsigned dep);
-+/* for debug */
-+void htree_lock_stat_print(int depth);
-+void htree_lock_stat_reset(void);
-+
-+#define htree_lock(lck, lh, mode) htree_lock_try(lck, lh, mode, 1)
-+#define htree_change_lock(lck, mode) htree_change_lock_try(lck, mode, 1)
-+
-+#define htree_lock_mode(lck) ((lck)->lk_mode)
-+
-+#define htree_node_lock(lck, mode, key, dep) \
-+ htree_node_lock_try(lck, mode, key, dep, 1, NULL)
-+/* this is only safe in the thread context of the lock owner */
-+#define htree_node_is_granted(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_INVAL && \
-+ (lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_NL)
-+/* this is only safe in the thread context of the lock owner */
-+#define htree_node_is_listening(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode == HTREE_LOCK_NL)
-+
-+#endif
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
-===================================================================
---- /dev/null
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
-@@ -0,0 +1,891 @@
-+/*
-+ * fs/ext4/htree_lock.c
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+#include <linux/jbd2.h>
-+#include <linux/hash.h>
-+#include <linux/module.h>
-+#include <linux/htree_lock.h>
-+
-+enum {
-+ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
-+ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
-+ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
-+ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
-+ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
-+};
-+
-+enum {
-+ HTREE_LOCK_COMPAT_EX = 0,
-+ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
-+ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
-+ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
-+ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
-+ HTREE_LOCK_BIT_PW,
-+};
-+
-+static int htree_lock_compat[] = {
-+ [HTREE_LOCK_EX] HTREE_LOCK_COMPAT_EX,
-+ [HTREE_LOCK_PW] HTREE_LOCK_COMPAT_PW,
-+ [HTREE_LOCK_PR] HTREE_LOCK_COMPAT_PR,
-+ [HTREE_LOCK_CW] HTREE_LOCK_COMPAT_CW,
-+ [HTREE_LOCK_CR] HTREE_LOCK_COMPAT_CR,
-+};
-+
-+/* max allowed htree-lock depth.
-+ * We only need depth=3 for ext4, although a user can ask for a higher value. */
-+#define HTREE_LOCK_DEP_MAX 16
-+
-+#ifdef HTREE_LOCK_DEBUG
-+
-+static char *hl_name[] = {
-+ [HTREE_LOCK_EX] "EX",
-+ [HTREE_LOCK_PW] "PW",
-+ [HTREE_LOCK_PR] "PR",
-+ [HTREE_LOCK_CW] "CW",
-+ [HTREE_LOCK_CR] "CR",
-+};
-+
-+/* lock stats */
-+struct htree_lock_node_stats {
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long retried[HTREE_LOCK_MAX];
-+ unsigned long long events;
-+};
-+
-+struct htree_lock_stats {
-+ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+};
-+
-+static struct htree_lock_stats hl_stats;
-+
-+void htree_lock_stat_reset(void)
-+{
-+ memset(&hl_stats, 0, sizeof(hl_stats));
-+}
-+
-+void htree_lock_stat_print(int depth)
-+{
-+ int i;
-+ int j;
-+
-+ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
-+ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
-+ }
-+ for (i = 0; i < depth; i++) {
-+ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
-+ for (j = 0; j < HTREE_LOCK_MAX; j++) {
-+ printk(KERN_DEBUG
-+ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
-+ hl_name[j], hl_stats.nodes[i].granted[j],
-+ hl_stats.nodes[i].blocked[j],
-+ hl_stats.nodes[i].retried[j]);
-+ }
-+ }
-+}
-+
-+#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
-+#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
-+#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
-+#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
-+#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
-+#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
-+
-+#else /* !HTREE_LOCK_DEBUG */
-+
-+void htree_lock_stat_reset(void) {}
-+void htree_lock_stat_print(int depth) {}
-+
-+#define lk_grant_inc(m) do {} while (0)
-+#define lk_block_inc(m) do {} while (0)
-+#define ln_grant_inc(d, m) do {} while (0)
-+#define ln_block_inc(d, m) do {} while (0)
-+#define ln_retry_inc(d, m) do {} while (0)
-+#define ln_event_inc(d) do {} while (0)
-+
-+#endif /* HTREE_LOCK_DEBUG */
-+
-+EXPORT_SYMBOL(htree_lock_stat_reset);
-+EXPORT_SYMBOL(htree_lock_stat_print);
-+
-+#define HTREE_DEP_ROOT (-1)
-+
-+#define htree_spin_lock(lhead, dep) \
-+ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
-+#define htree_spin_unlock(lhead, dep) \
-+ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
-+
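Note: lh_lock is used as an array of bit spinlocks: HTREE_DEP_ROOT == -1 maps
to bit 0 and serializes the tree-level state, while bit dep + 1 serializes
child level dep, so at most HTREE_LOCK_DEP_MAX + 1 bits are ever used.
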
-+#define htree_key_event_ignore(child, ln) \
-+ (!((child)->lc_events & (1 << (ln)->ln_mode)))
-+
-+static int
-+htree_key_list_empty(struct htree_lock_node *ln)
-+{
-+ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
-+}
-+
-+static void
-+htree_key_list_del_init(struct htree_lock_node *ln)
-+{
-+ struct htree_lock_node *tmp = NULL;
-+
-+ if (!list_empty(&ln->ln_minor_list)) {
-+ tmp = list_entry(ln->ln_minor_list.next,
-+ struct htree_lock_node, ln_minor_list);
-+ list_del_init(&ln->ln_minor_list);
-+ }
-+
-+ if (list_empty(&ln->ln_major_list))
-+ return;
-+
-+ if (tmp == NULL) { /* not on minor key list */
-+ list_del_init(&ln->ln_major_list);
-+ } else {
-+ BUG_ON(!list_empty(&tmp->ln_major_list));
-+ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
-+ }
-+}
-+
-+static void
-+htree_key_list_replace_init(struct htree_lock_node *old,
-+ struct htree_lock_node *new)
-+{
-+ if (!list_empty(&old->ln_major_list))
-+ list_replace_init(&old->ln_major_list, &new->ln_major_list);
-+
-+ if (!list_empty(&old->ln_minor_list))
-+ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
-+}
-+
-+static void
-+htree_key_event_enqueue(struct htree_lock_child *child,
-+ struct htree_lock_node *ln, int dep, void *event)
-+{
-+ struct htree_lock_node *tmp;
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
-+ if (event == NULL || htree_key_event_ignore(child, ln))
-+ return;
-+
-+ /* shouldn't be a very long list */
-+ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
-+ if (tmp->ln_mode == HTREE_LOCK_NL) {
-+ ln_event_inc(dep);
-+ if (child->lc_callback != NULL)
-+ child->lc_callback(tmp->ln_ev_target, event);
-+ }
-+ }
-+}
-+
-+static int
-+htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
-+ unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
-+ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ /* NB: we only expect the PR/PW lock modes here; only these two modes are
-+ * allowed for htree_node_lock (asserted in htree_node_lock_internal).
-+ * NL is only used for listeners; a user can't directly request NL mode */
-+ if ((curln->ln_mode == HTREE_LOCK_NL) ||
-+ (curln->ln_mode != HTREE_LOCK_PW &&
-+ newln->ln_mode != HTREE_LOCK_PW)) {
-+ /* no conflict, attach it on granted list of @curlk */
-+ if (curln->ln_mode != HTREE_LOCK_NL) {
-+ list_add(&newln->ln_granted_list,
-+ &curln->ln_granted_list);
-+ } else {
-+ /* replace key owner */
-+ htree_key_list_replace_init(curln, newln);
-+ }
-+
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ ln_grant_inc(dep, newln->ln_mode);
-+ return 1; /* still hold lh_lock */
-+ }
-+
-+ if (!wait) { /* can't grant and don't want to wait */
-+ ln_retry_inc(dep, newln->ln_mode);
-+ newln->ln_mode = HTREE_LOCK_INVAL;
-+ return -1; /* don't wait and just return -1 */
-+ }
-+
-+ newlk->lk_task = current;
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ /* conflict, attach it on blocked list of curlk */
-+ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ ln_block_inc(dep, newln->ln_mode);
-+
-+ htree_spin_unlock(newlk->lk_head, dep);
-+ /* wait to be given the lock */
-+ if (newlk->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt, wake up will set me RUNNING */
-+ if (event == NULL || htree_key_event_ignore(child, newln))
-+ return 0; /* granted without lh_lock */
-+
-+ htree_spin_lock(newlk->lk_head, dep);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ return 1; /* still hold lh_lock */
-+}
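
Return convention used by the function above and by htree_node_lock_internal()
below:

	 1  granted, lh_lock(dep) still held by the caller
	 0  granted after sleeping, lh_lock(dep) already dropped
	-1  not granted because @wait was false, lh_lock(dep) still held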
-+
-+/*
-+ * get PR/PW access to particular tree-node according to @dep and @key,
-+ * it will return -1 if @wait is false and can't immediately grant this lock.
-+ * All listeners(HTREE_LOCK_NL) on @dep and with the same @key will get
-+ * @event if it's not NULL.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
-+ htree_lock_mode_t mode, u32 key, unsigned dep,
-+ int wait, void *event)
-+{
-+ LIST_HEAD(list);
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ u16 major;
-+ u16 minor;
-+ u8 reverse;
-+ u8 ma_bits;
-+ u8 mi_bits;
-+
-+ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+
-+ key = hash_long(key, lhead->lh_hbits);
-+
-+ mi_bits = lhead->lh_hbits >> 1;
-+ ma_bits = lhead->lh_hbits - mi_bits;
-+
-+ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
-+ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
-+ lck->lk_nodes[dep].ln_mode = mode;
-+
-+ /*
-+ * The major key list is an ordered list, so searches are started
-+ * at the end of the list that is numerically closer to major_key,
-+ * so at most half of the list will be walked (for well-distributed
-+ * keys). The list traversal aborts early if the expected key
-+ * location is passed.
-+ */
-+ reverse = (major >= (1 << (ma_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp,
-+ &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
-+ /* attach _after_ @tmp */
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+
-+ } else {
-+ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
-+ /* insert _before_ @tmp */
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+ }
-+
-+ search_minor:
-+ /*
-+ * NB: the minor_key list doesn't have a "head"; @list is just a
-+ * temporary stub to help with list searching, so make sure it's
-+ * removed after searching.
-+ * The minor_key list is an ordered list too.
-+ */
-+ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
-+
-+ reverse = (minor >= (1 << (mi_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
-+ /* attach _after_ @tmp2 */
-+ list_add(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
-+
-+ } else {
-+ list_for_each_entry(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
-+ /* insert _before_ @tmp2 */
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
-+ }
-+
-+ out_grant_minor:
-+ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
-+ /* new lock @lck is the first one on minor_key list, which
-+ * means it has the smallest minor_key and it should
-+ * replace @tmp as minor_key owner */
-+ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
-+ &lck->lk_nodes[dep].ln_major_list);
-+ }
-+ /* remove the temporary head */
-+ list_del(&list);
-+
-+ out_grant_major:
-+ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
-+ return 1; /* granted with holding lh_lock */
-+
-+ out_enqueue:
-+ list_del(&list); /* remove the temporary head */
-+ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
-+}
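
Worked example of the major/minor key split above, assuming the default
lh_hbits = HTREE_HBITS_DEF = 14: mi_bits = 14 >> 1 = 7 and
ma_bits = 14 - 7 = 7, so for a hashed key of 0x2a96 (14 bits):

	major = 0x2a96 & 0x7f = 0x16
	minor = 0x2a96 >> 7  = 0x55

i.e. up to 128 ordered major entries, each owning an ordered minor list.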
-+
-+/*
-+ * release the key of @lck at level @dep, and grant any blocked locks.
-+ * caller will still listen on @key if @event is not NULL, which means
-+ * caller can see a event (by event_cb) while granting any lock with
-+ * the same key at level @dep.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ * NB: listener will not block anyone because listening mode is HTREE_LOCK_NL
-+ */
-+static void
-+htree_node_unlock_internal(struct htree_lock_head *lhead,
-+ struct htree_lock *curlk, unsigned dep, void *event)
-+{
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+ struct htree_lock *grtlk = NULL;
-+ struct htree_lock_node *grtln;
-+ struct htree_lock *poslk;
-+ struct htree_lock *tmplk;
-+
-+ if (!htree_node_is_granted(curlk, dep))
-+ return;
-+
-+ if (!list_empty(&curln->ln_granted_list)) {
-+ /* there is another granted lock */
-+ grtlk = list_entry(curln->ln_granted_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_granted_list);
-+ list_del_init(&curln->ln_granted_list);
-+ }
-+
-+ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
-+ /*
-+ * @curlk is the only granted lock, so we confirmed:
-+ * a) curln is key owner (attached on major/minor_list),
-+ * so if there is any blocked lock, it should be attached
-+ * on curln->ln_blocked_list
-+ * b) we always can grant the first blocked lock
-+ */
-+ grtlk = list_entry(curln->ln_blocked_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_blocked_list);
-+ BUG_ON(grtlk->lk_task == NULL);
-+ wake_up_process(grtlk->lk_task);
-+ }
-+
-+ if (event != NULL &&
-+ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
-+ curln->ln_ev_target = event;
-+ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
-+ } else {
-+ curln->ln_mode = HTREE_LOCK_INVAL;
-+ }
-+
-+ if (grtlk == NULL) { /* I must be the only one locking this key */
-+ struct htree_lock_node *tmpln;
-+
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
-+ return;
-+
-+ /* not listening */
-+ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(curln);
-+ return;
-+ }
-+
-+ tmpln = list_entry(curln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
-+
-+ htree_key_list_replace_init(curln, tmpln);
-+ list_del_init(&curln->ln_alive_list);
-+
-+ return;
-+ }
-+
-+ /* have a granted lock */
-+ grtln = &grtlk->lk_nodes[dep];
-+ if (!list_empty(&curln->ln_blocked_list)) {
-+ /* only key owner can be on both lists */
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (list_empty(&grtln->ln_blocked_list)) {
-+ list_add(&grtln->ln_blocked_list,
-+ &curln->ln_blocked_list);
-+ }
-+ list_del_init(&curln->ln_blocked_list);
-+ }
-+ /*
-+ * NB: this is the tricky part:
-+ * We have only two modes for child-lock (PR and PW), also,
-+ * only owner of the key (attached on major/minor_list) can be on
-+ * both blocked_list and granted_list, so @grtlk must be one
-+ * of these two cases:
-+ *
-+ * a) @grtlk is taken from granted_list, which means we've granted
-+ * more than one lock so @grtlk has to be PR, the first blocked
-+ * lock must be PW and we can't grant it at all.
-+ * So even @grtlk is not owner of the key (empty blocked_list),
-+ * we don't care because we can't grant any lock.
-+ * b) we just grant a new lock which is taken from head of blocked
-+ * list, and it should be the first granted lock, and it should
-+ * be the first one linked on blocked_list.
-+ *
-+ * Either way, we can get correct result by iterating blocked_list
-+ * of @grtlk, and don't have to bother on how to find out
-+ * owner of current key.
-+ */
-+ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
-+ lk_nodes[dep].ln_blocked_list) {
-+ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
-+ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
-+ break;
-+ /* grant all readers */
-+ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
-+ list_add(&poslk->lk_nodes[dep].ln_granted_list,
-+ &grtln->ln_granted_list);
-+
-+ BUG_ON(poslk->lk_task == NULL);
-+ wake_up_process(poslk->lk_task);
-+ }
-+
-+ /* if @curln is the owner of this key, replace it with @grtln */
-+ if (!htree_key_list_empty(curln))
-+ htree_key_list_replace_init(curln, grtln);
-+
-+ if (curln->ln_mode == HTREE_LOCK_INVAL)
-+ list_del_init(&curln->ln_alive_list);
-+}
-+
-+/*
-+ * a thin wrapper around htree_node_lock_internal; it returns 1 when the
-+ * lock is granted, and 0 only if @wait is false and the lock can't be
-+ * granted immediately
-+ */
-+int
-+htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, dep);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_node_lock_try);
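
Example of the no-wait path, as a sketch; "blocknr" is a hypothetical key and
@lck is assumed to have been created with at least two key levels:

	if (!htree_node_lock_try(lck, HTREE_LOCK_PW, blocknr, 1, 0, NULL)) {
		/* @wait was false and the key is busy: back off or retry */
	}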
-+
-+/* wrapper around htree_node_unlock_internal */
-+void
-+htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ htree_node_unlock_internal(lhead, lck, dep, event);
-+ htree_spin_unlock(lhead, dep);
-+}
-+EXPORT_SYMBOL(htree_node_unlock);
-+
-+/* stop listening on child-lock level @dep */
-+void
-+htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
-+{
-+ struct htree_lock_node *ln = &lck->lk_nodes[dep];
-+ struct htree_lock_node *tmp;
-+
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+ BUG_ON(!list_empty(&ln->ln_blocked_list));
-+ BUG_ON(!list_empty(&ln->ln_granted_list));
-+
-+ if (!htree_node_is_listening(lck, dep))
-+ return;
-+
-+ htree_spin_lock(lck->lk_head, dep);
-+ ln->ln_mode = HTREE_LOCK_INVAL;
-+ ln->ln_ev_target = NULL;
-+
-+ if (htree_key_list_empty(ln)) { /* not owner */
-+ list_del_init(&ln->ln_alive_list);
-+ goto out;
-+ }
-+
-+ /* I'm the owner... */
-+ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(ln);
-+ goto out;
-+ }
-+
-+ tmp = list_entry(ln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
-+ htree_key_list_replace_init(ln, tmp);
-+ list_del_init(&ln->ln_alive_list);
-+ out:
-+ htree_spin_unlock(lck->lk_head, dep);
-+}
-+EXPORT_SYMBOL(htree_node_stop_listen);
-+
-+/* release all child-locks if we have any */
-+static void
-+htree_node_release_all(struct htree_lock *lck)
-+{
-+ int i;
-+
-+ for (i = 0; i < lck->lk_depth; i++) {
-+ if (htree_node_is_granted(lck, i))
-+ htree_node_unlock(lck, i, NULL);
-+ else if (htree_node_is_listening(lck, i))
-+ htree_node_stop_listen(lck, i);
-+ }
-+}
-+
-+/*
-+ * obtain htree lock, it could be blocked inside if there's conflict
-+ * with any granted or blocked lock and @wait is true.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_lock_internal(struct htree_lock *lck, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int granted = 0;
-+ int blocked = 0;
-+ int i;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ if (lhead->lh_nblocked[i] != 0)
-+ blocked |= 1 << i;
-+ }
-+ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
-+ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
-+ /* block the current lock even if it only conflicts with another
-+ * blocked lock, so that locks like EX won't starve */
-+ if (!wait)
-+ return -1;
-+ lhead->lh_nblocked[lck->lk_mode]++;
-+ lk_block_inc(lck->lk_mode);
-+
-+ lck->lk_task = current;
-+ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ /* wait to be given the lock */
-+ if (lck->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt: wake_up will set me RUNNING.
-+ * Since the thread could be woken up spuriously, we need to
-+ * check again whether the lock was actually granted. */
-+ if (!list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ if (list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return 0;
-+ }
-+ goto retry;
-+ }
-+ return 0; /* without lh_lock */
-+ }
-+ lhead->lh_ngranted[lck->lk_mode]++;
-+ lk_grant_inc(lck->lk_mode);
-+ return 1;
-+}
-+
-+/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
-+static void
-+htree_unlock_internal(struct htree_lock *lck)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ int granted = 0;
-+ int i;
-+
-+ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
-+
-+ lhead->lh_ngranted[lck->lk_mode]--;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ }
-+ list_for_each_entry_safe(tmp, tmp2,
-+ &lhead->lh_blocked_list, lk_blocked_list) {
-+ /* conflict with any granted lock? */
-+ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
-+ break;
-+
-+ list_del_init(&tmp->lk_blocked_list);
-+
-+ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
-+
-+ lhead->lh_nblocked[tmp->lk_mode]--;
-+ lhead->lh_ngranted[tmp->lk_mode]++;
-+ granted |= 1 << tmp->lk_mode;
-+
-+ BUG_ON(tmp->lk_task == NULL);
-+ wake_up_process(tmp->lk_task);
-+ }
-+}
-+
-+/* wrapper around htree_lock_internal and the exported interface.
-+ * It always returns 1 with the lock granted if @wait is true; it can return 0
-+ * if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait)
-+{
-+ int rc;
-+
-+ BUG_ON(lck->lk_depth > lhead->lh_depth);
-+ BUG_ON(lck->lk_head != NULL);
-+ BUG_ON(lck->lk_task != NULL);
-+
-+ lck->lk_head = lhead;
-+ lck->lk_mode = mode;
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_lock_try);
-+
-+/* wrapper around htree_unlock_internal and the exported interface.
-+ * It releases all htree_node locks as well as the htree_lock itself */
-+void
-+htree_unlock(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_head == NULL);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
-+ lck->lk_head = NULL;
-+ lck->lk_task = NULL;
-+}
-+EXPORT_SYMBOL(htree_unlock);
-+
-+/* change lock mode */
-+void
-+htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
-+{
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+ lck->lk_mode = mode;
-+}
-+EXPORT_SYMBOL(htree_change_mode);
-+
-+/* release the htree lock, and lock it again with a new mode.
-+ * This function first releases all htree_node locks and the htree_lock,
-+ * then tries to gain the htree_lock with the new @mode.
-+ * It always returns 1 with the lock granted if @wait is true; it can return 0
-+ * if @wait is false and the locking request can't be granted immediately */
-+int
-+htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(lhead == NULL);
-+ BUG_ON(lck->lk_mode == mode);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ lck->lk_mode = mode;
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_change_lock_try);
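
Typical relock pattern, as a sketch: if work done under a shared lock
discovers it must restructure shared state, upgrade to EX and redo the
lookup, since all node locks and the htree lock are dropped in between.
"need_restructure" and the retry label here are hypothetical:

	if (need_restructure) {
		htree_change_lock(lck, HTREE_LOCK_EX); /* blocks until granted */
		goto retry;	/* state may have changed while unlocked */
	}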
-+
-+/* create a htree_lock head with @depth levels (number of child-locks);
-+ * it is a per-resource structure */
-+struct htree_lock_head *
-+htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
-+{
-+ struct htree_lock_head *lhead;
-+ int i;
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+
-+ lhead = kzalloc(offsetof(struct htree_lock_head,
-+ lh_children[depth]) + priv, GFP_NOFS);
-+ if (lhead == NULL)
-+ return NULL;
-+
-+ if (hbits < HTREE_HBITS_MIN)
-+ lhead->lh_hbits = HTREE_HBITS_MIN;
-+ else if (hbits > HTREE_HBITS_MAX)
-+ lhead->lh_hbits = HTREE_HBITS_MAX;
-+ else
-+ lhead->lh_hbits = hbits; /* in range: keep the caller's value;
-+ * without this, lh_hbits would stay 0 from kzalloc */
-+
-+ lhead->lh_lock = 0;
-+ lhead->lh_depth = depth;
-+ INIT_LIST_HEAD(&lhead->lh_blocked_list);
-+ if (priv > 0) {
-+ lhead->lh_private = (void *)lhead +
-+ offsetof(struct htree_lock_head, lh_children[depth]);
-+ }
-+
-+ for (i = 0; i < depth; i++) {
-+ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
-+ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(htree_lock_head_alloc);
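
Allocation sketch: a head with two key levels and the default hash width;
no private area is reserved. Error handling is up to the caller:

	struct htree_lock_head *lhead =
		htree_lock_head_alloc(2, HTREE_HBITS_DEF, 0);
	if (lhead == NULL)
		return -ENOMEM;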
-+
-+/* free the htree_lock head */
-+void
-+htree_lock_head_free(struct htree_lock_head *lhead)
-+{
-+ int i;
-+
-+ BUG_ON(!list_empty(&lhead->lh_blocked_list));
-+ for (i = 0; i < lhead->lh_depth; i++)
-+ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
-+ kfree(lhead);
-+}
-+EXPORT_SYMBOL(htree_lock_head_free);
-+
-+/* register event callback for @events of child-lock at level @dep */
-+void
-+htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
-+ unsigned events, htree_event_cb_t callback)
-+{
-+ BUG_ON(lhead->lh_depth <= dep);
-+ lhead->lh_children[dep].lc_events = events;
-+ lhead->lh_children[dep].lc_callback = callback;
-+}
-+EXPORT_SYMBOL(htree_lock_event_attach);
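
Event sketch: with HTREE_EVENT_WR, a listener's callback fires whenever a PW
lock on the same level-0 key is granted; "my_event_cb" is a hypothetical
callback. It runs under lh_lock(dep) -- see htree_key_event_enqueue() --
so it must not sleep:

	static void my_event_cb(void *target, void *event)
	{
		/* runs under a bit spinlock: no sleeping here */
	}

	htree_lock_event_attach(lhead, 0, HTREE_EVENT_WR, my_event_cb);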
-+
-+/* allocate a htree_lock, which is a per-thread structure; @pbytes is the
-+ * number of extra bytes reserved as private data for the caller */
-+struct htree_lock *
-+htree_lock_alloc(unsigned depth, unsigned pbytes)
-+{
-+ struct htree_lock *lck;
-+ int i = offsetof(struct htree_lock, lk_nodes[depth]);
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+ lck = kzalloc(i + pbytes, GFP_NOFS);
-+ if (lck == NULL)
-+ return NULL;
-+
-+ if (pbytes != 0)
-+ lck->lk_private = (void *)lck + i;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+ lck->lk_depth = depth;
-+ INIT_LIST_HEAD(&lck->lk_blocked_list);
-+
-+ for (i = 0; i < depth; i++) {
-+ struct htree_lock_node *node = &lck->lk_nodes[i];
-+
-+ node->ln_mode = HTREE_LOCK_INVAL;
-+ INIT_LIST_HEAD(&node->ln_major_list);
-+ INIT_LIST_HEAD(&node->ln_minor_list);
-+ INIT_LIST_HEAD(&node->ln_alive_list);
-+ INIT_LIST_HEAD(&node->ln_blocked_list);
-+ INIT_LIST_HEAD(&node->ln_granted_list);
-+ }
-+
-+ return lck;
-+}
-+EXPORT_SYMBOL(htree_lock_alloc);
-+
-+/* free the htree_lock handle */
-+void
-+htree_lock_free(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
-+ kfree(lck);
-+}
-+EXPORT_SYMBOL(htree_lock_free);
Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
===================================================================
--- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile
- pdirops support for ldiskfs
- integrate with osd-ldiskfs
-Index: linux-3.10.0-229.1.2.fc21.x86_64/include/linux/htree_lock.h
-===================================================================
---- /dev/null
-+++ linux-3.10.0-229.1.2.fc21.x86_64/include/linux/htree_lock.h
-@@ -0,0 +1,187 @@
-+/*
-+ * include/linux/htree_lock.h
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+
-+/*
-+ * htree lock
-+ *
-+ * htree_lock is an advanced lock, it can support five lock modes (concept is
-+ * taken from DLM) and it's a sleeping lock.
-+ *
-+ * most common use case is:
-+ * - create a htree_lock_head for data
-+ * - each thread (contender) creates it's own htree_lock
-+ * - contender needs to call htree_lock(lock_node, mode) to protect data and
-+ * call htree_unlock to release lock
-+ *
-+ * Also, there is advanced use-case which is more complex, user can have
-+ * PW/PR lock on particular key, it's mostly used while user holding shared
-+ * lock on the htree (CW, CR)
-+ *
-+ * htree_lock(lock_node, HTREE_LOCK_CR); lock the htree with CR
-+ * htree_node_lock(lock_node, HTREE_LOCK_PR, key...); lock @key with PR
-+ * ...
-+ * htree_node_unlock(lock_node);; unlock the key
-+ *
-+ * Another tip is, we can have N-levels of this kind of keys, all we need to
-+ * do is specifying N-levels while creating htree_lock_head, then we can
-+ * lock/unlock a specific level by:
-+ * htree_node_lock(lock_node, mode1, key1, level1...);
-+ * do something;
-+ * htree_node_lock(lock_node, mode1, key2, level2...);
-+ * do something;
-+ * htree_node_unlock(lock_node, level2);
-+ * htree_node_unlock(lock_node, level1);
-+ *
-+ * NB: for multi-level, should be careful about locking order to avoid deadlock
-+ */
-+
-+#ifndef _LINUX_HTREE_LOCK_H
-+#define _LINUX_HTREE_LOCK_H
-+
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * Lock Modes
-+ * more details can be found here:
-+ * http://en.wikipedia.org/wiki/Distributed_lock_manager
-+ */
-+typedef enum {
-+ HTREE_LOCK_EX = 0, /* exclusive lock: incompatible with all others */
-+ HTREE_LOCK_PW, /* protected write: allows only CR users */
-+ HTREE_LOCK_PR, /* protected read: allow PR, CR users */
-+ HTREE_LOCK_CW, /* concurrent write: allow CR, CW users */
-+ HTREE_LOCK_CR, /* concurrent read: allow all but EX users */
-+ HTREE_LOCK_MAX, /* number of lock modes */
-+} htree_lock_mode_t;
-+
-+#define HTREE_LOCK_NL HTREE_LOCK_MAX
-+#define HTREE_LOCK_INVAL 0xdead10c
-+
-+enum {
-+ HTREE_HBITS_MIN = 2,
-+ HTREE_HBITS_DEF = 14,
-+ HTREE_HBITS_MAX = 32,
-+};
-+
-+enum {
-+ HTREE_EVENT_DISABLE = (0),
-+ HTREE_EVENT_RD = (1 << HTREE_LOCK_PR),
-+ HTREE_EVENT_WR = (1 << HTREE_LOCK_PW),
-+ HTREE_EVENT_RDWR = (HTREE_EVENT_RD | HTREE_EVENT_WR),
-+};
-+
-+struct htree_lock;
-+
-+typedef void (*htree_event_cb_t)(void *target, void *event);
-+
-+struct htree_lock_child {
-+ struct list_head lc_list; /* granted list */
-+ htree_event_cb_t lc_callback; /* event callback */
-+ unsigned lc_events; /* event types */
-+};
-+
-+struct htree_lock_head {
-+ unsigned long lh_lock; /* bits lock */
-+ /* blocked lock list (htree_lock) */
-+ struct list_head lh_blocked_list;
-+ /* # key levels */
-+ u16 lh_depth;
-+ /* hash bits for key and limit number of locks */
-+ u16 lh_hbits;
-+ /* counters for blocked locks */
-+ u16 lh_nblocked[HTREE_LOCK_MAX];
-+ /* counters for granted locks */
-+ u16 lh_ngranted[HTREE_LOCK_MAX];
-+ /* private data */
-+ void *lh_private;
-+ /* array of children locks */
-+ struct htree_lock_child lh_children[0];
-+};
-+
-+/* htree_lock_node_t is child-lock for a specific key (ln_value) */
-+struct htree_lock_node {
-+ htree_lock_mode_t ln_mode;
-+ /* major hash key */
-+ u16 ln_major_key;
-+ /* minor hash key */
-+ u16 ln_minor_key;
-+ struct list_head ln_major_list;
-+ struct list_head ln_minor_list;
-+ /* alive list, all locks (granted, blocked, listening) are on it */
-+ struct list_head ln_alive_list;
-+ /* blocked list */
-+ struct list_head ln_blocked_list;
-+ /* granted list */
-+ struct list_head ln_granted_list;
-+ void *ln_ev_target;
-+};
-+
-+struct htree_lock {
-+ struct task_struct *lk_task;
-+ struct htree_lock_head *lk_head;
-+ void *lk_private;
-+ unsigned lk_depth;
-+ htree_lock_mode_t lk_mode;
-+ struct list_head lk_blocked_list;
-+ struct htree_lock_node lk_nodes[0];
-+};
-+
-+/* create a lock head, which stands for a resource */
-+struct htree_lock_head *htree_lock_head_alloc(unsigned depth,
-+ unsigned hbits, unsigned priv);
-+/* free a lock head */
-+void htree_lock_head_free(struct htree_lock_head *lhead);
-+/* register event callback for child lock at level @depth */
-+void htree_lock_event_attach(struct htree_lock_head *lhead, unsigned depth,
-+ unsigned events, htree_event_cb_t callback);
-+/* create a lock handle, which stands for a thread */
-+struct htree_lock *htree_lock_alloc(unsigned depth, unsigned pbytes);
-+/* free a lock handle */
-+void htree_lock_free(struct htree_lock *lck);
-+/* lock htree, when @wait is true, 0 is returned if the lock can't
-+ * be granted immediately */
-+int htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait);
-+/* unlock htree */
-+void htree_unlock(struct htree_lock *lck);
-+/* unlock and relock htree with @new_mode */
-+int htree_change_lock_try(struct htree_lock *lck,
-+ htree_lock_mode_t new_mode, int wait);
-+void htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode);
-+/* require child lock (key) of htree at level @dep, @event will be sent to all
-+ * listeners on this @key while lock being granted */
-+int htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event);
-+/* release child lock at level @dep, this lock will listen on it's key
-+ * if @event isn't NULL, event_cb will be called against @lck while granting
-+ * any other lock at level @dep with the same key */
-+void htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event);
-+/* stop listening on child lock at level @dep */
-+void htree_node_stop_listen(struct htree_lock *lck, unsigned dep);
-+/* for debug */
-+void htree_lock_stat_print(int depth);
-+void htree_lock_stat_reset(void);
-+
-+#define htree_lock(lck, lh, mode) htree_lock_try(lck, lh, mode, 1)
-+#define htree_change_lock(lck, mode) htree_change_lock_try(lck, mode, 1)
-+
-+#define htree_lock_mode(lck) ((lck)->lk_mode)
-+
-+#define htree_node_lock(lck, mode, key, dep) \
-+ htree_node_lock_try(lck, mode, key, dep, 1, NULL)
-+/* this is only safe in thread context of lock owner */
-+#define htree_node_is_granted(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_INVAL && \
-+ (lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_NL)
-+/* this is only safe in thread context of lock owner */
-+#define htree_node_is_listening(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode == HTREE_LOCK_NL)
-+
-+#endif
-Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
-===================================================================
---- /dev/null
-+++ linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/htree_lock.c
-@@ -0,0 +1,891 @@
-+/*
-+ * fs/ext4/htree_lock.c
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+#include <linux/jbd2.h>
-+#include <linux/hash.h>
-+#include <linux/module.h>
-+#include <linux/htree_lock.h>
-+
-+enum {
-+ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
-+ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
-+ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
-+ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
-+ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
-+};
-+
-+enum {
-+ HTREE_LOCK_COMPAT_EX = 0,
-+ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
-+ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
-+ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
-+ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
-+ HTREE_LOCK_BIT_PW,
-+};
-+
-+static int htree_lock_compat[] = {
-+ [HTREE_LOCK_EX] HTREE_LOCK_COMPAT_EX,
-+ [HTREE_LOCK_PW] HTREE_LOCK_COMPAT_PW,
-+ [HTREE_LOCK_PR] HTREE_LOCK_COMPAT_PR,
-+ [HTREE_LOCK_CW] HTREE_LOCK_COMPAT_CW,
-+ [HTREE_LOCK_CR] HTREE_LOCK_COMPAT_CR,
-+};
-+
-+/* max allowed htree-lock depth.
-+ * We only need depth=3 for ext4 although user can have higher value. */
-+#define HTREE_LOCK_DEP_MAX 16
-+
-+#ifdef HTREE_LOCK_DEBUG
-+
-+static char *hl_name[] = {
-+ [HTREE_LOCK_EX] "EX",
-+ [HTREE_LOCK_PW] "PW",
-+ [HTREE_LOCK_PR] "PR",
-+ [HTREE_LOCK_CW] "CW",
-+ [HTREE_LOCK_CR] "CR",
-+};
-+
-+/* lock stats */
-+struct htree_lock_node_stats {
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long retried[HTREE_LOCK_MAX];
-+ unsigned long long events;
-+};
-+
-+struct htree_lock_stats {
-+ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+};
-+
-+static struct htree_lock_stats hl_stats;
-+
-+void htree_lock_stat_reset(void)
-+{
-+ memset(&hl_stats, 0, sizeof(hl_stats));
-+}
-+
-+void htree_lock_stat_print(int depth)
-+{
-+ int i;
-+ int j;
-+
-+ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
-+ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
-+ }
-+ for (i = 0; i < depth; i++) {
-+ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
-+ for (j = 0; j < HTREE_LOCK_MAX; j++) {
-+ printk(KERN_DEBUG
-+ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
-+ hl_name[j], hl_stats.nodes[i].granted[j],
-+ hl_stats.nodes[i].blocked[j],
-+ hl_stats.nodes[i].retried[j]);
-+ }
-+ }
-+}
-+
-+#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
-+#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
-+#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
-+#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
-+#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
-+#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
-+
-+#else /* !DEBUG */
-+
-+void htree_lock_stat_reset(void) {}
-+void htree_lock_stat_print(int depth) {}
-+
-+#define lk_grant_inc(m) do {} while (0)
-+#define lk_block_inc(m) do {} while (0)
-+#define ln_grant_inc(d, m) do {} while (0)
-+#define ln_block_inc(d, m) do {} while (0)
-+#define ln_retry_inc(d, m) do {} while (0)
-+#define ln_event_inc(d) do {} while (0)
-+
-+#endif /* DEBUG */
-+
-+EXPORT_SYMBOL(htree_lock_stat_reset);
-+EXPORT_SYMBOL(htree_lock_stat_print);
-+
-+#define HTREE_DEP_ROOT (-1)
-+
-+#define htree_spin_lock(lhead, dep) \
-+ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
-+#define htree_spin_unlock(lhead, dep) \
-+ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
-+
-+#define htree_key_event_ignore(child, ln) \
-+ (!((child)->lc_events & (1 << (ln)->ln_mode)))
-+
-+static int
-+htree_key_list_empty(struct htree_lock_node *ln)
-+{
-+ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
-+}
-+
-+static void
-+htree_key_list_del_init(struct htree_lock_node *ln)
-+{
-+ struct htree_lock_node *tmp = NULL;
-+
-+ if (!list_empty(&ln->ln_minor_list)) {
-+ tmp = list_entry(ln->ln_minor_list.next,
-+ struct htree_lock_node, ln_minor_list);
-+ list_del_init(&ln->ln_minor_list);
-+ }
-+
-+ if (list_empty(&ln->ln_major_list))
-+ return;
-+
-+ if (tmp == NULL) { /* not on minor key list */
-+ list_del_init(&ln->ln_major_list);
-+ } else {
-+ BUG_ON(!list_empty(&tmp->ln_major_list));
-+ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
-+ }
-+}
-+
-+static void
-+htree_key_list_replace_init(struct htree_lock_node *old,
-+ struct htree_lock_node *new)
-+{
-+ if (!list_empty(&old->ln_major_list))
-+ list_replace_init(&old->ln_major_list, &new->ln_major_list);
-+
-+ if (!list_empty(&old->ln_minor_list))
-+ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
-+}
-+
-+static void
-+htree_key_event_enqueue(struct htree_lock_child *child,
-+ struct htree_lock_node *ln, int dep, void *event)
-+{
-+ struct htree_lock_node *tmp;
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
-+ if (event == NULL || htree_key_event_ignore(child, ln))
-+ return;
-+
-+ /* shouldn't be a very long list */
-+ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
-+ if (tmp->ln_mode == HTREE_LOCK_NL) {
-+ ln_event_inc(dep);
-+ if (child->lc_callback != NULL)
-+ child->lc_callback(tmp->ln_ev_target, event);
-+ }
-+ }
-+}
-+
-+static int
-+htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
-+ unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
-+ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ /* NB: we only expect PR/PW lock mode at here, only these two modes are
-+ * allowed for htree_node_lock(asserted in htree_node_lock_internal),
-+ * NL is only used for listener, user can't directly require NL mode */
-+ if ((curln->ln_mode == HTREE_LOCK_NL) ||
-+ (curln->ln_mode != HTREE_LOCK_PW &&
-+ newln->ln_mode != HTREE_LOCK_PW)) {
-+ /* no conflict, attach it on granted list of @curlk */
-+ if (curln->ln_mode != HTREE_LOCK_NL) {
-+ list_add(&newln->ln_granted_list,
-+ &curln->ln_granted_list);
-+ } else {
-+ /* replace key owner */
-+ htree_key_list_replace_init(curln, newln);
-+ }
-+
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ ln_grant_inc(dep, newln->ln_mode);
-+ return 1; /* still hold lh_lock */
-+ }
-+
-+ if (!wait) { /* can't grant and don't want to wait */
-+ ln_retry_inc(dep, newln->ln_mode);
-+ newln->ln_mode = HTREE_LOCK_INVAL;
-+ return -1; /* don't wait and just return -1 */
-+ }
-+
-+ newlk->lk_task = current;
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ /* conflict, attach it on blocked list of curlk */
-+ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ ln_block_inc(dep, newln->ln_mode);
-+
-+ htree_spin_unlock(newlk->lk_head, dep);
-+ /* wait to be given the lock */
-+ if (newlk->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt, wake up will set me RUNNING */
-+ if (event == NULL || htree_key_event_ignore(child, newln))
-+ return 0; /* granted without lh_lock */
-+
-+ htree_spin_lock(newlk->lk_head, dep);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ return 1; /* still hold lh_lock */
-+}
-+
-+/*
-+ * get PR/PW access to particular tree-node according to @dep and @key,
-+ * it will return -1 if @wait is false and can't immediately grant this lock.
-+ * All listeners(HTREE_LOCK_NL) on @dep and with the same @key will get
-+ * @event if it's not NULL.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
-+ htree_lock_mode_t mode, u32 key, unsigned dep,
-+ int wait, void *event)
-+{
-+ LIST_HEAD(list);
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ u16 major;
-+ u16 minor;
-+ u8 reverse;
-+ u8 ma_bits;
-+ u8 mi_bits;
-+
-+ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+
-+ key = hash_long(key, lhead->lh_hbits);
-+
-+ mi_bits = lhead->lh_hbits >> 1;
-+ ma_bits = lhead->lh_hbits - mi_bits;
-+
-+ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
-+ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
-+ lck->lk_nodes[dep].ln_mode = mode;
-+
-+ /*
-+ * The major key list is an ordered list, so searches are started
-+ * at the end of the list that is numerically closer to major_key,
-+ * so at most half of the list will be walked (for well-distributed
-+ * keys). The list traversal aborts early if the expected key
-+ * location is passed.
-+ */
-+ reverse = (major >= (1 << (ma_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp,
-+ &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
-+ /* attach _after_ @tmp */
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+
-+ } else {
-+ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
-+ /* insert _before_ @tmp */
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+ }
-+
-+ search_minor:
-+ /*
-+ * NB: minor_key list doesn't have a "head", @list is just a
-+ * temporary stub for helping list searching, make sure it's removed
-+ * after searching.
-+ * minor_key list is an ordered list too.
-+ */
-+ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
-+
-+ reverse = (minor >= (1 << (mi_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
-+ /* attach _after_ @tmp2 */
-+ list_add(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
-+
-+ } else {
-+ list_for_each_entry(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
-+ /* insert _before_ @tmp2 */
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
-+ }
-+
-+ out_grant_minor:
-+ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
-+ /* new lock @lck is the first one on minor_key list, which
-+ * means it has the smallest minor_key and it should
-+ * replace @tmp as minor_key owner */
-+ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
-+ &lck->lk_nodes[dep].ln_major_list);
-+ }
-+ /* remove the temporary head */
-+ list_del(&list);
-+
-+ out_grant_major:
-+ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
-+ return 1; /* granted with holding lh_lock */
-+
-+ out_enqueue:
-+ list_del(&list); /* remove temprary head */
-+ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
-+}
-+
-+/*
-+ * release the key of @lck at level @dep, and grant any blocked locks.
-+ * caller will still listen on @key if @event is not NULL, which means
-+ * caller can see a event (by event_cb) while granting any lock with
-+ * the same key at level @dep.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ * NB: listener will not block anyone because listening mode is HTREE_LOCK_NL
-+ */
-+static void
-+htree_node_unlock_internal(struct htree_lock_head *lhead,
-+ struct htree_lock *curlk, unsigned dep, void *event)
-+{
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+ struct htree_lock *grtlk = NULL;
-+ struct htree_lock_node *grtln;
-+ struct htree_lock *poslk;
-+ struct htree_lock *tmplk;
-+
-+ if (!htree_node_is_granted(curlk, dep))
-+ return;
-+
-+ if (!list_empty(&curln->ln_granted_list)) {
-+ /* there is another granted lock */
-+ grtlk = list_entry(curln->ln_granted_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_granted_list);
-+ list_del_init(&curln->ln_granted_list);
-+ }
-+
-+ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
-+ /*
-+ * @curlk is the only granted lock, so we confirmed:
-+ * a) curln is key owner (attached on major/minor_list),
-+ * so if there is any blocked lock, it should be attached
-+ * on curln->ln_blocked_list
-+ * b) we always can grant the first blocked lock
-+ */
-+ grtlk = list_entry(curln->ln_blocked_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_blocked_list);
-+ BUG_ON(grtlk->lk_task == NULL);
-+ wake_up_process(grtlk->lk_task);
-+ }
-+
-+ if (event != NULL &&
-+ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
-+ curln->ln_ev_target = event;
-+ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
-+ } else {
-+ curln->ln_mode = HTREE_LOCK_INVAL;
-+ }
-+
-+ if (grtlk == NULL) { /* I must be the only one locking this key */
-+ struct htree_lock_node *tmpln;
-+
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
-+ return;
-+
-+ /* not listening */
-+ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(curln);
-+ return;
-+ }
-+
-+ tmpln = list_entry(curln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
-+
-+ htree_key_list_replace_init(curln, tmpln);
-+ list_del_init(&curln->ln_alive_list);
-+
-+ return;
-+ }
-+
-+ /* have a granted lock */
-+ grtln = &grtlk->lk_nodes[dep];
-+ if (!list_empty(&curln->ln_blocked_list)) {
-+ /* only key owner can be on both lists */
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (list_empty(&grtln->ln_blocked_list)) {
-+ list_add(&grtln->ln_blocked_list,
-+ &curln->ln_blocked_list);
-+ }
-+ list_del_init(&curln->ln_blocked_list);
-+ }
-+ /*
-+ * NB: this is the tricky part:
-+ * We have only two modes for child-lock (PR and PW), also,
-+ * only owner of the key (attached on major/minor_list) can be on
-+ * both blocked_list and granted_list, so @grtlk must be one
-+ * of these two cases:
-+ *
-+ * a) @grtlk is taken from granted_list, which means we've granted
-+ * more than one lock so @grtlk has to be PR, the first blocked
-+ * lock must be PW and we can't grant it at all.
-+ * So even if @grtlk is not the owner of the key (its blocked_list
-+ * is empty), we don't care because we can't grant any lock anyway.
-+ * b) we just grant a new lock which is taken from head of blocked
-+ * list, and it should be the first granted lock, and it should
-+ * be the first one linked on blocked_list.
-+ *
-+ * Either way, we get the correct result by iterating the
-+ * blocked_list of @grtlk, without having to work out who the
-+ * owner of the current key is.
-+ */
-+ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
-+ lk_nodes[dep].ln_blocked_list) {
-+ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
-+ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
-+ break;
-+ /* grant all readers */
-+ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
-+ list_add(&poslk->lk_nodes[dep].ln_granted_list,
-+ &grtln->ln_granted_list);
-+
-+ BUG_ON(poslk->lk_task == NULL);
-+ wake_up_process(poslk->lk_task);
-+ }
-+
-+ /* if @curln is the owner of this key, replace it with @grtln */
-+ if (!htree_key_list_empty(curln))
-+ htree_key_list_replace_init(curln, grtln);
-+
-+ if (curln->ln_mode == HTREE_LOCK_INVAL)
-+ list_del_init(&curln->ln_alive_list);
-+}
-+
-+/*
-+ * a thin wrapper around htree_node_lock_internal; it returns 1 when the
-+ * lock is granted, and 0 only if @wait is false and the lock can't be
-+ * granted immediately
-+ */
-+int
-+htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, dep);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_node_lock_try);
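As a usage sketch of this entry point (a hypothetical caller; @name_hash is illustrative): a lookup takes the tree in CR and then pins one hash key in PR, matching the commit message:

/* illustrative lookup path; @lck and @lhead were allocated elsewhere,
 * @name_hash is a hypothetical name hash for the entry being looked up */
static void pdirop_lookup_example(struct htree_lock *lck,
                                  struct htree_lock_head *lhead,
                                  u32 name_hash)
{
        htree_lock(lck, lhead, HTREE_LOCK_CR);             /* CR on the tree */
        htree_node_lock(lck, HTREE_LOCK_PR, name_hash, 0); /* PR on one key */
        /* ... search the leaf block this name hashes to ... */
        htree_node_unlock(lck, 0, NULL);
        htree_unlock(lck);
}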
-+
-+/* wrapper around htree_node_unlock_internal */
-+void
-+htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ htree_node_unlock_internal(lhead, lck, dep, event);
-+ htree_spin_unlock(lhead, dep);
-+}
-+EXPORT_SYMBOL(htree_node_unlock);
-+
-+/* stop listening on child-lock level @dep */
-+void
-+htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
-+{
-+ struct htree_lock_node *ln = &lck->lk_nodes[dep];
-+ struct htree_lock_node *tmp;
-+
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+ BUG_ON(!list_empty(&ln->ln_blocked_list));
-+ BUG_ON(!list_empty(&ln->ln_granted_list));
-+
-+ if (!htree_node_is_listening(lck, dep))
-+ return;
-+
-+ htree_spin_lock(lck->lk_head, dep);
-+ ln->ln_mode = HTREE_LOCK_INVAL;
-+ ln->ln_ev_target = NULL;
-+
-+ if (htree_key_list_empty(ln)) { /* not owner */
-+ list_del_init(&ln->ln_alive_list);
-+ goto out;
-+ }
-+
-+ /* I'm the owner... */
-+ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(ln);
-+ goto out;
-+ }
-+
-+ tmp = list_entry(ln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
-+ htree_key_list_replace_init(ln, tmp);
-+ list_del_init(&ln->ln_alive_list);
-+ out:
-+ htree_spin_unlock(lck->lk_head, dep);
-+}
-+EXPORT_SYMBOL(htree_node_stop_listen);
-+
-+/* release all child-locks if we have any */
-+static void
-+htree_node_release_all(struct htree_lock *lck)
-+{
-+ int i;
-+
-+ for (i = 0; i < lck->lk_depth; i++) {
-+ if (htree_node_is_granted(lck, i))
-+ htree_node_unlock(lck, i, NULL);
-+ else if (htree_node_is_listening(lck, i))
-+ htree_node_stop_listen(lck, i);
-+ }
-+}
-+
-+/*
-+ * obtain htree lock, it could be blocked inside if there's conflict
-+ * with any granted or blocked lock and @wait is true.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_lock_internal(struct htree_lock *lck, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int granted = 0;
-+ int blocked = 0;
-+ int i;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ if (lhead->lh_nblocked[i] != 0)
-+ blocked |= 1 << i;
-+ }
-+ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
-+ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
-+ /* block the current lock even if it only conflicts with another
-+ * blocked lock, so that locks like EX can't starve */
-+ if (!wait)
-+ return -1;
-+ lhead->lh_nblocked[lck->lk_mode]++;
-+ lk_block_inc(lck->lk_mode);
-+
-+ lck->lk_task = current;
-+ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ /* wait to be given the lock */
-+ if (lck->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt; the wake-up will set me RUNNING.
-+ * However, the thread may be woken up spuriously, so we
-+ * need to check again whether the lock was really granted. */
-+ if (!list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ if (list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return 0;
-+ }
-+ goto retry;
-+ }
-+ return 0; /* without lh_lock */
-+ }
-+ lhead->lh_ngranted[lck->lk_mode]++;
-+ lk_grant_inc(lck->lk_mode);
-+ return 1;
-+}
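A worked example of the two bitmask tests above, using the compatibility table defined at the top of this file (so the sketch belongs in htree_lock.c); the scenario, one PR lock granted and one EX lock queued, is illustrative:

/* illustrative: a new CR request with one PR granted and one EX queued */
static int cr_must_wait_example(void)
{
        int granted = HTREE_LOCK_BIT_PR;        /* one PR lock held */
        int blocked = HTREE_LOCK_BIT_EX;        /* one EX lock queued */

        /* CR is compatible with the granted PR lock... */
        int ok_granted = (htree_lock_compat[HTREE_LOCK_CR] & granted) == granted;
        /* ...but conflicts with the queued EX, so it must queue behind
         * it instead of overtaking; this is the anti-starvation rule */
        int ok_blocked = (htree_lock_compat[HTREE_LOCK_CR] & blocked) == blocked;

        return !(ok_granted && ok_blocked);     /* 1: the CR request waits */
}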
-+
-+/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
-+static void
-+htree_unlock_internal(struct htree_lock *lck)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ int granted = 0;
-+ int i;
-+
-+ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
-+
-+ lhead->lh_ngranted[lck->lk_mode]--;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ }
-+ list_for_each_entry_safe(tmp, tmp2,
-+ &lhead->lh_blocked_list, lk_blocked_list) {
-+ /* conflict with any granted lock? */
-+ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
-+ break;
-+
-+ list_del_init(&tmp->lk_blocked_list);
-+
-+ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
-+
-+ lhead->lh_nblocked[tmp->lk_mode]--;
-+ lhead->lh_ngranted[tmp->lk_mode]++;
-+ granted |= 1 << tmp->lk_mode;
-+
-+ BUG_ON(tmp->lk_task == NULL);
-+ wake_up_process(tmp->lk_task);
-+ }
-+}
-+
-+/* wrapper around htree_lock_internal and the exported interface.
-+ * It always returns 1 with the lock granted if @wait is true; it can
-+ * return 0 if @wait is false and the request can't be granted immediately */
-+int
-+htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait)
-+{
-+ int rc;
-+
-+ BUG_ON(lck->lk_depth > lhead->lh_depth);
-+ BUG_ON(lck->lk_head != NULL);
-+ BUG_ON(lck->lk_task != NULL);
-+
-+ lck->lk_head = lhead;
-+ lck->lk_mode = mode;
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_lock_try);
-+
-+/* wrapper around htree_unlock_internal and the exported interface.
-+ * It releases all htree_node_locks as well as the htree_lock itself */
-+void
-+htree_unlock(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_head == NULL);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
-+ lck->lk_head = NULL;
-+ lck->lk_task = NULL;
-+}
-+EXPORT_SYMBOL(htree_unlock);
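For the non-indexed (!is_dx) directory case from the commit message, the pattern collapses to a single whole-tree lock; a minimal sketch (hypothetical caller):

/* illustrative !is_dx(dir) path: change operations take the whole
 * tree exclusively, while lookup and readdir take it in PR */
static void plain_dir_change_example(struct htree_lock *lck,
                                     struct htree_lock_head *lhead)
{
        htree_lock(lck, lhead, HTREE_LOCK_EX);
        /* ... add or unlink a directory entry ... */
        htree_unlock(lck);      /* also releases any child locks held */
}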
-+
-+/* change lock mode */
-+void
-+htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
-+{
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+ lck->lk_mode = mode;
-+}
-+EXPORT_SYMBOL(htree_change_mode);
-+
-+/* release the htree lock, then lock it again with a new mode.
-+ * This function first releases all htree_node_locks and the htree_lock,
-+ * then tries to acquire the htree_lock with the new @mode.
-+ * It always returns 1 with the lock granted if @wait is true; it can
-+ * return 0 if @wait is false and the request can't be granted immediately */
-+int
-+htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(lhead == NULL);
-+ BUG_ON(lck->lk_mode == mode);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ lck->lk_mode = mode;
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_change_lock_try);
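This is the primitive behind the split path in the commit message: if a name-entry or index block must be split while only CW is held, the caller upgrades to EX and retries. A hedged sketch; need_split() is a hypothetical caller-side predicate:

/* illustrative: upgrade CW -> EX before splitting a block */
static void split_upgrade_example(struct htree_lock *lck)
{
        if (htree_lock_mode(lck) == HTREE_LOCK_CW && need_split()) {
                /* drops all child locks, releases CW, re-locks as EX;
                 * the tree may change while briefly unlocked, so the
                 * lookup must be re-validated before redoing the split */
                htree_change_lock(lck, HTREE_LOCK_EX);
        }
        /* now safe to split the name-entry or index block under EX */
}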
-+
-+/* create an htree_lock head with @depth levels (number of child-locks);
-+ * it is a per-resource structure */
-+struct htree_lock_head *
-+htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
-+{
-+ struct htree_lock_head *lhead;
-+ int i;
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+
-+ lhead = kzalloc(offsetof(struct htree_lock_head,
-+ lh_children[depth]) + priv, GFP_NOFS);
-+ if (lhead == NULL)
-+ return NULL;
-+
-+ if (hbits < HTREE_HBITS_MIN)
-+ lhead->lh_hbits = HTREE_HBITS_MIN;
-+ else if (hbits > HTREE_HBITS_MAX)
-+ lhead->lh_hbits = HTREE_HBITS_MAX;
-+ else
-+ lhead->lh_hbits = hbits;
-+
-+ lhead->lh_lock = 0;
-+ lhead->lh_depth = depth;
-+ INIT_LIST_HEAD(&lhead->lh_blocked_list);
-+ if (priv > 0) {
-+ lhead->lh_private = (void *)lhead +
-+ offsetof(struct htree_lock_head, lh_children[depth]);
-+ }
-+
-+ for (i = 0; i < depth; i++) {
-+ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
-+ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(htree_lock_head_alloc);
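A minimal allocation sketch (values are illustrative: the commit message uses two key levels per directory, name-hash then block number, and hbits bounds the number of distinct sub-lock keys):

/* illustrative: one lock head per directory resource, two key levels */
static struct htree_lock_head *dir_head_alloc_example(void)
{
        /* returns NULL on failure, like kzalloc */
        return htree_lock_head_alloc(2, HTREE_HBITS_DEF, 0);
}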
-+
-+/* free the htree_lock head */
-+void
-+htree_lock_head_free(struct htree_lock_head *lhead)
-+{
-+ int i;
-+
-+ BUG_ON(!list_empty(&lhead->lh_blocked_list));
-+ for (i = 0; i < lhead->lh_depth; i++)
-+ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
-+ kfree(lhead);
-+}
-+EXPORT_SYMBOL(htree_lock_head_free);
-+
-+/* register event callback for @events of child-lock at level @dep */
-+void
-+htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
-+ unsigned events, htree_event_cb_t callback)
-+{
-+ BUG_ON(lhead->lh_depth <= dep);
-+ lhead->lh_children[dep].lc_events = events;
-+ lhead->lh_children[dep].lc_callback = callback;
-+}
-+EXPORT_SYMBOL(htree_lock_event_attach);
-+
-+/* allocate an htree_lock, which is a per-thread structure; @pbytes is
-+ * extra space for the caller's private data */
-+struct htree_lock *
-+htree_lock_alloc(unsigned depth, unsigned pbytes)
-+{
-+ struct htree_lock *lck;
-+ int i = offsetof(struct htree_lock, lk_nodes[depth]);
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+ lck = kzalloc(i + pbytes, GFP_NOFS);
-+ if (lck == NULL)
-+ return NULL;
-+
-+ if (pbytes != 0)
-+ lck->lk_private = (void *)lck + i;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+ lck->lk_depth = depth;
-+ INIT_LIST_HEAD(&lck->lk_blocked_list);
-+
-+ for (i = 0; i < depth; i++) {
-+ struct htree_lock_node *node = &lck->lk_nodes[i];
-+
-+ node->ln_mode = HTREE_LOCK_INVAL;
-+ INIT_LIST_HEAD(&node->ln_major_list);
-+ INIT_LIST_HEAD(&node->ln_minor_list);
-+ INIT_LIST_HEAD(&node->ln_alive_list);
-+ INIT_LIST_HEAD(&node->ln_blocked_list);
-+ INIT_LIST_HEAD(&node->ln_granted_list);
-+ }
-+
-+ return lck;
-+}
-+EXPORT_SYMBOL(htree_lock_alloc);
-+
-+/* free htree_lock node */
-+void
-+htree_lock_free(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
-+ kfree(lck);
-+}
-+EXPORT_SYMBOL(htree_lock_free);
Index: linux-3.10.0-229.1.2.fc21.x86_64/fs/ext4/Makefile
===================================================================
--- linux-3.10.0-229.1.2.fc21.x86_64.orig/fs/ext4/Makefile
}
static const unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-Index: linux-4.15.0/fs/ext4/htree_lock.c
-===================================================================
---- /dev/null
-+++ linux-4.15.0/fs/ext4/htree_lock.c
-@@ -0,0 +1,891 @@
-+/*
-+ * fs/ext4/htree_lock.c
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+#include <linux/jbd2.h>
-+#include <linux/hash.h>
-+#include <linux/module.h>
-+#include <linux/htree_lock.h>
-+
-+enum {
-+ HTREE_LOCK_BIT_EX = (1 << HTREE_LOCK_EX),
-+ HTREE_LOCK_BIT_PW = (1 << HTREE_LOCK_PW),
-+ HTREE_LOCK_BIT_PR = (1 << HTREE_LOCK_PR),
-+ HTREE_LOCK_BIT_CW = (1 << HTREE_LOCK_CW),
-+ HTREE_LOCK_BIT_CR = (1 << HTREE_LOCK_CR),
-+};
-+
-+enum {
-+ HTREE_LOCK_COMPAT_EX = 0,
-+ HTREE_LOCK_COMPAT_PW = HTREE_LOCK_COMPAT_EX | HTREE_LOCK_BIT_CR,
-+ HTREE_LOCK_COMPAT_PR = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_PR,
-+ HTREE_LOCK_COMPAT_CW = HTREE_LOCK_COMPAT_PW | HTREE_LOCK_BIT_CW,
-+ HTREE_LOCK_COMPAT_CR = HTREE_LOCK_COMPAT_CW | HTREE_LOCK_BIT_PR |
-+ HTREE_LOCK_BIT_PW,
-+};
-+
-+static int htree_lock_compat[] = {
-+ [HTREE_LOCK_EX] = HTREE_LOCK_COMPAT_EX,
-+ [HTREE_LOCK_PW] = HTREE_LOCK_COMPAT_PW,
-+ [HTREE_LOCK_PR] = HTREE_LOCK_COMPAT_PR,
-+ [HTREE_LOCK_CW] = HTREE_LOCK_COMPAT_CW,
-+ [HTREE_LOCK_CR] = HTREE_LOCK_COMPAT_CR,
-+};
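Expanded into a matrix (derived directly from the definitions above; "Y" marks pairs of modes that may be granted at the same time):

          EX   PW   PR   CW   CR
     EX    -    -    -    -    -
     PW    -    -    -    -    Y
     PR    -    -    Y    -    Y
     CW    -    -    -    Y    Y
     CR    -    Y    Y    Y    Y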
-+
-+/* max allowed htree-lock depth.
-+ * We only need depth=3 for ext4, although users may request a higher value. */
-+#define HTREE_LOCK_DEP_MAX 16
-+
-+#ifdef HTREE_LOCK_DEBUG
-+
-+static char *hl_name[] = {
-+ [HTREE_LOCK_EX] "EX",
-+ [HTREE_LOCK_PW] "PW",
-+ [HTREE_LOCK_PR] "PR",
-+ [HTREE_LOCK_CW] "CW",
-+ [HTREE_LOCK_CR] "CR",
-+};
-+
-+/* lock stats */
-+struct htree_lock_node_stats {
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long retried[HTREE_LOCK_MAX];
-+ unsigned long long events;
-+};
-+
-+struct htree_lock_stats {
-+ struct htree_lock_node_stats nodes[HTREE_LOCK_DEP_MAX];
-+ unsigned long long granted[HTREE_LOCK_MAX];
-+ unsigned long long blocked[HTREE_LOCK_MAX];
-+};
-+
-+static struct htree_lock_stats hl_stats;
-+
-+void htree_lock_stat_reset(void)
-+{
-+ memset(&hl_stats, 0, sizeof(hl_stats));
-+}
-+
-+void htree_lock_stat_print(int depth)
-+{
-+ int i;
-+ int j;
-+
-+ printk(KERN_DEBUG "HTREE LOCK STATS:\n");
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ printk(KERN_DEBUG "[%s]: G [%10llu], B [%10llu]\n",
-+ hl_name[i], hl_stats.granted[i], hl_stats.blocked[i]);
-+ }
-+ for (i = 0; i < depth; i++) {
-+ printk(KERN_DEBUG "HTREE CHILD [%d] STATS:\n", i);
-+ for (j = 0; j < HTREE_LOCK_MAX; j++) {
-+ printk(KERN_DEBUG
-+ "[%s]: G [%10llu], B [%10llu], R [%10llu]\n",
-+ hl_name[j], hl_stats.nodes[i].granted[j],
-+ hl_stats.nodes[i].blocked[j],
-+ hl_stats.nodes[i].retried[j]);
-+ }
-+ }
-+}
-+
-+#define lk_grant_inc(m) do { hl_stats.granted[m]++; } while (0)
-+#define lk_block_inc(m) do { hl_stats.blocked[m]++; } while (0)
-+#define ln_grant_inc(d, m) do { hl_stats.nodes[d].granted[m]++; } while (0)
-+#define ln_block_inc(d, m) do { hl_stats.nodes[d].blocked[m]++; } while (0)
-+#define ln_retry_inc(d, m) do { hl_stats.nodes[d].retried[m]++; } while (0)
-+#define ln_event_inc(d) do { hl_stats.nodes[d].events++; } while (0)
-+
-+#else /* !HTREE_LOCK_DEBUG */
-+
-+void htree_lock_stat_reset(void) {}
-+void htree_lock_stat_print(int depth) {}
-+
-+#define lk_grant_inc(m) do {} while (0)
-+#define lk_block_inc(m) do {} while (0)
-+#define ln_grant_inc(d, m) do {} while (0)
-+#define ln_block_inc(d, m) do {} while (0)
-+#define ln_retry_inc(d, m) do {} while (0)
-+#define ln_event_inc(d) do {} while (0)
-+
-+#endif /* HTREE_LOCK_DEBUG */
-+
-+EXPORT_SYMBOL(htree_lock_stat_reset);
-+EXPORT_SYMBOL(htree_lock_stat_print);
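These counters are compiled in only when HTREE_LOCK_DEBUG is defined; a hedged sketch of how a test might use them:

/* illustrative: bracket a workload with the debug counters */
static void stats_example(void)
{
        htree_lock_stat_reset();
        /* ... run a create/lookup workload on the directory ... */
        htree_lock_stat_print(2); /* tree-level stats plus child levels 0 and 1 */
}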
-+
-+#define HTREE_DEP_ROOT (-1)
-+
-+#define htree_spin_lock(lhead, dep) \
-+ bit_spin_lock((dep) + 1, &(lhead)->lh_lock)
-+#define htree_spin_unlock(lhead, dep) \
-+ bit_spin_unlock((dep) + 1, &(lhead)->lh_lock)
-+
-+#define htree_key_event_ignore(child, ln) \
-+ (!((child)->lc_events & (1 << (ln)->ln_mode)))
-+
-+static int
-+htree_key_list_empty(struct htree_lock_node *ln)
-+{
-+ return list_empty(&ln->ln_major_list) && list_empty(&ln->ln_minor_list);
-+}
-+
-+static void
-+htree_key_list_del_init(struct htree_lock_node *ln)
-+{
-+ struct htree_lock_node *tmp = NULL;
-+
-+ if (!list_empty(&ln->ln_minor_list)) {
-+ tmp = list_entry(ln->ln_minor_list.next,
-+ struct htree_lock_node, ln_minor_list);
-+ list_del_init(&ln->ln_minor_list);
-+ }
-+
-+ if (list_empty(&ln->ln_major_list))
-+ return;
-+
-+ if (tmp == NULL) { /* not on minor key list */
-+ list_del_init(&ln->ln_major_list);
-+ } else {
-+ BUG_ON(!list_empty(&tmp->ln_major_list));
-+ list_replace_init(&ln->ln_major_list, &tmp->ln_major_list);
-+ }
-+}
-+
-+static void
-+htree_key_list_replace_init(struct htree_lock_node *old,
-+ struct htree_lock_node *new)
-+{
-+ if (!list_empty(&old->ln_major_list))
-+ list_replace_init(&old->ln_major_list, &new->ln_major_list);
-+
-+ if (!list_empty(&old->ln_minor_list))
-+ list_replace_init(&old->ln_minor_list, &new->ln_minor_list);
-+}
-+
-+static void
-+htree_key_event_enqueue(struct htree_lock_child *child,
-+ struct htree_lock_node *ln, int dep, void *event)
-+{
-+ struct htree_lock_node *tmp;
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ BUG_ON(ln->ln_mode == HTREE_LOCK_NL);
-+ if (event == NULL || htree_key_event_ignore(child, ln))
-+ return;
-+
-+ /* shouldn't be a very long list */
-+ list_for_each_entry(tmp, &ln->ln_alive_list, ln_alive_list) {
-+ if (tmp->ln_mode == HTREE_LOCK_NL) {
-+ ln_event_inc(dep);
-+ if (child->lc_callback != NULL)
-+ child->lc_callback(tmp->ln_ev_target, event);
-+ }
-+ }
-+}
-+
-+static int
-+htree_node_lock_enqueue(struct htree_lock *newlk, struct htree_lock *curlk,
-+ unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_child *child = &newlk->lk_head->lh_children[dep];
-+ struct htree_lock_node *newln = &newlk->lk_nodes[dep];
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+
-+ /* NB: ALWAYS called holding lhead::lh_lock(dep) */
-+ /* NB: we only expect PR/PW lock modes here; only these two modes are
-+ * allowed for htree_node_lock (asserted in htree_node_lock_internal).
-+ * NL is only used for listeners; users can't request NL mode directly */
-+ if ((curln->ln_mode == HTREE_LOCK_NL) ||
-+ (curln->ln_mode != HTREE_LOCK_PW &&
-+ newln->ln_mode != HTREE_LOCK_PW)) {
-+ /* no conflict, attach it on granted list of @curlk */
-+ if (curln->ln_mode != HTREE_LOCK_NL) {
-+ list_add(&newln->ln_granted_list,
-+ &curln->ln_granted_list);
-+ } else {
-+ /* replace key owner */
-+ htree_key_list_replace_init(curln, newln);
-+ }
-+
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ ln_grant_inc(dep, newln->ln_mode);
-+ return 1; /* still hold lh_lock */
-+ }
-+
-+ if (!wait) { /* can't grant and don't want to wait */
-+ ln_retry_inc(dep, newln->ln_mode);
-+ newln->ln_mode = HTREE_LOCK_INVAL;
-+ return -1; /* don't wait and just return -1 */
-+ }
-+
-+ newlk->lk_task = current;
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ /* conflict, attach it on blocked list of curlk */
-+ list_add_tail(&newln->ln_blocked_list, &curln->ln_blocked_list);
-+ list_add(&newln->ln_alive_list, &curln->ln_alive_list);
-+ ln_block_inc(dep, newln->ln_mode);
-+
-+ htree_spin_unlock(newlk->lk_head, dep);
-+ /* wait to be given the lock */
-+ if (newlk->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt; the wake-up set me RUNNING */
-+ if (event == NULL || htree_key_event_ignore(child, newln))
-+ return 0; /* granted without lh_lock */
-+
-+ htree_spin_lock(newlk->lk_head, dep);
-+ htree_key_event_enqueue(child, newln, dep, event);
-+ return 1; /* still hold lh_lock */
-+}
-+
-+/*
-+ * get PR/PW access to a particular tree node according to @dep and @key;
-+ * it returns -1 if @wait is false and the lock can't be granted immediately.
-+ * All listeners (HTREE_LOCK_NL) on @dep with the same @key will receive
-+ * @event if it's not NULL.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_node_lock_internal(struct htree_lock_head *lhead, struct htree_lock *lck,
-+ htree_lock_mode_t mode, u32 key, unsigned dep,
-+ int wait, void *event)
-+{
-+ LIST_HEAD(list);
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ u16 major;
-+ u16 minor;
-+ u8 reverse;
-+ u8 ma_bits;
-+ u8 mi_bits;
-+
-+ BUG_ON(mode != HTREE_LOCK_PW && mode != HTREE_LOCK_PR);
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+
-+ key = hash_long(key, lhead->lh_hbits);
-+
-+ mi_bits = lhead->lh_hbits >> 1;
-+ ma_bits = lhead->lh_hbits - mi_bits;
-+
-+ lck->lk_nodes[dep].ln_major_key = major = key & ((1U << ma_bits) - 1);
-+ lck->lk_nodes[dep].ln_minor_key = minor = key >> ma_bits;
-+ lck->lk_nodes[dep].ln_mode = mode;
-+
-+ /*
-+ * The major key list is an ordered list, so searches start at the
-+ * end of the list that is numerically closer to major_key; at most
-+ * half of the list will be walked (for well-distributed keys). The
-+ * list traversal aborts early if the expected key location is passed.
-+ */
-+ reverse = (major >= (1 << (ma_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp,
-+ &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key < major) {
-+ /* attach _after_ @tmp */
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+
-+ } else {
-+ list_for_each_entry(tmp, &lhead->lh_children[dep].lc_list,
-+ lk_nodes[dep].ln_major_list) {
-+ if (tmp->lk_nodes[dep].ln_major_key == major) {
-+ goto search_minor;
-+
-+ } else if (tmp->lk_nodes[dep].ln_major_key > major) {
-+ /* insert _before_ @tmp */
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &tmp->lk_nodes[dep].ln_major_list);
-+ goto out_grant_major;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_major_list,
-+ &lhead->lh_children[dep].lc_list);
-+ goto out_grant_major;
-+ }
-+
-+ search_minor:
-+ /*
-+ * NB: the minor_key list doesn't have a "head"; @list is just a
-+ * temporary stub to help with list searching, so make sure it is
-+ * removed after searching.
-+ * The minor_key list is an ordered list too.
-+ */
-+ list_add_tail(&list, &tmp->lk_nodes[dep].ln_minor_list);
-+
-+ reverse = (minor >= (1 << (mi_bits - 1)));
-+
-+ if (reverse) {
-+ list_for_each_entry_reverse(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key < minor) {
-+ /* attach _after_ @tmp2 */
-+ list_add(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add(&lck->lk_nodes[dep].ln_minor_list, &list);
-+
-+ } else {
-+ list_for_each_entry(tmp2, &list,
-+ lk_nodes[dep].ln_minor_list) {
-+ if (tmp2->lk_nodes[dep].ln_minor_key == minor) {
-+ goto out_enqueue;
-+
-+ } else if (tmp2->lk_nodes[dep].ln_minor_key > minor) {
-+ /* insert _before_ @tmp2 */
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list,
-+ &tmp2->lk_nodes[dep].ln_minor_list);
-+ goto out_grant_minor;
-+ }
-+ }
-+
-+ list_add_tail(&lck->lk_nodes[dep].ln_minor_list, &list);
-+ }
-+
-+ out_grant_minor:
-+ if (list.next == &lck->lk_nodes[dep].ln_minor_list) {
-+ /* new lock @lck is the first one on minor_key list, which
-+ * means it has the smallest minor_key and it should
-+ * replace @tmp as minor_key owner */
-+ list_replace_init(&tmp->lk_nodes[dep].ln_major_list,
-+ &lck->lk_nodes[dep].ln_major_list);
-+ }
-+ /* remove the temporary head */
-+ list_del(&list);
-+
-+ out_grant_major:
-+ ln_grant_inc(dep, lck->lk_nodes[dep].ln_mode);
-+ return 1; /* granted with holding lh_lock */
-+
-+ out_enqueue:
-+ list_del(&list); /* remove temporary head */
-+ return htree_node_lock_enqueue(lck, tmp2, dep, wait, event);
-+}
-+
-+/*
-+ * release the key of @lck at level @dep, and grant any blocked locks.
-+ * The caller will still listen on @key if @event is not NULL, which means
-+ * the caller can see an event (via event_cb) whenever any lock with
-+ * the same key is granted at level @dep.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ * NB: a listener never blocks anyone because the listening mode is HTREE_LOCK_NL
-+ */
-+static void
-+htree_node_unlock_internal(struct htree_lock_head *lhead,
-+ struct htree_lock *curlk, unsigned dep, void *event)
-+{
-+ struct htree_lock_node *curln = &curlk->lk_nodes[dep];
-+ struct htree_lock *grtlk = NULL;
-+ struct htree_lock_node *grtln;
-+ struct htree_lock *poslk;
-+ struct htree_lock *tmplk;
-+
-+ if (!htree_node_is_granted(curlk, dep))
-+ return;
-+
-+ if (!list_empty(&curln->ln_granted_list)) {
-+ /* there is another granted lock */
-+ grtlk = list_entry(curln->ln_granted_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_granted_list);
-+ list_del_init(&curln->ln_granted_list);
-+ }
-+
-+ if (grtlk == NULL && !list_empty(&curln->ln_blocked_list)) {
-+ /*
-+ * @curlk is the only granted lock, so we confirmed:
-+ * a) curln is key owner (attached on major/minor_list),
-+ * so if there is any blocked lock, it should be attached
-+ * on curln->ln_blocked_list
-+ * b) we always can grant the first blocked lock
-+ */
-+ grtlk = list_entry(curln->ln_blocked_list.next,
-+ struct htree_lock,
-+ lk_nodes[dep].ln_blocked_list);
-+ BUG_ON(grtlk->lk_task == NULL);
-+ wake_up_process(grtlk->lk_task);
-+ }
-+
-+ if (event != NULL &&
-+ lhead->lh_children[dep].lc_events != HTREE_EVENT_DISABLE) {
-+ curln->ln_ev_target = event;
-+ curln->ln_mode = HTREE_LOCK_NL; /* listen! */
-+ } else {
-+ curln->ln_mode = HTREE_LOCK_INVAL;
-+ }
-+
-+ if (grtlk == NULL) { /* I must be the only one locking this key */
-+ struct htree_lock_node *tmpln;
-+
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (curln->ln_mode == HTREE_LOCK_NL) /* listening */
-+ return;
-+
-+ /* not listening */
-+ if (list_empty(&curln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(curln);
-+ return;
-+ }
-+
-+ tmpln = list_entry(curln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmpln->ln_mode != HTREE_LOCK_NL);
-+
-+ htree_key_list_replace_init(curln, tmpln);
-+ list_del_init(&curln->ln_alive_list);
-+
-+ return;
-+ }
-+
-+ /* have a granted lock */
-+ grtln = &grtlk->lk_nodes[dep];
-+ if (!list_empty(&curln->ln_blocked_list)) {
-+ /* only key owner can be on both lists */
-+ BUG_ON(htree_key_list_empty(curln));
-+
-+ if (list_empty(&grtln->ln_blocked_list)) {
-+ list_add(&grtln->ln_blocked_list,
-+ &curln->ln_blocked_list);
-+ }
-+ list_del_init(&curln->ln_blocked_list);
-+ }
-+ /*
-+ * NB: this is the tricky part:
-+ * We have only two modes for child-lock (PR and PW), also,
-+ * only owner of the key (attached on major/minor_list) can be on
-+ * both blocked_list and granted_list, so @grtlk must be one
-+ * of these two cases:
-+ *
-+ * a) @grtlk is taken from granted_list, which means we've granted
-+ * more than one lock so @grtlk has to be PR, the first blocked
-+ * lock must be PW and we can't grant it at all.
-+ * So even if @grtlk is not the owner of the key (its blocked_list
-+ * is empty), we don't care because we can't grant any lock anyway.
-+ * b) we just grant a new lock which is taken from head of blocked
-+ * list, and it should be the first granted lock, and it should
-+ * be the first one linked on blocked_list.
-+ *
-+ * Either way, we get the correct result by iterating the
-+ * blocked_list of @grtlk, without having to work out who the
-+ * owner of the current key is.
-+ */
-+ list_for_each_entry_safe(poslk, tmplk, &grtln->ln_blocked_list,
-+ lk_nodes[dep].ln_blocked_list) {
-+ if (grtlk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW ||
-+ poslk->lk_nodes[dep].ln_mode == HTREE_LOCK_PW)
-+ break;
-+ /* grant all readers */
-+ list_del_init(&poslk->lk_nodes[dep].ln_blocked_list);
-+ list_add(&poslk->lk_nodes[dep].ln_granted_list,
-+ &grtln->ln_granted_list);
-+
-+ BUG_ON(poslk->lk_task == NULL);
-+ wake_up_process(poslk->lk_task);
-+ }
-+
-+ /* if @curln is the owner of this key, replace it with @grtln */
-+ if (!htree_key_list_empty(curln))
-+ htree_key_list_replace_init(curln, grtln);
-+
-+ if (curln->ln_mode == HTREE_LOCK_INVAL)
-+ list_del_init(&curln->ln_alive_list);
-+}
-+
-+/*
-+ * a thin wrapper around htree_node_lock_internal; it returns 1 when the
-+ * lock is granted, and 0 only if @wait is false and the lock can't be
-+ * granted immediately
-+ */
-+int
-+htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ rc = htree_node_lock_internal(lhead, lck, mode, key, dep, wait, event);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, dep);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_node_lock_try);
-+
-+/* wrapper around htree_node_unlock_internal */
-+void
-+htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+
-+ BUG_ON(dep >= lck->lk_depth);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_spin_lock(lhead, dep);
-+ htree_node_unlock_internal(lhead, lck, dep, event);
-+ htree_spin_unlock(lhead, dep);
-+}
-+EXPORT_SYMBOL(htree_node_unlock);
-+
-+/* stop listening on child-lock level @dep */
-+void
-+htree_node_stop_listen(struct htree_lock *lck, unsigned dep)
-+{
-+ struct htree_lock_node *ln = &lck->lk_nodes[dep];
-+ struct htree_lock_node *tmp;
-+
-+ BUG_ON(htree_node_is_granted(lck, dep));
-+ BUG_ON(!list_empty(&ln->ln_blocked_list));
-+ BUG_ON(!list_empty(&ln->ln_granted_list));
-+
-+ if (!htree_node_is_listening(lck, dep))
-+ return;
-+
-+ htree_spin_lock(lck->lk_head, dep);
-+ ln->ln_mode = HTREE_LOCK_INVAL;
-+ ln->ln_ev_target = NULL;
-+
-+ if (htree_key_list_empty(ln)) { /* not owner */
-+ list_del_init(&ln->ln_alive_list);
-+ goto out;
-+ }
-+
-+ /* I'm the owner... */
-+ if (list_empty(&ln->ln_alive_list)) { /* no more listener */
-+ htree_key_list_del_init(ln);
-+ goto out;
-+ }
-+
-+ tmp = list_entry(ln->ln_alive_list.next,
-+ struct htree_lock_node, ln_alive_list);
-+
-+ BUG_ON(tmp->ln_mode != HTREE_LOCK_NL);
-+ htree_key_list_replace_init(ln, tmp);
-+ list_del_init(&ln->ln_alive_list);
-+ out:
-+ htree_spin_unlock(lck->lk_head, dep);
-+}
-+EXPORT_SYMBOL(htree_node_stop_listen);
-+
-+/* release all child-locks if we have any */
-+static void
-+htree_node_release_all(struct htree_lock *lck)
-+{
-+ int i;
-+
-+ for (i = 0; i < lck->lk_depth; i++) {
-+ if (htree_node_is_granted(lck, i))
-+ htree_node_unlock(lck, i, NULL);
-+ else if (htree_node_is_listening(lck, i))
-+ htree_node_stop_listen(lck, i);
-+ }
-+}
-+
-+/*
-+ * obtain htree lock, it could be blocked inside if there's conflict
-+ * with any granted or blocked lock and @wait is true.
-+ * NB: ALWAYS called holding lhead::lh_lock
-+ */
-+static int
-+htree_lock_internal(struct htree_lock *lck, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int granted = 0;
-+ int blocked = 0;
-+ int i;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ if (lhead->lh_nblocked[i] != 0)
-+ blocked |= 1 << i;
-+ }
-+ if ((htree_lock_compat[lck->lk_mode] & granted) != granted ||
-+ (htree_lock_compat[lck->lk_mode] & blocked) != blocked) {
-+ /* block the current lock even if it only conflicts with another
-+ * blocked lock, so that locks like EX can't starve */
-+ if (!wait)
-+ return -1;
-+ lhead->lh_nblocked[lck->lk_mode]++;
-+ lk_block_inc(lck->lk_mode);
-+
-+ lck->lk_task = current;
-+ list_add_tail(&lck->lk_blocked_list, &lhead->lh_blocked_list);
-+
-+retry:
-+ set_current_state(TASK_UNINTERRUPTIBLE);
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ /* wait to be given the lock */
-+ if (lck->lk_task != NULL)
-+ schedule();
-+ /* granted, no doubt; the wake-up will set me RUNNING.
-+ * However, the thread may be woken up spuriously, so we
-+ * need to check again whether the lock was really granted. */
-+ if (!list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ if (list_empty(&lck->lk_blocked_list)) {
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return 0;
-+ }
-+ goto retry;
-+ }
-+ return 0; /* without lh_lock */
-+ }
-+ lhead->lh_ngranted[lck->lk_mode]++;
-+ lk_grant_inc(lck->lk_mode);
-+ return 1;
-+}
-+
-+/* release htree lock. NB: ALWAYS called holding lhead::lh_lock */
-+static void
-+htree_unlock_internal(struct htree_lock *lck)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ struct htree_lock *tmp;
-+ struct htree_lock *tmp2;
-+ int granted = 0;
-+ int i;
-+
-+ BUG_ON(lhead->lh_ngranted[lck->lk_mode] == 0);
-+
-+ lhead->lh_ngranted[lck->lk_mode]--;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+
-+ for (i = 0; i < HTREE_LOCK_MAX; i++) {
-+ if (lhead->lh_ngranted[i] != 0)
-+ granted |= 1 << i;
-+ }
-+ list_for_each_entry_safe(tmp, tmp2,
-+ &lhead->lh_blocked_list, lk_blocked_list) {
-+ /* conflict with any granted lock? */
-+ if ((htree_lock_compat[tmp->lk_mode] & granted) != granted)
-+ break;
-+
-+ list_del_init(&tmp->lk_blocked_list);
-+
-+ BUG_ON(lhead->lh_nblocked[tmp->lk_mode] == 0);
-+
-+ lhead->lh_nblocked[tmp->lk_mode]--;
-+ lhead->lh_ngranted[tmp->lk_mode]++;
-+ granted |= 1 << tmp->lk_mode;
-+
-+ BUG_ON(tmp->lk_task == NULL);
-+ wake_up_process(tmp->lk_task);
-+ }
-+}
-+
-+/* wrapper around htree_lock_internal and the exported interface.
-+ * It always returns 1 with the lock granted if @wait is true; it can
-+ * return 0 if @wait is false and the request can't be granted immediately */
-+int
-+htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait)
-+{
-+ int rc;
-+
-+ BUG_ON(lck->lk_depth > lhead->lh_depth);
-+ BUG_ON(lck->lk_head != NULL);
-+ BUG_ON(lck->lk_task != NULL);
-+
-+ lck->lk_head = lhead;
-+ lck->lk_mode = mode;
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_lock_try);
-+
-+/* wrapper around htree_unlock_internal and the exported interface.
-+ * It releases all htree_node_locks as well as the htree_lock itself */
-+void
-+htree_unlock(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_head == NULL);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lck->lk_head, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ htree_spin_unlock(lck->lk_head, HTREE_DEP_ROOT);
-+ lck->lk_head = NULL;
-+ lck->lk_task = NULL;
-+}
-+EXPORT_SYMBOL(htree_unlock);
-+
-+/* change lock mode */
-+void
-+htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode)
-+{
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL);
-+ lck->lk_mode = mode;
-+}
-+EXPORT_SYMBOL(htree_change_mode);
-+
-+/* release the htree lock, then lock it again with a new mode.
-+ * This function first releases all htree_node_locks and the htree_lock,
-+ * then tries to acquire the htree_lock with the new @mode.
-+ * It always returns 1 with the lock granted if @wait is true; it can
-+ * return 0 if @wait is false and the request can't be granted immediately */
-+int
-+htree_change_lock_try(struct htree_lock *lck, htree_lock_mode_t mode, int wait)
-+{
-+ struct htree_lock_head *lhead = lck->lk_head;
-+ int rc;
-+
-+ BUG_ON(lhead == NULL);
-+ BUG_ON(lck->lk_mode == mode);
-+ BUG_ON(lck->lk_mode == HTREE_LOCK_INVAL || mode == HTREE_LOCK_INVAL);
-+
-+ htree_node_release_all(lck);
-+
-+ htree_spin_lock(lhead, HTREE_DEP_ROOT);
-+ htree_unlock_internal(lck);
-+ lck->lk_mode = mode;
-+ rc = htree_lock_internal(lck, wait);
-+ if (rc != 0)
-+ htree_spin_unlock(lhead, HTREE_DEP_ROOT);
-+ return rc >= 0;
-+}
-+EXPORT_SYMBOL(htree_change_lock_try);
-+
-+/* create an htree_lock head with @depth levels (number of child-locks);
-+ * it is a per-resource structure */
-+struct htree_lock_head *
-+htree_lock_head_alloc(unsigned depth, unsigned hbits, unsigned priv)
-+{
-+ struct htree_lock_head *lhead;
-+ int i;
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+
-+ lhead = kzalloc(offsetof(struct htree_lock_head,
-+ lh_children[depth]) + priv, GFP_NOFS);
-+ if (lhead == NULL)
-+ return NULL;
-+
-+ if (hbits < HTREE_HBITS_MIN)
-+ lhead->lh_hbits = HTREE_HBITS_MIN;
-+ else if (hbits > HTREE_HBITS_MAX)
-+ lhead->lh_hbits = HTREE_HBITS_MAX;
-+ else
-+ lhead->lh_hbits = hbits;
-+
-+ lhead->lh_lock = 0;
-+ lhead->lh_depth = depth;
-+ INIT_LIST_HEAD(&lhead->lh_blocked_list);
-+ if (priv > 0) {
-+ lhead->lh_private = (void *)lhead +
-+ offsetof(struct htree_lock_head, lh_children[depth]);
-+ }
-+
-+ for (i = 0; i < depth; i++) {
-+ INIT_LIST_HEAD(&lhead->lh_children[i].lc_list);
-+ lhead->lh_children[i].lc_events = HTREE_EVENT_DISABLE;
-+ }
-+ return lhead;
-+}
-+EXPORT_SYMBOL(htree_lock_head_alloc);
-+
-+/* free the htree_lock head */
-+void
-+htree_lock_head_free(struct htree_lock_head *lhead)
-+{
-+ int i;
-+
-+ BUG_ON(!list_empty(&lhead->lh_blocked_list));
-+ for (i = 0; i < lhead->lh_depth; i++)
-+ BUG_ON(!list_empty(&lhead->lh_children[i].lc_list));
-+ kfree(lhead);
-+}
-+EXPORT_SYMBOL(htree_lock_head_free);
-+
-+/* register event callback for @events of child-lock at level @dep */
-+void
-+htree_lock_event_attach(struct htree_lock_head *lhead, unsigned dep,
-+ unsigned events, htree_event_cb_t callback)
-+{
-+ BUG_ON(lhead->lh_depth <= dep);
-+ lhead->lh_children[dep].lc_events = events;
-+ lhead->lh_children[dep].lc_callback = callback;
-+}
-+EXPORT_SYMBOL(htree_lock_event_attach);
-+
-+/* allocate an htree_lock, which is a per-thread structure; @pbytes is
-+ * extra space for the caller's private data */
-+struct htree_lock *
-+htree_lock_alloc(unsigned depth, unsigned pbytes)
-+{
-+ struct htree_lock *lck;
-+ int i = offsetof(struct htree_lock, lk_nodes[depth]);
-+
-+ if (depth > HTREE_LOCK_DEP_MAX) {
-+ printk(KERN_ERR "%d is larger than max htree_lock depth %d\n",
-+ depth, HTREE_LOCK_DEP_MAX);
-+ return NULL;
-+ }
-+ lck = kzalloc(i + pbytes, GFP_NOFS);
-+ if (lck == NULL)
-+ return NULL;
-+
-+ if (pbytes != 0)
-+ lck->lk_private = (void *)lck + i;
-+ lck->lk_mode = HTREE_LOCK_INVAL;
-+ lck->lk_depth = depth;
-+ INIT_LIST_HEAD(&lck->lk_blocked_list);
-+
-+ for (i = 0; i < depth; i++) {
-+ struct htree_lock_node *node = &lck->lk_nodes[i];
-+
-+ node->ln_mode = HTREE_LOCK_INVAL;
-+ INIT_LIST_HEAD(&node->ln_major_list);
-+ INIT_LIST_HEAD(&node->ln_minor_list);
-+ INIT_LIST_HEAD(&node->ln_alive_list);
-+ INIT_LIST_HEAD(&node->ln_blocked_list);
-+ INIT_LIST_HEAD(&node->ln_granted_list);
-+ }
-+
-+ return lck;
-+}
-+EXPORT_SYMBOL(htree_lock_alloc);
-+
-+/* free htree_lock node */
-+void
-+htree_lock_free(struct htree_lock *lck)
-+{
-+ BUG_ON(lck->lk_mode != HTREE_LOCK_INVAL);
-+ kfree(lck);
-+}
-+EXPORT_SYMBOL(htree_lock_free);
Index: linux-4.15.0/fs/ext4/namei.c
===================================================================
--- linux-4.15.0.orig/fs/ext4/namei.c
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
-Index: linux-4.15.0/include/linux/htree_lock.h
-===================================================================
---- /dev/null
-+++ linux-4.15.0/include/linux/htree_lock.h
-@@ -0,0 +1,187 @@
-+/*
-+ * include/linux/htree_lock.h
-+ *
-+ * Copyright (c) 2011, 2012, Intel Corporation.
-+ *
-+ * Author: Liang Zhen <liang@whamcloud.com>
-+ */
-+
-+/*
-+ * htree lock
-+ *
-+ * htree_lock is an advanced lock; it supports five lock modes (a concept
-+ * taken from DLM) and it is a sleeping lock.
-+ *
-+ * the most common use case is:
-+ * - create an htree_lock_head for the data
-+ * - each thread (contender) creates its own htree_lock
-+ * - a contender calls htree_lock(lock_node, mode) to protect the data and
-+ * calls htree_unlock to release the lock
-+ *
-+ * There is also a more complex, advanced use-case: a user can take a PW/PR
-+ * lock on a particular key, which is mostly used while the user is holding
-+ * a shared lock on the htree (CW, CR)
-+ *
-+ * htree_lock(lock_node, HTREE_LOCK_CR); lock the htree with CR
-+ * htree_node_lock(lock_node, HTREE_LOCK_PR, key...); lock @key with PR
-+ * ...
-+ * htree_node_unlock(lock_node); unlock the key
-+ *
-+ * We can also have N levels of such keys; all we need to do is specify
-+ * N levels while creating the htree_lock_head, and then we can
-+ * lock/unlock a specific level by:
-+ * htree_node_lock(lock_node, mode1, key1, level1...);
-+ * do something;
-+ * htree_node_lock(lock_node, mode1, key2, level2...);
-+ * do something;
-+ * htree_node_unlock(lock_node, level2);
-+ * htree_node_unlock(lock_node, level1);
-+ *
-+ * NB: for multi-level, should be careful about locking order to avoid deadlock
-+ */
-+
-+#ifndef _LINUX_HTREE_LOCK_H
-+#define _LINUX_HTREE_LOCK_H
-+
-+#include <linux/list.h>
-+#include <linux/spinlock.h>
-+#include <linux/sched.h>
-+
-+/*
-+ * Lock Modes
-+ * more details can be found here:
-+ * http://en.wikipedia.org/wiki/Distributed_lock_manager
-+ */
-+typedef enum {
-+ HTREE_LOCK_EX = 0, /* exclusive lock: incompatible with all others */
-+ HTREE_LOCK_PW, /* protected write: allows only CR users */
-+ HTREE_LOCK_PR, /* protected read: allow PR, CR users */
-+ HTREE_LOCK_CW, /* concurrent write: allow CR, CW users */
-+ HTREE_LOCK_CR, /* concurrent read: allow all but EX users */
-+ HTREE_LOCK_MAX, /* number of lock modes */
-+} htree_lock_mode_t;
-+
-+#define HTREE_LOCK_NL HTREE_LOCK_MAX
-+#define HTREE_LOCK_INVAL 0xdead10c
-+
-+enum {
-+ HTREE_HBITS_MIN = 2,
-+ HTREE_HBITS_DEF = 14,
-+ HTREE_HBITS_MAX = 32,
-+};
-+
-+enum {
-+ HTREE_EVENT_DISABLE = (0),
-+ HTREE_EVENT_RD = (1 << HTREE_LOCK_PR),
-+ HTREE_EVENT_WR = (1 << HTREE_LOCK_PW),
-+ HTREE_EVENT_RDWR = (HTREE_EVENT_RD | HTREE_EVENT_WR),
-+};
-+
-+struct htree_lock;
-+
-+typedef void (*htree_event_cb_t)(void *target, void *event);
-+
-+struct htree_lock_child {
-+ struct list_head lc_list; /* granted list */
-+ htree_event_cb_t lc_callback; /* event callback */
-+ unsigned lc_events; /* event types */
-+};
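A hedged sketch of the event hook wired through lc_callback and lc_events (the callback body and its usage here are illustrative; an event might carry, say, the block that entries were moved to during a split):

/* illustrative listener: runs whenever a lock with a non-NULL @event
 * is granted on a key this thread is still listening on */
static void dx_move_cb(void *target, void *event)
{
        /* @target is the pointer this thread stored when it started
         * listening (ln_ev_target); @event comes from the granted lock */
}

static void listen_example(struct htree_lock_head *lhead,
                           struct htree_lock *lck, void *my_target)
{
        /* only PW grants generate events at level 0 */
        htree_lock_event_attach(lhead, 0, HTREE_EVENT_WR, dx_move_cb);

        /* unlocking with a non-NULL @event keeps @lck listening on the
         * key; @my_target is stored and later passed to the callback */
        htree_node_unlock(lck, 0, my_target);

        /* ... a later PW htree_node_lock_try() on the same key that
         * passes an event will invoke dx_move_cb() on every listener ... */

        htree_node_stop_listen(lck, 0); /* done listening */
}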
-+
-+struct htree_lock_head {
-+ unsigned long lh_lock; /* bits lock */
-+ /* blocked lock list (htree_lock) */
-+ struct list_head lh_blocked_list;
-+ /* # key levels */
-+ u16 lh_depth;
-+ /* hash bits for key and limit number of locks */
-+ u16 lh_hbits;
-+ /* counters for blocked locks */
-+ u16 lh_nblocked[HTREE_LOCK_MAX];
-+ /* counters for granted locks */
-+ u16 lh_ngranted[HTREE_LOCK_MAX];
-+ /* private data */
-+ void *lh_private;
-+ /* array of children locks */
-+ struct htree_lock_child lh_children[0];
-+};
-+
-+/* htree_lock_node_t is child-lock for a specific key (ln_value) */
-+struct htree_lock_node {
-+ htree_lock_mode_t ln_mode;
-+ /* major hash key */
-+ u16 ln_major_key;
-+ /* minor hash key */
-+ u16 ln_minor_key;
-+ struct list_head ln_major_list;
-+ struct list_head ln_minor_list;
-+ /* alive list, all locks (granted, blocked, listening) are on it */
-+ struct list_head ln_alive_list;
-+ /* blocked list */
-+ struct list_head ln_blocked_list;
-+ /* granted list */
-+ struct list_head ln_granted_list;
-+ void *ln_ev_target;
-+};
-+
-+struct htree_lock {
-+ struct task_struct *lk_task;
-+ struct htree_lock_head *lk_head;
-+ void *lk_private;
-+ unsigned lk_depth;
-+ htree_lock_mode_t lk_mode;
-+ struct list_head lk_blocked_list;
-+ struct htree_lock_node lk_nodes[0];
-+};
-+
-+/* create a lock head, which stands for a resource */
-+struct htree_lock_head *htree_lock_head_alloc(unsigned depth,
-+ unsigned hbits, unsigned priv);
-+/* free a lock head */
-+void htree_lock_head_free(struct htree_lock_head *lhead);
-+/* register event callback for child lock at level @depth */
-+void htree_lock_event_attach(struct htree_lock_head *lhead, unsigned depth,
-+ unsigned events, htree_event_cb_t callback);
-+/* create a lock handle, which stands for a thread */
-+struct htree_lock *htree_lock_alloc(unsigned depth, unsigned pbytes);
-+/* free a lock handle */
-+void htree_lock_free(struct htree_lock *lck);
-+/* lock the htree; when @wait is false, 0 is returned if the lock can't
-+ * be granted immediately */
-+int htree_lock_try(struct htree_lock *lck, struct htree_lock_head *lhead,
-+ htree_lock_mode_t mode, int wait);
-+/* unlock htree */
-+void htree_unlock(struct htree_lock *lck);
-+/* unlock and relock htree with @new_mode */
-+int htree_change_lock_try(struct htree_lock *lck,
-+ htree_lock_mode_t new_mode, int wait);
-+void htree_change_mode(struct htree_lock *lck, htree_lock_mode_t mode);
-+/* require child lock (key) of htree at level @dep, @event will be sent to all
-+ * listeners on this @key while lock being granted */
-+int htree_node_lock_try(struct htree_lock *lck, htree_lock_mode_t mode,
-+ u32 key, unsigned dep, int wait, void *event);
-+/* release the child lock at level @dep; this lock will keep listening on its
-+ * key if @event isn't NULL, and event_cb will be called against @lck while
-+ * any other lock at level @dep with the same key is granted */
-+void htree_node_unlock(struct htree_lock *lck, unsigned dep, void *event);
-+/* stop listening on child lock at level @dep */
-+void htree_node_stop_listen(struct htree_lock *lck, unsigned dep);
-+/* for debug */
-+void htree_lock_stat_print(int depth);
-+void htree_lock_stat_reset(void);
-+
-+#define htree_lock(lck, lh, mode) htree_lock_try(lck, lh, mode, 1)
-+#define htree_change_lock(lck, mode) htree_change_lock_try(lck, mode, 1)
-+
-+#define htree_lock_mode(lck) ((lck)->lk_mode)
-+
-+#define htree_node_lock(lck, mode, key, dep) \
-+ htree_node_lock_try(lck, mode, key, dep, 1, NULL)
-+/* this is only safe in the thread context of the lock owner */
-+#define htree_node_is_granted(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_INVAL && \
-+ (lck)->lk_nodes[dep].ln_mode != HTREE_LOCK_NL)
-+/* this is only safe in the thread context of the lock owner */
-+#define htree_node_is_listening(lck, dep) \
-+ ((lck)->lk_nodes[dep].ln_mode == HTREE_LOCK_NL)
-+
-+#endif
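Putting the exported pieces together, a minimal end-to-end sketch (error handling elided; the depth, hbits, and @hash values are illustrative):

/* illustrative lifecycle of the htree_lock API; @hash is a
 * hypothetical name hash */
static int htree_lifecycle_example(u32 hash)
{
        struct htree_lock_head *lhead;
        struct htree_lock *lck;

        lhead = htree_lock_head_alloc(2, HTREE_HBITS_DEF, 0); /* per resource */
        lck = htree_lock_alloc(2, 0);                         /* per thread */
        if (lhead == NULL || lck == NULL)
                return -ENOMEM;

        htree_lock(lck, lhead, HTREE_LOCK_CR);        /* shared tree lock */
        htree_node_lock(lck, HTREE_LOCK_PR, hash, 0); /* reader on one key */
        /* ... do the lookup under the per-key lock ... */
        htree_unlock(lck);      /* drops child locks, then the tree lock */

        htree_lock_free(lck);
        htree_lock_head_free(lhead);
        return 0;
}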
rhel7.6/ext4-disable-mb-cache.patch
rhel7.6/ext4-nocmtime.patch
rhel7.6/ext4-large-dir.patch
+base/ext4-htree-lock.patch
rhel7.6/ext4-pdirop.patch
rhel7.6/ext4-max-dir-size.patch
rhel7.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-disable-mb-cache.patch
rhel7.6/ext4-nocmtime.patch
rhel7.7/ext4-large-dir.patch
+base/ext4-htree-lock.patch
rhel7.7/ext4-pdirop.patch
rhel7.6/ext4-max-dir-size.patch
rhel7.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-disable-mb-cache.patch
rhel7.6/ext4-nocmtime.patch
rhel7.7/ext4-large-dir.patch
+base/ext4-htree-lock.patch
rhel7.7/ext4-pdirop.patch
rhel7.6/ext4-max-dir-size.patch
rhel7.6/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
suse15/ext4-large-eas.patch
suse15/ext4-disable-mb-cache.patch
ubuntu18/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
suse15/ext4-max-dir-size.patch
suse15/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
ubuntu18/ext4-data-in-dirent.patch
ubuntu18/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
ubuntu18/ext4-pdirop.patch
sles12sp2/ext4-max-dir-size.patch
ubuntu18/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
ubuntu18/ext4-data-in-dirent.patch
ubuntu18/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
ubuntu18/ext4-pdirop.patch
sles12sp2/ext4-max-dir-size.patch
ubuntu18/ext4-corrupted-inode-block-bitmaps-handling-patches-001.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
ubuntu18/ext4-data-in-dirent.patch
rhel8/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
sles12sp3/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
ubuntu18/ext4-data-in-dirent.patch
rhel8/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
sles12sp3/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
sles12sp2/ext4-disable-mb-cache.patch
rhel7.6/ext4-nocmtime.patch
sles12sp2/ext4-large-dir.patch
+base/ext4-htree-lock.patch
sles12sp2/ext4-pdirop.patch
sles12sp2/ext4-max-dir-size.patch
sles12sp2/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
sles12sp3/ext4-disable-mb-cache.patch
rhel7.6/ext4-nocmtime.patch
sles12sp3/ext4-large-dir.patch
+base/ext4-htree-lock.patch
sles12sp3/ext4-pdirop.patch
sles12sp3/ext4-max-dir-size.patch
sles12sp3/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
ubuntu18/ext4-data-in-dirent.patch
rhel8/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
rhel8/ext4-pdirop.patch
sles12sp3/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch
rhel7.6/ext4-mballoc-pa-free-mismatch.patch
linux-5.4/ext4-data-in-dirent.patch
rhel8/ext4-nocmtime.patch
+base/ext4-htree-lock.patch
linux-5.4/ext4-pdirop.patch
sles12sp3/ext4-max-dir-size.patch
rhel8/ext4-corrupted-inode-block-bitmaps-handling-patches.patch