/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/include/libcfs/libcfs_hash.h
 *
 * Hashing routines
 *
 */

#ifndef __LIBCFS_HASH_H__
#define __LIBCFS_HASH_H__
/*
 * Knuth recommends primes in approximately golden ratio to the maximum
 * integer representable by a machine word for multiplicative hashing.
 * Chuck Lever verified the effectiveness of this technique:
 * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
 *
 * These primes are chosen to be bit-sparse, that is, operations on
 * them can use shifts and additions instead of multiplications for
 * machines where multiplications are slow.
 */
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
#define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL

/*
 * Ideally we would use HAVE_HASH_LONG for this, but on Linux we configure
 * the Linux kernel and user space at the same time, so we need to
 * differentiate between them explicitly. If this is not needed on other
 * architectures, then we'll need to move the functions to arch-specific
 * headers.
 */

#if (defined __linux__ && defined __KERNEL__)
#include <linux/hash.h>

#define cfs_hash_long(val, bits)    hash_long(val, bits)
#else
/* Fast hashing routine for a long.
   (C) 2002 William Lee Irwin III, IBM */

#if BITS_PER_LONG == 32
/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
#define CFS_GOLDEN_RATIO_PRIME          CFS_GOLDEN_RATIO_PRIME_32
#elif BITS_PER_LONG == 64
/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
#define CFS_GOLDEN_RATIO_PRIME          CFS_GOLDEN_RATIO_PRIME_64
#else
#error Define CFS_GOLDEN_RATIO_PRIME for your wordsize.
#endif

static inline unsigned long cfs_hash_long(unsigned long val, unsigned int bits)
{
        unsigned long hash = val;

#if BITS_PER_LONG == 64
        /*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
        unsigned long n = hash;
        n <<= 18;
        hash -= n;
        n <<= 33;
        hash -= n;
        n <<= 3;
        hash += n;
        n <<= 3;
        hash -= n;
        n <<= 4;
        hash += n;
        n <<= 2;
        hash += n;
#else
        /* On some cpus multiply is faster, on others gcc will do shifts */
        hash *= CFS_GOLDEN_RATIO_PRIME;
#endif

        /* High bits are more random, so use them. */
        return hash >> (BITS_PER_LONG - bits);
}
#if 0
static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
{
        return cfs_hash_long((unsigned long)ptr, bits);
}
#endif

/* !(__linux__ && __KERNEL__) */
#endif
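
/*
 * Illustrative sketch (not part of the original header): cfs_hash_long()
 * folds a long into its top @bits bits, i.e. it maps a value onto one of
 * 2^bits hash buckets.  The helper name below is hypothetical.
 */
#if 0
static unsigned long example_bucket_index(void *ptr)
{
        /* map a pointer onto one of 2^7 == 128 buckets */
        return cfs_hash_long((unsigned long)ptr, 7);
}
#endif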

/** disable debug */
#define CFS_HASH_DEBUG_NONE         0
/** record hash depth and output to console when it's too deep;
 *  computing overhead is low but it consumes more memory */
#define CFS_HASH_DEBUG_1            1
/** expensive: also validate keys */
#define CFS_HASH_DEBUG_2            2

#define CFS_HASH_DEBUG_LEVEL        CFS_HASH_DEBUG_NONE

struct cfs_hash_ops;
struct cfs_hash_lock_ops;
struct cfs_hash_hlist_ops;

typedef union {
        cfs_rwlock_t                rw;             /**< rwlock */
        cfs_spinlock_t              spin;           /**< spinlock */
} cfs_hash_lock_t;

/**
 * cfs_hash_bucket is a container of:
 * - lock, counter ...
 * - array of hash-heads starting from hsb_head[0], where a hash-head
 *   can be one of
 *   . cfs_hash_head_t
 *   . cfs_hash_head_dep_t
 *   . cfs_hash_dhead_t
 *   . cfs_hash_dhead_dep_t
 *   depending on the user's requirements
 * - some extra bytes (the caller can request them when creating the hash)
 */
typedef struct cfs_hash_bucket {
        cfs_hash_lock_t             hsb_lock;       /**< bucket lock */
        __u32                       hsb_count;      /**< current entries */
        __u32                       hsb_version;    /**< change version */
        unsigned int                hsb_index;      /**< index of bucket */
        int                         hsb_depmax;     /**< max depth on bucket */
        char                        hsb_head[0];    /**< hash-head array */
} cfs_hash_bucket_t;

/**
 * cfs_hash bucket descriptor; it normally lives on the caller's stack
 */
typedef struct cfs_hash_bd {
        cfs_hash_bucket_t          *bd_bucket;      /**< address of bucket */
        unsigned int                bd_offset;      /**< offset in bucket */
} cfs_hash_bd_t;

#define CFS_HASH_NAME_LEN           16      /**< default name length */
#define CFS_HASH_BIGNAME_LEN        64      /**< bigname for param tree */

#define CFS_HASH_BKT_BITS           3       /**< default bits of bucket */
#define CFS_HASH_BITS_MAX           30      /**< max bits of bucket */
#define CFS_HASH_BITS_MIN           CFS_HASH_BKT_BITS

/**
 * common hash attributes.
 */
enum cfs_hash_tag {
        /**
         * no lock needed; the caller will protect operations with its
         * own lock. With this flag:
         *  . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
         *    will be ignored.
         *  . Some functions are disabled with this flag, e.g.:
         *    cfs_hash_for_each_empty, cfs_hash_rehash
         */
        CFS_HASH_NO_LOCK        = 1 << 0,
        /** no bucket lock, use one spinlock to protect the whole hash */
        CFS_HASH_NO_BKTLOCK     = 1 << 1,
        /** rwlock to protect bucket */
        CFS_HASH_RW_BKTLOCK     = 1 << 2,
        /** spinlock to protect bucket */
        CFS_HASH_SPIN_BKTLOCK   = 1 << 3,
        /** always add new item to tail */
        CFS_HASH_ADD_TAIL       = 1 << 4,
        /** hash-table doesn't have refcount on item */
        CFS_HASH_NO_ITEMREF     = 1 << 5,
        /** big name for param-tree */
        CFS_HASH_BIGNAME        = 1 << 6,
        /** track global count */
        CFS_HASH_COUNTER        = 1 << 7,
        /** rehash item by new key */
        CFS_HASH_REHASH_KEY     = 1 << 8,
        /** enable dynamic hash resizing */
        CFS_HASH_REHASH         = 1 << 9,
        /** can shrink hash-size */
        CFS_HASH_SHRINK         = 1 << 10,
        /** assert hash is empty on exit */
        CFS_HASH_ASSERT_EMPTY   = 1 << 11,
        /** record hlist depth */
        CFS_HASH_DEPTH          = 1 << 12,
        /**
         * rehash is always scheduled in a different thread, so current
         * changes on the hash table are non-blocking
         */
        CFS_HASH_NBLK_CHANGE    = 1 << 13,
        /** NB: hs_flags is typed as __u16; please change it
         * if you need to extend to >= 16 flags */
};

/** most used attributes */
#define CFS_HASH_DEFAULT       (CFS_HASH_RW_BKTLOCK | \
                                CFS_HASH_COUNTER | CFS_HASH_REHASH)

/**
 * cfs_hash is a general-purpose hash-table implementation; it supports:
 *    . two refcount modes
 *      hash-table with & without refcount
 *    . four lock modes
 *      nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
 *    . general operations
 *      lookup, add (add_tail or add_head), delete
 *    . rehash
 *      grow or shrink
 *    . iteration
 *      locked iteration and unlocked iteration
 *    . bigname
 *      support long name hash
 *    . debug
 *      trace max searching depth
 *
 * Rehash:
 * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
 * is spawned to handle the rehash in the background. Other processes can
 * concurrently perform additions, deletions, and lookups without being
 * blocked on rehash completion, because rehash releases the global wrlock
 * for each bucket.
 *
 * Rehash and iteration can't run at the same time because it's too tricky
 * to keep both of them safe and correct.
 * As they are relatively rare operations:
 *   . if iteration is in progress while we try to launch rehash, we
 *     just give up; the iterator will launch the rehash at the end.
 *   . if rehash is in progress while we try to iterate the hash table,
 *     then we just wait (it shouldn't take very long); anyway, nobody
 *     should expect iteration of the whole hash-table to be non-blocking.
 *
 * During rehashing, a (key,object) pair may be in one of two buckets,
 * depending on whether the worker task has yet to transfer the object
 * to its new location in the table. Lookups and deletions need to search
 * both locations; additions must take care to only insert into the new
 * bucket.
 */

typedef struct cfs_hash {
        /** serialize with rehash, or serialize all operations if
         * the hash-table has CFS_HASH_NO_BKTLOCK */
        cfs_hash_lock_t             hs_lock;
        /** hash operations */
        struct cfs_hash_ops        *hs_ops;
        /** hash lock operations */
        struct cfs_hash_lock_ops   *hs_lops;
        /** hash list operations */
        struct cfs_hash_hlist_ops  *hs_hops;
        /** hash buckets-table */
        cfs_hash_bucket_t         **hs_buckets;
        /** total number of items on this hash-table */
        cfs_atomic_t                hs_count;
        /** hash flags, see cfs_hash_tag for detail */
        __u16                       hs_flags;
        /** # of extra bytes per bucket, for the user to store extended
         * attributes */
        __u16                       hs_extra_bytes;
        /** wants to iterate */
        __u8                        hs_iterating;
        /** hash-table is dying */
        __u8                        hs_exiting;
        /** current hash bits */
        __u8                        hs_cur_bits;
        /** min hash bits */
        __u8                        hs_min_bits;
        /** max hash bits */
        __u8                        hs_max_bits;
        /** bits for rehash */
        __u8                        hs_rehash_bits;
        /** bits for each bucket */
        __u8                        hs_bkt_bits;
        /** resize min threshold */
        __u16                       hs_min_theta;
        /** resize max threshold */
        __u16                       hs_max_theta;
        /** resize count */
        __u32                       hs_rehash_count;
        /** # of iterators (callers of cfs_hash_for_each_*) */
        __u32                       hs_iterators;
        /** rehash workitem */
        cfs_workitem_t              hs_rehash_wi;
        /** refcount on this hash table */
        cfs_atomic_t                hs_refcount;
        /** rehash buckets-table */
        cfs_hash_bucket_t         **hs_rehash_buckets;
#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
        /** serialize debug members */
        cfs_spinlock_t              hs_dep_lock;
        /** max depth */
        unsigned int                hs_dep_max;
        /** id of the deepest bucket */
        unsigned int                hs_dep_bkt;
        /** offset in the deepest bucket */
        unsigned int                hs_dep_off;
        /** bits when we found the max depth */
        unsigned int                hs_dep_bits;
        /** workitem to output max depth */
        cfs_workitem_t              hs_dep_wi;
#endif
        /** name of htable */
        char                        hs_name[0];
} cfs_hash_t;

typedef struct cfs_hash_lock_ops {
        /** lock the hash table */
        void    (*hs_lock)(cfs_hash_lock_t *lock, int exclusive);
        /** unlock the hash table */
        void    (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive);
        /** lock the hash bucket */
        void    (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive);
        /** unlock the hash bucket */
        void    (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive);
} cfs_hash_lock_ops_t;

typedef struct cfs_hash_hlist_ops {
        /** return hlist_head of hash-head of @bd */
        cfs_hlist_head_t *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd);
        /** return hash-head size */
        int (*hop_hhead_size)(cfs_hash_t *hs);
        /** add @hnode to hash-head of @bd */
        int (*hop_hnode_add)(cfs_hash_t *hs,
                             cfs_hash_bd_t *bd, cfs_hlist_node_t *hnode);
        /** remove @hnode from hash-head of @bd */
        int (*hop_hnode_del)(cfs_hash_t *hs,
                             cfs_hash_bd_t *bd, cfs_hlist_node_t *hnode);
} cfs_hash_hlist_ops_t;

typedef struct cfs_hash_ops {
        /** return hashed value from @key */
        unsigned (*hs_hash)(cfs_hash_t *hs, const void *key, unsigned mask);
        /** return key address of @hnode */
        void *   (*hs_key)(cfs_hlist_node_t *hnode);
        /** copy key from @hnode to @key */
        void     (*hs_keycpy)(cfs_hlist_node_t *hnode, void *key);
        /**
         *  compare @key with key of @hnode
         *  returns 1 on a match
         */
        int      (*hs_keycmp)(const void *key, cfs_hlist_node_t *hnode);
        /** return object address of @hnode, i.e. container_of(...hnode) */
        void *   (*hs_object)(cfs_hlist_node_t *hnode);
        /** take a refcount on the item; always called while holding the
         * bucket lock */
        void     (*hs_get)(cfs_hash_t *hs, cfs_hlist_node_t *hnode);
        /** release refcount of item */
        void     (*hs_put)(cfs_hash_t *hs, cfs_hlist_node_t *hnode);
        /** release refcount of item; always called while holding the
         * bucket lock */
        void     (*hs_put_locked)(cfs_hash_t *hs, cfs_hlist_node_t *hnode);
        /** called before @hnode is removed */
        void     (*hs_exit)(cfs_hash_t *hs, cfs_hlist_node_t *hnode);
} cfs_hash_ops_t;
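
/*
 * A minimal cfs_hash_ops_t sketch for an object keyed by a 64-bit id.
 * Everything named example_* is hypothetical and only illustrates how
 * the callbacks above fit together; cfs_hash_u64_hash() is defined
 * later in this header.
 */
#if 0
typedef struct {
        __u64             eo_id;    /* hash key */
        cfs_atomic_t      eo_ref;   /* refcount taken by hs_get/hs_put */
        cfs_hlist_node_t  eo_hnode; /* linkage into the hash */
} example_obj_t;

static unsigned
example_hash(cfs_hash_t *hs, const void *key, unsigned mask)
{
        return cfs_hash_u64_hash(*(const __u64 *)key, mask);
}

static void *
example_key(cfs_hlist_node_t *hnode)
{
        return &container_of(hnode, example_obj_t, eo_hnode)->eo_id;
}

static int
example_keycmp(const void *key, cfs_hlist_node_t *hnode)
{
        return *(const __u64 *)key ==
               container_of(hnode, example_obj_t, eo_hnode)->eo_id;
}

static void *
example_object(cfs_hlist_node_t *hnode)
{
        return container_of(hnode, example_obj_t, eo_hnode);
}

static void
example_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        cfs_atomic_inc(&container_of(hnode, example_obj_t,
                                     eo_hnode)->eo_ref);
}

static void
example_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        cfs_atomic_dec(&container_of(hnode, example_obj_t,
                                     eo_hnode)->eo_ref);
}

static cfs_hash_ops_t example_ops = {
        .hs_hash   = example_hash,
        .hs_key    = example_key,
        .hs_keycmp = example_keycmp,
        .hs_object = example_object,
        .hs_get    = example_get,
        .hs_put    = example_put,
};
#endif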

/** total number of buckets in @hs */
#define CFS_HASH_NBKT(hs)       \
        (1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits))

/** total number of buckets in @hs while rehashing */
#define CFS_HASH_RH_NBKT(hs)    \
        (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits))

/** number of hlists in a bucket */
#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits)

/** total number of hlists in @hs */
#define CFS_HASH_NHLIST(hs)     (1U << (hs)->hs_cur_bits)

/** total number of hlists in @hs while rehashing */
#define CFS_HASH_RH_NHLIST(hs)  (1U << (hs)->hs_rehash_bits)

static inline int
cfs_hash_with_no_lock(cfs_hash_t *hs)
{
        /* caller will serialize all operations for this hash-table */
        return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0;
}

static inline int
cfs_hash_with_no_bktlock(cfs_hash_t *hs)
{
        /* no bucket lock, one single lock to protect the hash-table */
        return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0;
}

static inline int
cfs_hash_with_rw_bktlock(cfs_hash_t *hs)
{
        /* rwlock to protect hash bucket */
        return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0;
}

static inline int
cfs_hash_with_spin_bktlock(cfs_hash_t *hs)
{
        /* spinlock to protect hash bucket */
        return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0;
}

static inline int
cfs_hash_with_add_tail(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0;
}

static inline int
cfs_hash_with_no_itemref(cfs_hash_t *hs)
{
        /* the hash-table doesn't keep a refcount on items;
         * an item can't be removed from the hash unless its
         * refcount is zero */
        return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0;
}

static inline int
cfs_hash_with_bigname(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_BIGNAME) != 0;
}

static inline int
cfs_hash_with_counter(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_COUNTER) != 0;
}

static inline int
cfs_hash_with_rehash(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_REHASH) != 0;
}

static inline int
cfs_hash_with_rehash_key(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0;
}

static inline int
cfs_hash_with_shrink(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_SHRINK) != 0;
}

static inline int
cfs_hash_with_assert_empty(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0;
}

static inline int
cfs_hash_with_depth(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_DEPTH) != 0;
}

static inline int
cfs_hash_with_nblk_change(cfs_hash_t *hs)
{
        return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0;
}

static inline int
cfs_hash_is_exiting(cfs_hash_t *hs)
{       /* cfs_hash_destroy is called */
        return hs->hs_exiting;
}

static inline int
cfs_hash_is_rehashing(cfs_hash_t *hs)
{       /* rehash is launched */
        return hs->hs_rehash_bits != 0;
}

static inline int
cfs_hash_is_iterating(cfs_hash_t *hs)
{       /* someone is calling cfs_hash_for_each_* */
        return hs->hs_iterating || hs->hs_iterators != 0;
}

static inline int
cfs_hash_bkt_size(cfs_hash_t *hs)
{
        return offsetof(cfs_hash_bucket_t, hsb_head[0]) +
               hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
               hs->hs_extra_bytes;
}
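
/*
 * Worked example (illustrative): with the default hs_bkt_bits == 3 a
 * bucket holds CFS_HASH_BKT_NHLIST == 2^3 == 8 hash-heads, so assuming
 * the simplest head type of one list head each and no extra bytes, the
 * bucket size is offsetof(cfs_hash_bucket_t, hsb_head[0]) +
 * 8 * sizeof(cfs_hlist_head_t).
 */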

#define CFS_HOP(hs, op)           (hs)->hs_ops->hs_ ## op

static inline unsigned
cfs_hash_id(cfs_hash_t *hs, const void *key, unsigned mask)
{
        return CFS_HOP(hs, hash)(hs, key, mask);
}

static inline void *
cfs_hash_key(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        return CFS_HOP(hs, key)(hnode);
}

static inline void
cfs_hash_keycpy(cfs_hash_t *hs, cfs_hlist_node_t *hnode, void *key)
{
        if (CFS_HOP(hs, keycpy) != NULL)
                CFS_HOP(hs, keycpy)(hnode, key);
}

/**
 * Returns 1 on a match.
 */
static inline int
cfs_hash_keycmp(cfs_hash_t *hs, const void *key, cfs_hlist_node_t *hnode)
{
        return CFS_HOP(hs, keycmp)(key, hnode);
}

static inline void *
cfs_hash_object(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        return CFS_HOP(hs, object)(hnode);
}

static inline void
cfs_hash_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        return CFS_HOP(hs, get)(hs, hnode);
}

static inline void
cfs_hash_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        LASSERT(CFS_HOP(hs, put_locked) != NULL);

        return CFS_HOP(hs, put_locked)(hs, hnode);
}

static inline void
cfs_hash_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        LASSERT(CFS_HOP(hs, put) != NULL);

        return CFS_HOP(hs, put)(hs, hnode);
}

static inline void
cfs_hash_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
{
        if (CFS_HOP(hs, exit))
                CFS_HOP(hs, exit)(hs, hnode);
}

static inline void cfs_hash_lock(cfs_hash_t *hs, int excl)
{
        hs->hs_lops->hs_lock(&hs->hs_lock, excl);
}

static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl)
{
        hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
}

static inline int cfs_hash_dec_and_lock(cfs_hash_t *hs,
                                        cfs_atomic_t *condition)
{
        LASSERT(cfs_hash_with_no_bktlock(hs));
        return cfs_atomic_dec_and_lock(condition, &hs->hs_lock.spin);
}

static inline void cfs_hash_bd_lock(cfs_hash_t *hs,
                                    cfs_hash_bd_t *bd, int excl)
{
        hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
}

static inline void cfs_hash_bd_unlock(cfs_hash_t *hs,
                                      cfs_hash_bd_t *bd, int excl)
{
        hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
}

/**
 * operations on a cfs_hash bucket (bd: bucket descriptor);
 * these are normally for hash-tables without rehash
 */
void cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd);

static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, const void *key,
                                            cfs_hash_bd_t *bd, int excl)
{
        cfs_hash_bd_get(hs, key, bd);
        cfs_hash_bd_lock(hs, bd, excl);
}

static inline unsigned cfs_hash_bd_index_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
{
        return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits);
}

static inline void cfs_hash_bd_index_set(cfs_hash_t *hs,
                                         unsigned index, cfs_hash_bd_t *bd)
{
        bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits];
        bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U);
}
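
/*
 * Illustrative example: with hs_bkt_bits == 3, global hlist index 21
 * decodes via cfs_hash_bd_index_set() to bucket 21 >> 3 == 2 and
 * in-bucket offset 21 & 7 == 5; cfs_hash_bd_index_get() reverses this:
 * 5 | (2 << 3) == 21.
 */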

static inline void *
cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
{
        return (void *)bd->bd_bucket +
               cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
}

static inline __u32
cfs_hash_bd_version_get(cfs_hash_bd_t *bd)
{
        /* caller must hold cfs_hash_bd_lock */
        return bd->bd_bucket->hsb_version;
}

static inline __u32
cfs_hash_bd_count_get(cfs_hash_bd_t *bd)
{
        /* caller must hold cfs_hash_bd_lock */
        return bd->bd_bucket->hsb_count;
}

static inline int
cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd)
{
        return bd->bd_bucket->hsb_depmax;
}

static inline int
cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2)
{
        if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
                return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;

        if (bd1->bd_offset != bd2->bd_offset)
                return bd1->bd_offset - bd2->bd_offset;

        return 0;
}

void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                            cfs_hlist_node_t *hnode);
void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                            cfs_hlist_node_t *hnode);
void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old,
                             cfs_hash_bd_t *bd_new, cfs_hlist_node_t *hnode);

static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                                           cfs_atomic_t *condition)
{
        LASSERT(cfs_hash_with_spin_bktlock(hs));
        return cfs_atomic_dec_and_lock(condition,
                                       &bd->bd_bucket->hsb_lock.spin);
}

static inline cfs_hlist_head_t *cfs_hash_bd_hhead(cfs_hash_t *hs,
                                                  cfs_hash_bd_t *bd)
{
        return hs->hs_hops->hop_hhead(hs, bd);
}

cfs_hlist_node_t *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
                                            cfs_hash_bd_t *bd, const void *key);
cfs_hlist_node_t *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
                                             cfs_hash_bd_t *bd, const void *key,
                                             cfs_hlist_node_t *hnode,
                                             int insist_add);
cfs_hlist_node_t *cfs_hash_bd_finddel_locked(cfs_hash_t *hs,
                                             cfs_hash_bd_t *bd, const void *key,
                                             cfs_hlist_node_t *hnode);

/**
 * operations on a pair of cfs_hash bucket descriptors;
 * these are safe for hash-tables with rehash
 */
void cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds);
void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);

static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, const void *key,
                                                 cfs_hash_bd_t *bds, int excl)
{
        cfs_hash_dual_bd_get(hs, key, bds);
        cfs_hash_dual_bd_lock(hs, bds, excl);
}

cfs_hlist_node_t *cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs,
                                                 cfs_hash_bd_t *bds,
                                                 const void *key);
cfs_hlist_node_t *cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs,
                                                  cfs_hash_bd_t *bds,
                                                  const void *key,
                                                  cfs_hlist_node_t *hnode,
                                                  int insist_add);
cfs_hlist_node_t *cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs,
                                                  cfs_hash_bd_t *bds,
                                                  const void *key,
                                                  cfs_hlist_node_t *hnode);
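
/*
 * Hypothetical usage sketch: a rehash-safe lookup locks both candidate
 * buckets of @key (current and rehash table) before searching, as the
 * comment above describes.  It is assumed here that a successful locked
 * lookup takes a reference on the item via the ops' hs_get.
 */
#if 0
static void *example_dual_lookup(cfs_hash_t *hs, const void *key)
{
        cfs_hash_bd_t     bds[2];
        cfs_hlist_node_t *hnode;
        void             *obj = NULL;

        cfs_hash_dual_bd_get_and_lock(hs, key, bds, 0);
        hnode = cfs_hash_dual_bd_lookup_locked(hs, bds, key);
        if (hnode != NULL)
                obj = cfs_hash_object(hs, hnode);
        cfs_hash_dual_bd_unlock(hs, bds, 0);
        return obj;
}
#endif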

/* Hash init/cleanup functions */
cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
                            unsigned bkt_bits, unsigned extra_bytes,
                            unsigned min_theta, unsigned max_theta,
                            cfs_hash_ops_t *ops, unsigned flags);

cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs);
void cfs_hash_putref(cfs_hash_t *hs);

/* Hash addition functions */
void cfs_hash_add(cfs_hash_t *hs, const void *key,
                  cfs_hlist_node_t *hnode);
int cfs_hash_add_unique(cfs_hash_t *hs, const void *key,
                        cfs_hlist_node_t *hnode);
void *cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key,
                              cfs_hlist_node_t *hnode);

/* Hash deletion functions */
void *cfs_hash_del(cfs_hash_t *hs, const void *key, cfs_hlist_node_t *hnode);
void *cfs_hash_del_key(cfs_hash_t *hs, const void *key);

/* Hash lookup/for_each functions */
#define CFS_HASH_LOOP_HOG       1024

typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                                      cfs_hlist_node_t *node, void *data);
void *cfs_hash_lookup(cfs_hash_t *hs, const void *key);
void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
int  cfs_hash_for_each_nolock(cfs_hash_t *hs,
                              cfs_hash_for_each_cb_t, void *data);
int  cfs_hash_for_each_empty(cfs_hash_t *hs,
                             cfs_hash_for_each_cb_t, void *data);
void cfs_hash_for_each_key(cfs_hash_t *hs, const void *key,
                           cfs_hash_for_each_cb_t, void *data);
typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data);

void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex,
                             cfs_hash_for_each_cb_t, void *data);
int  cfs_hash_is_empty(cfs_hash_t *hs);
__u64 cfs_hash_size_get(cfs_hash_t *hs);
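
/*
 * End-to-end sketch (hypothetical, reusing the example_* names from the
 * cfs_hash_ops_t sketch above): create a table with 2^10 hlists that
 * may grow to 2^16, add one object, look it up and remove it.  It is
 * assumed that cfs_hash_lookup() returns the object with a reference
 * taken via hs_get, hence the cfs_hash_put() after a successful lookup.
 */
#if 0
static int example_usage(example_obj_t *obj)
{
        cfs_hash_t *hs;
        void       *found;

        hs = cfs_hash_create("example", 10, 16, CFS_HASH_BKT_BITS,
                             0, CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA,
                             &example_ops, CFS_HASH_DEFAULT);
        if (hs == NULL)
                return -ENOMEM;

        cfs_hash_add(hs, &obj->eo_id, &obj->eo_hnode);

        found = cfs_hash_lookup(hs, &obj->eo_id);
        if (found != NULL)
                cfs_hash_put(hs, &obj->eo_hnode);

        cfs_hash_del(hs, &obj->eo_id, &obj->eo_hnode);
        cfs_hash_putref(hs);
        return 0;
}
#endif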

/*
 * Rehash - theta is calculated to be the average chained
 * hash depth assuming a perfectly uniform hash function.
 */
void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs);
void cfs_hash_rehash_cancel(cfs_hash_t *hs);
int  cfs_hash_rehash(cfs_hash_t *hs, int do_rehash);
void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key,
                         void *new_key, cfs_hlist_node_t *hnode);
#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
/* Validate hnode references the correct key */
static inline void
cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
                      cfs_hlist_node_t *hnode)
{
        LASSERT(cfs_hash_keycmp(hs, key, hnode));
}

/* Validate hnode is in the correct bucket */
static inline void
cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                         cfs_hlist_node_t *hnode)
{
        cfs_hash_bd_t   bds[2];

        cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
        LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
                bds[1].bd_bucket == bd->bd_bucket);
}

#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */

static inline void
cfs_hash_key_validate(cfs_hash_t *hs, const void *key,
                      cfs_hlist_node_t *hnode) {}

static inline void
cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                         cfs_hlist_node_t *hnode) {}

#endif /* CFS_HASH_DEBUG_LEVEL */

#define CFS_HASH_THETA_BITS  10
#define CFS_HASH_MIN_THETA  (1U << (CFS_HASH_THETA_BITS - 1))
#define CFS_HASH_MAX_THETA  (1U << (CFS_HASH_THETA_BITS + 1))

/* Return integer component of theta */
static inline int __cfs_hash_theta_int(int theta)
{
        return (theta >> CFS_HASH_THETA_BITS);
}

/* Return a fractional value between 0 and 999 */
static inline int __cfs_hash_theta_frac(int theta)
{
        return ((theta * 1000) >> CFS_HASH_THETA_BITS) -
               (__cfs_hash_theta_int(theta) * 1000);
}

static inline int __cfs_hash_theta(cfs_hash_t *hs)
{
        return (cfs_atomic_read(&hs->hs_count) <<
                CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
}
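
/*
 * Worked example: theta is items/hlists in fixed point with
 * CFS_HASH_THETA_BITS (10) fractional bits, so CFS_HASH_MIN_THETA is
 * 512 (0.500) and CFS_HASH_MAX_THETA is 2048 (2.000).  A table holding
 * 3072 items at hs_cur_bits == 10 (1024 hlists) yields
 * __cfs_hash_theta() == (3072 << 10) >> 10 == 3072, rendered by the
 * int/frac helpers as 3.000; that is over the default maximum, so such
 * a table is a candidate for growing.
 */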

static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max)
{
        LASSERT(min < max);
        hs->hs_min_theta = (__u16)min;
        hs->hs_max_theta = (__u16)max;
}

/* Generic debug formatting routines mainly for proc handler */
int cfs_hash_debug_header(char *str, int size);
int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size);

/*
 * Generic djb2 hash algorithm for character arrays.
 */
static inline unsigned
cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask)
{
        unsigned i, hash = 5381;

        LASSERT(key != NULL);

        for (i = 0; i < size; i++)
                hash = hash * 33 + ((char *)key)[i];

        return (hash & mask);
}
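
/*
 * Hypothetical usage: an hs_hash implementation for NUL-terminated
 * string keys (@mask is 2^n - 1, supplied by the hash table).
 */
#if 0
static unsigned
example_name_hash(cfs_hash_t *hs, const void *key, unsigned mask)
{
        return cfs_hash_djb2_hash(key, strlen((const char *)key), mask);
}
#endif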

/*
 * Generic u32 hash algorithm.
 */
static inline unsigned
cfs_hash_u32_hash(const __u32 key, unsigned mask)
{
        return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
}

/*
 * Generic u64 hash algorithm.
 */
static inline unsigned
cfs_hash_u64_hash(const __u64 key, unsigned mask)
{
        return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
}

/** iterate over all buckets in @bds (array of cfs_hash_bd_t) */
#define cfs_hash_for_each_bd(bds, n, i) \
        for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)

/** iterate over all buckets of @hs */
#define cfs_hash_for_each_bucket(hs, bd, pos)                   \
        for (pos = 0;                                           \
             pos < CFS_HASH_NBKT(hs) &&                         \
             ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)

/** iterate over all hlists of bucket @bd */
#define cfs_hash_bd_for_each_hlist(hs, bd, hlist)               \
        for ((bd)->bd_offset = 0;                               \
             (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) &&       \
             (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL;       \
             (bd)->bd_offset++)
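
/*
 * Sketch (hypothetical; assumes the table is quiescent or the caller
 * holds the table lock): walk every bucket and every hlist in it.  The
 * count below simply ends up equal to CFS_HASH_NHLIST(hs).
 */
#if 0
static int example_count_hlists(cfs_hash_t *hs)
{
        cfs_hash_bd_t     bd;
        cfs_hlist_head_t *hlist;
        int               pos;
        int               count = 0;

        cfs_hash_for_each_bucket(hs, &bd, pos) {
                cfs_hash_bd_for_each_hlist(hs, &bd, hlist)
                        count++;
        }
        return count;
}
#endif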

/* !__LIBCFS_HASH_H__ */
#endif