Whamcloud - gitweb
b=18551 libcfs hash
[fs/lustre-release.git] / libcfs / include / libcfs / libcfs_hash.h
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/include/libcfs/libcfs_hash.h
37  *
38  * Hashing routines
39  *
40  */
41
42 #ifndef __LIBCFS_HASH_H__
43 #define __LIBCFS_HASH_H__
44 /*
45  * Knuth recommends primes in approximately golden ratio to the maximum
46  * integer representable by a machine word for multiplicative hashing.
47  * Chuck Lever verified the effectiveness of this technique:
48  * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
49  *
50  * These primes are chosen to be bit-sparse, that is operations on
51  * them can use shifts and additions instead of multiplications for
52  * machines where multiplications are slow.
53  */
54 /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
55 #define CFS_GOLDEN_RATIO_PRIME_32 0x9e370001UL
56 /*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
57 #define CFS_GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL
58
59 /*
60  * Ideally we would use HAVE_HASH_LONG for this, but on linux we configure
61  * the linux kernel and user space at the same time, so we need to differentiate
62  * between them explicitly. If this is not needed on other architectures, then
63  * we'll need to move the functions to architecture-specific headers.
64  */
65
66 #if (defined __linux__ && defined __KERNEL__)
67 #include <linux/hash.h>
68
69 #define cfs_hash_long(val, bits)    hash_long(val, bits)
70 #else
71 /* Fast hashing routine for a long.
72    (C) 2002 William Lee Irwin III, IBM */
73
74 #if BITS_PER_LONG == 32
75 /* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
76 #define CFS_GOLDEN_RATIO_PRIME          CFS_GOLDEN_RATIO_PRIME_32
77 #elif BITS_PER_LONG == 64
78 /*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
79 #define CFS_GOLDEN_RATIO_PRIME          CFS_GOLDEN_RATIO_PRIME_64
80 #else
81 #error Define CFS_GOLDEN_RATIO_PRIME for your wordsize.
82 #endif
83
84 static inline unsigned long cfs_hash_long(unsigned long val, unsigned int bits)
85 {
86         unsigned long hash = val;
87
88 #if BITS_PER_LONG == 64
89         /*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
90         unsigned long n = hash;
91         n <<= 18;
92         hash -= n;
93         n <<= 33;
94         hash -= n;
95         n <<= 3;
96         hash += n;
97         n <<= 3;
98         hash -= n;
99         n <<= 4;
100         hash += n;
101         n <<= 2;
102         hash += n;
103 #else
104         /* On some cpus multiply is faster, on others gcc will do shifts */
105         hash *= CFS_GOLDEN_RATIO_PRIME;
106 #endif
107
108         /* High bits are more random, so use them. */
109         return hash >> (BITS_PER_LONG - bits);
110 }
111 #if 0
112 static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
113 {
114         return cfs_hash_long((unsigned long)ptr, bits);
115 }
116 #endif
117
118 /* !(__linux__ && __KERNEL__) */
119 #endif
120
121 /** disable debug */
122 #define CFS_HASH_DEBUG_NONE         0
123 /** record hash depth and output to console when it's too deep,
124  *  computing overhead is low but consume more memory */
125 #define CFS_HASH_DEBUG_1            1
126 /** expensive, check key validation */
127 #define CFS_HASH_DEBUG_2            2
128
129 #define CFS_HASH_DEBUG_LEVEL        CFS_HASH_DEBUG_NONE
130
131 struct cfs_hash_ops;
132 struct cfs_hash_lock_ops;
133 struct cfs_hash_hlist_ops;
134
/** storage for the hash-table lock (hs_lock) and the bucket lock (hsb_lock);
 *  being a union, a given lock is either an rwlock or a spinlock */
135 typedef union {
136         cfs_rwlock_t                rw;             /**< rwlock */
137         cfs_spinlock_t              spin;           /**< spinlock */
138 } cfs_hash_lock_t;
139
140 /**
141  * cfs_hash_bucket is a container of:
142  * - lock, counter ...
143  * - array of hash-heads starting from hsb_head[0], a hash-head can be one of
144  *   . cfs_hash_head_t
145  *   . cfs_hash_head_dep_t
146  *   . cfs_hash_dhead_t
147  *   . cfs_hash_dhead_dep_t
148  *   which depends on the requirement of the user
149  * - some extra bytes (caller can require them while creating the hash)
150  */
151 typedef struct cfs_hash_bucket {
152         cfs_hash_lock_t             hsb_lock;       /**< bucket lock */
153         __u32                       hsb_count;      /**< current entries */
154         __u32                       hsb_version;    /**< change version */
155         unsigned int                hsb_index;      /**< index of bucket */
156         int                         hsb_depmax;     /**< max depth on bucket */
157         char                        hsb_head[0];    /**< hash-head array */
158 } cfs_hash_bucket_t;
159
160 /**
161  * cfs_hash bucket descriptor, it normally lives on the stack of the caller
162  */
163 typedef struct cfs_hash_bd {
164         cfs_hash_bucket_t          *bd_bucket;      /**< address of bucket */
165         unsigned int                bd_offset;      /**< index of hash-head in bucket */
166 } cfs_hash_bd_t;
167
168 #define CFS_HASH_NAME_LEN           16      /**< default name length */
169 #define CFS_HASH_BIGNAME_LEN        64      /**< bigname for param tree */
170
171 #define CFS_HASH_BKT_BITS           3       /**< default bits of bucket */
172 #define CFS_HASH_BITS_MAX           30      /**< max bits of bucket */
173 #define CFS_HASH_BITS_MIN           CFS_HASH_BKT_BITS
174
175 /**
176  * common hash attributes.
177  */
178 enum cfs_hash_tag {
179         /**
180          * don't need any lock, caller will protect operations with its
181          * own lock. With this flag:
182          *  . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK
183          *    will be ignored.
184          *  . Some functions will be disabled with this flag, i.e:
185          *    cfs_hash_for_each_empty, cfs_hash_rehash
186          */
187         CFS_HASH_NO_LOCK        = 1 << 0,
188         /** no bucket lock, use one spinlock to protect the whole hash */
189         CFS_HASH_NO_BKTLOCK     = 1 << 1,
190         /** rwlock to protect bucket */
191         CFS_HASH_RW_BKTLOCK     = 1 << 2,
192         /** spinlock to protect bucket */
193         CFS_HASH_SPIN_BKTLOCK   = 1 << 3,
194         /** always add new item to tail */
195         CFS_HASH_ADD_TAIL       = 1 << 4,
196         /** hash-table doesn't have refcount on item */
197         CFS_HASH_NO_ITEMREF     = 1 << 5,
198         /** big name for param-tree */
199         CFS_HASH_BIGNAME        = 1 << 6,
200         /** track global count */
201         CFS_HASH_COUNTER        = 1 << 7,
202         /** rehash item by new key */
203         CFS_HASH_REHASH_KEY     = 1 << 8,
204         /** Enable dynamic hash resizing */
205         CFS_HASH_REHASH         = 1 << 9,
206         /** can shrink hash-size */
207         CFS_HASH_SHRINK         = 1 << 10,
208         /** assert hash is empty on exit */
209         CFS_HASH_ASSERT_EMPTY   = 1 << 11,
210         /** record hlist depth */
211         CFS_HASH_DEPTH          = 1 << 12,
212         /**
213          * rehash is always scheduled in a different thread, so current
214          * change on hash table is non-blocking
215          */
216         CFS_HASH_NBLK_CHANGE    = 1 << 13,
217         /** NB, hs_flags is typed as __u16, please widen it
218          * if you need to extend to more than 16 flags */
219 };
220
221 /** most used attributes */
222 #define CFS_HASH_DEFAULT       (CFS_HASH_RW_BKTLOCK | \
223                                 CFS_HASH_COUNTER | CFS_HASH_REHASH)
224
225 /**
226  * cfs_hash is a hash-table implementation for general purpose, it can support:
227  *    . two refcount modes
228  *      hash-table with & without refcount
229  *    . four lock modes
230  *      nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock
231  *    . general operations
232  *      lookup, add(add_tail or add_head), delete
233  *    . rehash
234  *      grow or shrink
235  *    . iteration
236  *      locked iteration and unlocked iteration
237  *    . bigname
238  *      support long name hash
239  *    . debug
240  *      trace max searching depth
241  *
242  * Rehash:
243  * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker)
244  * is spawned to handle the rehash in the background, it's possible that other
245  * processes can concurrently perform additions, deletions, and lookups
246  * without being blocked on rehash completion, because rehash will release
247  * the global wrlock for each bucket.
248  *
249  * rehash and iteration can't run at the same time because it's too tricky
250  * to keep both of them safe and correct.
251  * As they are relatively rare operations, so:
252  *   . if iteration is in progress while we try to launch rehash, then
253  *     it just gives up, the iterator will launch rehash at the end.
254  *   . if rehash is in progress while we try to iterate the hash table,
255  *     then we just wait (shouldn't be very long time), anyway, nobody
256  *     should expect iteration of whole hash-table to be non-blocking.
257  *
258  * During rehashing, a (key,object) pair may be in one of two buckets,
259  * depending on whether the worker task has yet to transfer the object
260  * to its new location in the table. Lookups and deletions need to search both
261  * locations; additions must take care to only insert into the new bucket.
262  */
263
/**
 * Descriptor of one hash-table.
 *
 * hs_name is a trailing zero-length array, so the storage for the name has
 * to be allocated together with the structure itself.
 * The hs_dep_* members only exist when CFS_HASH_DEBUG_LEVEL is at least
 * CFS_HASH_DEBUG_1.
 */
264 typedef struct cfs_hash {
265         /** serialize with rehash, or serialize all operations if
266          * the hash-table has CFS_HASH_NO_BKTLOCK */
267         cfs_hash_lock_t             hs_lock;
268         /** hash operations */
269         struct cfs_hash_ops        *hs_ops;
270         /** hash lock operations */
271         struct cfs_hash_lock_ops   *hs_lops;
272         /** hash list operations */
273         struct cfs_hash_hlist_ops  *hs_hops;
274         /** hash buckets-table */
275         cfs_hash_bucket_t         **hs_buckets;
276         /** total number of items on this hash-table */
277         cfs_atomic_t                hs_count;
278         /** hash flags, see cfs_hash_tag for detail */
279         __u16                       hs_flags;
280         /** # of extra-bytes for bucket, for user saving extended attributes */
281         __u16                       hs_extra_bytes;
282         /** wants to iterate */
283         __u8                        hs_iterating;
284         /** hash-table is dying */
285         __u8                        hs_exiting;
286         /** current hash bits */
287         __u8                        hs_cur_bits;
288         /** min hash bits */
289         __u8                        hs_min_bits;
290         /** max hash bits */
291         __u8                        hs_max_bits;
292         /** bits for rehash, non-zero means a rehash has been launched */
293         __u8                        hs_rehash_bits;
294         /** bits for each bucket */
295         __u8                        hs_bkt_bits;
296         /** resize min threshold */
297         __u16                       hs_min_theta;
298         /** resize max threshold */
299         __u16                       hs_max_theta;
300         /** resize count */
301         __u32                       hs_rehash_count;
302         /** # of iterators (caller of cfs_hash_for_each_*) */
303         __u32                       hs_iterators;
304         /** rehash workitem */
305         cfs_workitem_t              hs_rehash_wi;
306         /** refcount on this hash table */
307         cfs_atomic_t                hs_refcount;
308         /** rehash buckets-table */
309         cfs_hash_bucket_t         **hs_rehash_buckets;
310 #if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1
311         /** serialize debug members */
312         cfs_spinlock_t              hs_dep_lock;
313         /** max depth */
314         unsigned int                hs_dep_max;
315         /** id of the deepest bucket */
316         unsigned int                hs_dep_bkt;
317         /** offset in the deepest bucket */
318         unsigned int                hs_dep_off;
319         /** bits when we found the max depth */
320         unsigned int                hs_dep_bits;
321         /** workitem to output max depth */
322         cfs_workitem_t              hs_dep_wi;
323 #endif
324         /** name of htable */
325         char                        hs_name[0];
326 } cfs_hash_t;
327
/**
 * Locking callbacks of a hash-table, reached through cfs_hash::hs_lops.
 * cfs_hash_lock()/cfs_hash_unlock() call the first two with &hs->hs_lock,
 * cfs_hash_bd_lock()/cfs_hash_bd_unlock() call the last two with the
 * hsb_lock of the bucket; @exclusive is their "excl" argument passed through.
 */
328 typedef struct cfs_hash_lock_ops {
329         /** lock the hash table */
330         void    (*hs_lock)(cfs_hash_lock_t *lock, int exclusive);
331         /** unlock the hash table */
332         void    (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive);
333         /** lock the hash bucket */
334         void    (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive);
335         /** unlock the hash bucket */
336         void    (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive);
337 } cfs_hash_lock_ops_t;
338
/**
 * Hash-head callbacks of a hash-table, reached through cfs_hash::hs_hops.
 * hop_hhead_size is what cfs_hash_bkt_size() multiplies by the number of
 * hash-heads per bucket; hop_hhead backs cfs_hash_bd_hhead().
 */
339 typedef struct cfs_hash_hlist_ops {
340         /** return hlist_head of hash-head of @bd */
341         cfs_hlist_head_t *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd);
342         /** return hash-head size */
343         int (*hop_hhead_size)(cfs_hash_t *hs);
344         /** add @hnode to hash-head of @bd */
345         int (*hop_hnode_add)(cfs_hash_t *hs,
346                              cfs_hash_bd_t *bd, cfs_hlist_node_t *hnode);
347         /** remove @hnode from hash-head of @bd */
348         int (*hop_hnode_del)(cfs_hash_t *hs,
349                              cfs_hash_bd_t *bd, cfs_hlist_node_t *hnode);
350 } cfs_hash_hlist_ops_t;
351
/**
 * User supplied item callbacks of a hash-table, reached through
 * cfs_hash::hs_ops (see the CFS_HOP() based wrappers in this header).
 * The wrappers tolerate a NULL hs_keycpy and a NULL hs_exit; hs_put and
 * hs_put_locked are asserted to be non-NULL when their wrapper is called;
 * the other members are called unconditionally.
 */
352 typedef struct cfs_hash_ops {
353         /** return hashed value from @key */
354         unsigned (*hs_hash)(cfs_hash_t *hs, void *key, unsigned mask);
355         /** return key address of @hnode */
356         void *   (*hs_key)(cfs_hlist_node_t *hnode);
357         /** copy key from @hnode to @key */
358         void     (*hs_keycpy)(cfs_hlist_node_t *hnode, void *key);
359         /** compare @key with key of @hnode */
360         int      (*hs_keycmp)(void *key, cfs_hlist_node_t *hnode);
361         /** return object address of @hnode, i.e: container_of(...hnode) */
362         void *   (*hs_object)(cfs_hlist_node_t *hnode);
363         /** get refcount of item, always called with holding bucket-lock */
364         void *   (*hs_get)(cfs_hlist_node_t *hnode);
365         /** release refcount of item */
366         void *   (*hs_put)(cfs_hlist_node_t *hnode);
367         /** release refcount of item, always called with holding bucket-lock */
368         void *   (*hs_put_locked)(cfs_hlist_node_t *hnode);
369         /** it's called before removing of @hnode */
370         void     (*hs_exit)(cfs_hlist_node_t *hnode);
371 } cfs_hash_ops_t;
372
373 /** total number of buckets in @hs */
374 #define CFS_HASH_NBKT(hs)       \
375         (1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits))
376
377 /** total number of buckets in @hs while rehashing */
378 #define CFS_HASH_RH_NBKT(hs)    \
379         (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits))
380
381 /** number of hlists in each bucket */
382 #define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits)
383
384 /** total number of hlist in @hs */
385 #define CFS_HASH_NHLIST(hs)     (1U << (hs)->hs_cur_bits)
386
387 /** total number of hlist in @hs while rehashing */
388 #define CFS_HASH_RH_NHLIST(hs)  (1U << (hs)->hs_rehash_bits)
389
390 static inline int
391 cfs_hash_with_no_lock(cfs_hash_t *hs)
392 {
393         /* caller will serialize all operations for this hash-table */
394         return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0;
395 }
396
397 static inline int
398 cfs_hash_with_no_bktlock(cfs_hash_t *hs)
399 {
400         /* no bucket lock, one single lock to protect the hash-table */
401         return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0;
402 }
403
404 static inline int
405 cfs_hash_with_rw_bktlock(cfs_hash_t *hs)
406 {
407         /* rwlock to protect hash bucket */
408         return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0;
409 }
410
411 static inline int
412 cfs_hash_with_spin_bktlock(cfs_hash_t *hs)
413 {
414         /* spinlock to protect hash bucket */
415         return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0;
416 }
417
418 static inline int
419 cfs_hash_with_add_tail(cfs_hash_t *hs)
420 {
421         return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0;
422 }
423
424 static inline int
425 cfs_hash_with_no_itemref(cfs_hash_t *hs)
426 {
427         /* hash-table doesn't keep refcount on item,
428          * item can't be removed from hash unless it's
429          * ZERO refcount */
430         return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0;
431 }
432
433 static inline int
434 cfs_hash_with_bigname(cfs_hash_t *hs)
435 {
436         return (hs->hs_flags & CFS_HASH_BIGNAME) != 0;
437 }
438
439 static inline int
440 cfs_hash_with_counter(cfs_hash_t *hs)
441 {
442         return (hs->hs_flags & CFS_HASH_COUNTER) != 0;
443 }
444
445 static inline int
446 cfs_hash_with_rehash(cfs_hash_t *hs)
447 {
448         return (hs->hs_flags & CFS_HASH_REHASH) != 0;
449 }
450
451 static inline int
452 cfs_hash_with_rehash_key(cfs_hash_t *hs)
453 {
454         return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0;
455 }
456
457 static inline int
458 cfs_hash_with_shrink(cfs_hash_t *hs)
459 {
460         return (hs->hs_flags & CFS_HASH_SHRINK) != 0;
461 }
462
463 static inline int
464 cfs_hash_with_assert_empty(cfs_hash_t *hs)
465 {
466         return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0;
467 }
468
469 static inline int
470 cfs_hash_with_depth(cfs_hash_t *hs)
471 {
472         return (hs->hs_flags & CFS_HASH_DEPTH) != 0;
473 }
474
475 static inline int
476 cfs_hash_with_nblk_change(cfs_hash_t *hs)
477 {
478         return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0;
479 }
480
481 static inline int
482 cfs_hash_is_exiting(cfs_hash_t *hs)
483 {       /* cfs_hash_destroy is called */
484         return hs->hs_exiting;
485 }
486
487 static inline int
488 cfs_hash_is_rehashing(cfs_hash_t *hs)
489 {       /* rehash is launched */
490         return hs->hs_rehash_bits != 0;
491 }
492
493 static inline int
494 cfs_hash_is_iterating(cfs_hash_t *hs)
495 {       /* someone is calling cfs_hash_for_each_* */
496         return hs->hs_iterating || hs->hs_iterators != 0;
497 }
498
499 static inline int
500 cfs_hash_bkt_size(cfs_hash_t *hs)
501 {
502         return offsetof(cfs_hash_bucket_t, hsb_head[0]) +
503                hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) +
504                hs->hs_extra_bytes;
505 }
506
/** member hs_<op> of the cfs_hash_ops table of @hs, usable both for a
 *  NULL check and for calling it */
#define CFS_HOP(hs, op)           ((hs)->hs_ops->hs_ ## op)
508
509 static inline unsigned
510 cfs_hash_id(cfs_hash_t *hs, void *key, unsigned mask)
511 {
512         return CFS_HOP(hs, hash)(hs, key, mask);
513 }
514
515 static inline void *
516 cfs_hash_key(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
517 {
518         return CFS_HOP(hs, key)(hnode);
519 }
520
521 static inline void
522 cfs_hash_keycpy(cfs_hash_t *hs, cfs_hlist_node_t *hnode, void *key)
523 {
524         if (CFS_HOP(hs, keycpy) != NULL)
525                 CFS_HOP(hs, keycpy)(hnode, key);
526 }
527
528 /**
529  * Returns 1 on a match,
530  */
531 static inline int
532 cfs_hash_keycmp(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
533 {
534         return CFS_HOP(hs, keycmp)(key, hnode);
535 }
536
537 static inline void *
538 cfs_hash_object(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
539 {
540         return CFS_HOP(hs, object)(hnode);
541 }
542
543 static inline void *
544 cfs_hash_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
545 {
546         return CFS_HOP(hs, get)(hnode);
547 }
548
549 static inline void *
550 cfs_hash_put_locked(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
551 {
552         LASSERT(CFS_HOP(hs, put_locked) != NULL);
553
554         return CFS_HOP(hs, put_locked)(hnode);
555 }
556
557 static inline void *
558 cfs_hash_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
559 {
560         LASSERT(CFS_HOP(hs, put) != NULL);
561
562         return CFS_HOP(hs, put)(hnode);
563 }
564
565 static inline void
566 cfs_hash_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode)
567 {
568         if (CFS_HOP(hs, exit))
569                 CFS_HOP(hs, exit)(hnode);
570 }
571
572 static inline void cfs_hash_lock(cfs_hash_t *hs, int excl)
573 {
574         hs->hs_lops->hs_lock(&hs->hs_lock, excl);
575 }
576
577 static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl)
578 {
579         hs->hs_lops->hs_unlock(&hs->hs_lock, excl);
580 }
581
582 static inline void cfs_hash_bd_lock(cfs_hash_t *hs,
583                                     cfs_hash_bd_t *bd, int excl)
584 {
585         hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl);
586 }
587
588 static inline void cfs_hash_bd_unlock(cfs_hash_t *hs,
589                                       cfs_hash_bd_t *bd, int excl)
590 {
591         hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl);
592 }
593
594 /**
595  * operations on cfs_hash bucket (bd: bucket descriptor),
596  * they are normally for hash-table without rehash
597  */
598 void cfs_hash_bd_get(cfs_hash_t *hs, void *key, cfs_hash_bd_t *bd);
599
600 static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, void *key,
601                                             cfs_hash_bd_t *bd, int excl)
602 {
603         cfs_hash_bd_get(hs, key, bd);
604         cfs_hash_bd_lock(hs, bd, excl);
605 }
606
607 static inline void *
608 cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd)
609 {
610         return (void *)bd->bd_bucket +
611                cfs_hash_bkt_size(hs) - hs->hs_extra_bytes;
612 }
613
614 static inline __u32
615 cfs_hash_bd_version_get(cfs_hash_bd_t *bd)
616 {
617         /* need hold cfs_hash_bd_lock */
618         return bd->bd_bucket->hsb_version;
619 }
620
621 static inline __u32
622 cfs_hash_bd_count_get(cfs_hash_bd_t *bd)
623 {
624         /* need hold cfs_hash_bd_lock */
625         return bd->bd_bucket->hsb_count;
626 }
627
628 static inline int
629 cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd)
630 {
631         return bd->bd_bucket->hsb_depmax;
632 }
633
634 static inline int
635 cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2)
636 {
637         if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index)
638                 return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index;
639
640         if (bd1->bd_offset != bd2->bd_offset)
641                 return bd1->bd_offset - bd2->bd_offset;
642
643         return 0;
644 }
645
646 void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
647                             cfs_hlist_node_t *hnode);
648 void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
649                             cfs_hlist_node_t *hnode);
650 void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old,
651                              cfs_hash_bd_t *bd_new, cfs_hlist_node_t *hnode);
652
653 static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd,
654                                            cfs_atomic_t *condition)
655 {
656         LASSERT(cfs_hash_with_spin_bktlock(hs));
657         return cfs_atomic_dec_and_lock(condition,
658                                        &bd->bd_bucket->hsb_lock.spin);
659 }
660
661 static inline cfs_hlist_head_t *cfs_hash_bd_hhead(cfs_hash_t *hs,
662                                                   cfs_hash_bd_t *bd)
663 {
664         return hs->hs_hops->hop_hhead(hs, bd);
665 }
666
667 cfs_hlist_node_t *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
668                                             cfs_hash_bd_t *bd, void *key);
669 cfs_hlist_node_t *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
670                                              cfs_hash_bd_t *bd, void *key,
671                                              cfs_hlist_node_t *hnode,
672                                              int insist_add);
673 cfs_hlist_node_t *cfs_hash_bd_finddel_locked(cfs_hash_t *hs,
674                                              cfs_hash_bd_t *bd, void *key,
675                                              cfs_hlist_node_t *hnode);
676
677 /**
678  * operations on cfs_hash bucket (bd: bucket descriptor),
679  * they are safe for hash-table with rehash
680  */
681 void cfs_hash_dual_bd_get(cfs_hash_t *hs, void *key, cfs_hash_bd_t *bds);
682 void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
683 void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl);
684
685 static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, void *key,
686                                                  cfs_hash_bd_t *bds, int excl)
687 {
688         cfs_hash_dual_bd_get(hs, key, bds);
689         cfs_hash_dual_bd_lock(hs, bds, excl);
690 }
691
692 cfs_hlist_node_t *cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs,
693                                                  cfs_hash_bd_t *bds, void *key);
694 cfs_hlist_node_t *cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs,
695                                                   cfs_hash_bd_t *bds, void *key,
696                                                   cfs_hlist_node_t *hnode,
697                                                   int insist_add);
698 cfs_hlist_node_t *cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs,
699                                                   cfs_hash_bd_t *bds, void *key,
700                                                   cfs_hlist_node_t *hnode);
701
702 /* Hash init/cleanup functions */
703 cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits,
704                             unsigned bkt_bits, unsigned extra_bytes,
705                             unsigned min_theta, unsigned max_theta,
706                             cfs_hash_ops_t *ops, unsigned flags);
707
708 cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs);
709 void cfs_hash_putref(cfs_hash_t *hs);
710
711 /* Hash addition functions */
712 void cfs_hash_add(cfs_hash_t *hs, void *key,
713                   cfs_hlist_node_t *hnode);
714 int cfs_hash_add_unique(cfs_hash_t *hs, void *key,
715                         cfs_hlist_node_t *hnode);
716 void *cfs_hash_findadd_unique(cfs_hash_t *hs, void *key,
717                               cfs_hlist_node_t *hnode);
718
719 /* Hash deletion functions */
720 void *cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode);
721 void *cfs_hash_del_key(cfs_hash_t *hs, void *key);
722
723 /* Hash lookup/for_each functions */
724 #define CFS_HASH_LOOP_HOG       1024
725
726 typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd,
727                                       cfs_hlist_node_t *node, void *data);
728 void *cfs_hash_lookup(cfs_hash_t *hs, void *key);
729 void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
730 void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data);
731 int  cfs_hash_for_each_nolock(cfs_hash_t *hs,
732                               cfs_hash_for_each_cb_t, void *data);
733 int  cfs_hash_for_each_empty(cfs_hash_t *hs,
734                              cfs_hash_for_each_cb_t, void *data);
735 void cfs_hash_for_each_key(cfs_hash_t *hs, void *key,
736                            cfs_hash_for_each_cb_t, void *data);
737 typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data);
738 void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data);
739
740 void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex,
741                              cfs_hash_for_each_cb_t, void *data);
742 int  cfs_hash_is_empty(cfs_hash_t *hs);
743 __u64 cfs_hash_size_get(cfs_hash_t *hs);
744
745 /*
746  * Rehash - Theta is calculated to be the average chained
747  * hash depth assuming a perfectly uniform hash function.
748  */
749 void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs);
750 void cfs_hash_rehash_cancel(cfs_hash_t *hs);
751 int  cfs_hash_rehash(cfs_hash_t *hs, int do_rehash);
752 void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key,
753                          void *new_key, cfs_hlist_node_t *hnode);
754
755 #if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1
756 /* Validate hnode references the correct key */
757 static inline void
758 cfs_hash_key_validate(cfs_hash_t *hs, void *key,
759                       cfs_hlist_node_t *hnode)
760 {
761         LASSERT(cfs_hash_keycmp(hs, key, hnode));
762 }
763
764 /* Validate hnode is in the correct bucket */
765 static inline void
766 cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
767                          cfs_hlist_node_t *hnode)
768 {
769         cfs_hash_bd_t   bds[2];
770
771         cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds);
772         LASSERT(bds[0].bd_bucket == bd->bd_bucket ||
773                 bds[1].bd_bucket == bd->bd_bucket);
774 }
775
776 #else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */
777
778 static inline void
779 cfs_hash_key_validate(cfs_hash_t *hs, void *key,
780                       cfs_hlist_node_t *hnode) {}
781
782 static inline void
783 cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd,
784                          cfs_hlist_node_t *hnode) {}
785
786 #endif /* CFS_HASH_DEBUG_LEVEL */
787
/** theta is a fixed-point value with this many fractional bits */
#define CFS_HASH_THETA_BITS  10
/** 0.5 in theta fixed-point representation */
#define CFS_HASH_MIN_THETA  (1U << (CFS_HASH_THETA_BITS - 1))
/** 2.0 in theta fixed-point representation */
#define CFS_HASH_MAX_THETA  (1U << (CFS_HASH_THETA_BITS + 1))

/* Return integer component of theta */
static inline int __cfs_hash_theta_int(int theta)
{
        return (theta >> CFS_HASH_THETA_BITS);
}

/*
 * Return the fractional component of theta in thousandths,
 * a value between 0 and 999.
 *
 * Only the low CFS_HASH_THETA_BITS bits of theta are scaled, so the
 * intermediate product never exceeds 1023 * 1000 and can't overflow an int,
 * unlike scaling the whole of theta by 1000.
 */
static inline int __cfs_hash_theta_frac(int theta)
{
        int frac_bits = theta & ((1 << CFS_HASH_THETA_BITS) - 1);

        return (frac_bits * 1000) >> CFS_HASH_THETA_BITS;
}
804
805 static inline int __cfs_hash_theta(cfs_hash_t *hs)
806 {
807         return (cfs_atomic_read(&hs->hs_count) <<
808                 CFS_HASH_THETA_BITS) >> hs->hs_cur_bits;
809 }
810
811 static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max)
812 {
813         LASSERT(min < max);
814         hs->hs_min_theta = (__u16)min;
815         hs->hs_max_theta = (__u16)max;
816 }
817
818 /* Generic debug formatting routines mainly for proc handler */
819 int cfs_hash_debug_header(char *str, int size);
820 int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size);
821
/*
 * Generic djb2 hash algorithm for character arrays.
 *
 * Hashes the @size bytes at @key (must not be NULL) starting from 5381 with
 * hash = hash * 33 + byte, and returns the result ANDed with @mask.
 * Bytes are read as plain char, so their signedness is the platform's.
 */
static inline unsigned
cfs_hash_djb2_hash(void *key, size_t size, unsigned mask)
{
        const char *bytes = key;
        unsigned    hash = 5381;
        size_t      i;  /* same width as @size, so the loop always ends */

        LASSERT(key != NULL);

        for (i = 0; i < size; i++)
                hash = hash * 33 + bytes[i];

        return (hash & mask);
}
837
838 /*
839  * Generic u32 hash algorithm.
840  */
841 static inline unsigned
842 cfs_hash_u32_hash(__u32 key, unsigned mask)
843 {
844         return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask);
845 }
846
847 /*
848  * Generic u64 hash algorithm.
849  */
850 static inline unsigned
851 cfs_hash_u64_hash(__u64 key, unsigned mask)
852 {
853         return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
854 }
855
/**
 * iterate over all buckets in @bds (array of cfs_hash_bd_t): @i runs from 0
 * and the loop stops at @n entries or at the first entry whose bd_bucket is
 * NULL, whichever comes first.
 * @n and @i are parenthesized so that any expression can be passed safely.
 */
#define cfs_hash_for_each_bd(bds, n, i) \
        for ((i) = 0; (i) < (n) && (bds)[i].bd_bucket != NULL; (i)++)
859
/**
 * iterate over all buckets of @hs: @pos runs from 0 up to CFS_HASH_NBKT(hs)
 * and (bd)->bd_bucket is set to hs_buckets[pos] on each round; the loop
 * also stops at the first NULL bucket.
 * @pos is parenthesized so that an expression such as *p can be passed.
 */
#define cfs_hash_for_each_bucket(hs, bd, pos)                   \
        for ((pos) = 0;                                         \
             (pos) < CFS_HASH_NBKT(hs) &&                       \
             ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; (pos)++)
865
/**
 * iterate over all hlists of bucket @bd: (bd)->bd_offset runs from 0 up to
 * CFS_HASH_BKT_NHLIST(hs) and @hlist is set to cfs_hash_bd_hhead(hs, bd) on
 * each round; the loop also stops if that returns NULL.
 */
#define cfs_hash_bd_for_each_hlist(hs, bd, hlist)               \
        for ((bd)->bd_offset = 0;                               \
             (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) &&       \
             ((hlist) = cfs_hash_bd_hhead(hs, bd)) != NULL;     \
             (bd)->bd_offset++)
872
873 /* !__LIBCFS_HASH_H__ */
874 #endif