/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/hash.c
 *
 * Implement a hash class for hash process in the Lustre system.
 *
 * Author: YuZhangyong <yzy@clusterfs.com>
 *
 * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
 * - Simplified API and improved documentation
 * - Added per-hash feature flags:
 *   * CFS_HASH_DEBUG additional validation
 *   * CFS_HASH_REHASH dynamic rehashing
 * - Added per-hash statistics
 * - General performance enhancements
 *
 * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
 * - move all stuff to libcfs
 * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
 * - ignore hs_rwlock if without CFS_HASH_REHASH setting
 * - buckets are allocated one by one (instead of contiguous memory),
 *   to avoid unnecessary cacheline conflict
 */

#include <libcfs/libcfs.h>

static void cfs_hash_destroy(cfs_hash_t *hs);

static void
cfs_hash_rlock(cfs_hash_t *hs)
{
        if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
                cfs_read_lock(&hs->hs_rwlock);
}

static void
cfs_hash_runlock(cfs_hash_t *hs)
{
        if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
                cfs_read_unlock(&hs->hs_rwlock);
}

static void
cfs_hash_wlock(cfs_hash_t *hs)
{
        if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
                cfs_write_lock(&hs->hs_rwlock);
}

static void
cfs_hash_wunlock(cfs_hash_t *hs)
{
        if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
                cfs_write_unlock(&hs->hs_rwlock);
}

/**
 * Initialize new libcfs hash, where:
 * @name     - Descriptive hash name
 * @cur_bits - Initial hash table size, in bits
 * @max_bits - Maximum allowed hash table resize, in bits
 * @ops      - Registered hash table operations
 * @flags    - CFS_HASH_REHASH enable dynamic hash resizing
 *           - CFS_HASH_SORT enable chained hash sort
 */
cfs_hash_t *
cfs_hash_create(char *name, unsigned int cur_bits,
                unsigned int max_bits, cfs_hash_ops_t *ops, int flags)
{
        cfs_hash_t    *hs;
        int            i;
        ENTRY;

        LASSERT(name != NULL);
        LASSERT(ops != NULL);
        /* The following ops are required for all hash table types */
        LASSERT(ops->hs_hash != NULL);
        LASSERT(ops->hs_key != NULL);
        LASSERT(ops->hs_compare != NULL);
        LASSERT(ops->hs_get != NULL);
        LASSERT(ops->hs_put != NULL);

        LASSERT(cur_bits > 0);
        LASSERT(max_bits >= cur_bits);
        LASSERT(max_bits < 31);
        LASSERT(cur_bits == max_bits || (flags & CFS_HASH_REHASH) != 0);

        CFS_ALLOC_PTR(hs);
        if (!hs)
                RETURN(NULL);

        strncpy(hs->hs_name, name, sizeof(hs->hs_name));
        hs->hs_name[sizeof(hs->hs_name) - 1] = '\0';
        cfs_atomic_set(&hs->hs_rehash_count, 0);
        cfs_atomic_set(&hs->hs_refcount, 1);
        cfs_atomic_set(&hs->hs_count, 0);
        cfs_rwlock_init(&hs->hs_rwlock);
        hs->hs_cur_bits = cur_bits;
        hs->hs_cur_mask = (1 << cur_bits) - 1;
        hs->hs_min_bits = cur_bits;
        hs->hs_max_bits = max_bits;
        /* XXX: need to fixup cfs_hash_rehash_bits() before this can be
         *      anything other than 0.5 and 2.0 */
        hs->hs_min_theta = 1 << (CFS_HASH_THETA_BITS - 1);
        hs->hs_max_theta = 1 << (CFS_HASH_THETA_BITS + 1);
        hs->hs_ops = ops;
        hs->hs_flags = flags;

        /* theta * 1000 */
        __cfs_hash_set_theta(hs, 500, 2000);

        LIBCFS_ALLOC(hs->hs_buckets,
                     sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
        if (hs->hs_buckets == NULL) {
                CFS_FREE_PTR(hs);
                RETURN(NULL);
        }

        for (i = 0; i <= hs->hs_cur_mask; i++) {
                CFS_ALLOC_PTR(hs->hs_buckets[i]);
                if (hs->hs_buckets[i] == NULL) {
                        cfs_hash_destroy(hs);
                        RETURN(NULL);
                }

                CFS_INIT_HLIST_HEAD(&hs->hs_buckets[i]->hsb_head);
                cfs_rwlock_init(&hs->hs_buckets[i]->hsb_rwlock);
                cfs_atomic_set(&hs->hs_buckets[i]->hsb_count, 0);
        }

        RETURN(hs);
}
CFS_EXPORT_SYMBOL(cfs_hash_create);

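/*
 * Example: creating and releasing a hash (a hedged sketch, not part of
 * this file).  The cfs_hash_ops_t callbacks below are assumptions: their
 * exact signatures live in the libcfs hash header and may differ; the
 * struct "my_obj" and all "my_*" names are hypothetical.
 *
 *      static cfs_hash_ops_t my_ops = {
 *              .hs_hash    = my_hash,     // map a key to a bucket index
 *              .hs_key     = my_key,      // return the key for a hash node
 *              .hs_compare = my_compare,  // compare a key with a node's key
 *              .hs_get     = my_get,      // take a reference on the object
 *              .hs_put     = my_put,      // drop a reference on the object
 *      };
 *
 *      // 2^7 = 128 buckets initially, may grow up to 2^12 = 4096
 *      cfs_hash_t *hs = cfs_hash_create("my_hash", 7, 12, &my_ops,
 *                                       CFS_HASH_REHASH);
 *      if (hs == NULL)
 *              return -ENOMEM;
 *      ...
 *      cfs_hash_putref(hs);    // drops the initial ref, destroys the hash
 */
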
/**
 * Cleanup libcfs hash @hs.
 */
static void
cfs_hash_destroy(cfs_hash_t *hs)
{
        cfs_hash_bucket_t    *hsb;
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;
        int                   i;
        ENTRY;

        LASSERT(hs != NULL);

        cfs_hash_wlock(hs);

        cfs_hash_for_each_bucket(hs, hsb, i) {
                if (hsb == NULL)
                        continue;

                cfs_write_lock(&hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        __cfs_hash_bucket_del(hs, hsb, hnode);
                        cfs_hash_exit(hs, hnode);
                }

                LASSERT(cfs_hlist_empty(&(hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hsb->hsb_count) == 0);
                cfs_write_unlock(&hsb->hsb_rwlock);
                CFS_FREE_PTR(hsb);
        }

        LASSERT(cfs_atomic_read(&hs->hs_count) == 0);
        cfs_hash_wunlock(hs);

        LIBCFS_FREE(hs->hs_buckets,
                    sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
        CFS_FREE_PTR(hs);
        EXIT;
}

cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs)
{
        if (cfs_atomic_inc_not_zero(&hs->hs_refcount))
                return hs;
        return NULL;
}
CFS_EXPORT_SYMBOL(cfs_hash_getref);

void cfs_hash_putref(cfs_hash_t *hs)
{
        if (cfs_atomic_dec_and_test(&hs->hs_refcount))
                cfs_hash_destroy(hs);
}
CFS_EXPORT_SYMBOL(cfs_hash_putref);

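/*
 * Example: pinning a hash across a blocking operation (a hedged sketch).
 * cfs_hash_getref() returns NULL once the last reference has been
 * dropped, so a dying hash can never be resurrected.
 *
 *      cfs_hash_t *pinned = cfs_hash_getref(hs);
 *      if (pinned != NULL) {
 *              ...                     // hs cannot be destroyed here
 *              cfs_hash_putref(pinned);
 *      }
 */
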
static inline unsigned int
cfs_hash_rehash_bits(cfs_hash_t *hs)
{
        if (!(hs->hs_flags & CFS_HASH_REHASH))
                return 0;

        /* XXX: need to handle case with max_theta != 2.0
         *      and the case with min_theta != 0.5 */
        if ((hs->hs_cur_bits < hs->hs_max_bits) &&
            (__cfs_hash_theta(hs) > hs->hs_max_theta))
                return hs->hs_cur_bits + 1;

        if ((hs->hs_cur_bits > hs->hs_min_bits) &&
            (__cfs_hash_theta(hs) < hs->hs_min_theta))
                return hs->hs_cur_bits - 1;

        return 0;
}

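/*
 * Worked example (an illustration, assuming __cfs_hash_theta() returns
 * the average chain depth scaled by 1000, per the "theta * 1000" comment
 * in cfs_hash_create() above): with hs_cur_bits = 10 (1024 buckets) and
 * 3000 items, theta = 3000 * 1000 / 1024 ~= 2929.  That exceeds the
 * default hs_max_theta of 2000 (2.0), so cfs_hash_rehash_bits() returns
 * 11 and the table grows to 2048 buckets, bringing theta back to ~1464.
 * Conversely, dropping to 400 items gives theta ~= 390 < 500 (0.5), so
 * the table would shrink one bit at a time toward hs_min_bits.
 */
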
/**
 * Add item @hnode to libcfs hash @hs using @key.  The registered
 * ops->hs_get function will be called when the item is added.
 */
void
cfs_hash_add(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
{
        cfs_hash_bucket_t    *hsb;
        int                   bits;
        unsigned              i;
        ENTRY;

        __cfs_hash_key_validate(hs, key, hnode);

        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);
        LASSERT(cfs_hlist_unhashed(hnode));

        cfs_write_lock(&hsb->hsb_rwlock);
        __cfs_hash_bucket_add(hs, hsb, hnode);
        cfs_write_unlock(&hsb->hsb_rwlock);

        bits = cfs_hash_rehash_bits(hs);
        cfs_hash_runlock(hs);
        if (bits)
                cfs_hash_rehash(hs, bits);

        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_add);

static cfs_hlist_node_t *
cfs_hash_findadd_unique_hnode(cfs_hash_t *hs, void *key,
                              cfs_hlist_node_t *hnode)
{
        int                   bits = 0;
        cfs_hlist_node_t     *ehnode;
        cfs_hash_bucket_t    *hsb;
        unsigned              i;
        ENTRY;

        __cfs_hash_key_validate(hs, key, hnode);

        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);
        LASSERT(cfs_hlist_unhashed(hnode));

        cfs_write_lock(&hsb->hsb_rwlock);
        ehnode = __cfs_hash_bucket_lookup(hs, hsb, key);
        if (ehnode) {
                cfs_hash_get(hs, ehnode);
        } else {
                __cfs_hash_bucket_add(hs, hsb, hnode);
                ehnode = hnode;
                bits = cfs_hash_rehash_bits(hs);
        }
        cfs_write_unlock(&hsb->hsb_rwlock);
        cfs_hash_runlock(hs);
        if (bits)
                cfs_hash_rehash(hs, bits);

        RETURN(ehnode);
}

/**
 * Add item @hnode to libcfs hash @hs using @key.  The registered
 * ops->hs_get function will be called if the item was added.
 * Returns 0 on success or -EALREADY on key collisions.
 */
int
cfs_hash_add_unique(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
{
        cfs_hlist_node_t    *ehnode;
        ENTRY;

        ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
        if (ehnode != hnode) {
                cfs_hash_put(hs, ehnode);
                RETURN(-EALREADY);
        }
        RETURN(0);
}
CFS_EXPORT_SYMBOL(cfs_hash_add_unique);

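/*
 * Example: unique insertion (a hedged sketch; "my_obj", "my_obj_alloc",
 * "my_obj_free" and the embedded "mo_key"/"mo_node" fields are all
 * hypothetical).
 *
 *      struct my_obj *obj = my_obj_alloc();
 *      int rc;
 *
 *      rc = cfs_hash_add_unique(hs, &obj->mo_key, &obj->mo_node);
 *      if (rc == -EALREADY) {
 *              // Another thread already inserted this key; the new
 *              // object was not added, so it is safe to free it.
 *              my_obj_free(obj);
 *      }
 */
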
/**
 * Add item @hnode to libcfs hash @hs using @key.  If this @key
 * already exists in the hash then ops->hs_get will be called on the
 * conflicting entry and that entry will be returned to the caller.
 * Otherwise ops->hs_get is called on the item which was added.
 */
void *
cfs_hash_findadd_unique(cfs_hash_t *hs, void *key,
                        cfs_hlist_node_t *hnode)
{
        cfs_hlist_node_t     *ehnode;
        void                 *obj;
        ENTRY;

        ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
        obj = cfs_hash_get(hs, ehnode);
        cfs_hash_put(hs, ehnode);
        RETURN(obj);
}
CFS_EXPORT_SYMBOL(cfs_hash_findadd_unique);

/**
 * Delete item @hnode from the libcfs hash @hs using @key.  The @key
 * is required to ensure the correct hash bucket is locked since there
 * is no direct linkage from the item to the bucket.  The object
 * removed from the hash will be returned and ops->hs_put is called
 * on the removed object.
 */
void *
cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
{
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        unsigned              i;
        ENTRY;

        __cfs_hash_key_validate(hs, key, hnode);

        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);
        LASSERT(!cfs_hlist_unhashed(hnode));

        cfs_write_lock(&hsb->hsb_rwlock);
        obj = __cfs_hash_bucket_del(hs, hsb, hnode);
        cfs_write_unlock(&hsb->hsb_rwlock);
        cfs_hash_runlock(hs);

        RETURN(obj);
}
CFS_EXPORT_SYMBOL(cfs_hash_del);

/**
 * Delete item given @key in libcfs hash @hs.  The first @key found in
 * the hash will be removed; if the key exists multiple times in the hash
 * @hs this function must be called once per key.  The removed object
 * will be returned and ops->hs_put is called on the removed object.
 */
void *
cfs_hash_del_key(cfs_hash_t *hs, void *key)
{
        void                 *obj = NULL;
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        unsigned              i;
        ENTRY;

        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);

        cfs_write_lock(&hsb->hsb_rwlock);
        hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
        if (hnode)
                obj = __cfs_hash_bucket_del(hs, hsb, hnode);

        cfs_write_unlock(&hsb->hsb_rwlock);
        cfs_hash_runlock(hs);

        RETURN(obj);
}
CFS_EXPORT_SYMBOL(cfs_hash_del_key);

/**
 * Lookup an item using @key in the libcfs hash @hs and return it.
 * If the @key is found in the hash hs->hs_get() is called and the
 * matching object is returned.  It is the caller's responsibility
 * to call the counterpart ops->hs_put using the cfs_hash_put() macro
 * when finished with the object.  If the @key was not found
 * in the hash @hs NULL is returned.
 */
void *
cfs_hash_lookup(cfs_hash_t *hs, void *key)
{
        void                 *obj = NULL;
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        unsigned              i;
        ENTRY;

        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);

        cfs_read_lock(&hsb->hsb_rwlock);
        hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
        if (hnode)
                obj = cfs_hash_get(hs, hnode);

        cfs_read_unlock(&hsb->hsb_rwlock);
        cfs_hash_runlock(hs);

        RETURN(obj);
}
CFS_EXPORT_SYMBOL(cfs_hash_lookup);

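/*
 * Example: lookup with the required put (a hedged sketch; "my_obj",
 * "do_something" and the "mo_node" field are hypothetical).
 *
 *      struct my_obj *obj = cfs_hash_lookup(hs, &key);
 *      if (obj != NULL) {
 *              // ops->hs_get was called for us; use the object, then
 *              // drop the reference taken by the lookup.
 *              do_something(obj);
 *              cfs_hash_put(hs, &obj->mo_node);
 *      }
 */
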
/**
 * For each item in the libcfs hash @hs call the passed callback @func
 * and pass to it as an argument each hash item and the private @data.
 * Before each callback ops->hs_get will be called, and after each
 * callback ops->hs_put will be called.  Finally, during the callback
 * the bucket lock is held so the callback must never sleep.
 */
void
cfs_hash_for_each(cfs_hash_t *hs,
                  cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        int                   i;
        ENTRY;

        cfs_hash_rlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_read_lock(&hsb->hsb_rwlock);
                cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        func(obj, data);
                        (void)cfs_hash_put(hs, hnode);
                }
                cfs_read_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);

        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each);

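/*
 * Example: counting items with a non-sleeping callback (a hedged sketch;
 * the cfs_hash_for_each_cb_t signature is assumed from how @func is
 * invoked in the loop above).
 *
 *      static void my_count_cb(void *obj, void *data)
 *      {
 *              // The bucket lock is held here: no sleeping or blocking.
 *              (*(int *)data)++;
 *      }
 *
 *      int count = 0;
 *      cfs_hash_for_each(hs, my_count_cb, &count);
 */
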
/**
 * For each item in the libcfs hash @hs call the passed callback @func
 * and pass to it as an argument each hash item and the private @data.
 * Before each callback ops->hs_get will be called, and after each
 * callback ops->hs_put will be called.  During the callback the
 * bucket lock will not be held, which allows the current item
 * to be removed from the hash during the callback.  However, care
 * should be taken to prevent other callers from operating on the
 * hash concurrently or list corruption may occur.
 */
void
cfs_hash_for_each_safe(cfs_hash_t *hs,
                       cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        int                   i;
        ENTRY;

        cfs_hash_rlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_read_lock(&hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        cfs_read_unlock(&hsb->hsb_rwlock);
                        func(obj, data);
                        cfs_read_lock(&hsb->hsb_rwlock);
                        (void)cfs_hash_put(hs, hnode);
                }
                cfs_read_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_safe);

/**
 * For each hash bucket in the libcfs hash @hs call the passed callback
 * @func until all the hash buckets are empty.  The passed callback @func
 * or the previously registered callback hs->hs_put must remove the item
 * from the hash.  You may either use the cfs_hash_del() or hlist_del()
 * functions.  No rwlocks will be held during the callback @func, so it
 * is safe to sleep if needed.  This function will not terminate until
 * the hash is empty.  Note it is still possible to concurrently add new
 * items into the hash.  It is the caller's responsibility to ensure
 * the required locking is in place to prevent concurrent insertions.
 */
void
cfs_hash_for_each_empty(cfs_hash_t *hs,
                        cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        cfs_hash_bucket_t    **hsb_last = NULL;
        void                 *obj;
        int                   i = 0;
        ENTRY;

restart:
        cfs_hash_rlock(hs);
        /* If the hash table has changed since we last held lh_rwlock,
         * we need to start traversing the list from the start. */
        if (hs->hs_buckets != hsb_last) {
                i = 0;
                hsb_last = hs->hs_buckets;
        }
        cfs_hash_for_each_bucket_restart(hs, hsb, i) {
                cfs_write_lock(&hsb->hsb_rwlock);
                while (!cfs_hlist_empty(&hsb->hsb_head)) {
                        hnode = hsb->hsb_head.first;
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        cfs_write_unlock(&hsb->hsb_rwlock);
                        cfs_hash_runlock(hs);
                        func(obj, data);
                        (void)cfs_hash_put(hs, hnode);
                        cfs_cond_resched();
                        goto restart;
                }
                cfs_write_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_empty);

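/*
 * Example: draining a hash at shutdown (a hedged sketch; "my_obj" and
 * its "mo_key"/"mo_node" fields are hypothetical).  The callback must
 * remove the item, otherwise cfs_hash_for_each_empty() loops forever.
 *
 *      static void my_drain_cb(void *data_obj, void *data)
 *      {
 *              struct my_obj *obj = data_obj;
 *              cfs_hash_t *hs = data;
 *
 *              // Sleeping is allowed here: no rwlocks are held.
 *              cfs_hash_del(hs, &obj->mo_key, &obj->mo_node);
 *      }
 *
 *      cfs_hash_for_each_empty(hs, my_drain_cb, hs);
 *      cfs_hash_putref(hs);
 */
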
/*
 * For each item in the libcfs hash @hs which matches the @key call
 * the passed callback @func and pass to it as an argument each hash
 * item and the private @data.  Before each callback ops->hs_get will
 * be called, and after each callback ops->hs_put will be called.
 * Finally, during the callback the bucket lock is held so the
 * callback must never sleep.
 */
void
cfs_hash_for_each_key(cfs_hash_t *hs, void *key,
                      cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        unsigned              i;
        ENTRY;

        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);

        cfs_read_lock(&hsb->hsb_rwlock);
        cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
                __cfs_hash_bucket_validate(hs, hsb, hnode);

                if (!cfs_hash_compare(hs, key, hnode))
                        continue;

                func(cfs_hash_get(hs, hnode), data);
                (void)cfs_hash_put(hs, hnode);
        }

        cfs_read_unlock(&hsb->hsb_rwlock);
        cfs_hash_runlock(hs);

        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_key);

/**
 * Rehash the libcfs hash @hs to the given @bits.  This can be used
 * to grow the hash size when excessive chaining is detected, or to
 * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
 * flag is set in @hs the libcfs hash may be dynamically rehashed
 * during addition or removal if the hash's theta value exceeds
 * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
 * these values are tuned to keep the chained hash depth small, and
 * this approach assumes a reasonably uniform hashing function.  The
 * theta thresholds for @hs are tunable via cfs_hash_set_theta().
 */
int
cfs_hash_rehash(cfs_hash_t *hs, int bits)
{
        cfs_hlist_node_t      *hnode;
        cfs_hlist_node_t      *pos;
        cfs_hash_bucket_t    **old_buckets;
        cfs_hash_bucket_t    **rehash_buckets;
        cfs_hash_bucket_t     *hs_hsb;
        cfs_hash_bucket_t     *rehash_hsb;
        int                    i;
        int                    theta;
        int                    old_mask;
        int                    old_bits;
        int                    new_mask = (1 << bits) - 1;
        int                    rc = 0;
        void                  *key;
        ENTRY;

        LASSERT(!cfs_in_interrupt());
        LASSERT(new_mask > 0);
        LASSERT((hs->hs_flags & CFS_HASH_REHASH) != 0);

        LIBCFS_ALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits);
        if (!rehash_buckets)
                RETURN(-ENOMEM);

        for (i = 0; i <= new_mask; i++) {
                CFS_ALLOC_PTR(rehash_buckets[i]);
                if (rehash_buckets[i] == NULL)
                        GOTO(free, rc = -ENOMEM);

                CFS_INIT_HLIST_HEAD(&rehash_buckets[i]->hsb_head);
                cfs_rwlock_init(&rehash_buckets[i]->hsb_rwlock);
                cfs_atomic_set(&rehash_buckets[i]->hsb_count, 0);
        }

        cfs_hash_wlock(hs);

        /*
         * Early return for multiple concurrent racing callers,
         * ensure we only trigger the rehash if it is still needed.
         */
        theta = __cfs_hash_theta(hs);
        if ((theta >= hs->hs_min_theta) && (theta <= hs->hs_max_theta)) {
                cfs_hash_wunlock(hs);
                GOTO(free, rc = -EALREADY);
        }

        old_bits = hs->hs_cur_bits;
        old_buckets = hs->hs_buckets;
        old_mask = (1 << old_bits) - 1;

        hs->hs_cur_bits = bits;
        hs->hs_cur_mask = (1 << bits) - 1;
        hs->hs_buckets = rehash_buckets;
        cfs_atomic_inc(&hs->hs_rehash_count);

        for (i = 0; i <= old_mask; i++) {
                hs_hsb = old_buckets[i];

                cfs_write_lock(&hs_hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hs_hsb->hsb_head)) {
                        key = cfs_hash_key(hs, hnode);
                        LASSERT(key);

                        /*
                         * Validate hnode is in the correct bucket.
                         */
                        if (unlikely(hs->hs_flags & CFS_HASH_DEBUG))
                                LASSERT(cfs_hash_id(hs, key, old_mask) == i);

                        /*
                         * Delete from old hash bucket.
                         */
                        cfs_hlist_del(hnode);
                        LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) > 0);
                        cfs_atomic_dec(&hs_hsb->hsb_count);

                        /*
                         * Add to rehash bucket, ops->hs_key must be defined.
                         */
                        rehash_hsb = rehash_buckets[cfs_hash_id(hs, key,
                                                                new_mask)];
                        cfs_hlist_add_head(hnode, &(rehash_hsb->hsb_head));
                        cfs_atomic_inc(&rehash_hsb->hsb_count);
                }

                LASSERT(cfs_hlist_empty(&(hs_hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) == 0);
                cfs_write_unlock(&hs_hsb->hsb_rwlock);
        }

        cfs_hash_wunlock(hs);
        rehash_buckets = old_buckets;
        i = (1 << old_bits);
        bits = old_bits;
 free:
        while (--i >= 0)
                CFS_FREE_PTR(rehash_buckets[i]);
        LIBCFS_FREE(rehash_buckets, sizeof(*rehash_buckets) << bits);
        RETURN(rc);
}
CFS_EXPORT_SYMBOL(cfs_hash_rehash);

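/*
 * Example: forcing a manual resize (an illustration).  With
 * CFS_HASH_REHASH set this normally happens automatically from
 * cfs_hash_add() and cfs_hash_findadd_unique_hnode(), but a caller that
 * knows the expected population can try to grow the table up front:
 *
 *      int rc = cfs_hash_rehash(hs, 12);       // 2^12 = 4096 buckets
 *      if (rc == -EALREADY)
 *              ;       // theta was already within bounds, nothing done
 *      else if (rc == -ENOMEM)
 *              ;       // the new bucket array could not be allocated
 */
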
/**
 * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
 * @old_key must be provided to locate the object's previous location
 * in the hash, and the @new_key will be used to reinsert the object.
 * Use this function instead of a cfs_hash_add() + cfs_hash_del()
 * combo when it is critical that there is no window in time where the
 * object is missing from the hash.  When an object is being rehashed
 * the registered cfs_hash_get() and cfs_hash_put() functions will
 * not be called.
 */
void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key, void *new_key,
                         cfs_hlist_node_t *hnode)
{
        cfs_hash_bucket_t     *old_hsb;
        cfs_hash_bucket_t     *new_hsb;
        unsigned               i;
        unsigned               j;
        ENTRY;

        __cfs_hash_key_validate(hs, new_key, hnode);
        LASSERT(!cfs_hlist_unhashed(hnode));

        cfs_hash_rlock(hs);

        i = cfs_hash_id(hs, old_key, hs->hs_cur_mask);
        old_hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);

        j = cfs_hash_id(hs, new_key, hs->hs_cur_mask);
        new_hsb = hs->hs_buckets[j];
        LASSERT(j <= hs->hs_cur_mask);

        if (i < j) { /* write_lock ordering */
                cfs_write_lock(&old_hsb->hsb_rwlock);
                cfs_write_lock(&new_hsb->hsb_rwlock);
        } else if (i > j) {
                cfs_write_lock(&new_hsb->hsb_rwlock);
                cfs_write_lock(&old_hsb->hsb_rwlock);
        } else { /* same bucket, nothing to migrate */
                cfs_hash_runlock(hs);
                EXIT;
                return;
        }

        /*
         * Migrate item between hash buckets without calling
         * the cfs_hash_get() and cfs_hash_put() callback functions.
         */
        cfs_hlist_del(hnode);
        LASSERT(cfs_atomic_read(&old_hsb->hsb_count) > 0);
        cfs_atomic_dec(&old_hsb->hsb_count);
        cfs_hlist_add_head(hnode, &(new_hsb->hsb_head));
        cfs_atomic_inc(&new_hsb->hsb_count);

        cfs_write_unlock(&new_hsb->hsb_rwlock);
        cfs_write_unlock(&old_hsb->hsb_rwlock);
        cfs_hash_runlock(hs);

        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_rehash_key);

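/*
 * Example: atomically re-keying an object (a hedged sketch; "my_obj"
 * and its "mo_key"/"mo_node" fields are hypothetical).  Unlike a
 * cfs_hash_del() + cfs_hash_add() pair, the object is never absent
 * from the hash.  The object's stored key is updated first so that it
 * matches @new_key when the node is validated and reinserted.
 *
 *      old_key = obj->mo_key;
 *      obj->mo_key = new_key;
 *      cfs_hash_rehash_key(hs, &old_key, &obj->mo_key, &obj->mo_node);
 */
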
int cfs_hash_debug_header(char *str, int size)
{
        return snprintf(str, size,
                 "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", CFS_MAX_HASH_NAME,
                 "name", "cur", "min", "max", "theta", "t-min", "t-max",
                 "flags", "rehash", "count", " distribution");
}
CFS_EXPORT_SYMBOL(cfs_hash_debug_header);

int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size)
{
        cfs_hash_bucket_t     *hsb;
        int                    theta;
        int                    i;
        int                    c = 0;
        int                    dist[8] = { 0, };

        if (str == NULL || size == 0)
                return 0;

        cfs_hash_rlock(hs);
        theta = __cfs_hash_theta(hs);

        c += snprintf(str + c, size - c, "%-*s ",
                      CFS_MAX_HASH_NAME, hs->hs_name);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_cur_bits);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_min_bits);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_max_bits);
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(theta),
                      __cfs_hash_theta_frac(theta));
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(hs->hs_min_theta),
                      __cfs_hash_theta_frac(hs->hs_min_theta));
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(hs->hs_max_theta),
                      __cfs_hash_theta_frac(hs->hs_max_theta));
        c += snprintf(str + c, size - c, " 0x%02x ", hs->hs_flags);
        c += snprintf(str + c, size - c, "%6d ",
                      cfs_atomic_read(&hs->hs_rehash_count));
        c += snprintf(str + c, size - c, "%5d ",
                      cfs_atomic_read(&hs->hs_count));

        /*
         * The distribution is a summary of the chained hash depth in
         * each of the libcfs hash buckets.  Each bucket's hsb_count is
         * divided by the hash theta value and used to generate a
         * histogram of the hash distribution.  A uniform hash will
         * result in all hash buckets being close to the average, thus
         * only the first few entries in the histogram will be non-zero.
         * If your hash function results in a non-uniform hash, this will
         * be observable as outlier buckets in the distribution histogram.
         *
         * Uniform hash distribution:      128/128/0/0/0/0/0/0
         * Non-Uniform hash distribution:  128/125/0/0/0/0/2/1
         */
        cfs_hash_for_each_bucket(hs, hsb, i)
                dist[min(__cfs_fls(cfs_atomic_read(&hsb->hsb_count) /
                                   max(theta, 1)), 7)]++;

        for (i = 0; i < 8; i++)
                c += snprintf(str + c, size - c, "%d%c",  dist[i],
                              (i == 7) ? '\n' : '/');

        cfs_hash_runlock(hs);

        return c;
}
CFS_EXPORT_SYMBOL(cfs_hash_debug_str);
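
/*
 * Example: dumping hash statistics (a hedged sketch; the buffer size is
 * arbitrary and "printk" stands in for whatever logging facility the
 * caller uses; in Lustre this is typically wired up behind a /proc file).
 *
 *      char buf[256];
 *
 *      cfs_hash_debug_header(buf, sizeof(buf));
 *      printk("%s", buf);
 *      cfs_hash_debug_str(hs, buf, sizeof(buf));
 *      printk("%s", buf);
 */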