Whamcloud - gitweb
6369ada7d27869724583d7de12164d4eee35eca4
[fs/lustre-release.git] / libcfs / libcfs / hash.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/hash.c
37  *
38  * Implement a hash class for hash process in lustre system.
39  *
40  * Author: YuZhangyong <yzy@clusterfs.com>
41  *
42  * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
43  * - Simplified API and improved documentation
44  * - Added per-hash feature flags:
45  *   * CFS_HASH_DEBUG additional validation
46  *   * CFS_HASH_REHASH dynamic rehashing
47  * - Added per-hash statistics
48  * - General performance enhancements
49  *
50  * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
51  * - move all stuff to libcfs
52  * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
53  * - ignore hs_rwlock if without CFS_HASH_REHASH setting
 * - buckets are allocated one by one (instead of contiguous memory),
55  *   to avoid unnecessary cacheline conflict
56  */
57
58 #include <libcfs/libcfs.h>
59
60 static void
61 cfs_hash_rlock(cfs_hash_t *hs)
62 {
63         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
64                 cfs_read_lock(&hs->hs_rwlock);
65 }
66
67 static void
68 cfs_hash_runlock(cfs_hash_t *hs)
69 {
70         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
71                 cfs_read_unlock(&hs->hs_rwlock);
72 }
73
74 static void
75 cfs_hash_wlock(cfs_hash_t *hs)
76 {
77         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
78                 cfs_write_lock(&hs->hs_rwlock);
79 }
80
81 static void
82 cfs_hash_wunlock(cfs_hash_t *hs)
83 {
84         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
85                 cfs_write_unlock(&hs->hs_rwlock);
86 }
87
/**
 * Initialize new libcfs hash, where:
 * @name     - Descriptive hash name
 * @cur_bits - Initial hash table size, in bits
 * @max_bits - Maximum allowed hash table resize, in bits
 * @ops      - Registered hash table operations
 * @flags    - CFS_HASH_REHASH enable dynamic hash resizing
 *           - CFS_HASH_SORT enable chained hash sort
 *
 * Returns the new hash table, or NULL on allocation failure.
 */
cfs_hash_t *
cfs_hash_create(char *name, unsigned int cur_bits,
                unsigned int max_bits, cfs_hash_ops_t *ops, int flags)
{
        cfs_hash_t    *hs;
        int            i;
        ENTRY;

        LASSERT(name != NULL);
        LASSERT(ops != NULL);

        /* cur_bits may only differ from max_bits when dynamic
         * rehashing is enabled (see file-header notes). */
        LASSERT(cur_bits > 0);
        LASSERT(max_bits >= cur_bits);
        LASSERT(max_bits < 31);
        LASSERT(cur_bits == max_bits || (flags & CFS_HASH_REHASH) != 0);

        CFS_ALLOC_PTR(hs);
        if (!hs)
                RETURN(NULL);

        /* Bounded copy with explicit NUL termination; strncpy alone
         * does not guarantee termination on truncation. */
        strncpy(hs->hs_name, name, sizeof(hs->hs_name));
        hs->hs_name[sizeof(hs->hs_name) - 1] = '\0';
        cfs_atomic_set(&hs->hs_rehash_count, 0);
        cfs_atomic_set(&hs->hs_count, 0);
        cfs_rwlock_init(&hs->hs_rwlock);
        hs->hs_cur_bits = cur_bits;
        hs->hs_cur_mask = (1 << cur_bits) - 1;
        hs->hs_min_bits = cur_bits;
        hs->hs_max_bits = max_bits;
        /* XXX: need to fixup cfs_hash_rehash_bits() before this can be
         *      anything other than 0.5 and 2.0 */
        hs->hs_min_theta = 1 << (CFS_HASH_THETA_BITS - 1);
        hs->hs_max_theta = 1 << (CFS_HASH_THETA_BITS + 1);
        hs->hs_ops = ops;
        hs->hs_flags = flags;

        /* theta * 1000 */
        __cfs_hash_set_theta(hs, 500, 2000);

        /* Array of bucket pointers; buckets themselves are allocated
         * one at a time below (see header note: avoids unnecessary
         * cacheline conflicts of one contiguous allocation). */
        LIBCFS_ALLOC(hs->hs_buckets,
                     sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
        if (hs->hs_buckets == NULL) {
                CFS_FREE_PTR(hs);
                RETURN(NULL);
        }

        for (i = 0; i <= hs->hs_cur_mask; i++) {
                CFS_ALLOC_PTR(hs->hs_buckets[i]);
                if (hs->hs_buckets[i] == NULL) {
                        /* Partially built table is torn down by
                         * cfs_hash_destroy(), which skips NULL buckets. */
                        cfs_hash_destroy(hs);
                        return NULL;
                }

                CFS_INIT_HLIST_HEAD(&hs->hs_buckets[i]->hsb_head);
                cfs_rwlock_init(&hs->hs_buckets[i]->hsb_rwlock);
                cfs_atomic_set(&hs->hs_buckets[i]->hsb_count, 0);
        }

        return hs;
}
CFS_EXPORT_SYMBOL(cfs_hash_create);
158
159 /**
160  * Cleanup libcfs hash @hs.
161  */
162 void
163 cfs_hash_destroy(cfs_hash_t *hs)
164 {
165         cfs_hash_bucket_t    *hsb;
166         cfs_hlist_node_t     *hnode;
167         cfs_hlist_node_t     *pos;
168         int                   i;
169         ENTRY;
170
171         LASSERT(hs != NULL);
172
173         cfs_hash_wlock(hs);
174
175         cfs_hash_for_each_bucket(hs, hsb, i) {
176                 if (hsb == NULL)
177                         continue;
178
179                 cfs_write_lock(&hsb->hsb_rwlock);
180                 cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
181                         __cfs_hash_bucket_validate(hs, hsb, hnode);
182                         __cfs_hash_bucket_del(hs, hsb, hnode);
183                         cfs_hash_exit(hs, hnode);
184                 }
185
186                 LASSERT(cfs_hlist_empty(&(hsb->hsb_head)));
187                 LASSERT(cfs_atomic_read(&hsb->hsb_count) == 0);
188                 cfs_write_unlock(&hsb->hsb_rwlock);
189                 CFS_FREE_PTR(hsb);
190         }
191
192         LASSERT(cfs_atomic_read(&hs->hs_count) == 0);
193         cfs_hash_wunlock(hs);
194
195         LIBCFS_FREE(hs->hs_buckets,
196                     sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
197         CFS_FREE_PTR(hs);
198         EXIT;
199 }
200 CFS_EXPORT_SYMBOL(cfs_hash_destroy);
201
202 static inline unsigned int
203 cfs_hash_rehash_bits(cfs_hash_t *hs)
204 {
205         if (!(hs->hs_flags & CFS_HASH_REHASH))
206                 return 0;
207
208         /* XXX: need to handle case with max_theta != 2.0
209          *      and the case with min_theta != 0.5 */
210         if ((hs->hs_cur_bits < hs->hs_max_bits) &&
211             (__cfs_hash_theta(hs) > hs->hs_max_theta))
212                 return hs->hs_cur_bits + 1;
213
214         if ((hs->hs_cur_bits > hs->hs_min_bits) &&
215             (__cfs_hash_theta(hs) < hs->hs_min_theta))
216                 return hs->hs_cur_bits - 1;
217
218         return 0;
219 }
220
221 /**
222  * Add item @hnode to libcfs hash @hs using @key.  The registered
223  * ops->hs_get function will be called when the item is added.
224  */
225 void
226 cfs_hash_add(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
227 {
228         cfs_hash_bucket_t    *hsb;
229         int                   bits;
230         unsigned              i;
231         ENTRY;
232
233         __cfs_hash_key_validate(hs, key, hnode);
234
235         cfs_hash_rlock(hs);
236         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
237         hsb = hs->hs_buckets[i];
238         LASSERT(i <= hs->hs_cur_mask);
239         LASSERT(cfs_hlist_unhashed(hnode));
240
241         cfs_write_lock(&hsb->hsb_rwlock);
242         __cfs_hash_bucket_add(hs, hsb, hnode);
243         cfs_write_unlock(&hsb->hsb_rwlock);
244
245         bits = cfs_hash_rehash_bits(hs);
246         cfs_hash_runlock(hs);
247         if (bits)
248                 cfs_hash_rehash(hs, bits);
249
250         EXIT;
251 }
252 CFS_EXPORT_SYMBOL(cfs_hash_add);
253
254 static cfs_hlist_node_t *
255 cfs_hash_findadd_unique_hnode(cfs_hash_t *hs, void *key,
256                               cfs_hlist_node_t *hnode)
257 {
258         int                   bits = 0;
259         cfs_hlist_node_t     *ehnode;
260         cfs_hash_bucket_t    *hsb;
261         unsigned              i;
262         ENTRY;
263
264         __cfs_hash_key_validate(hs, key, hnode);
265
266         cfs_hash_rlock(hs);
267         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
268         hsb = hs->hs_buckets[i];
269         LASSERT(i <= hs->hs_cur_mask);
270         LASSERT(cfs_hlist_unhashed(hnode));
271
272         cfs_write_lock(&hsb->hsb_rwlock);
273         ehnode = __cfs_hash_bucket_lookup(hs, hsb, key);
274         if (ehnode) {
275                 cfs_hash_get(hs, ehnode);
276         } else {
277                 __cfs_hash_bucket_add(hs, hsb, hnode);
278                 ehnode = hnode;
279                 bits = cfs_hash_rehash_bits(hs);
280         }
281         cfs_write_unlock(&hsb->hsb_rwlock);
282         cfs_hash_runlock(hs);
283         if (bits)
284                 cfs_hash_rehash(hs, bits);
285
286         RETURN(ehnode);
287 }
288
289 /**
290  * Add item @hnode to libcfs hash @hs using @key.  The registered
291  * ops->hs_get function will be called if the item was added.
292  * Returns 0 on success or -EALREADY on key collisions.
293  */
294 int
295 cfs_hash_add_unique(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
296 {
297         cfs_hlist_node_t    *ehnode;
298         ENTRY;
299
300         ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
301         if (ehnode != hnode) {
302                 cfs_hash_put(hs, ehnode);
303                 RETURN(-EALREADY);
304         }
305         RETURN(0);
306 }
307 CFS_EXPORT_SYMBOL(cfs_hash_add_unique);
308
309 /**
310  * Add item @hnode to libcfs hash @hs using @key.  If this @key
311  * already exists in the hash then ops->hs_get will be called on the
312  * conflicting entry and that entry will be returned to the caller.
313  * Otherwise ops->hs_get is called on the item which was added.
314  */
315 void *
316 cfs_hash_findadd_unique(cfs_hash_t *hs, void *key,
317                         cfs_hlist_node_t *hnode)
318 {
319         cfs_hlist_node_t     *ehnode;
320         void                 *obj;
321         ENTRY;
322
323         ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
324         obj = cfs_hash_get(hs, ehnode);
325         cfs_hash_put(hs, ehnode);
326         RETURN(obj);
327 }
328 CFS_EXPORT_SYMBOL(cfs_hash_findadd_unique);
329
330 /**
331  * Delete item @hnode from the libcfs hash @hs using @key.  The @key
332  * is required to ensure the correct hash bucket is locked since there
333  * is no direct linkage from the item to the bucket.  The object
334  * removed from the hash will be returned and obs->hs_put is called
335  * on the removed object.
336  */
337 void *
338 cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
339 {
340         cfs_hash_bucket_t    *hsb;
341         void                 *obj;
342         unsigned              i;
343         ENTRY;
344
345         __cfs_hash_key_validate(hs, key, hnode);
346
347         cfs_hash_rlock(hs);
348         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
349         hsb = hs->hs_buckets[i];
350         LASSERT(i <= hs->hs_cur_mask);
351         LASSERT(!cfs_hlist_unhashed(hnode));
352
353         cfs_write_lock(&hsb->hsb_rwlock);
354         obj = __cfs_hash_bucket_del(hs, hsb, hnode);
355         cfs_write_unlock(&hsb->hsb_rwlock);
356         cfs_hash_runlock(hs);
357
358         RETURN(obj);
359 }
360 CFS_EXPORT_SYMBOL(cfs_hash_del);
361
362 /**
363  * Delete item given @key in libcfs hash @hs.  The first @key found in
364  * the hash will be removed, if the key exists multiple times in the hash
365  * @hs this function must be called once per key.  The removed object
366  * will be returned and ops->hs_put is called on the removed object.
367  */
368 void *
369 cfs_hash_del_key(cfs_hash_t *hs, void *key)
370 {
371         void                 *obj = NULL;
372         cfs_hlist_node_t     *hnode;
373         cfs_hash_bucket_t    *hsb;
374         unsigned              i;
375         ENTRY;
376
377         cfs_hash_rlock(hs);
378         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
379         hsb = hs->hs_buckets[i];
380         LASSERT(i <= hs->hs_cur_mask);
381
382         cfs_write_lock(&hsb->hsb_rwlock);
383         hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
384         if (hnode)
385                 obj = __cfs_hash_bucket_del(hs, hsb, hnode);
386
387         cfs_write_unlock(&hsb->hsb_rwlock);
388         cfs_hash_runlock(hs);
389
390         RETURN(obj);
391 }
392 CFS_EXPORT_SYMBOL(cfs_hash_del_key);
393
394 /**
395  * Lookup an item using @key in the libcfs hash @hs and return it.
396  * If the @key is found in the hash hs->hs_get() is called and the
397  * matching objects is returned.  It is the callers responsibility
398  * to call the counterpart ops->hs_put using the cfs_hash_put() macro
399  * when when finished with the object.  If the @key was not found
400  * in the hash @hs NULL is returned.
401  */
402 void *
403 cfs_hash_lookup(cfs_hash_t *hs, void *key)
404 {
405         void                 *obj = NULL;
406         cfs_hlist_node_t     *hnode;
407         cfs_hash_bucket_t    *hsb;
408         unsigned              i;
409         ENTRY;
410
411         cfs_hash_rlock(hs);
412         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
413         hsb = hs->hs_buckets[i];
414         LASSERT(i <= hs->hs_cur_mask);
415
416         cfs_read_lock(&hsb->hsb_rwlock);
417         hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
418         if (hnode)
419                 obj = cfs_hash_get(hs, hnode);
420
421         cfs_read_unlock(&hsb->hsb_rwlock);
422         cfs_hash_runlock(hs);
423
424         RETURN(obj);
425 }
426 CFS_EXPORT_SYMBOL(cfs_hash_lookup);
427
428 /**
429  * For each item in the libcfs hash @hs call the passed callback @func
430  * and pass to it as an argument each hash item and the private @data.
431  * Before each callback ops->hs_get will be called, and after each
432  * callback ops->hs_put will be called.  Finally, during the callback
433  * the bucket lock is held so the callback must never sleep.
434  */
435 void
436 cfs_hash_for_each(cfs_hash_t *hs,
437                   cfs_hash_for_each_cb_t func, void *data)
438 {
439         cfs_hlist_node_t     *hnode;
440         cfs_hash_bucket_t    *hsb;
441         void                 *obj;
442         int                   i;
443         ENTRY;
444
445         cfs_hash_rlock(hs);
446         cfs_hash_for_each_bucket(hs, hsb, i) {
447                 cfs_read_lock(&hsb->hsb_rwlock);
448                 cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
449                         __cfs_hash_bucket_validate(hs, hsb, hnode);
450                         obj = cfs_hash_get(hs, hnode);
451                         func(obj, data);
452                         (void)cfs_hash_put(hs, hnode);
453                 }
454                 cfs_read_unlock(&hsb->hsb_rwlock);
455         }
456         cfs_hash_runlock(hs);
457
458         EXIT;
459 }
460 CFS_EXPORT_SYMBOL(cfs_hash_for_each);
461
/**
 * For each item in the libcfs hash @hs call the passed callback @func
 * and pass to it as an argument each hash item and the private @data.
 * Before each callback ops->hs_get will be called, and after each
 * callback ops->hs_put will be called.  During the callback the
 * bucket lock will not be held, which allows for the current item
 * to be removed from the hash during the callback.  However, care
 * should be taken to prevent other callers from operating on the
 * hash concurrently or list corruption may occur.
 */
void
cfs_hash_for_each_safe(cfs_hash_t *hs,
                       cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;  /* look-ahead: survives removal of hnode */
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        int                   i;
        ENTRY;

        cfs_hash_rlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_read_lock(&hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        /* Drop the bucket lock across the callback so
                         * @func may unlink the current item; the next
                         * pointer was saved in 'pos' above.  Note the
                         * table read lock is still held here. */
                        cfs_read_unlock(&hsb->hsb_rwlock);
                        func(obj, data);
                        cfs_read_lock(&hsb->hsb_rwlock);
                        (void)cfs_hash_put(hs, hnode);
                }
                cfs_read_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_safe);
500
/**
 * For each hash bucket in the libcfs hash @hs call the passed callback
 * @func until all the hash buckets are empty.  The passed callback @func
 * or the previously registered callback hs->hs_put must remove the item
 * from the hash.  You may either use the cfs_hash_del() or hlist_del()
 * functions.  No rwlocks will be held during the callback @func it is
 * safe to sleep if needed.  This function will not terminate until the
 * hash is empty.  Note it is still possible to concurrently add new
 * items in to the hash.  It is the callers responsibility to ensure
 * the required locking is in place to prevent concurrent insertions.
 */
void
cfs_hash_for_each_empty(cfs_hash_t *hs,
                        cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        int                   i;
        ENTRY;

restart:
        cfs_hash_rlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_write_lock(&hsb->hsb_rwlock);
                while (!cfs_hlist_empty(&hsb->hsb_head)) {
                        /* Always take the chain head; @func (or hs_put)
                         * is required to unlink it, otherwise this scan
                         * would spin on the same node forever. */
                        hnode =  hsb->hsb_head.first;
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        /* Drop every lock before calling @func, then
                         * restart the scan from the first bucket since
                         * the table may have changed (e.g. rehashed)
                         * while unlocked. */
                        cfs_write_unlock(&hsb->hsb_rwlock);
                        cfs_hash_runlock(hs);
                        func(obj, data);
                        (void)cfs_hash_put(hs, hnode);
                        goto restart;
                }
                cfs_write_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_empty);
542
543 /*
544  * For each item in the libcfs hash @hs which matches the @key call
545  * the passed callback @func and pass to it as an argument each hash
546  * item and the private @data.  Before each callback ops->hs_get will
547  * be called, and after each callback ops->hs_put will be called.
548  * Finally, during the callback the bucket lock is held so the
549  * callback must never sleep.
550    */
551 void
552 cfs_hash_for_each_key(cfs_hash_t *hs, void *key,
553                       cfs_hash_for_each_cb_t func, void *data)
554 {
555         cfs_hlist_node_t     *hnode;
556         cfs_hash_bucket_t    *hsb;
557         unsigned              i;
558         ENTRY;
559
560         cfs_hash_rlock(hs);
561         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
562         hsb = hs->hs_buckets[i];
563         LASSERT(i <= hs->hs_cur_mask);
564
565         cfs_read_lock(&hsb->hsb_rwlock);
566         cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
567                 __cfs_hash_bucket_validate(hs, hsb, hnode);
568
569                 if (!cfs_hash_compare(hs, key, hnode))
570                         continue;
571
572                 func(cfs_hash_get(hs, hnode), data);
573                 (void)cfs_hash_put(hs, hnode);
574         }
575
576         cfs_read_unlock(&hsb->hsb_rwlock);
577         cfs_hash_runlock(hs);
578
579         EXIT;
580 }
581 CFS_EXPORT_SYMBOL(cfs_hash_for_each_key);
582
/**
 * Rehash the libcfs hash @hs to the given @bits.  This can be used
 * to grow the hash size when excessive chaining is detected, or to
 * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
 * flag is set in @hs the libcfs hash may be dynamically rehashed
 * during addition or removal if the hash's theta value exceeds
 * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
 * these values are tuned to keep the chained hash depth small, and
 * this approach assumes a reasonably uniform hashing function.  The
 * theta thresholds for @hs are tunable via cfs_hash_set_theta().
 *
 * Returns 0 on success, -ENOMEM if the new buckets cannot be
 * allocated, or -EALREADY if a racing rehash already brought theta
 * back within bounds.
 */
int
cfs_hash_rehash(cfs_hash_t *hs, int bits)
{
        cfs_hlist_node_t      *hnode;
        cfs_hlist_node_t      *pos;
        cfs_hash_bucket_t    **old_buckets;
        cfs_hash_bucket_t    **rehash_buckets;
        cfs_hash_bucket_t     *hs_hsb;
        cfs_hash_bucket_t     *rehash_hsb;
        int                    i;
        int                    theta;
        int                    old_mask;
        int                    old_bits;
        int                    new_mask = (1 << bits) - 1;
        int                    rc = 0;
        void                  *key;
        ENTRY;

        LASSERT(!cfs_in_interrupt());
        LASSERT(new_mask > 0);
        LASSERT((hs->hs_flags & CFS_HASH_REHASH) != 0);

        /* Allocate and initialize the replacement bucket array before
         * taking the table write lock, keeping the critical section
         * as short as possible. */
        LIBCFS_ALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits);
        if (!rehash_buckets)
                RETURN(-ENOMEM);

        for (i = 0; i <= new_mask; i++) {
                CFS_ALLOC_PTR(rehash_buckets[i]);
                if (rehash_buckets[i] == NULL)
                        /* 'free' below frees buckets [0, i) then the array */
                        GOTO(free, rc = -ENOMEM);

                CFS_INIT_HLIST_HEAD(&rehash_buckets[i]->hsb_head);
                cfs_rwlock_init(&rehash_buckets[i]->hsb_rwlock);
                cfs_atomic_set(&rehash_buckets[i]->hsb_count, 0);
        }

        cfs_hash_wlock(hs);

        /*
         * Early return for multiple concurrent racing callers,
         * ensure we only trigger the rehash if it is still needed.
         * Here i == new_mask + 1, so the cleanup below releases the
         * entire (unused) replacement table.
         */
        theta = __cfs_hash_theta(hs);
        if ((theta >= hs->hs_min_theta) && (theta <= hs->hs_max_theta)) {
                cfs_hash_wunlock(hs);
                GOTO(free, rc = -EALREADY);
        }

        old_bits = hs->hs_cur_bits;
        old_buckets = hs->hs_buckets;
        old_mask = (1 << old_bits) - 1;

        /* Publish the new table while holding the table write lock */
        hs->hs_cur_bits = bits;
        hs->hs_cur_mask = (1 << bits) - 1;
        hs->hs_buckets = rehash_buckets;
        cfs_atomic_inc(&hs->hs_rehash_count);

        for (i = 0; i <= old_mask; i++) {
                hs_hsb = old_buckets[i];

                cfs_write_lock(&hs_hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hs_hsb->hsb_head)) {
                        key = cfs_hash_key(hs, hnode);
                        LASSERT(key);

                        /*
                         * Validate hnode is in the correct bucket.
                         */
                        if (unlikely(hs->hs_flags & CFS_HASH_DEBUG))
                                LASSERT(cfs_hash_id(hs, key, old_mask) == i);

                        /*
                         * Delete from old hash bucket.
                         */
                        cfs_hlist_del(hnode);
                        LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) > 0);
                        cfs_atomic_dec(&hs_hsb->hsb_count);

                        /*
                         * Add to rehash bucket, ops->hs_key must be defined.
                         */
                        rehash_hsb = rehash_buckets[cfs_hash_id(hs, key,
                                                                new_mask)];
                        cfs_hlist_add_head(hnode, &(rehash_hsb->hsb_head));
                        cfs_atomic_inc(&rehash_hsb->hsb_count);
                }

                LASSERT(cfs_hlist_empty(&(hs_hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) == 0);
                cfs_write_unlock(&hs_hsb->hsb_rwlock);
        }

        cfs_hash_wunlock(hs);
        /* Success path: retarget (rehash_buckets, i, bits) at the OLD
         * table so the shared cleanup below frees whichever bucket
         * array is now unused -- the new one on error, the old one
         * on success. */
        rehash_buckets = old_buckets;
        i = (1 << old_bits);
        bits = old_bits;
 free:
        while (--i >= 0)
                CFS_FREE_PTR(rehash_buckets[i]);
        LIBCFS_FREE(rehash_buckets, sizeof(*rehash_buckets) << bits);
        RETURN(rc);
}
CFS_EXPORT_SYMBOL(cfs_hash_rehash);
697
698 /**
699  * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
700  * @old_key must be provided to locate the objects previous location
701  * in the hash, and the @new_key will be used to reinsert the object.
702  * Use this function instead of a cfs_hash_add() + cfs_hash_del()
703  * combo when it is critical that there is no window in time where the
704  * object is missing from the hash.  When an object is being rehashed
705  * the registered cfs_hash_get() and cfs_hash_put() functions will
706  * not be called.
707  */
708 void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key, void *new_key,
709                          cfs_hlist_node_t *hnode)
710 {
711         cfs_hash_bucket_t     *old_hsb;
712         cfs_hash_bucket_t     *new_hsb;
713         unsigned               i;
714         unsigned               j;
715         ENTRY;
716
717         __cfs_hash_key_validate(hs, new_key, hnode);
718         LASSERT(!cfs_hlist_unhashed(hnode));
719
720         cfs_hash_rlock(hs);
721
722         i = cfs_hash_id(hs, old_key, hs->hs_cur_mask);
723         old_hsb = hs->hs_buckets[i];
724         LASSERT(i <= hs->hs_cur_mask);
725
726         j = cfs_hash_id(hs, new_key, hs->hs_cur_mask);
727         new_hsb = hs->hs_buckets[j];
728         LASSERT(j <= hs->hs_cur_mask);
729
730         if (i < j) { /* write_lock ordering */
731                 cfs_write_lock(&old_hsb->hsb_rwlock);
732                 cfs_write_lock(&new_hsb->hsb_rwlock);
733         } else if (i > j) {
734                 cfs_write_lock(&new_hsb->hsb_rwlock);
735                 cfs_write_lock(&old_hsb->hsb_rwlock);
736         } else { /* do nothing */
737                 cfs_read_unlock(&hs->hs_rwlock);
738                 EXIT;
739                 return;
740         }
741
742         /*
743          * Migrate item between hash buckets without calling
744          * the cfs_hash_get() and cfs_hash_put() callback functions.
745          */
746         cfs_hlist_del(hnode);
747         LASSERT(cfs_atomic_read(&old_hsb->hsb_count) > 0);
748         cfs_atomic_dec(&old_hsb->hsb_count);
749         cfs_hlist_add_head(hnode, &(new_hsb->hsb_head));
750         cfs_atomic_inc(&new_hsb->hsb_count);
751
752         cfs_write_unlock(&new_hsb->hsb_rwlock);
753         cfs_write_unlock(&old_hsb->hsb_rwlock);
754         cfs_hash_runlock(hs);
755
756         EXIT;
757 }
758 CFS_EXPORT_SYMBOL(cfs_hash_rehash_key);
759
760 int cfs_hash_debug_header(char *str, int size)
761 {
762         return snprintf(str, size,
763                  "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", CFS_MAX_HASH_NAME,
764                  "name", "cur", "min", "max", "theta", "t-min", "t-max",
765                  "flags", "rehash", "count", " distribution");
766 }
767 CFS_EXPORT_SYMBOL(cfs_hash_debug_header);
768
/**
 * Format a one-line summary of hash @hs into @str: table geometry,
 * current/min/max theta, flags, rehash and item counts, and a chain
 * depth-distribution histogram.  Returns the number of characters
 * accumulated from the snprintf() calls, or 0 for a NULL/empty buffer.
 *
 * NOTE(review): the "size - c" terms are not clamped; if the line ever
 * exceeded @size, c would pass size (snprintf returns the would-be
 * length) and the difference would go negative.  Callers are presumed
 * to supply a buffer large enough for one full line -- TODO confirm.
 */
int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size)
{
        cfs_hash_bucket_t     *hsb;
        int                    theta;
        int                    i;
        int                    c = 0;          /* running output length */
        int                    dist[8] = { 0, };

        if (str == NULL || size == 0)
                return 0;

        cfs_hash_rlock(hs);
        theta = __cfs_hash_theta(hs);

        c += snprintf(str + c, size - c, "%-*s ",
                      CFS_MAX_HASH_NAME, hs->hs_name);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_cur_bits);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_min_bits);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_max_bits);
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(theta),
                      __cfs_hash_theta_frac(theta));
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(hs->hs_min_theta),
                      __cfs_hash_theta_frac(hs->hs_min_theta));
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(hs->hs_max_theta),
                      __cfs_hash_theta_frac(hs->hs_max_theta));
        c += snprintf(str + c, size - c, " 0x%02x ", hs->hs_flags);
        c += snprintf(str + c, size - c, "%6d ",
                      cfs_atomic_read(&hs->hs_rehash_count));
        c += snprintf(str + c, size - c, "%5d ",
                      cfs_atomic_read(&hs->hs_count));

        /*
         * The distribution is a summary of the chained hash depth in
         * each of the libcfs hash buckets.  Each buckets hsb_count is
         * divided by the hash theta value and used to generate a
         * histogram of the hash distribution.  A uniform hash will
         * result in all hash buckets being close to the average thus
         * only the first few entries in the histogram will be non-zero.
         * If your hash function results in a non-uniform hash this will
         * be observable by outlier buckets in the distribution histogram.
         *
         * Uniform hash distribution:      128/128/0/0/0/0/0/0
         * Non-Uniform hash distribution:  128/125/0/0/0/0/2/1
         */
        /* max(theta,1) guards the division when theta is 0 */
        cfs_hash_for_each_bucket(hs, hsb, i)
                dist[min(__cfs_fls(cfs_atomic_read(&hsb->hsb_count)/max(theta,1)),7)]++;

        for (i = 0; i < 8; i++)
                c += snprintf(str + c, size - c, "%d%c",  dist[i],
                              (i == 7) ? '\n' : '/');

        cfs_hash_runlock(hs);

        return c;
}
CFS_EXPORT_SYMBOL(cfs_hash_debug_str);