Whamcloud - gitweb
b=20953 sanity-quota test 30 fixes
[fs/lustre-release.git] / libcfs / libcfs / hash.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/hash.c
37  *
38  * Implement a hash class for hash process in lustre system.
39  *
40  * Author: YuZhangyong <yzy@clusterfs.com>
41  *
42  * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
43  * - Simplified API and improved documentation
44  * - Added per-hash feature flags:
45  *   * CFS_HASH_DEBUG additional validation
46  *   * CFS_HASH_REHASH dynamic rehashing
47  * - Added per-hash statistics
48  * - General performance enhancements
49  *
50  * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
51  * - move all stuff to libcfs
52  * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
53  * - ignore hs_rwlock if without CFS_HASH_REHASH setting
 * - buckets are allocated one by one (instead of contiguous memory),
55  *   to avoid unnecessary cacheline conflict
56  */
57
58 #include <libcfs/libcfs.h>
59
60 static void cfs_hash_destroy(cfs_hash_t *hs);
61
62 static void
63 cfs_hash_rlock(cfs_hash_t *hs)
64 {
65         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
66                 cfs_read_lock(&hs->hs_rwlock);
67 }
68
69 static void
70 cfs_hash_runlock(cfs_hash_t *hs)
71 {
72         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
73                 cfs_read_unlock(&hs->hs_rwlock);
74 }
75
76 static void
77 cfs_hash_wlock(cfs_hash_t *hs)
78 {
79         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
80                 cfs_write_lock(&hs->hs_rwlock);
81 }
82
83 static void
84 cfs_hash_wunlock(cfs_hash_t *hs)
85 {
86         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
87                 cfs_write_unlock(&hs->hs_rwlock);
88 }
89
90 /**
91  * Initialize new libcfs hash, where:
92  * @name     - Descriptive hash name
93  * @cur_bits - Initial hash table size, in bits
94  * @max_bits - Maximum allowed hash table resize, in bits
95  * @ops      - Registered hash table operations
 * @flags    - CFS_HASH_REHASH enables dynamic hash resizing
97  *           - CFS_HASH_SORT enable chained hash sort
98  */
99 cfs_hash_t *
100 cfs_hash_create(char *name, unsigned int cur_bits,
101                 unsigned int max_bits, cfs_hash_ops_t *ops, int flags)
102 {
103         cfs_hash_t    *hs;
104         int            i;
105         ENTRY;
106
107         LASSERT(name != NULL);
108         LASSERT(ops != NULL);
109
110         LASSERT(cur_bits > 0);
111         LASSERT(max_bits >= cur_bits);
112         LASSERT(max_bits < 31);
113         LASSERT(cur_bits == max_bits || (flags & CFS_HASH_REHASH) != 0);
114
115         CFS_ALLOC_PTR(hs);
116         if (!hs)
117                 RETURN(NULL);
118
119         strncpy(hs->hs_name, name, sizeof(hs->hs_name));
120         hs->hs_name[sizeof(hs->hs_name) - 1] = '\0';
121         cfs_atomic_set(&hs->hs_rehash_count, 0);
122         cfs_atomic_set(&hs->hs_refcount, 1);
123         cfs_atomic_set(&hs->hs_count, 0);
124         cfs_rwlock_init(&hs->hs_rwlock);
125         hs->hs_cur_bits = cur_bits;
126         hs->hs_cur_mask = (1 << cur_bits) - 1;
127         hs->hs_min_bits = cur_bits;
128         hs->hs_max_bits = max_bits;
129         /* XXX: need to fixup cfs_hash_rehash_bits() before this can be
130          *      anything other than 0.5 and 2.0 */
131         hs->hs_min_theta = 1 << (CFS_HASH_THETA_BITS - 1);
132         hs->hs_max_theta = 1 << (CFS_HASH_THETA_BITS + 1);
133         hs->hs_ops = ops;
134         hs->hs_flags = flags;
135
136         /* theta * 1000 */
137         __cfs_hash_set_theta(hs, 500, 2000);
138
139         LIBCFS_ALLOC(hs->hs_buckets,
140                      sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
141         if (hs->hs_buckets == NULL) {
142                 CFS_FREE_PTR(hs);
143                 RETURN(NULL);
144         }
145
146         for (i = 0; i <= hs->hs_cur_mask; i++) {
147                 CFS_ALLOC_PTR(hs->hs_buckets[i]);
148                 if (hs->hs_buckets[i] == NULL) {
149                         cfs_hash_destroy(hs);
150                         return NULL;
151                 }
152
153                 CFS_INIT_HLIST_HEAD(&hs->hs_buckets[i]->hsb_head);
154                 cfs_rwlock_init(&hs->hs_buckets[i]->hsb_rwlock);
155                 cfs_atomic_set(&hs->hs_buckets[i]->hsb_count, 0);
156         }
157
158         return hs;
159 }
160 CFS_EXPORT_SYMBOL(cfs_hash_create);
161
/**
 * Tear down libcfs hash @hs: unlink and cfs_hash_exit() every remaining
 * item, free each bucket, then free the bucket array and @hs itself.
 *
 * Called from cfs_hash_putref() when the last reference is dropped, and
 * from cfs_hash_create() to unwind a partial allocation — NULL bucket
 * slots are skipped for exactly that reason.
 */
static void
cfs_hash_destroy(cfs_hash_t *hs)
{
        cfs_hash_bucket_t    *hsb;
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;
        int                   i;
        ENTRY;

        LASSERT(hs != NULL);

        /* Exclude a concurrent rehash (no-op without CFS_HASH_REHASH). */
        cfs_hash_wlock(hs);

        cfs_hash_for_each_bucket(hs, hsb, i) {
                if (hsb == NULL)        /* partially created table */
                        continue;

                cfs_write_lock(&hsb->hsb_rwlock);
                /* _safe variant: each node is unlinked inside the loop. */
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        __cfs_hash_bucket_del(hs, hsb, hnode);
                        cfs_hash_exit(hs, hnode);
                }

                /* The loop above must have emptied the chain. */
                LASSERT(cfs_hlist_empty(&(hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hsb->hsb_count) == 0);
                cfs_write_unlock(&hsb->hsb_rwlock);
                CFS_FREE_PTR(hsb);
        }

        LASSERT(cfs_atomic_read(&hs->hs_count) == 0);
        cfs_hash_wunlock(hs);

        LIBCFS_FREE(hs->hs_buckets,
                    sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
        CFS_FREE_PTR(hs);
        EXIT;
}
203
204 cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs)
205 {
206         if (cfs_atomic_inc_not_zero(&hs->hs_refcount))
207                 return hs;
208         return NULL;
209 }
210 CFS_EXPORT_SYMBOL(cfs_hash_getref);
211
212 void cfs_hash_putref(cfs_hash_t *hs)
213 {
214         if (cfs_atomic_dec_and_test(&hs->hs_refcount))
215                 cfs_hash_destroy(hs);
216 }
217 CFS_EXPORT_SYMBOL(cfs_hash_putref);
218
219 static inline unsigned int
220 cfs_hash_rehash_bits(cfs_hash_t *hs)
221 {
222         if (!(hs->hs_flags & CFS_HASH_REHASH))
223                 return 0;
224
225         /* XXX: need to handle case with max_theta != 2.0
226          *      and the case with min_theta != 0.5 */
227         if ((hs->hs_cur_bits < hs->hs_max_bits) &&
228             (__cfs_hash_theta(hs) > hs->hs_max_theta))
229                 return hs->hs_cur_bits + 1;
230
231         if ((hs->hs_cur_bits > hs->hs_min_bits) &&
232             (__cfs_hash_theta(hs) < hs->hs_min_theta))
233                 return hs->hs_cur_bits - 1;
234
235         return 0;
236 }
237
/**
 * Add item @hnode to libcfs hash @hs using @key.  The registered
 * ops->hs_get function will be called when the item is added.
 * @hnode must not already be hashed (asserted below); duplicate keys
 * are permitted — see cfs_hash_add_unique() for exclusive insertion.
 * May trigger a rehash after the locks are dropped.
 */
void
cfs_hash_add(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
{
        cfs_hash_bucket_t    *hsb;
        int                   bits;
        unsigned              i;
        ENTRY;

        __cfs_hash_key_validate(hs, key, hnode);

        /* Table read-lock keeps the bucket array stable vs. a
         * concurrent rehash while we select a bucket. */
        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);
        LASSERT(cfs_hlist_unhashed(hnode));

        cfs_write_lock(&hsb->hsb_rwlock);
        __cfs_hash_bucket_add(hs, hsb, hnode);
        cfs_write_unlock(&hsb->hsb_rwlock);

        /* Decide on a resize while still holding the table lock, but
         * perform it only after the lock is released (the rehash takes
         * the table lock exclusively). */
        bits = cfs_hash_rehash_bits(hs);
        cfs_hash_runlock(hs);
        if (bits)
                cfs_hash_rehash(hs, bits);

        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_add);
270
/**
 * Lookup @key in @hs; if an entry already exists, take a reference on
 * it and return it, otherwise insert @hnode and return @hnode.  Shared
 * helper for cfs_hash_add_unique() and cfs_hash_findadd_unique().
 * The caller owns the reference taken on an existing entry.
 */
static cfs_hlist_node_t *
cfs_hash_findadd_unique_hnode(cfs_hash_t *hs, void *key,
                              cfs_hlist_node_t *hnode)
{
        int                   bits = 0;
        cfs_hlist_node_t     *ehnode;
        cfs_hash_bucket_t    *hsb;
        unsigned              i;
        ENTRY;

        __cfs_hash_key_validate(hs, key, hnode);

        /* Table read-lock keeps the bucket array stable vs. rehash. */
        cfs_hash_rlock(hs);
        i = cfs_hash_id(hs, key, hs->hs_cur_mask);
        hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);
        LASSERT(cfs_hlist_unhashed(hnode));

        /* Lookup and insert must be one atomic step under the bucket
         * write-lock, or two racing adders could both insert. */
        cfs_write_lock(&hsb->hsb_rwlock);
        ehnode = __cfs_hash_bucket_lookup(hs, hsb, key);
        if (ehnode) {
                /* Existing entry wins; pin it for the caller. */
                cfs_hash_get(hs, ehnode);
        } else {
                __cfs_hash_bucket_add(hs, hsb, hnode);
                ehnode = hnode;
                /* Only a successful insert changes the load factor. */
                bits = cfs_hash_rehash_bits(hs);
        }
        cfs_write_unlock(&hsb->hsb_rwlock);
        cfs_hash_runlock(hs);
        if (bits)
                cfs_hash_rehash(hs, bits);

        RETURN(ehnode);
}
305
306 /**
307  * Add item @hnode to libcfs hash @hs using @key.  The registered
308  * ops->hs_get function will be called if the item was added.
309  * Returns 0 on success or -EALREADY on key collisions.
310  */
311 int
312 cfs_hash_add_unique(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
313 {
314         cfs_hlist_node_t    *ehnode;
315         ENTRY;
316
317         ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
318         if (ehnode != hnode) {
319                 cfs_hash_put(hs, ehnode);
320                 RETURN(-EALREADY);
321         }
322         RETURN(0);
323 }
324 CFS_EXPORT_SYMBOL(cfs_hash_add_unique);
325
326 /**
327  * Add item @hnode to libcfs hash @hs using @key.  If this @key
328  * already exists in the hash then ops->hs_get will be called on the
329  * conflicting entry and that entry will be returned to the caller.
330  * Otherwise ops->hs_get is called on the item which was added.
331  */
332 void *
333 cfs_hash_findadd_unique(cfs_hash_t *hs, void *key,
334                         cfs_hlist_node_t *hnode)
335 {
336         cfs_hlist_node_t     *ehnode;
337         void                 *obj;
338         ENTRY;
339
340         ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
341         obj = cfs_hash_get(hs, ehnode);
342         cfs_hash_put(hs, ehnode);
343         RETURN(obj);
344 }
345 CFS_EXPORT_SYMBOL(cfs_hash_findadd_unique);
346
347 /**
348  * Delete item @hnode from the libcfs hash @hs using @key.  The @key
349  * is required to ensure the correct hash bucket is locked since there
350  * is no direct linkage from the item to the bucket.  The object
351  * removed from the hash will be returned and obs->hs_put is called
352  * on the removed object.
353  */
354 void *
355 cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
356 {
357         cfs_hash_bucket_t    *hsb;
358         void                 *obj;
359         unsigned              i;
360         ENTRY;
361
362         __cfs_hash_key_validate(hs, key, hnode);
363
364         cfs_hash_rlock(hs);
365         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
366         hsb = hs->hs_buckets[i];
367         LASSERT(i <= hs->hs_cur_mask);
368         LASSERT(!cfs_hlist_unhashed(hnode));
369
370         cfs_write_lock(&hsb->hsb_rwlock);
371         obj = __cfs_hash_bucket_del(hs, hsb, hnode);
372         cfs_write_unlock(&hsb->hsb_rwlock);
373         cfs_hash_runlock(hs);
374
375         RETURN(obj);
376 }
377 CFS_EXPORT_SYMBOL(cfs_hash_del);
378
379 /**
380  * Delete item given @key in libcfs hash @hs.  The first @key found in
381  * the hash will be removed, if the key exists multiple times in the hash
382  * @hs this function must be called once per key.  The removed object
383  * will be returned and ops->hs_put is called on the removed object.
384  */
385 void *
386 cfs_hash_del_key(cfs_hash_t *hs, void *key)
387 {
388         void                 *obj = NULL;
389         cfs_hlist_node_t     *hnode;
390         cfs_hash_bucket_t    *hsb;
391         unsigned              i;
392         ENTRY;
393
394         cfs_hash_rlock(hs);
395         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
396         hsb = hs->hs_buckets[i];
397         LASSERT(i <= hs->hs_cur_mask);
398
399         cfs_write_lock(&hsb->hsb_rwlock);
400         hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
401         if (hnode)
402                 obj = __cfs_hash_bucket_del(hs, hsb, hnode);
403
404         cfs_write_unlock(&hsb->hsb_rwlock);
405         cfs_hash_runlock(hs);
406
407         RETURN(obj);
408 }
409 CFS_EXPORT_SYMBOL(cfs_hash_del_key);
410
411 /**
412  * Lookup an item using @key in the libcfs hash @hs and return it.
413  * If the @key is found in the hash hs->hs_get() is called and the
414  * matching objects is returned.  It is the callers responsibility
415  * to call the counterpart ops->hs_put using the cfs_hash_put() macro
416  * when when finished with the object.  If the @key was not found
417  * in the hash @hs NULL is returned.
418  */
419 void *
420 cfs_hash_lookup(cfs_hash_t *hs, void *key)
421 {
422         void                 *obj = NULL;
423         cfs_hlist_node_t     *hnode;
424         cfs_hash_bucket_t    *hsb;
425         unsigned              i;
426         ENTRY;
427
428         cfs_hash_rlock(hs);
429         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
430         hsb = hs->hs_buckets[i];
431         LASSERT(i <= hs->hs_cur_mask);
432
433         cfs_read_lock(&hsb->hsb_rwlock);
434         hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
435         if (hnode)
436                 obj = cfs_hash_get(hs, hnode);
437
438         cfs_read_unlock(&hsb->hsb_rwlock);
439         cfs_hash_runlock(hs);
440
441         RETURN(obj);
442 }
443 CFS_EXPORT_SYMBOL(cfs_hash_lookup);
444
445 /**
446  * For each item in the libcfs hash @hs call the passed callback @func
447  * and pass to it as an argument each hash item and the private @data.
448  * Before each callback ops->hs_get will be called, and after each
449  * callback ops->hs_put will be called.  Finally, during the callback
450  * the bucket lock is held so the callback must never sleep.
451  */
452 void
453 cfs_hash_for_each(cfs_hash_t *hs,
454                   cfs_hash_for_each_cb_t func, void *data)
455 {
456         cfs_hlist_node_t     *hnode;
457         cfs_hash_bucket_t    *hsb;
458         void                 *obj;
459         int                   i;
460         ENTRY;
461
462         cfs_hash_rlock(hs);
463         cfs_hash_for_each_bucket(hs, hsb, i) {
464                 cfs_read_lock(&hsb->hsb_rwlock);
465                 cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
466                         __cfs_hash_bucket_validate(hs, hsb, hnode);
467                         obj = cfs_hash_get(hs, hnode);
468                         func(obj, data);
469                         (void)cfs_hash_put(hs, hnode);
470                 }
471                 cfs_read_unlock(&hsb->hsb_rwlock);
472         }
473         cfs_hash_runlock(hs);
474
475         EXIT;
476 }
477 CFS_EXPORT_SYMBOL(cfs_hash_for_each);
478
/**
 * For each item in the libcfs hash @hs call the passed callback @func
 * and pass to it as an argument each hash item and the private @data.
 * Before each callback ops->hs_get will be called, and after each
 * callback ops->hs_put will be called.  During the callback the
 * bucket lock will not be held, which allows the current item to be
 * removed from the hash during the callback.  However, care should be
 * taken to prevent other callers from operating on the hash
 * concurrently or list corruption may occur.
 */
void
cfs_hash_for_each_safe(cfs_hash_t *hs,
                       cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;      /* next pointer, saved before @func may unlink hnode */
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        int                   i;
        ENTRY;

        cfs_hash_rlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_read_lock(&hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        /* Drop the bucket lock so @func may delete the
                         * current item; @pos keeps the walk valid. */
                        cfs_read_unlock(&hsb->hsb_rwlock);
                        func(obj, data);
                        cfs_read_lock(&hsb->hsb_rwlock);
                        (void)cfs_hash_put(hs, hnode);
                }
                cfs_read_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_safe);
517
/**
 * For each hash bucket in the libcfs hash @hs call the passed callback
 * @func until all the hash buckets are empty.  The passed callback @func
 * or the previously registered callback hs->hs_put must remove the item
 * from the hash.  You may either use the cfs_hash_del() or hlist_del()
 * functions.  No rwlocks will be held during the callback @func, so it
 * is safe to sleep if needed.  This function will not terminate until
 * the hash is empty.  Note it is still possible to concurrently add new
 * items in to the hash.  It is the callers responsibility to ensure
 * the required locking is in place to prevent concurrent insertions.
 */
void
cfs_hash_for_each_empty(cfs_hash_t *hs,
                        cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        cfs_hash_bucket_t    **hsb_last = NULL;
        void                 *obj;
        int                   i = 0;    /* bucket cursor, preserved across restarts */
        ENTRY;

restart:
        cfs_hash_rlock(hs);
        /* If the hash table has changed since we last held lh_rwlock,
         * we need to start traversing the list from the start. */
        if (hs->hs_buckets != hsb_last) {
                i = 0;
                hsb_last = hs->hs_buckets;
        }
        /* _restart variant resumes the scan at bucket @i rather than 0. */
        cfs_hash_for_each_bucket_restart(hs, hsb, i) {
                cfs_write_lock(&hsb->hsb_rwlock);
                while (!cfs_hlist_empty(&hsb->hsb_head)) {
                        hnode =  hsb->hsb_head.first;
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        /* Pin the item, then drop ALL locks so @func may
                         * sleep and remove the item from the hash. */
                        obj = cfs_hash_get(hs, hnode);
                        cfs_write_unlock(&hsb->hsb_rwlock);
                        cfs_hash_runlock(hs);
                        func(obj, data);
                        (void)cfs_hash_put(hs, hnode);
                        cfs_cond_resched();
                        /* Locks were dropped: re-take them and revalidate
                         * the bucket array, which a concurrent rehash may
                         * have replaced in the meantime. */
                        goto restart;
                }
                cfs_write_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
CFS_EXPORT_SYMBOL(cfs_hash_for_each_empty);
567
568 /*
569  * For each item in the libcfs hash @hs which matches the @key call
570  * the passed callback @func and pass to it as an argument each hash
571  * item and the private @data.  Before each callback ops->hs_get will
572  * be called, and after each callback ops->hs_put will be called.
573  * Finally, during the callback the bucket lock is held so the
574  * callback must never sleep.
575    */
576 void
577 cfs_hash_for_each_key(cfs_hash_t *hs, void *key,
578                       cfs_hash_for_each_cb_t func, void *data)
579 {
580         cfs_hlist_node_t     *hnode;
581         cfs_hash_bucket_t    *hsb;
582         unsigned              i;
583         ENTRY;
584
585         cfs_hash_rlock(hs);
586         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
587         hsb = hs->hs_buckets[i];
588         LASSERT(i <= hs->hs_cur_mask);
589
590         cfs_read_lock(&hsb->hsb_rwlock);
591         cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
592                 __cfs_hash_bucket_validate(hs, hsb, hnode);
593
594                 if (!cfs_hash_compare(hs, key, hnode))
595                         continue;
596
597                 func(cfs_hash_get(hs, hnode), data);
598                 (void)cfs_hash_put(hs, hnode);
599         }
600
601         cfs_read_unlock(&hsb->hsb_rwlock);
602         cfs_hash_runlock(hs);
603
604         EXIT;
605 }
606 CFS_EXPORT_SYMBOL(cfs_hash_for_each_key);
607
/**
 * Rehash the libcfs hash @hs to the given @bits.  This can be used
 * to grow the hash size when excessive chaining is detected, or to
 * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
 * flag is set in @hs the libcfs hash may be dynamically rehashed
 * during addition or removal if the hash's theta value exceeds
 * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
 * these values are tuned to keep the chained hash depth small, and
 * this approach assumes a reasonably uniform hashing function.  The
 * theta thresholds for @hs are tunable via cfs_hash_set_theta().
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EALREADY
 * when a racing caller already brought theta back into range.
 */
int
cfs_hash_rehash(cfs_hash_t *hs, int bits)
{
        cfs_hlist_node_t      *hnode;
        cfs_hlist_node_t      *pos;
        cfs_hash_bucket_t    **old_buckets;
        cfs_hash_bucket_t    **rehash_buckets;
        cfs_hash_bucket_t     *hs_hsb;
        cfs_hash_bucket_t     *rehash_hsb;
        int                    i;
        int                    theta;
        int                    old_mask;
        int                    old_bits;
        int                    new_mask = (1 << bits) - 1;
        int                    rc = 0;
        void                  *key;
        ENTRY;

        /* Allocation and the table write-lock below may block. */
        LASSERT(!cfs_in_interrupt());
        LASSERT(new_mask > 0);
        LASSERT((hs->hs_flags & CFS_HASH_REHASH) != 0);

        /* Build the complete replacement table before taking any lock. */
        LIBCFS_ALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits);
        if (!rehash_buckets)
                RETURN(-ENOMEM);

        for (i = 0; i <= new_mask; i++) {
                CFS_ALLOC_PTR(rehash_buckets[i]);
                if (rehash_buckets[i] == NULL)
                        GOTO(free, rc = -ENOMEM);

                CFS_INIT_HLIST_HEAD(&rehash_buckets[i]->hsb_head);
                cfs_rwlock_init(&rehash_buckets[i]->hsb_rwlock);
                cfs_atomic_set(&rehash_buckets[i]->hsb_count, 0);
        }

        cfs_hash_wlock(hs);

        /*
         * Early return for multiple concurrent racing callers,
         * ensure we only trigger the rehash if it is still needed.
         */
        theta = __cfs_hash_theta(hs);
        if ((theta >= hs->hs_min_theta) && (theta <= hs->hs_max_theta)) {
                cfs_hash_wunlock(hs);
                GOTO(free, rc = -EALREADY);
        }

        /* Swap in the new table; all other users are excluded by the
         * exclusive hs_rwlock held above. */
        old_bits = hs->hs_cur_bits;
        old_buckets = hs->hs_buckets;
        old_mask = (1 << old_bits) - 1;

        hs->hs_cur_bits = bits;
        hs->hs_cur_mask = (1 << bits) - 1;
        hs->hs_buckets = rehash_buckets;
        cfs_atomic_inc(&hs->hs_rehash_count);

        /* Migrate every item from each old bucket into the new table. */
        for (i = 0; i <= old_mask; i++) {
                hs_hsb = old_buckets[i];

                cfs_write_lock(&hs_hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hs_hsb->hsb_head)) {
                        key = cfs_hash_key(hs, hnode);
                        LASSERT(key);

                        /*
                         * Validate hnode is in the correct bucket.
                         */
                        if (unlikely(hs->hs_flags & CFS_HASH_DEBUG))
                                LASSERT(cfs_hash_id(hs, key, old_mask) == i);

                        /*
                         * Delete from old hash bucket.
                         */
                        cfs_hlist_del(hnode);
                        LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) > 0);
                        cfs_atomic_dec(&hs_hsb->hsb_count);

                        /*
                         * Add to rehash bucket, ops->hs_key must be defined.
                         */
                        rehash_hsb = rehash_buckets[cfs_hash_id(hs, key,
                                                                new_mask)];
                        cfs_hlist_add_head(hnode, &(rehash_hsb->hsb_head));
                        cfs_atomic_inc(&rehash_hsb->hsb_count);
                }

                LASSERT(cfs_hlist_empty(&(hs_hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) == 0);
                cfs_write_unlock(&hs_hsb->hsb_rwlock);
        }

        cfs_hash_wunlock(hs);
        /* Success: point the cleanup code below at the OLD table so the
         * shared "free" path releases it instead of the new one. */
        rehash_buckets = old_buckets;
        i = (1 << old_bits);
        bits = old_bits;
 free:
        /* On the error paths @i is the number of buckets allocated so
         * far; on success it is the full old-table size set above. */
        while (--i >= 0)
                CFS_FREE_PTR(rehash_buckets[i]);
        LIBCFS_FREE(rehash_buckets, sizeof(*rehash_buckets) << bits);
        RETURN(rc);
}
CFS_EXPORT_SYMBOL(cfs_hash_rehash);
722
723 /**
724  * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
725  * @old_key must be provided to locate the objects previous location
726  * in the hash, and the @new_key will be used to reinsert the object.
727  * Use this function instead of a cfs_hash_add() + cfs_hash_del()
728  * combo when it is critical that there is no window in time where the
729  * object is missing from the hash.  When an object is being rehashed
730  * the registered cfs_hash_get() and cfs_hash_put() functions will
731  * not be called.
732  */
733 void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key, void *new_key,
734                          cfs_hlist_node_t *hnode)
735 {
736         cfs_hash_bucket_t     *old_hsb;
737         cfs_hash_bucket_t     *new_hsb;
738         unsigned               i;
739         unsigned               j;
740         ENTRY;
741
742         __cfs_hash_key_validate(hs, new_key, hnode);
743         LASSERT(!cfs_hlist_unhashed(hnode));
744
745         cfs_hash_rlock(hs);
746
747         i = cfs_hash_id(hs, old_key, hs->hs_cur_mask);
748         old_hsb = hs->hs_buckets[i];
749         LASSERT(i <= hs->hs_cur_mask);
750
751         j = cfs_hash_id(hs, new_key, hs->hs_cur_mask);
752         new_hsb = hs->hs_buckets[j];
753         LASSERT(j <= hs->hs_cur_mask);
754
755         if (i < j) { /* write_lock ordering */
756                 cfs_write_lock(&old_hsb->hsb_rwlock);
757                 cfs_write_lock(&new_hsb->hsb_rwlock);
758         } else if (i > j) {
759                 cfs_write_lock(&new_hsb->hsb_rwlock);
760                 cfs_write_lock(&old_hsb->hsb_rwlock);
761         } else { /* do nothing */
762                 cfs_read_unlock(&hs->hs_rwlock);
763                 EXIT;
764                 return;
765         }
766
767         /*
768          * Migrate item between hash buckets without calling
769          * the cfs_hash_get() and cfs_hash_put() callback functions.
770          */
771         cfs_hlist_del(hnode);
772         LASSERT(cfs_atomic_read(&old_hsb->hsb_count) > 0);
773         cfs_atomic_dec(&old_hsb->hsb_count);
774         cfs_hlist_add_head(hnode, &(new_hsb->hsb_head));
775         cfs_atomic_inc(&new_hsb->hsb_count);
776
777         cfs_write_unlock(&new_hsb->hsb_rwlock);
778         cfs_write_unlock(&old_hsb->hsb_rwlock);
779         cfs_hash_runlock(hs);
780
781         EXIT;
782 }
783 CFS_EXPORT_SYMBOL(cfs_hash_rehash_key);
784
/**
 * Write the column header line matching cfs_hash_debug_str() output
 * into @str (at most @size bytes).  Returns the snprintf() result.
 */
int cfs_hash_debug_header(char *str, int size)
{
        return snprintf(str, size,
                 "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", CFS_MAX_HASH_NAME,
                 "name", "cur", "min", "max", "theta", "t-min", "t-max",
                 "flags", "rehash", "count", " distribution");
}
CFS_EXPORT_SYMBOL(cfs_hash_debug_header);
793
794 int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size)
795 {
796         cfs_hash_bucket_t     *hsb;
797         int                    theta;
798         int                    i;
799         int                    c = 0;
800         int                    dist[8] = { 0, };
801
802         if (str == NULL || size == 0)
803                 return 0;
804
805         cfs_hash_rlock(hs);
806         theta = __cfs_hash_theta(hs);
807
808         c += snprintf(str + c, size - c, "%-*s ",
809                       CFS_MAX_HASH_NAME, hs->hs_name);
810         c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_cur_bits);
811         c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_min_bits);
812         c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_max_bits);
813         c += snprintf(str + c, size - c, "%d.%03d ",
814                       __cfs_hash_theta_int(theta),
815                       __cfs_hash_theta_frac(theta));
816         c += snprintf(str + c, size - c, "%d.%03d ",
817                       __cfs_hash_theta_int(hs->hs_min_theta),
818                       __cfs_hash_theta_frac(hs->hs_min_theta));
819         c += snprintf(str + c, size - c, "%d.%03d ",
820                       __cfs_hash_theta_int(hs->hs_max_theta),
821                       __cfs_hash_theta_frac(hs->hs_max_theta));
822         c += snprintf(str + c, size - c, " 0x%02x ", hs->hs_flags);
823         c += snprintf(str + c, size - c, "%6d ",
824                       cfs_atomic_read(&hs->hs_rehash_count));
825         c += snprintf(str + c, size - c, "%5d ",
826                       cfs_atomic_read(&hs->hs_count));
827
828         /*
829          * The distribution is a summary of the chained hash depth in
830          * each of the libcfs hash buckets.  Each buckets hsb_count is
831          * divided by the hash theta value and used to generate a
832          * histogram of the hash distribution.  A uniform hash will
833          * result in all hash buckets being close to the average thus
834          * only the first few entries in the histogram will be non-zero.
835          * If you hash function results in a non-uniform hash the will
836          * be observable by outlier bucks in the distribution histogram.
837          *
838          * Uniform hash distribution:      128/128/0/0/0/0/0/0
839          * Non-Uniform hash distribution:  128/125/0/0/0/0/2/1
840          */
841         cfs_hash_for_each_bucket(hs, hsb, i)
842                 dist[min(__cfs_fls(cfs_atomic_read(&hsb->hsb_count)/max(theta,1)),7)]++;
843
844         for (i = 0; i < 8; i++)
845                 c += snprintf(str + c, size - c, "%d%c",  dist[i],
846                               (i == 7) ? '\n' : '/');
847
848         cfs_hash_runlock(hs);
849
850         return c;
851 }
852 CFS_EXPORT_SYMBOL(cfs_hash_debug_str);