Whamcloud - gitweb
b=22598 osd thandle usage counters
[fs/lustre-release.git] / libcfs / libcfs / hash.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/hash.c
37  *
38  * Implement a hash class for hash process in lustre system.
39  *
40  * Author: YuZhangyong <yzy@clusterfs.com>
41  *
42  * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
43  * - Simplified API and improved documentation
44  * - Added per-hash feature flags:
45  *   * CFS_HASH_DEBUG additional validation
46  *   * CFS_HASH_REHASH dynamic rehashing
47  * - Added per-hash statistics
48  * - General performance enhancements
49  *
50  * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
51  * - move all stuff to libcfs
52  * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
53  * - ignore hs_rwlock if without CFS_HASH_REHASH setting
54  * - buckets are allocated one by one(intead of contiguous memory),
55  *   to avoid unnecessary cacheline conflict
56  */
57
58 #include <libcfs/libcfs.h>
59
60 static void cfs_hash_destroy(cfs_hash_t *hs);
61
62 static void
63 cfs_hash_rlock(cfs_hash_t *hs)
64 {
65         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
66                 cfs_read_lock(&hs->hs_rwlock);
67 }
68
69 static void
70 cfs_hash_runlock(cfs_hash_t *hs)
71 {
72         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
73                 cfs_read_unlock(&hs->hs_rwlock);
74 }
75
76 static void
77 cfs_hash_wlock(cfs_hash_t *hs)
78 {
79         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
80                 cfs_write_lock(&hs->hs_rwlock);
81 }
82
83 static void
84 cfs_hash_wunlock(cfs_hash_t *hs)
85 {
86         if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
87                 cfs_write_unlock(&hs->hs_rwlock);
88 }
89
90 /**
91  * Initialize new libcfs hash, where:
92  * @name     - Descriptive hash name
93  * @cur_bits - Initial hash table size, in bits
94  * @max_bits - Maximum allowed hash table resize, in bits
95  * @ops      - Registered hash table operations
96  * @flags    - CFS_HASH_REHASH enable synamic hash resizing
97  *           - CFS_HASH_SORT enable chained hash sort
98  */
99 cfs_hash_t *
100 cfs_hash_create(char *name, unsigned int cur_bits,
101                 unsigned int max_bits, cfs_hash_ops_t *ops, int flags)
102 {
103         cfs_hash_t    *hs;
104         int            i;
105         ENTRY;
106
107         LASSERT(name != NULL);
108         LASSERT(ops != NULL);
109         /* The following ops are required for all hash table types */
110         LASSERT(ops->hs_hash != NULL);
111         LASSERT(ops->hs_key != NULL);
112         LASSERT(ops->hs_compare != NULL);
113         LASSERT(ops->hs_get != NULL);
114         LASSERT(ops->hs_put != NULL);
115
116         LASSERT(cur_bits > 0);
117         LASSERT(max_bits >= cur_bits);
118         LASSERT(max_bits < 31);
119         LASSERT(cur_bits == max_bits || (flags & CFS_HASH_REHASH) != 0);
120
121         CFS_ALLOC_PTR(hs);
122         if (!hs)
123                 RETURN(NULL);
124
125         strncpy(hs->hs_name, name, sizeof(hs->hs_name));
126         hs->hs_name[sizeof(hs->hs_name) - 1] = '\0';
127         cfs_atomic_set(&hs->hs_rehash_count, 0);
128         cfs_atomic_set(&hs->hs_refcount, 1);
129         cfs_atomic_set(&hs->hs_count, 0);
130         cfs_rwlock_init(&hs->hs_rwlock);
131         hs->hs_cur_bits = cur_bits;
132         hs->hs_cur_mask = (1 << cur_bits) - 1;
133         hs->hs_min_bits = cur_bits;
134         hs->hs_max_bits = max_bits;
135         /* XXX: need to fixup cfs_hash_rehash_bits() before this can be
136          *      anything other than 0.5 and 2.0 */
137         hs->hs_min_theta = 1 << (CFS_HASH_THETA_BITS - 1);
138         hs->hs_max_theta = 1 << (CFS_HASH_THETA_BITS + 1);
139         hs->hs_ops = ops;
140         hs->hs_flags = flags;
141
142         /* theta * 1000 */
143         __cfs_hash_set_theta(hs, 500, 2000);
144
145         LIBCFS_ALLOC(hs->hs_buckets,
146                      sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
147         if (hs->hs_buckets == NULL) {
148                 CFS_FREE_PTR(hs);
149                 RETURN(NULL);
150         }
151
152         for (i = 0; i <= hs->hs_cur_mask; i++) {
153                 CFS_ALLOC_PTR(hs->hs_buckets[i]);
154                 if (hs->hs_buckets[i] == NULL) {
155                         cfs_hash_destroy(hs);
156                         return NULL;
157                 }
158
159                 CFS_INIT_HLIST_HEAD(&hs->hs_buckets[i]->hsb_head);
160                 cfs_rwlock_init(&hs->hs_buckets[i]->hsb_rwlock);
161                 cfs_atomic_set(&hs->hs_buckets[i]->hsb_count, 0);
162         }
163
164         return hs;
165 }
166 CFS_EXPORT_SYMBOL(cfs_hash_create);
167
/**
 * Cleanup libcfs hash @hs: unlink and cfs_hash_exit() every remaining
 * item, free every bucket, then the bucket array and the hash itself.
 * Reached from cfs_hash_putref() on the final reference drop, or from
 * cfs_hash_create() on a partial-allocation failure (hence the NULL
 * bucket check below).
 */
static void
cfs_hash_destroy(cfs_hash_t *hs)
{
        cfs_hash_bucket_t    *hsb;
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;
        int                   i;
        ENTRY;

        LASSERT(hs != NULL);

        cfs_hash_wlock(hs);

        cfs_hash_for_each_bucket(hs, hsb, i) {
                /* NULL slots occur when cfs_hash_create() failed part
                 * way through bucket allocation */
                if (hsb == NULL)
                        continue;

                cfs_write_lock(&hsb->hsb_rwlock);
                /* _safe variant: each node is unlinked as we walk */
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        __cfs_hash_bucket_del(hs, hsb, hnode);
                        /* give the item its final cleanup callback */
                        cfs_hash_exit(hs, hnode);
                }

                LASSERT(cfs_hlist_empty(&(hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hsb->hsb_count) == 0);
                cfs_write_unlock(&hsb->hsb_rwlock);
                CFS_FREE_PTR(hsb);
        }

        /* every bucket accounted for every item it held */
        LASSERT(cfs_atomic_read(&hs->hs_count) == 0);
        cfs_hash_wunlock(hs);

        LIBCFS_FREE(hs->hs_buckets,
                    sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
        CFS_FREE_PTR(hs);
        EXIT;
}
209
210 cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs)
211 {
212         if (cfs_atomic_inc_not_zero(&hs->hs_refcount))
213                 return hs;
214         return NULL;
215 }
216 CFS_EXPORT_SYMBOL(cfs_hash_getref);
217
218 void cfs_hash_putref(cfs_hash_t *hs)
219 {
220         if (cfs_atomic_dec_and_test(&hs->hs_refcount))
221                 cfs_hash_destroy(hs);
222 }
223 CFS_EXPORT_SYMBOL(cfs_hash_putref);
224
225 static inline unsigned int
226 cfs_hash_rehash_bits(cfs_hash_t *hs)
227 {
228         if (!(hs->hs_flags & CFS_HASH_REHASH))
229                 return 0;
230
231         /* XXX: need to handle case with max_theta != 2.0
232          *      and the case with min_theta != 0.5 */
233         if ((hs->hs_cur_bits < hs->hs_max_bits) &&
234             (__cfs_hash_theta(hs) > hs->hs_max_theta))
235                 return hs->hs_cur_bits + 1;
236
237         if ((hs->hs_cur_bits > hs->hs_min_bits) &&
238             (__cfs_hash_theta(hs) < hs->hs_min_theta))
239                 return hs->hs_cur_bits - 1;
240
241         return 0;
242 }
243
244 /**
245  * Add item @hnode to libcfs hash @hs using @key.  The registered
246  * ops->hs_get function will be called when the item is added.
247  */
248 void
249 cfs_hash_add(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
250 {
251         cfs_hash_bucket_t    *hsb;
252         int                   bits;
253         unsigned              i;
254         ENTRY;
255
256         __cfs_hash_key_validate(hs, key, hnode);
257
258         cfs_hash_rlock(hs);
259         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
260         hsb = hs->hs_buckets[i];
261         LASSERT(i <= hs->hs_cur_mask);
262         LASSERT(cfs_hlist_unhashed(hnode));
263
264         cfs_write_lock(&hsb->hsb_rwlock);
265         __cfs_hash_bucket_add(hs, hsb, hnode);
266         cfs_write_unlock(&hsb->hsb_rwlock);
267
268         bits = cfs_hash_rehash_bits(hs);
269         cfs_hash_runlock(hs);
270         if (bits)
271                 cfs_hash_rehash(hs, bits);
272
273         EXIT;
274 }
275 CFS_EXPORT_SYMBOL(cfs_hash_add);
276
277 static cfs_hlist_node_t *
278 cfs_hash_findadd_unique_hnode(cfs_hash_t *hs, void *key,
279                               cfs_hlist_node_t *hnode)
280 {
281         int                   bits = 0;
282         cfs_hlist_node_t     *ehnode;
283         cfs_hash_bucket_t    *hsb;
284         unsigned              i;
285         ENTRY;
286
287         __cfs_hash_key_validate(hs, key, hnode);
288
289         cfs_hash_rlock(hs);
290         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
291         hsb = hs->hs_buckets[i];
292         LASSERT(i <= hs->hs_cur_mask);
293         LASSERT(cfs_hlist_unhashed(hnode));
294
295         cfs_write_lock(&hsb->hsb_rwlock);
296         ehnode = __cfs_hash_bucket_lookup(hs, hsb, key);
297         if (ehnode) {
298                 cfs_hash_get(hs, ehnode);
299         } else {
300                 __cfs_hash_bucket_add(hs, hsb, hnode);
301                 ehnode = hnode;
302                 bits = cfs_hash_rehash_bits(hs);
303         }
304         cfs_write_unlock(&hsb->hsb_rwlock);
305         cfs_hash_runlock(hs);
306         if (bits)
307                 cfs_hash_rehash(hs, bits);
308
309         RETURN(ehnode);
310 }
311
312 /**
313  * Add item @hnode to libcfs hash @hs using @key.  The registered
314  * ops->hs_get function will be called if the item was added.
315  * Returns 0 on success or -EALREADY on key collisions.
316  */
317 int
318 cfs_hash_add_unique(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
319 {
320         cfs_hlist_node_t    *ehnode;
321         ENTRY;
322
323         ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
324         if (ehnode != hnode) {
325                 cfs_hash_put(hs, ehnode);
326                 RETURN(-EALREADY);
327         }
328         RETURN(0);
329 }
330 CFS_EXPORT_SYMBOL(cfs_hash_add_unique);
331
332 /**
333  * Add item @hnode to libcfs hash @hs using @key.  If this @key
334  * already exists in the hash then ops->hs_get will be called on the
335  * conflicting entry and that entry will be returned to the caller.
336  * Otherwise ops->hs_get is called on the item which was added.
337  */
338 void *
339 cfs_hash_findadd_unique(cfs_hash_t *hs, void *key,
340                         cfs_hlist_node_t *hnode)
341 {
342         cfs_hlist_node_t     *ehnode;
343         void                 *obj;
344         ENTRY;
345
346         ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
347         obj = cfs_hash_get(hs, ehnode);
348         cfs_hash_put(hs, ehnode);
349         RETURN(obj);
350 }
351 CFS_EXPORT_SYMBOL(cfs_hash_findadd_unique);
352
353 /**
354  * Delete item @hnode from the libcfs hash @hs using @key.  The @key
355  * is required to ensure the correct hash bucket is locked since there
356  * is no direct linkage from the item to the bucket.  The object
357  * removed from the hash will be returned and obs->hs_put is called
358  * on the removed object.
359  */
360 void *
361 cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
362 {
363         cfs_hash_bucket_t    *hsb;
364         void                 *obj;
365         unsigned              i;
366         ENTRY;
367
368         __cfs_hash_key_validate(hs, key, hnode);
369
370         cfs_hash_rlock(hs);
371         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
372         hsb = hs->hs_buckets[i];
373         LASSERT(i <= hs->hs_cur_mask);
374         LASSERT(!cfs_hlist_unhashed(hnode));
375
376         cfs_write_lock(&hsb->hsb_rwlock);
377         obj = __cfs_hash_bucket_del(hs, hsb, hnode);
378         cfs_write_unlock(&hsb->hsb_rwlock);
379         cfs_hash_runlock(hs);
380
381         RETURN(obj);
382 }
383 CFS_EXPORT_SYMBOL(cfs_hash_del);
384
/**
 * Delete items from the libcfs hash @hs for which @func returns true.
 * The per-bucket write lock is held across the whole scan so that no
 * other thread can take a new reference to an object while it is being
 * examined.  Each item is passed to @func with a reference held
 * (ops->hs_get); a matching item is unlinked before that reference is
 * dropped again, so @func must never sleep.
 */
void
cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t func, void *data)
{
        cfs_hlist_node_t       *hnode;
        cfs_hlist_node_t       *pos;
        cfs_hash_bucket_t      *hsb;
        int                    i;
        ENTRY;

        cfs_hash_wlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_write_lock(&hsb->hsb_rwlock);
                /* _safe variant: the current node may be unlinked */
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        if (func(cfs_hash_get(hs, hnode), data))
                                __cfs_hash_bucket_del(hs, hsb, hnode);
                        (void)cfs_hash_put(hs, hnode);
                }
                cfs_write_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_wunlock(hs);

        EXIT;
}
414 CFS_EXPORT_SYMBOL(cfs_hash_cond_del);
415
416 /**
417  * Delete item given @key in libcfs hash @hs.  The first @key found in
418  * the hash will be removed, if the key exists multiple times in the hash
419  * @hs this function must be called once per key.  The removed object
420  * will be returned and ops->hs_put is called on the removed object.
421  */
422 void *
423 cfs_hash_del_key(cfs_hash_t *hs, void *key)
424 {
425         void                 *obj = NULL;
426         cfs_hlist_node_t     *hnode;
427         cfs_hash_bucket_t    *hsb;
428         unsigned              i;
429         ENTRY;
430
431         cfs_hash_rlock(hs);
432         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
433         hsb = hs->hs_buckets[i];
434         LASSERT(i <= hs->hs_cur_mask);
435
436         cfs_write_lock(&hsb->hsb_rwlock);
437         hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
438         if (hnode)
439                 obj = __cfs_hash_bucket_del(hs, hsb, hnode);
440
441         cfs_write_unlock(&hsb->hsb_rwlock);
442         cfs_hash_runlock(hs);
443
444         RETURN(obj);
445 }
446 CFS_EXPORT_SYMBOL(cfs_hash_del_key);
447
448 /**
449  * Lookup an item using @key in the libcfs hash @hs and return it.
450  * If the @key is found in the hash hs->hs_get() is called and the
451  * matching objects is returned.  It is the callers responsibility
452  * to call the counterpart ops->hs_put using the cfs_hash_put() macro
453  * when when finished with the object.  If the @key was not found
454  * in the hash @hs NULL is returned.
455  */
456 void *
457 cfs_hash_lookup(cfs_hash_t *hs, void *key)
458 {
459         void                 *obj = NULL;
460         cfs_hlist_node_t     *hnode;
461         cfs_hash_bucket_t    *hsb;
462         unsigned              i;
463         ENTRY;
464
465         cfs_hash_rlock(hs);
466         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
467         hsb = hs->hs_buckets[i];
468         LASSERT(i <= hs->hs_cur_mask);
469
470         cfs_read_lock(&hsb->hsb_rwlock);
471         hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
472         if (hnode)
473                 obj = cfs_hash_get(hs, hnode);
474
475         cfs_read_unlock(&hsb->hsb_rwlock);
476         cfs_hash_runlock(hs);
477
478         RETURN(obj);
479 }
480 CFS_EXPORT_SYMBOL(cfs_hash_lookup);
481
482 /**
483  * For each item in the libcfs hash @hs call the passed callback @func
484  * and pass to it as an argument each hash item and the private @data.
485  * Before each callback ops->hs_get will be called, and after each
486  * callback ops->hs_put will be called.  Finally, during the callback
487  * the bucket lock is held so the callback must never sleep.
488  */
489 void
490 cfs_hash_for_each(cfs_hash_t *hs,
491                   cfs_hash_for_each_cb_t func, void *data)
492 {
493         cfs_hlist_node_t     *hnode;
494         cfs_hash_bucket_t    *hsb;
495         void                 *obj;
496         int                   i;
497         ENTRY;
498
499         cfs_hash_rlock(hs);
500         cfs_hash_for_each_bucket(hs, hsb, i) {
501                 cfs_read_lock(&hsb->hsb_rwlock);
502                 cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
503                         __cfs_hash_bucket_validate(hs, hsb, hnode);
504                         obj = cfs_hash_get(hs, hnode);
505                         func(obj, data);
506                         (void)cfs_hash_put(hs, hnode);
507                 }
508                 cfs_read_unlock(&hsb->hsb_rwlock);
509         }
510         cfs_hash_runlock(hs);
511
512         EXIT;
513 }
514 CFS_EXPORT_SYMBOL(cfs_hash_for_each);
515
/**
 * For each item in the libcfs hash @hs call the passed callback @func
 * and pass to it as an argument each hash item and the private @data.
 * Before each callback ops->hs_get will be called, and after each
 * callback ops->hs_put will be called.  The bucket lock is dropped
 * around each callback, which allows the current item to be removed
 * from the hash during the callback.  However, care should be taken
 * to prevent other callers from operating on the hash concurrently
 * or list corruption may occur.
 */
void
cfs_hash_for_each_safe(cfs_hash_t *hs,
                       cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hlist_node_t     *pos;
        cfs_hash_bucket_t    *hsb;
        void                 *obj;
        int                   i;
        ENTRY;

        cfs_hash_rlock(hs);
        cfs_hash_for_each_bucket(hs, hsb, i) {
                cfs_read_lock(&hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        obj = cfs_hash_get(hs, hnode);
                        /* Drop the bucket lock so @func may delete the
                         * current item; the reference taken above and
                         * the saved @pos keep the walk legal -- but only
                         * if callers honour the exclusivity requirement
                         * in the comment above. */
                        cfs_read_unlock(&hsb->hsb_rwlock);
                        func(obj, data);
                        cfs_read_lock(&hsb->hsb_rwlock);
                        (void)cfs_hash_put(hs, hnode);
                }
                cfs_read_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
553 CFS_EXPORT_SYMBOL(cfs_hash_for_each_safe);
554
/**
 * For each hash bucket in the libcfs hash @hs call the passed callback
 * @func until all the hash buckets are empty.  The passed callback @func
 * or the previously registered callback hs->hs_put must remove the item
 * from the hash.  You may either use the cfs_hash_del() or hlist_del()
 * functions.  No rwlocks will be held during the callback @func it is
 * safe to sleep if needed.  This function will not terminate until the
 * hash is empty.  Note it is still possible to concurrently add new
 * items in to the hash.  It is the callers responsibility to ensure
 * the required locking is in place to prevent concurrent insertions.
 */
void
cfs_hash_for_each_empty(cfs_hash_t *hs,
                        cfs_hash_for_each_cb_t func, void *data)
{
        cfs_hlist_node_t     *hnode;
        cfs_hash_bucket_t    *hsb;
        cfs_hash_bucket_t    **hsb_last = NULL;
        void                 *obj;
        int                   i = 0;
        ENTRY;

restart:
        cfs_hash_rlock(hs);
        /* If the hash table has changed since we last held lh_rwlock,
         * we need to start traversing the list from the start. */
        if (hs->hs_buckets != hsb_last) {
                i = 0;
                hsb_last = hs->hs_buckets;
        }
        /* resume scanning at bucket @i, skipping ones already emptied */
        cfs_hash_for_each_bucket_restart(hs, hsb, i) {
                cfs_write_lock(&hsb->hsb_rwlock);
                while (!cfs_hlist_empty(&hsb->hsb_head)) {
                        hnode =  hsb->hsb_head.first;
                        __cfs_hash_bucket_validate(hs, hsb, hnode);
                        /* Reference the head item, then drop ALL locks
                         * before the callback so it may sleep and must
                         * remove the item from the hash. */
                        obj = cfs_hash_get(hs, hnode);
                        cfs_write_unlock(&hsb->hsb_rwlock);
                        cfs_hash_runlock(hs);
                        func(obj, data);
                        (void)cfs_hash_put(hs, hnode);
                        cfs_cond_resched();
                        /* Locks were dropped: restart the whole scan in
                         * case a concurrent rehash swapped the buckets. */
                        goto restart;
                }
                cfs_write_unlock(&hsb->hsb_rwlock);
        }
        cfs_hash_runlock(hs);
        EXIT;
}
603 CFS_EXPORT_SYMBOL(cfs_hash_for_each_empty);
604
605 /*
606  * For each item in the libcfs hash @hs which matches the @key call
607  * the passed callback @func and pass to it as an argument each hash
608  * item and the private @data.  Before each callback ops->hs_get will
609  * be called, and after each callback ops->hs_put will be called.
610  * Finally, during the callback the bucket lock is held so the
611  * callback must never sleep.
612    */
613 void
614 cfs_hash_for_each_key(cfs_hash_t *hs, void *key,
615                       cfs_hash_for_each_cb_t func, void *data)
616 {
617         cfs_hlist_node_t     *hnode;
618         cfs_hash_bucket_t    *hsb;
619         unsigned              i;
620         ENTRY;
621
622         cfs_hash_rlock(hs);
623         i = cfs_hash_id(hs, key, hs->hs_cur_mask);
624         hsb = hs->hs_buckets[i];
625         LASSERT(i <= hs->hs_cur_mask);
626
627         cfs_read_lock(&hsb->hsb_rwlock);
628         cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
629                 __cfs_hash_bucket_validate(hs, hsb, hnode);
630
631                 if (!cfs_hash_compare(hs, key, hnode))
632                         continue;
633
634                 func(cfs_hash_get(hs, hnode), data);
635                 (void)cfs_hash_put(hs, hnode);
636         }
637
638         cfs_read_unlock(&hsb->hsb_rwlock);
639         cfs_hash_runlock(hs);
640
641         EXIT;
642 }
643 CFS_EXPORT_SYMBOL(cfs_hash_for_each_key);
644
/**
 * Rehash the libcfs hash @hs to the given @bits.  This can be used
 * to grow the hash size when excessive chaining is detected, or to
 * shrink the hash when it is larger than needed.  When the CFS_HASH_REHASH
 * flag is set in @hs the libcfs hash may be dynamically rehashed
 * during addition or removal if the hash's theta value exceeds
 * either the hs->hs_min_theta or hs->hs_max_theta values.  By default
 * these values are tuned to keep the chained hash depth small, and
 * this approach assumes a reasonably uniform hashing function.  The
 * theta thresholds for @hs are tunable via cfs_hash_set_theta().
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or -EALREADY
 * when a racing caller already brought theta back in range.
 */
int
cfs_hash_rehash(cfs_hash_t *hs, int bits)
{
        cfs_hlist_node_t      *hnode;
        cfs_hlist_node_t      *pos;
        cfs_hash_bucket_t    **old_buckets;
        cfs_hash_bucket_t    **rehash_buckets;
        cfs_hash_bucket_t     *hs_hsb;
        cfs_hash_bucket_t     *rehash_hsb;
        int                    i;
        int                    theta;
        int                    old_mask;
        int                    old_bits;
        int                    new_mask = (1 << bits) - 1;
        int                    rc = 0;
        void                  *key;
        ENTRY;

        /* allocates memory and takes sleeping locks */
        LASSERT(!cfs_in_interrupt());
        LASSERT(new_mask > 0);
        LASSERT((hs->hs_flags & CFS_HASH_REHASH) != 0);

        /* Build the complete new bucket array before taking any locks */
        LIBCFS_ALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits);
        if (!rehash_buckets)
                RETURN(-ENOMEM);

        for (i = 0; i <= new_mask; i++) {
                CFS_ALLOC_PTR(rehash_buckets[i]);
                if (rehash_buckets[i] == NULL)
                        GOTO(free, rc = -ENOMEM);

                CFS_INIT_HLIST_HEAD(&rehash_buckets[i]->hsb_head);
                cfs_rwlock_init(&rehash_buckets[i]->hsb_rwlock);
                cfs_atomic_set(&rehash_buckets[i]->hsb_count, 0);
        }

        cfs_hash_wlock(hs);

        /*
         * Early return for multiple concurrent racing callers,
         * ensure we only trigger the rehash if it is still needed.
         */
        theta = __cfs_hash_theta(hs);
        if ((theta >= hs->hs_min_theta) && (theta <= hs->hs_max_theta)) {
                cfs_hash_wunlock(hs);
                /* i == new_mask + 1 here, so "free" releases every new
                 * bucket plus the new array */
                GOTO(free, rc = -EALREADY);
        }

        old_bits = hs->hs_cur_bits;
        old_buckets = hs->hs_buckets;
        old_mask = (1 << old_bits) - 1;

        /* Publish the new table; readers are excluded by the table
         * write lock held above */
        hs->hs_cur_bits = bits;
        hs->hs_cur_mask = (1 << bits) - 1;
        hs->hs_buckets = rehash_buckets;
        cfs_atomic_inc(&hs->hs_rehash_count);

        /* Migrate every item from the old buckets into the new ones */
        for (i = 0; i <= old_mask; i++) {
                hs_hsb = old_buckets[i];

                cfs_write_lock(&hs_hsb->hsb_rwlock);
                cfs_hlist_for_each_safe(hnode, pos, &(hs_hsb->hsb_head)) {
                        key = cfs_hash_key(hs, hnode);
                        LASSERT(key);

                        /*
                         * Validate hnode is in the correct bucket.
                         */
                        if (unlikely(hs->hs_flags & CFS_HASH_DEBUG))
                                LASSERT(cfs_hash_id(hs, key, old_mask) == i);

                        /*
                         * Delete from old hash bucket.
                         */
                        cfs_hlist_del(hnode);
                        LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) > 0);
                        cfs_atomic_dec(&hs_hsb->hsb_count);

                        /*
                         * Add to rehash bucket, ops->hs_key must be defined.
                         */
                        rehash_hsb = rehash_buckets[cfs_hash_id(hs, key,
                                                                new_mask)];
                        cfs_hlist_add_head(hnode, &(rehash_hsb->hsb_head));
                        cfs_atomic_inc(&rehash_hsb->hsb_count);
                }

                LASSERT(cfs_hlist_empty(&(hs_hsb->hsb_head)));
                LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) == 0);
                cfs_write_unlock(&hs_hsb->hsb_rwlock);
        }

        cfs_hash_wunlock(hs);
        /* Success: reuse the "free" path below to release the OLD
         * table by retargeting rehash_buckets/i/bits at it. */
        rehash_buckets = old_buckets;
        i = (1 << old_bits);
        bits = old_bits;
 free:
        while (--i >= 0)
                CFS_FREE_PTR(rehash_buckets[i]);
        LIBCFS_FREE(rehash_buckets, sizeof(*rehash_buckets) << bits);
        RETURN(rc);
}
758 CFS_EXPORT_SYMBOL(cfs_hash_rehash);
759
/**
 * Rehash the object referenced by @hnode in the libcfs hash @hs.  The
 * @old_key must be provided to locate the objects previous location
 * in the hash, and the @new_key will be used to reinsert the object.
 * Use this function instead of a cfs_hash_add() + cfs_hash_del()
 * combo when it is critical that there is no window in time where the
 * object is missing from the hash.  When an object is being rehashed
 * the registered cfs_hash_get() and cfs_hash_put() functions will
 * not be called.
 */
void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key, void *new_key,
                         cfs_hlist_node_t *hnode)
{
        cfs_hash_bucket_t     *old_hsb;
        cfs_hash_bucket_t     *new_hsb;
        unsigned               i;
        unsigned               j;
        ENTRY;

        __cfs_hash_key_validate(hs, new_key, hnode);
        LASSERT(!cfs_hlist_unhashed(hnode));

        cfs_hash_rlock(hs);

        i = cfs_hash_id(hs, old_key, hs->hs_cur_mask);
        old_hsb = hs->hs_buckets[i];
        LASSERT(i <= hs->hs_cur_mask);

        j = cfs_hash_id(hs, new_key, hs->hs_cur_mask);
        new_hsb = hs->hs_buckets[j];
        LASSERT(j <= hs->hs_cur_mask);

        /* Always take the lower-indexed bucket lock first so two
         * concurrent migrations between the same pair of buckets
         * cannot ABBA-deadlock. */
        if (i < j) { /* write_lock ordering */
                cfs_write_lock(&old_hsb->hsb_rwlock);
                cfs_write_lock(&new_hsb->hsb_rwlock);
        } else if (i > j) {
                cfs_write_lock(&new_hsb->hsb_rwlock);
                cfs_write_lock(&old_hsb->hsb_rwlock);
        } else { /* do nothing */
                /* Both keys hash to the same bucket: the node is
                 * already where it belongs, no migration needed. */
                cfs_hash_runlock(hs);
                EXIT;
                return;
        }

        /*
         * Migrate item between hash buckets without calling
         * the cfs_hash_get() and cfs_hash_put() callback functions.
         */
        cfs_hlist_del(hnode);
        LASSERT(cfs_atomic_read(&old_hsb->hsb_count) > 0);
        cfs_atomic_dec(&old_hsb->hsb_count);
        cfs_hlist_add_head(hnode, &(new_hsb->hsb_head));
        cfs_atomic_inc(&new_hsb->hsb_count);

        cfs_write_unlock(&new_hsb->hsb_rwlock);
        cfs_write_unlock(&old_hsb->hsb_rwlock);
        cfs_hash_runlock(hs);

        EXIT;
}
820 CFS_EXPORT_SYMBOL(cfs_hash_rehash_key);
821
822 int cfs_hash_debug_header(char *str, int size)
823 {
824         return snprintf(str, size,
825                  "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", CFS_MAX_HASH_NAME,
826                  "name", "cur", "min", "max", "theta", "t-min", "t-max",
827                  "flags", "rehash", "count", " distribution");
828 }
829 CFS_EXPORT_SYMBOL(cfs_hash_debug_header);
830
/**
 * Format a one-line summary of @hs into @str (at most @size bytes):
 * name, current/min/max table size, load factors, flags, rehash count,
 * item count and a chain-depth distribution histogram.  Returns the
 * number of characters accumulated.
 *
 * NOTE(review): @c accumulates snprintf() "would-have-written" lengths,
 * so if the output is ever truncated "size - c" can go negative and be
 * converted to a huge size_t in subsequent calls -- callers must supply
 * a buffer large enough for the whole line; verify against callers.
 */
int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size)
{
        cfs_hash_bucket_t     *hsb;
        int                    theta;
        int                    i;
        int                    c = 0;
        int                    dist[8] = { 0, };

        if (str == NULL || size == 0)
                return 0;

        cfs_hash_rlock(hs);
        theta = __cfs_hash_theta(hs);

        c += snprintf(str + c, size - c, "%-*s ",
                      CFS_MAX_HASH_NAME, hs->hs_name);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_cur_bits);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_min_bits);
        c += snprintf(str + c, size - c, "%5d ",  1 << hs->hs_max_bits);
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(theta),
                      __cfs_hash_theta_frac(theta));
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(hs->hs_min_theta),
                      __cfs_hash_theta_frac(hs->hs_min_theta));
        c += snprintf(str + c, size - c, "%d.%03d ",
                      __cfs_hash_theta_int(hs->hs_max_theta),
                      __cfs_hash_theta_frac(hs->hs_max_theta));
        c += snprintf(str + c, size - c, " 0x%02x ", hs->hs_flags);
        c += snprintf(str + c, size - c, "%6d ",
                      cfs_atomic_read(&hs->hs_rehash_count));
        c += snprintf(str + c, size - c, "%5d ",
                      cfs_atomic_read(&hs->hs_count));

        /*
         * The distribution is a summary of the chained hash depth in
         * each of the libcfs hash buckets.  Each buckets hsb_count is
         * divided by the hash theta value and used to generate a
         * histogram of the hash distribution.  A uniform hash will
         * result in all hash buckets being close to the average thus
         * only the first few entries in the histogram will be non-zero.
         * If you hash function results in a non-uniform hash the will
         * be observable by outlier bucks in the distribution histogram.
         *
         * Uniform hash distribution:      128/128/0/0/0/0/0/0
         * Non-Uniform hash distribution:  128/125/0/0/0/0/2/1
         */
        cfs_hash_for_each_bucket(hs, hsb, i)
                dist[min(__cfs_fls(cfs_atomic_read(&hsb->hsb_count)/max(theta,1)),7)]++;

        /* print histogram buckets as a '/'-separated list */
        for (i = 0; i < 8; i++)
                c += snprintf(str + c, size - c, "%d%c",  dist[i],
                              (i == 7) ? '\n' : '/');

        cfs_hash_runlock(hs);

        return c;
}
889 CFS_EXPORT_SYMBOL(cfs_hash_debug_str);