1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * libcfs/libcfs/hash.c
38 * Implement a hash table class for hash processing in the Lustre system.
40 * Author: YuZhangyong <yzy@clusterfs.com>
42 * 2008-08-15: Brian Behlendorf <behlendorf1@llnl.gov>
43 * - Simplified API and improved documentation
44 * - Added per-hash feature flags:
45 * * CFS_HASH_DEBUG additional validation
46 * * CFS_HASH_REHASH dynamic rehashing
47 * - Added per-hash statistics
48 * - General performance enhancements
50 * 2009-07-31: Liang Zhen <zhen.liang@sun.com>
51 * - move all stuff to libcfs
52 * - don't allow cur_bits != max_bits without setting of CFS_HASH_REHASH
53 * - ignore hs_rwlock if without CFS_HASH_REHASH setting
54 * - buckets are allocated one by one(intead of contiguous memory),
55 * to avoid unnecessary cacheline conflict
58 #include <libcfs/libcfs.h>
60 static void cfs_hash_destroy(cfs_hash_t *hs);
/* Take the table-wide rwlock for reading.  This lock only exists to
 * serialize lookups/updates against a concurrent rehash, so it is a
 * no-op unless CFS_HASH_REHASH was set at creation time. */
63 cfs_hash_rlock(cfs_hash_t *hs)
65 if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
66 cfs_read_lock(&hs->hs_rwlock);
/* Release the table-wide read lock taken by cfs_hash_rlock(); a no-op
 * when CFS_HASH_REHASH is not set, mirroring the lock side. */
70 cfs_hash_runlock(cfs_hash_t *hs)
72 if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
73 cfs_read_unlock(&hs->hs_rwlock);
/* Take the table-wide rwlock for writing (used while rehashing or
 * tearing the table down); a no-op without CFS_HASH_REHASH. */
77 cfs_hash_wlock(cfs_hash_t *hs)
79 if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
80 cfs_write_lock(&hs->hs_rwlock);
/* Release the table-wide write lock taken by cfs_hash_wlock(); a
 * no-op when CFS_HASH_REHASH is not set. */
84 cfs_hash_wunlock(cfs_hash_t *hs)
86 if ((hs->hs_flags & CFS_HASH_REHASH) != 0)
87 cfs_write_unlock(&hs->hs_rwlock);
91 * Initialize new libcfs hash, where:
92 * @name - Descriptive hash name
93 * @cur_bits - Initial hash table size, in bits
94 * @max_bits - Maximum allowed hash table resize, in bits
95 * @ops - Registered hash table operations
96 * @flags - CFS_HASH_REHASH enable dynamic hash resizing
97 * - CFS_HASH_SORT enable chained hash sort
100 cfs_hash_create(char *name, unsigned int cur_bits,
101 unsigned int max_bits, cfs_hash_ops_t *ops, int flags)
/* Sanity-check the caller-supplied arguments before touching memory. */
107 LASSERT(name != NULL);
108 LASSERT(ops != NULL);
109 /* The following ops are required for all hash table types */
110 LASSERT(ops->hs_hash != NULL);
111 LASSERT(ops->hs_key != NULL);
112 LASSERT(ops->hs_compare != NULL);
113 LASSERT(ops->hs_get != NULL);
114 LASSERT(ops->hs_put != NULL);
/* max_bits < 31 keeps (1 << bits) mask arithmetic within a signed int;
 * a fixed-size table (cur_bits == max_bits) is allowed without
 * CFS_HASH_REHASH, otherwise the flag must be set. */
116 LASSERT(cur_bits > 0);
117 LASSERT(max_bits >= cur_bits);
118 LASSERT(max_bits < 31);
119 LASSERT(cur_bits == max_bits || (flags & CFS_HASH_REHASH) != 0);
/* NOTE(review): allocation of 'hs' itself happens in lines elided from
 * this view (original 120-124) — the copy below assumes hs is valid.
 * strncpy may leave hs_name unterminated, so the next line forces a
 * trailing NUL explicitly. */
125 strncpy(hs->hs_name, name, sizeof(hs->hs_name));
126 hs->hs_name[sizeof(hs->hs_name) - 1] = '\0';
127 cfs_atomic_set(&hs->hs_rehash_count, 0);
128 cfs_atomic_set(&hs->hs_refcount, 1);
129 cfs_atomic_set(&hs->hs_count, 0);
130 cfs_rwlock_init(&hs->hs_rwlock);
131 hs->hs_cur_bits = cur_bits;
132 hs->hs_cur_mask = (1 << cur_bits) - 1;
133 hs->hs_min_bits = cur_bits;
134 hs->hs_max_bits = max_bits;
135 /* XXX: need to fixup cfs_hash_rehash_bits() before this can be
136 * anything other than 0.5 and 2.0 */
137 hs->hs_min_theta = 1 << (CFS_HASH_THETA_BITS - 1);
138 hs->hs_max_theta = 1 << (CFS_HASH_THETA_BITS + 1);
140 hs->hs_flags = flags;
/* Default load-factor window 0.500..2.000 (values are theta * 1000). */
143 __cfs_hash_set_theta(hs, 500, 2000);
/* Allocate the pointer array for 2^cur_bits buckets... */
145 LIBCFS_ALLOC(hs->hs_buckets,
146 sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
147 if (hs->hs_buckets == NULL) {
/* ...then each bucket individually (deliberately non-contiguous, per
 * the file header, to avoid cacheline conflicts between buckets). */
152 for (i = 0; i <= hs->hs_cur_mask; i++) {
153 CFS_ALLOC_PTR(hs->hs_buckets[i]);
154 if (hs->hs_buckets[i] == NULL) {
/* On partial-allocation failure, cfs_hash_destroy() frees what was
 * built so far; the error-return path is in elided lines. */
155 cfs_hash_destroy(hs);
159 CFS_INIT_HLIST_HEAD(&hs->hs_buckets[i]->hsb_head);
160 cfs_rwlock_init(&hs->hs_buckets[i]->hsb_rwlock);
161 cfs_atomic_set(&hs->hs_buckets[i]->hsb_count, 0);
166 CFS_EXPORT_SYMBOL(cfs_hash_create);
169 * Cleanup libcfs hash @hs: unlink every remaining item (calling
 * cfs_hash_exit() on each) and free all buckets plus the bucket
 * pointer array.  Called when the last reference is dropped (see
 * cfs_hash_putref()) and on the cfs_hash_create() failure path.
172 cfs_hash_destroy(cfs_hash_t *hs)
174 cfs_hash_bucket_t *hsb;
175 cfs_hlist_node_t *hnode;
176 cfs_hlist_node_t *pos;
/* NOTE(review): the matching cfs_hash_wlock(hs) call is in lines
 * elided from this view (the cfs_hash_wunlock() below implies it). */
184 cfs_hash_for_each_bucket(hs, hsb, i) {
188 cfs_write_lock(&hsb->hsb_rwlock);
/* _safe variant required: each node is unlinked while iterating. */
189 cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
190 __cfs_hash_bucket_validate(hs, hsb, hnode);
191 __cfs_hash_bucket_del(hs, hsb, hnode);
192 cfs_hash_exit(hs, hnode);
/* The bucket must now be empty and its counter consistent. */
195 LASSERT(cfs_hlist_empty(&(hsb->hsb_head)));
196 LASSERT(cfs_atomic_read(&hsb->hsb_count) == 0);
197 cfs_write_unlock(&hsb->hsb_rwlock);
201 LASSERT(cfs_atomic_read(&hs->hs_count) == 0);
202 cfs_hash_wunlock(hs);
/* Free the bucket pointer array; per-bucket frees and the free of hs
 * itself are in elided lines (original 198-200, 206+). */
204 LIBCFS_FREE(hs->hs_buckets,
205 sizeof(*hs->hs_buckets) << hs->hs_cur_bits);
/* Take an additional reference on @hs.  inc_not_zero fails once the
 * refcount has dropped to zero (table is being destroyed); the
 * success/failure return statements are in elided lines. */
210 cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs)
212 if (cfs_atomic_inc_not_zero(&hs->hs_refcount))
216 CFS_EXPORT_SYMBOL(cfs_hash_getref);
/* Drop a reference on @hs; the final put (refcount hits zero)
 * tears the whole table down via cfs_hash_destroy(). */
218 void cfs_hash_putref(cfs_hash_t *hs)
220 if (cfs_atomic_dec_and_test(&hs->hs_refcount))
221 cfs_hash_destroy(hs);
223 CFS_EXPORT_SYMBOL(cfs_hash_putref);
/* Decide whether @hs should be resized: returns cur_bits + 1 to grow
 * when the load factor (theta) exceeds hs_max_theta, cur_bits - 1 to
 * shrink when it falls below hs_min_theta, and (per the elided return
 * statements) no resize otherwise or when CFS_HASH_REHASH is unset.
 * Note the +/-1 step is why the XXX below ties this to the fixed
 * 0.5/2.0 theta window configured in cfs_hash_create(). */
225 static inline unsigned int
226 cfs_hash_rehash_bits(cfs_hash_t *hs)
228 if (!(hs->hs_flags & CFS_HASH_REHASH))
231 /* XXX: need to handle case with max_theta != 2.0
232 * and the case with min_theta != 0.5 */
233 if ((hs->hs_cur_bits < hs->hs_max_bits) &&
234 (__cfs_hash_theta(hs) > hs->hs_max_theta))
235 return hs->hs_cur_bits + 1;
237 if ((hs->hs_cur_bits > hs->hs_min_bits) &&
238 (__cfs_hash_theta(hs) < hs->hs_min_theta))
239 return hs->hs_cur_bits - 1;
245 * Add item @hnode to libcfs hash @hs using @key. The registered
246 * ops->hs_get function will be called when the item is added.
249 cfs_hash_add(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
251 cfs_hash_bucket_t *hsb;
253 __cfs_hash_key_validate(hs, key, hnode);
256 __cfs_hash_key_validate(hs, key, hnode);
259 i = cfs_hash_id(hs, key, hs->hs_cur_mask);
260 hsb = hs->hs_buckets[i];
261 LASSERT(i <= hs->hs_cur_mask);
262 LASSERT(cfs_hlist_unhashed(hnode));
/* Insert under the per-bucket write lock only; the table-wide read
 * lock (taken in an elided line, released via cfs_hash_runlock below)
 * merely excludes a concurrent rehash. */
264 cfs_write_lock(&hsb->hsb_rwlock);
265 __cfs_hash_bucket_add(hs, hsb, hnode);
266 cfs_write_unlock(&hsb->hsb_rwlock);
/* Check load factor AFTER insertion; trigger a rehash outside all
 * bucket locks if one is needed. */
268 bits = cfs_hash_rehash_bits(hs);
269 cfs_hash_runlock(hs);
271 cfs_hash_rehash(hs, bits);
275 CFS_EXPORT_SYMBOL(cfs_hash_add);
/* Shared worker for cfs_hash_add_unique()/cfs_hash_findadd_unique():
 * look @key up in its bucket; if an existing node is found take a ref
 * on it, otherwise insert @hnode — all atomically under the bucket
 * write lock.  The returned node (existing or @hnode, via elided
 * return statements) tells the caller which case occurred. */
277 static cfs_hlist_node_t *
278 cfs_hash_findadd_unique_hnode(cfs_hash_t *hs, void *key,
279 cfs_hlist_node_t *hnode)
282 cfs_hlist_node_t *ehnode;
283 cfs_hash_bucket_t *hsb;
287 __cfs_hash_key_validate(hs, key, hnode);
290 i = cfs_hash_id(hs, key, hs->hs_cur_mask);
291 hsb = hs->hs_buckets[i];
292 LASSERT(i <= hs->hs_cur_mask);
293 LASSERT(cfs_hlist_unhashed(hnode));
295 cfs_write_lock(&hsb->hsb_rwlock);
296 ehnode = __cfs_hash_bucket_lookup(hs, hsb, key);
/* Key collision: pin the existing entry for the caller. */
298 cfs_hash_get(hs, ehnode);
/* No collision: insert the caller's node. */
300 __cfs_hash_bucket_add(hs, hsb, hnode);
302 bits = cfs_hash_rehash_bits(hs);
304 cfs_write_unlock(&hsb->hsb_rwlock);
305 cfs_hash_runlock(hs);
307 cfs_hash_rehash(hs, bits);
313 * Add item @hnode to libcfs hash @hs using @key. The registered
314 * ops->hs_get function will be called if the item was added.
315 * Returns 0 on success or -EALREADY on key collisions.
318 cfs_hash_add_unique(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
320 cfs_hlist_node_t *ehnode;
323 ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
/* A different node came back => @key already existed; drop the ref
 * the worker took on the conflicting entry and (per the elided
 * return) report -EALREADY to the caller. */
324 if (ehnode != hnode) {
325 cfs_hash_put(hs, ehnode);
330 CFS_EXPORT_SYMBOL(cfs_hash_add_unique);
333 * Add item @hnode to libcfs hash @hs using @key. If this @key
334 * already exists in the hash then ops->hs_get will be called on the
335 * conflicting entry and that entry will be returned to the caller.
336 * Otherwise ops->hs_get is called on the item which was added.
339 cfs_hash_findadd_unique(cfs_hash_t *hs, void *key,
340 cfs_hlist_node_t *hnode)
342 cfs_hlist_node_t *ehnode;
346 ehnode = cfs_hash_findadd_unique_hnode(hs, key, hnode);
/* get+put nets out to one reference held on the returned object:
 * the worker's ref is replaced by the one taken here for the caller. */
347 obj = cfs_hash_get(hs, ehnode);
348 cfs_hash_put(hs, ehnode);
351 CFS_EXPORT_SYMBOL(cfs_hash_findadd_unique);
354 * Delete item @hnode from the libcfs hash @hs using @key. The @key
355 * is required to ensure the correct hash bucket is locked since there
356 * is no direct linkage from the item to the bucket. The object
357 * removed from the hash will be returned and ops->hs_put is called
358 * on the removed object.
361 cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode)
363 cfs_hash_bucket_t *hsb;
368 __cfs_hash_key_validate(hs, key, hnode);
/* @key is only used to locate the bucket @hnode lives in; @hnode
 * itself identifies the entry to unlink. */
371 i = cfs_hash_id(hs, key, hs->hs_cur_mask);
372 hsb = hs->hs_buckets[i];
373 LASSERT(i <= hs->hs_cur_mask);
374 LASSERT(!cfs_hlist_unhashed(hnode));
376 cfs_write_lock(&hsb->hsb_rwlock);
377 obj = __cfs_hash_bucket_del(hs, hsb, hnode);
378 cfs_write_unlock(&hsb->hsb_rwlock);
/* Pairs with a table read-lock taken in an elided line; the removed
 * object is returned by an elided RETURN. */
379 cfs_hash_runlock(hs);
383 CFS_EXPORT_SYMBOL(cfs_hash_del);
386 * Delete items from the libcfs hash @hs for which @func returns true.
387 * The write lock is held while iterating over each bucket to prevent
388 * any object from being referenced concurrently.
391 cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t func, void *data)
393 cfs_hlist_node_t *hnode;
394 cfs_hlist_node_t *pos;
395 cfs_hash_bucket_t *hsb;
/* NOTE(review): the matching cfs_hash_wlock(hs) is in an elided line
 * (implied by the cfs_hash_wunlock() below). */
400 cfs_hash_for_each_bucket(hs, hsb, i) {
401 cfs_write_lock(&hsb->hsb_rwlock);
/* _safe iteration: @func may cause the current node to be unlinked. */
402 cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
403 __cfs_hash_bucket_validate(hs, hsb, hnode);
/* cfs_hash_get() pins the object across the callback; delete it
 * from the bucket when the predicate says so, then drop the pin. */
404 if (func(cfs_hash_get(hs, hnode), data))
405 __cfs_hash_bucket_del(hs, hsb, hnode);
406 (void)cfs_hash_put(hs, hnode);
408 cfs_write_unlock(&hsb->hsb_rwlock);
410 cfs_hash_wunlock(hs);
414 CFS_EXPORT_SYMBOL(cfs_hash_cond_del);
417 * Delete item given @key in libcfs hash @hs. The first @key found in
418 * the hash will be removed, if the key exists multiple times in the hash
419 * @hs this function must be called once per key. The removed object
420 * will be returned and ops->hs_put is called on the removed object.
423 cfs_hash_del_key(cfs_hash_t *hs, void *key)
426 cfs_hlist_node_t *hnode;
427 cfs_hash_bucket_t *hsb;
432 i = cfs_hash_id(hs, key, hs->hs_cur_mask);
433 hsb = hs->hs_buckets[i];
434 LASSERT(i <= hs->hs_cur_mask);
/* Lookup and unlink must be atomic w.r.t. other writers, hence the
 * bucket write lock around both.  Only the FIRST match for @key is
 * removed; duplicates require repeated calls (see the API comment
 * above).  The not-found guard and the returned object are in
 * elided lines. */
436 cfs_write_lock(&hsb->hsb_rwlock);
437 hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
439 obj = __cfs_hash_bucket_del(hs, hsb, hnode);
441 cfs_write_unlock(&hsb->hsb_rwlock);
442 cfs_hash_runlock(hs);
446 CFS_EXPORT_SYMBOL(cfs_hash_del_key);
449 * Lookup an item using @key in the libcfs hash @hs and return it.
450 * If the @key is found in the hash hs->hs_get() is called and the
451 * matching objects is returned. It is the callers responsibility
452 * to call the counterpart ops->hs_put using the cfs_hash_put() macro
453 * when finished with the object. If the @key was not found
454 * in the hash @hs NULL is returned.
457 cfs_hash_lookup(cfs_hash_t *hs, void *key)
460 cfs_hlist_node_t *hnode;
461 cfs_hash_bucket_t *hsb;
466 i = cfs_hash_id(hs, key, hs->hs_cur_mask);
467 hsb = hs->hs_buckets[i];
468 LASSERT(i <= hs->hs_cur_mask);
/* Read-only path: per-bucket READ lock suffices.  A matching entry
 * is pinned via cfs_hash_get() before the lock is dropped, so the
 * caller must balance with cfs_hash_put().  The NULL-on-miss return
 * is in elided lines. */
470 cfs_read_lock(&hsb->hsb_rwlock);
471 hnode = __cfs_hash_bucket_lookup(hs, hsb, key);
473 obj = cfs_hash_get(hs, hnode);
475 cfs_read_unlock(&hsb->hsb_rwlock);
476 cfs_hash_runlock(hs);
480 CFS_EXPORT_SYMBOL(cfs_hash_lookup);
483 * For each item in the libcfs hash @hs call the passed callback @func
484 * and pass to it as an argument each hash item and the private @data.
485 * Before each callback ops->hs_get will be called, and after each
486 * callback ops->hs_put will be called. Finally, during the callback
487 * the bucket lock is held so the callback must never sleep.
490 cfs_hash_for_each(cfs_hash_t *hs,
491 cfs_hash_for_each_cb_t func, void *data)
493 cfs_hlist_node_t *hnode;
494 cfs_hash_bucket_t *hsb;
500 cfs_hash_for_each_bucket(hs, hsb, i) {
/* Bucket read lock is held across the callback (see API comment:
 * @func must not sleep); each item is get/put-pinned around the
 * call, which itself sits in an elided line (original 505). */
501 cfs_read_lock(&hsb->hsb_rwlock);
502 cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
503 __cfs_hash_bucket_validate(hs, hsb, hnode);
504 obj = cfs_hash_get(hs, hnode);
506 (void)cfs_hash_put(hs, hnode);
508 cfs_read_unlock(&hsb->hsb_rwlock);
510 cfs_hash_runlock(hs);
514 CFS_EXPORT_SYMBOL(cfs_hash_for_each);
517 * For each item in the libcfs hash @hs call the passed callback @func
518 * and pass to it as an argument each hash item and the private @data.
519 * Before each callback ops->hs_get will be called, and after each
520 * callback ops->hs_put will be called. During the callback the
521 * bucket lock will not be held, which allows the current item
522 * to be removed from the hash during the callback. However, care
523 * should be taken to prevent other callers from operating on the
524 * hash concurrently or list corruption may occur.
527 cfs_hash_for_each_safe(cfs_hash_t *hs,
528 cfs_hash_for_each_cb_t func, void *data)
530 cfs_hlist_node_t *hnode;
531 cfs_hlist_node_t *pos;
532 cfs_hash_bucket_t *hsb;
538 cfs_hash_for_each_bucket(hs, hsb, i) {
539 cfs_read_lock(&hsb->hsb_rwlock);
/* _safe iteration + pin: @func may remove the current node.  The
 * bucket lock is DROPPED around the callback (in the elided line
 * between 543 and 545) and re-taken afterwards, so concurrent
 * mutators must be excluded by the caller (see API comment). */
540 cfs_hlist_for_each_safe(hnode, pos, &(hsb->hsb_head)) {
541 __cfs_hash_bucket_validate(hs, hsb, hnode);
542 obj = cfs_hash_get(hs, hnode);
543 cfs_read_unlock(&hsb->hsb_rwlock);
545 cfs_read_lock(&hsb->hsb_rwlock);
546 (void)cfs_hash_put(hs, hnode);
548 cfs_read_unlock(&hsb->hsb_rwlock);
550 cfs_hash_runlock(hs);
553 CFS_EXPORT_SYMBOL(cfs_hash_for_each_safe);
556 * For each hash bucket in the libcfs hash @hs call the passed callback
557 * @func until all the hash buckets are empty. The passed callback @func
558 * or the previously registered callback hs->hs_put must remove the item
559 * from the hash. You may either use the cfs_hash_del() or hlist_del()
560 * functions. No rwlocks will be held during the callback @func it is
561 * safe to sleep if needed. This function will not terminate until the
562 * hash is empty. Note it is still possible to concurrently add new
563 * items in to the hash. It is the callers responsibility to ensure
564 * the required locking is in place to prevent concurrent insertions.
567 cfs_hash_for_each_empty(cfs_hash_t *hs,
568 cfs_hash_for_each_cb_t func, void *data)
570 cfs_hlist_node_t *hnode;
571 cfs_hash_bucket_t *hsb;
/* hsb_last caches the bucket array pointer so a concurrent rehash
 * (which swaps hs->hs_buckets) forces a restart of the traversal. */
572 cfs_hash_bucket_t **hsb_last = NULL;
579 /* If the hash table has changed since we last held lh_rwlock,
580 * we need to start traversing the list from the start. */
581 if (hs->hs_buckets != hsb_last) {
583 hsb_last = hs->hs_buckets;
585 cfs_hash_for_each_bucket_restart(hs, hsb, i) {
586 cfs_write_lock(&hsb->hsb_rwlock);
/* Drain the bucket head-first: @func (or hs_put) is expected to
 * remove each item, otherwise this loop never terminates (see API
 * comment above).  All locks are dropped before the callback runs
 * — the callback itself and the re-lock are in elided lines — so
 * @func may sleep. */
587 while (!cfs_hlist_empty(&hsb->hsb_head)) {
588 hnode = hsb->hsb_head.first;
589 __cfs_hash_bucket_validate(hs, hsb, hnode);
590 obj = cfs_hash_get(hs, hnode);
591 cfs_write_unlock(&hsb->hsb_rwlock);
592 cfs_hash_runlock(hs);
594 (void)cfs_hash_put(hs, hnode);
598 cfs_write_unlock(&hsb->hsb_rwlock);
600 cfs_hash_runlock(hs);
603 CFS_EXPORT_SYMBOL(cfs_hash_for_each_empty);
606 * For each item in the libcfs hash @hs which matches the @key call
607 * the passed callback @func and pass to it as an argument each hash
608 * item and the private @data. Before each callback ops->hs_get will
609 * be called, and after each callback ops->hs_put will be called.
610 * Finally, during the callback the bucket lock is held so the
611 * callback must never sleep.
614 cfs_hash_for_each_key(cfs_hash_t *hs, void *key,
615 cfs_hash_for_each_cb_t func, void *data)
617 cfs_hlist_node_t *hnode;
618 cfs_hash_bucket_t *hsb;
/* Only the single bucket @key maps to needs walking. */
623 i = cfs_hash_id(hs, key, hs->hs_cur_mask);
624 hsb = hs->hs_buckets[i];
625 LASSERT(i <= hs->hs_cur_mask);
627 cfs_read_lock(&hsb->hsb_rwlock);
628 cfs_hlist_for_each(hnode, &(hsb->hsb_head)) {
629 __cfs_hash_bucket_validate(hs, hsb, hnode);
/* Skip chain entries whose key does not match (the bucket may hold
 * several distinct keys); 'continue' sits in an elided line. */
631 if (!cfs_hash_compare(hs, key, hnode))
/* Pin across the callback; bucket lock stays held, so @func must
 * not sleep (see API comment above). */
634 func(cfs_hash_get(hs, hnode), data);
635 (void)cfs_hash_put(hs, hnode);
638 cfs_read_unlock(&hsb->hsb_rwlock);
639 cfs_hash_runlock(hs);
643 CFS_EXPORT_SYMBOL(cfs_hash_for_each_key);
646 * Rehash the libcfs hash @hs to the given @bits. This can be used
647 * to grow the hash size when excessive chaining is detected, or to
648 * shrink the hash when it is larger than needed. When the CFS_HASH_REHASH
649 * flag is set in @hs the libcfs hash may be dynamically rehashed
650 * during addition or removal if the hash's theta value exceeds
651 * either the hs->hs_min_theta or hs->hs_max_theta values. By default
652 * these values are tuned to keep the chained hash depth small, and
653 * this approach assumes a reasonably uniform hashing function. The
654 * theta thresholds for @hs are tunable via cfs_hash_set_theta().
657 cfs_hash_rehash(cfs_hash_t *hs, int bits)
659 cfs_hlist_node_t *hnode;
660 cfs_hlist_node_t *pos;
661 cfs_hash_bucket_t **old_buckets;
662 cfs_hash_bucket_t **rehash_buckets;
663 cfs_hash_bucket_t *hs_hsb;
664 cfs_hash_bucket_t *rehash_hsb;
669 int new_mask = (1 << bits) - 1;
/* May block on allocation, hence the no-interrupt-context assert;
 * only legal on tables created with CFS_HASH_REHASH. */
674 LASSERT(!cfs_in_interrupt());
675 LASSERT(new_mask > 0);
676 LASSERT((hs->hs_flags & CFS_HASH_REHASH) != 0);
/* Build the complete replacement bucket set BEFORE taking the table
 * write lock, so allocation latency is not paid under the lock. */
678 LIBCFS_ALLOC(rehash_buckets, sizeof(*rehash_buckets) << bits);
682 for (i = 0; i <= new_mask; i++) {
683 CFS_ALLOC_PTR(rehash_buckets[i]);
684 if (rehash_buckets[i] == NULL)
685 GOTO(free, rc = -ENOMEM);
687 CFS_INIT_HLIST_HEAD(&rehash_buckets[i]->hsb_head);
688 cfs_rwlock_init(&rehash_buckets[i]->hsb_rwlock);
689 cfs_atomic_set(&rehash_buckets[i]->hsb_count, 0);
695 * Early return for multiple concurrent racing callers,
696 * ensure we only trigger the rehash if it is still needed.
/* NOTE(review): cfs_hash_wlock(hs) is taken in an elided line just
 * above this theta re-check (implied by the wunlock calls below). */
698 theta = __cfs_hash_theta(hs);
699 if ((theta >= hs->hs_min_theta) && (theta <= hs->hs_max_theta)) {
700 cfs_hash_wunlock(hs);
701 GOTO(free, rc = -EALREADY);
/* Swap in the new bucket array while holding the table write lock;
 * readers/writers are excluded via cfs_hash_rlock/wlock. */
704 old_bits = hs->hs_cur_bits;
705 old_buckets = hs->hs_buckets;
706 old_mask = (1 << old_bits) - 1;
708 hs->hs_cur_bits = bits;
709 hs->hs_cur_mask = (1 << bits) - 1;
710 hs->hs_buckets = rehash_buckets;
711 cfs_atomic_inc(&hs->hs_rehash_count);
/* Migrate every node from the old buckets to its new bucket, chosen
 * by re-hashing its key with the new mask. */
713 for (i = 0; i <= old_mask; i++) {
714 hs_hsb = old_buckets[i];
716 cfs_write_lock(&hs_hsb->hsb_rwlock);
717 cfs_hlist_for_each_safe(hnode, pos, &(hs_hsb->hsb_head)) {
718 key = cfs_hash_key(hs, hnode);
722 * Validate hnode is in the correct bucket.
724 if (unlikely(hs->hs_flags & CFS_HASH_DEBUG))
725 LASSERT(cfs_hash_id(hs, key, old_mask) == i);
728 * Delete from old hash bucket.
730 cfs_hlist_del(hnode);
731 LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) > 0);
732 cfs_atomic_dec(&hs_hsb->hsb_count);
735 * Add to rehash bucket, ops->hs_key must be defined.
/* The hs->hs_cur_mask argument continues on an elided line (738). */
737 rehash_hsb = rehash_buckets[cfs_hash_id(hs, key,
739 cfs_hlist_add_head(hnode, &(rehash_hsb->hsb_head));
740 cfs_atomic_inc(&rehash_hsb->hsb_count);
743 LASSERT(cfs_hlist_empty(&(hs_hsb->hsb_head)));
744 LASSERT(cfs_atomic_read(&hs_hsb->hsb_count) == 0);
745 cfs_write_unlock(&hs_hsb->hsb_rwlock);
748 cfs_hash_wunlock(hs);
/* Reuse the 'free' path below to release the now-empty OLD buckets
 * (on success) or the partially-built new ones (on failure).
 * NOTE(review): the sizing of the success-path free relies on elided
 * bookkeeping around line 750-753 — bits must match the array being
 * freed; verify against the full source. */
749 rehash_buckets = old_buckets;
754 CFS_FREE_PTR(rehash_buckets[i]);
755 LIBCFS_FREE(rehash_buckets, sizeof(*rehash_buckets) << bits);
758 CFS_EXPORT_SYMBOL(cfs_hash_rehash);
761 * Rehash the object referenced by @hnode in the libcfs hash @hs. The
762 * @old_key must be provided to locate the objects previous location
763 * in the hash, and the @new_key will be used to reinsert the object.
764 * Use this function instead of a cfs_hash_add() + cfs_hash_del()
765 * combo when it is critical that there is no window in time where the
766 * object is missing from the hash. When an object is being rehashed
767 * the registered cfs_hash_get() and cfs_hash_put() functions will
770 void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key, void *new_key,
771 cfs_hlist_node_t *hnode)
773 cfs_hash_bucket_t *old_hsb;
774 cfs_hash_bucket_t *new_hsb;
779 __cfs_hash_key_validate(hs, new_key, hnode);
780 LASSERT(!cfs_hlist_unhashed(hnode));
/* Locate both source and destination buckets under the table read
 * lock (taken in an elided line, released below). */
784 i = cfs_hash_id(hs, old_key, hs->hs_cur_mask);
785 old_hsb = hs->hs_buckets[i];
786 LASSERT(i <= hs->hs_cur_mask);
788 j = cfs_hash_id(hs, new_key, hs->hs_cur_mask);
789 new_hsb = hs->hs_buckets[j];
790 LASSERT(j <= hs->hs_cur_mask);
/* Always acquire the two bucket write locks in ascending index order
 * to avoid an ABBA deadlock with a concurrent rehash_key going the
 * other way; i == j means both keys map to the same bucket and the
 * item can stay where it is (early-out via the elided return). */
792 if (i < j) { /* write_lock ordering */
793 cfs_write_lock(&old_hsb->hsb_rwlock);
794 cfs_write_lock(&new_hsb->hsb_rwlock);
796 cfs_write_lock(&new_hsb->hsb_rwlock);
797 cfs_write_lock(&old_hsb->hsb_rwlock);
798 } else { /* do nothing */
799 cfs_hash_runlock(hs);
805 * Migrate item between hash buckets without calling
806 * the cfs_hash_get() and cfs_hash_put() callback functions.
808 cfs_hlist_del(hnode);
809 LASSERT(cfs_atomic_read(&old_hsb->hsb_count) > 0);
810 cfs_atomic_dec(&old_hsb->hsb_count);
811 cfs_hlist_add_head(hnode, &(new_hsb->hsb_head));
812 cfs_atomic_inc(&new_hsb->hsb_count);
814 cfs_write_unlock(&new_hsb->hsb_rwlock);
815 cfs_write_unlock(&old_hsb->hsb_rwlock);
816 cfs_hash_runlock(hs);
820 CFS_EXPORT_SYMBOL(cfs_hash_rehash_key);
/* Write the column-header line matching cfs_hash_debug_str() output
 * into @str (at most @size bytes); returns snprintf's result, i.e.
 * the length that would have been written. */
822 int cfs_hash_debug_header(char *str, int size)
824 return snprintf(str, size,
825 "%-*s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", CFS_MAX_HASH_NAME,
826 "name", "cur", "min", "max", "theta", "t-min", "t-max",
827 "flags", "rehash", "count", " distribution");
829 CFS_EXPORT_SYMBOL(cfs_hash_debug_header);
/* Format a one-line human-readable summary of @hs into @str: sizes,
 * theta window, flags, rehash/item counts, and a chain-depth
 * distribution histogram.  Returns the number of characters written
 * (via an elided return of 'c'). */
831 int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size)
833 cfs_hash_bucket_t *hsb;
837 int dist[8] = { 0, };
/* Defensive: nothing to format into. */
839 if (str == NULL || size == 0)
843 theta = __cfs_hash_theta(hs);
845 c += snprintf(str + c, size - c, "%-*s ",
846 CFS_MAX_HASH_NAME, hs->hs_name);
847 c += snprintf(str + c, size - c, "%5d ", 1 << hs->hs_cur_bits);
848 c += snprintf(str + c, size - c, "%5d ", 1 << hs->hs_min_bits);
849 c += snprintf(str + c, size - c, "%5d ", 1 << hs->hs_max_bits);
/* Theta values are fixed-point; print as int.frac (three digits). */
850 c += snprintf(str + c, size - c, "%d.%03d ",
851 __cfs_hash_theta_int(theta),
852 __cfs_hash_theta_frac(theta));
853 c += snprintf(str + c, size - c, "%d.%03d ",
854 __cfs_hash_theta_int(hs->hs_min_theta),
855 __cfs_hash_theta_frac(hs->hs_min_theta));
856 c += snprintf(str + c, size - c, "%d.%03d ",
857 __cfs_hash_theta_int(hs->hs_max_theta),
858 __cfs_hash_theta_frac(hs->hs_max_theta));
859 c += snprintf(str + c, size - c, " 0x%02x ", hs->hs_flags);
860 c += snprintf(str + c, size - c, "%6d ",
861 cfs_atomic_read(&hs->hs_rehash_count));
862 c += snprintf(str + c, size - c, "%5d ",
863 cfs_atomic_read(&hs->hs_count));
866 * The distribution is a summary of the chained hash depth in
867 * each of the libcfs hash buckets. Each buckets hsb_count is
868 * divided by the hash theta value and used to generate a
869 * histogram of the hash distribution. A uniform hash will
870 * result in all hash buckets being close to the average thus
871 * only the first few entries in the histogram will be non-zero.
872 * If your hash function results in a non-uniform hash, this will
873 * be observable as outlier buckets in the distribution histogram.
875 * Uniform hash distribution: 128/128/0/0/0/0/0/0
876 * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1
/* Histogram bin = floor(log2(count/theta)), clamped to bin 7. */
878 cfs_hash_for_each_bucket(hs, hsb, i)
879 dist[min(__cfs_fls(cfs_atomic_read(&hsb->hsb_count)/max(theta,1)),7)]++;
881 for (i = 0; i < 8; i++)
882 c += snprintf(str + c, size - c, "%d%c", dist[i],
883 (i == 7) ? '\n' : '/');
885 cfs_hash_runlock(hs);
889 CFS_EXPORT_SYMBOL(cfs_hash_debug_str);