X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=libcfs%2Finclude%2Flibcfs%2Flibcfs_hash.h;h=444053d691d54997e38f1557ef3020ab27050ba4;hp=82673de4a6ff39483a4c7168d7ddb3e8b91216a5;hb=976c0abd4efab4f56cf4b21b940eb1b976c37372;hpb=589bc6c478b91d2d0e4cdd2aefd83dd45be2ef51

diff --git a/libcfs/include/libcfs/libcfs_hash.h b/libcfs/include/libcfs/libcfs_hash.h
index 82673de..444053d 100644
--- a/libcfs/include/libcfs/libcfs_hash.h
+++ b/libcfs/include/libcfs/libcfs_hash.h
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -26,8 +24,10 @@
  * GPL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -65,8 +65,6 @@
 #if (defined __linux__ && defined __KERNEL__)
 #include <linux/hash.h>
-
-#define cfs_hash_long(val, bits)        hash_long(val, bits)
 #else
 /* Fast hashing routine for a long.
    (C) 2002 William Lee Irwin III, IBM */
@@ -81,7 +79,7 @@
 #error Define CFS_GOLDEN_RATIO_PRIME for your wordsize.
 #endif
-static inline unsigned long cfs_hash_long(unsigned long val, unsigned int bits)
+static inline unsigned long hash_long(unsigned long val, unsigned int bits)
 {
         unsigned long hash = val;
@@ -111,234 +109,707 @@ static inline unsigned long cfs_hash_long(unsigned long val, unsigned int bits)
 #if 0
 static inline unsigned long hash_ptr(void *ptr, unsigned int bits)
 {
-        return cfs_hash_long((unsigned long)ptr, bits);
+        return hash_long((unsigned long)ptr, bits);
 }
 #endif /* !(__linux__ && __KERNEL__) */
 #endif
-struct cfs_hash_ops;
+/** disable debug */
+#define CFS_HASH_DEBUG_NONE         0
+/** record hash depth and output to console when it's too deep,
+ * computing overhead is low but consumes more memory */
+#define CFS_HASH_DEBUG_1            1
+/** expensive, check key validation */
+#define CFS_HASH_DEBUG_2            2
+
+#define CFS_HASH_DEBUG_LEVEL        CFS_HASH_DEBUG_NONE
+
+struct cfs_hash_ops;
+struct cfs_hash_lock_ops;
+struct cfs_hash_hlist_ops;
+
+typedef union {
+        rwlock_t                    rw;     /**< rwlock */
+        spinlock_t                  spin;   /**< spinlock */
+} cfs_hash_lock_t;
+
+/**
+ * cfs_hash_bucket is a container of:
+ * - lock, counter ...
+ * - array of hash-head starting from hsb_head[0], hash-head can be one of
+ *   . cfs_hash_head_t
+ *   . cfs_hash_head_dep_t
+ *   . cfs_hash_dhead_t
+ *   . 
cfs_hash_dhead_dep_t + * which depends on requirement of user + * - some extra bytes (caller can require it while creating hash) + */ typedef struct cfs_hash_bucket { - cfs_hlist_head_t hsb_head; /* entries list */ - cfs_atomic_t hsb_count; /* current entries */ - cfs_rwlock_t hsb_rwlock; /* cfs_hash_bucket */ + cfs_hash_lock_t hsb_lock; /**< bucket lock */ + __u32 hsb_count; /**< current entries */ + __u32 hsb_version; /**< change version */ + unsigned int hsb_index; /**< index of bucket */ + int hsb_depmax; /**< max depth on bucket */ + long hsb_head[0]; /**< hash-head array */ } cfs_hash_bucket_t; -#define CFS_MAX_HASH_NAME 16 +/** + * cfs_hash bucket descriptor, it's normally in stack of caller + */ +typedef struct cfs_hash_bd { + cfs_hash_bucket_t *bd_bucket; /**< address of bucket */ + unsigned int bd_offset; /**< offset in bucket */ +} cfs_hash_bd_t; + +#define CFS_HASH_NAME_LEN 16 /**< default name length */ +#define CFS_HASH_BIGNAME_LEN 64 /**< bigname for param tree */ + +#define CFS_HASH_BKT_BITS 3 /**< default bits of bucket */ +#define CFS_HASH_BITS_MAX 30 /**< max bits of bucket */ +#define CFS_HASH_BITS_MIN CFS_HASH_BKT_BITS + +/** + * common hash attributes. + */ +enum cfs_hash_tag { + /** + * don't need any lock, caller will protect operations with it's + * own lock. With this flag: + * . CFS_HASH_NO_BKTLOCK, CFS_HASH_RW_BKTLOCK, CFS_HASH_SPIN_BKTLOCK + * will be ignored. + * . Some functions will be disabled with this flag, i.e: + * cfs_hash_for_each_empty, cfs_hash_rehash + */ + CFS_HASH_NO_LOCK = 1 << 0, + /** no bucket lock, use one spinlock to protect the whole hash */ + CFS_HASH_NO_BKTLOCK = 1 << 1, + /** rwlock to protect bucket */ + CFS_HASH_RW_BKTLOCK = 1 << 2, + /** spinlcok to protect bucket */ + CFS_HASH_SPIN_BKTLOCK = 1 << 3, + /** always add new item to tail */ + CFS_HASH_ADD_TAIL = 1 << 4, + /** hash-table doesn't have refcount on item */ + CFS_HASH_NO_ITEMREF = 1 << 5, + /** big name for param-tree */ + CFS_HASH_BIGNAME = 1 << 6, + /** track global count */ + CFS_HASH_COUNTER = 1 << 7, + /** rehash item by new key */ + CFS_HASH_REHASH_KEY = 1 << 8, + /** Enable dynamic hash resizing */ + CFS_HASH_REHASH = 1 << 9, + /** can shrink hash-size */ + CFS_HASH_SHRINK = 1 << 10, + /** assert hash is empty on exit */ + CFS_HASH_ASSERT_EMPTY = 1 << 11, + /** record hlist depth */ + CFS_HASH_DEPTH = 1 << 12, + /** + * rehash is always scheduled in a different thread, so current + * change on hash table is non-blocking + */ + CFS_HASH_NBLK_CHANGE = 1 << 13, + /** NB, we typed hs_flags as __u16, please change it + * if you need to extend >=16 flags */ +}; + +/** most used attributes */ +#define CFS_HASH_DEFAULT (CFS_HASH_RW_BKTLOCK | \ + CFS_HASH_COUNTER | CFS_HASH_REHASH) + +/** + * cfs_hash is a hash-table implementation for general purpose, it can support: + * . two refcount modes + * hash-table with & without refcount + * . four lock modes + * nolock, one-spinlock, rw-bucket-lock, spin-bucket-lock + * . general operations + * lookup, add(add_tail or add_head), delete + * . rehash + * grows or shrink + * . iteration + * locked iteration and unlocked iteration + * . bigname + * support long name hash + * . 
debug + * trace max searching depth + * + * Rehash: + * When the htable grows or shrinks, a separate task (cfs_hash_rehash_worker) + * is spawned to handle the rehash in the background, it's possible that other + * processes can concurrently perform additions, deletions, and lookups + * without being blocked on rehash completion, because rehash will release + * the global wrlock for each bucket. + * + * rehash and iteration can't run at the same time because it's too tricky + * to keep both of them safe and correct. + * As they are relatively rare operations, so: + * . if iteration is in progress while we try to launch rehash, then + * it just giveup, iterator will launch rehash at the end. + * . if rehash is in progress while we try to iterate the hash table, + * then we just wait (shouldn't be very long time), anyway, nobody + * should expect iteration of whole hash-table to be non-blocking. + * + * During rehashing, a (key,object) pair may be in one of two buckets, + * depending on whether the worker task has yet to transfer the object + * to its new location in the table. Lookups and deletions need to search both + * locations; additions must take care to only insert into the new bucket. + */ typedef struct cfs_hash { - int hs_cur_bits; /* current hash bits */ - int hs_cur_mask; /* current hash mask */ - int hs_min_bits; /* min hash bits */ - int hs_max_bits; /* max hash bits */ - int hs_min_theta; /* resize min threshold */ - int hs_max_theta; /* resize max threshold */ - int hs_flags; /* hash flags */ - cfs_atomic_t hs_count; /* current entries */ - cfs_atomic_t hs_rehash_count;/* resize count */ - struct cfs_hash_bucket **hs_buckets; /* hash buckets */ - struct cfs_hash_ops *hs_ops; /* hash operations */ - cfs_rwlock_t hs_rwlock; /* cfs_hash */ - cfs_atomic_t hs_refcount; - char hs_name[CFS_MAX_HASH_NAME]; + /** serialize with rehash, or serialize all operations if + * the hash-table has CFS_HASH_NO_BKTLOCK */ + cfs_hash_lock_t hs_lock; + /** hash operations */ + struct cfs_hash_ops *hs_ops; + /** hash lock operations */ + struct cfs_hash_lock_ops *hs_lops; + /** hash list operations */ + struct cfs_hash_hlist_ops *hs_hops; + /** hash buckets-table */ + cfs_hash_bucket_t **hs_buckets; + /** total number of items on this hash-table */ + atomic_t hs_count; + /** hash flags, see cfs_hash_tag for detail */ + __u16 hs_flags; + /** # of extra-bytes for bucket, for user saving extended attributes */ + __u16 hs_extra_bytes; + /** wants to iterate */ + __u8 hs_iterating; + /** hash-table is dying */ + __u8 hs_exiting; + /** current hash bits */ + __u8 hs_cur_bits; + /** min hash bits */ + __u8 hs_min_bits; + /** max hash bits */ + __u8 hs_max_bits; + /** bits for rehash */ + __u8 hs_rehash_bits; + /** bits for each bucket */ + __u8 hs_bkt_bits; + /** resize min threshold */ + __u16 hs_min_theta; + /** resize max threshold */ + __u16 hs_max_theta; + /** resize count */ + __u32 hs_rehash_count; + /** # of iterators (caller of cfs_hash_for_each_*) */ + __u32 hs_iterators; + /** rehash workitem */ + cfs_workitem_t hs_rehash_wi; + /** refcount on this hash table */ + atomic_t hs_refcount; + /** rehash buckets-table */ + cfs_hash_bucket_t **hs_rehash_buckets; +#if CFS_HASH_DEBUG_LEVEL >= CFS_HASH_DEBUG_1 + /** serialize debug members */ + spinlock_t hs_dep_lock; + /** max depth */ + unsigned int hs_dep_max; + /** id of the deepest bucket */ + unsigned int hs_dep_bkt; + /** offset in the deepest bucket */ + unsigned int hs_dep_off; + /** bits when we found the max depth */ + unsigned int 
hs_dep_bits; + /** workitem to output max depth */ + cfs_workitem_t hs_dep_wi; +#endif + /** name of htable */ + char hs_name[0]; } cfs_hash_t; +typedef struct cfs_hash_lock_ops { + /** lock the hash table */ + void (*hs_lock)(cfs_hash_lock_t *lock, int exclusive); + /** unlock the hash table */ + void (*hs_unlock)(cfs_hash_lock_t *lock, int exclusive); + /** lock the hash bucket */ + void (*hs_bkt_lock)(cfs_hash_lock_t *lock, int exclusive); + /** unlock the hash bucket */ + void (*hs_bkt_unlock)(cfs_hash_lock_t *lock, int exclusive); +} cfs_hash_lock_ops_t; + +typedef struct cfs_hash_hlist_ops { + /** return hlist_head of hash-head of @bd */ + struct hlist_head *(*hop_hhead)(cfs_hash_t *hs, cfs_hash_bd_t *bd); + /** return hash-head size */ + int (*hop_hhead_size)(cfs_hash_t *hs); + /** add @hnode to hash-head of @bd */ + int (*hop_hnode_add)(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode); + /** remove @hnode from hash-head of @bd */ + int (*hop_hnode_del)(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode); +} cfs_hash_hlist_ops_t; + typedef struct cfs_hash_ops { - unsigned (*hs_hash)(cfs_hash_t *hs, void *key, unsigned mask); - void * (*hs_key)(cfs_hlist_node_t *hnode); - int (*hs_compare)(void *key, cfs_hlist_node_t *hnode); - void * (*hs_get)(cfs_hlist_node_t *hnode); - void * (*hs_put)(cfs_hlist_node_t *hnode); - void (*hs_exit)(cfs_hlist_node_t *hnode); + /** return hashed value from @key */ + unsigned (*hs_hash)(cfs_hash_t *hs, const void *key, unsigned mask); + /** return key address of @hnode */ + void * (*hs_key)(struct hlist_node *hnode); + /** copy key from @hnode to @key */ + void (*hs_keycpy)(struct hlist_node *hnode, void *key); + /** + * compare @key with key of @hnode + * returns 1 on a match + */ + int (*hs_keycmp)(const void *key, struct hlist_node *hnode); + /** return object address of @hnode, i.e: container_of(...hnode) */ + void * (*hs_object)(struct hlist_node *hnode); + /** get refcount of item, always called with holding bucket-lock */ + void (*hs_get)(cfs_hash_t *hs, struct hlist_node *hnode); + /** release refcount of item */ + void (*hs_put)(cfs_hash_t *hs, struct hlist_node *hnode); + /** release refcount of item, always called with holding bucket-lock */ + void (*hs_put_locked)(cfs_hash_t *hs, struct hlist_node *hnode); + /** it's called before removing of @hnode */ + void (*hs_exit)(cfs_hash_t *hs, struct hlist_node *hnode); } cfs_hash_ops_t; -#define CFS_HASH_DEBUG 0x0001 /* Enable expensive debug checks */ -#define CFS_HASH_REHASH 0x0002 /* Enable dynamic hash resizing */ +/** total number of buckets in @hs */ +#define CFS_HASH_NBKT(hs) \ + (1U << ((hs)->hs_cur_bits - (hs)->hs_bkt_bits)) -#define CFS_HO(hs) (hs)->hs_ops -#define CFS_HOP(hs, op) (hs)->hs_ops->hs_ ## op +/** total number of buckets in @hs while rehashing */ +#define CFS_HASH_RH_NBKT(hs) \ + (1U << ((hs)->hs_rehash_bits - (hs)->hs_bkt_bits)) -static inline unsigned -cfs_hash_id(cfs_hash_t *hs, void *key, unsigned mask) +/** number of hlist for in bucket */ +#define CFS_HASH_BKT_NHLIST(hs) (1U << (hs)->hs_bkt_bits) + +/** total number of hlist in @hs */ +#define CFS_HASH_NHLIST(hs) (1U << (hs)->hs_cur_bits) + +/** total number of hlist in @hs while rehashing */ +#define CFS_HASH_RH_NHLIST(hs) (1U << (hs)->hs_rehash_bits) + +static inline int +cfs_hash_with_no_lock(cfs_hash_t *hs) { - LASSERT(hs); - LASSERT(CFS_HO(hs)); - LASSERT(CFS_HOP(hs, hash)); + /* caller will serialize all operations for this hash-table */ + return (hs->hs_flags & CFS_HASH_NO_LOCK) != 0; +} - 
return CFS_HOP(hs, hash)(hs, key, mask); +static inline int +cfs_hash_with_no_bktlock(cfs_hash_t *hs) +{ + /* no bucket lock, one single lock to protect the hash-table */ + return (hs->hs_flags & CFS_HASH_NO_BKTLOCK) != 0; } -static inline void * -cfs_hash_key(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +static inline int +cfs_hash_with_rw_bktlock(cfs_hash_t *hs) { - LASSERT(hs); - LASSERT(hnode); - LASSERT(CFS_HO(hs)); - LASSERT(CFS_HOP(hs, key)); + /* rwlock to protect hash bucket */ + return (hs->hs_flags & CFS_HASH_RW_BKTLOCK) != 0; +} - return CFS_HOP(hs, key)(hnode); +static inline int +cfs_hash_with_spin_bktlock(cfs_hash_t *hs) +{ + /* spinlock to protect hash bucket */ + return (hs->hs_flags & CFS_HASH_SPIN_BKTLOCK) != 0; } -/* Returns 1 on a match, - * XXX: This would be better if it returned, -1, 0, or 1 for - * <, =, > respectivly. It could then be used to implement - * a CFS_HASH_SORT feature flags which could keep each hash - * bucket in order. This would increase insertion times - * but could reduce lookup times for deep chains. Ideally, - * the rehash should keep chain depth short but if that - * ends up not being the case this would be a nice feature. - */ static inline int -cfs_hash_compare(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode) +cfs_hash_with_add_tail(cfs_hash_t *hs) { - LASSERT(hs); - LASSERT(hnode); - LASSERT(CFS_HO(hs)); + return (hs->hs_flags & CFS_HASH_ADD_TAIL) != 0; +} - if (CFS_HOP(hs, compare)) - return CFS_HOP(hs, compare)(key, hnode); +static inline int +cfs_hash_with_no_itemref(cfs_hash_t *hs) +{ + /* hash-table doesn't keep refcount on item, + * item can't be removed from hash unless it's + * ZERO refcount */ + return (hs->hs_flags & CFS_HASH_NO_ITEMREF) != 0; +} - return -EOPNOTSUPP; +static inline int +cfs_hash_with_bigname(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_BIGNAME) != 0; } -static inline void * -cfs_hash_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +static inline int +cfs_hash_with_counter(cfs_hash_t *hs) { - LASSERT(hs); - LASSERT(hnode); - LASSERT(CFS_HO(hs)); + return (hs->hs_flags & CFS_HASH_COUNTER) != 0; +} - if (CFS_HOP(hs, get)) - return CFS_HOP(hs, get)(hnode); +static inline int +cfs_hash_with_rehash(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_REHASH) != 0; +} + +static inline int +cfs_hash_with_rehash_key(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_REHASH_KEY) != 0; +} + +static inline int +cfs_hash_with_shrink(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_SHRINK) != 0; +} - return NULL; +static inline int +cfs_hash_with_assert_empty(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_ASSERT_EMPTY) != 0; +} + +static inline int +cfs_hash_with_depth(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_DEPTH) != 0; +} + +static inline int +cfs_hash_with_nblk_change(cfs_hash_t *hs) +{ + return (hs->hs_flags & CFS_HASH_NBLK_CHANGE) != 0; +} + +static inline int +cfs_hash_is_exiting(cfs_hash_t *hs) +{ /* cfs_hash_destroy is called */ + return hs->hs_exiting; +} + +static inline int +cfs_hash_is_rehashing(cfs_hash_t *hs) +{ /* rehash is launched */ + return hs->hs_rehash_bits != 0; +} + +static inline int +cfs_hash_is_iterating(cfs_hash_t *hs) +{ /* someone is calling cfs_hash_for_each_* */ + return hs->hs_iterating || hs->hs_iterators != 0; +} + +static inline int +cfs_hash_bkt_size(cfs_hash_t *hs) +{ + return offsetof(cfs_hash_bucket_t, hsb_head[0]) + + hs->hs_hops->hop_hhead_size(hs) * CFS_HASH_BKT_NHLIST(hs) + + hs->hs_extra_bytes; +} + +#define CFS_HOP(hs, op) (hs)->hs_ops->hs_ ## op + 
+static inline unsigned +cfs_hash_id(cfs_hash_t *hs, const void *key, unsigned mask) +{ + return CFS_HOP(hs, hash)(hs, key, mask); } static inline void * -cfs_hash_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +cfs_hash_key(cfs_hash_t *hs, struct hlist_node *hnode) { - LASSERT(hs); - LASSERT(hnode); - LASSERT(CFS_HO(hs)); + return CFS_HOP(hs, key)(hnode); +} - if (CFS_HOP(hs, put)) - return CFS_HOP(hs, put)(hnode); +static inline void +cfs_hash_keycpy(cfs_hash_t *hs, struct hlist_node *hnode, void *key) +{ + if (CFS_HOP(hs, keycpy) != NULL) + CFS_HOP(hs, keycpy)(hnode, key); +} - return NULL; +/** + * Returns 1 on a match, + */ +static inline int +cfs_hash_keycmp(cfs_hash_t *hs, const void *key, struct hlist_node *hnode) +{ + return CFS_HOP(hs, keycmp)(key, hnode); +} + +static inline void * +cfs_hash_object(cfs_hash_t *hs, struct hlist_node *hnode) +{ + return CFS_HOP(hs, object)(hnode); } static inline void -cfs_hash_exit(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +cfs_hash_get(cfs_hash_t *hs, struct hlist_node *hnode) { - LASSERT(hs); - LASSERT(hnode); - LASSERT(CFS_HO(hs)); + return CFS_HOP(hs, get)(hs, hnode); +} - if (CFS_HOP(hs, exit)) - return CFS_HOP(hs, exit)(hnode); +static inline void +cfs_hash_put_locked(cfs_hash_t *hs, struct hlist_node *hnode) +{ + LASSERT(CFS_HOP(hs, put_locked) != NULL); + + return CFS_HOP(hs, put_locked)(hs, hnode); } -/* Validate hnode references the correct key */ static inline void -__cfs_hash_key_validate(cfs_hash_t *hs, void *key, - cfs_hlist_node_t *hnode) +cfs_hash_put(cfs_hash_t *hs, struct hlist_node *hnode) { - if (unlikely(hs->hs_flags & CFS_HASH_DEBUG)) - LASSERT(cfs_hash_compare(hs, key, hnode) > 0); + LASSERT(CFS_HOP(hs, put) != NULL); + + return CFS_HOP(hs, put)(hs, hnode); } -/* Validate hnode is in the correct bucket */ static inline void -__cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bucket_t *hsb, - cfs_hlist_node_t *hnode) +cfs_hash_exit(cfs_hash_t *hs, struct hlist_node *hnode) { - unsigned i; + if (CFS_HOP(hs, exit)) + CFS_HOP(hs, exit)(hs, hnode); +} - if (unlikely(hs->hs_flags & CFS_HASH_DEBUG)) { - i = cfs_hash_id(hs, cfs_hash_key(hs, hnode), hs->hs_cur_mask); - LASSERT(hs->hs_buckets[i] == hsb); - } +static inline void cfs_hash_lock(cfs_hash_t *hs, int excl) +{ + hs->hs_lops->hs_lock(&hs->hs_lock, excl); } -static inline cfs_hlist_node_t * -__cfs_hash_bucket_lookup(cfs_hash_t *hs, - cfs_hash_bucket_t *hsb, void *key) +static inline void cfs_hash_unlock(cfs_hash_t *hs, int excl) { - cfs_hlist_node_t *hnode; + hs->hs_lops->hs_unlock(&hs->hs_lock, excl); +} - cfs_hlist_for_each(hnode, &hsb->hsb_head) - if (cfs_hash_compare(hs, key, hnode) > 0) - return hnode; +static inline int cfs_hash_dec_and_lock(cfs_hash_t *hs, + atomic_t *condition) +{ + LASSERT(cfs_hash_with_no_bktlock(hs)); + return atomic_dec_and_lock(condition, &hs->hs_lock.spin); +} - return NULL; +static inline void cfs_hash_bd_lock(cfs_hash_t *hs, + cfs_hash_bd_t *bd, int excl) +{ + hs->hs_lops->hs_bkt_lock(&bd->bd_bucket->hsb_lock, excl); } -static inline void * -__cfs_hash_bucket_add(cfs_hash_t *hs, - cfs_hash_bucket_t *hsb, - cfs_hlist_node_t *hnode) +static inline void cfs_hash_bd_unlock(cfs_hash_t *hs, + cfs_hash_bd_t *bd, int excl) +{ + hs->hs_lops->hs_bkt_unlock(&bd->bd_bucket->hsb_lock, excl); +} + +/** + * operations on cfs_hash bucket (bd: bucket descriptor), + * they are normally for hash-table without rehash + */ +void cfs_hash_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bd); + +static inline void cfs_hash_bd_get_and_lock(cfs_hash_t *hs, const 
void *key, + cfs_hash_bd_t *bd, int excl) +{ + cfs_hash_bd_get(hs, key, bd); + cfs_hash_bd_lock(hs, bd, excl); +} + +static inline unsigned cfs_hash_bd_index_get(cfs_hash_t *hs, cfs_hash_bd_t *bd) { - cfs_hlist_add_head(hnode, &(hsb->hsb_head)); - cfs_atomic_inc(&hsb->hsb_count); - cfs_atomic_inc(&hs->hs_count); + return bd->bd_offset | (bd->bd_bucket->hsb_index << hs->hs_bkt_bits); +} - return cfs_hash_get(hs, hnode); +static inline void cfs_hash_bd_index_set(cfs_hash_t *hs, + unsigned index, cfs_hash_bd_t *bd) +{ + bd->bd_bucket = hs->hs_buckets[index >> hs->hs_bkt_bits]; + bd->bd_offset = index & (CFS_HASH_BKT_NHLIST(hs) - 1U); } static inline void * -__cfs_hash_bucket_del(cfs_hash_t *hs, - cfs_hash_bucket_t *hsb, - cfs_hlist_node_t *hnode) +cfs_hash_bd_extra_get(cfs_hash_t *hs, cfs_hash_bd_t *bd) +{ + return (void *)bd->bd_bucket + + cfs_hash_bkt_size(hs) - hs->hs_extra_bytes; +} + +static inline __u32 +cfs_hash_bd_version_get(cfs_hash_bd_t *bd) +{ + /* need hold cfs_hash_bd_lock */ + return bd->bd_bucket->hsb_version; +} + +static inline __u32 +cfs_hash_bd_count_get(cfs_hash_bd_t *bd) +{ + /* need hold cfs_hash_bd_lock */ + return bd->bd_bucket->hsb_count; +} + +static inline int +cfs_hash_bd_depmax_get(cfs_hash_bd_t *bd) +{ + return bd->bd_bucket->hsb_depmax; +} + +static inline int +cfs_hash_bd_compare(cfs_hash_bd_t *bd1, cfs_hash_bd_t *bd2) { - cfs_hlist_del_init(hnode); - LASSERT(cfs_atomic_read(&hsb->hsb_count) > 0); - cfs_atomic_dec(&hsb->hsb_count); - LASSERT(cfs_atomic_read(&hs->hs_count) > 0); - cfs_atomic_dec(&hs->hs_count); + if (bd1->bd_bucket->hsb_index != bd2->bd_bucket->hsb_index) + return bd1->bd_bucket->hsb_index - bd2->bd_bucket->hsb_index; - return cfs_hash_put(hs, hnode); + if (bd1->bd_offset != bd2->bd_offset) + return bd1->bd_offset - bd2->bd_offset; + + return 0; +} + +void cfs_hash_bd_add_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode); +void cfs_hash_bd_del_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode); +void cfs_hash_bd_move_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd_old, + cfs_hash_bd_t *bd_new, struct hlist_node *hnode); + +static inline int cfs_hash_bd_dec_and_lock(cfs_hash_t *hs, cfs_hash_bd_t *bd, + atomic_t *condition) +{ + LASSERT(cfs_hash_with_spin_bktlock(hs)); + return atomic_dec_and_lock(condition, &bd->bd_bucket->hsb_lock.spin); } +static inline struct hlist_head *cfs_hash_bd_hhead(cfs_hash_t *hs, + cfs_hash_bd_t *bd) +{ + return hs->hs_hops->hop_hhead(hs, bd); +} + +struct hlist_node *cfs_hash_bd_lookup_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + const void *key); +struct hlist_node *cfs_hash_bd_peek_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + const void *key); +struct hlist_node *cfs_hash_bd_findadd_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + const void *key, + struct hlist_node *hnode, + int insist_add); +struct hlist_node *cfs_hash_bd_finddel_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, + const void *key, + struct hlist_node *hnode); + +/** + * operations on cfs_hash bucket (bd: bucket descriptor), + * they are safe for hash-table with rehash + */ +void cfs_hash_dual_bd_get(cfs_hash_t *hs, const void *key, cfs_hash_bd_t *bds); +void cfs_hash_dual_bd_lock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl); +void cfs_hash_dual_bd_unlock(cfs_hash_t *hs, cfs_hash_bd_t *bds, int excl); + +static inline void cfs_hash_dual_bd_get_and_lock(cfs_hash_t *hs, const void *key, + cfs_hash_bd_t *bds, int excl) +{ + cfs_hash_dual_bd_get(hs, key, bds); + cfs_hash_dual_bd_lock(hs, bds, excl); +} + +struct hlist_node * 
+cfs_hash_dual_bd_lookup_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds, + const void *key); +struct hlist_node * +cfs_hash_dual_bd_findadd_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds, + const void *key, struct hlist_node *hnode, + int insist_add); +struct hlist_node * +cfs_hash_dual_bd_finddel_locked(cfs_hash_t *hs, cfs_hash_bd_t *bds, + const void *key, struct hlist_node *hnode); + /* Hash init/cleanup functions */ -cfs_hash_t *cfs_hash_create(char *name, unsigned int cur_bits, - unsigned int max_bits, - cfs_hash_ops_t *ops, int flags); +cfs_hash_t *cfs_hash_create(char *name, unsigned cur_bits, unsigned max_bits, + unsigned bkt_bits, unsigned extra_bytes, + unsigned min_theta, unsigned max_theta, + cfs_hash_ops_t *ops, unsigned flags); + cfs_hash_t *cfs_hash_getref(cfs_hash_t *hs); void cfs_hash_putref(cfs_hash_t *hs); /* Hash addition functions */ -void cfs_hash_add(cfs_hash_t *hs, void *key, - cfs_hlist_node_t *hnode); -int cfs_hash_add_unique(cfs_hash_t *hs, void *key, - cfs_hlist_node_t *hnode); -void *cfs_hash_findadd_unique(cfs_hash_t *hs, void *key, - cfs_hlist_node_t *hnode); +void cfs_hash_add(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode); +int cfs_hash_add_unique(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode); +void *cfs_hash_findadd_unique(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode); /* Hash deletion functions */ -void *cfs_hash_del(cfs_hash_t *hs, void *key, cfs_hlist_node_t *hnode); -void *cfs_hash_del_key(cfs_hash_t *hs, void *key); +void *cfs_hash_del(cfs_hash_t *hs, const void *key, struct hlist_node *hnode); +void *cfs_hash_del_key(cfs_hash_t *hs, const void *key); /* Hash lookup/for_each functions */ -void *cfs_hash_lookup(cfs_hash_t *hs, void *key); -typedef void (*cfs_hash_for_each_cb_t)(void *obj, void *data); +#define CFS_HASH_LOOP_HOG 1024 + +typedef int (*cfs_hash_for_each_cb_t)(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *node, void *data); +void *cfs_hash_lookup(cfs_hash_t *hs, const void *key); void cfs_hash_for_each(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data); void cfs_hash_for_each_safe(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data); -void cfs_hash_for_each_empty(cfs_hash_t *hs, cfs_hash_for_each_cb_t, void *data); -void cfs_hash_for_each_key(cfs_hash_t *hs, void *key, - cfs_hash_for_each_cb_t, void *data); +int cfs_hash_for_each_nolock(cfs_hash_t *hs, cfs_hash_for_each_cb_t, + void *data); +int cfs_hash_for_each_empty(cfs_hash_t *hs, cfs_hash_for_each_cb_t, + void *data); +void cfs_hash_for_each_key(cfs_hash_t *hs, const void *key, + cfs_hash_for_each_cb_t, void *data); +typedef int (*cfs_hash_cond_opt_cb_t)(void *obj, void *data); +void cfs_hash_cond_del(cfs_hash_t *hs, cfs_hash_cond_opt_cb_t, void *data); + +void cfs_hash_hlist_for_each(cfs_hash_t *hs, unsigned hindex, + cfs_hash_for_each_cb_t, void *data); +int cfs_hash_is_empty(cfs_hash_t *hs); +__u64 cfs_hash_size_get(cfs_hash_t *hs); /* * Rehash - Theta is calculated to be the average chained * hash depth assuming a perfectly uniform hash funcion. 
*/ -int cfs_hash_rehash(cfs_hash_t *hs, int bits); -void cfs_hash_rehash_key(cfs_hash_t *hs, void *old_key, - void *new_key, cfs_hlist_node_t *hnode); +void cfs_hash_rehash_cancel_locked(cfs_hash_t *hs); +void cfs_hash_rehash_cancel(cfs_hash_t *hs); +int cfs_hash_rehash(cfs_hash_t *hs, int do_rehash); +void cfs_hash_rehash_key(cfs_hash_t *hs, const void *old_key, + void *new_key, struct hlist_node *hnode); + +#if CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 +/* Validate hnode references the correct key */ +static inline void +cfs_hash_key_validate(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode) +{ + LASSERT(cfs_hash_keycmp(hs, key, hnode)); +} + +/* Validate hnode is in the correct bucket */ +static inline void +cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode) +{ + cfs_hash_bd_t bds[2]; + cfs_hash_dual_bd_get(hs, cfs_hash_key(hs, hnode), bds); + LASSERT(bds[0].bd_bucket == bd->bd_bucket || + bds[1].bd_bucket == bd->bd_bucket); +} + +#else /* CFS_HASH_DEBUG_LEVEL > CFS_HASH_DEBUG_1 */ + +static inline void +cfs_hash_key_validate(cfs_hash_t *hs, const void *key, + struct hlist_node *hnode) {} + +static inline void +cfs_hash_bucket_validate(cfs_hash_t *hs, cfs_hash_bd_t *bd, + struct hlist_node *hnode) {} + +#endif /* CFS_HASH_DEBUG_LEVEL */ #define CFS_HASH_THETA_BITS 10 +#define CFS_HASH_MIN_THETA (1U << (CFS_HASH_THETA_BITS - 1)) +#define CFS_HASH_MAX_THETA (1U << (CFS_HASH_THETA_BITS + 1)) /* Return integer component of theta */ static inline int __cfs_hash_theta_int(int theta) @@ -355,26 +826,31 @@ static inline int __cfs_hash_theta_frac(int theta) static inline int __cfs_hash_theta(cfs_hash_t *hs) { - return (cfs_atomic_read(&hs->hs_count) << - CFS_HASH_THETA_BITS) >> hs->hs_cur_bits; + return (atomic_read(&hs->hs_count) << + CFS_HASH_THETA_BITS) >> hs->hs_cur_bits; } static inline void __cfs_hash_set_theta(cfs_hash_t *hs, int min, int max) { LASSERT(min < max); - hs->hs_min_theta = min; - hs->hs_max_theta = max; + hs->hs_min_theta = (__u16)min; + hs->hs_max_theta = (__u16)max; } /* Generic debug formatting routines mainly for proc handler */ +#ifndef HAVE_ONLY_PROCFS_SEQ int cfs_hash_debug_header(char *str, int size); int cfs_hash_debug_str(cfs_hash_t *hs, char *str, int size); +#endif +struct seq_file; +int cfs_hash_debug_header_seq(struct seq_file *m); +int cfs_hash_debug_str_seq(cfs_hash_t *hs, struct seq_file *m); /* * Generic djb2 hash algorithm for character arrays. */ static inline unsigned -cfs_hash_djb2_hash(void *key, size_t size, unsigned mask) +cfs_hash_djb2_hash(const void *key, size_t size, unsigned mask) { unsigned i, hash = 5381; @@ -390,7 +866,7 @@ cfs_hash_djb2_hash(void *key, size_t size, unsigned mask) * Generic u32 hash algorithm. */ static inline unsigned -cfs_hash_u32_hash(__u32 key, unsigned mask) +cfs_hash_u32_hash(const __u32 key, unsigned mask) { return ((key * CFS_GOLDEN_RATIO_PRIME_32) & mask); } @@ -399,21 +875,27 @@ cfs_hash_u32_hash(__u32 key, unsigned mask) * Generic u64 hash algorithm. 
 */
 static inline unsigned
-cfs_hash_u64_hash(__u64 key, unsigned mask)
+cfs_hash_u64_hash(const __u64 key, unsigned mask)
 {
         return ((unsigned)(key * CFS_GOLDEN_RATIO_PRIME_64) & mask);
 }
 
-#define cfs_hash_for_each_bucket(hs, hsb, pos)          \
-        for (pos = 0;                                   \
-             pos <= hs->hs_cur_mask &&                  \
-             (hsb = hs->hs_buckets[pos]);               \
-             pos++)
+/** iterate over all buckets in @bds (array of cfs_hash_bd_t) */
+#define cfs_hash_for_each_bd(bds, n, i)                 \
+        for (i = 0; i < n && (bds)[i].bd_bucket != NULL; i++)
+
+/** iterate over all buckets of @hs */
+#define cfs_hash_for_each_bucket(hs, bd, pos)           \
+        for (pos = 0;                                   \
+             pos < CFS_HASH_NBKT(hs) &&                 \
+             ((bd)->bd_bucket = (hs)->hs_buckets[pos]) != NULL; pos++)
+
+/** iterate over all hlists of bucket @bd */
+#define cfs_hash_bd_for_each_hlist(hs, bd, hlist)       \
+        for ((bd)->bd_offset = 0;                       \
+             (bd)->bd_offset < CFS_HASH_BKT_NHLIST(hs) &&       \
+             (hlist = cfs_hash_bd_hhead(hs, bd)) != NULL;       \
+             (bd)->bd_offset++)
 
-#define cfs_hash_for_each_bucket_restart(hs, hsb, pos)  \
-        for (/* pos=0 done once by caller */;           \
-             pos <= hs->hs_cur_mask &&                  \
-             (hsb = hs->hs_buckets[pos]);               \
-             pos++)
 /* !__LIBCFS__HASH_H__ */
 #endif
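
The hunks above replace the old flat-bucket interface with the ops/bucket-descriptor interface. As an illustration only, not part of the patch, the following minimal sketch shows how a caller might use the new API. Everything named my_obj_*, mo_*, and my_obj_table_demo() is hypothetical, the umbrella include is assumed, and the assumption that cfs_hash_create() returns NULL on allocation failure is not guaranteed by this header; the callback signatures do follow the cfs_hash_ops_t declared in the diff.

#include <libcfs/libcfs.h>               /* assumed umbrella header */

struct my_obj {                          /* hypothetical user object */
        char              mo_name[16];   /* hash key */
        atomic_t          mo_ref;        /* item refcount */
        struct hlist_node mo_hnode;      /* linkage into the cfs_hash */
};

static unsigned
my_obj_hash(cfs_hash_t *hs, const void *key, unsigned mask)
{
        /* djb2 over the string key, masked to the current table size */
        return cfs_hash_djb2_hash(key, strlen(key), mask);
}

static void *
my_obj_key(struct hlist_node *hnode)
{
        return container_of(hnode, struct my_obj, mo_hnode)->mo_name;
}

static int
my_obj_keycmp(const void *key, struct hlist_node *hnode)
{
        struct my_obj *obj = container_of(hnode, struct my_obj, mo_hnode);

        return strcmp(obj->mo_name, key) == 0;  /* 1 on a match */
}

static void *
my_obj_object(struct hlist_node *hnode)
{
        return container_of(hnode, struct my_obj, mo_hnode);
}

static void
my_obj_get(cfs_hash_t *hs, struct hlist_node *hnode)
{
        atomic_inc(&container_of(hnode, struct my_obj, mo_hnode)->mo_ref);
}

static void
my_obj_put(cfs_hash_t *hs, struct hlist_node *hnode)
{
        atomic_dec(&container_of(hnode, struct my_obj, mo_hnode)->mo_ref);
}

static cfs_hash_ops_t my_obj_hash_ops = {
        .hs_hash        = my_obj_hash,
        .hs_key         = my_obj_key,
        .hs_keycmp      = my_obj_keycmp,
        .hs_object      = my_obj_object,
        .hs_get         = my_obj_get,
        .hs_put         = my_obj_put,
        .hs_put_locked  = my_obj_put,   /* nothing sleeps, same body is fine */
};

static int my_obj_table_demo(struct my_obj *obj)
{
        cfs_hash_t    *hs;
        struct my_obj *found;

        hs = cfs_hash_create("my_obj_hash",
                             5,                  /* cur_bits: 32 hlists   */
                             10,                 /* max_bits for rehash   */
                             CFS_HASH_BKT_BITS,  /* hlists per bucket     */
                             0,                  /* extra_bytes           */
                             CFS_HASH_MIN_THETA,
                             CFS_HASH_MAX_THETA,
                             &my_obj_hash_ops,
                             CFS_HASH_DEFAULT);  /* rw bktlock+counter+rehash */
        if (hs == NULL)
                return -ENOMEM;

        cfs_hash_add(hs, obj->mo_name, &obj->mo_hnode);

        found = cfs_hash_lookup(hs, obj->mo_name);  /* takes a ref via hs_get */
        if (found != NULL)
                cfs_hash_put(hs, &found->mo_hnode); /* drop the lookup ref */

        cfs_hash_del_key(hs, obj->mo_name);
        cfs_hash_putref(hs);                        /* drop creator reference */
        return 0;
}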
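The iterator callback type also changes in this patch: it now receives the hash table, the bucket descriptor and the hlist node, and returns an int. Below is a sketch of a conforming callback, reusing the hypothetical struct my_obj from the previous example; the header does not document what the core does with the return value, so the sketch simply returns 0.

/* Count objects whose refcount shows an extra user (illustration only). */
static int
my_obj_count_busy(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                  struct hlist_node *hnode, void *data)
{
        struct my_obj *obj  = cfs_hash_object(hs, hnode);
        int           *busy = data;

        if (atomic_read(&obj->mo_ref) > 1)
                (*busy)++;
        return 0;
}

It would be driven with something like: int busy = 0; cfs_hash_for_each(hs, my_obj_count_busy, &busy);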
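Theta, used by the resize thresholds above, is the average number of entries per hash list in fixed point with CFS_HASH_THETA_BITS (10) fractional bits, as computed by __cfs_hash_theta(). The standalone userspace snippet below walks through the arithmetic with made-up numbers (3000 items, hs_cur_bits = 10, i.e. 1024 hash lists); the fraction printing is only an approximation of the header's __cfs_hash_theta_int()/__cfs_hash_theta_frac() helpers, and the exact trigger policy lives in the .c file, not in this header.

#include <stdio.h>

#define CFS_HASH_THETA_BITS  10
#define CFS_HASH_MIN_THETA   (1U << (CFS_HASH_THETA_BITS - 1))  /* 512  = depth 0.5 */
#define CFS_HASH_MAX_THETA   (1U << (CFS_HASH_THETA_BITS + 1))  /* 2048 = depth 2.0 */

int main(void)
{
        unsigned count    = 3000;  /* made-up item count              */
        unsigned cur_bits = 10;    /* made-up table size: 1024 hlists */
        unsigned theta    = (count << CFS_HASH_THETA_BITS) >> cur_bits;

        /* theta == 3000 here, i.e. about 2.93 entries per hash list */
        printf("theta = %u (%u.%03u entries per hlist)\n", theta,
               theta >> CFS_HASH_THETA_BITS,
               ((theta & ((1U << CFS_HASH_THETA_BITS) - 1)) * 1000) >>
               CFS_HASH_THETA_BITS);
        printf("grow rehash?   %s\n", theta > CFS_HASH_MAX_THETA ? "yes" : "no");
        printf("shrink rehash? %s\n", theta < CFS_HASH_MIN_THETA ? "yes" : "no");
        return 0;
}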