From 1a24137e8f26eaae9a2dac39a1e8a8a0bed46b6b Mon Sep 17 00:00:00 2001 From: yury Date: Wed, 3 Sep 2008 09:54:06 +0000 Subject: [PATCH 1/1] b=16776 r=vitaly,robert - new class_hash.c and using it in conncetions, nids, etc. Using it for held locks on server will come shortly. --- lustre/include/class_hash.h | 458 +++++++++---- lustre/include/liblustre.h | 37 ++ lustre/include/lprocfs_status.h | 4 + lustre/include/lustre_net.h | 20 +- lustre/include/obd.h | 6 +- lustre/ldlm/ldlm_lib.c | 21 +- lustre/mds/lproc_mds.c | 1 + lustre/mdt/mdt_lproc.c | 1 + lustre/mgs/lproc_mgs.c | 1 + lustre/obdclass/class_hash.c | 1241 +++++++++++++++++++----------------- lustre/obdclass/genops.c | 50 +- lustre/obdclass/lprocfs_status.c | 38 +- lustre/obdclass/obd_config.c | 239 ++++++- lustre/obdfilter/lproc_obdfilter.c | 1 + lustre/ptlrpc/client.c | 20 +- lustre/ptlrpc/connection.c | 326 +++++----- lustre/ptlrpc/import.c | 4 +- lustre/ptlrpc/niobuf.c | 4 +- lustre/ptlrpc/ptlrpc_module.c | 18 +- 19 files changed, 1499 insertions(+), 991 deletions(-) diff --git a/lustre/include/class_hash.h b/lustre/include/class_hash.h index 5eabcb8..fd7394e 100644 --- a/lustre/include/class_hash.h +++ b/lustre/include/class_hash.h @@ -39,132 +39,354 @@ #include -/* #define LUSTRE_HASH_DEBUG 1 */ - -/* define the hash bucket*/ -struct lustre_hash_bucket { - struct hlist_head lhb_head; - spinlock_t lhb_lock; -#ifdef LUSTRE_HASH_DEBUG - /* the number of hash item per bucket, - * it will help us to analyse the hash distribute - */ - int lhb_item_count; -#endif -}; - -struct lustre_hash_operations; - -struct lustre_class_hash_body { - char hashname[128]; - spinlock_t lchb_lock; /* body lock */ - struct lustre_hash_bucket *lchb_hash_tables; - __u32 lchb_hash_max_size; /* define the hash tables size */ - /* define the hash operations */ - struct lustre_hash_operations *lchb_hash_operations; -}; - -/* hash operations method define */ -struct lustre_hash_operations { - __u32 (*lustre_hashfn) (struct 
lustre_class_hash_body *hash_body, - void *key); - int (*lustre_hash_key_compare) (void *key, - struct hlist_node *compared_hnode); - /* add refcount */ - void* (*lustre_hash_object_refcount_get) (struct hlist_node *hash_item); - /* dec refcount */ - void (*lustre_hash_object_refcount_put) (struct hlist_node *hash_item); -}; - -static inline struct hlist_node * -lustre_hash_getitem_in_bucket_nolock(struct lustre_class_hash_body *hash_body, - int hashent, void *key) -{ - struct lustre_hash_bucket *bucket; - struct hlist_node *hash_item_node; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; - int find = 0; - ENTRY; - - bucket = &hash_body->lchb_hash_tables[hashent]; - hlist_for_each(hash_item_node, &(bucket->lhb_head)) { - find = hop->lustre_hash_key_compare(key, hash_item_node); - if (find == 1) - break; - } - RETURN(find == 1 ? hash_item_node : NULL); +struct lustre_hash_ops; + +typedef struct lustre_hash_bucket { + /** + * Entries list. + */ + struct hlist_head lhb_head; + /** + * Current entries. + */ + atomic_t lhb_count; + /** + * Lustre_hash_bucket. + */ + rwlock_t lhb_rwlock; +} lustre_hash_bucket_t; + +typedef struct lustre_hash { + /** + * Hash name. + */ + char *lh_name; + /** + * Hash name size. + */ + unsigned int lh_name_size; + /** + * Current hash size. + */ + unsigned int lh_cur_size; + /** + * Min hash size. + */ + unsigned int lh_min_size; + /** + * Max hash size. + */ + unsigned int lh_max_size; + /** + * Resize min threshold. + */ + unsigned int lh_min_theta; + /** + * Resize max threshold. + */ + unsigned int lh_max_theta; + /** + * Hash flags. + */ + int lh_flags; + /** + * Current entries. + */ + atomic_t lh_count; + /** + * Resize count. + */ + atomic_t lh_rehash_count; + /** + * Hash buckets. + */ + struct lustre_hash_bucket *lh_buckets; + /** + * Hash operations. + */ + struct lustre_hash_ops *lh_ops; + /** + * Protects lustre_hash. 
+ */ + rwlock_t lh_rwlock; +} lustre_hash_t; + +typedef struct lustre_hash_ops { + unsigned (*lh_hash)(lustre_hash_t *lh, void *key, unsigned mask); + void * (*lh_key)(struct hlist_node *hnode); + int (*lh_compare)(void *key, struct hlist_node *hnode); + void * (*lh_get)(struct hlist_node *hnode); + void * (*lh_put)(struct hlist_node *hnode); + void (*lh_exit)(struct hlist_node *hnode); +} lustre_hash_ops_t; + +/** + * Enable expensive debug checks. + */ +#define LH_DEBUG 0x0001 +/** + * Enable dynamic hash resizing. + */ +#define LH_REHASH 0x0002 + +#define LHO(lh) (lh)->lh_ops +#define LHP(lh, op) (lh)->lh_ops->lh_ ## op + +static inline unsigned +lh_hash(lustre_hash_t *lh, void *key, unsigned mask) +{ + LASSERT(lh); + + if (LHO(lh) && LHP(lh, hash)) + return LHP(lh, hash)(lh, key, mask); + + return -EOPNOTSUPP; } -static inline int -lustre_hash_delitem_nolock(struct lustre_class_hash_body *hash_body, - int hashent, struct hlist_node * hash_item) +static inline void * +lh_key(lustre_hash_t *lh, struct hlist_node *hnode) { - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; + LASSERT(lh); + LASSERT(hnode); - hlist_del_init(hash_item); + if (LHO(lh) && LHP(lh, key)) + return LHP(lh, key)(hnode); - hop->lustre_hash_object_refcount_put(hash_item); + return NULL; +} + +/** + * Returns 1 on a match, + * XXX: This would be better if it returned, -1, 0, or 1 for + * <, =, > respectivly. It could then be used to implement + * a LH_SORT feature flags which could keep each lustre hash + * bucket in order. This would increase insertion times + * but could reduce lookup times for deep chains. Ideally, + * the rehash should keep chain depth short but if that + * ends up not being the case this would be a nice feature. 
+ */ +static inline int +lh_compare(lustre_hash_t *lh, void *key, struct hlist_node *hnode) +{ + LASSERT(lh); + LASSERT(hnode); + + if (LHO(lh) && LHP(lh, compare)) + return LHP(lh, compare)(key, hnode); + + return -EOPNOTSUPP; +} + +static inline void * +lh_get(lustre_hash_t *lh, struct hlist_node *hnode) +{ + LASSERT(lh); + LASSERT(hnode); + + if (LHO(lh) && LHP(lh, get)) + return LHP(lh, get)(hnode); + + return NULL; +} + +static inline void * +lh_put(lustre_hash_t *lh, struct hlist_node *hnode) +{ + LASSERT(lh); + LASSERT(hnode); + + if (LHO(lh) && LHP(lh, put)) + return LHP(lh, put)(hnode); + + return NULL; +} -#ifdef LUSTRE_HASH_DEBUG - hash_body->lchb_hash_tables[hashent].lhb_item_count--; - CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n", - hash_body->hashname, hashent, - hash_body->lchb_hash_tables[hashent].lhb_item_count); -#endif +static inline void +lh_exit(lustre_hash_t *lh, struct hlist_node *hnode) +{ + LASSERT(lh); + LASSERT(hnode); + + if (LHO(lh) && LHP(lh, exit)) + return LHP(lh, exit)(hnode); +} - RETURN(0); +/** + * Validate hnode references the correct key. + */ +static inline void +__lustre_hash_key_validate(lustre_hash_t *lh, void *key, + struct hlist_node *hnode) +{ + if (unlikely(lh->lh_flags & LH_DEBUG)) + LASSERT(lh_compare(lh, key, hnode)); +} + +/* + * Validate hnode is in the correct bucket. 
+ */ +static inline void +__lustre_hash_bucket_validate(lustre_hash_t *lh, lustre_hash_bucket_t *lhb, + struct hlist_node *hnode) +{ + unsigned i; + + if (unlikely(lh->lh_flags & LH_DEBUG)) { + i = lh_hash(lh, lh_key(lh, hnode), lh->lh_cur_size - 1); + LASSERT(&lh->lh_buckets[i] == lhb); + } + } + +static inline struct hlist_node * +__lustre_hash_bucket_lookup(lustre_hash_t *lh, + lustre_hash_bucket_t *lhb, void *key) +{ + struct hlist_node *hnode; + + hlist_for_each(hnode, &lhb->lhb_head) + if (lh_compare(lh, key, hnode)) + return hnode; + + return NULL; +} + +static inline void * +__lustre_hash_bucket_add(lustre_hash_t *lh, + lustre_hash_bucket_t *lhb, + struct hlist_node *hnode) +{ + hlist_add_head(hnode, &(lhb->lhb_head)); + atomic_inc(&lhb->lhb_count); + atomic_inc(&lh->lh_count); + + return lh_get(lh, hnode); +} + +static inline void * +__lustre_hash_bucket_del(lustre_hash_t *lh, + lustre_hash_bucket_t *lhb, + struct hlist_node *hnode) +{ + hlist_del_init(hnode); + atomic_dec(&lhb->lhb_count); + atomic_dec(&lh->lh_count); + + return lh_put(lh, hnode); +} + +/* + * Hash init/cleanup functions. + */ +lustre_hash_t *lustre_hash_init(char *name, unsigned int cur_size, + unsigned int max_size, + lustre_hash_ops_t *ops, int flags); +void lustre_hash_exit(lustre_hash_t *lh); + +/* + * Hash addition functions. + */ +void lustre_hash_add(lustre_hash_t *lh, void *key, + struct hlist_node *hnode); +int lustre_hash_add_unique(lustre_hash_t *lh, void *key, + struct hlist_node *hnode); +void *lustre_hash_findadd_unique(lustre_hash_t *lh, void *key, + struct hlist_node *hnode); + +/* + * Hash deletion functions. + */ +void *lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode); +void *lustre_hash_del_key(lustre_hash_t *lh, void *key); + +/* + * Hash lookup/for_each functions. 
+ */ +void *lustre_hash_lookup(lustre_hash_t *lh, void *key); +typedef void (*lh_for_each_cb)(void *obj, void *data); +void lustre_hash_for_each(lustre_hash_t *lh, lh_for_each_cb, void *data); +void lustre_hash_for_each_safe(lustre_hash_t *lh, lh_for_each_cb, void *data); +void lustre_hash_for_each_empty(lustre_hash_t *lh, lh_for_each_cb, void *data); +void lustre_hash_for_each_key(lustre_hash_t *lh, void *key, + lh_for_each_cb, void *data); + +/* + * Rehash - theta is calculated to be the average chained + * hash depth assuming a perfectly uniform hash funcion. + */ +int lustre_hash_rehash(lustre_hash_t *lh, int size); +void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, + void *new_key, struct hlist_node *hnode); + + +static inline int +__lustre_hash_theta(lustre_hash_t *lh) +{ + return ((atomic_read(&lh->lh_count) * 1000) / lh->lh_cur_size); +} + +static inline void +__lustre_hash_set_theta(lustre_hash_t *lh, int min, int max) +{ + LASSERT(min < max); + lh->lh_min_theta = min; + lh->lh_min_theta = max; +} + +/* + * Generic debug formatting routines mainly for proc handler. + */ +int lustre_hash_debug_header(char *str, int size); +int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size); + + +/** + * 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 + */ +#define GOLDEN_RATIO_PRIME_32 0x9e370001UL +/** + * 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 + */ +#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001ULL + + +/** + * Generic djb2 hash algorithm for character arrays. + */ +static inline unsigned +lh_djb2_hash(void *key, size_t size, unsigned mask) +{ + unsigned i, hash = 5381; + + LASSERT(key != NULL); + + for (i = 0; i < size; i++) + hash = hash * 33 + ((char *)key)[i]; + + RETURN(hash & mask); +} + +/** + * Generic u32 hash algorithm. + */ +static inline unsigned +lh_u32_hash(__u32 key, unsigned mask) +{ + RETURN((key * GOLDEN_RATIO_PRIME_32) & mask); +} + +/** + * Generic u64 hash algorithm. 
+ */ +static inline unsigned +lh_u64_hash(__u64 key, unsigned mask) +{ + RETURN((unsigned)(key * GOLDEN_RATIO_PRIME_64) & mask); } -typedef void (*hash_item_iterate_cb) (void *obj, void *data); - -int lustre_hash_init(struct lustre_class_hash_body **hash_body, - char *hashname, __u32 hashsize, - struct lustre_hash_operations *hash_operations); -void lustre_hash_exit(struct lustre_class_hash_body **hash_body); -int lustre_hash_additem_unique(struct lustre_class_hash_body *hash_body, - void *key, struct hlist_node *actual_hnode); -void *lustre_hash_findadd_unique(struct lustre_class_hash_body *hash_body, - void *key, struct hlist_node *actual_hnode); -int lustre_hash_additem(struct lustre_class_hash_body *hash_body, void *key, - struct hlist_node *actual_hnode); -int lustre_hash_delitem_by_key(struct lustre_class_hash_body *hash_body, - void *key); -int lustre_hash_delitem(struct lustre_class_hash_body *hash_body, void *key, - struct hlist_node *hash_item); -void lustre_hash_bucket_iterate(struct lustre_class_hash_body *hash_body, - void *key, hash_item_iterate_cb, - void *data); -void lustre_hash_iterate_all(struct lustre_class_hash_body *hash_body, - hash_item_iterate_cb, void *data); - -void * lustre_hash_get_object_by_key(struct lustre_class_hash_body *hash_body, - void *key); - -__u32 djb2_hashfn(struct lustre_class_hash_body *hash_body, void* key, - size_t size); - -/* ( uuid <-> export ) hash operations define */ -__u32 uuid_hashfn(struct lustre_class_hash_body *hash_body, void * key); -int uuid_hash_key_compare(void *key, struct hlist_node * compared_hnode); -void * uuid_export_refcount_get(struct hlist_node * actual_hnode); -void uuid_export_refcount_put(struct hlist_node * actual_hnode); - -/* ( nid <-> export ) hash operations define */ -__u32 nid_hashfn(struct lustre_class_hash_body *hash_body, void * key); -int nid_hash_key_compare(void *key, struct hlist_node * compared_hnode); -void * nid_export_refcount_get(struct hlist_node * actual_hnode); -void 
nid_export_refcount_put(struct hlist_node * actual_hnode); - -/* ( net_peer <-> connection ) hash operations define */ -__u32 conn_hashfn(struct lustre_class_hash_body *hash_body, void * key); -int conn_hash_key_compare(void *key, struct hlist_node * compared_hnode); -void * conn_refcount_get(struct hlist_node * actual_hnode); -void conn_refcount_put(struct hlist_node * actual_hnode); - -/* ( nid <-> nidstats ) hash operations define. uses nid_hashfn */ -int nidstats_hash_key_compare(void *key, struct hlist_node * compared_hnode); -void* nidstats_refcount_get(struct hlist_node * actual_hnode); -void nidstats_refcount_put(struct hlist_node * actual_hnode); -extern struct lustre_hash_operations nid_stat_hash_operations; +#define lh_for_each_bucket(lh, lhb, pos) \ + for (pos = 0; \ + pos < lh->lh_cur_size && \ + ({ lhb = &lh->lh_buckets[i]; 1; }); \ + pos++) #endif /* __CLASS_HASH_H */ diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 38905dd..b62b08a 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -318,6 +318,43 @@ static inline int capable(int cap) #define might_sleep_if(c) #define smp_mb() +/** + * fls - find last (most-significant) bit set + * @x: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. 
+ */ +static inline +int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + static inline int test_and_set_bit(int nr, unsigned long *addr) { diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index 32d3248..319d799 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -527,6 +527,10 @@ extern int lprocfs_counter_write(struct file *file, const char *buffer, int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, int count, int *eof, void *data); +/* lprocfs_statuc.c: hash statistics */ +int lprocfs_obd_rd_hash(char *page, char **start, off_t off, + int count, int *eof, void *data); + extern int lprocfs_seq_release(struct inode *, struct file *); /* in lprocfs_stat.c, to protect the private data for proc entries */ diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index b2d50df..380418d 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -184,7 +184,6 @@ #define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args) struct ptlrpc_connection { - struct list_head c_link; struct hlist_node c_hash; lnet_nid_t c_self; lnet_process_id_t c_peer; @@ -193,9 +192,9 @@ struct ptlrpc_connection { }; struct ptlrpc_client { - __u32 cli_request_portal; - __u32 cli_reply_portal; - char *cli_name; + __u32 cli_request_portal; + __u32 cli_reply_portal; + char *cli_name; }; /* state flags of requests */ @@ -789,14 +788,13 @@ extern void reply_out_callback(lnet_event_t *ev); extern void server_bulk_callback (lnet_event_t *ev); /* ptlrpc/connection.c */ -void ptlrpc_dump_connections(void); -void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *); -struct 
ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, - lnet_nid_t self, struct obd_uuid *uuid); -int ptlrpc_put_connection(struct ptlrpc_connection *c); +struct ptlrpc_connection *ptlrpc_connection_get(lnet_process_id_t peer, + lnet_nid_t self, + struct obd_uuid *uuid); +int ptlrpc_connection_put(struct ptlrpc_connection *c); struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); -int ptlrpc_init_connection(void); -void ptlrpc_cleanup_connection(void); +int ptlrpc_connection_init(void); +void ptlrpc_connection_fini(void); extern lnet_pid_t ptl_get_pid(void); /* ptlrpc/niobuf.c */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 972ebed..54e7e92 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -929,11 +929,11 @@ struct obd_device { obd_inactive:1; /* device active/inactive * (for /proc/status only!!) */ /* uuid-export hash body */ - struct lustre_class_hash_body *obd_uuid_hash_body; + struct lustre_hash *obd_uuid_hash; /* nid-export hash body */ - struct lustre_class_hash_body *obd_nid_hash_body; + struct lustre_hash *obd_nid_hash; /* nid stats body */ - struct lustre_class_hash_body *obd_nid_stats_hash_body; + struct lustre_hash *obd_nid_stats_hash; struct list_head obd_nid_stats; atomic_t obd_refcount; cfs_waitq_t obd_refcount_waitq; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 7fa169b..c6de21f 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -119,7 +119,7 @@ out_free: if (imp_conn) OBD_FREE(imp_conn, sizeof(*imp_conn)); out_put: - ptlrpc_put_connection(ptlrpc_conn); + ptlrpc_connection_put(ptlrpc_conn); RETURN(rc); } @@ -162,20 +162,20 @@ int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) GOTO(out, rc = -EBUSY); } - ptlrpc_put_connection(imp->imp_connection); + ptlrpc_connection_put(imp->imp_connection); imp->imp_connection = NULL; dlmexp = class_conn2export(&imp->imp_dlm_handle); if (dlmexp && dlmexp->exp_connection) { 
LASSERT(dlmexp->exp_connection == imp_conn->oic_conn); - ptlrpc_put_connection(dlmexp->exp_connection); + ptlrpc_connection_put(dlmexp->exp_connection); dlmexp->exp_connection = NULL; } } list_del(&imp_conn->oic_item); - ptlrpc_put_connection(imp_conn->oic_conn); + ptlrpc_connection_put(imp_conn->oic_conn); OBD_FREE(imp_conn, sizeof(*imp_conn)); CDEBUG(D_HA, "imp %p@%s: remove connection %s\n", imp, imp->imp_obd->obd_name, uuid->uuid); @@ -719,7 +719,7 @@ int target_handle_connect(struct ptlrpc_request *req) goto dont_check_exports; spin_lock(&target->obd_dev_lock); - export = lustre_hash_get_object_by_key(target->obd_uuid_hash_body, &cluuid); + export = lustre_hash_lookup(target->obd_uuid_hash, &cluuid); if (export != NULL && export->exp_connecting) { /* bug 9635, et. al. */ CWARN("%s: exp %p already connecting\n", @@ -903,17 +903,18 @@ dont_check_exports: } if (export->exp_connection != NULL) - ptlrpc_put_connection(export->exp_connection); - export->exp_connection = ptlrpc_get_connection(req->rq_peer, + ptlrpc_connection_put(export->exp_connection); + export->exp_connection = ptlrpc_connection_get(req->rq_peer, req->rq_self, &remote_uuid); spin_lock(&target->obd_dev_lock); /* Export might be hashed already, e.g. 
if this is reconnect */ if (hlist_unhashed(&export->exp_nid_hash)) - lustre_hash_additem(export->exp_obd->obd_nid_hash_body, - &export->exp_connection->c_peer.nid, - &export->exp_nid_hash); + lustre_hash_add(export->exp_obd->obd_nid_hash, + &export->exp_connection->c_peer.nid, + &export->exp_nid_hash); + spin_unlock(&target->obd_dev_lock); spin_lock_bh(&target->obd_processing_task_lock); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 35d0dbf..6349be1 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -331,6 +331,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "fstype", lprocfs_rd_fstype, 0, 0 }, { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, + { "hash_stats", lprocfs_obd_rd_hash, 0, 0 }, { "evict_client", 0, lprocfs_mds_wr_evict_client, 0 }, { "evict_ost_nids", lprocfs_mds_rd_evictostnids, lprocfs_mds_wr_evictostnids, 0 }, diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index ffb2c41..9e45313 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -466,6 +466,7 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = { { "capa_count", lprocfs_rd_capa_count, 0, 0 }, { "site_stats", lprocfs_rd_site_stats, 0, 0 }, { "evict_client", 0, lprocfs_mdt_wr_evict_client, 0 }, + { "hash_stats", lprocfs_obd_rd_hash, 0, 0 }, { 0 } }; diff --git a/lustre/mgs/lproc_mgs.c b/lustre/mgs/lproc_mgs.c index 83a5f40..1d84a12 100644 --- a/lustre/mgs/lproc_mgs.c +++ b/lustre/mgs/lproc_mgs.c @@ -230,6 +230,7 @@ struct lprocfs_vars lprocfs_mgs_obd_vars[] = { { "fstype", lprocfs_rd_fstype, 0, 0 }, { "mntdev", lprocfs_mgs_rd_mntdev, 0, 0 }, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, + { "hash_stats", lprocfs_obd_rd_hash, 0, 0 }, { "evict_client", 0, lprocfs_wr_evict_client, 0 }, { 0 } }; diff --git a/lustre/obdclass/class_hash.c b/lustre/obdclass/class_hash.c index 02ef3f4..46b4c5ed 100644 --- a/lustre/obdclass/class_hash.c +++ b/lustre/obdclass/class_hash.c @@ -38,6 
+38,14 @@ * Implement a hash class for hash process in lustre system. * * Author: YuZhangyong + * + * 2008-08-15: Brian Behlendorf + * - Simplified API and improved documentation + * - Added per-hash feature flags: + * * LH_DEBUG additional validation + * * LH_REHASH dynamic rehashing + * - Added per-hash statistics + * - General performance enhancements */ #ifndef __KERNEL__ @@ -45,658 +53,721 @@ #include #endif -#include #include -#include -#include -#include -int lustre_hash_init(struct lustre_class_hash_body **hash_body_new, - char *hashname, __u32 hashsize, - struct lustre_hash_operations *hash_operations) +/** + * Initialize new lustre hash, where: + * @name - Descriptive hash name + * @cur_size - Initial hash table size + * @max_size - Maximum allowed hash table resize + * @ops - Registered hash table operations + * @flags - LH_REHASH enable synamic hash resizing + * - LH_SORT enable chained hash sort + */ +lustre_hash_t * +lustre_hash_init(char *name, unsigned int cur_size, unsigned int max_size, + lustre_hash_ops_t *ops, int flags) { - int i, n = 0; - struct lustre_class_hash_body *hash_body = NULL; - - LASSERT(hashsize > 0); - LASSERT(hash_operations != NULL); + lustre_hash_t *lh; + int i; ENTRY; - - i = hashsize; - while (i != 0) { - if (i & 0x1) - n++; - i >>= 1; - } - - LASSERTF(n == 1, "hashsize %u isn't 2^n\n", hashsize); - - /* alloc space for hash_body */ - OBD_ALLOC(hash_body, sizeof(*hash_body)); - - if (hash_body == NULL) { - CERROR("Cannot alloc space for hash body, hashname = %s \n", - hashname); - RETURN(-ENOMEM); + + LASSERT(name != NULL); + LASSERT(ops != NULL); + + /* + * Ensure hash is a power of two to allow the use of a bitmask + * in the hash function instead of a more expensive modulus. 
+ */ + LASSERTF(cur_size && (cur_size & (cur_size - 1)) == 0, + "Size (%u) is not power of 2\n", cur_size); + LASSERTF(max_size && (max_size & (max_size - 1)) == 0, + "Size (%u) is not power of 2\n", max_size); + + OBD_ALLOC_PTR(lh); + if (!lh) + RETURN(NULL); + + lh->lh_name_size = strlen(name) + 1; + rwlock_init(&lh->lh_rwlock); + + OBD_ALLOC(lh->lh_name, lh->lh_name_size); + if (!lh->lh_name) { + OBD_FREE_PTR(lh); + RETURN(NULL); } - - LASSERT(hashname != NULL && - strlen(hashname) <= sizeof(hash_body->hashname)); - strcpy(hash_body->hashname, hashname); - hash_body->lchb_hash_max_size = hashsize; - hash_body->lchb_hash_operations = hash_operations; - - /* alloc space for the hash tables */ - OBD_ALLOC(hash_body->lchb_hash_tables, - sizeof(*hash_body->lchb_hash_tables) * hash_body->lchb_hash_max_size); - - if (hash_body->lchb_hash_tables == NULL) { - OBD_FREE(hash_body, sizeof(*hash_body)); - CERROR("Cannot alloc space for hashtables, hashname = %s \n", - hash_body->hashname); - RETURN(-ENOMEM); + + strncpy(lh->lh_name, name, lh->lh_name_size); + + atomic_set(&lh->lh_count, 0); + atomic_set(&lh->lh_rehash_count, 0); + lh->lh_cur_size = cur_size; + lh->lh_min_size = cur_size; + lh->lh_max_size = max_size; + lh->lh_min_theta = 500; /* theta * 1000 */ + lh->lh_max_theta = 2000; /* theta * 1000 */ + lh->lh_ops = ops; + lh->lh_flags = flags; + + OBD_VMALLOC(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size); + if (!lh->lh_buckets) { + OBD_FREE(lh->lh_name, lh->lh_name_size); + OBD_FREE_PTR(lh); + RETURN(NULL); } - - spin_lock_init(&hash_body->lchb_lock); /* initialize the body lock */ - - for(i = 0 ; i < hash_body->lchb_hash_max_size; i++) { - /* initial the bucket lock and list_head */ - INIT_HLIST_HEAD(&hash_body->lchb_hash_tables[i].lhb_head); - spin_lock_init(&hash_body->lchb_hash_tables[i].lhb_lock); + + for (i = 0; i < lh->lh_cur_size; i++) { + INIT_HLIST_HEAD(&lh->lh_buckets[i].lhb_head); + rwlock_init(&lh->lh_buckets[i].lhb_rwlock); + 
atomic_set(&lh->lh_buckets[i].lhb_count, 0); } - *hash_body_new = hash_body; - - RETURN(0); + + return lh; } EXPORT_SYMBOL(lustre_hash_init); - -void lustre_hash_exit(struct lustre_class_hash_body **new_hash_body) + +/** + * Cleanup lustre hash @lh. + */ +void +lustre_hash_exit(lustre_hash_t *lh) { - int i; - struct lustre_class_hash_body *hash_body = NULL; + lustre_hash_bucket_t *lhb; + struct hlist_node *hnode; + struct hlist_node *pos; + int i; ENTRY; - - hash_body = *new_hash_body; - - if (hash_body == NULL) { - CWARN("hash body has been deleted\n"); - goto out_hash; - } - - spin_lock(&hash_body->lchb_lock); /* lock the hash tables */ - - if (hash_body->lchb_hash_tables == NULL ) { - spin_unlock(&hash_body->lchb_lock); - CWARN("hash tables has been deleted\n"); - goto out_hash; - } - - for( i = 0; i < hash_body->lchb_hash_max_size; i++ ) { - struct lustre_hash_bucket * bucket; - struct hlist_node * actual_hnode, *pos; - - bucket = &hash_body->lchb_hash_tables[i]; - spin_lock(&bucket->lhb_lock); /* lock the bucket */ - hlist_for_each_safe(actual_hnode, pos, &(bucket->lhb_head)) { - lustre_hash_delitem_nolock(hash_body, i, actual_hnode); + + if (!lh) + return; + + write_lock(&lh->lh_rwlock); + + lh_for_each_bucket(lh, lhb, i) { + write_lock(&lhb->lhb_rwlock); + hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) { + __lustre_hash_bucket_validate(lh, lhb, hnode); + __lustre_hash_bucket_del(lh, lhb, hnode); + lh_exit(lh, hnode); } - spin_unlock(&bucket->lhb_lock); + + LASSERT(hlist_empty(&(lhb->lhb_head))); + LASSERT(atomic_read(&lhb->lhb_count) == 0); + write_unlock(&lhb->lhb_rwlock); } + + OBD_VFREE(lh->lh_buckets, sizeof(*lh->lh_buckets) * lh->lh_cur_size); + OBD_FREE(lh->lh_name, lh->lh_name_size); + + LASSERT(atomic_read(&lh->lh_count) == 0); + write_unlock(&lh->lh_rwlock); + + OBD_FREE_PTR(lh); + EXIT; +} +EXPORT_SYMBOL(lustre_hash_exit); - /* free the hash_tables's memory space */ - OBD_FREE(hash_body->lchb_hash_tables, - sizeof(*hash_body->lchb_hash_tables) * 
hash_body->lchb_hash_max_size); +static inline unsigned int lustre_hash_rehash_size(lustre_hash_t *lh) +{ + if (!(lh->lh_flags & LH_REHASH)) + return 0; - hash_body->lchb_hash_tables = NULL; + if ((lh->lh_cur_size < lh->lh_max_size) && + (__lustre_hash_theta(lh) > lh->lh_max_theta)) + return MIN(lh->lh_cur_size * 2, lh->lh_max_size); - spin_unlock(&hash_body->lchb_lock); + if ((lh->lh_cur_size > lh->lh_min_size) && + (__lustre_hash_theta(lh) < lh->lh_min_theta)) + return MAX(lh->lh_cur_size / 2, lh->lh_min_size); -out_hash : - /* free the hash_body's memory space */ - if (hash_body != NULL) { - OBD_FREE(hash_body, sizeof(*hash_body)); - *new_hash_body = NULL; - } + return 0; +} + +/** + * Add item @hnode to lustre hash @lh using @key. The registered + * ops->lh_get function will be called when the item is added. + */ +void +lustre_hash_add(lustre_hash_t *lh, void *key, struct hlist_node *hnode) +{ + lustre_hash_bucket_t *lhb; + int size; + unsigned i; + ENTRY; + + __lustre_hash_key_validate(lh, key, hnode); + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + LASSERT(hlist_unhashed(hnode)); + + write_lock(&lhb->lhb_rwlock); + __lustre_hash_bucket_add(lh, lhb, hnode); + write_unlock(&lhb->lhb_rwlock); + + size = lustre_hash_rehash_size(lh); + read_unlock(&lh->lh_rwlock); + if (size) + lustre_hash_rehash(lh, size); + EXIT; } -EXPORT_SYMBOL(lustre_hash_exit); - -/* - * only allow unique @key in hashtables, if the same @key has existed - * in hashtables, it will return with fails. +EXPORT_SYMBOL(lustre_hash_add); + +/** + * Add item @hnode to lustre hash @lh using @key. The registered + * ops->lh_get function will be called if the item was added. + * Returns 0 on success or -EALREADY on key collisions. 
*/ -int lustre_hash_additem_unique(struct lustre_class_hash_body *hash_body, - void *key, struct hlist_node *actual_hnode) +int +lustre_hash_add_unique(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { - int hashent; - struct lustre_hash_bucket *bucket = NULL; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; + lustre_hash_bucket_t *lhb; + int size; + int rc = -EALREADY; + unsigned i; ENTRY; - - LASSERT(hlist_unhashed(actual_hnode)); - hashent = hop->lustre_hashfn(hash_body, key); - - /* get the hash-bucket and lock it */ - bucket = &hash_body->lchb_hash_tables[hashent]; - spin_lock(&bucket->lhb_lock); - - if ( (lustre_hash_getitem_in_bucket_nolock(hash_body, hashent, key)) != NULL) { - /* the added-item exist in hashtables, so cannot add it again */ - spin_unlock(&bucket->lhb_lock); - - CWARN("Already found the key in hash [%s]\n", - hash_body->hashname); - RETURN(-EALREADY); + + __lustre_hash_key_validate(lh, key, hnode); + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + LASSERT(hlist_unhashed(hnode)); + + write_lock(&lhb->lhb_rwlock); + if (!__lustre_hash_bucket_lookup(lh, lhb, key)) { + __lustre_hash_bucket_add(lh, lhb, hnode); + rc = 0; } - - hlist_add_head(actual_hnode, &(bucket->lhb_head)); - -#ifdef LUSTRE_HASH_DEBUG - /* hash distribute debug */ - hash_body->lchb_hash_tables[hashent].lhb_item_count++; - CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n", - hash_body->hashname, hashent, - hash_body->lchb_hash_tables[hashent].lhb_item_count); -#endif - hop->lustre_hash_object_refcount_get(actual_hnode); - - spin_unlock(&bucket->lhb_lock); - - RETURN(0); + write_unlock(&lhb->lhb_rwlock); + + size = lustre_hash_rehash_size(lh); + read_unlock(&lh->lh_rwlock); + if (size) + lustre_hash_rehash(lh, size); + + RETURN(rc); } -EXPORT_SYMBOL(lustre_hash_additem_unique); - -/* - * only allow unique @key in hashtables, if the same @key has 
existed - * in hashtables, it will return with fails. +EXPORT_SYMBOL(lustre_hash_add_unique); + +/** + * Add item @hnode to lustre hash @lh using @key. If this @key + * already exists in the hash then ops->lh_get will be called on the + * conflicting entry and that entry will be returned to the caller. + * Otherwise ops->lh_get is called on the item which was added. */ -void* lustre_hash_findadd_unique(struct lustre_class_hash_body *hash_body, - void *key, struct hlist_node *actual_hnode) +void * +lustre_hash_findadd_unique(lustre_hash_t *lh, void *key, + struct hlist_node *hnode) { - int hashent; - struct lustre_hash_bucket *bucket = NULL; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; - struct hlist_node * hash_item_hnode = NULL; - void *obj; + struct hlist_node *existing_hnode; + lustre_hash_bucket_t *lhb; + int size; + unsigned i; + void *obj; ENTRY; - - LASSERT(hlist_unhashed(actual_hnode)); - hashent = hop->lustre_hashfn(hash_body, key); - - /* get the hash-bucket and lock it */ - bucket = &hash_body->lchb_hash_tables[hashent]; - spin_lock(&bucket->lhb_lock); - - hash_item_hnode = lustre_hash_getitem_in_bucket_nolock(hash_body, - hashent, key); - if ( hash_item_hnode != NULL) { - /* the added-item exist in hashtables, so cannot add it again */ - obj = hop->lustre_hash_object_refcount_get(hash_item_hnode); - spin_unlock(&bucket->lhb_lock); - RETURN(obj); - } - - hlist_add_head(actual_hnode, &(bucket->lhb_head)); - -#ifdef LUSTRE_HASH_DEBUG - /* hash distribute debug */ - hash_body->lchb_hash_tables[hashent].lhb_item_count++; - CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n", - hash_body->hashname, hashent, - hash_body->lchb_hash_tables[hashent].lhb_item_count); -#endif - obj = hop->lustre_hash_object_refcount_get(actual_hnode); - - spin_unlock(&bucket->lhb_lock); - + + __lustre_hash_key_validate(lh, key, hnode); + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + 
LASSERT(i < lh->lh_cur_size); + LASSERT(hlist_unhashed(hnode)); + + write_lock(&lhb->lhb_rwlock); + existing_hnode = __lustre_hash_bucket_lookup(lh, lhb, key); + if (existing_hnode) + obj = lh_get(lh, existing_hnode); + else + obj = __lustre_hash_bucket_add(lh, lhb, hnode); + write_unlock(&lhb->lhb_rwlock); + + size = lustre_hash_rehash_size(lh); + read_unlock(&lh->lh_rwlock); + if (size) + lustre_hash_rehash(lh, size); + RETURN(obj); } EXPORT_SYMBOL(lustre_hash_findadd_unique); - -/* - * this version of additem, it allow multi same @key in hashtables. - * in this additem version, we don't need to check if exist same @key in hash - * tables, we only add it to related hashbucket. - * example: maybe same nid will be related to multi difference export + +/** + * Delete item @hnode from the lustre hash @lh using @key. The @key + * is required to ensure the correct hash bucket is locked since there + * is no direct linkage from the item to the bucket. The object + * removed from the hash will be returned and obs->lh_put is called + * on the removed object. 
*/ -int lustre_hash_additem(struct lustre_class_hash_body *hash_body, void *key, - struct hlist_node *actual_hnode) +void * +lustre_hash_del(lustre_hash_t *lh, void *key, struct hlist_node *hnode) { - int hashent; - struct lustre_hash_bucket *bucket = NULL; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; + lustre_hash_bucket_t *lhb; + int size; + unsigned i; + void *obj; ENTRY; - - LASSERT(hlist_unhashed(actual_hnode)); - - hashent = hop->lustre_hashfn(hash_body, key); - - /* get the hashbucket and lock it */ - bucket = &hash_body->lchb_hash_tables[hashent]; - spin_lock(&bucket->lhb_lock); - - hlist_add_head(actual_hnode, &(bucket->lhb_head)); - -#ifdef LUSTRE_HASH_DEBUG - /* hash distribute debug */ - hash_body->lchb_hash_tables[hashent].lhb_item_count++; - CDEBUG(D_INFO, "hashname[%s] bucket[%d] has [%d] hashitem\n", - hash_body->hashname, hashent, - hash_body->lchb_hash_tables[hashent].lhb_item_count); -#endif - hop->lustre_hash_object_refcount_get(actual_hnode); - - spin_unlock(&bucket->lhb_lock); - - RETURN(0); + + __lustre_hash_key_validate(lh, key, hnode); + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + LASSERT(!hlist_unhashed(hnode)); + + write_lock(&lhb->lhb_rwlock); + obj = __lustre_hash_bucket_del(lh, lhb, hnode); + write_unlock(&lhb->lhb_rwlock); + + size = lustre_hash_rehash_size(lh); + read_unlock(&lh->lh_rwlock); + if (size) + lustre_hash_rehash(lh, size); + + RETURN(obj); } -EXPORT_SYMBOL(lustre_hash_additem); - - -/* - * this version of delitem will delete a hashitem with given @key, - * we need to search the <@key, @value> in hashbucket with @key, - * if match, the hashitem will be delete. - * we have a no-search version of delitem, it will directly delete a hashitem, - * doesn't need to search it in hashtables, so it is a O(1) delete. +EXPORT_SYMBOL(lustre_hash_del); + +/** + * Delete item given @key in lustre hash @lh. 
The first @key found in + * the hash will be removed, if the key exists multiple times in the hash + * @lh this function must be called once per key. The removed object + * will be returned and ops->lh_put is called on the removed object. */ -int lustre_hash_delitem_by_key(struct lustre_class_hash_body *hash_body, - void *key) +void * +lustre_hash_del_key(lustre_hash_t *lh, void *key) { - int hashent ; - struct hlist_node * hash_item; - struct lustre_hash_bucket *bucket = NULL; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; - int retval = 0; + struct hlist_node *hnode; + lustre_hash_bucket_t *lhb; + int size; + unsigned i; + void *obj = NULL; ENTRY; - - hashent = hop->lustre_hashfn(hash_body, key); - - /* first, lock the hashbucket */ - bucket = &hash_body->lchb_hash_tables[hashent]; - spin_lock(&bucket->lhb_lock); - - /* get the hash_item from hash_bucket */ - hash_item = lustre_hash_getitem_in_bucket_nolock(hash_body, hashent, - key); - - if (hash_item == NULL) { - spin_unlock(&bucket->lhb_lock); - RETURN(-ENOENT); - } - - /* call delitem_nolock() to delete the hash_item */ - retval = lustre_hash_delitem_nolock(hash_body, hashent, hash_item); - - spin_unlock(&bucket->lhb_lock); - - RETURN(retval); + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + + write_lock(&lhb->lhb_rwlock); + hnode = __lustre_hash_bucket_lookup(lh, lhb, key); + if (hnode) + obj = __lustre_hash_bucket_del(lh, lhb, hnode); + + write_unlock(&lhb->lhb_rwlock); + + size = lustre_hash_rehash_size(lh); + read_unlock(&lh->lh_rwlock); + if (size) + lustre_hash_rehash(lh, size); + + RETURN(obj); } -EXPORT_SYMBOL(lustre_hash_delitem_by_key); - -/* - * the O(1) version of delete hash item, - * it will directly delete the hashitem with given @hash_item, - * the parameter @key used to get the relation hash bucket and lock it. 
+EXPORT_SYMBOL(lustre_hash_del_key); + +/** + * Lookup an item using @key in the lustre hash @lh and return it. + * If the @key is found in the hash lh->lh_get() is called and the + * matching objects is returned. It is the callers responsibility + * to call the counterpart ops->lh_put using the lh_put() macro + * when when finished with the object. If the @key was not found + * in the hash @lh NULL is returned. */ -int lustre_hash_delitem(struct lustre_class_hash_body *hash_body, - void *key, struct hlist_node * hash_item) -{ - int hashent = 0; - int retval = 0; - struct lustre_hash_bucket *bucket = NULL; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; +void * +lustre_hash_lookup(lustre_hash_t *lh, void *key) +{ + struct hlist_node *hnode; + lustre_hash_bucket_t *lhb; + unsigned i; + void *obj = NULL; ENTRY; - - hashent = hop->lustre_hashfn(hash_body, key); - - bucket = &hash_body->lchb_hash_tables[hashent]; - spin_lock(&bucket->lhb_lock); - - /* call delitem_nolock() to delete the hash_item */ - retval = lustre_hash_delitem_nolock(hash_body, hashent, hash_item); - - spin_unlock(&bucket->lhb_lock); - - RETURN(retval); + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + + read_lock(&lhb->lhb_rwlock); + hnode = __lustre_hash_bucket_lookup(lh, lhb, key); + if (hnode) + obj = lh_get(lh, hnode); + + read_unlock(&lhb->lhb_rwlock); + read_unlock(&lh->lh_rwlock); + + RETURN(obj); } -EXPORT_SYMBOL(lustre_hash_delitem); - -void lustre_hash_bucket_iterate(struct lustre_class_hash_body *hash_body, - void *key, hash_item_iterate_cb func, void *data) +EXPORT_SYMBOL(lustre_hash_lookup); + +/** + * For each item in the lustre hash @lh call the passed callback @func + * and pass to it as an argument each hash item and the private @data. + * Before each callback ops->lh_get will be called, and after each + * callback ops->lh_put will be called. 
Finally, during the callback + * the bucket lock is held so the callback must never sleep. + */ +void +lustre_hash_for_each(lustre_hash_t *lh, lh_for_each_cb func, void *data) { - int hashent, find = 0; - struct lustre_hash_bucket *bucket = NULL; - struct hlist_node *hash_item_node = NULL; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; - struct obd_export *tmp = NULL; - + struct hlist_node *hnode; + lustre_hash_bucket_t *lhb; + void *obj; + int i; ENTRY; - - hashent = hop->lustre_hashfn(hash_body, key); - bucket = &hash_body->lchb_hash_tables[hashent]; - - spin_lock(&bucket->lhb_lock); - hlist_for_each(hash_item_node, &(bucket->lhb_head)) { - find = hop->lustre_hash_key_compare(key, hash_item_node); - if (find) { - tmp = hop->lustre_hash_object_refcount_get(hash_item_node); - func(tmp, data); - hop->lustre_hash_object_refcount_put(hash_item_node); + + read_lock(&lh->lh_rwlock); + lh_for_each_bucket(lh, lhb, i) { + read_lock(&lhb->lhb_rwlock); + hlist_for_each(hnode, &(lhb->lhb_head)) { + __lustre_hash_bucket_validate(lh, lhb, hnode); + obj = lh_get(lh, hnode); + func(obj, data); + (void)lh_put(lh, hnode); } + read_unlock(&lhb->lhb_rwlock); } - spin_unlock(&bucket->lhb_lock); -} -EXPORT_SYMBOL(lustre_hash_bucket_iterate); + read_unlock(&lh->lh_rwlock); -void lustre_hash_iterate_all(struct lustre_class_hash_body *hash_body, - hash_item_iterate_cb func, void *data) + EXIT; +} +EXPORT_SYMBOL(lustre_hash_for_each); + +/** + * For each item in the lustre hash @lh call the passed callback @func + * and pass to it as an argument each hash item and the private @data. + * Before each callback ops->lh_get will be called, and after each + * callback ops->lh_put will be called. During the callback the + * bucket lock will not be held will allows for the current item + * to be removed from the hash during the callback. However, care + * should be taken to prevent other callers from operating on the + * hash concurrently or list corruption may occur. 
+ */ +void +lustre_hash_for_each_safe(lustre_hash_t *lh, lh_for_each_cb func, void *data) { - int i; - struct lustre_hash_operations *hop = hash_body->lchb_hash_operations; + struct hlist_node *hnode; + struct hlist_node *pos; + lustre_hash_bucket_t *lhb; + void *obj; + int i; ENTRY; - - for( i = 0; i < hash_body->lchb_hash_max_size; i++ ) { - struct lustre_hash_bucket * bucket; - struct hlist_node * actual_hnode, *pos; - void *obj; - - bucket = &hash_body->lchb_hash_tables[i]; -#ifdef LUSTRE_HASH_DEBUG - CDEBUG(D_INFO, "idx %d - bucket %p\n", i, bucket); -#endif - spin_lock(&bucket->lhb_lock); /* lock the bucket */ - hlist_for_each_safe(actual_hnode, pos, &(bucket->lhb_head)) { - obj = hop->lustre_hash_object_refcount_get(actual_hnode); + + read_lock(&lh->lh_rwlock); + lh_for_each_bucket(lh, lhb, i) { + read_lock(&lhb->lhb_rwlock); + hlist_for_each_safe(hnode, pos, &(lhb->lhb_head)) { + __lustre_hash_bucket_validate(lh, lhb, hnode); + obj = lh_get(lh, hnode); + read_unlock(&lhb->lhb_rwlock); func(obj, data); - hop->lustre_hash_object_refcount_put(actual_hnode); + read_lock(&lhb->lhb_rwlock); + (void)lh_put(lh, hnode); } - spin_unlock(&bucket->lhb_lock); + read_unlock(&lhb->lhb_rwlock); } + read_unlock(&lh->lh_rwlock); EXIT; } -EXPORT_SYMBOL(lustre_hash_iterate_all); - - -void * lustre_hash_get_object_by_key(struct lustre_class_hash_body *hash_body, - void *key) +EXPORT_SYMBOL(lustre_hash_for_each_safe); + +/** + * For each hash bucket in the lustre hash @lh call the passed callback + * @func until all the hash buckets are empty. The passed callback @func + * or the previously registered callback lh->lh_put must remove the item + * from the hash. You may either use the lustre_hash_del() or hlist_del() + * functions. No rwlocks will be held during the callback @func it is + * safe to sleep if needed. This function will not terminate until the + * hash is empty. Note it is still possible to concurrently add new + * items in to the hash. 
It is the callers responsibility to ensure + * the required locking is in place to prevent concurrent insertions. + */ +void +lustre_hash_for_each_empty(lustre_hash_t *lh, lh_for_each_cb func, void *data) { - int hashent ; - struct hlist_node * hash_item_hnode = NULL; - void * obj_value = NULL; - struct lustre_hash_bucket *bucket = NULL; - struct lustre_hash_operations * hop = hash_body->lchb_hash_operations; + struct hlist_node *hnode; + lustre_hash_bucket_t *lhb; + void *obj; + int i; ENTRY; - - /* get the hash value from the given item */ - hashent = hop->lustre_hashfn(hash_body, key); - - bucket = &hash_body->lchb_hash_tables[hashent]; - spin_lock(&bucket->lhb_lock); /* lock the bucket */ - - hash_item_hnode = lustre_hash_getitem_in_bucket_nolock(hash_body, - hashent, key); - - if (hash_item_hnode == NULL) { - spin_unlock(&bucket->lhb_lock); /* lock the bucket */ - RETURN(NULL); + +restart: + read_lock(&lh->lh_rwlock); + lh_for_each_bucket(lh, lhb, i) { + write_lock(&lhb->lhb_rwlock); + while (!hlist_empty(&lhb->lhb_head)) { + hnode = lhb->lhb_head.first; + __lustre_hash_bucket_validate(lh, lhb, hnode); + obj = lh_get(lh, hnode); + write_unlock(&lhb->lhb_rwlock); + read_unlock(&lh->lh_rwlock); + func(obj, data); + (void)lh_put(lh, hnode); + goto restart; + } + write_unlock(&lhb->lhb_rwlock); } - - obj_value = hop->lustre_hash_object_refcount_get(hash_item_hnode); - spin_unlock(&bucket->lhb_lock); /* lock the bucket */ - - RETURN(obj_value); -} -EXPORT_SYMBOL(lustre_hash_get_object_by_key); - -/* string hashing using djb2 hash algorithm */ -__u32 djb2_hashfn(struct lustre_class_hash_body *hash_body, void* key, - size_t size) -{ - __u32 hash = 5381; - int i; - char *ptr = key; - - LASSERT(key != NULL); - - for( i = 0; i < size; i++ ) - hash = hash * 33 + ptr[i]; - - hash &= (hash_body->lchb_hash_max_size - 1); - - RETURN(hash); -} - -/* - * define (uuid <-> export) hash operations and function define - */ - -/* define the uuid hash operations */ -struct 
lustre_hash_operations uuid_hash_operations = { - .lustre_hashfn = uuid_hashfn, - .lustre_hash_key_compare = uuid_hash_key_compare, - .lustre_hash_object_refcount_get = uuid_export_refcount_get, - .lustre_hash_object_refcount_put = uuid_export_refcount_put, -}; - -__u32 uuid_hashfn(struct lustre_class_hash_body *hash_body, void * key) -{ - struct obd_uuid * uuid_key = key; - - return djb2_hashfn(hash_body, uuid_key->uuid, sizeof(uuid_key->uuid)); -} - -/* Note, it is impossible to find an export that is in failed state with - * this function */ -int uuid_hash_key_compare(void *key, struct hlist_node *compared_hnode) -{ - struct obd_export *export = NULL; - struct obd_uuid *uuid_key = NULL, *compared_uuid = NULL; - - LASSERT( key != NULL); - - uuid_key = (struct obd_uuid*)key; - - export = hlist_entry(compared_hnode, struct obd_export, exp_uuid_hash); - - compared_uuid = &export->exp_client_uuid; - - RETURN(obd_uuid_equals(uuid_key, compared_uuid) && - !export->exp_failed); -} - -void * uuid_export_refcount_get(struct hlist_node * actual_hnode) -{ - struct obd_export *export = NULL; - - LASSERT(actual_hnode != NULL); - - export = hlist_entry(actual_hnode, struct obd_export, exp_uuid_hash); - - LASSERT(export != NULL); - - class_export_get(export); - - RETURN(export); + read_unlock(&lh->lh_rwlock); + EXIT; } - -void uuid_export_refcount_put(struct hlist_node * actual_hnode) +EXPORT_SYMBOL(lustre_hash_for_each_empty); + + /* + * For each item in the lustre hash @lh which matches the @key call + * the passed callback @func and pass to it as an argument each hash + * item and the private @data. Before each callback ops->lh_get will + * be called, and after each callback ops->lh_put will be called. + * Finally, during the callback the bucket lock is held so the + * callback must never sleep. 
+ */ +void +lustre_hash_for_each_key(lustre_hash_t *lh, void *key, + lh_for_each_cb func, void *data) { - struct obd_export *export = NULL; - - LASSERT(actual_hnode != NULL); - - export = hlist_entry(actual_hnode, struct obd_export, exp_uuid_hash); - - LASSERT(export != NULL); - - class_export_put(export); + struct hlist_node *hnode; + lustre_hash_bucket_t *lhb; + unsigned i; + ENTRY; + + read_lock(&lh->lh_rwlock); + i = lh_hash(lh, key, lh->lh_cur_size - 1); + lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + + read_lock(&lhb->lhb_rwlock); + hlist_for_each(hnode, &(lhb->lhb_head)) { + __lustre_hash_bucket_validate(lh, lhb, hnode); + + if (!lh_compare(lh, key, hnode)) + continue; + + func(lh_get(lh, hnode), data); + (void)lh_put(lh, hnode); + } + + read_unlock(&lhb->lhb_rwlock); + read_unlock(&lh->lh_rwlock); + + EXIT; } - -/* - * define (nid <-> export) hash operations and function define +EXPORT_SYMBOL(lustre_hash_for_each_key); + +/** + * Rehash the lustre hash @lh to the given @size. This can be used + * to grow the hash size when excessive chaining is detected, or to + * shrink the hash when it is larger than needed. When the LH_REHASH + * flag is set in @lh the lustre hash may be dynamically rehashed + * during addition or removal if the hash's theta value exceeds + * either the lh->lh_min_theta or lh->max_theta values. By default + * these values are tuned to keep the chained hash depth small, and + * this approach assumes a reasonably uniform hashing function. The + * theta thresholds for @lh are tunable via lustre_hash_set_theta(). 
*/ - -/* define the nid hash operations */ -struct lustre_hash_operations nid_hash_operations = { - .lustre_hashfn = nid_hashfn, - .lustre_hash_key_compare = nid_hash_key_compare, - .lustre_hash_object_refcount_get = nid_export_refcount_get, - .lustre_hash_object_refcount_put = nid_export_refcount_put, -}; - -__u32 nid_hashfn(struct lustre_class_hash_body *hash_body, void * key) -{ - return djb2_hashfn(hash_body, key, sizeof(lnet_nid_t)); -} - -/* Note, it is impossible to find an export that is in failed state with - * this function */ -int nid_hash_key_compare(void *key, struct hlist_node *compared_hnode) -{ - struct obd_export *export = NULL; - lnet_nid_t *nid_key = NULL; - - LASSERT( key != NULL); - - nid_key = (lnet_nid_t*)key; - - export = hlist_entry(compared_hnode, struct obd_export, exp_nid_hash); - - return (export->exp_connection->c_peer.nid == *nid_key && - !export->exp_failed); -} - -void *nid_export_refcount_get(struct hlist_node *actual_hnode) -{ - struct obd_export *export = NULL; - - LASSERT(actual_hnode != NULL); - - export = hlist_entry(actual_hnode, struct obd_export, exp_nid_hash); - - LASSERT(export != NULL); - - class_export_get(export); - - RETURN(export); -} - -void nid_export_refcount_put(struct hlist_node *actual_hnode) +int +lustre_hash_rehash(lustre_hash_t *lh, int size) { - struct obd_export *export = NULL; - - LASSERT(actual_hnode != NULL); - - export = hlist_entry(actual_hnode, struct obd_export, exp_nid_hash); - - LASSERT(export != NULL); - - class_export_put(export); + struct hlist_node *hnode; + struct hlist_node *pos; + lustre_hash_bucket_t *lh_buckets; + lustre_hash_bucket_t *rehash_buckets; + lustre_hash_bucket_t *lh_lhb; + lustre_hash_bucket_t *rehash_lhb; + int i; + int lh_size; + int theta; + void *key; + ENTRY; + + LASSERT(size > 0); + + OBD_VMALLOC(rehash_buckets, sizeof(*rehash_buckets) * size); + if (!rehash_buckets) + RETURN(-ENOMEM); + + for (i = 0; i < size; i++) { + INIT_HLIST_HEAD(&rehash_buckets[i].lhb_head); + 
rwlock_init(&rehash_buckets[i].lhb_rwlock); + atomic_set(&rehash_buckets[i].lhb_count, 0); + } + + write_lock(&lh->lh_rwlock); + + /* + * Early return for multiple concurrent racing callers, + * ensure we only trigger the rehash if it is still needed. + */ + theta = __lustre_hash_theta(lh); + if ((theta >= lh->lh_min_theta) && (theta <= lh->lh_max_theta)) { + OBD_VFREE(rehash_buckets, sizeof(*rehash_buckets) * size); + write_unlock(&lh->lh_rwlock); + RETURN(-EALREADY); + } + + lh_size = lh->lh_cur_size; + lh_buckets = lh->lh_buckets; + + lh->lh_cur_size = size; + lh->lh_buckets = rehash_buckets; + atomic_inc(&lh->lh_rehash_count); + + for (i = 0; i < lh_size; i++) { + lh_lhb = &lh_buckets[i]; + + write_lock(&lh_lhb->lhb_rwlock); + hlist_for_each_safe(hnode, pos, &(lh_lhb->lhb_head)) { + key = lh_key(lh, hnode); + LASSERT(key); + + /* + * Validate hnode is in the correct bucket. + */ + if (unlikely(lh->lh_flags & LH_DEBUG)) + LASSERT(lh_hash(lh, key, lh_size - 1) == i); + + /* + * Delete from old hash bucket. + */ + hlist_del(hnode); + LASSERT(atomic_read(&lh_lhb->lhb_count) > 0); + atomic_dec(&lh_lhb->lhb_count); + + /* + * Add to rehash bucket, ops->lh_key must be defined. + */ + rehash_lhb = &rehash_buckets[lh_hash(lh, key, size-1)]; + hlist_add_head(hnode, &(rehash_lhb->lhb_head)); + atomic_inc(&rehash_lhb->lhb_count); + } + + LASSERT(hlist_empty(&(lh_lhb->lhb_head))); + LASSERT(atomic_read(&lh_lhb->lhb_count) == 0); + write_unlock(&lh_lhb->lhb_rwlock); + } + + OBD_VFREE(lh_buckets, sizeof(*lh_buckets) * lh_size); + write_unlock(&lh->lh_rwlock); + + RETURN(0); } - -/* - * define (net_peer <-> connection) hash operations and function define +EXPORT_SYMBOL(lustre_hash_rehash); + +/** + * Rehash the object referenced by @hnode in the lustre hash @lh. The + * @old_key must be provided to locate the objects previous location + * in the hash, and the @new_key will be used to reinsert the object. 
+ * Use this function instead of a lustre_hash_add() + lustre_hash_del() + * combo when it is critical that there is no window in time where the + * object is missing from the hash. When an object is being rehashed + * the registered lh_get() and lh_put() functions will not be called. */ - -/* define the conn hash operations */ -struct lustre_hash_operations conn_hash_operations = { - .lustre_hashfn = conn_hashfn, - .lustre_hash_key_compare = conn_hash_key_compare, - .lustre_hash_object_refcount_get = conn_refcount_get, - .lustre_hash_object_refcount_put = conn_refcount_put, -}; -EXPORT_SYMBOL(conn_hash_operations); - -__u32 conn_hashfn(struct lustre_class_hash_body *hash_body, void * key) +void lustre_hash_rehash_key(lustre_hash_t *lh, void *old_key, void *new_key, + struct hlist_node *hnode) { - return djb2_hashfn(hash_body, key, sizeof(lnet_process_id_t)); -} - -int conn_hash_key_compare(void *key, struct hlist_node *compared_hnode) -{ - struct ptlrpc_connection *c = NULL; - lnet_process_id_t *conn_key = NULL; - - LASSERT( key != NULL); - - conn_key = (lnet_process_id_t*)key; - - c = hlist_entry(compared_hnode, struct ptlrpc_connection, c_hash); - - return (conn_key->nid == c->c_peer.nid && - conn_key->pid == c->c_peer.pid); -} - -void *conn_refcount_get(struct hlist_node *actual_hnode) -{ - struct ptlrpc_connection *c = NULL; - - LASSERT(actual_hnode != NULL); - - c = hlist_entry(actual_hnode, struct ptlrpc_connection, c_hash); - - LASSERT(c != NULL); - - atomic_inc(&c->c_refcount); - - RETURN(c); -} - -void conn_refcount_put(struct hlist_node *actual_hnode) -{ - struct ptlrpc_connection *c = NULL; - - LASSERT(actual_hnode != NULL); - - c = hlist_entry(actual_hnode, struct ptlrpc_connection, c_hash); - - LASSERT(c != NULL); - - atomic_dec(&c->c_refcount); -} - -/*******************************************************************************/ -/* ( nid<>nidstats ) hash operations define */ - -struct lustre_hash_operations nid_stat_hash_operations = { - 
.lustre_hashfn = nid_hashfn, - .lustre_hash_key_compare = nidstats_hash_key_compare, - .lustre_hash_object_refcount_get = nidstats_refcount_get, - .lustre_hash_object_refcount_put = nidstats_refcount_put, -}; -EXPORT_SYMBOL(nid_stat_hash_operations); - -int nidstats_hash_key_compare(void *key, struct hlist_node * compared_hnode) -{ - struct nid_stat *data; - lnet_nid_t *nid_key; - - LASSERT( key != NULL); - - nid_key = (lnet_nid_t*)key; - data = hlist_entry(compared_hnode, struct nid_stat, nid_hash); - - return (data->nid == *nid_key); + lustre_hash_bucket_t *old_lhb; + lustre_hash_bucket_t *new_lhb; + unsigned i; + int j; + ENTRY; + + __lustre_hash_key_validate(lh, new_key, hnode); + LASSERT(!hlist_unhashed(hnode)); + + read_lock(&lh->lh_rwlock); + + i = lh_hash(lh, old_key, lh->lh_cur_size - 1); + old_lhb = &lh->lh_buckets[i]; + LASSERT(i < lh->lh_cur_size); + + j = lh_hash(lh, new_key, lh->lh_cur_size - 1); + new_lhb = &lh->lh_buckets[j]; + LASSERT(j < lh->lh_cur_size); + + write_lock(&old_lhb->lhb_rwlock); + write_lock(&new_lhb->lhb_rwlock); + + /* + * Migrate item between hash buckets without calling + * the lh_get() and lh_put() callback functions. 
+ */ + hlist_del(hnode); + LASSERT(atomic_read(&old_lhb->lhb_count) > 0); + atomic_dec(&old_lhb->lhb_count); + hlist_add_head(hnode, &(new_lhb->lhb_head)); + atomic_inc(&new_lhb->lhb_count); + + write_unlock(&new_lhb->lhb_rwlock); + write_unlock(&old_lhb->lhb_rwlock); + read_unlock(&lh->lh_rwlock); + + EXIT; } - -void* nidstats_refcount_get(struct hlist_node * actual_hnode) +EXPORT_SYMBOL(lustre_hash_rehash_key); + +int lustre_hash_debug_header(char *str, int size) { - struct nid_stat *data; - - data = hlist_entry(actual_hnode, struct nid_stat, nid_hash); - data->nid_exp_ref_count++; - - RETURN(data); + return snprintf(str, size, + "%-36s%6s%6s%6s%6s%6s%6s%6s%7s%6s%s\n", + "name", "cur", "min", "max", "theta", "t-min", "t-max", + "flags", "rehash", "count", " distribution"); } +EXPORT_SYMBOL(lustre_hash_debug_header); -void nidstats_refcount_put(struct hlist_node * actual_hnode) +int lustre_hash_debug_str(lustre_hash_t *lh, char *str, int size) { - struct nid_stat *data; - - data = hlist_entry(actual_hnode, struct nid_stat, nid_hash); - data->nid_exp_ref_count--; - EXIT; + lustre_hash_bucket_t *lhb; + int theta; + int i; + int c = 0; + int dist[8] = { 0, }; + + if (str == NULL || size == 0) + return 0; + + read_lock(&lh->lh_rwlock); + theta = __lustre_hash_theta(lh); + + c += snprintf(str + c, size - c, "%-36s ",lh->lh_name); + c += snprintf(str + c, size - c, "%5d ", lh->lh_cur_size); + c += snprintf(str + c, size - c, "%5d ", lh->lh_min_size); + c += snprintf(str + c, size - c, "%5d ", lh->lh_max_size); + c += snprintf(str + c, size - c, "%d.%03d ", + theta / 1000, theta % 1000); + c += snprintf(str + c, size - c, "%d.%03d ", + lh->lh_min_theta / 1000, lh->lh_min_theta % 1000); + c += snprintf(str + c, size - c, "%d.%03d ", + lh->lh_max_theta / 1000, lh->lh_max_theta % 1000); + c += snprintf(str + c, size - c, " 0x%02x ", lh->lh_flags); + c += snprintf(str + c, size - c, "%6d ", + atomic_read(&lh->lh_rehash_count)); + c += snprintf(str + c, size - c, "%5d ", + 
atomic_read(&lh->lh_count)); + + /* + * The distribution is a summary of the chained hash depth in + * each of the lustre hash buckets. Each buckets lhb_count is + * divided by the hash theta value and used to generate a + * histogram of the hash distribution. A uniform hash will + * result in all hash buckets being close to the average thus + * only the first few entries in the histogram will be non-zero. + * If you hash function results in a non-uniform hash the will + * be observable by outlier bucks in the distribution histogram. + * + * Uniform hash distribution: 128/128/0/0/0/0/0/0 + * Non-Uniform hash distribution: 128/125/0/0/0/0/2/1 + */ + lh_for_each_bucket(lh, lhb, i) + dist[MIN(fls(atomic_read(&lhb->lhb_count)/MAX(theta,1)),7)]++; + + for (i = 0; i < 8; i++) + c += snprintf(str + c, size - c, "%d%c", dist[i], + (i == 7) ? '\n' : '/'); + + read_unlock(&lh->lh_rwlock); + + return c; } - -/*******************************************************************************/ +EXPORT_SYMBOL(lustre_hash_debug_str); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 4ea1aeb..3eb7073 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -758,16 +758,16 @@ struct obd_export *class_new_export(struct obd_device *obd, spin_lock(&obd->obd_dev_lock); if (!obd_uuid_equals(cluuid, &obd->obd_uuid)) { - rc = lustre_hash_additem_unique(obd->obd_uuid_hash_body, cluuid, - &export->exp_uuid_hash); - if (rc != 0) { - CWARN("%s: denying duplicate export for %s\n", - obd->obd_name, cluuid->uuid); - spin_unlock(&obd->obd_dev_lock); - class_handle_unhash(&export->exp_handle); - OBD_FREE_PTR(export); - return ERR_PTR(-EALREADY); - } + rc = lustre_hash_add_unique(obd->obd_uuid_hash, cluuid, + &export->exp_uuid_hash); + if (rc != 0) { + LCONSOLE_WARN("%s: denying duplicate export for %s, %d\n", + obd->obd_name, cluuid->uuid, rc); + spin_unlock(&obd->obd_dev_lock); + class_handle_unhash(&export->exp_handle); + OBD_FREE_PTR(export); + return 
ERR_PTR(-EALREADY); + } } LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */ @@ -788,10 +788,11 @@ void class_unlink_export(struct obd_export *exp) spin_lock(&exp->exp_obd->obd_dev_lock); /* delete an uuid-export hashitem from hashtables */ - if (!hlist_unhashed(&exp->exp_uuid_hash)) { - lustre_hash_delitem(exp->exp_obd->obd_uuid_hash_body, - &exp->exp_client_uuid, &exp->exp_uuid_hash); - } + if (!hlist_unhashed(&exp->exp_uuid_hash)) + lustre_hash_del(exp->exp_obd->obd_uuid_hash, + &exp->exp_client_uuid, + &exp->exp_uuid_hash); + list_del_init(&exp->exp_obd_chain); list_del_init(&exp->exp_obd_chain_timed); exp->exp_obd->obd_num_exports--; @@ -1010,10 +1011,11 @@ int class_disconnect(struct obd_export *export) already_disconnected = export->exp_disconnected; export->exp_disconnected = 1; - if (!hlist_unhashed(&export->exp_nid_hash)) { - lustre_hash_delitem(export->exp_obd->obd_nid_hash_body, - &export->exp_connection->c_peer.nid, &export->exp_nid_hash); - } + if (!hlist_unhashed(&export->exp_nid_hash)) + lustre_hash_del(export->exp_obd->obd_nid_hash, + &export->exp_connection->c_peer.nid, + &export->exp_nid_hash); + spin_unlock(&export->exp_lock); /* class_cleanup(), abort_recovery(), and class_fail_export() @@ -1341,8 +1343,7 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid) lnet_nid_t nid_key = libcfs_str2nid((char *)nid); do { - doomed_exp = lustre_hash_get_object_by_key(obd->obd_nid_hash_body, - &nid_key); + doomed_exp = lustre_hash_lookup(obd->obd_nid_hash, &nid_key); if (doomed_exp == NULL) break; @@ -1370,17 +1371,16 @@ EXPORT_SYMBOL(obd_export_evict_by_nid); int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) { struct obd_export *doomed_exp = NULL; - struct obd_uuid doomed; + struct obd_uuid doomed_uuid; int exports_evicted = 0; - obd_str2uuid(&doomed, uuid); - if (obd_uuid_equals(&doomed, &obd->obd_uuid)) { + obd_str2uuid(&doomed_uuid, uuid); + if (obd_uuid_equals(&doomed_uuid, &obd->obd_uuid)) { 
CERROR("%s: can't evict myself\n", obd->obd_name); return exports_evicted; } - doomed_exp = lustre_hash_get_object_by_key(obd->obd_uuid_hash_body, - &doomed); + doomed_exp = lustre_hash_lookup(obd->obd_uuid_hash, &doomed_uuid); if (doomed_exp == NULL) { CERROR("%s: can't disconnect %s: no exports found\n", diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 44c0dee..7841007 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1364,9 +1364,8 @@ int lprocfs_exp_rd_uuid(char *page, char **start, off_t off, int count, cb_data.count = count; cb_data.eof = eof; cb_data.len = &len; - lustre_hash_bucket_iterate(obd->obd_nid_hash_body, - &stats->nid, lprocfs_exp_print_uuid, - &cb_data); + lustre_hash_for_each_key(obd->obd_nid_hash, &stats->nid, + lprocfs_exp_print_uuid, &cb_data); return (*cb_data.len); } @@ -1417,8 +1416,8 @@ int lprocfs_nid_stats_clear_write(struct file *file, const char *buffer, struct nid_stat *client_stat; CFS_LIST_HEAD(free_list); - lustre_hash_iterate_all(obd->obd_nid_stats_hash_body, - lprocfs_nid_stats_clear_write_cb, &free_list); + lustre_hash_for_each(obd->obd_nid_stats_hash, + lprocfs_nid_stats_clear_write_cb, &free_list); while (!list_empty(&free_list)) { client_stat = list_entry(free_list.next, struct nid_stat, nid_list); @@ -1440,7 +1439,7 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid) *newnid = 0; if (!exp || !exp->exp_obd || !exp->exp_obd->obd_proc_exports_entry || - !exp->exp_obd->obd_nid_stats_hash_body) + !exp->exp_obd->obd_nid_stats_hash) RETURN(-EINVAL); /* not test against zero because eric say: @@ -1451,7 +1450,7 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid) obd = exp->exp_obd; - CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash_body); + CDEBUG(D_CONFIG, "using hash %p\n", obd->obd_nid_stats_hash); OBD_ALLOC(tmp, sizeof(struct nid_stat)); if (tmp == NULL) @@ -1466,8 +1465,8 @@ int 
lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid) list_add(&tmp->nid_list, &obd->obd_nid_stats); spin_unlock(&obd->obd_nid_lock); - tmp1= lustre_hash_findadd_unique(obd->obd_nid_stats_hash_body, nid, - &tmp->nid_hash); + tmp1 = lustre_hash_findadd_unique(obd->obd_nid_stats_hash, + nid, &tmp->nid_hash); CDEBUG(D_INFO, "Found stats %p for nid %s - ref %d\n", tmp1, libcfs_nid2str(*nid), tmp->nid_exp_ref_count); @@ -1481,8 +1480,7 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid, int *newnid) if (!tmp->nid_proc) { CERROR("Error making export directory for" " nid %s\n", libcfs_nid2str(*nid)); - lustre_hash_delitem(obd->obd_nid_stats_hash_body, nid, - &tmp->nid_hash); + lustre_hash_del(obd->obd_nid_stats_hash, nid, &tmp->nid_hash); GOTO(destroy_new, rc = -ENOMEM); } @@ -1750,6 +1748,24 @@ void lprocfs_oh_clear(struct obd_histogram *oh) } EXPORT_SYMBOL(lprocfs_oh_clear); +int lprocfs_obd_rd_hash(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + int c = 0; + + if (obd == NULL) + return 0; + + c += lustre_hash_debug_header(page, count); + c += lustre_hash_debug_str(obd->obd_uuid_hash, page + c, count - c); + c += lustre_hash_debug_str(obd->obd_nid_hash, page + c, count - c); + c += lustre_hash_debug_str(obd->obd_nid_stats_hash, page+c, count-c); + + return c; +} +EXPORT_SYMBOL(lprocfs_obd_rd_hash); + int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, int count, int *eof, void *data) { diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index f4c88ca..fbc8a8d 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -53,8 +53,9 @@ #include #include -extern struct lustre_hash_operations uuid_hash_operations; -extern struct lustre_hash_operations nid_hash_operations; +static lustre_hash_ops_t uuid_hash_ops; +static lustre_hash_ops_t nid_hash_ops; +static lustre_hash_ops_t nid_stat_hash_ops; /*********** string parsing 
utils *********/ @@ -280,25 +281,28 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg) /* just leave this on forever. I can't use obd_set_up here because other fns check that status, and we're not actually set up yet. */ obd->obd_starting = 1; + obd->obd_uuid_hash = NULL; + obd->obd_nid_hash = NULL; + obd->obd_nid_stats_hash = NULL; spin_unlock(&obd->obd_dev_lock); - /* create an uuid-export hash body */ - err = lustre_hash_init(&obd->obd_uuid_hash_body, "UUID_HASH", - 128, &uuid_hash_operations); - if (err) - GOTO(err_hash, err); - - /* create a nid-export hash body */ - err = lustre_hash_init(&obd->obd_nid_hash_body, "NID_HASH", - 128, &nid_hash_operations); - if (err) - GOTO(err_hash, err); - - /* create a nid-stats hash body */ - err = lustre_hash_init(&obd->obd_nid_stats_hash_body, "NID_STATS", - 128, &nid_stat_hash_operations); - if (err) - GOTO(err_hash, err); + /* create an uuid-export lustre hash */ + obd->obd_uuid_hash = lustre_hash_init("UUID_HASH", 128, 128, + &uuid_hash_ops, 0); + if (!obd->obd_uuid_hash) + GOTO(err_hash, -ENOMEM); + + /* create a nid-export lustre hash */ + obd->obd_nid_hash = lustre_hash_init("NID_HASH", 128, 128, + &nid_hash_ops, 0); + if (!obd->obd_nid_hash) + GOTO(err_hash, -ENOMEM); + + /* create a nid-stats lustre hash */ + obd->obd_nid_stats_hash = lustre_hash_init("NID_STATS", 128, 128, + &nid_stat_hash_ops, 0); + if (!obd->obd_nid_stats_hash) + GOTO(err_hash, -ENOMEM); exp = class_new_export(obd, &obd->obd_uuid); if (IS_ERR(exp)) @@ -328,9 +332,9 @@ err_exp: class_unlink_export(obd->obd_self_export); obd->obd_self_export = NULL; err_hash: - lustre_hash_exit(&obd->obd_uuid_hash_body); - lustre_hash_exit(&obd->obd_nid_hash_body); - lustre_hash_exit(&obd->obd_nid_stats_hash_body); + lustre_hash_exit(obd->obd_uuid_hash); + lustre_hash_exit(obd->obd_nid_hash); + lustre_hash_exit(obd->obd_nid_stats_hash); obd->obd_starting = 0; CERROR("setup %s failed (%d)\n", obd->obd_name, err); RETURN(err); @@ -462,13 +466,13 @@ 
int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) LASSERT(obd->obd_self_export); /* destroy an uuid-export hash body */ - lustre_hash_exit(&obd->obd_uuid_hash_body); + lustre_hash_exit(obd->obd_uuid_hash); /* destroy a nid-export hash body */ - lustre_hash_exit(&obd->obd_nid_hash_body); + lustre_hash_exit(obd->obd_nid_hash); /* destroy a nid-stats hash body */ - lustre_hash_exit(&obd->obd_nid_stats_hash_body); + lustre_hash_exit(obd->obd_nid_stats_hash); /* Precleanup, we must make sure all exports get destroyed. */ err = obd_precleanup(obd, OBD_CLEANUP_EXPORTS); @@ -1236,3 +1240,188 @@ out: lustre_cfg_free(lcfg); RETURN(rc); } + +/* + * uuid<->export lustre hash operations + */ + +static unsigned +uuid_hash(lustre_hash_t *lh, void *key, unsigned mask) +{ + return lh_djb2_hash(((struct obd_uuid *)key)->uuid, + sizeof(((struct obd_uuid *)key)->uuid), mask); +} + +static void * +uuid_key(struct hlist_node *hnode) +{ + struct obd_export *exp; + + exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash); + + RETURN(&exp->exp_client_uuid); +} + +/* + * NOTE: It is impossible to find an export that is in failed + * state with this function + */ +static int +uuid_compare(void *key, struct hlist_node *hnode) +{ + struct obd_export *exp; + + LASSERT(key); + exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash); + + RETURN(obd_uuid_equals((struct obd_uuid *)key,&exp->exp_client_uuid) && + !exp->exp_failed); +} + +static void * +uuid_export_get(struct hlist_node *hnode) +{ + struct obd_export *exp; + + exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash); + class_export_get(exp); + + RETURN(exp); +} + +static void * +uuid_export_put(struct hlist_node *hnode) +{ + struct obd_export *exp; + + exp = hlist_entry(hnode, struct obd_export, exp_uuid_hash); + class_export_put(exp); + + RETURN(exp); +} + +static lustre_hash_ops_t uuid_hash_ops = { + .lh_hash = uuid_hash, + .lh_key = uuid_key, + .lh_compare = uuid_compare, + .lh_get = uuid_export_get, + 
.lh_put = uuid_export_put, +}; + + +/* + * nid<->export hash operations + */ + +static unsigned +nid_hash(lustre_hash_t *lh, void *key, unsigned mask) +{ + return lh_djb2_hash(key, sizeof(lnet_nid_t), mask); +} + +static void * +nid_key(struct hlist_node *hnode) +{ + struct obd_export *exp; + + exp = hlist_entry(hnode, struct obd_export, exp_nid_hash); + + RETURN(&exp->exp_connection->c_peer.nid); +} + +/* + * NOTE: It is impossible to find an export that is in failed + * state with this function + */ +static int +nid_compare(void *key, struct hlist_node *hnode) +{ + struct obd_export *exp; + + LASSERT(key); + exp = hlist_entry(hnode, struct obd_export, exp_nid_hash); + + RETURN(exp->exp_connection->c_peer.nid == *(lnet_nid_t *)key && + !exp->exp_failed); +} + +static void * +nid_export_get(struct hlist_node *hnode) +{ + struct obd_export *exp; + + exp = hlist_entry(hnode, struct obd_export, exp_nid_hash); + class_export_get(exp); + + RETURN(exp); +} + +static void * +nid_export_put(struct hlist_node *hnode) +{ + struct obd_export *exp; + + exp = hlist_entry(hnode, struct obd_export, exp_nid_hash); + class_export_put(exp); + + RETURN(exp); +} + +static lustre_hash_ops_t nid_hash_ops = { + .lh_hash = nid_hash, + .lh_key = nid_key, + .lh_compare = nid_compare, + .lh_get = nid_export_get, + .lh_put = nid_export_put, +}; + + +/* + * nid<->nidstats hash operations + */ + +static void * +nidstats_key(struct hlist_node *hnode) +{ + struct nid_stat *ns; + + ns = hlist_entry(hnode, struct nid_stat, nid_hash); + + RETURN(&ns->nid); +} + +static int +nidstats_compare(void *key, struct hlist_node *hnode) +{ + RETURN(*(lnet_nid_t *)nidstats_key(hnode) == *(lnet_nid_t *)key); +} + +static void * +nidstats_get(struct hlist_node *hnode) +{ + struct nid_stat *ns; + + ns = hlist_entry(hnode, struct nid_stat, nid_hash); + ns->nid_exp_ref_count++; + + RETURN(ns); +} + +static void * +nidstats_put(struct hlist_node *hnode) +{ + struct nid_stat *ns; + + ns = hlist_entry(hnode, struct 
nid_stat, nid_hash); + ns->nid_exp_ref_count--; + + RETURN(ns); +} + +static lustre_hash_ops_t nid_stat_hash_ops = { + .lh_hash = nid_hash, + .lh_key = nidstats_key, + .lh_compare = nidstats_compare, + .lh_get = nidstats_get, + .lh_put = nidstats_put, +}; diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 147f444..e25568c 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -257,6 +257,7 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = { { "tot_dirty", lprocfs_filter_rd_tot_dirty, 0, 0 }, { "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 }, { "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 }, + { "hash_stats", lprocfs_obd_rd_hash, 0, 0 }, { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, { "recovery_maxtime", lprocfs_obd_rd_recovery_maxtime, lprocfs_obd_wr_recovery_maxtime, 0}, diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 5d13a24d..6c794b8 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -71,7 +71,7 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) return NULL; } - c = ptlrpc_get_connection(peer, self, uuid); + c = ptlrpc_connection_get(peer, self, uuid); if (c) { memcpy(c->c_remote_uuid.uuid, uuid->uuid, sizeof(c->c_remote_uuid.uuid)); @@ -82,24 +82,6 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) return c; } -void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, - struct obd_uuid *uuid) -{ - lnet_nid_t self; - lnet_process_id_t peer; - int err; - - err = ptlrpc_uuid_to_peer(uuid, &peer, &self); - if (err != 0) { - CERROR("cannot find peer %s!\n", uuid->uuid); - return; - } - - conn->c_peer = peer; - conn->c_self = self; - return; -} - static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal) { struct ptlrpc_bulk_desc *desc; diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 2b9fa8d..9442bf7 100644 --- 
a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -46,206 +46,192 @@ #include "ptlrpc_internal.h" #include -static spinlock_t conn_lock; -static struct list_head conn_list; -static struct lustre_class_hash_body *conn_hash_body; -static struct lustre_class_hash_body *conn_unused_hash_body; +static lustre_hash_t *conn_hash = NULL; +static lustre_hash_ops_t conn_hash_ops; -extern struct lustre_hash_operations conn_hash_operations; - -void ptlrpc_dump_connection(void *obj, void *data) +struct ptlrpc_connection * +ptlrpc_connection_get(lnet_process_id_t peer, lnet_nid_t self, + struct obd_uuid *uuid) { - struct ptlrpc_connection *c = obj; + struct ptlrpc_connection *conn, *conn2; + ENTRY; - CERROR("Connection %p/%s has refcount %d (nid=%s->%s)\n", - c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount), - libcfs_nid2str(c->c_self), - libcfs_nid2str(c->c_peer.nid)); + conn = lustre_hash_lookup(conn_hash, &peer); + if (conn) + GOTO(out, conn); + + OBD_ALLOC_PTR(conn); + if (!conn) + RETURN(NULL); + + conn->c_peer = peer; + conn->c_self = self; + INIT_HLIST_NODE(&conn->c_hash); + atomic_set(&conn->c_refcount, 1); + if (uuid) + obd_str2uuid(&conn->c_remote_uuid, uuid->uuid); + + /* + * Add the newly created conn to the hash, on key collision we + * lost a racing addition and must destroy our newly allocated + * connection. The object which exists in the hash will be + * returned and may be compared against our object. 
+ */ + conn2 = lustre_hash_findadd_unique(conn_hash, &peer, &conn->c_hash); + if (conn != conn2) { + OBD_FREE_PTR(conn); + conn = conn2; + } + EXIT; +out: + CDEBUG(D_INFO, "conn=%p refcount %d to %s\n", + conn, atomic_read(&conn->c_refcount), + libcfs_nid2str(conn->c_peer.nid)); + return conn; } - -void ptlrpc_dump_connections(void) + +int ptlrpc_connection_put(struct ptlrpc_connection *conn) { + int rc = 0; ENTRY; + + if (!conn) + RETURN(rc); + + LASSERT(!hlist_unhashed(&conn->c_hash)); + + /* + * We do not remove connection from hashtable and + * do not free it even if last caller released ref, + * as we want to have it cached for the case it is + * needed again. + * + * Deallocating it and later creating new connection + * again would be wasteful. This way we also avoid + * expensive locking to protect things from get/put + * race when found cached connection is freed by + * ptlrpc_connection_put(). + * + * It will be freed later in module unload time, + * when ptlrpc_connection_fini()->lh_exit->conn_exit() + * path is called. 
+ */ + if (atomic_dec_return(&conn->c_refcount) == 1) + rc = 1; - lustre_hash_iterate_all(conn_hash_body, ptlrpc_dump_connection, NULL); + CDEBUG(D_INFO, "PUT conn=%p refcount %d to %s\n", + conn, atomic_read(&conn->c_refcount), + libcfs_nid2str(conn->c_peer.nid)); - EXIT; + RETURN(rc); } - -struct ptlrpc_connection* -ptlrpc_lookup_conn_locked (lnet_process_id_t peer) + +struct ptlrpc_connection * +ptlrpc_connection_addref(struct ptlrpc_connection *conn) { - struct ptlrpc_connection *c = NULL; - int rc; - - c = lustre_hash_get_object_by_key(conn_hash_body, &peer); - if (c != NULL) - return c; - - c = lustre_hash_get_object_by_key(conn_unused_hash_body, &peer); - if (c != NULL) { - lustre_hash_delitem(conn_unused_hash_body, &peer, &c->c_hash); - rc = lustre_hash_additem_unique(conn_hash_body, &peer, - &c->c_hash); - if (rc) { - /* can't add - try with new item */ - OBD_FREE_PTR(c); - list_del(&c->c_link); - c = NULL; - } - } - - return c; -} + ENTRY; + atomic_inc(&conn->c_refcount); + CDEBUG(D_INFO, "conn=%p refcount %d to %s\n", + conn, atomic_read(&conn->c_refcount), + libcfs_nid2str(conn->c_peer.nid)); -struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, - lnet_nid_t self, struct obd_uuid *uuid) + RETURN(conn); +} + +int ptlrpc_connection_init(void) { - struct ptlrpc_connection *c; - struct ptlrpc_connection *c2; - int rc = 0; ENTRY; - CDEBUG(D_INFO, "self %s peer %s\n", - libcfs_nid2str(self), libcfs_id2str(peer)); - - spin_lock(&conn_lock); - c = ptlrpc_lookup_conn_locked(peer); - spin_unlock(&conn_lock); - - if (c != NULL) - RETURN (c); - - OBD_ALLOC_PTR(c); - if (c == NULL) - RETURN (NULL); - - atomic_set(&c->c_refcount, 1); - c->c_peer = peer; - c->c_self = self; - INIT_HLIST_NODE(&c->c_hash); - CFS_INIT_LIST_HEAD(&c->c_link); - if (uuid != NULL) - obd_str2uuid(&c->c_remote_uuid, uuid->uuid); - - spin_lock(&conn_lock); - - c2 = ptlrpc_lookup_conn_locked(peer); - if (c2 == NULL) { - rc = lustre_hash_additem_unique(conn_hash_body, &peer, - 
&c->c_hash); - if (rc != 0) { - CERROR("Cannot add connection to conn_hash_body\n"); - goto out_conn; - } - list_add(&c->c_link, &conn_list); - } - -out_conn: - spin_unlock(&conn_lock); - - if (c2 == NULL && rc == 0) - RETURN (c); - - if (c != NULL) - OBD_FREE(c, sizeof(*c)); - RETURN (c2); + conn_hash = lustre_hash_init("CONN_HASH", 32, 32768, + &conn_hash_ops, LH_REHASH); + if (!conn_hash) + RETURN(-ENOMEM); + + RETURN(0); } - -int ptlrpc_put_connection(struct ptlrpc_connection *c) -{ - int rc = 0; + +void ptlrpc_connection_fini(void) { ENTRY; + lustre_hash_exit(conn_hash); + EXIT; +} - if (c == NULL) { - CERROR("NULL connection\n"); - RETURN(0); - } +/* + * Hash operations for net_peer<->connection + */ +static unsigned +conn_hashfn(lustre_hash_t *lh, void *key, unsigned mask) +{ + return lh_djb2_hash(key, sizeof(lnet_process_id_t), mask); +} - CDEBUG (D_INFO, "connection=%p refcount %d to %s\n", - c, atomic_read(&c->c_refcount) - 1, - libcfs_nid2str(c->c_peer.nid)); +static int +conn_compare(void *key, struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + lnet_process_id_t *conn_key; - spin_lock(&conn_lock); - LASSERT(!hlist_unhashed(&c->c_hash)); - spin_unlock(&conn_lock); + LASSERT(key != NULL); + conn_key = (lnet_process_id_t*)key; + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); - if (atomic_dec_return(&c->c_refcount) == 1) { + return conn_key->nid == conn->c_peer.nid && + conn_key->pid == conn->c_peer.pid; +} - spin_lock(&conn_lock); - lustre_hash_delitem(conn_hash_body, &c->c_peer, &c->c_hash); - rc = lustre_hash_additem_unique(conn_unused_hash_body, &c->c_peer, - &c->c_hash); - spin_unlock(&conn_lock); - if (rc != 0) { - CERROR("Cannot hash connection to conn_hash_body\n"); - GOTO(ret, rc); - } +static void * +conn_key(struct hlist_node *hnode) +{ + struct ptlrpc_connection *conn; + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + return &conn->c_peer; +} - rc = 1; - } +static void * +conn_get(struct hlist_node 
*hnode) +{ + struct ptlrpc_connection *conn; - if (atomic_read(&c->c_refcount) < 0) - CERROR("connection %p refcount %d!\n", - c, atomic_read(&c->c_refcount)); -ret : + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + atomic_inc(&conn->c_refcount); - RETURN(rc); + return conn; } -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c) +static void * +conn_put(struct hlist_node *hnode) { - ENTRY; - atomic_inc(&c->c_refcount); - CDEBUG (D_INFO, "connection=%p refcount %d to %s\n", - c, atomic_read(&c->c_refcount), - libcfs_nid2str(c->c_peer.nid)); - RETURN(c); -} + struct ptlrpc_connection *conn; -int ptlrpc_init_connection(void) -{ - int rc = 0; - CFS_INIT_LIST_HEAD(&conn_list); - rc = lustre_hash_init(&conn_hash_body, "CONN_HASH", - 128, &conn_hash_operations); - if (rc) - GOTO(ret, rc); - - rc = lustre_hash_init(&conn_unused_hash_body, "CONN_UNUSED_HASH", - 128, &conn_hash_operations); - if (rc) - GOTO(ret, rc); - - spin_lock_init(&conn_lock); -ret: - if (rc) { - lustre_hash_exit(&conn_hash_body); - lustre_hash_exit(&conn_unused_hash_body); - } - RETURN(rc); + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + atomic_dec(&conn->c_refcount); + + return conn; } -void ptlrpc_cleanup_connection(void) +static void +conn_exit(struct hlist_node *hnode) { - struct list_head *tmp, *pos; - struct ptlrpc_connection *c; - - spin_lock(&conn_lock); - - lustre_hash_exit(&conn_unused_hash_body); - lustre_hash_exit(&conn_hash_body); - - list_for_each_safe(tmp, pos, &conn_list) { - c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (atomic_read(&c->c_refcount)) - CERROR("Connection %p/%s has refcount %d (nid=%s)\n", - c, c->c_remote_uuid.uuid, - atomic_read(&c->c_refcount), - libcfs_nid2str(c->c_peer.nid)); - list_del(&c->c_link); - OBD_FREE(c, sizeof(*c)); - } - spin_unlock(&conn_lock); + struct ptlrpc_connection *conn; + + conn = hlist_entry(hnode, struct ptlrpc_connection, c_hash); + /* + * Nothing should be left. 
Connection user put it and + * connection also was deleted from table by this time + * so we should have 0 refs. + */ + LASSERTF(atomic_read(&conn->c_refcount) == 0, + "Busy connection with %d refs\n", + atomic_read(&conn->c_refcount)); + OBD_FREE_PTR(conn); } + +static lustre_hash_ops_t conn_hash_ops = { + .lh_hash = conn_hashfn, + .lh_compare = conn_compare, + .lh_key = conn_key, + .lh_get = conn_get, + .lh_put = conn_put, + .lh_exit = conn_exit, +}; diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 7406c4d..309729f 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -417,13 +417,13 @@ static int import_select_connection(struct obd_import *imp) /* switch connection, don't mind if it's same as the current one */ if (imp->imp_connection) - ptlrpc_put_connection(imp->imp_connection); + ptlrpc_connection_put(imp->imp_connection); imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); dlmexp = class_conn2export(&imp->imp_dlm_handle); LASSERT(dlmexp != NULL); if (dlmexp->exp_connection) - ptlrpc_put_connection(dlmexp->exp_connection); + ptlrpc_connection_put(dlmexp->exp_connection); dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); class_export_put(dlmexp); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 866ee44..3ca88b7 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -401,7 +401,7 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int flags) ptlrpc_at_set_reply(req, flags); if (req->rq_export == NULL || req->rq_export->exp_connection == NULL) - conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL); + conn = ptlrpc_connection_get(req->rq_peer, req->rq_self, NULL); else conn = ptlrpc_connection_addref(req->rq_export->exp_connection); @@ -427,7 +427,7 @@ out: atomic_dec (&svc->srv_outstanding_replies); ptlrpc_req_drop_rs(req); } - ptlrpc_put_connection(conn); + ptlrpc_connection_put(conn); return rc; } diff --git a/lustre/ptlrpc/ptlrpc_module.c 
b/lustre/ptlrpc/ptlrpc_module.c index cbdcc88..b6d9a6e 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -80,12 +80,12 @@ __init int ptlrpc_init(void) RETURN(rc); cleanup_phase = 2; - rc = ptlrpc_init_connection(); + rc = ptlrpc_connection_init(); if (rc) GOTO(cleanup, rc); cleanup_phase = 3; - ptlrpc_put_connection_superhack = ptlrpc_put_connection; + ptlrpc_put_connection_superhack = ptlrpc_connection_put; rc = ptlrpc_start_pinger(); if (rc) @@ -117,7 +117,7 @@ cleanup: case 4: ptlrpc_stop_pinger(); case 3: - ptlrpc_cleanup_connection(); + ptlrpc_connection_fini(); case 2: ptlrpc_exit_portals(); case 1: @@ -136,17 +136,15 @@ static void __exit ptlrpc_exit(void) ldlm_exit(); ptlrpc_stop_pinger(); ptlrpc_exit_portals(); - ptlrpc_cleanup_connection(); + ptlrpc_connection_fini(); } /* connection.c */ -EXPORT_SYMBOL(ptlrpc_dump_connections); -EXPORT_SYMBOL(ptlrpc_readdress_connection); -EXPORT_SYMBOL(ptlrpc_get_connection); -EXPORT_SYMBOL(ptlrpc_put_connection); +EXPORT_SYMBOL(ptlrpc_connection_get); +EXPORT_SYMBOL(ptlrpc_connection_put); EXPORT_SYMBOL(ptlrpc_connection_addref); -EXPORT_SYMBOL(ptlrpc_init_connection); -EXPORT_SYMBOL(ptlrpc_cleanup_connection); +EXPORT_SYMBOL(ptlrpc_connection_init); +EXPORT_SYMBOL(ptlrpc_connection_fini); /* niobuf.c */ EXPORT_SYMBOL(ptlrpc_start_bulk_transfer); -- 1.8.3.1