Whamcloud - gitweb
LU-2492 obdclass: lu_object_find_at() waits forever
authorBobi Jam <bobijam.xu@intel.com>
Sat, 5 Jan 2013 00:52:22 +0000 (08:52 +0800)
committerOleg Drokin <green@whamcloud.com>
Tue, 8 Jan 2013 05:05:31 +0000 (00:05 -0500)
In htable_lookup(), cfs_hash_bd_lookup_locked() could add an object
refcount and it messes with lu_object_put(), and the object's supposed
last holder will miss the change to free the object, leaving an 0
referred object in the hash table, and keeps lu_object_find_at()
waiting forever.

In this patch, we do not take the refcount of the dying object
in htable_lookup().

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I4bd57e944bf9167d70d97845be55241b538872df
Reviewed-on: http://review.whamcloud.com/3439
Reviewed-by: Fan Yong <fan.yong@intel.com>
Tested-by: Hudson
Reviewed-by: Liang Zhen <liang@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
libcfs/include/libcfs/libcfs_hash.h
libcfs/libcfs/hash.c
lustre/obdclass/lu_object.c

index 47f3408..3ecea0e 100644 (file)
@@ -686,6 +686,8 @@ static inline cfs_hlist_head_t *cfs_hash_bd_hhead(cfs_hash_t *hs,
 
 cfs_hlist_node_t *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
                                             cfs_hash_bd_t *bd, const void *key);
 
 cfs_hlist_node_t *cfs_hash_bd_lookup_locked(cfs_hash_t *hs,
                                             cfs_hash_bd_t *bd, const void *key);
+cfs_hlist_node_t *cfs_hash_bd_peek_locked(cfs_hash_t *hs,
+                                         cfs_hash_bd_t *bd, const void *key);
 cfs_hlist_node_t *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
                                              cfs_hash_bd_t *bd, const void *key,
                                              cfs_hlist_node_t *hnode,
 cfs_hlist_node_t *cfs_hash_bd_findadd_locked(cfs_hash_t *hs,
                                              cfs_hash_bd_t *bd, const void *key,
                                              cfs_hlist_node_t *hnode,
index 37308a7..398806a 100644 (file)
@@ -665,6 +665,14 @@ cfs_hash_bd_lookup_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, const void *key)
 CFS_EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
 
 cfs_hlist_node_t *
 CFS_EXPORT_SYMBOL(cfs_hash_bd_lookup_locked);
 
 cfs_hlist_node_t *
+cfs_hash_bd_peek_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd, const void *key)
+{
+       return cfs_hash_bd_lookup_intent(hs, bd, key, NULL,
+                                        CFS_HS_LOOKUP_IT_PEEK);
+}
+CFS_EXPORT_SYMBOL(cfs_hash_bd_peek_locked);
+
+cfs_hlist_node_t *
 cfs_hash_bd_findadd_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                            const void *key, cfs_hlist_node_t *hnode,
                            int noref)
 cfs_hash_bd_findadd_locked(cfs_hash_t *hs, cfs_hash_bd_t *bd,
                            const void *key, cfs_hlist_node_t *hnode,
                            int noref)
index 8443bc2..f577dca 100644 (file)
@@ -519,9 +519,9 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 
         *version = ver;
         bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
 
         *version = ver;
         bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
-        /* cfs_hash_bd_lookup_intent is a somehow "internal" function
-         * of cfs_hash, but we don't want refcount on object right now */
-        hnode = cfs_hash_bd_lookup_locked(s->ls_obj_hash, bd, (void *)f);
+       /* cfs_hash_bd_peek_locked is a somehow "internal" function
+        * of cfs_hash, it doesn't add refcount on object. */
+       hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
         if (hnode == NULL) {
                 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
                 return NULL;
         if (hnode == NULL) {
                 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
                 return NULL;
@@ -529,6 +529,7 @@ static struct lu_object *htable_lookup(struct lu_site *s,
 
         h = container_of0(hnode, struct lu_object_header, loh_hash);
         if (likely(!lu_object_is_dying(h))) {
 
         h = container_of0(hnode, struct lu_object_header, loh_hash);
         if (likely(!lu_object_is_dying(h))) {
+               cfs_hash_get(s->ls_obj_hash, hnode);
                 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
                 cfs_list_del_init(&h->loh_lru);
                 return lu_object_top(h);
                 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
                 cfs_list_del_init(&h->loh_lru);
                 return lu_object_top(h);
@@ -539,7 +540,6 @@ static struct lu_object *htable_lookup(struct lu_site *s,
          * returned (to assure that references to dying objects are eventually
          * drained), and moreover, lookup has to wait until object is freed.
          */
          * returned (to assure that references to dying objects are eventually
          * drained), and moreover, lookup has to wait until object is freed.
          */
-        cfs_atomic_dec(&h->loh_ref);
 
         cfs_waitlink_init(waiter);
         cfs_waitq_add(&bkt->lsb_marche_funebre, waiter);
 
         cfs_waitlink_init(waiter);
         cfs_waitq_add(&bkt->lsb_marche_funebre, waiter);