Whamcloud - gitweb
LU-3335 scrub: purge inconsistent objects after OI scrub 97/6697/10
author Fan Yong <fan.yong@intel.com>
Sat, 15 Jun 2013 09:29:18 +0000 (17:29 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 10 Jul 2013 04:12:02 +0000 (04:12 +0000)
When OI scrub repairs an inconsistency it has found, it needs to purge
the old object from the cache; otherwise, others may still use the
stale cached information.

Test-Parameters: testlist=sanity-scrub
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Id3607c38952da9bfec01139743e817c95a854047
Reviewed-on: http://review.whamcloud.com/6697
Tested-by: Hudson
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lu_object.h
lustre/lfsck/lfsck_lib.c
lustre/obdclass/lu_object.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/tests/sanity-scrub.sh

index 0d78047..85dcbfa 100644 (file)
@@ -744,6 +744,8 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
                                     struct lu_device *dev,
                                     const struct lu_fid *f,
                                     const struct lu_object_conf *conf);
+void lu_object_purge(const struct lu_env *env, struct lu_device *dev,
+                    const struct lu_fid *f);
 struct lu_object *lu_object_find_slice(const struct lu_env *env,
                                        struct lu_device *dev,
                                        const struct lu_fid *f,
index 6e20069..fc612fc 100644 (file)
@@ -869,7 +869,7 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        struct lfsck_component *com;
        struct l_wait_info      lwi    = { 0 };
        bool                    dirty  = false;
-       int                     rc     = 0;
+       long                    rc     = 0;
        __u16                   valid  = 0;
        __u16                   flags  = 0;
        ENTRY;
@@ -1015,16 +1015,18 @@ trigger:
        thread_set_flags(thread, 0);
        if (lfsck->li_master)
                rc = PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck"));
-       if (rc < 0)
-               CERROR("%s: cannot start LFSCK thread, rc = %d\n",
+       if (IS_ERR_VALUE(rc)) {
+               CERROR("%s: cannot start LFSCK thread, rc = %ld\n",
                       lfsck_lfsck2name(lfsck), rc);
-       else
+       } else {
+               rc = 0;
                l_wait_event(thread->t_ctl_waitq,
                             thread_is_running(thread) ||
                             thread_is_stopped(thread),
                             &lwi);
+       }
 
-       GOTO(out, rc = 0);
+       GOTO(out, rc);
 
 out:
        mutex_unlock(&lfsck->li_mutex);
index e5021fd..084506c 100644 (file)
@@ -540,7 +540,7 @@ static struct lu_object *htable_lookup(struct lu_site *s,
         __u64  ver = cfs_hash_bd_version_get(bd);
 
         if (*version == ver)
-                return NULL;
+               return ERR_PTR(-ENOENT);
 
         *version = ver;
         bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd);
@@ -549,7 +549,7 @@ static struct lu_object *htable_lookup(struct lu_site *s,
        hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
         if (hnode == NULL) {
                 lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
-                return NULL;
+               return ERR_PTR(-ENOENT);
         }
 
         h = container_of0(hnode, struct lu_object_header, loh_hash);
@@ -573,6 +573,31 @@ static struct lu_object *htable_lookup(struct lu_site *s,
         return ERR_PTR(-EAGAIN);
 }
 
+static struct lu_object *htable_lookup_nowait(struct lu_site *s,
+                                             cfs_hash_bd_t *bd,
+                                             const struct lu_fid *f)
+{
+       cfs_hlist_node_t        *hnode;
+       struct lu_object_header *h;
+
+       /* cfs_hash_bd_peek_locked is a somehow "internal" function
+        * of cfs_hash, it doesn't add refcount on object. */
+       hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f);
+       if (hnode == NULL) {
+               lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS);
+               return ERR_PTR(-ENOENT);
+       }
+
+       h = container_of0(hnode, struct lu_object_header, loh_hash);
+       if (unlikely(lu_object_is_dying(h)))
+               return ERR_PTR(-ENOENT);
+
+       cfs_hash_get(s->ls_obj_hash, hnode);
+       lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT);
+       cfs_list_del_init(&h->loh_lru);
+       return lu_object_top(h);
+}
+
 /**
  * Search cache for an object with the fid \a f. If such object is found,
  * return it. Otherwise, create new object, insert it into cache and return
@@ -653,7 +678,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
         cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1);
         o = htable_lookup(s, &bd, f, waiter, &version);
         cfs_hash_bd_unlock(hs, &bd, 1);
-        if (o != NULL)
+       if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT)
                 return o;
 
         /*
@@ -669,7 +694,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env,
         cfs_hash_bd_lock(hs, &bd, 1);
 
         shadow = htable_lookup(s, &bd, f, waiter, &version);
-        if (likely(shadow == NULL)) {
+       if (likely(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT)) {
                 struct lu_site_bkt_data *bkt;
 
                 bkt = cfs_hash_bd_extra_get(hs, &bd);
@@ -715,6 +740,30 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
 EXPORT_SYMBOL(lu_object_find_at);
 
 /**
+ * Look up the object in the cache without waiting for a dying object
+ * to be released and without allocating a new one on a cache miss.
+ *
+ * A found object is flagged LU_OBJECT_HEARD_BANSHEE so that it is purged.
+ */
+void lu_object_purge(const struct lu_env *env, struct lu_device *dev,
+                    const struct lu_fid *f)
+{
+       struct lu_site          *s  = dev->ld_site;
+       cfs_hash_t              *hs = s->ls_obj_hash;
+       cfs_hash_bd_t            bd;
+       struct lu_object        *o;
+
+       cfs_hash_bd_get_and_lock(hs, f, &bd, 1);
+       o = htable_lookup_nowait(s, &bd, f);
+       cfs_hash_bd_unlock(hs, &bd, 1);
+       if (!IS_ERR(o)) {
+               set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags);
+               lu_object_put(env, o);
+       }
+}
+EXPORT_SYMBOL(lu_object_purge);
+
+/**
  * Find object with given fid, and return its slice belonging to given device.
  */
 struct lu_object *lu_object_find_slice(const struct lu_env *env,
@@ -2113,7 +2162,7 @@ void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o,
        cfs_hash_bd_get_and_lock(hs, (void *)fid, &bd, 1);
        shadow = htable_lookup(s, &bd, fid, &waiter, &version);
        /* supposed to be unique */
-       LASSERT(shadow == NULL);
+       LASSERT(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT);
        *old = *fid;
        bkt = cfs_hash_bd_extra_get(hs, &bd);
        cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash);
index c46e62a..72ed5b8 100644 (file)
@@ -517,6 +517,9 @@ iget:
                        sf->sf_items_updated_prior++;
                else
                        sf->sf_items_updated++;
+
+               /* The target has been changed, need to be re-loaded. */
+               lu_object_purge(info->oti_env, osd2lu_dev(dev), fid);
        }
 
        GOTO(out, rc);
index 4f73945..97e4187 100644 (file)
@@ -483,7 +483,7 @@ test_7() {
 run_test 7 "System is available during OI scrub scanning"
 
 test_8() {
-       scrub_prep 0
+       scrub_prep 128
        mds_backup_restore || error "(1) Fail to backup/restore!"
 
        echo "start $SINGLEMDS with disabling OI scrub"