From 895fbd93b2cfb1a1ea2ad676d1b7355867e9f1c0 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sat, 15 Jun 2013 17:29:18 +0800 Subject: [PATCH] LU-3335 scrub: purge inconsistent objects after OI scrub When OI scrub repairs an inconsistency it has found, it needs to purge the old object from the cache; otherwise, others may still use the stale cached information. Test-Parameters: testlist=sanity-scrub Signed-off-by: Fan Yong Change-Id: Id3607c38952da9bfec01139743e817c95a854047 Reviewed-on: http://review.whamcloud.com/6697 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lu_object.h | 2 ++ lustre/lfsck/lfsck_lib.c | 12 +++++---- lustre/obdclass/lu_object.c | 59 ++++++++++++++++++++++++++++++++++++++---- lustre/osd-ldiskfs/osd_scrub.c | 3 +++ lustre/tests/sanity-scrub.sh | 2 +- 5 files changed, 67 insertions(+), 11 deletions(-) diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 0d78047..85dcbfa 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -744,6 +744,8 @@ struct lu_object *lu_object_find_at(const struct lu_env *env, struct lu_device *dev, const struct lu_fid *f, const struct lu_object_conf *conf); +void lu_object_purge(const struct lu_env *env, struct lu_device *dev, + const struct lu_fid *f); struct lu_object *lu_object_find_slice(const struct lu_env *env, struct lu_device *dev, const struct lu_fid *f, diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 6e20069..fc612fc 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -869,7 +869,7 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, struct lfsck_component *com; struct l_wait_info lwi = { 0 }; bool dirty = false; - int rc = 0; + long rc = 0; __u16 valid = 0; __u16 flags = 0; ENTRY; @@ -1015,16 +1015,18 @@ trigger: thread_set_flags(thread, 0); if (lfsck->li_master) rc = PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck")); - 
if (rc < 0) - CERROR("%s: cannot start LFSCK thread, rc = %d\n", + if (IS_ERR_VALUE(rc)) { + CERROR("%s: cannot start LFSCK thread, rc = %ld\n", lfsck_lfsck2name(lfsck), rc); - else + } else { + rc = 0; l_wait_event(thread->t_ctl_waitq, thread_is_running(thread) || thread_is_stopped(thread), &lwi); + } - GOTO(out, rc = 0); + GOTO(out, rc); out: mutex_unlock(&lfsck->li_mutex); diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index e5021fd..084506c 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -540,7 +540,7 @@ static struct lu_object *htable_lookup(struct lu_site *s, __u64 ver = cfs_hash_bd_version_get(bd); if (*version == ver) - return NULL; + return ERR_PTR(-ENOENT); *version = ver; bkt = cfs_hash_bd_extra_get(s->ls_obj_hash, bd); @@ -549,7 +549,7 @@ static struct lu_object *htable_lookup(struct lu_site *s, hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f); if (hnode == NULL) { lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS); - return NULL; + return ERR_PTR(-ENOENT); } h = container_of0(hnode, struct lu_object_header, loh_hash); @@ -573,6 +573,31 @@ static struct lu_object *htable_lookup(struct lu_site *s, return ERR_PTR(-EAGAIN); } +static struct lu_object *htable_lookup_nowait(struct lu_site *s, + cfs_hash_bd_t *bd, + const struct lu_fid *f) +{ + cfs_hlist_node_t *hnode; + struct lu_object_header *h; + + /* cfs_hash_bd_peek_locked is a somehow "internal" function + * of cfs_hash, it doesn't add refcount on object. 
*/ + hnode = cfs_hash_bd_peek_locked(s->ls_obj_hash, bd, (void *)f); + if (hnode == NULL) { + lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_MISS); + return ERR_PTR(-ENOENT); + } + + h = container_of0(hnode, struct lu_object_header, loh_hash); + if (unlikely(lu_object_is_dying(h))) + return ERR_PTR(-ENOENT); + + cfs_hash_get(s->ls_obj_hash, hnode); + lprocfs_counter_incr(s->ls_stats, LU_SS_CACHE_HIT); + cfs_list_del_init(&h->loh_lru); + return lu_object_top(h); +} + /** * Search cache for an object with the fid \a f. If such object is found, * return it. Otherwise, create new object, insert it into cache and return @@ -653,7 +678,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env, cfs_hash_bd_get_and_lock(hs, (void *)f, &bd, 1); o = htable_lookup(s, &bd, f, waiter, &version); cfs_hash_bd_unlock(hs, &bd, 1); - if (o != NULL) + if (!IS_ERR(o) || PTR_ERR(o) != -ENOENT) return o; /* @@ -669,7 +694,7 @@ static struct lu_object *lu_object_find_try(const struct lu_env *env, cfs_hash_bd_lock(hs, &bd, 1); shadow = htable_lookup(s, &bd, f, waiter, &version); - if (likely(shadow == NULL)) { + if (likely(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT)) { struct lu_site_bkt_data *bkt; bkt = cfs_hash_bd_extra_get(hs, &bd); @@ -715,6 +740,30 @@ struct lu_object *lu_object_find_at(const struct lu_env *env, EXPORT_SYMBOL(lu_object_find_at); /** + * Try to find the object in cache without waiting for the dead object + * to be released nor allocating object if no cached one was found. + * + * The found object will be set as LU_OBJECT_HEARD_BANSHEE for purging. 
+ */ +void lu_object_purge(const struct lu_env *env, struct lu_device *dev, + const struct lu_fid *f) +{ + struct lu_site *s = dev->ld_site; + cfs_hash_t *hs = s->ls_obj_hash; + cfs_hash_bd_t bd; + struct lu_object *o; + + cfs_hash_bd_get_and_lock(hs, f, &bd, 1); + o = htable_lookup_nowait(s, &bd, f); + cfs_hash_bd_unlock(hs, &bd, 1); + if (!IS_ERR(o)) { + set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags); + lu_object_put(env, o); + } +} +EXPORT_SYMBOL(lu_object_purge); + +/** * Find object with given fid, and return its slice belonging to given device. */ struct lu_object *lu_object_find_slice(const struct lu_env *env, @@ -2113,7 +2162,7 @@ void lu_object_assign_fid(const struct lu_env *env, struct lu_object *o, cfs_hash_bd_get_and_lock(hs, (void *)fid, &bd, 1); shadow = htable_lookup(s, &bd, fid, &waiter, &version); /* supposed to be unique */ - LASSERT(shadow == NULL); + LASSERT(IS_ERR(shadow) && PTR_ERR(shadow) == -ENOENT); *old = *fid; bkt = cfs_hash_bd_extra_get(hs, &bd); cfs_hash_bd_add_locked(hs, &bd, &o->lo_header->loh_hash); diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index c46e62a..72ed5b8 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -517,6 +517,9 @@ iget: sf->sf_items_updated_prior++; else sf->sf_items_updated++; + + /* The target has been changed, need to be re-loaded. */ + lu_object_purge(info->oti_env, osd2lu_dev(dev), fid); } GOTO(out, rc); diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 4f73945..97e4187 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -483,7 +483,7 @@ test_7() { run_test 7 "System is available during OI scrub scanning" test_8() { - scrub_prep 0 + scrub_prep 128 mds_backup_restore || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" -- 1.8.3.1