From 1bdeabf2eb49e6c3dcfe83994b59426b36e724a0 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Thu, 27 Sep 2012 00:53:18 +0800 Subject: [PATCH] LU-2033 scrub: lookup server local files by name After backup/restore, ldiskfs's OI becomes invalid (inodes change). The OI scrubber scans the filesystem to rebuild the OI, which can take quite a long time. To be able to start the MDS earlier (right after restore), the MDS should look up by name all internal files required for startup, as the name->inode mapping is preserved by backup/restore. This patch makes the local object library look up by name the file storing the last used fid within a given sequence. The patch also fixes sanity-scrub/11 to expect skipping of files created + directory + per-OST llog objects. Signed-off-by: Fan Yong Change-Id: I56179c03279062464960af73de031f13d31bac14 Reviewed-on: http://review.whamcloud.com/4106 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/mdd/mdd_lfsck.c | 2 +- lustre/obdclass/local_storage.c | 37 +++++++++++++++++---------- lustre/osd-ldiskfs/osd_handler.c | 54 +++++++++++++++++---------------------- lustre/osd-ldiskfs/osd_internal.h | 7 ++--- lustre/osd-ldiskfs/osd_lproc.c | 4 +-- lustre/osd-ldiskfs/osd_scrub.c | 2 +- lustre/osd-ldiskfs/osd_scrub.h | 1 - lustre/tests/sanity-scrub.sh | 14 +++++++--- 8 files changed, 63 insertions(+), 58 deletions(-) diff --git a/lustre/mdd/mdd_lfsck.c b/lustre/mdd/mdd_lfsck.c index 73281ef..0cf9380 100644 --- a/lustre/mdd/mdd_lfsck.c +++ b/lustre/mdd/mdd_lfsck.c @@ -311,7 +311,7 @@ int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd) lfsck->ml_bookmark_obj = obj; - obj = dt_locate(env, mdd->mdd_child, &lfsck_it_fid); + obj = dt_locate(env, mdd->mdd_bottom, &lfsck_it_fid); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/lustre/obdclass/local_storage.c b/lustre/obdclass/local_storage.c index 1922804..3dd76e6 100644 --- a/lustre/obdclass/local_storage.c +++ b/lustre/obdclass/local_storage.c @@ -627,8 +627,8 @@ int 
local_oid_storage_init(const struct lu_env *env, struct dt_device *dev, struct dt_thread_info *dti = dt_info(env); struct ls_device *ls; struct los_ondisk losd; - struct dt_object *o; struct dt_object *root = NULL; + struct dt_object *o = NULL; struct thandle *th; int rc; @@ -654,12 +654,6 @@ int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev, cfs_atomic_inc(&ls->ls_refcount); cfs_list_add(&(*los)->los_list, &ls->ls_los_list); - /* initialize data allowing to generate new fids, - * literally we need a sequence */ - o = ls_locate(env, ls, first_fid); - if (IS_ERR(o)) - GOTO(out_los, rc = PTR_ERR(o)); - rc = dt_root_get(env, dev, &dti->dti_fid); if (rc) GOTO(out_los, rc); @@ -668,11 +662,25 @@ int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev, if (IS_ERR(root)) GOTO(out_los, rc = PTR_ERR(root)); - if (dt_try_as_dir(env, root) == 0) - GOTO(out_los, rc = -ENOTDIR); + snprintf(dti->dti_buf, sizeof(dti->dti_buf), "seq-%Lx-lastid", + fid_seq(first_fid)); + rc = dt_lookup_dir(env, root, dti->dti_buf, &dti->dti_fid); + if (rc != 0 && rc != -ENOENT) + GOTO(out_los, rc); + + /* initialize data allowing to generate new fids, + * literally we need a sequence */ + if (rc == 0) + o = ls_locate(env, ls, &dti->dti_fid); + else + o = ls_locate(env, ls, first_fid); + if (IS_ERR(o)) + GOTO(out_los, rc = PTR_ERR(o)); dt_write_lock(env, o, 0); if (!dt_object_exists(o)) { + LASSERT(rc == -ENOENT); + th = dt_trans_create(env, dev); if (IS_ERR(th)) GOTO(out_lock, rc = PTR_ERR(th)); @@ -686,8 +694,6 @@ int local_oid_storage_init(const struct lu_env *env, struct dt_device *dev, if (rc) GOTO(out_trans, rc); - snprintf(dti->dti_buf, sizeof(dti->dti_buf), - "seq-%Lx-lastid", fid_seq(first_fid)); rc = dt_declare_insert(env, root, (const struct dt_rec *)lu_object_fid(&o->do_lu), (const struct dt_key *)dti->dti_buf, @@ -766,12 +772,15 @@ out_trans: out_lock: dt_write_unlock(env, o); out_los: - if (root) + if (root != NULL && !IS_ERR(root)) 
lu_object_put_nocache(env, &root->do_lu); - if (rc) { + + if (rc != 0) { + cfs_list_del(&(*los)->los_list); + cfs_atomic_dec(&ls->ls_refcount); OBD_FREE_PTR(*los); *los = NULL; - if (o) + if (o != NULL && !IS_ERR(o)) lu_object_put_nocache(env, &o->do_lu); } else { (*los)->los_seq = fid_seq(first_fid); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index ac8d51c..e5797c3 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -276,12 +276,10 @@ osd_iget_verify(struct osd_thread_info *info, struct osd_device *dev, return inode; rc = osd_get_lma(inode, &info->oti_obj_dentry, lma); + if (rc == -ENODATA) + return inode; + if (rc != 0) { - if (rc == -ENODATA) { - CDEBUG(D_LFSCK, "inconsistent obj: NULL, %lu, "DFID"\n", - inode->i_ino, PFID(fid)); - rc = -EREMCHG; - } iput(inode); return ERR_PTR(rc); } @@ -292,6 +290,7 @@ osd_iget_verify(struct osd_thread_info *info, struct osd_device *dev, iput(inode); return ERR_PTR(EREMCHG); } + return inode; } @@ -326,21 +325,19 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj, if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) RETURN(-ENOENT); - if (fid_is_norm(fid)) { - /* Search order: 1. per-thread cache. */ - if (lu_fid_eq(fid, &oic->oic_fid)) { + /* Search order: 1. per-thread cache. */ + if (lu_fid_eq(fid, &oic->oic_fid)) { + goto iget; + } else if (!cfs_list_empty(&scrub->os_inconsistent_items)) { + /* Search order: 2. OI scrub pending list. */ + result = osd_oii_lookup(dev, fid, id); + if (result == 0) goto iget; - } else if (!cfs_list_empty(&scrub->os_inconsistent_items)) { - /* Search order: 2. OI scrub pending list. */ - result = osd_oii_lookup(dev, fid, id); - if (result == 0) - goto iget; - } - - if (sf->sf_flags & SF_INCONSISTENT) - verify = 1; } + if (sf->sf_flags & SF_INCONSISTENT) + verify = 1; + /* * Objects are created as locking anchors or place holders for objects * yet to be created. 
No need to osd_oi_lookup() at here because FID @@ -380,7 +377,7 @@ iget: trigger: if (thread_is_running(&scrub->os_thread)) { result = -EINPROGRESS; - } else if (!scrub->os_no_scrub) { + } else if (!dev->od_noscrub) { result = osd_scrub_start(dev); LCONSOLE_ERROR("%.16s: trigger OI scrub by RPC " "for "DFID", rc = %d [1]\n", @@ -3321,6 +3318,9 @@ osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev, int rc; ENTRY; + if (!fid_is_norm(fid) && !fid_is_igif(fid)) + RETURN(0); + again: rc = osd_oi_lookup(oti, dev, fid, id); if (rc != 0 && rc != -ENOENT) @@ -3341,7 +3341,7 @@ again: RETURN(rc); } - if (!scrub->os_no_scrub && ++once == 1) { + if (!dev->od_noscrub && ++once == 1) { CDEBUG(D_LFSCK, "Trigger OI scrub by RPC for "DFID"\n", PFID(fid)); rc = osd_scrub_start(dev); @@ -3353,7 +3353,7 @@ again: goto again; } - RETURN(rc = -EREMCHG); + RETURN(0); } /** @@ -3406,7 +3406,7 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, rc = osd_ea_fid_get(env, obj, ino, fid, &oic->oic_lid); else osd_id_gen(&oic->oic_lid, ino, OSD_OII_NOGEN); - if (rc != 0 || !fid_is_norm(fid)) { + if (rc != 0) { fid_zero(&oic->oic_fid); GOTO(out, rc); } @@ -3416,7 +3416,7 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj, (sf->sf_flags & SF_INCONSISTENT || ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap))) - rc = osd_consistency_check(oti, dev, oic); + osd_consistency_check(oti, dev, oic); } else { rc = -ENOENT; } @@ -4148,16 +4148,11 @@ static inline int osd_it_ea_rec(const struct lu_env *env, it->oie_dirent->oied_name, it->oie_dirent->oied_namelen, it->oie_dirent->oied_type, attr); - if (!fid_is_norm(fid)) { - fid_zero(&oic->oic_fid); - RETURN(0); - } - oic->oic_fid = *fid; if ((scrub->os_pos_current <= ino) && (sf->sf_flags & SF_INCONSISTENT || ldiskfs_test_bit(osd_oi_fid2idx(dev, fid), sf->sf_oi_bitmap))) - rc = osd_consistency_check(oti, dev, oic); + osd_consistency_check(oti, dev, oic); 
RETURN(rc); } @@ -4227,7 +4222,6 @@ static int osd_index_ea_lookup(const struct lu_env *env, struct dt_object *dt, return -EACCES; rc = osd_ea_lookup_rec(env, obj, rec, key); - if (rc == 0) rc = +1; RETURN(rc); @@ -4430,7 +4424,7 @@ static int osd_mount(const struct lu_env *env, } else o->od_iop_mode = 1; if (lmd_flags & LMD_FLG_NOSCRUB) - o->od_scrub.os_no_scrub = 1; + o->od_noscrub = 1; out: if (__page) diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 86778fc..0a3480d 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -271,11 +271,8 @@ struct osd_device { struct obd_statfs od_statfs; cfs_spinlock_t od_osfs_lock; - /** - * The following flag indicates, if it is interop mode or not. - * It will be initialized, using mount param. - */ - __u32 od_iop_mode; + unsigned int od_iop_mode:1, + od_noscrub:1; struct fsfilt_operations *od_fsops; int od_connects; diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index b88128f..9e99fb3 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -452,7 +452,7 @@ static int lprocfs_osd_rd_auto_scrub(char *page, char **start, off_t off, return -EINPROGRESS; *eof = 1; - return snprintf(page, count, "%d\n", !dev->od_scrub.os_no_scrub); + return snprintf(page, count, "%d\n", !dev->od_noscrub); } static int lprocfs_osd_wr_auto_scrub(struct file *file, const char *buffer, @@ -469,7 +469,7 @@ static int lprocfs_osd_wr_auto_scrub(struct file *file, const char *buffer, if (rc) return rc; - dev->od_scrub.os_no_scrub = !val; + dev->od_noscrub = !val; return count; } diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 98827d7..7e58c20 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -1105,7 +1105,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) } } - if (rc == 0 && !scrub->os_no_scrub && + if (rc == 0 && !dev->od_noscrub && 
((sf->sf_status == SS_PAUSED) || (sf->sf_status == SS_CRASHED && sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_AUTO)) || diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index c3e54b3..b07928e 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -198,7 +198,6 @@ struct osd_scrub { * found by RPC prior */ os_waiting:1, /* Waiting for scan window. */ os_full_speed:1, /* run w/o speed limit */ - os_no_scrub:1, /* NOT auto trigger OI scrub*/ os_paused:1; /* The scrub is paused. */ }; diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 1b2c6cd..cf54e80 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -122,7 +122,7 @@ test_1b() { local FLAGS=$($SHOW_SCRUB | awk '/^flags/ { print $2 }') [ "$FLAGS" == "recreated" ] || - error "(3) Expect 'recreated', but got '$STATUS'" + error "(3) Expect 'recreated', but got '$FLAGS'" $START_SCRUB || error "(4) Fail to start OI scrub!" sleep 3 @@ -683,11 +683,12 @@ test_11() { echo "setupall" setupall > /dev/null + local CREATED=100 local tname=`date +%s` rm -rf $MOUNT/$tname > /dev/null mkdir $MOUNT/$tname || error "(1) Fail to mkdir $MOUNT/$tname" - createmany -o $MOUNT/$tname/f 100 || error "(2) Fail to create!" + createmany -o $MOUNT/$tname/f $CREATED || error "(2) Fail to create!" # reset OI scrub start point by force $START_SCRUB -r || error "(3) Fail to start OI scrub!" 
@@ -698,8 +699,13 @@ test_11() { # OI scrub should skip the new created objects for the first accessing local SKIPPED=$($SHOW_SCRUB | awk '/^noscrub/ { print $2 }') - [ $SKIPPED -eq 101 ] || - error "(5) Expect 101 objects skipped, but got $SKIPPED" + # notice we're creating a new llog for every OST on every startup + # new features can make this even less stable, so we only check + # that the number of skipped files is less than 1.5x the number of files + local MAXIMUM=$((CREATED * 3 / 2)) + local MINIMUM=$((CREATED + 1)) # files + directory + [ $SKIPPED -ge $MAXIMUM -o $SKIPPED -lt $MINIMUM ] && + error "(5) Expect [ $MINIMUM , $MAXIMUM ) objects skipped, got $SKIPPED" # reset OI scrub start point by force $START_SCRUB -r || error "(6) Fail to start OI scrub!" -- 1.8.3.1