From: Alex Zhuravlev
Date: Fri, 28 Mar 2014 06:39:52 +0000 (+0400)
Subject: LU-4838 osd: object iterator in zfs to sync before iteration
X-Git-Tag: 2.5.58~16
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=f26aec12013fa84332911220f8416d2b21bdbe65

LU-4838 osd: object iterator in zfs to sync before iteration

The issue with dmu_object_next() is that it doesn't expose
non-committed blocks in the metadnode. Therefore the iterator
doesn't find dnodes allocated in the current (non-committed) txg
which breaks testing and may affect LFSCK.

As a short-term solution, osd_zfs_otable_it_init() ensures the
current txg is committed.

Signed-off-by: Alex Zhuravlev
Change-Id: I639f37ced5790d1e1514c3e24594e360fcd6c1a8
Signed-off-by: Nathaniel Clark
Reviewed-on: http://review.whamcloud.com/9838
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Tested-by: Andreas Dilger
---

diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c
index b96e642..cbf391c 100644
--- a/lustre/osd-zfs/osd_index.c
+++ b/lustre/osd-zfs/osd_index.c
@@ -1360,6 +1360,11 @@ static struct dt_it *osd_zfs_otable_it_init(const struct lu_env *env,
 
 	it->mit_dev = dev;
 
+	/* XXX: dmu_object_next() does NOT find dnodes allocated
+	 * in the current non-committed txg, so we force txg
+	 * commit to find all existing dnodes ... */
+	txg_wait_synced(dmu_objset_pool(dev->od_objset.os), 0ULL);
+
 	RETURN((struct dt_it *)it);
 }
 
@@ -1425,7 +1430,6 @@ static int osd_zfs_otable_it_next(const struct lu_env *env, struct dt_it *di)
 	uchar_t *v;
 	__u64 dnode;
 	int rc, s;
-	ENTRY;
 
 	memset(&it->mit_fid, 0, sizeof(it->mit_fid));
 
@@ -1473,7 +1477,7 @@ static int osd_zfs_otable_it_next(const struct lu_env *env, struct dt_it *di)
 	       it->mit_pos, PFID(&it->mit_fid), rc);
 
 out:
-	RETURN(rc);
+	return rc;
 }
 
 static struct dt_key *osd_zfs_otable_it_key(const struct lu_env *env,
diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh
index f1ae69b..49519c4 100644
--- a/lustre/tests/sanity-lfsck.sh
+++ b/lustre/tests/sanity-lfsck.sh
@@ -844,10 +844,11 @@ test_9a() {
 
 	do_facet $SINGLEMDS \
 		$LCTL set_param -n mdd.${MDT_DEV}.lfsck_speed_limit 0
-	sleep 5
-	STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }')
-	[ "$STATUS" == "completed" ] ||
-		error "(7) Expect 'completed', but got '$STATUS'"
+
+	wait_update_facet $SINGLEMDS \
+		"$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace|\
+		awk '/^status/ { print \\\$2 }'" "completed" 30 ||
+		error "(7) Failed to get expected 'completed'"
 }
 run_test 9a "LFSCK speed control (1)"
 
@@ -1433,7 +1434,8 @@ test_15b() {
 
 	mkdir -p $DIR/$tdir
 	$LFS setstripe -c 1 -i 0 $DIR/$tdir
-	touch $DIR/$tdir/guard
+	dd if=/dev/zero of=$DIR/$tdir/guard bs=1K count=1
+	cancel_lru_locks osc
 
 	echo "Inject failure stub to make the OST-object to back point to"
 	echo "other MDT-object"