From: Alex Zhuravlev Date: Sat, 27 Jul 2013 20:07:57 +0000 (+0400) Subject: LU-3489 osd-zfs: object iterator support X-Git-Tag: 2.5.58~87 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=cf753f6b4bdc267b5169e97293f69b5362b826de;p=fs%2Flustre-release.git LU-3489 osd-zfs: object iterator support A trivial object iterator for ZFS using dmu_object_next(). The iterator skips all objects with no LMA EA. It implements a simple prefetching algorithm. It also enables a few tests in sanity-lfsck to be run with ZFS. Test-Parameters: mdtfilesystemtype=zfs \ ostfilesystemtype=zfs mdsfilesystemtype=zfs \ testlist=sanity-scrub,sanity-lfsck Signed-off-by: Alex Zhuravlev Change-Id: Icf02342ac351a163e23575963d3366761c59b02b Reviewed-on: http://review.whamcloud.com/7149 Reviewed-by: Andreas Dilger Tested-by: Jenkins Reviewed-by: Fan Yong Tested-by: Maloo Reviewed-by: Nathaniel Clark --- diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index 51db373..b96e642 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -1338,6 +1338,212 @@ static struct dt_index_operations osd_index_ops = { } }; +struct osd_metadnode_it { + struct osd_device *mit_dev; + __u64 mit_pos; + struct lu_fid mit_fid; + int mit_prefetched; + __u64 mit_prefetched_dnode; +}; + +static struct dt_it *osd_zfs_otable_it_init(const struct lu_env *env, + struct dt_object *dt, __u32 attr, + struct lustre_capa *capa) +{ + struct osd_device *dev = osd_dev(dt->do_lu.lo_dev); + struct osd_metadnode_it *it; + ENTRY; + + OBD_ALLOC_PTR(it); + if (unlikely(it == NULL)) + RETURN(ERR_PTR(-ENOMEM)); + + it->mit_dev = dev; + + RETURN((struct dt_it *)it); +} + +static void osd_zfs_otable_it_fini(const struct lu_env *env, struct dt_it *di) +{ + struct osd_metadnode_it *it = (struct osd_metadnode_it *)di; + + OBD_FREE_PTR(it); +} + +static int osd_zfs_otable_it_get(const struct lu_env *env, + struct dt_it *di, const struct dt_key *key) +{ + return 0; +} + +static void 
osd_zfs_otable_it_put(const struct lu_env *env, struct dt_it *di) +{ +} + +#define OTABLE_PREFETCH 256 + +static void osd_zfs_otable_prefetch(const struct lu_env *env, + struct osd_metadnode_it *it) +{ + struct osd_device *dev = it->mit_dev; + udmu_objset_t *uos = &dev->od_objset; + int rc; + + /* can go negative on the very first access to the iterator + * or if some non-Lustre objects were found */ + if (unlikely(it->mit_prefetched < 0)) + it->mit_prefetched = 0; + + if (it->mit_prefetched >= (OTABLE_PREFETCH >> 1)) + return; + + if (it->mit_prefetched_dnode == 0) + it->mit_prefetched_dnode = it->mit_pos; + + while (it->mit_prefetched < OTABLE_PREFETCH) { + rc = -dmu_object_next(uos->os, &it->mit_prefetched_dnode, + B_FALSE, 0); + if (unlikely(rc != 0)) + break; + + /* dmu_prefetch() was exported in 0.6.2, if you use with + * an older release, just comment it out - this is an + * optimization */ + dmu_prefetch(uos->os, it->mit_prefetched_dnode, 0, 0); + + it->mit_prefetched++; + } +} + +static int osd_zfs_otable_it_next(const struct lu_env *env, struct dt_it *di) +{ + struct osd_metadnode_it *it = (struct osd_metadnode_it *)di; + struct lustre_mdt_attrs *lma; + struct osd_device *dev = it->mit_dev; + udmu_objset_t *uos = &dev->od_objset; + nvlist_t *nvbuf = NULL; + uchar_t *v; + __u64 dnode; + int rc, s; + ENTRY; + + memset(&it->mit_fid, 0, sizeof(it->mit_fid)); + + dnode = it->mit_pos; + do { + rc = -dmu_object_next(uos->os, &it->mit_pos, B_FALSE, 0); + if (unlikely(rc != 0)) + GOTO(out, rc = 1); + it->mit_prefetched--; + + /* LMA is required for this to be a Lustre object. + * If there is no xattr skip it. 
*/ + rc = __osd_xattr_load(uos, it->mit_pos, &nvbuf); + if (unlikely(rc != 0)) + continue; + + LASSERT(nvbuf != NULL); + rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA, &v, &s); + if (likely(rc == 0)) { + /* Lustre object */ + lma = (struct lustre_mdt_attrs *)v; + lustre_lma_swab(lma); + it->mit_fid = lma->lma_self_fid; + nvlist_free(nvbuf); + break; + } else { + /* not a Lustre object, try next one */ + nvlist_free(nvbuf); + } + + } while (1); + + + /* we aren't prefetching in the above loop because the number of + * non-Lustre objects is very small, so the loop rarely repeats. + * If this is ever used to iterate over non-Lustre objects (e.g. + * when converting a regular ZFS dataset to Lustre), it would make + * sense to initiate prefetching in the loop */ + + /* 0 - there are more items, +1 - the end */ + if (likely(rc == 0)) + osd_zfs_otable_prefetch(env, it); + + CDEBUG(D_OTHER, "advance: %llu -> %llu "DFID": %d\n", dnode, + it->mit_pos, PFID(&it->mit_fid), rc); + +out: + RETURN(rc); +} + +static struct dt_key *osd_zfs_otable_it_key(const struct lu_env *env, + const struct dt_it *di) +{ + return NULL; +} + +static int osd_zfs_otable_it_key_size(const struct lu_env *env, + const struct dt_it *di) +{ + return sizeof(__u64); +} + +static int osd_zfs_otable_it_rec(const struct lu_env *env, + const struct dt_it *di, + struct dt_rec *rec, __u32 attr) +{ + struct osd_metadnode_it *it = (struct osd_metadnode_it *)di; + struct lu_fid *fid = (struct lu_fid *)rec; + ENTRY; + + *fid = it->mit_fid; + + RETURN(0); +} + + +static __u64 osd_zfs_otable_it_store(const struct lu_env *env, + const struct dt_it *di) +{ + struct osd_metadnode_it *it = (struct osd_metadnode_it *)di; + + return it->mit_pos; +} + +static int osd_zfs_otable_it_load(const struct lu_env *env, + const struct dt_it *di, __u64 hash) +{ + struct osd_metadnode_it *it = (struct osd_metadnode_it *)di; + + it->mit_pos = hash; + it->mit_prefetched = 0; + it->mit_prefetched_dnode = 0; + + return 
osd_zfs_otable_it_next(env, (struct dt_it *)di); +} + +static int osd_zfs_otable_it_key_rec(const struct lu_env *env, + const struct dt_it *di, void *key_rec) +{ + return 0; +} + +const struct dt_index_operations osd_zfs_otable_ops = { + .dio_it = { + .init = osd_zfs_otable_it_init, + .fini = osd_zfs_otable_it_fini, + .get = osd_zfs_otable_it_get, + .put = osd_zfs_otable_it_put, + .next = osd_zfs_otable_it_next, + .key = osd_zfs_otable_it_key, + .key_size = osd_zfs_otable_it_key_size, + .rec = osd_zfs_otable_it_rec, + .store = osd_zfs_otable_it_store, + .load = osd_zfs_otable_it_load, + .key_rec = osd_zfs_otable_it_key_rec, + } +}; + int osd_index_try(const struct lu_env *env, struct dt_object *dt, const struct dt_index_features *feat) { @@ -1353,9 +1559,10 @@ int osd_index_try(const struct lu_env *env, struct dt_object *dt, if (feat->dif_flags & DT_IND_RANGE) RETURN(-ERANGE); - if (unlikely(feat == &dt_otable_features)) - /* do not support oi scrub yet. */ - RETURN(-ENOTSUPP); + if (unlikely(feat == &dt_otable_features)) { + dt->do_index_ops = &osd_zfs_otable_ops; + RETURN(0); + } LASSERT(obj->oo_db != NULL); if (likely(feat == &dt_directory_features)) { @@ -1400,4 +1607,3 @@ int osd_index_try(const struct lu_env *env, struct dt_object *dt, RETURN(0); } - diff --git a/lustre/tests/lfsck-performance.sh b/lustre/tests/lfsck-performance.sh index c30ab30..17da00e 100644 --- a/lustre/tests/lfsck-performance.sh +++ b/lustre/tests/lfsck-performance.sh @@ -13,10 +13,6 @@ init_test_env $@ . 
${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -#remove it when zfs-based backend iteration is enabled -[ $(facet_fstype $SINGLEMDS) != ldiskfs ] && - skip "lfsck performance only for ldiskfs" && exit 0 - require_dsh_mds || exit 0 require_dsh_ost || exit 0 @@ -143,8 +139,8 @@ test_0() { do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype \ - ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || - error "Fail to reformat the MDS!" + $(facet_fstype ${SINGLEMDS}) --reformat ${MDT_DEVNAME} \ + $(mdsvdevname 1) >/dev/null || error "Fail to reformat the MDS!" for ((i=$MINCOUNT; i<=$MAXCOUNT; i=$((i * FACTOR)))); do local nfiles=$((i - BCOUNT)) @@ -178,6 +174,9 @@ test_0() { run_test 0 "lfsck namespace performance (routine case) without load" test_1() { + [ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "not implemented for ZFS" && return + local BCOUNT=0 local i @@ -185,8 +184,8 @@ test_1() { do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype \ - ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || - error "Fail to reformat the MDS!" + $(facet_fstype ${SINGLEMDS}) --reformat ${MDT_DEVNAME} \ + $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS" for ((i=$MINCOUNT_REPAIR; i<=$MAXCOUNT_REPAIR; i=$((i * FACTOR)))); do local nfiles=$((i - BCOUNT)) @@ -237,8 +236,8 @@ test_2() { do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) \ - --backfstype ldiskfs --reformat ${MDT_DEVNAME} \ - $(mdsvdevname 1) > /dev/null || + --backfstype $(facet_fstype ${SINGLEMDS}) --reformat \ + ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS!" 
echo "+++ start to create for ${i} files set at: $(date) +++" @@ -279,8 +278,8 @@ test_3() { do_rpc_nodes $(facet_active_host $SINGLEMDS) load_modules_local reformat_external_journal add ${SINGLEMDS} $(mkfs_opts ${SINGLEMDS} ${MDT_DEVNAME}) --backfstype \ - ldiskfs --reformat ${MDT_DEVNAME} $(mdsvdevname 1) > /dev/null || - error "Fail to reformat the MDS!" + $(facet_fstype ${SINGLEMDS}) --reformat ${MDT_DEVNAME} \ + $(mdsvdevname 1) > /dev/null || error "Fail to reformat the MDS" for ((i=$inc_count; i<=$BASE_COUNT; i=$((i + inc_count)))); do local nfiles=$((i - BCOUNT)) diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 55b562c..f1ae69b 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -17,12 +17,6 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -# remove the check when ZFS backend iteration is ready -[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && - skip "test LFSCK only for ldiskfs" && exit 0 -[ $(facet_fstype ost1) != ldiskfs ] && - skip "test LFSCK only for ldiskfs" && exit 0 - require_dsh_mds || exit 0 MCREATE=${MCREATE:-mcreate} @@ -137,10 +131,10 @@ test_0() { do_facet $SINGLEMDS $LCTL set_param fail_loc=0 do_facet $SINGLEMDS $LCTL set_param fail_val=0 - sleep 3 - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(9) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS \ + "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace | \ + awk '/^status/ { print \\\$2 }'" "completed" 20 || \ + error "(9) unexpected status" local repaired=$($SHOW_NAMESPACE | awk '/^updated_phase1/ { print $2 }') @@ -149,11 +143,10 @@ test_0() { local scanned1=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }') $START_NAMESPACE -r || error "(11) Fail to reset LFSCK!" 
- sleep 3 - - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "completed" ] || - error "(12) Expect 'completed', but got '$STATUS'" + wait_update_facet $SINGLEMDS \ + "$LCTL get_param -n mdd.${MDT_DEV}.lfsck_namespace | \ + awk '/^status/ { print \\\$2 }'" "completed" 20 || \ + error "(12) unexpected status" local scanned2=$($SHOW_NAMESPACE | awk '/^success_count/ { print $2 }') [ $((scanned1 + 1)) -eq $scanned2 ] || @@ -165,6 +158,9 @@ test_0() { run_test 0 "Control LFSCK manually" test_1a() { + [ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "OI Scrub not implemented for ZFS" && return + lfsck_prep 1 1 echo "start $SINGLEMDS" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || @@ -202,6 +198,9 @@ run_test 1a "LFSCK can find out and repair crashed FID-in-dirent" test_1b() { + [ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "OI Scrub not implemented for ZFS" && return + lfsck_prep 1 1 echo "start $SINGLEMDS" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || @@ -358,6 +357,9 @@ run_test 2c "LFSCK can find out and remove repeated linkEA entry" test_4() { + [ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "OI Scrub not implemented for ZFS" && return + lfsck_prep 3 3 mds_backup_restore $SINGLEMDS || error "(1) Fail to backup/restore!" echo "start $SINGLEMDS with disabling OI scrub" @@ -409,6 +411,9 @@ run_test 4 "FID-in-dirent can be rebuilt after MDT file-level backup/restore" test_5() { + [ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "OI Scrub not implemented for ZFS" && return + lfsck_prep 1 1 1 mds_backup_restore $SINGLEMDS 1 || error "(1) Fail to backup/restore!" 
echo "start $SINGLEMDS with disabling OI scrub" @@ -938,6 +943,9 @@ run_test 9b "LFSCK speed control (2)" test_10() { + [ $(facet_fstype $SINGLEMDS) != ldiskfs ] && + skip "lookup(..)/linkea on ZFS issue" && return + lfsck_prep 1 1 echo "start $SINGLEMDS" start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || @@ -1024,7 +1032,7 @@ ost_remove_lastid() { # step 1: local mount mount_fstype ost${ost} || return 1 # step 2: remove the specified LAST_ID - ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/LAST_ID + ${rcmd} rm -fv $(facet_mntpt ost${ost})/O/${idx}/{LAST_ID,d0/0} # step 3: umount unmount_fstype ost${ost} || return 2 }