From 5bd666d45ab6d0d852012c5fa53e05ec0a24501f Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sat, 6 Jul 2013 18:14:57 +0800 Subject: [PATCH] LU-3335 scrub: convert filter_fid_old to LMA The OI scrub will generate FID-in-LMA for old OST-object, which will unify OSD behaviors. To make LMA to fit into the 256-byte OST inode, it will shrink the filter_fid_old as filter_fid, and the parent FID will be kept. Signed-off-by: Fan Yong Change-Id: Ife669f88b4657928abee774b089115d78b53c380 Reviewed-on: http://review.whamcloud.com/7143 Reviewed-by: Alex Zhuravlev Tested-by: Hudson Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/osd-ldiskfs/osd_compat.c | 14 +++- lustre/osd-ldiskfs/osd_handler.c | 5 +- lustre/osd-ldiskfs/osd_internal.h | 11 ++- lustre/osd-ldiskfs/osd_scrub.c | 162 +++++++++++++++++++++++++++++--------- lustre/osd-ldiskfs/osd_scrub.h | 14 +++- lustre/tests/conf-sanity.sh | 41 ++++++++++ lustre/tests/sanity-lfsck.sh | 2 +- lustre/tests/sanity-scrub.sh | 4 +- lustre/utils/lustre_lfsck.c | 2 +- 9 files changed, 205 insertions(+), 50 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_compat.c b/lustre/osd-ldiskfs/osd_compat.c index 2f22f5a..a12c4e7 100644 --- a/lustre/osd-ldiskfs/osd_compat.c +++ b/lustre/osd-ldiskfs/osd_compat.c @@ -396,9 +396,21 @@ static int osd_ost_init(const struct lu_env *env, struct osd_device *dev) LASSERT(dev->od_fsops); osd_push_ctxt(dev, &new, &save); - d = simple_mkdir(rootd, dev->od_mnt, "O", 0755, 1); + d = ll_lookup_one_len("O", rootd, strlen("O")); if (IS_ERR(d)) GOTO(cleanup, rc = PTR_ERR(d)); + if (d->d_inode == NULL) { + dput(d); + /* The lookup() may be called again inside simple_mkdir(). + * Since the repeated lookup() is only called for "/O" at + * mount time, it will not affect overall performance. */ + d = simple_mkdir(rootd, dev->od_mnt, "O", 0755, 1); + if (IS_ERR(d)) + GOTO(cleanup, rc = PTR_ERR(d)); + + /* It is quite probable that the device is newly formatted. 
*/ + dev->od_maybe_new = 1; + } inode = d->d_inode; ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 523cfdf..dec4da4 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -317,11 +317,12 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) CLASSERT(LMA_OLD_SIZE >= sizeof(*lma)); rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA, info->oti_mdt_attrs_old, LMA_OLD_SIZE); - if (rc == -ENODATA) { + if (rc == -ENODATA && !fid_is_igif(lu_object_fid(&obj->oo_dt.do_lu)) && + osd_obj2dev(obj)->od_check_ff) { fid = &lma->lma_self_fid; rc = osd_get_idif(info, inode, dentry, fid); if (rc > 0) - rc = 0; + RETURN(0); } if (unlikely(rc == -ENODATA)) diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index efe0313..3daf5b2 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -239,11 +239,11 @@ struct osd_device { * Fid Capability */ unsigned int od_fl_capa:1, - od_is_md:1, /* set in ->ldo_prepare */ + od_maybe_new:1, od_noscrub:1, od_dirent_journal:1, - od_handle_nolma:1, - od_igif_inoi:1; + od_igif_inoi:1, + od_check_ff:1; unsigned long od_capa_timeout; __u32 od_capa_alg; @@ -595,7 +595,10 @@ struct osd_thread_info { bool oti_rollback; char oti_name[48]; - struct filter_fid_old oti_ff; + union { + struct filter_fid_old oti_ff; + struct filter_fid oti_ff_new; + }; }; extern int ldiskfs_pdo; diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 6be982c..299b532 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "osd_internal.h" #include "osd_oi.h" @@ -221,6 +222,7 @@ static void osd_scrub_file_to_cpu(struct scrub_file *des, des->sf_run_time = le32_to_cpu(src->sf_run_time); des->sf_success_count = le32_to_cpu(src->sf_success_count); 
des->sf_oi_count = le16_to_cpu(src->sf_oi_count); + des->sf_internal_flags = le16_to_cpu(src->sf_internal_flags); memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE); } @@ -255,6 +257,7 @@ static void osd_scrub_file_to_le(struct scrub_file *des, des->sf_run_time = cpu_to_le32(src->sf_run_time); des->sf_success_count = cpu_to_le32(src->sf_success_count); des->sf_oi_count = cpu_to_le16(src->sf_oi_count); + des->sf_internal_flags = cpu_to_le16(src->sf_internal_flags); memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE); } @@ -417,6 +420,80 @@ static int osd_scrub_prep(struct osd_device *dev) } static int +osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, + struct inode *inode, const struct lu_fid *fid) +{ + struct filter_fid_old *ff = &info->oti_ff; + struct dentry *dentry = &info->oti_obj_dentry; + handle_t *jh; + int size = 0; + int rc; + bool removed = false; + bool reset = true; + ENTRY; + + /* We want the LMA to fit into the 256-byte OST inode, so operate + * as following: + * 1) read old XATTR_NAME_FID and save the parent FID; + * 2) delete the old XATTR_NAME_FID; + * 3) make new LMA and add it; + * 4) generate new XATTR_NAME_FID with the saved parent FID and add it. + * + * Making the LMA to fit into the 256-byte OST inode can save time for + * normal osd_check_lma() and for other OI scrub scanning in future. + * So it is worth to make some slow conversion here. 
*/ + jh = ldiskfs_journal_start_sb(osd_sb(dev), + osd_dto_credits_noquota[DTO_XATTR_SET] * 3); + if (IS_ERR(jh)) { + rc = PTR_ERR(jh); + CERROR("%s: fail to start trans for convert ff: "DFID + ": rc = %d\n", + osd_name(dev), PFID(fid), rc); + RETURN(rc); + } + + /* 1) read old XATTR_NAME_FID and save the parent FID */ + rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff)); + if (rc == sizeof(*ff)) { + /* 2) delete the old XATTR_NAME_FID */ + ll_vfs_dq_init(inode); + rc = inode->i_op->removexattr(dentry, XATTR_NAME_FID); + if (rc != 0) + GOTO(stop, rc); + + removed = true; + } else if (unlikely(rc == -ENODATA)) { + reset = false; + } else if (rc != sizeof(struct filter_fid)) { + GOTO(stop, rc = -EINVAL); + } + + /* 3) make new LMA and add it */ + rc = osd_ea_fid_set(info, inode, fid, LMAC_FID_ON_OST, 0); + if (rc == 0 && reset) + size = sizeof(struct filter_fid); + else if (rc != 0 && removed) + /* If failed, we should try to add the old back. */ + size = sizeof(struct filter_fid_old); + + /* 4) generate new XATTR_NAME_FID with the saved parent FID and add it*/ + if (size > 0) { + int rc1; + + rc1 = __osd_xattr_set(info, inode, XATTR_NAME_FID, ff, size, + XATTR_CREATE); + if (rc1 != 0 && rc == 0) + rc = rc1; + } + + GOTO(stop, rc); + +stop: + ldiskfs_journal_stop(jh); + return rc; +} + +static int osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, struct osd_idmap_cache *oic, int val) { @@ -430,6 +507,7 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, int ops = DTO_INDEX_UPDATE; int idx; int rc; + bool converted = false; ENTRY; down_write(&scrub->os_rwsem); @@ -458,14 +536,19 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev, } sf->sf_flags |= SF_UPGRADE; + sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID; + dev->od_check_ff = 1; + rc = osd_scrub_convert_ff(info, dev, inode, fid); rc = osd_ea_fid_set(info, inode, fid, LMAC_FID_ON_OST, 0); if (rc != 0) GOTO(out, rc); 
+ + converted = true; } if ((val == SCRUB_NEXT_NOLMA) && - (!dev->od_handle_nolma || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA))) + (!scrub->os_convert_igif || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA))) GOTO(out, rc = 0); if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA)) @@ -514,6 +597,9 @@ iget: break; } } else if (osd_id_eq(lid, lid2)) { + if (converted) + sf->sf_items_updated++; + GOTO(out, rc = 0); } else { scrub->os_full_speed = 1; @@ -627,6 +713,7 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result) container_of0(scrub, struct osd_device, od_scrub); dev->od_igif_inoi = 1; + dev->od_check_ff = 0; sf->sf_status = SS_COMPLETED; memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE); sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | @@ -770,7 +857,7 @@ static int osd_scrub_get_fid(struct osd_thread_info *info, return rc; if (!has_lma) { - if (dev->od_handle_nolma) { + if (dev->od_scrub.os_convert_igif) { lu_igif_build(fid, inode->i_ino, inode->i_generation); if (scrub) @@ -1357,32 +1444,6 @@ osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen) return dentry; } -static inline void -osd_ios_llogname2fid(struct lu_fid *fid, const char *name, int namelen) -{ - obd_id id = 0; - int i = 0; - - fid->f_seq = FID_SEQ_LLOG; - while (i < namelen) - id = id * 10 + name[i++] - '0'; - - fid->f_oid = id & 0x00000000ffffffffULL; - fid->f_ver = id >> 32; -} - -static inline void -osd_ios_Oname2fid(struct lu_fid *fid, const char *name, int namelen) -{ - __u64 seq = 0; - int i = 0; - - while (i < namelen) - seq = seq * 10 + name[i++] - '0'; - - lu_last_id_fid(fid, seq); -} - static int osd_ios_new_item(struct osd_device *dev, struct dentry *dentry, scandir_t scandir, filldir_t filldir) @@ -1661,7 +1722,7 @@ osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev, * and try to re-generate the LMA from the OI mapping. But if the * OI mapping crashed or lost also, then we have to give up under * double failure cases. 
*/ - dev->od_handle_nolma = 1; + scrub->os_convert_igif = 1; child = osd_ios_lookup_one_len(dot_lustre_name, dentry, strlen(dot_lustre_name)); if (IS_ERR(child)) { @@ -1672,6 +1733,7 @@ osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev, osd_scrub_file_reset(scrub, LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, SF_UPGRADE); + sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID; rc = osd_scrub_file_store(scrub); } else { rc = 0; @@ -1713,10 +1775,19 @@ static int osd_ios_OBJECTS_scan(struct osd_thread_info *info, struct osd_device *dev, struct dentry *dentry, filldir_t filldir) { - struct dentry *child; - int rc; + struct osd_scrub *scrub = &dev->od_scrub; + struct scrub_file *sf = &scrub->os_file; + struct dentry *child; + int rc; ENTRY; + if (unlikely(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID)) { + sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID; + rc = osd_scrub_file_store(scrub); + if (rc != 0) + RETURN(rc); + } + child = osd_ios_lookup_one_len(ADMIN_USR, dentry, strlen(ADMIN_USR)); if (!IS_ERR(child)) { rc = osd_ios_scan_one(info, dev, child->d_inode, NULL, 0); @@ -1975,6 +2046,20 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) rc = osd_scrub_file_load(scrub); if (rc == -ENOENT) { osd_scrub_file_init(scrub, es->s_uuid); + /* If neither the "/O" dir (as indicated by + * osd_device::od_maybe_new) nor "/OI_scrub" exists at mount + * time, then the device is quite probably a new one; in + * that case, mark it as SIF_NO_HANDLE_OLD_FID. + * + * For the rare case that "/O" and "OI_scrub" are both lost + * on an old device, it can be found and cleared later. + * + * For a system with "SIF_NO_HANDLE_OLD_FID", we do not + * need to check "filter_fid_old" and convert it to + * "filter_fid" for each object, and all the IGIF should + * have their FID mapping in OI files already. 
*/ + if (dev->od_maybe_new) + sf->sf_internal_flags = SIF_NO_HANDLE_OLD_FID; dirty = 1; } else if (rc != 0) { RETURN(rc); @@ -2006,12 +2091,17 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev) rc = osd_initial_OI_scrub(info, dev); if (rc == 0) { - if ((sf->sf_flags & SF_UPGRADE) && - !(sf->sf_flags & SF_INCONSISTENT)) - /* The 'od_igif_inoi' will be set after the - * upgrading completed, needs NOT remount. */ + if (sf->sf_flags & SF_UPGRADE || + !(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID || + sf->sf_success_count > 0)) { dev->od_igif_inoi = 0; - else + dev->od_check_ff = 1; + } else { + dev->od_igif_inoi = 1; + dev->od_check_ff = 0; + } + + if (sf->sf_flags & SF_INCONSISTENT) /* The 'od_igif_inoi' will be set under the * following cases: * 1) new created system, or diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index f5eb375..03d90ab 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -97,6 +97,12 @@ enum scrub_start { SS_AUTO = 0x00000008, }; +/* The flags here are only used inside OSD, NOT be visible by dump(). */ +enum scrub_internal_flags { + /* This is a new formatted device. */ + SIF_NO_HANDLE_OLD_FID = 0x0001, +}; + struct scrub_file { /* 128-bit uuid for volume. */ __u8 sf_uuid[16]; @@ -158,8 +164,9 @@ struct scrub_file { /* How many OI files. */ __u16 sf_oi_count; - /* Update the magic or flags if want to use the reserved fields. */ - __u16 sf_reserved_0; + /* Keep the flags after scrub reset. See 'enum scrub_internal_flags' */ + __u16 sf_internal_flags; + __u32 sf_reserved_1; __u64 sf_reserved_2[16]; @@ -211,7 +218,8 @@ struct osd_scrub { * found by RPC prior */ os_waiting:1, /* Waiting for scan window. */ os_full_speed:1, /* run w/o speed limit */ - os_paused:1; /* The scrub is paused. */ + os_paused:1, /* The scrub is paused. 
*/ + os_convert_igif:1; }; #endif /* _OSD_SCRUB_H */ diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index fb5db0c..d1010cc 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1490,6 +1490,7 @@ t32_test() { local tarball=$1 local writeconf=$2 local dne_upgrade=${dne_upgrade:-"no"} + local ff_convert=${ff_convert:-"no"} local shall_cleanup_mdt=false local shall_cleanup_mdt1=false local shall_cleanup_ost=false @@ -1530,6 +1531,9 @@ t32_test() { echo " Kernel: $img_kernel" echo " Arch: $img_arch" + local version=$(version_code $img_commit) + [[ $version -gt $(version_code 2.4.0) ]] && ff_convert="no" + $r $LCTL set_param debug="$PTLDEBUG" $r $TUNEFS --dryrun $tmp/mdt || { @@ -1663,6 +1667,30 @@ t32_test() { return 1 } + if [ "$ff_convert" != "no" -a $(facet_fstype ost1) == "ldiskfs" ]; then + $r $LCTL lfsck_start -M $fsname-OST0000 || { + error_noexit "Start OI scrub on OST0" + return 1 + } + + # The oi_scrub should be on ost1, but for test_32(), + # all on the SINGLEMDS. + wait_update_facet $SINGLEMDS "$LCTL get_param -n \ + osd-ldiskfs.$fsname-OST0000.oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 30 || { + error_noexit "Failed to get the expected 'completed'" + return 1 + } + + local UPDATED=$($r $LCTL get_param -n \ + osd-ldiskfs.$fsname-OST0000.oi_scrub | + awk '/^updated/ { print $2 }') + [ $UPDATED -ge 1 ] || { + error_noexit "Only $UPDATED objects have been converted" + return 1 + } + fi + if [ "$dne_upgrade" != "no" ]; then $r $LCTL conf_param \ $fsname-MDT0001.mdc.max_rpcs_in_flight=9 || { @@ -1892,6 +1920,19 @@ test_32c() { } run_test 32c "dne upgrade test" +test_32d() { + local tarballs + local tarball + local rc=0 + + t32_check + for tarball in $tarballs; do + ff_convert=yes t32_test $tarball || rc=$? 
+ done + return $rc +} +run_test 32d "convert ff test" + test_33a() { # bug 12333, was test_33 local rc=0 local FSNAME2=test-123 diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 1724c78..9ab0b9a 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -17,7 +17,7 @@ init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh} init_logging -[ $(facet_fstype $SINGLEMDS) != ldiskfs ] && +[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "test LFSCK only for ldiskfs" && exit 0 require_dsh_mds || exit 0 diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 894ae9f..3f8ccd0 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -29,10 +29,10 @@ OSTSIZE=100000 MOUNT_2="" check_and_setup_lustre -[ $(facet_fstype $SINGLEMDS) != ldiskfs ] && +[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] && skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && exit 0 -[ $(facet_fstype ost1) != ldiskfs ] && +[ $(facet_fstype ost1) != "ldiskfs" ] && skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre && exit 0 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] && diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c index 20ea2be..289c8fd 100644 --- a/lustre/utils/lustre_lfsck.c +++ b/lustre/utils/lustre_lfsck.c @@ -325,7 +325,7 @@ int jt_lfsck_stop(int argc, char **argv) return rc; } else { fprintf(stderr, - "Must sepcify device to stop LFSCK.\n"); + "Must specify device to stop LFSCK.\n"); return -EINVAL; } } -- 1.8.3.1