Whamcloud - gitweb
LU-3335 scrub: convert filter_fid_old to LMA 43/7143/3
author Fan Yong <fan.yong@intel.com>
Sat, 6 Jul 2013 10:14:57 +0000 (18:14 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 1 Aug 2013 01:56:51 +0000 (01:56 +0000)
The OI scrub will generate the FID-in-LMA for old OST-objects, which
unifies the OSD behavior. To make the LMA fit into the 256-byte OST
inode, the scrub shrinks the filter_fid_old into a filter_fid; the
parent FID is kept.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Ife669f88b4657928abee774b089115d78b53c380
Reviewed-on: http://review.whamcloud.com/7143
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/osd-ldiskfs/osd_compat.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-ldiskfs/osd_scrub.h
lustre/tests/conf-sanity.sh
lustre/tests/sanity-lfsck.sh
lustre/tests/sanity-scrub.sh
lustre/utils/lustre_lfsck.c

index 2f22f5a..a12c4e7 100644 (file)
@@ -396,9 +396,21 @@ static int osd_ost_init(const struct lu_env *env, struct osd_device *dev)
         LASSERT(dev->od_fsops);
         osd_push_ctxt(dev, &new, &save);
 
-        d = simple_mkdir(rootd, dev->od_mnt, "O", 0755, 1);
+       d = ll_lookup_one_len("O", rootd, strlen("O"));
        if (IS_ERR(d))
                GOTO(cleanup, rc = PTR_ERR(d));
+       if (d->d_inode == NULL) {
+               dput(d);
+               /* The lookup() may be called again inside simple_mkdir().
+                * Since the repeated lookup() only be called for "/O" at
+                * mount time, it will not affect the whole performance. */
+               d = simple_mkdir(rootd, dev->od_mnt, "O", 0755, 1);
+               if (IS_ERR(d))
+                       GOTO(cleanup, rc = PTR_ERR(d));
+
+               /* It is quite probably that the device is new formatted. */
+               dev->od_maybe_new = 1;
+       }
 
        inode = d->d_inode;
        ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI);
index 523cfdf..dec4da4 100644 (file)
@@ -317,11 +317,12 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        CLASSERT(LMA_OLD_SIZE >= sizeof(*lma));
        rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMA,
                             info->oti_mdt_attrs_old, LMA_OLD_SIZE);
-       if (rc == -ENODATA) {
+       if (rc == -ENODATA && !fid_is_igif(lu_object_fid(&obj->oo_dt.do_lu)) &&
+           osd_obj2dev(obj)->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
                if (rc > 0)
-                       rc = 0;
+                       RETURN(0);
        }
 
        if (unlikely(rc == -ENODATA))
index efe0313..3daf5b2 100644 (file)
@@ -239,11 +239,11 @@ struct osd_device {
          * Fid Capability
          */
        unsigned int              od_fl_capa:1,
-                                 od_is_md:1, /* set in ->ldo_prepare */
+                                 od_maybe_new:1,
                                  od_noscrub:1,
                                  od_dirent_journal:1,
-                                 od_handle_nolma:1,
-                                 od_igif_inoi:1;
+                                 od_igif_inoi:1,
+                                 od_check_ff:1;
 
         unsigned long             od_capa_timeout;
         __u32                     od_capa_alg;
@@ -595,7 +595,10 @@ struct osd_thread_info {
        bool                    oti_rollback;
 
        char                    oti_name[48];
-       struct filter_fid_old   oti_ff;
+       union {
+               struct filter_fid_old   oti_ff;
+               struct filter_fid       oti_ff_new;
+       };
 };
 
 extern int ldiskfs_pdo;
index 6be982c..299b532 100644 (file)
@@ -40,6 +40,7 @@
 #include <lustre/lustre_idl.h>
 #include <lustre_disk.h>
 #include <dt_object.h>
+#include <linux/xattr.h>
 
 #include "osd_internal.h"
 #include "osd_oi.h"
@@ -221,6 +222,7 @@ static void osd_scrub_file_to_cpu(struct scrub_file *des,
        des->sf_run_time        = le32_to_cpu(src->sf_run_time);
        des->sf_success_count   = le32_to_cpu(src->sf_success_count);
        des->sf_oi_count        = le16_to_cpu(src->sf_oi_count);
+       des->sf_internal_flags  = le16_to_cpu(src->sf_internal_flags);
        memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE);
 }
 
@@ -255,6 +257,7 @@ static void osd_scrub_file_to_le(struct scrub_file *des,
        des->sf_run_time        = cpu_to_le32(src->sf_run_time);
        des->sf_success_count   = cpu_to_le32(src->sf_success_count);
        des->sf_oi_count        = cpu_to_le16(src->sf_oi_count);
+       des->sf_internal_flags  = cpu_to_le16(src->sf_internal_flags);
        memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE);
 }
 
@@ -417,6 +420,80 @@ static int osd_scrub_prep(struct osd_device *dev)
 }
 
 static int
+osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
+                    struct inode *inode, const struct lu_fid *fid)
+{
+       struct filter_fid_old   *ff      = &info->oti_ff;
+       struct dentry           *dentry  = &info->oti_obj_dentry;
+       handle_t                *jh;
+       int                      size    = 0;
+       int                      rc;
+       bool                     removed = false;
+       bool                     reset   = true;
+       ENTRY;
+
+       /* We want the LMA to fit into the 256-byte OST inode, so operate
+        * as following:
+        * 1) read old XATTR_NAME_FID and save the parent FID;
+        * 2) delete the old XATTR_NAME_FID;
+        * 3) make new LMA and add it;
+        * 4) generate new XATTR_NAME_FID with the saved parent FID and add it.
+        *
+        * Making the LMA to fit into the 256-byte OST inode can save time for
+        * normal osd_check_lma() and for other OI scrub scanning in future.
+        * So it is worth to make some slow conversion here. */
+       jh = ldiskfs_journal_start_sb(osd_sb(dev),
+                               osd_dto_credits_noquota[DTO_XATTR_SET] * 3);
+       if (IS_ERR(jh)) {
+               rc = PTR_ERR(jh);
+               CERROR("%s: fail to start trans for convert ff: "DFID
+                      ": rc = %d\n",
+                      osd_name(dev), PFID(fid), rc);
+               RETURN(rc);
+       }
+
+       /* 1) read old XATTR_NAME_FID and save the parent FID */
+       rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
+       if (rc == sizeof(*ff)) {
+               /* 2) delete the old XATTR_NAME_FID */
+               ll_vfs_dq_init(inode);
+               rc = inode->i_op->removexattr(dentry, XATTR_NAME_FID);
+               if (rc != 0)
+                       GOTO(stop, rc);
+
+               removed = true;
+       } else if (unlikely(rc == -ENODATA)) {
+               reset = false;
+       } else if (rc != sizeof(struct filter_fid)) {
+               GOTO(stop, rc = -EINVAL);
+       }
+
+       /* 3) make new LMA and add it */
+       rc = osd_ea_fid_set(info, inode, fid, LMAC_FID_ON_OST, 0);
+       if (rc == 0 && reset)
+               size = sizeof(struct filter_fid);
+       else if (rc != 0 && removed)
+               /* If failed, we should try to add the old back. */
+               size = sizeof(struct filter_fid_old);
+
+       /* 4) generate new XATTR_NAME_FID with the saved parent FID and add it*/
+       if (size > 0) {
+               int rc1;
+
+               rc1 = __osd_xattr_set(info, inode, XATTR_NAME_FID, ff, size,
+                                     XATTR_CREATE);
+               if (rc1 != 0 && rc != 0)
+                       rc = rc1;
+       }
+
+       GOTO(stop, rc);
+
+stop:
+       ldiskfs_journal_stop(jh);
+       return rc;
+}
+
+static int
 osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
                       struct osd_idmap_cache *oic, int val)
 {
@@ -430,6 +507,7 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
        int                           ops    = DTO_INDEX_UPDATE;
        int                           idx;
        int                           rc;
+       bool                          converted = false;
        ENTRY;
 
        down_write(&scrub->os_rwsem);
@@ -458,14 +536,19 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
                }
 
                sf->sf_flags |= SF_UPGRADE;
+               sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
+               dev->od_check_ff = 1;
+               rc = osd_scrub_convert_ff(info, dev, inode, fid);
                rc = osd_ea_fid_set(info, inode, fid,
                                    LMAC_FID_ON_OST, 0);
                if (rc != 0)
                        GOTO(out, rc);
+
+               converted = true;
        }
 
        if ((val == SCRUB_NEXT_NOLMA) &&
-           (!dev->od_handle_nolma || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA)))
+           (!scrub->os_convert_igif || OBD_FAIL_CHECK(OBD_FAIL_FID_NOLMA)))
                GOTO(out, rc = 0);
 
        if ((oii != NULL && oii->oii_insert) || (val == SCRUB_NEXT_NOLMA))
@@ -514,6 +597,9 @@ iget:
                        break;
                }
        } else if (osd_id_eq(lid, lid2)) {
+               if (converted)
+                       sf->sf_items_updated++;
+
                GOTO(out, rc = 0);
        } else {
                scrub->os_full_speed = 1;
@@ -627,6 +713,7 @@ static void osd_scrub_post(struct osd_scrub *scrub, int result)
                        container_of0(scrub, struct osd_device, od_scrub);
 
                dev->od_igif_inoi = 1;
+               dev->od_check_ff = 0;
                sf->sf_status = SS_COMPLETED;
                memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
                sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
@@ -770,7 +857,7 @@ static int osd_scrub_get_fid(struct osd_thread_info *info,
                        return rc;
 
                if (!has_lma) {
-                       if (dev->od_handle_nolma) {
+                       if (dev->od_scrub.os_convert_igif) {
                                lu_igif_build(fid, inode->i_ino,
                                              inode->i_generation);
                                if (scrub)
@@ -1357,32 +1444,6 @@ osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen)
        return dentry;
 }
 
-static inline void
-osd_ios_llogname2fid(struct lu_fid *fid, const char *name, int namelen)
-{
-       obd_id id = 0;
-       int    i  = 0;
-
-       fid->f_seq = FID_SEQ_LLOG;
-       while (i < namelen)
-               id = id * 10 + name[i++] - '0';
-
-       fid->f_oid = id & 0x00000000ffffffffULL;
-       fid->f_ver = id >> 32;
-}
-
-static inline void
-osd_ios_Oname2fid(struct lu_fid *fid, const char *name, int namelen)
-{
-       __u64 seq = 0;
-       int   i   = 0;
-
-       while (i < namelen)
-               seq = seq * 10 + name[i++] - '0';
-
-       lu_last_id_fid(fid, seq);
-}
-
 static int
 osd_ios_new_item(struct osd_device *dev, struct dentry *dentry,
                 scandir_t scandir, filldir_t filldir)
@@ -1661,7 +1722,7 @@ osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev,
         *      and try to re-generate the LMA from the OI mapping. But if the
         *      OI mapping crashed or lost also, then we have to give up under
         *      double failure cases. */
-       dev->od_handle_nolma = 1;
+       scrub->os_convert_igif = 1;
        child = osd_ios_lookup_one_len(dot_lustre_name, dentry,
                                       strlen(dot_lustre_name));
        if (IS_ERR(child)) {
@@ -1672,6 +1733,7 @@ osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev,
                                osd_scrub_file_reset(scrub,
                                        LDISKFS_SB(osd_sb(dev))->s_es->s_uuid,
                                        SF_UPGRADE);
+                               sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
                                rc = osd_scrub_file_store(scrub);
                        } else {
                                rc = 0;
@@ -1713,10 +1775,19 @@ static int
 osd_ios_OBJECTS_scan(struct osd_thread_info *info, struct osd_device *dev,
                     struct dentry *dentry, filldir_t filldir)
 {
-       struct dentry *child;
-       int            rc;
+       struct osd_scrub  *scrub  = &dev->od_scrub;
+       struct scrub_file *sf     = &scrub->os_file;
+       struct dentry     *child;
+       int                rc;
        ENTRY;
 
+       if (unlikely(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID)) {
+               sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
+               rc = osd_scrub_file_store(scrub);
+               if (rc != 0)
+                       RETURN(rc);
+       }
+
        child = osd_ios_lookup_one_len(ADMIN_USR, dentry, strlen(ADMIN_USR));
        if (!IS_ERR(child)) {
                rc = osd_ios_scan_one(info, dev, child->d_inode, NULL, 0);
@@ -1975,6 +2046,20 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        rc = osd_scrub_file_load(scrub);
        if (rc == -ENOENT) {
                osd_scrub_file_init(scrub, es->s_uuid);
+               /* If the "/O" dir does not exist when mount (indicated by
+                * osd_device::od_maybe_new), neither for the "/OI_scrub",
+                * then it is quite probably that the device is a new one,
+                * under such case, mark it as SIF_NO_HANDLE_OLD_FID.
+                *
+                * For the rare case that "/O" and "OI_scrub" both lost on
+                * an old device, it can be found and cleared later.
+                *
+                * For the system with "SIF_NO_HANDLE_OLD_FID", we do not
+                * need to check "filter_fid_old" and to convert it to
+                * "filter_fid" for each object, and all the IGIF should
+                * have their FID mapping in OI files already. */
+               if (dev->od_maybe_new)
+                       sf->sf_internal_flags = SIF_NO_HANDLE_OLD_FID;
                dirty = 1;
        } else if (rc != 0) {
                RETURN(rc);
@@ -2006,12 +2091,17 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
 
        rc = osd_initial_OI_scrub(info, dev);
        if (rc == 0) {
-               if ((sf->sf_flags & SF_UPGRADE) &&
-                  !(sf->sf_flags & SF_INCONSISTENT))
-                       /* The 'od_igif_inoi' will be set after the
-                        * upgrading completed, needs NOT remount. */
+               if (sf->sf_flags & SF_UPGRADE ||
+                   !(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID ||
+                     sf->sf_success_count > 0)) {
                        dev->od_igif_inoi = 0;
-               else
+                       dev->od_check_ff = 1;
+               } else {
+                       dev->od_igif_inoi = 1;
+                       dev->od_check_ff = 0;
+               }
+
+               if (sf->sf_flags & SF_INCONSISTENT)
                        /* The 'od_igif_inoi' will be set under the
                         * following cases:
                         * 1) new created system, or
index f5eb375..03d90ab 100644 (file)
@@ -97,6 +97,12 @@ enum scrub_start {
        SS_AUTO                 = 0x00000008,
 };
 
+/* Flags used only inside the OSD; they are NOT shown by dump().
+ * They are kept in scrub_file::sf_internal_flags. */
+enum scrub_internal_flags {
+       /* The device is (probably) newly formatted, so there is no old
+        * filter_fid_old to check for or to convert. */
+       SIF_NO_HANDLE_OLD_FID   = 0x0001,
+};
+
 struct scrub_file {
        /* 128-bit uuid for volume. */
        __u8    sf_uuid[16];
@@ -158,8 +164,9 @@ struct scrub_file {
        /* How many OI files. */
        __u16   sf_oi_count;
 
-       /* Update the magic or flags if want to use the reserved fields. */
-       __u16   sf_reserved_0;
+       /* Keep the flags after scrub reset. See 'enum scrub_internal_flags' */
+       __u16   sf_internal_flags;
+
        __u32   sf_reserved_1;
        __u64   sf_reserved_2[16];
 
@@ -211,7 +218,8 @@ struct osd_scrub {
                                                * found by RPC prior */
                                os_waiting:1, /* Waiting for scan window. */
                                os_full_speed:1, /* run w/o speed limit */
-                               os_paused:1; /* The scrub is paused. */
+                               os_paused:1, /* The scrub is paused. */
+                               os_convert_igif:1;
 };
 
 #endif /* _OSD_SCRUB_H */
index fb5db0c..d1010cc 100644 (file)
@@ -1490,6 +1490,7 @@ t32_test() {
        local tarball=$1
        local writeconf=$2
        local dne_upgrade=${dne_upgrade:-"no"}
+       local ff_convert=${ff_convert:-"no"}
        local shall_cleanup_mdt=false
        local shall_cleanup_mdt1=false
        local shall_cleanup_ost=false
@@ -1530,6 +1531,9 @@ t32_test() {
        echo "  Kernel: $img_kernel"
        echo "    Arch: $img_arch"
 
+       local version=$(version_code $img_commit)
+       [[ $version -gt $(version_code 2.4.0) ]] && ff_convert="no"
+
        $r $LCTL set_param debug="$PTLDEBUG"
 
        $r $TUNEFS --dryrun $tmp/mdt || {
@@ -1663,6 +1667,30 @@ t32_test() {
                return 1
        }
 
+       if [ "$ff_convert" != "no" -a $(facet_fstype ost1) == "ldiskfs" ]; then
+               $r $LCTL lfsck_start -M $fsname-OST0000 || {
+                       error_noexit "Start OI scrub on OST0"
+                       return 1
+               }
+
+               # The oi_scrub should be on ost1, but for test_32(),
+               # all on the SINGLEMDS.
+               wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+                       osd-ldiskfs.$fsname-OST0000.oi_scrub |
+                       awk '/^status/ { print \\\$2 }'" "completed" 30 || {
+                       error_noexit "Failed to get the expected 'completed'"
+                       return 1
+               }
+
+               local UPDATED=$($r $LCTL get_param -n \
+                               osd-ldiskfs.$fsname-OST0000.oi_scrub |
+                               awk '/^updated/ { print $2 }')
+               [ $UPDATED -ge 1 ] || {
+                       error_noexit "Only $UPDATED objects have been converted"
+                       return 1
+               }
+       fi
+
        if [ "$dne_upgrade" != "no" ]; then
                $r $LCTL conf_param \
                                $fsname-MDT0001.mdc.max_rpcs_in_flight=9 || {
@@ -1892,6 +1920,19 @@ test_32c() {
 }
 run_test 32c "dne upgrade test"
 
+# Run the upgrade test (t32_test) with ff_convert=yes on every tarball,
+# so that the filter_fid conversion check inside t32_test is enabled.
+# Returns the exit status of the last failing t32_test, or 0 if none failed.
+test_32d() {
+       # presumably filled in by t32_check - verify against its definition
+       local tarballs
+       local tarball
+       local rc=0
+
+       t32_check
+       for tarball in $tarballs; do
+               # keep going after a failure, but remember its status
+               ff_convert=yes t32_test $tarball || rc=$?
+       done
+       return $rc
+}
+run_test 32d "convert ff test"
+
 test_33a() { # bug 12333, was test_33
         local rc=0
         local FSNAME2=test-123
index 1724c78..9ab0b9a 100644 (file)
@@ -17,7 +17,7 @@ init_test_env $@
 . ${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
 init_logging
 
-[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
+[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
        skip "test LFSCK only for ldiskfs" && exit 0
 require_dsh_mds || exit 0
 
index 894ae9f..3f8ccd0 100644 (file)
@@ -29,10 +29,10 @@ OSTSIZE=100000
 MOUNT_2=""
 check_and_setup_lustre
 
-[ $(facet_fstype $SINGLEMDS) != ldiskfs ] &&
+[ $(facet_fstype $SINGLEMDS) != "ldiskfs" ] &&
        skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
        exit 0
-[ $(facet_fstype ost1) != ldiskfs ] &&
+[ $(facet_fstype ost1) != "ldiskfs" ] &&
        skip "test OI scrub only for ldiskfs" && check_and_cleanup_lustre &&
        exit 0
 [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.2.90) ]] &&
index 20ea2be..289c8fd 100644 (file)
@@ -325,7 +325,7 @@ int jt_lfsck_stop(int argc, char **argv)
                                return rc;
                } else {
                        fprintf(stderr,
-                               "Must sepcify device to stop LFSCK.\n");
+                               "Must specify device to stop LFSCK.\n");
                        return -EINVAL;
                }
        }