Whamcloud - gitweb
LU-3934 scrub: detect upgraded from 1.8 correctly 25/7625/2
authorFan Yong <fan.yong@intel.com>
Sun, 18 Aug 2013 22:00:46 +0000 (06:00 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 30 Oct 2013 21:47:32 +0000 (21:47 +0000)
The OI scrub should have the capability to detect the case of an MDT
device upgraded from lustre-1.8 and trigger a full OI scrub to process
the IGIF mappings into the OI files.

Originally, we detected that by checking /ROOT/.lustre via the initial OI
scrub, but if the MDT is first upgraded to lustre-2.1 and then
upgraded to lustre-2.4, that detection mechanism fails,
because lustre-2.1 does not support OI scrub but does create
/ROOT/.lustre.

The new detection mechanism combines checking the OI file name with the OI
scrub's successful-scan count: if the old OI file "oi.16" exists and OI
scrub has never successfully run on the device, then it is regarded as
the upgrade case.

Test-Parameters: testlist=sanity-scrub
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I5e57aae972e4fe93d3cb9e7cc7ca2e5b95dce4b2
Reviewed-on: http://review.whamcloud.com/7625
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_oi.c

index 335d1e0..13863f4 100644 (file)
@@ -344,6 +344,15 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
        if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT))
                RETURN(-ENOENT);
 
+       /* Objects are created as locking anchors or place holders for objects
+        * yet to be created. No need to osd_oi_lookup() at here because FID
+        * shouldn't never be re-used, if it's really a duplicate FID from
+        * unexpected reason, we should be able to detect it later by calling
+        * do_create->osd_oi_insert()
+        */
+       if (conf != NULL && conf->loc_flags & LOC_F_NEW)
+               GOTO(out, result = 0);
+
        /* Search order: 1. per-thread cache. */
        if (lu_fid_eq(fid, &oic->oic_fid)) {
                id = &oic->oic_lid;
@@ -358,19 +367,19 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
                        goto iget;
        }
 
-       if (sf->sf_flags & SF_INCONSISTENT)
+       /* One corner case that if the OI_scrub file crashed or lost and we
+        * regard it as upgrade, then we allow IGIF lookup to bypass OI files.
+        *
+        * The risk is that osd_fid_lookup() may find a wrong inode with the
+        * given IGIF especially when the MDT has performed file-level backup
+        * and restored after former upgrading from 1.8 to 2.x.
+        *
+        * To decrease the risk, we force the osd_fid_lookup() to verify the
+        * inode for such case. */
+       if ((sf->sf_flags & SF_INCONSISTENT) ||
+           (!dev->od_igif_inoi && fid_is_igif(fid)))
                verify = true;
 
-       /*
-        * Objects are created as locking anchors or place holders for objects
-        * yet to be created. No need to osd_oi_lookup() at here because FID
-        * shouldn't never be re-used, if it's really a duplicate FID from
-        * unexpected reason, we should be able to detect it later by calling
-        * do_create->osd_oi_insert()
-        */
-       if (conf != NULL && conf->loc_flags & LOC_F_NEW)
-               GOTO(out, result = 0);
-
        /* Search order: 3. OI files. */
        result = osd_oi_lookup(info, dev, fid, id, true);
        if (result == -ENOENT) {
@@ -397,7 +406,7 @@ iget:
                if (result == -ENOENT || result == -ESTALE) {
                        if (!in_oi) {
                                fid_zero(&oic->oic_fid);
-                               GOTO(out, result = 0);
+                               GOTO(out, result = -ENOENT);
                        }
 
                        /* XXX: There are three possible cases:
@@ -415,10 +424,6 @@ iget:
                        if (result == 0)
                                /* It is the case 1 or 2. */
                                goto trigger;
-
-                       if (result == -ENOENT)
-                               /* It is the case 3. */
-                               result = 0;
                } else if (result == -EREMCHG) {
 
 trigger:
index bf01f4d..40dad75 100644 (file)
@@ -361,6 +361,21 @@ int osd_oi_init(struct osd_thread_info *info, struct osd_device *osd)
        /* if previous failed then try found single OI from old filesystem */
        rc = osd_oi_open(info, osd, OSD_OI_NAME_BASE, &oi[0], false);
        if (rc == 0) { /* found single OI from old filesystem */
+               if (sf->sf_success_count == 0)
+                       /* XXX: There is one corner case that if the OI_scrub
+                        *      file crashed or lost and we regard it upgrade,
+                        *      then we allow IGIF lookup to bypass OI files.
+                        *
+                        *      The risk is that osd_fid_lookup() may find
+                        *      a wrong inode with the given IGIF especially
+                        *      when the MDT has performed file-level backup
+                        *      and restored after former upgrading from 1.8
+                        *      to 2.x. To decrease the risk, we will force
+                        *      the osd_fid_lookup() to verify the inode for
+                        *      such case. */
+                       osd_scrub_file_reset(scrub,
+                                       LDISKFS_SB(osd_sb(osd))->s_es->s_uuid,
+                                       SF_UPGRADE);
                GOTO(out, rc = 1);
        } else if (rc != -ENOENT) {
                CERROR("%.16s: can't open %s: rc = %d\n",
@@ -401,7 +416,16 @@ out:
                LASSERT((rc & (rc - 1)) == 0);
                osd->od_oi_table = oi;
                osd->od_oi_count = rc;
-               rc = 0;
+               if (sf->sf_oi_count != rc) {
+                       sf->sf_oi_count = rc;
+                       rc = osd_scrub_file_store(scrub);
+                       if (rc < 0) {
+                               osd_oi_table_put(info, oi, sf->sf_oi_count);
+                               OBD_FREE(oi, sizeof(*oi) * OSD_OI_FID_NR_MAX);
+                       }
+               } else {
+                       rc = 0;
+               }
        }
 
        mutex_unlock(&oi_init_lock);