Whamcloud - gitweb
LU-10550 flr: resync RDONLY state FLR file 10/31010/8
authorBobi Jam <bobijam.xu@intel.com>
Wed, 24 Jan 2018 15:32:37 +0000 (23:32 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 27 Feb 2018 03:41:57 +0000 (03:41 +0000)
When some components fail to resync for various reasons, those
components will still have the STALE bit set, but the file state may
become RDONLY.

This patch makes it possible to resync an FLR file in the RDONLY state.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I2e3b518bb969aedd7f214e6b09b895079cab69ab
Reviewed-on: https://review.whamcloud.com/31010
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lod/lod_object.c
lustre/mdd/mdd_object.c
lustre/mdt/mdt_open.c
lustre/mdt/mdt_reint.c
lustre/tests/sanity-flr.sh
lustre/utils/lfs.c

index 7a5697e..8735bee 100644 (file)
@@ -5717,6 +5717,49 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo,
        RETURN(picked);
 }
 
+/**
+ * figure out the components should be instantiated for resync.
+ */
+static int lod_prepare_resync(const struct lu_env *env, struct lod_object *lo,
+                             struct lu_extent *extent)
+{
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lod_layout_component *lod_comp;
+       unsigned int need_sync = 0;
+       int i;
+
+       CDEBUG(D_LAYOUT,
+              DFID": instantiate all stale components in "DEXT"\n",
+              PFID(lod_object_fid(lo)), PEXT(extent));
+
+       /**
+        * instantiate all components within this extent, even non-stale
+        * components.
+        */
+       for (i = 0; i < lo->ldo_mirror_count; i++) {
+               if (!lo->ldo_mirrors[i].lme_stale)
+                       continue;
+
+               lod_foreach_mirror_comp(lod_comp, lo, i) {
+                       if (!lu_extent_is_overlapped(extent,
+                                               &lod_comp->llc_extent))
+                               break;
+
+                       need_sync++;
+
+                       if (lod_comp_inited(lod_comp))
+                               continue;
+
+                       CDEBUG(D_LAYOUT, "resync instantiate %d / %d\n",
+                              i, lod_comp_index(lo, lod_comp));
+                       info->lti_comp_idx[info->lti_count++] =
+                                       lod_comp_index(lo, lod_comp);
+               }
+       }
+
+       return need_sync ? 0 : -EALREADY;
+}
+
 static int lod_declare_update_rdonly(const struct lu_env *env,
                struct lod_object *lo, struct md_layout_change *mlc,
                struct thandle *th)
@@ -5724,58 +5767,91 @@ static int lod_declare_update_rdonly(const struct lu_env *env,
        struct lod_thread_info *info = lod_env_info(env);
        struct lu_attr *layout_attr = &info->lti_layout_attr;
        struct lod_layout_component *lod_comp;
-       struct layout_intent *layout = mlc->mlc_intent;
-       struct lu_extent extent = layout->li_extent;
-       int picked;
+       struct lu_extent extent = { 0 };
        int rc;
        ENTRY;
 
-       LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE);
        LASSERT(lo->ldo_flr_state == LCM_FL_RDONLY);
+       LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE ||
+               mlc->mlc_opc == MD_LAYOUT_RESYNC);
        LASSERT(lo->ldo_mirror_count > 0);
 
-       CDEBUG(D_LAYOUT, DFID": trying to write :"DEXT"\n",
-              PFID(lod_object_fid(lo)), PEXT(&extent));
+       if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+               struct layout_intent *layout = mlc->mlc_intent;
+               int picked;
 
-       picked = lod_primary_pick(env, lo, &extent);
-       if (picked < 0)
-               RETURN(picked);
+               extent = layout->li_extent;
+               CDEBUG(D_LAYOUT, DFID": trying to write :"DEXT"\n",
+                      PFID(lod_object_fid(lo)), PEXT(&extent));
 
-       CDEBUG(D_LAYOUT, DFID": picked mirror %u as primary\n",
-              PFID(lod_object_fid(lo)), lo->ldo_mirrors[picked].lme_id);
+               picked = lod_primary_pick(env, lo, &extent);
+               if (picked < 0)
+                       RETURN(picked);
 
-       if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
-               /**
-                * trunc transfers [0, size) in the intent extent, we'd
-                * stale components overlapping [size, eof).
-                */
-               extent.e_start = extent.e_end;
-               extent.e_end = OBD_OBJECT_EOF;
-       }
+               CDEBUG(D_LAYOUT, DFID": picked mirror id %u as primary\n",
+                      PFID(lod_object_fid(lo)),
+                      lo->ldo_mirrors[picked].lme_id);
 
-       /* stale overlapping components from other mirrors */
-       lod_stale_components(lo, picked, &extent);
+               if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
+                       /**
+                        * trunc transfers [0, size) in the intent extent, we'd
+                        * stale components overlapping [size, eof).
+                        */
+                       extent.e_start = extent.e_end;
+                       extent.e_end = OBD_OBJECT_EOF;
+               }
 
-       /* restore truncate intent extent */
-       if (layout->li_opc == LAYOUT_INTENT_TRUNC)
-               extent.e_end = extent.e_start;
+               /* stale overlapping components from other mirrors */
+               lod_stale_components(lo, picked, &extent);
 
-       /* instantiate components for the picked mirror, start from 0 */
-       extent.e_start = 0;
+               /* restore truncate intent extent */
+               if (layout->li_opc == LAYOUT_INTENT_TRUNC)
+                       extent.e_end = extent.e_start;
 
-       lod_foreach_mirror_comp(lod_comp, lo, picked) {
-               if (!lu_extent_is_overlapped(&extent,
-                                            &lod_comp->llc_extent))
-                       break;
+               /* instantiate components for the picked mirror, start from 0 */
+               extent.e_start = 0;
 
-               if (lod_comp_inited(lod_comp))
-                       continue;
+               lod_foreach_mirror_comp(lod_comp, lo, picked) {
+                       if (!lu_extent_is_overlapped(&extent,
+                                                    &lod_comp->llc_extent))
+                               break;
+
+                       if (lod_comp_inited(lod_comp))
+                               continue;
 
-               info->lti_comp_idx[info->lti_count++] =
+                       info->lti_comp_idx[info->lti_count++] =
                                                lod_comp_index(lo, lod_comp);
-       }
+               }
 
-       lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+               lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+       } else { /* MD_LAYOUT_RESYNC */
+               int i;
+
+               /**
+                * could contain multiple non-stale mirrors, so we need to
+                * prep uninited all components assuming any non-stale mirror
+                * could be picked as the primary mirror.
+                */
+               for (i = 0; i < lo->ldo_mirror_count; i++) {
+                       if (lo->ldo_mirrors[i].lme_stale)
+                               continue;
+
+                       lod_foreach_mirror_comp(lod_comp, lo, i) {
+                               if (!lod_comp_inited(lod_comp))
+                                       break;
+
+                               if (extent.e_end < lod_comp->llc_extent.e_end)
+                                       extent.e_end =
+                                               lod_comp->llc_extent.e_end;
+                       }
+               }
+
+               rc = lod_prepare_resync(env, lo, &extent);
+               if (rc)
+                       GOTO(out, rc);
+               /* change the file state to SYNC_PENDING */
+               lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
+       }
 
        /* Reset the layout version once it's becoming too large.
         * This way it can make sure that the layout version is
@@ -5794,6 +5870,8 @@ static int lod_declare_update_rdonly(const struct lu_env *env,
 
        layout_attr->la_valid = LA_LAYOUT_VERSION;
        layout_attr->la_layout_version = 0; /* set current version */
+       if (mlc->mlc_opc == MD_LAYOUT_RESYNC)
+               layout_attr->la_layout_version = LU_LAYOUT_RESYNC;
        rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
        if (rc)
                GOTO(out, rc);
@@ -5894,9 +5972,6 @@ static int lod_declare_update_write_pending(const struct lu_env *env,
                                                lod_comp_index(lo, lod_comp);
                }
        } else { /* MD_LAYOUT_RESYNC */
-               /* figure out the components that have been instantiated in
-                * in primary to decide what components should be instantiated
-                * in stale mirrors */
                lod_foreach_mirror_comp(lod_comp, lo, primary) {
                        if (!lod_comp_inited(lod_comp))
                                break;
@@ -5904,36 +5979,9 @@ static int lod_declare_update_write_pending(const struct lu_env *env,
                        extent.e_end = lod_comp->llc_extent.e_end;
                }
 
-               CDEBUG(D_LAYOUT,
-                      DFID": instantiate all stale components in "DEXT"\n",
-                      PFID(lod_object_fid(lo)), PEXT(&extent));
-
-               /* 1. instantiate all components within this extent, even
-                * non-stale components so that it won't need to instantiate
-                * those components for mirror truncate later. */
-               for (i = 0; i < lo->ldo_mirror_count; i++) {
-                       if (primary == i)
-                               continue;
-
-                       LASSERTF(lo->ldo_mirrors[i].lme_stale,
-                                "both %d and %d are primary\n", i, primary);
-
-                       lod_foreach_mirror_comp(lod_comp, lo, i) {
-                               if (!lu_extent_is_overlapped(&extent,
-                                                       &lod_comp->llc_extent))
-                                       break;
-
-                               if (lod_comp_inited(lod_comp))
-                                       continue;
-
-                               CDEBUG(D_LAYOUT, "resync instantiate %d / %d\n",
-                                      i, lod_comp_index(lo, lod_comp));
-
-                               info->lti_comp_idx[info->lti_count++] =
-                                               lod_comp_index(lo, lod_comp);
-                       }
-               }
-
+               rc = lod_prepare_resync(env, lo, &extent);
+               if (rc)
+                       GOTO(out, rc);
                /* change the file state to SYNC_PENDING */
                lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
        }
@@ -6023,8 +6071,8 @@ static int lod_declare_update_sync_pending(const struct lu_env *env,
                GOTO(out, rc = -EINVAL);
        }
 
-       if (!sync_components || !resync_components) {
-               CDEBUG(D_LAYOUT, DFID": no mirror in sync or resync\n",
+       if (!sync_components || (mlc->mlc_resync_count && !resync_components)) {
+               CDEBUG(D_LAYOUT, DFID": no mirror in sync\n",
                       PFID(lod_object_fid(lo)));
 
                /* tend to return an error code here to prevent
index d816c53..d0539fe 100644 (file)
@@ -2509,11 +2509,10 @@ mdd_layout_update_rdonly(const struct lu_env *env, struct mdd_object *obj,
        /* Verify acceptable operations */
        switch (mlc->mlc_opc) {
        case MD_LAYOUT_WRITE:
-               break;
        case MD_LAYOUT_RESYNC:
                /* these are legal operations - this represents the case that
-                * a few mirrors were missed in the last resync.
-                * XXX: it will be supported later */
+                * a few mirrors were missed in the last resync. */
+               break;
        case MD_LAYOUT_RESYNC_DONE:
        default:
                RETURN(0);
index 32b9aea..5f2d340 100644 (file)
@@ -2130,8 +2130,6 @@ static int mdt_close_resync_done(struct mdt_thread_info *info,
                GOTO(out_unlock, rc = -ESTALE);
 
        resync_count = data->cd_resync.resync_count;
-       if (!resync_count)
-               GOTO(out_unlock, rc = 0);
 
        if (resync_count > INLINE_RESYNC_ARRAY_SIZE) {
                void *data;
index a865f75..7f9b8f9 100644 (file)
@@ -2259,7 +2259,7 @@ static int mdt_reint_resync(struct mdt_thread_info *info,
        layout.mlc_opc = MD_LAYOUT_RESYNC;
        rc = mdt_layout_change(info, mo, &layout);
        if (rc)
-               GOTO(out_unlock, rc = -EBUSY);
+               GOTO(out_unlock, rc);
 
        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
index f21f3d8..eb99d6d 100644 (file)
@@ -1382,12 +1382,14 @@ test_41() {
        local tf=$DIR/$tfile
 
        rm -f $tf $tf-1
+       echo " **create two FLR files $tf $tf-1"
        $LFS mirror create -N -E2m -E4m -E-1 -N -E1m -E2m -E3m -E-1 $tf ||
                error "create PFLR file $tf failed"
-       $LFS mirror create -N -E4m -E-1 -N -E2m -E3m -E-1 $tf-1 ||
-               error "create PFLR file $tf-1 failed"
+       $LFS mirror create -N -E2m -Eeof -N -E1m -Eeof --flags prefer \
+               -N -E4m -Eeof $tf-1 || error "create PFLR file $tf-1 failed"
 
        # file should be in ro status
+       echo " **verify files be RDONLY"
        verify_flr_state $tf "ro"
        verify_flr_state $tf-1 "ro"
 
@@ -1397,26 +1399,60 @@ test_41() {
        dd if=/dev/zero of=$tf-1 bs=1M count=4 conv=notrunc ||
                error "writing $tf-1 failed"
 
+       echo " **verify files be WRITE_PENDING"
        verify_flr_state $tf "wp"
        verify_flr_state $tf-1 "wp"
 
        # file should have stale component
+       echo " **verify files have stale component"
        $LFS getstripe $tf | grep lcme_flags | grep stale > /dev/null ||
                error "after writing $tf, it does not contain stale component"
        $LFS getstripe $tf-1 | grep lcme_flags | grep stale > /dev/null ||
                error "after writing $tf-1, it does not contain stale component"
 
+       echo " **full resync"
        $LFS mirror resync $tf $tf-1 || error "mirror resync $tf $tf-1 failed"
 
+       echo " **verify files be RDONLY"
        verify_flr_state $tf "ro"
        verify_flr_state $tf-1 "ro"
 
        # file should not have stale component
+       echo " **verify files do not contain stale component"
        $LFS getstripe $tf | grep lcme_flags | grep stale &&
                error "after resyncing $tf, it contains stale component"
        $LFS getstripe $tf-1 | grep lcme_flags | grep stale &&
                error "after resyncing $tf, it contains stale component"
 
+       # verify partial resync
+       echo " **write $tf-1 for partial resync test"
+       dd if=/dev/zero of=$tf-1 bs=1M count=2 conv=notrunc ||
+               error "writing $tf-1 failed"
+
+       echo " **only resync mirror 2"
+       verify_flr_state $tf-1 "wp"
+       $LFS mirror resync --only 2 $tf-1 ||
+               error "resync mirror 2 of $tf-1 failed"
+       verify_flr_state $tf "ro"
+
+       # resync synced mirror
+       echo " **resync mirror 2 again"
+       $LFS mirror resync --only 2 $tf-1 ||
+               error "resync mirror 2 of $tf-1 failed"
+       verify_flr_state $tf "ro"
+       echo " **verify $tf-1 contains stale component"
+       $LFS getstripe $tf-1 | grep lcme_flags | grep stale > /dev/null ||
+               error "after writing $tf-1, it does not contain stale component"
+
+       echo " **full resync $tf-1"
+       $LFS mirror resync $tf-1 || error "resync of $tf-1 failed"
+       verify_flr_state $tf "ro"
+       echo " **full resync $tf-1 again"
+       $LFS mirror resync $tf-1 || error "resync of $tf-1 failed"
+       echo " **verify $tf-1 does not contain stale component"
+       $LFS getstripe $tf | grep lcme_flags | grep stale &&
+               error "after resyncing $tf, it contains stale component"
+
        return 0
 }
 run_test 41 "lfs mirror resync check"
index 66c531f..7d91dbf 100644 (file)
@@ -7268,16 +7268,6 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                goto error;
        }
 
-       ioc->lil_mode = LL_LEASE_WRLCK;
-       ioc->lil_flags = LL_LEASE_RESYNC;
-       rc = llapi_lease_set(fd, ioc);
-       if (rc < 0) {
-               fprintf(stderr,
-                       "%s: '%s' llapi_lease_set resync failed: %s.\n",
-                       progname, fname, strerror(errno));
-               goto close_fd;
-       }
-
        layout = llapi_layout_get_by_fd(fd, 0);
        if (layout == NULL) {
                fprintf(stderr, "%s: '%s' llapi_layout_get_by_fd failed: %s.\n",
@@ -7291,28 +7281,37 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                fprintf(stderr, "%s: '%s' llapi_layout_flags_get failed: %s.\n",
                        progname, fname, strerror(errno));
                rc = -errno;
-               goto close_fd;
+               goto free_layout;
        }
 
        flr_state &= LCM_FL_FLR_MASK;
-       switch (flr_state) {
-       case LCM_FL_NONE:
+       if (flr_state == LCM_FL_NONE) {
                rc = -EINVAL;
-       case LCM_FL_RDONLY:
-               fprintf(stderr, "%s: '%s' file state error: %s\n", progname,
-                       fname, llapi_layout_flags_string(flr_state));
-               goto close_fd;
-       default:
-               break;
+               fprintf(stderr, "%s: '%s' is not a FLR file.\n",
+                       progname, fname);
+               goto free_layout;
        }
 
        /* get stale component info */
        comp_size = llapi_mirror_find_stale(layout, comp_array,
                                            ARRAY_SIZE(comp_array),
                                            mirror_ids, ids_nr);
-       if (comp_size < 0) {
+       if (comp_size <= 0) {
                rc = comp_size;
-               goto close_fd;
+               goto free_layout;
+       }
+
+       ioc->lil_mode = LL_LEASE_WRLCK;
+       ioc->lil_flags = LL_LEASE_RESYNC;
+       rc = llapi_lease_set(fd, ioc);
+       if (rc < 0) {
+               if (rc == -EALREADY)
+                       rc = 0;
+               else
+                       fprintf(stderr,
+                           "%s: '%s' llapi_lease_get_ext resync failed: %s.\n",
+                               progname, fname, strerror(errno));
+               goto free_layout;
        }
 
        idx = 0;
@@ -7326,7 +7325,7 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                if (rc != LL_LEASE_WRLCK) {
                        fprintf(stderr, "%s: '%s' lost lease lock.\n",
                                progname, fname);
-                       goto close_fd;
+                       goto free_layout;
                }
 
                mirror_id = comp_array[idx].lrc_mirror_id;
@@ -7347,7 +7346,7 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                        fprintf(stderr, "%s: '%s' llapi_mirror_resync_one: "
                                "%ld.\n", progname, fname, result);
                        rc = result;
-                       goto close_fd;
+                       goto unlock;
                } else if (result > 0) {
                        int j;
 
@@ -7359,6 +7358,7 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                idx = i;
        }
 
+unlock:
        /* prepare ioc for lease put */
        ioc->lil_mode = LL_LEASE_UNLCK;
        ioc->lil_flags = LL_LEASE_RESYNC_DONE;
@@ -7370,15 +7370,13 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
                }
        }
 
-       llapi_layout_free(layout);
-
        rc = llapi_lease_set(fd, ioc);
        if (rc <= 0) {
                if (rc == 0) /* lost lease lock */
                        rc = -EBUSY;
                fprintf(stderr, "%s: resync file '%s' failed: %s.\n",
                        progname, fname, strerror(errno));
-               goto close_fd;
+               goto free_layout;
        }
        /**
         * llapi_lease_set returns lease mode when it request to unlock
@@ -7386,6 +7384,8 @@ int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
         */
        rc = 0;
 
+free_layout:
+       llapi_layout_free(layout);
 close_fd:
        close(fd);
 error: