Whamcloud - gitweb
LU-13730 lod: don't confuse stale with primary flag 03/42003/7
author Alex Zhuravlev <bzzz@whamcloud.com>
Thu, 11 Mar 2021 05:47:34 +0000 (08:47 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 13 Mar 2021 18:33:09 +0000 (18:33 +0000)
There can be a few in-sync replicas which are not primary.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I8b984463a2665bc88f2f76247df5366a68d74ea6
Reviewed-on: https://review.whamcloud.com/42003
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Yingjin Qian <qian@ddn.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lod/lod_object.c
lustre/tests/sanity-flr.sh

index 660dac0..f418cd1 100644 (file)
@@ -7459,18 +7459,32 @@ static int lod_declare_update_write_pending(const struct lu_env *env,
        for (i = 0; i < lo->ldo_mirror_count; i++) {
                if (lo->ldo_mirrors[i].lme_stale)
                        continue;
+               if (lo->ldo_mirrors[i].lme_primary == 0)
+                       continue;
 
-               LASSERTF(primary < 0, DFID " has multiple primary: %u / %u\n",
-                        PFID(lod_object_fid(lo)),
-                        lo->ldo_mirrors[i].lme_id,
-                        lo->ldo_mirrors[primary].lme_id);
+               if (unlikely(primary >= 0)) {
+                       CERROR(DFID " has multiple primary: %u / %u\n",
+                              PFID(lod_object_fid(lo)),
+                              lo->ldo_mirrors[i].lme_id,
+                              lo->ldo_mirrors[primary].lme_id);
+                       RETURN(-EIO);
+               }
 
                primary = i;
        }
        if (primary < 0) {
-               CERROR(DFID ": doesn't have a primary mirror\n",
-                      PFID(lod_object_fid(lo)));
-               GOTO(out, rc = -ENODATA);
+               /* no primary, use any in-sync */
+               for (i = 0; i < lo->ldo_mirror_count; i++) {
+                       if (lo->ldo_mirrors[i].lme_stale)
+                               continue;
+                       primary = i;
+                       break;
+               }
+               if (primary < 0) {
+                       CERROR(DFID ": doesn't have a primary mirror\n",
+                              PFID(lod_object_fid(lo)));
+                       GOTO(out, rc = -ENODATA);
+               }
        }
 
        CDEBUG(D_LAYOUT, DFID": found primary %u\n",
index 9c7329a..2ee60c8 100644 (file)
@@ -3069,6 +3069,47 @@ function test_206() {
 }
 run_test 206 "lfs setstripe -pool .. --comp-flags=.. "
 
+test_207() {
+       local file=$DIR/$tfile
+       local tmpfile=$DIR/$tfile-tt
+
+       [ $MDS1_VERSION -lt $(version_code 2.14.50) ] &&
+               skip "Need MDS version at least 2.14.50"
+
+       stack_trap "rm -f $tmpfile $file"
+
+       # generate data for verification
+       dd if=/dev/urandom of=$tmpfile bs=1M count=1 ||
+               error "can't generate file with random data"
+
+       # create a mirrored file with one stale replica
+       $LFS mirror create -N -S 4M -c 2 -N -S 1M -c -1 $file ||
+               error "create mirrored file $file failed"
+       get_mirror_ids $file
+       echo "mirror IDs: ${mirror_array[@]}"
+
+       dd if=$tmpfile of=$file bs=1M || error "can't copy"
+       get_mirror_ids $file
+       echo "mirror IDs: ${mirror_array[@]}"
+
+       drop_client_cache
+       cmp $tmpfile $file || error "files don't match"
+       get_mirror_ids $file
+       echo "mirror IDs: ${mirror_array[@]}"
+
+       # mirror creation should work fine
+       $LFS mirror extend -N -S 8M -c -1 $file ||
+               error "mirror extend $file failed"
+
+       get_mirror_ids $file
+       echo "mirror IDs: ${mirror_array[@]}"
+
+       drop_client_cache
+       $LFS mirror verify -v $file || error "verification failed"
+       cmp $tmpfile $file || error "files don't match"
+}
+run_test 207 "create another replica with existing out-of-sync one"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status