Whamcloud - gitweb
LU-15513 lod: skip uninit component in lod_fill_mirrors
author Andreas Dilger <adilger@whamcloud.com>
Wed, 2 Feb 2022 22:05:18 +0000 (15:05 -0700)
committer Andreas Dilger <adilger@whamcloud.com>
Wed, 9 Mar 2022 17:09:54 +0000 (17:09 +0000)
Do not iterate over the "objects" in lod_fill_mirrors() to check
for non-rotational OSTs if the component is uninitialized.  In
cases where an OST is not present (e.g. when sparse OST indexes
are used), the lod_tgt_desc[] array has holes and OST_TGT() returns
NULL, which the old loop then used unchecked to read ltd_statfs.

Skip the loop entirely if the component is not initialized, but
also add some sanity checks to verify that the OST index values
are valid in case there are other problems in the future (e.g.
a corrupt/invalid layout on disk).

Lustre-change: https://review.whamcloud.com/46435
Lustre-commit: 591a990c617f9b953d2e838427d45fa1de061a83

Fixes: 8507472dd37e ("LU-14996 lov: prefer mirrors on non-rotational OSTs")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I8ec23367059a4ec9e483adb768095b24f03ebbe5
Reviewed-on: https://review.whamcloud.com/46437
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
lustre/lod/lod_lov.c
lustre/tests/conf-sanity.sh

index dad0b8f..b3ccfe7 100644 (file)
@@ -609,18 +609,36 @@ int lod_fill_mirrors(struct lod_object *lo)
        lod_comp = &lo->ldo_comp_entries[0];
 
        for (i = 0; i < lo->ldo_comp_cnt; i++, lod_comp++) {
-               int stale = !!(lod_comp->llc_flags & LCME_FL_STALE);
-               int preferred = !!(lod_comp->llc_flags & LCME_FL_PREF_WR);
-               int mirror_hsm = !!(lod_is_hsm(lod_comp));
+               bool stale = !!(lod_comp->llc_flags & LCME_FL_STALE);
+               bool preferred = !!(lod_comp->llc_flags & LCME_FL_PREF_WR);
+               bool mirror_hsm = !!(lod_is_hsm(lod_comp));
+               bool init = lod_comp_inited(lod_comp);
+
                int j;
 
                pref = 0;
                /* calculate component preference over all used OSTs */
-               for (j = 0; j < lod_comp->llc_stripes_allocated; j++) {
-                       int idx = lod_comp->llc_ost_indices[j];
-                       struct obd_statfs *osfs = &OST_TGT(lod,idx)->ltd_statfs;
+               for (j = 0; init && j < lod_comp->llc_stripes_allocated; j++) {
+                       __u32 idx = lod_comp->llc_ost_indices[j];
+                       struct lod_tgt_desc *ltd;
+
+                       if (unlikely(idx > lod->lod_ost_descs.ltd_tgts_size)) {
+                               CERROR("%s: "DFID" OST idx %u > max %u\n",
+                                      lod2obd(lod)->obd_name,
+                                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+                                      idx, lod->lod_ost_descs.ltd_tgts_size);
+                               continue;
+                       }
+                       ltd = OST_TGT(lod, idx);
+                       if (unlikely(!ltd)) {
+                               CERROR("%s: "DFID" OST idx %u is NULL\n",
+                                      lod2obd(lod)->obd_name,
+                                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+                                      idx);
+                               continue;
+                       }
 
-                       if (osfs->os_state & OS_STATFS_NONROT)
+                       if (ltd->ltd_statfs.os_state & OS_STATFS_NONROT)
                                pref++;
                }
 
@@ -1082,7 +1100,7 @@ static int validate_lod_and_idx(struct lod_device *md, __u32 idx)
  * Instantiate objects for stripes.
  *
  * Allocate and initialize LU-objects representing the stripes. The number
- * of the stripes (ldo_stripe_count) must be initialized already. The caller
+ * of the stripes (llc_stripe_count) must be initialized already. The caller
  * must ensure nobody else is calling the function on the object at the same
  * time. FLDB service must be running to be able to map a FID to the targets
  * and find appropriate device representing that target.
index a94f674..d13becc 100644 (file)
@@ -2527,7 +2527,7 @@ test_32e() {
 run_test 32e "dom upgrade test"
 
 test_33a() { # bug 12333, was test_33
-       local FSNAME2=test-123
+       local FSNAME2=test-$testnum
        local MDSDEV=$(mdsdevname ${SINGLEMDS//mds/})
        local mkfsoptions
 
@@ -4370,6 +4370,12 @@ test_56a() {
        echo ok
        $LFS osts
 
+       # test instantiating PFL components with sparse index LU-15513
+       mkdir -p $MOUNT/$tdir
+       $LFS setstripe -E 4M -c 1 -E 1G -c 4 -S4M -E eof -c -1 $MOUNT/$tdir
+       dd if=/dev/zero of=$MOUNT/$tdir/$tfile bs=4K count=1 seek=10k ||
+               error "dd to second component failed"
+
        if [[ "$MDS1_VERSION" -ge $(version_code 2.6.54) ]] ||
           [[ "$MDS1_VERSION" -ge $(version_code 2.5.4) &&
              "$MDS1_VERSION" -lt $(version_code 2.5.11) ]]; then