Whamcloud - gitweb
LU-15513 lod: skip uninit component in lod_fill_mirrors 35/46435/5
author Andreas Dilger <adilger@whamcloud.com>
Wed, 2 Feb 2022 22:05:18 +0000 (15:05 -0700)
committer Oleg Drokin <green@whamcloud.com>
Sat, 5 Mar 2022 20:43:12 +0000 (20:43 +0000)
Do not iterate over the "objects" in lod_fill_mirrors() to check
for non-rotational OSTs if the component is uninitialized.  In
cases where an OST is not present (e.g. when sparse OST indexes are
used), the lod_tgt_desc[] array has holes and OST_TGT() returns NULL,
which the old loop dereferenced without checking.

Skip the loop entirely if the component is not initialized, but
also add some sanity checks to verify that the OST index values
are sane in case there are other problems in the future (e.g.
corrupt/invalid layout on disk).

Fixes: 8507472dd37e ("LU-14996 lov: prefer mirrors on non-rotational OSTs")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I8ec23367059a4ec9e483adb768095b24f03ebbe5
Reviewed-on: https://review.whamcloud.com/46435
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lod/lod_lov.c
lustre/tests/conf-sanity.sh

index 47ebff7..4017dca 100644 (file)
@@ -605,17 +605,34 @@ int lod_fill_mirrors(struct lod_object *lo)
        lod_comp = &lo->ldo_comp_entries[0];
 
        for (i = 0; i < lo->ldo_comp_cnt; i++, lod_comp++) {
-               int stale = !!(lod_comp->llc_flags & LCME_FL_STALE);
-               int preferred = !!(lod_comp->llc_flags & LCME_FL_PREF_WR);
+               bool stale = lod_comp->llc_flags & LCME_FL_STALE;
+               bool preferred = lod_comp->llc_flags & LCME_FL_PREF_WR;
+               bool init = lod_comp_inited(lod_comp);
                int j;
 
                pref = 0;
                /* calculate component preference over all used OSTs */
-               for (j = 0; j < lod_comp->llc_stripes_allocated; j++) {
-                       int idx = lod_comp->llc_ost_indices[j];
-                       struct obd_statfs *osfs = &OST_TGT(lod,idx)->ltd_statfs;
+               for (j = 0; init && j < lod_comp->llc_stripes_allocated; j++) {
+                       __u32 idx = lod_comp->llc_ost_indices[j];
+                       struct lod_tgt_desc *ltd;
 
-                       if (osfs->os_state & OS_STATFS_NONROT)
+                       if (unlikely(idx > lod->lod_ost_descs.ltd_tgts_size)) {
+                               CERROR("%s: "DFID" OST idx %u > max %u\n",
+                                      lod2obd(lod)->obd_name,
+                                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+                                      idx, lod->lod_ost_descs.ltd_tgts_size);
+                               continue;
+                       }
+                       ltd = OST_TGT(lod, idx);
+                       if (unlikely(!ltd)) {
+                               CERROR("%s: "DFID" OST idx %u is NULL\n",
+                                      lod2obd(lod)->obd_name,
+                                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+                                      idx);
+                               continue;
+                       }
+
+                       if (ltd->ltd_statfs.os_state & OS_STATFS_NONROT)
                                pref++;
                }
 
@@ -1033,7 +1050,7 @@ int validate_lod_and_idx(struct lod_device *md, __u32 idx)
  * Instantiate objects for stripes.
  *
  * Allocate and initialize LU-objects representing the stripes. The number
- * of the stripes (ldo_stripe_count) must be initialized already. The caller
+ * of the stripes (llc_stripe_count) must be initialized already. The caller
  * must ensure nobody else is calling the function on the object at the same
  * time. FLDB service must be running to be able to map a FID to the targets
  * and find appropriate device representing that target.
index 4e0416e..52b83d1 100644 (file)
@@ -2921,7 +2921,7 @@ test_32g() {
 run_test 32g "flr/dom upgrade test"
 
 test_33a() { # bug 12333, was test_33
-       local FSNAME2=test-123
+       local FSNAME2=test-$testnum
        local MDSDEV=$(mdsdevname ${SINGLEMDS//mds/})
        local mkfsoptions
 
@@ -4764,6 +4764,12 @@ test_56a() {
        echo ok
        $LFS osts
 
+       # test instantiating PFL components with sparse index LU-15513
+       mkdir -p $MOUNT/$tdir
+       $LFS setstripe -E 4M -c 1 -E 1G -c 4 -S4M -E eof -c -1 $MOUNT/$tdir
+       dd if=/dev/zero of=$MOUNT/$tdir/$tfile bs=4K count=1 seek=10k ||
+               error "dd to second component failed"
+
        if [[ "$MDS1_VERSION" -ge $(version_code 2.6.54) ]] ||
           [[ "$MDS1_VERSION" -ge $(version_code 2.5.4) &&
              "$MDS1_VERSION" -lt $(version_code 2.5.11) ]]; then