Whamcloud - gitweb
LU-17334 lov: handle object created on newly added OST 35/53335/8
authorAndreas Dilger <adilger@whamcloud.com>
Tue, 5 Dec 2023 20:45:44 +0000 (12:45 -0800)
committerOleg Drokin <green@whamcloud.com>
Wed, 20 Dec 2023 01:58:13 +0000 (01:58 +0000)
When a new OST is added to a filesystem without no_create,
a new object may be created on the OST relatively quickly
after it is added to the filesystem, in particular because
the new OST would be preferred by QOS space balancing
due to lots of free space. However, it might take a few
seconds for the addition of the new OST to be propagated
across all of the clients, so there is a risk that the MDS
creates file objects on OSTs that a client is not yet aware of,
which returns an error to the application immediately.

This patch fixes the issue by adding a retry loop in lsme_unpack()
that waits, in one-second steps for up to RECONNECT_DELAY_MAX
seconds, for the filesystem layout to be updated if either the
"loi->loi_ost_idx >= lov->desc.ld_tgt_count" or "!ltd"
condition is hit, and returns -EINVAL if the OST is still
unknown when the wait expires.

Change-Id: Idc29b8c66079afaea25428577daf51370fa2b084
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Jian Yu <yujian@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53335
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/lov/lov_ea.c

index 62656c3..17cf85c 100644 (file)
@@ -196,6 +196,7 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
        loff_t lov_bytes;
        u32 magic;
        u32 pattern;
+       time64_t retry_limit = 0;
        unsigned int stripe_count;
        unsigned int i;
        int rc;
@@ -258,7 +259,9 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
 
        for (i = 0; i < stripe_count; i++) {
                struct lov_oinfo *loi;
-               struct lov_tgt_desc *ltd;
+               struct lov_tgt_desc *ltd = NULL;
+               static time64_t next_print;
+               unsigned int level;
 
                OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
                if (!loi)
@@ -272,21 +275,41 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
                if (lov_oinfo_is_dummy(loi))
                        continue;
 
-               if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
-                   !lov2obd(lov)->obd_process_conf) {
-                       CERROR("%s: OST index %d more than OST count %d\n",
-                              (char*)lov->desc.ld_uuid.uuid,
-                              loi->loi_ost_idx, lov->desc.ld_tgt_count);
-                       lov_dump_lmm_v1(D_WARNING, lmm);
-                       GOTO(out_lsme, rc = -EINVAL);
-               }
+retry_new_ost:
+               if (unlikely(loi->loi_ost_idx >= lov->desc.ld_tgt_count ||
+                            !(ltd = lov->lov_tgts[loi->loi_ost_idx]))) {
+                       time64_t now = ktime_get_seconds();
+
+                       /* print message on the first hit, error if giving up */
+                       if (retry_limit == 0) {
+                               level = now > next_print ? D_WARNING : D_INFO;
+                               retry_limit = now + RECONNECT_DELAY_MAX;
+                       } else if (now > retry_limit) {
+                               level = D_ERROR;
+                       } else {
+                               level = D_INFO;
+                       }
 
-               ltd = lov->lov_tgts[loi->loi_ost_idx];
-               if (!ltd) {
-                       CERROR("%s: OST index %d missing\n",
-                              (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
+                       /* log debug every loop, just to see it is trying */
+                       CDEBUG_LIMIT(level,
+                               loi->loi_ost_idx < lov->desc.ld_tgt_count ?
+                               "%s: FID "DOSTID" OST index %d/%u missing\n" :
+                               "%s: FID "DOSTID" OST index %d more than OST count %u\n",
+                               lov->desc.ld_uuid.uuid, POSTID(&loi->loi_oi),
+                               loi->loi_ost_idx, lov->desc.ld_tgt_count);
+                       if (now > next_print) {
+                               LCONSOLE_INFO("%s: wait %ds while client connects to new OST\n",
+                                             lov->desc.ld_uuid.uuid,
+                                             (int)(retry_limit - now));
+                               next_print = retry_limit + 600;
+                       }
+                       if (now < retry_limit) {
+                               rc = schedule_timeout_interruptible(cfs_time_seconds(1));
+                               if (rc == 0)
+                                       goto retry_new_ost;
+                       }
                        lov_dump_lmm_v1(D_WARNING, lmm);
-                       continue;
+                       GOTO(out_lsme, rc = -EINVAL);
                }
 
                lov_bytes = lov_tgt_maxbytes(ltd);