Whamcloud - gitweb
LU-17334 lov: handle object created on newly added OST
authorAndreas Dilger <adilger@whamcloud.com>
Wed, 6 Dec 2023 18:32:57 +0000 (10:32 -0800)
committerAndreas Dilger <adilger@whamcloud.com>
Thu, 7 Dec 2023 11:11:04 +0000 (11:11 +0000)
When a new OST is added to a filesystem without no_create,
then a new object may be created on the OST relatively quickly
after it is added to the filesystem, in particular because
the new OST would be preferred by QOS space balancing
due to lots of free space. However, it might take a few
seconds for the addition of the new OST to be propagated
across all of the clients, so there is a risk that the MDS
creates file objects on OSTs that a client is not yet aware of,
which causes an error to be returned to the application immediately.

This patch fixes the issue by adding a loop in lsme_unpack()
that waits and retries for up to RECONNECT_DELAY_MAX seconds for
the filesystem layout to be updated if either the
"loi->loi_ost_idx >= lov->desc.ld_tgt_count" or "!ltd"
condition is hit.

Lustre-change: https://review.whamcloud.com/53335
Lustre-commit: TBD (from e1de624373ce6082253ddbdd987d36eb56ca6490)

Change-Id: Idc29b8c66079afaea25428577daf51370fa2b084
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Jian Yu <yujian@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53353
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/lov/lov_ea.c

index 1fe52c1..b1546e1 100644 (file)
@@ -206,6 +206,7 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
        loff_t lov_bytes;
        u32 magic;
        u32 pattern;
+       time64_t retry_limit = 0;
        unsigned int stripe_count;
        unsigned int i;
        int rc;
@@ -268,7 +269,9 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
 
        for (i = 0; i < stripe_count; i++) {
                struct lov_oinfo *loi;
-               struct lov_tgt_desc *ltd;
+               struct lov_tgt_desc *ltd = NULL;
+               static time64_t next_print;
+               unsigned int level;
 
                OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
                if (!loi)
@@ -282,21 +285,41 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
                if (lov_oinfo_is_dummy(loi))
                        continue;
 
-               if (loi->loi_ost_idx >= lov->desc.ld_tgt_count &&
-                   !lov2obd(lov)->obd_process_conf) {
-                       CERROR("%s: OST index %d more than OST count %d\n",
-                              (char*)lov->desc.ld_uuid.uuid,
-                              loi->loi_ost_idx, lov->desc.ld_tgt_count);
-                       lov_dump_lmm_v1(D_WARNING, lmm);
-                       GOTO(out_lsme, rc = -EINVAL);
-               }
+retry_new_ost:
+               if (unlikely(loi->loi_ost_idx >= lov->desc.ld_tgt_count ||
+                            !(ltd = lov->lov_tgts[loi->loi_ost_idx]))) {
+                       time64_t now = ktime_get_seconds();
+
+                       /* print message on the first hit, error if giving up */
+                       if (retry_limit == 0) {
+                               level = now > next_print ? D_WARNING : D_INFO;
+                               retry_limit = now + RECONNECT_DELAY_MAX;
+                       } else if (now > retry_limit) {
+                               level = D_ERROR;
+                       } else {
+                               level = D_INFO;
+                       }
 
-               ltd = lov->lov_tgts[loi->loi_ost_idx];
-               if (!ltd) {
-                       CERROR("%s: OST index %d missing\n",
-                              (char*)lov->desc.ld_uuid.uuid, loi->loi_ost_idx);
+                       /* log debug every loop, just to see it is trying */
+                       CDEBUG_LIMIT(level,
+                               loi->loi_ost_idx < lov->desc.ld_tgt_count ?
+                               "%s: FID "DOSTID" OST index %d/%u missing\n" :
+                               "%s: FID "DOSTID" OST index %d more than OST count %u\n",
+                               lov->desc.ld_uuid.uuid, POSTID(&loi->loi_oi),
+                               loi->loi_ost_idx, lov->desc.ld_tgt_count);
+                       if (now > next_print) {
+                               LCONSOLE_INFO("%s: wait %ds while client connects to new OST\n",
+                                             lov->desc.ld_uuid.uuid,
+                                             (int)(retry_limit - now));
+                               next_print = retry_limit + 600;
+                       }
+                       if (now < retry_limit) {
+                               rc = schedule_timeout_interruptible(cfs_time_seconds(1));
+                               if (rc == 0)
+                                       goto retry_new_ost;
+                       }
                        lov_dump_lmm_v1(D_WARNING, lmm);
-                       continue;
+                       GOTO(out_lsme, rc = -EINVAL);
                }
 
                lov_bytes = lov_tgt_maxbytes(ltd);