Whamcloud - gitweb
LU-14956 fld: repeat failed FLDB lookup 23/44723/13
author Alex Zhuravlev <bzzz@whamcloud.com>
Mon, 23 Aug 2021 07:29:18 +0000 (10:29 +0300)
committer Oleg Drokin <green@whamcloud.com>
Mon, 13 Dec 2021 03:53:24 +0000 (03:53 +0000)
It's possible that LWP reconnection is in progress after a remote
MDS restart. If the FLDB misses an entry, then the FLDB lookup can fail
with EAGAIN and the whole RPC processing (like MDS_REINT) can fail
as well. Retry the lookup a few times in case of EAGAIN.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: Ib6aeaf7706a6465b0c8bee696d985bb440ed192e
Reviewed-on: https://review.whamcloud.com/44723
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/fld/fld_handler.c

index c5fe552..0fbd690 100644 (file)
@@ -263,6 +263,8 @@ int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld,
                       fld->lsf_name, seq, -ENOENT);
                RETURN(-ENOENT);
        } else {
+               int i;
+
                if (!fld->lsf_control_exp) {
                        CERROR("%s: lookup %#llx, but not connects to MDT0 yet: rc = %d.\n",
                               fld->lsf_name, seq, -EIO);
@@ -274,8 +276,13 @@ int fld_server_lookup(const struct lu_env *env, struct lu_server_fld *fld,
                 * replication on all mdt servers.
                 */
                range->lsr_start = seq;
-               rc = fld_client_rpc(fld->lsf_control_exp,
-                                   range, FLD_QUERY, NULL);
+               for (i = 0; i < 5; i++) {
+                       rc = fld_client_rpc(fld->lsf_control_exp,
+                                           range, FLD_QUERY, NULL);
+                       if (rc != -EAGAIN)
+                               break;
+                       schedule_timeout_interruptible(cfs_time_seconds(1));
+               }
                if (rc == 0)
                        fld_cache_insert(fld->lsf_cache, range);
        }