From 978458e05db4cad21e3ee32384168f53fd3e2d72 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 14 Aug 2015 11:01:50 +0800 Subject: [PATCH] LU-7190 lfsck: tolerate MDT-OST communication failures During the 2nd phase scanning, the layout LFSCK slave engine on the OST periodically queries the master engine status from the MDT. Sometimes the query RPC may fail because of network trouble or MDS node issues. To allow the LFSCK to go ahead, the slave engine will not wait forever; instead, it will assume that the master engine has exited without notifying (or has failed to notify) the slave engine. So the slave engine will also exit and clean up the LFSCK environment on the OST, including the OST-object access bitmap that is used to find orphan OST-objects. On the other hand, the assumption that the master engine has exited may be wrong. If the master engine has not exited, and the network trouble between the MDS and the OSS is resolved after the slave engine has exited, then the master engine will try to find orphan OST-objects during its 2nd phase scanning. But because the slave engine has already exited and released the OST-object access bitmap, the master engine has no way to find the orphan OST-objects. To avoid the above trouble, we make a compromise: when the slave engine on the OST fails to query the master engine status, it will not exit at once; instead, it will retry several times. If the network trouble is resolved during that interval, the LFSCK will go ahead; otherwise, the slave engine will exit as it originally did. 
Signed-off-by: Fan Yong Change-Id: Ifa06552c61d885297a54ab6bfdc92d48c8f56fa3 Reviewed-on: http://review.whamcloud.com/16667 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/lfsck/lfsck_layout.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 903b4d3..0206c5b 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -72,6 +72,8 @@ struct lfsck_layout_slave_target { __u64 llst_gen; atomic_t llst_ref; __u32 llst_index; + /* How many times we have failed to get the master status. */ + int llst_failures; }; struct lfsck_layout_slave_data { @@ -3442,11 +3444,22 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, bool done = false; if (rc != 0) { - /* It is quite probably caused by target crash, - * to make the LFSCK can go ahead, assume that - * the target finished the LFSCK prcoessing. */ - done = true; + /* It is probably caused by network trouble, or target crash, + * it will try several times (depends on the obd_timeout, and + * will not less than 3 times). But to make the LFSCK can go + * ahead, we should not try for ever. After some try but still + * hit failure, it will assume that the target exit the LFSCK + * prcoessing and stop try. */ + if (rc == -ENOTCONN || rc == -ESHUTDOWN) { + int max_try = max_t(int, obd_timeout / 30, 3); + + if (++(llst->llst_failures) > max_try) + done = true; + } else { + done = true; + } } else { + llst->llst_failures = 0; lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY); if (lr->lr_status != LS_SCANNING_PHASE1 && lr->lr_status != LS_SCANNING_PHASE2) @@ -3455,8 +3468,9 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env, if (done) { CDEBUG(D_LFSCK, "%s: layout LFSCK slave gets the MDT %x " - "status %d\n", lfsck_lfsck2name(com->lc_lfsck), - llst->llst_index, lr != NULL ? 
lr->lr_status : rc); + "status %d, failures_try %d\n", lfsck_lfsck2name(com->lc_lfsck), + llst->llst_index, lr != NULL ? lr->lr_status : rc, + llst->llst_failures); lfsck_layout_llst_del(llsd, llst); } -- 1.8.3.1