summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
0feec5a)
A timed-out RPC sent by fld_client_rpc() may cause the client operation
to fail.
Make fld_client_rpc() retry the RPC after a delay when it fails with -EAGAIN.
A test illustrating the issue is added.
A typo in the failure simulation in fld_client_rpc() is fixed.
recovery-small.sh:test_110k() is changed so that fld_client_rpc()
fails only once; otherwise it would fall into an endless loop.
HPE-bug-id: LUS-8652
Fixes:
e3f6111dfd1c ("LU-11761 fld: lets caller to retry FLD_QUERY")
Signed-off-by: Vladimir Saveliev <vlaidimir.saveliev@hpe.com>
Change-Id: I145e719ec2fb5f5dbf9b5aa4b2a5b7e62f98c19f
Reviewed-on: https://review.whamcloud.com/38302
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
do {
rc = fld_client_rpc(fld->lsf_control_exp, range, FLD_READ,
&req);
do {
rc = fld_client_rpc(fld->lsf_control_exp, range, FLD_READ,
&req);
- if (rc != 0 && rc != -EAGAIN)
GOTO(out, rc);
LASSERT(req != NULL);
GOTO(out, rc);
LASSERT(req != NULL);
#include <libcfs/libcfs.h>
#include <linux/module.h>
#include <linux/math64.h>
#include <libcfs/libcfs.h>
#include <linux/module.h>
#include <linux/math64.h>
+#include <linux/delay.h>
#include <obd.h>
#include <obd_class.h>
#include <obd.h>
#include <obd_class.h>
LASSERT(exp != NULL);
imp = class_exp2cliimp(exp);
LASSERT(exp != NULL);
imp = class_exp2cliimp(exp);
switch (fld_op) {
case FLD_QUERY:
req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY,
switch (fld_op) {
case FLD_QUERY:
req = ptlrpc_request_alloc_pack(imp, &RQF_FLD_QUERY,
req->rq_reply_portal = MDC_REPLY_PORTAL;
ptlrpc_at_set_req_timeout(req);
req->rq_reply_portal = MDC_REPLY_PORTAL;
ptlrpc_at_set_req_timeout(req);
- if (OBD_FAIL_CHECK(OBD_FAIL_FLD_QUERY_REQ && req->rq_no_delay)) {
+ if (OBD_FAIL_CHECK(OBD_FAIL_FLD_QUERY_REQ) && req->rq_no_delay) {
/* the same error returned by ptlrpc_import_delay_req */
rc = -EAGAIN;
req->rq_status = rc;
/* the same error returned by ptlrpc_import_delay_req */
rc = -EAGAIN;
req->rq_status = rc;
imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS &&
OCD_HAS_FLAG(&imp->imp_connect_data, LIGHTWEIGHT) &&
rc != -ENOTSUPP) {
imp->imp_connect_flags_orig & OBD_CONNECT_MDS_MDS &&
OCD_HAS_FLAG(&imp->imp_connect_data, LIGHTWEIGHT) &&
rc != -ENOTSUPP) {
- /*
- * Since LWP is not replayable, so notify the caller
- * to retry if needed after a while.
- */
+ /* LWP is not replayable, retry after a while */
+ if (rc == -EAGAIN) {
+ ptlrpc_req_finished(req);
+ if (msleep_interruptible(2 * MSEC_PER_SEC))
+ GOTO(out_req, rc = -EINTR);
+ rc = 0;
+ goto again;
+ }
stop mds2 || error "stop mds2 failed"
#define OBD_FAIL_FLD_QUERY_REQ 0x1103
stop mds2 || error "stop mds2 failed"
#define OBD_FAIL_FLD_QUERY_REQ 0x1103
- do_facet mds2 lctl set_param fail_loc=0x1103
+ do_facet mds2 lctl set_param fail_loc=0x80001103
local OPTS="$MDS_MOUNT_OPTS -o abort_recovery"
start mds2 $(mdsdevname 2) $OPTS ||
error "start MDS with abort_recovery should succeed"
local OPTS="$MDS_MOUNT_OPTS -o abort_recovery"
start mds2 $(mdsdevname 2) $OPTS ||
error "start MDS with abort_recovery should succeed"
}
run_test 24G "migrate symlink in rename"
}
run_test 24G "migrate symlink in rename"
+test_24H() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+ [[ $(hostname) != $(facet_active_host mds2) ]] ||
+ skip "MDT1 should be on another node"
+
+ test_mkdir -i 1 -c 1 $DIR/$tdir
+#define OBD_FAIL_FLD_QUERY_REQ 0x1103
+ do_facet mds2 $LCTL set_param fail_loc=0x80001103
+ touch $DIR/$tdir/$tfile || error "touch failed"
+}
+run_test 24H "repeat FLD_QUERY rpc"
+
test_25a() {
echo '== symlink sanity ============================================='
test_25a() {
echo '== symlink sanity ============================================='