/** RPC stages */
enum rq_phase {
- RQ_PHASE_NEW = 0xebc0de00,
- RQ_PHASE_RPC = 0xebc0de01,
- RQ_PHASE_BULK = 0xebc0de02,
- RQ_PHASE_INTERPRET = 0xebc0de03,
- RQ_PHASE_COMPLETE = 0xebc0de04,
- RQ_PHASE_UNREG_RPC = 0xebc0de05,
- RQ_PHASE_UNREG_BULK = 0xebc0de06,
- RQ_PHASE_UNDEFINED = 0xebc0de07
+ RQ_PHASE_NEW = 0xebc0de00, /* assigned to rq_phase during request setup */
+ RQ_PHASE_RPC = 0xebc0de01,
+ RQ_PHASE_BULK = 0xebc0de02,
+ RQ_PHASE_INTERPRET = 0xebc0de03,
+ RQ_PHASE_COMPLETE = 0xebc0de04,
+ RQ_PHASE_UNREGISTERING = 0xebc0de05, /* reply or bulk still being unlinked; rq_next_phase keeps the phase this was entered from */
+ RQ_PHASE_UNDEFINED = 0xebc0de06 /* placeholder stored in rq_next_phase at request setup */
};
/** Type of request interpreter call-back */
time_t cr_reply_deadline;
/** when req bulk unlink must finish. */
time_t cr_bulk_deadline;
- /** when req unlink must finish. */
- time_t cr_req_deadline;
/** Portal to which this request would be sent */
short cr_req_ptl;
/** Portal where to wait for reply and where reply would be sent */
#define rq_real_sent rq_cli.cr_sent_out
#define rq_reply_deadline rq_cli.cr_reply_deadline
#define rq_bulk_deadline rq_cli.cr_bulk_deadline
-#define rq_req_deadline rq_cli.cr_req_deadline
#define rq_nr_resend rq_cli.cr_resend_nr
#define rq_request_portal rq_cli.cr_req_ptl
#define rq_reply_portal rq_cli.cr_rep_ptl
static inline const char *
ptlrpc_phase2str(enum rq_phase phase)
{
- switch (phase) {
- case RQ_PHASE_NEW:
- return "New";
- case RQ_PHASE_RPC:
- return "Rpc";
- case RQ_PHASE_BULK:
- return "Bulk";
- case RQ_PHASE_INTERPRET:
- return "Interpret";
- case RQ_PHASE_COMPLETE:
- return "Complete";
- case RQ_PHASE_UNREG_RPC:
- return "UnregRPC";
- case RQ_PHASE_UNREG_BULK:
- return "UnregBULK";
- default:
- return "?Phase?";
- }
+ switch (phase) { /* every branch returns a string literal, so the result is never NULL */
+ case RQ_PHASE_NEW:
+ return "New";
+ case RQ_PHASE_RPC:
+ return "Rpc";
+ case RQ_PHASE_BULK:
+ return "Bulk";
+ case RQ_PHASE_INTERPRET:
+ return "Interpret";
+ case RQ_PHASE_COMPLETE:
+ return "Complete";
+ case RQ_PHASE_UNREGISTERING:
+ return "Unregistering";
+ default: /* any value without a case above, RQ_PHASE_UNDEFINED included */
+ return "?Phase?";
+ }
}
/**
#define FLAG(field, str) (field ? str : "")
/** Convert bit flags into a string */
-#define DEBUG_REQ_FLAGS(req) \
- ptlrpc_rqphase2str(req), \
- FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
- FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \
- FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
- FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
- FLAG(req->rq_no_resend, "N"), \
- FLAG(req->rq_waiting, "W"), \
- FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \
- FLAG(req->rq_committed, "M")
-
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
+#define DEBUG_REQ_FLAGS(req) \
+ ptlrpc_rqphase2str(req), \
+ FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
+ FLAG(req->rq_err, "E"), \
+ FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
+ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
+ FLAG(req->rq_no_resend, "N"), \
+ FLAG(req->rq_waiting, "W"), \
+ FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \
+ FLAG(req->rq_committed, "M")
+
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s" /* keep in step with DEBUG_REQ_FLAGS: phase string, ':', then one %s per FLAG() entry */
void _debug_req(struct ptlrpc_request *req,
struct libcfs_debug_msg_data *data, const char *fmt, ...)
struct ptlrpc_bulk_desc *desc;
int rc;
- LASSERT(req != NULL);
+ LASSERT(req != NULL);
desc = req->rq_bulk;
- if (req->rq_bulk_deadline > cfs_time_current_sec())
- return 1;
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+ req->rq_bulk_deadline > cfs_time_current_sec())
+ return 1;
- if (!desc)
- return 0;
+ if (!desc)
+ return 0;
spin_lock(&desc->bd_lock);
rc = desc->bd_md_count;
if (req->rq_phase == new_phase)
return;
- if (new_phase == RQ_PHASE_UNREG_RPC ||
- new_phase == RQ_PHASE_UNREG_BULK) {
- /* No embedded unregistering phases */
- if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK)
- return;
-
+ if (new_phase == RQ_PHASE_UNREGISTERING) {
req->rq_next_phase = req->rq_phase;
if (req->rq_import)
atomic_inc(&req->rq_import->imp_unregistering);
}
- if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK) {
+ if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
if (req->rq_import)
atomic_dec(&req->rq_import->imp_unregistering);
}
static inline int
ptlrpc_client_early(struct ptlrpc_request *req)
{
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && /* gate applies only while this fail check fires (presumably fault injection - confirm) */
+ req->rq_reply_deadline > cfs_time_current_sec())
+ return 0; /* reply deadline still ahead: report no early reply */
return req->rq_early;
}
static inline int
ptlrpc_client_replied(struct ptlrpc_request *req)
{
- if (req->rq_reply_deadline > cfs_time_current_sec())
- return 0;
- return req->rq_replied;
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && /* gate applies only while this fail check fires (presumably fault injection - confirm) */
+ req->rq_reply_deadline > cfs_time_current_sec())
+ return 0; /* reply deadline still ahead: report the request as not replied */
+ return req->rq_replied;
}
/** Returns true if request \a req is in process of receiving server reply */
static inline int
ptlrpc_client_recv(struct ptlrpc_request *req)
{
- if (req->rq_reply_deadline > cfs_time_current_sec())
- return 1;
- return req->rq_receiving_reply;
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && /* gate applies only while this fail check fires (presumably fault injection - confirm) */
+ req->rq_reply_deadline > cfs_time_current_sec())
+ return 1; /* reply deadline still ahead: report the reply as still being received */
+ return req->rq_receiving_reply;
}
static inline int
int rc;
spin_lock(&req->rq_lock);
- if (req->rq_reply_deadline > cfs_time_current_sec()) {
- spin_unlock(&req->rq_lock);
- return 1;
- }
- if (req->rq_req_deadline > cfs_time_current_sec()) {
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ req->rq_reply_deadline > cfs_time_current_sec()) {
spin_unlock(&req->rq_lock);
return 1;
}
-
rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
req->rq_receiving_reply;
spin_unlock(&req->rq_lock);
#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515
#define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516
#define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517
-#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
-#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
-#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB3 0x520
#define OBD_FAIL_OBD_PING_NET 0x600
request->rq_reply_cbid.cbid_arg = request;
request->rq_reply_deadline = 0;
- request->rq_bulk_deadline = 0;
- request->rq_req_deadline = 0;
request->rq_phase = RQ_PHASE_NEW;
request->rq_next_phase = RQ_PHASE_UNDEFINED;
lustre_msg_set_opc(request->rq_reqmsg, opcode);
ptlrpc_assign_next_xid(request);
- /* Let's setup deadline for req/reply/bulk unlink for opcode. */
- if (cfs_fail_val == opcode) {
- time_t *fail_t = NULL, *fail2_t = NULL;
-
- if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
- fail_t = &request->rq_bulk_deadline;
- else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
- fail_t = &request->rq_reply_deadline;
- else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
- fail_t = &request->rq_req_deadline;
- else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
- fail_t = &request->rq_reply_deadline;
- fail2_t = &request->rq_bulk_deadline;
- }
-
- if (fail_t) {
- *fail_t = cfs_time_current_sec() + LONG_UNLINK;
-
- if (fail2_t)
- *fail2_t = cfs_time_current_sec() + LONG_UNLINK;
-
- /* The RPC is infected, let the test to change the
- * fail_loc */
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(cfs_time_seconds(2));
- set_current_state(TASK_RUNNING);
- }
- }
-
RETURN(0);
out_ctx:
if (!(req->rq_phase == RQ_PHASE_RPC ||
req->rq_phase == RQ_PHASE_BULK ||
req->rq_phase == RQ_PHASE_INTERPRET ||
- req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK)) {
- DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
- LBUG();
- }
+ req->rq_phase == RQ_PHASE_UNREGISTERING)) {
+ DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
+ LBUG();
+ }
- if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
- req->rq_phase == RQ_PHASE_UNREG_BULK) {
- LASSERT(req->rq_next_phase != req->rq_phase);
- LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
-
- if (req->rq_req_deadline &&
- !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
- req->rq_req_deadline = 0;
- if (req->rq_reply_deadline &&
- !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
- req->rq_reply_deadline = 0;
- if (req->rq_bulk_deadline &&
- !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
- req->rq_bulk_deadline = 0;
+ if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+ LASSERT(req->rq_next_phase != req->rq_phase);
+ LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
- /*
- * Skip processing until reply is unlinked. We
- * can't return to pool before that and we can't
- * call interpret before that. We need to make
- * sure that all rdma transfers finished and will
- * not corrupt any data.
- */
- if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
- ptlrpc_client_recv_or_unlink(req))
- continue;
- if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
- ptlrpc_client_bulk_active(req))
- continue;
+ /*
+ * Skip processing until reply is unlinked. We
+ * can't return to pool before that and we can't
+ * call interpret before that. We need to make
+ * sure that all rdma transfers finished and will
+ * not corrupt any data.
+ */
+ if (ptlrpc_client_recv_or_unlink(req) ||
+ ptlrpc_client_bulk_active(req))
+ continue;
/*
* Turn fail_loc off to prevent it from looping
continue;
if (req->rq_phase != RQ_PHASE_RPC &&
- req->rq_phase != RQ_PHASE_UNREG_RPC &&
+ req->rq_phase != RQ_PHASE_UNREGISTERING &&
!req->rq_allow_intr)
continue;
*/
LASSERT(!in_interrupt());
- /* Let's setup deadline for reply unlink. */
- if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
- async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
- request->rq_reply_deadline =
- cfs_time_current_sec() + LONG_UNLINK;
+ /*
+ * Let's setup deadline for reply unlink.
+ */
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
+ async && request->rq_reply_deadline == 0)
+ request->rq_reply_deadline = cfs_time_current_sec()+LONG_UNLINK;
/*
* Nothing left to do.
if (!ptlrpc_client_recv_or_unlink(request))
RETURN(1);
- /* Move to "Unregistering" phase as reply was not unlinked yet. */
- ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
+ /*
+ * Move to "Unregistering" phase as reply was not unlinked yet.
+ */
+ ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
/*
* Do not wait for unlink to finish.
req->rq_timeout = obd_timeout;
req->rq_sent = cfs_time_current_sec();
req->rq_deadline = req->rq_sent + req->rq_timeout;
+ req->rq_reply_deadline = req->rq_deadline;
req->rq_phase = RQ_PHASE_INTERPRET;
req->rq_next_phase = RQ_PHASE_COMPLETE;
req->rq_xid = ptlrpc_next_xid();
"still on delayed list");
}
- CERROR("%s: Unregistering RPCs found (%d). "
+ CERROR("%s: RPCs in \"%s\" phase found (%d). "
"Network is sluggish? Waiting them "
"to error out.\n", cli_tgt,
+ ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
atomic_read(&imp->imp_unregistering));
}
spin_unlock(&imp->imp_lock);
/* Let's setup deadline for reply unlink. */
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
- async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
+ async && req->rq_bulk_deadline == 0)
req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;
if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
if (ptlrpc_client_bulk_active(req) == 0) /* completed or */
RETURN(1); /* never registered */
- /* Move to "Unregistering" phase as bulk was not unlinked yet. */
- ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);
+ /* Move to "Unregistering" phase as bulk was not unlinked yet. */
+ ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
- /* Do not wait for unlink to finish. */
- if (async)
- RETURN(0);
+ /* Do not wait for unlink to finish. */
+ if (async)
+ RETURN(0);
for (;;) {
/* The wq argument is ignored by user-space wait_event macros */
/* Check if client was evicted while we were doing i/o before touching
* network */
if (likely(rc == 0 &&
- !CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2) &&
- !CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_BULK))) {
+ !CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2))) {
rc = target_bulk_io(exp, desc, &lwi);
no_reply = rc != 0;
}
df -h $MOUNT &
log "sleep 60 sec"
sleep 60
-#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
- do_facet client "$LCTL set_param fail_loc=0x50f fail_val=0"
+ #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
+ do_facet client "$LCTL set_param fail_loc=0x50f"
log "sleep 10 sec"
sleep 10
manual_umount_client --force || error "manual_umount_client failed"
}
run_test 112a "bulk resend while orignal request is in progress"
-test_115_read() {
- local fail1=$1
- local fail2=$2
-
- df $DIR
- dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1
- cancel_lru_locks osc
-
- # OST_READ = 3,
- $LCTL set_param fail_loc=$fail1 fail_val=3
- dd of=/dev/null if=$DIR/$tfile bs=4096 count=1 &
- pid=$!
- sleep 1
-
- set_nodes_failloc "$(osts_nodes)" $fail2
-
- wait $pid || error "dd failed"
- return 0
-}
-
-test_115_write() {
- local fail1=$1
- local fail2=$2
- local error=$3
-
- df $DIR
- touch $DIR/$tfile
-
- # OST_WRITE = 4,
- $LCTL set_param fail_loc=$fail1 fail_val=4
- dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 oflag=dsync &
- pid=$!
- sleep 1
-
- df $MOUNT
- set_nodes_failloc "$(osts_nodes)" $fail2
-
- wait $pid
- rc=$?
- [ $error -eq 0 ] && [ $rc -ne 0 ] && error "dd error ($rc)"
- [ $error -ne 0 ] && [ $rc -eq 0 ] && error "dd success"
- return 0
-}
-
-test_115a() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
- #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
- test_115_read 0x8000051b 0x8000051a
-}
-run_test 115a "read: late REQ MDunlink and no bulk"
-
-test_115b() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
- #define OBD_FAIL_OST_ENOSPC 0x215
- test_115_write 0x8000051b 0x80000215 1
-}
-run_test 115b "write: late REQ MDunlink and no bulk"
-
-test_115c() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
- #define OBD_FAIL_PTLRPC_DROP_BULK 0x51a
- test_115_read 0x8000050f 0x8000051a
-}
-run_test 115c "read: late Reply MDunlink and no bulk"
-
-test_115d() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f
- #define OBD_FAIL_OST_ENOSPC 0x215
- test_115_write 0x8000050f 0x80000215 0
-}
-run_test 115d "write: late Reply MDunlink and no bulk"
-
-test_115e() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510
- #define OBD_FAIL_OST_ALL_REPLY_NET 0x211
- test_115_read 0x80000510 0x80000211
-}
-run_test 115e "read: late Bulk MDunlink and no reply"
-
-test_115f() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b
- #define OBD_FAIL_OST_ALL_REPLY_NET 0x211
- test_115_read 0x8000051b 0x80000211
-}
-run_test 115f "read: late REQ MDunlink and no reply"
-
-test_115g() {
- [ $(lustre_version_code ost1) -lt $(version_code 2.8.50) ] &&
- skip "need at least 2.8.50 on OST" && return 0
-
- #define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
- test_115_read 0x8000051c 0
-}
-run_test 115g "read: late REQ MDunlink and Reply MDunlink"
-
# parameters: fail_loc CMD RC
test_120_reply() {
local PID