static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
{
- struct ptlrpc_request *req = NULL;
- ENTRY;
+ struct ptlrpc_request *req = NULL;
+ ENTRY;
- CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
- obd->obd_next_recovery_transno);
+ CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+ obd->obd_next_recovery_transno);
- if (target_recovery_overseer(obd, check_for_next_transno,
- exp_req_replay_healthy)) {
- abort_req_replay_queue(obd);
- abort_lock_replay_queue(obd);
- }
+ CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+
+ if (target_recovery_overseer(obd, check_for_next_transno,
+ exp_req_replay_healthy)) {
+ abort_req_replay_queue(obd);
+ abort_lock_replay_queue(obd);
+ }
spin_lock(&obd->obd_recovery_task_lock);
if (!list_empty(&obd->obd_req_replay_queue)) {
RETURN(-ENOSYS);
}
- if (req->rq_export &&
- lustre_msg_get_flags(req->rq_reqmsg) &
- (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
- /* During recovery, we don't want to send too many early
- * replies, but on the other hand we want to make sure the
- * client has enough time to resend if the rpc is lost. So
- * during the recovery period send at least 4 early replies,
- * spacing them every at_extra if we can. at_estimate should
- * always equal this fixed value during recovery. */
- at_measured(&svcpt->scp_at_estimate, min(at_extra,
+ if (req->rq_export &&
+ lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) {
+ /* During recovery, we don't want to send too many early
+ * replies, but on the other hand we want to make sure the
+ * client has enough time to resend if the rpc is lost. So
+ * during the recovery period send at least 4 early replies,
+ * spacing them every at_extra if we can. at_estimate should
+ * always equal this fixed value during recovery. */
+ at_measured(&svcpt->scp_at_estimate,
+ cfs_time_current_sec() -
+ req->rq_arrival_time.tv_sec + min(at_extra,
req->rq_export->exp_obd->obd_recovery_timeout / 4));
} else {
/* We want to extend the request deadline by at_extra seconds,
cfs_time_current_sec() -
req->rq_arrival_time.tv_sec);
- /* Check to see if we've actually increased the deadline -
- * we may be past adaptive_max */
- if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate)) {
- DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
- "(%ld/%ld), not sending early reply\n",
- olddl, req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate) -
- cfs_time_current_sec());
- RETURN(-ETIMEDOUT);
- }
+ }
+ /* Check to see if we've actually increased the deadline -
+ * we may be past adaptive_max */
+ if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate)) {
+ DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
+ "(%ld/%ld), not sending early reply\n",
+ olddl, req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate) -
+ cfs_time_current_sec());
+ RETURN(-ETIMEDOUT);
}
reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS);
}
run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS"
+test_9() { # checks ost1's recent dmesg for 'Already past' after a failover with fail_loc 0x714 armed
+ [ $(lustre_version_code ost1) -ge $(version_code 2.6.54) ] ||
+ { skip "Need OST version at least 2.6.54"; return; }
+ $SETSTRIPE -i 0 -c 1 $DIR/$tfile # presumably: single stripe starting at OST index 0 - confirm
+ replay_barrier ost1 # NOTE(review): framework helper, semantics not visible here - confirm
+ # do IO: write 1MB of zeros into the file; a failed write aborts the test via error
+ dd if=/dev/zero of=$DIR/$tfile count=1 bs=1M > /dev/null ||
+ error "failed to write"
+ # failover, replay and resend replayed waiting request
+ #define OBD_FAIL_TGT_REPLAY_DELAY2 0x714
+ do_facet ost1 $LCTL set_param fail_loc=0x00000714
+ do_facet ost1 $LCTL set_param fail_val=$TIMEOUT # presumably the delay used by the fail point - confirm
+ fail ost1
+ do_facet ost1 $LCTL set_param fail_loc=0 # clear fail_loc again once the failover has been done
+ do_facet ost1 "dmesg | tail -n 100" |\
+ sed -n '/no req deadline/,$ p' | grep -q 'Already past' &&
+ return 1 # 'Already past' found at or after the first 'no req deadline' line in the last 100 dmesg lines
+ return 0 # no such message found: test passes
+}
+run_test 9 "Verify that no req deadline happened during recovery"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status