spinlock_t exp_lock; /* protects flags int below */
/* ^ protects exp_outstanding_replies too */
int exp_flags;
- int exp_failed:1;
- int exp_libclient:1; /* liblustre client? */
+ int exp_failed:1,
+ exp_replay_needed:1,
+ exp_libclient:1; /* liblustre client? */
union {
struct mds_export_data eu_mds_data;
struct filter_export_data eu_filter_data;
if (obd->obd_processing_task == current->pid ||
transno < obd->obd_next_recovery_transno) {
/* Processing the queue right now, don't re-add. */
+ lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT);
LASSERT(list_empty(&req->rq_list));
spin_unlock_bh(&obd->obd_processing_task_lock);
OBD_FREE(reqmsg, req->rq_reqlen);
list_add(&req->rq_list, &obd->obd_delayed_reply_queue);
spin_lock_bh(&obd->obd_processing_task_lock);
- --obd->obd_recoverable_clients;
+ /* only count the first "replay over" request from each
+ export */
+ if (req->rq_export->exp_replay_needed) {
+ --obd->obd_recoverable_clients;
+ req->rq_export->exp_replay_needed = 0;
+ }
recovery_done = (obd->obd_recoverable_clients == 0);
spin_unlock_bh(&obd->obd_processing_task_lock);
spin_lock_init(&med->med_open_lock);
mcd = NULL;
+ exp->exp_replay_needed = 1;
obd->obd_recoverable_clients++;
obd->obd_max_recoverable_clients++;
class_export_put(exp);
spin_lock_init(&fed->fed_lock);
fcd = NULL;
+ exp->exp_replay_needed = 1;
obd->obd_recoverable_clients++;
+ obd->obd_max_recoverable_clients++;
class_export_put(exp);
CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
LASSERT((void *)(niobuf - niocount) ==
lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0);
- spin_lock_irqsave(&req->rq_lock, flags);
- req->rq_no_resend = 1;
- spin_unlock_irqrestore(&req->rq_lock, flags);
/* size[0] still sizeof (*body) */
if (opc == OST_WRITE) {
rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
page_count, pga, &requested_nob, &niocount,
&request);
- /* NB ^ sets rq_no_resend */
-
if (rc != 0)
return (rc);
struct brw_page *pga = aa->aa_pga;
ENTRY;
- /* XXX bug 937 here */
- if (rc == -ETIMEDOUT && request->rq_resend) {
- DEBUG_REQ(D_HA, request, "BULK TIMEOUT");
- LBUG(); /* re-send. later. */
- //goto restart_bulk;
- }
-
rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
page_count, pga, rc);
RETURN (rc);
rc = osc_brw_prep_request(cmd, class_exp2cliimp(exp), oa, lsm,
page_count, pga, &requested_nob, &nio_count,
&request);
- /* NB ^ sets rq_no_resend */
-
if (rc == 0) {
LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
aa = (struct osc_brw_async_args *)&request->rq_async_args;
spin_lock_irqsave(&imp->imp_lock, flags);
if (imp->imp_state == LUSTRE_IMP_FULL) {
+ CERROR("%s: connection lost to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
if (imp->imp_invalid) {
IMPORT_SET_STATE(imp, LUSTRE_IMP_EVICTED);
} else if (MSG_CONNECT_RECOVERING & msg_flags) {
+ CDEBUG(D_HA, "%s: reconnected to %s during replay\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid);
imp->imp_resend_replay = 1;
IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY);
} else {
void * data, int rc)
{
atomic_dec(&req->rq_import->imp_replay_inflight);
- ptlrpc_import_recovery_state_machine(req->rq_import);
+ if (req->rq_status == 0) {
+ ptlrpc_import_recovery_state_machine(req->rq_import);
+ } else {
+ CDEBUG(D_HA, "%s: LAST_REPLAY message error: %d, "
+ "reconnecting\n",
+ req->rq_import->imp_obd->obd_name, req->rq_status);
+ ptlrpc_connect_import(req->rq_import, NULL);
+ }
+
RETURN(0);
}
GOTO(out, rc);
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
ptlrpc_activate_import(imp);
+ CERROR("%s: connection restored to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
}
if (imp->imp_state == LUSTRE_IMP_FULL) {