ptlrpc_connection_put(imp->imp_connection);
imp->imp_connection = NULL;
- dlmexp = class_conn2export(&imp->imp_dlm_handle);
- if (dlmexp && dlmexp->exp_connection) {
- LASSERT(dlmexp->exp_connection ==
- imp_conn->oic_conn);
- ptlrpc_connection_put(dlmexp->exp_connection);
- dlmexp->exp_connection = NULL;
- }
- }
+ dlmexp = class_conn2export(&imp->imp_dlm_handle);
+ if (dlmexp && dlmexp->exp_connection) {
+ LASSERT(dlmexp->exp_connection ==
+ imp_conn->oic_conn);
+ ptlrpc_connection_put(dlmexp->exp_connection);
+ dlmexp->exp_connection = NULL;
+ }
+
+ if (dlmexp != NULL)
+ class_export_put(dlmexp);
+ }
list_del(&imp_conn->oic_item);
ptlrpc_connection_put(imp_conn->oic_conn);
bool is_mdc = false;
ENTRY;
- *exp = NULL;
+ *exp = NULL;
down_write(&cli->cl_sem);
if (cli->cl_conn_count > 0)
GOTO(out_sem, rc = -EALREADY);
- rc = class_connect(&conn, obd, cluuid);
- if (rc)
- GOTO(out_sem, rc);
+ rc = class_connect(&conn, obd, cluuid);
+ if (rc)
+ GOTO(out_sem, rc);
- cli->cl_conn_count++;
- *exp = class_conn2export(&conn);
+ cli->cl_conn_count++;
+ *exp = class_conn2export(&conn);
- LASSERT(obd->obd_namespace);
+ LASSERT(obd->obd_namespace);
- imp->imp_dlm_handle = conn;
- rc = ptlrpc_init_import(imp);
- if (rc != 0)
- GOTO(out_ldlm, rc);
+ imp->imp_dlm_handle = conn;
+ rc = ptlrpc_init_import(imp);
+ if (rc != 0)
+ GOTO(out_ldlm, rc);
- ocd = &imp->imp_connect_data;
- if (data) {
- *ocd = *data;
+ ocd = &imp->imp_connect_data;
+ if (data) {
+ *ocd = *data;
is_mdc = strncmp(imp->imp_obd->obd_type->typ_name,
LUSTRE_MDC_NAME, 3) == 0;
if (is_mdc)
data->ocd_connect_flags |= OBD_CONNECT_MULTIMODRPCS;
- imp->imp_connect_flags_orig = data->ocd_connect_flags;
- }
+ imp->imp_connect_flags_orig = data->ocd_connect_flags;
+ imp->imp_connect_flags2_orig = data->ocd_connect_flags2;
+ }
- rc = ptlrpc_connect_import(imp);
- if (rc != 0) {
- LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
- GOTO(out_ldlm, rc);
- }
+ rc = ptlrpc_connect_import(imp);
+ if (rc != 0) {
+ if (data && is_mdc)
+ data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS;
+ LASSERT(imp->imp_state == LUSTRE_IMP_DISCON);
+ GOTO(out_ldlm, rc);
+ }
LASSERT(*exp != NULL && (*exp)->exp_connection);
- if (data) {
- LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) ==
- ocd->ocd_connect_flags, "old "LPX64", new "LPX64"\n",
- data->ocd_connect_flags, ocd->ocd_connect_flags);
- data->ocd_connect_flags = ocd->ocd_connect_flags;
+ if (data) {
+ LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) ==
+ ocd->ocd_connect_flags, "old %#llx, new %#llx\n",
+ data->ocd_connect_flags, ocd->ocd_connect_flags);
+ data->ocd_connect_flags = ocd->ocd_connect_flags;
/* clear the flag as it was not set and is not known
* by upper layers */
if (is_mdc)
data->ocd_connect_flags &= ~OBD_CONNECT_MULTIMODRPCS;
- }
+ }
- ptlrpc_pinger_add_import(imp);
+ ptlrpc_pinger_add_import(imp);
- EXIT;
+ EXIT;
- if (rc) {
+ if (rc) {
out_ldlm:
- cli->cl_conn_count--;
- class_disconnect(*exp);
- *exp = NULL;
- }
+ cli->cl_conn_count--;
+ class_disconnect(*exp);
+ *exp = NULL;
+ }
out_sem:
up_write(&cli->cl_sem);
ENTRY;
if (!obd) {
- CERROR("invalid export for disconnect: exp %p cookie "LPX64"\n",
+ CERROR("invalid export for disconnect: exp %p cookie %#llx\n",
exp, exp ? exp->exp_handle.h_cookie : -1);
RETURN(-EINVAL);
}
if (!exp->exp_connection || !lustre_handle_is_used(hdl)) {
conn->cookie = exp->exp_handle.h_cookie;
CDEBUG(D_HA, "connect export for UUID '%s' at %p,"
- " cookie "LPX64"\n", cluuid->uuid, exp, conn->cookie);
+ " cookie %#llx\n", cluuid->uuid, exp, conn->cookie);
RETURN(0);
}
/* Might be a re-connect after a partition. */
if (memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) {
LCONSOLE_WARN("%s: already connected client %s (at %s) "
- "with handle "LPX64". Rejecting client "
+ "with handle %#llx. Rejecting client "
"with the same UUID trying to reconnect "
- "with handle "LPX64"\n", target->obd_name,
+ "with handle %#llx\n", target->obd_name,
obd_uuid2str(&exp->exp_client_uuid),
obd_export_nid2str(exp),
hdl->cookie, conn->cookie);
class_export_put(export);
export = NULL;
rc = -EALREADY;
- } else if ((mds_conn || lw_client) && export->exp_connection != NULL) {
+ } else if ((mds_conn || lw_client ||
+ data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
+ export->exp_connection != NULL) {
spin_unlock(&export->exp_lock);
- if (req->rq_peer.nid != export->exp_connection->c_peer.nid)
+ if (req->rq_peer.nid != export->exp_connection->c_peer.nid) {
/* MDS or LWP reconnected after failover. */
LCONSOLE_WARN("%s: Received %s connection from "
"%s, removing former export from %s\n",
target->obd_name, mds_conn ? "MDS" : "LWP",
libcfs_nid2str(req->rq_peer.nid),
libcfs_nid2str(export->exp_connection->c_peer.nid));
- else
+ } else {
/* New MDS connection from the same NID. */
LCONSOLE_WARN("%s: Received new %s connection from "
"%s, removing former export from same NID\n",
target->obd_name, mds_conn ? "MDS" : "LWP",
libcfs_nid2str(req->rq_peer.nid));
- class_fail_export(export);
- class_export_put(export);
- export = NULL;
- rc = 0;
+ }
+
+ if (req->rq_peer.nid == export->exp_connection->c_peer.nid &&
+ data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) {
+ /* Because exports between MDTs are always
+ * kept, do not fail such an export if it
+ * comes from the same NID; otherwise it might
+ * cause eviction between MDTs, which might
+ * cause namespace inconsistency */
+ spin_lock(&export->exp_lock);
+ export->exp_connecting = 1;
+ spin_unlock(&export->exp_lock);
+ conn.cookie = export->exp_handle.h_cookie;
+ rc = EALREADY;
+ } else {
+ class_fail_export(export);
+ class_export_put(export);
+ export = NULL;
+ rc = 0;
+ }
} else if (export->exp_connection != NULL &&
req->rq_peer.nid != export->exp_connection->c_peer.nid &&
(lustre_msg_get_op_flags(req->rq_reqmsg) &
GOTO(out, rc);
}
- CDEBUG(D_HA, "%s: connection from %s@%s %st"LPU64" exp %p cur %ld last %ld\n",
+ CDEBUG(D_HA, "%s: connection from %s@%s %st%llu exp %p cur %ld last %ld\n",
target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid),
target->obd_recovering ? "recovering/" : "", data->ocd_transno,
export, (long)cfs_time_current_sec(),
next_transno = obd->obd_next_recovery_transno;
CDEBUG(D_HA, "max: %d, connected: %d, completed: %d, queue_len: %d, "
- "req_transno: "LPU64", next_transno: "LPU64"\n",
+ "req_transno: %llu, next_transno: %llu\n",
obd->obd_max_recoverable_clients, connected, completed,
queue_len, req_transno, next_transno);
wake_up = 1;
} else if (tdtd != NULL && req != NULL &&
is_req_replayed_by_update(req)) {
- LASSERTF(req_transno < next_transno, "req_transno "LPU64
- "next_transno"LPU64"\n", req_transno, next_transno);
- CDEBUG(D_HA, "waking for duplicate req ("LPU64")\n",
+ LASSERTF(req_transno < next_transno, "req_transno %llu"
+ "next_transno%llu\n", req_transno, next_transno);
+ CDEBUG(D_HA, "waking for duplicate req (%llu)\n",
req_transno);
wake_up = 1;
} else if (req_transno == next_transno ||
(update_transno != 0 && update_transno <= next_transno)) {
- CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno);
+ CDEBUG(D_HA, "waking for next (%lld)\n", next_transno);
wake_up = 1;
} else if (queue_len > 0 &&
queue_len == atomic_read(&obd->obd_req_replay_clients)) {
- int d_lvl = D_HA;
/** handle gaps occurred due to lost reply or VBR */
LASSERTF(req_transno >= next_transno,
- "req_transno: "LPU64", next_transno: "LPU64"\n",
+ "req_transno: %llu, next_transno: %llu\n",
req_transno, next_transno);
- if (req_transno > obd->obd_last_committed &&
- !obd->obd_version_recov)
- d_lvl = D_ERROR;
- CDEBUG(d_lvl,
+ CDEBUG(D_HA,
"%s: waking for gap in transno, VBR is %s (skip: "
- LPD64", ql: %d, comp: %d, conn: %d, next: "LPD64
- ", next_update "LPD64" last_committed: "LPD64")\n",
+ "%lld, ql: %d, comp: %d, conn: %d, next: %lld"
+ ", next_update %lld last_committed: %lld)\n",
obd->obd_name, obd->obd_version_recov ? "ON" : "OFF",
next_transno, queue_len, completed, connected,
req_transno, update_transno, obd->obd_last_committed);
wake_up = 1;
} else if (OBD_FAIL_CHECK(OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS)) {
CDEBUG(D_HA, "accepting transno gaps is explicitly allowed"
- " by fail_lock, waking up ("LPD64")\n", next_transno);
+ " by fail_lock, waking up (%lld)\n", next_transno);
obd->obd_next_recovery_transno = req_transno;
wake_up = 1;
}
* clients */
abort_req_replay_queue(obd);
abort_lock_replay_queue(obd);
- CDEBUG(D_HA, "%s: there are still update replay ("LPX64
+ CDEBUG(D_HA, "%s: there are still update replay (%#llx"
")in the queue.\n", obd->obd_name,
next_update_transno);
} else {
struct obd_device *obd,
struct ptlrpc_request *req)
{
- DEBUG_REQ(D_HA, req, "remove t"LPD64" from %s because of duplicate"
+ DEBUG_REQ(D_HA, req, "remove t%lld from %s because of duplicate"
" update records are found.\n",
lustre_msg_get_transno(req->rq_reqmsg),
libcfs_nid2str(req->rq_peer.nid));
__u64 transno;
ENTRY;
- CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+ CDEBUG(D_HA, "Waiting for transno %lld\n",
obd->obd_next_recovery_transno);
/* Replay all of request and update by transno */
}
LASSERT(trd->trd_processing_task == current_pid());
- DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s",
+ DEBUG_REQ(D_HA, req, "processing t%lld from %s",
lustre_msg_get_transno(req->rq_reqmsg),
libcfs_nid2str(req->rq_peer.nid));
extend_recovery_timer(obd, obd_timeout, true);
if (rc == 0 && dtrq->dtrq_xid != 0) {
- CDEBUG(D_HA, "Move x"LPU64" t"LPU64
+ CDEBUG(D_HA, "Move x%llu t%llu"
" to finish list\n", dtrq->dtrq_xid,
dtrq->dtrq_master_transno);
/* next stage: replay requests or update */
delta = jiffies;
- CDEBUG(D_INFO, "1: request replay stage - %d clients from t"LPU64"\n",
+ CDEBUG(D_INFO, "1: request replay stage - %d clients from t%llu\n",
atomic_read(&obd->obd_req_replay_clients),
obd->obd_next_recovery_transno);
replay_request_or_update(env, lut, trd, thread);
}
CDEBUG(D_HA, "RECOVERY: service %s, %d recoverable clients, "
- "last_transno "LPU64"\n", obd->obd_name,
+ "last_transno %llu\n", obd->obd_name,
obd->obd_max_recoverable_clients, obd->obd_last_committed);
LASSERT(obd->obd_stopping == 0);
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
wake_up(&obd->obd_next_transno_waitq);
spin_lock(&obd->obd_recovery_task_lock);
if (obd->obd_recovering) {
+ struct ptlrpc_request *tmp;
+ struct ptlrpc_request *duplicate = NULL;
+
+ if (likely(!req->rq_export->exp_replay_done)) {
+ req->rq_export->exp_replay_done = 1;
+ list_add_tail(&req->rq_list,
+ &obd->obd_final_req_queue);
+ spin_unlock(&obd->obd_recovery_task_lock);
+ RETURN(0);
+ }
+
+ /* XXX O(n), but this only happens if the final
+ * ping timed out; consider reorganizing the list
+ * as a hash list later */
+ list_for_each_entry_safe(reqiter, tmp,
+ &obd->obd_final_req_queue,
+ rq_list) {
+ if (reqiter->rq_export == req->rq_export) {
+ list_del_init(&reqiter->rq_list);
+ duplicate = reqiter;
+ break;
+ }
+ }
+
list_add_tail(&req->rq_list,
- &obd->obd_final_req_queue);
+ &obd->obd_final_req_queue);
+ req->rq_export->exp_replay_done = 1;
+ spin_unlock(&obd->obd_recovery_task_lock);
+
+ if (duplicate != NULL) {
+ DEBUG_REQ(D_HA, duplicate,
+ "put prev final req\n");
+ target_request_copy_put(duplicate);
+ }
+ RETURN(0);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
target_request_copy_put(req);
RETURN(obd->obd_stopping ? -ENOTCONN : 1);
}
- spin_unlock(&obd->obd_recovery_task_lock);
- RETURN(0);
}
if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REQ_REPLAY_DONE) {
/* client declares he's ready to replay locks */
* Also, a resent, replayed request that has already been
* handled will pass through here and be processed immediately.
*/
- CDEBUG(D_HA, "Next recovery transno: "LPU64
- ", current: "LPU64", replaying\n",
+ CDEBUG(D_HA, "Next recovery transno: %llu"
+ ", current: %llu, replaying\n",
obd->obd_next_recovery_transno, transno);
/* If the request has been replayed by update replay, then sends this
"%d)", exp->exp_obd->obd_no_transno,
req->rq_repmsg == NULL);
- CDEBUG(D_INFO, "last_committed "LPU64", transno "LPU64", xid "LPU64"\n",
+ CDEBUG(D_INFO, "last_committed %llu, transno %llu, xid %llu\n",
exp->exp_last_committed, req->rq_transno, req->rq_xid);
}
rs->rs_opc = lustre_msg_get_opc(req->rq_reqmsg);
spin_lock(&exp->exp_uncommitted_replies_lock);
- CDEBUG(D_NET, "rs transno = "LPU64", last committed = "LPU64"\n",
+ CDEBUG(D_NET, "rs transno = %llu, last committed = %llu\n",
rs->rs_transno, exp->exp_last_committed);
if (rs->rs_transno > exp->exp_last_committed) {
/* not committed already */