}
EXPORT_SYMBOL(client_import_add_conn);
+/**
+ * Add a connection identified by \a uuid to import \a imp, registering the
+ * UUID for \a prim_nid first when no connection can be looked up for it.
+ *
+ * \param[in] imp	import that receives the connection
+ * \param[in] uuid	UUID of the connection to add
+ * \param[in] prim_nid	NID handed to the connection lookup and, when the
+ *			lookup finds nothing, to class_add_uuid()
+ * \param[in] priority	forwarded unchanged to import_set_conn()
+ *
+ * \retval		non-zero result of class_add_uuid() if registering
+ *			the UUID failed
+ * \retval		result of import_set_conn() otherwise
+ *
+ * NOTE(review): the value returned by ptlrpc_uuid_to_connection() is only
+ * tested against NULL here. If that helper hands back a counted reference,
+ * nothing in this function releases it - confirm against its definition.
+ */
+int client_import_dyn_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+			       lnet_nid_t prim_nid, int priority)
+{
+	struct ptlrpc_connection *found;
+	const char *uuid_name;
+	int err;
+
+	found = ptlrpc_uuid_to_connection(uuid, prim_nid);
+	if (found != NULL)
+		return import_set_conn(imp, uuid, priority, 1);
+
+	/* Nothing known for this UUID yet: register it before attaching. */
+	uuid_name = obd_uuid2str(uuid);
+	err = class_add_uuid(uuid_name, prim_nid);
+	if (err != 0) {
+		CERROR("%s: failed to add UUID '%s': rc = %d\n",
+		       imp->imp_obd->obd_name, uuid_name, err);
+		return err;
+	}
+
+	return import_set_conn(imp, uuid, priority, 1);
+}
+EXPORT_SYMBOL(client_import_dyn_add_conn);
+
+/**
+ * Attach a set of NIDs to the import connection that already matches nids[0].
+ *
+ * Walks imp->imp_conn_list under imp->imp_lock. For the first connection
+ * whose UUID passes class_check_uuid() against nids[0], the connection UUID
+ * is copied to \a uuid and the whole NID array is handed to
+ * class_add_nids_to_uuid(). The search stops at that first match.
+ *
+ * \param[in]  imp		import whose connection list is searched
+ * \param[in]  nids		NID array; nids[0] selects the connection
+ * \param[in]  nid_count	number of entries in \a nids
+ * \param[out] uuid		receives the UUID of the matching connection;
+ *				it is written before the add result is known,
+ *				so it is set even if the add itself fails
+ *
+ * \retval -ENOENT	\a nids is NULL, \a nid_count is not positive, or no
+ *			connection matched nids[0]
+ * \retval		result of class_add_nids_to_uuid() otherwise
+ */
+int client_import_add_nids_to_conn(struct obd_import *imp, lnet_nid_t *nids,
+				   int nid_count, struct obd_uuid *uuid)
+{
+	struct obd_import_conn *conn;
+	int rc = -ENOENT;
+
+	ENTRY;
+	/* Leave through RETURN() so the ENTRY trace above stays balanced. */
+	if (nid_count <= 0 || !nids)
+		RETURN(rc);
+
+	spin_lock(&imp->imp_lock);
+	list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+		if (class_check_uuid(&conn->oic_uuid, nids[0])) {
+			*uuid = conn->oic_uuid;
+			rc = class_add_nids_to_uuid(&conn->oic_uuid, nids,
+						    nid_count);
+			break;
+		}
+	}
+	spin_unlock(&imp->imp_lock);
+	RETURN(rc);
+}
+EXPORT_SYMBOL(client_import_add_nids_to_conn);
+
int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
{
struct obd_import_conn *imp_conn;
if (lustre_cfg_buf(lcfg, 4)) {
__u32 refnet = libcfs_str2net(lustre_cfg_string(lcfg, 4));
- if (refnet == LNET_NIDNET(LNET_NID_ANY)) {
+ if (refnet == LNET_NET_ANY) {
rc = -EINVAL;
CERROR("%s: bad mount option 'network=%s': rc = %d\n",
obd->obd_name, lustre_cfg_string(lcfg, 4),
LDLM_NAMESPACE_CLIENT,
LDLM_NAMESPACE_GREEDY,
ns_type);
- if (obd->obd_namespace == NULL) {
- CERROR("Unable to create client namespace - %s\n",
- obd->obd_name);
- GOTO(err_import, rc = -ENOMEM);
+ if (IS_ERR(obd->obd_namespace)) {
+ rc = PTR_ERR(obd->obd_namespace);
+ CERROR("%s: unable to create client namespace: rc = %d\n",
+ obd->obd_name, rc);
+ obd->obd_namespace = NULL;
+ GOTO(err_import, rc);
}
RETURN(rc);
OBD_FREE(cli->cl_mod_tag_bitmap,
BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long));
cli->cl_mod_tag_bitmap = NULL;
- RETURN(rc);
+ RETURN(rc);
}
EXPORT_SYMBOL(client_obd_setup);
if (!target->obd_recovering || target->obd_recovery_start == 0)
return 0;
- remaining = hrtimer_expires_remaining(&target->obd_recovery_timer);
+ remaining = hrtimer_get_remaining(&target->obd_recovery_timer);
timeout = ktime_divns(remaining, NSEC_PER_SEC);
if (timeout > -30)
return 0;
GOTO(out_already, rc);
}
- remaining = hrtimer_expires_remaining(&target->obd_recovery_timer);
+ remaining = hrtimer_get_remaining(&target->obd_recovery_timer);
timeout = ktime_divns(remaining, NSEC_PER_SEC);
if (timeout > 0) {
LCONSOLE_WARN("%s: Client %s (at %s) reconnected, waiting for %d clients in recovery for %lld:%.02lld\n",
/* avoid sending a request until import flags are changed */
ptlrpc_import_enter_resend(revimp);
- if (revimp->imp_connection != NULL)
- ptlrpc_connection_put(revimp->imp_connection);
+ ptlrpc_connection_put(revimp->imp_connection);
/*
* client from recovery don't have a handle so we need to take from
} else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 &&
rc != EALREADY) {
if (!strstr(cluuid.uuid, "mdt"))
- LCONSOLE_WARN("%s: Rejecting reconnect from the known client %s (at %s) because it is indicating it is a new client",
+ LCONSOLE_WARN("%s: Rejecting reconnect from the known client %s (at %s) because it is indicating it is a new client\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid));
GOTO(out, rc = -EALREADY);
known =
atomic_read(&target->obd_max_recoverable_clients);
stale = target->obd_stale_clients;
- remaining = hrtimer_expires_remaining(timer);
+ remaining = hrtimer_get_remaining(timer);
left = ktime_divns(remaining, NSEC_PER_SEC);
if (ktime_to_ns(remaining) > 0) {
if (rc)
RETURN(rc);
+ /* In case of target disconnect, updating the sec ctx immediately is
+ * required in order to record the latest sequence number used.
+ * The sequence is normally updated on export destroy, but this event
+ * can occur too late, i.e. after a new target connect request has
+ * been processed.
+ * Maintaining the correct sequence when a client connection becomes
+ * idle ensures that GSS does not erroneously consider requests as
+ * replays.
+ */
+ rc = sptlrpc_export_update_ctx(req->rq_export);
+ if (rc)
+ RETURN(rc);
+
/* Keep the rq_export around so we can send the reply. */
req->rq_status = obd_disconnect(class_export_get(req->rq_export));
}
LASSERT(obd->obd_recovery_start != 0);
- left_ns = hrtimer_expires_remaining(&obd->obd_recovery_timer);
+ left_ns = hrtimer_get_remaining(&obd->obd_recovery_timer);
left = ktime_divns(left_ns, NSEC_PER_SEC);
if (extend) {
req_transno = lustre_msg_get_transno(req->rq_reqmsg);
}
- if (tdtd != NULL)
+ if (!obd->obd_abort_recov_mdt && tdtd)
update_transno = distribute_txn_get_next_transno(tdtd);
connected = atomic_read(&obd->obd_connected_clients);
} else if (obd->obd_recovery_expired) {
CDEBUG(D_HA, "waking for expired recovery\n");
wake_up = 1;
- } else if (tdtd != NULL && req != NULL &&
+ } else if (!obd->obd_abort_recov_mdt && tdtd && req &&
is_req_replayed_by_update(req)) {
LASSERTF(req_transno < next_transno,
"req_transno %llu next_transno%llu\n", req_transno,
return wake_up;
}
+/**
+ * Wake-up predicate for a waiter on the update-log recovery threads.
+ *
+ * \param[in] lut	target whose lut_obd and lut_tdtd are inspected;
+ *			lut_tdtd is dereferenced without a NULL check, so
+ *			the caller must only use this when it is set
+ *
+ * \retval 1	recovery was aborted (obd_abort_recovery is set), or
+ *		tdtd_recovery_threads_count has dropped to zero
+ * \retval 0	neither condition holds yet
+ */
+static int check_update_llog(struct lu_target *lut)
+{
+	int wake = 0;
+
+	if (lut->lut_obd->obd_abort_recovery) {
+		CDEBUG(D_HA, "waking for aborted recovery\n");
+		wake = 1;
+	} else if (atomic_read(
+			&lut->lut_tdtd->tdtd_recovery_threads_count) == 0) {
+		CDEBUG(D_HA, "waking for completion of reading update log\n");
+		wake = 1;
+	}
+
+	return wake;
+}
+
/**
* wait for recovery events,
* check its status with help of check_routine
* left in the queue
*/
spin_lock(&obd->obd_recovery_task_lock);
- if (lut->lut_tdtd != NULL) {
+ if (!obd->obd_abort_recov_mdt && lut->lut_tdtd) {
next_update_transno =
distribute_txn_get_next_transno(lut->lut_tdtd);
*/
if (next_update_transno == 0) {
spin_unlock(&obd->obd_recovery_task_lock);
- wait_event_idle(
+
+ while (wait_event_timeout(
tdtd->tdtd_recovery_threads_waitq,
- atomic_read(&tdtd->tdtd_recovery_threads_count)
- == 0);
+ check_update_llog(lut),
+ cfs_time_seconds(60)) == 0);
spin_lock(&obd->obd_recovery_task_lock);
next_update_transno =
- distribute_txn_get_next_transno(
- lut->lut_tdtd);
+ distribute_txn_get_next_transno(tdtd);
}
}
/** evict exports which didn't finish recovery yet */
class_disconnect_stale_exports(obd, exp_finished);
return 1;
- } else if (obd->obd_recovery_expired &&
- obd->obd_recovery_timeout < obd->obd_recovery_time_hard) {
+ } else if (obd->obd_recovery_expired) {
obd->obd_recovery_expired = 0;
/** If some clients died being recovered, evict them */
return 0;
}
- if (lut->lut_tdtd != NULL) {
+ if (!obd->obd_abort_recov_mdt && lut->lut_tdtd != NULL) {
if (!lut->lut_tdtd->tdtd_replay_ready &&
!obd->obd_abort_recovery && !obd->obd_stopping) {
/*
if (type != NULL)
*type = REQUEST_RECOVERY;
- if (tdtd == NULL)
+ if (!tdtd || obd->obd_abort_recov_mdt)
RETURN(transno);
update_transno = distribute_txn_get_next_transno(tdtd);
obd->obd_replayed_requests++;
}
+#define WATCHDOG_TIMEOUT (obd_timeout * 10)
+
static void replay_request_or_update(struct lu_env *env,
struct lu_target *lut,
struct target_recovery_data *trd,
lustre_msg_get_transno(req->rq_reqmsg),
libcfs_nid2str(req->rq_peer.nid));
+ ptlrpc_watchdog_init(&thread->t_watchdog,
+ WATCHDOG_TIMEOUT);
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
+ ptlrpc_watchdog_disable(&thread->t_watchdog);
+
/**
* bz18031: increase next_recovery_transno before
* target_request_copy_put() will drop exp_rpc reference
LASSERT(tdtd != NULL);
dtrq = distribute_txn_get_next_req(tdtd);
lu_context_enter(&thread->t_env->le_ctx);
+ ptlrpc_watchdog_init(&thread->t_watchdog,
+ WATCHDOG_TIMEOUT);
rc = tdtd->tdtd_replay_handler(env, tdtd, dtrq);
+ ptlrpc_watchdog_disable(&thread->t_watchdog);
lu_context_exit(&thread->t_env->le_ctx);
extend_recovery_timer(obd, obd_timeout, true);
int rc = 0;
ENTRY;
-
- unshare_fs_struct();
OBD_ALLOC_PTR(thread);
if (thread == NULL)
RETURN(-ENOMEM);
thread->t_env = env;
thread->t_id = -1; /* force filter_iobuf_get/put to use local buffers */
+ thread->t_task = current;
env->le_ctx.lc_thread = thread;
tgt_io_thread_init(thread); /* init thread_big_cache for IO requests */
LASSERT(trd->trd_processing_task == current->pid);
DEBUG_REQ(D_HA, req, "processing lock from %s:",
libcfs_nid2str(req->rq_peer.nid));
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_LOCK_REPLAY)) {
+ req->rq_status = -ENODEV;
+ target_request_copy_put(req);
+ continue;
+ }
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
target_request_copy_put(req);