* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
RETURN(rc);
}
+/**
+ * Find conn uuid by peer nid. @peer is a server nid. This function is used
+ * to find a conn uuid of @imp which can reach @peer.
+ */
+int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer,
+ struct obd_uuid *uuid)
+{
+ struct obd_import_conn *conn;
+ int rc = -ENOENT;
+ ENTRY;
+
+ cfs_spin_lock(&imp->imp_lock);
+ cfs_list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ /* check if conn uuid does have this peer nid */
+ if (class_check_uuid(&conn->oic_uuid, peer)) {
+ *uuid = conn->oic_uuid;
+ rc = 0;
+ break;
+ }
+ }
+ cfs_spin_unlock(&imp->imp_lock);
+ RETURN(rc);
+}
+EXPORT_SYMBOL(client_import_find_conn);
+
void client_destroy_import(struct obd_import *imp)
{
/* drop security policy instance after all rpc finished/aborted
CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
CFS_INIT_LIST_HEAD(&cli->cl_loi_write_list);
CFS_INIT_LIST_HEAD(&cli->cl_loi_read_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_sync_fs_list);
client_obd_list_lock_init(&cli->cl_loi_list_lock);
cli->cl_r_in_flight = 0;
cli->cl_w_in_flight = 0;
imp->imp_connect_flags_orig = data->ocd_connect_flags;
}
- rc = ptlrpc_connect_import(imp, NULL);
+ rc = ptlrpc_connect_import(imp);
if (rc != 0) {
LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
GOTO(out_ldlm, rc);
int rc = 0;
int mds_conn = 0;
struct obd_connect_data *data, *tmpdata;
+ int size, tmpsize;
lnet_nid_t *client_nid = NULL;
ENTRY;
if (!target || target->obd_stopping || !target->obd_set_up) {
LCONSOLE_ERROR_MSG(0x137, "UUID '%s' is not available "
- " for connect (%s)\n", str,
+ "for connect (%s)\n", str,
!target ? "no target" :
(target->obd_stopping ? "stopping" :
"not set up"));
conn = *tmp;
+ size = req_capsule_get_size(&req->rq_pill, &RMF_CONNECT_DATA,
+ RCL_CLIENT);
data = req_capsule_client_get(&req->rq_pill, &RMF_CONNECT_DATA);
if (!data)
GOTO(out, rc = -EPROTO);
export, (long)cfs_time_current_sec(),
export ? (long)export->exp_last_request_time : 0);
- /* Tell the client if we're in recovery. */
- if (target->obd_recovering) {
- lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
- /* If this is the first time a client connects,
- reset the recovery timer */
- if (rc == 0)
- target_start_and_reset_recovery_timer(target, req,
- !export);
- }
+ /* If this is the first time a client connects,
+ * reset the recovery timer */
+ if (rc == 0 && target->obd_recovering)
+ target_start_and_reset_recovery_timer(target, req, !export);
/* We want to handle EALREADY but *not* -EALREADY from
* target_handle_reconnect(), return reconnection state in a flag */
}
if (rc)
GOTO(out, rc);
+
+ LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
+ data->ocd_instance = target->u.obt.obt_instance;
+
/* Return only the parts of obd_connect_data that we understand, so the
* client knows that we don't understand the rest. */
if (data) {
- tmpdata = req_capsule_server_get(&req->rq_pill,
- &RMF_CONNECT_DATA);
- //data->ocd_connect_flags &= OBD_CONNECT_SUPPORTED;
- *tmpdata = *data;
+ tmpsize = req_capsule_get_size(&req->rq_pill, &RMF_CONNECT_DATA,
+ RCL_SERVER);
+ tmpdata = req_capsule_server_get(&req->rq_pill,
+ &RMF_CONNECT_DATA);
+ /* Don't use struct assignment here, because the client reply
+ * buffer may be smaller/larger than the local struct
+ * obd_connect_data. */
+ memcpy(tmpdata, data, min(tmpsize, size));
}
/* If all else goes well, this is our RPC return code. */
CWARN("Connect with zero transno!\n");
if ((lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_TRANSNO)
- && data->ocd_transno < target->obd_next_recovery_transno)
+ && data->ocd_transno < target->obd_next_recovery_transno &&
+ data->ocd_transno > target->obd_last_committed)
target->obd_next_recovery_transno = data->ocd_transno;
target->obd_connected_clients++;
cfs_atomic_inc(&target->obd_req_replay_clients);
cfs_waitq_signal(&target->obd_next_transno_waitq);
}
cfs_spin_unlock(&target->obd_recovery_task_lock);
+
+ /* Tell the client we're in recovery, when client is involved in it. */
+ if (target->obd_recovering)
+ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
+
tmp = req_capsule_client_get(&req->rq_pill, &RMF_CONN);
conn = *tmp;
cfs_atomic_inc(&req->rq_refcount);
/** let export know it has replays to be handled */
cfs_atomic_inc(&req->rq_export->exp_replay_count);
- /* release service thread while request is queued
- * we are moving the request from active processing
- * to waiting on the replay queue */
- ptlrpc_server_active_request_dec(req);
}
static void target_request_copy_put(struct ptlrpc_request *req)
cfs_atomic_dec(&req->rq_export->exp_replay_count);
class_export_rpc_put(req->rq_export);
- /* ptlrpc_server_drop_request() assumes the request is active */
- ptlrpc_server_active_request_inc(req);
ptlrpc_server_drop_request(req);
}
#ifdef __KERNEL__
static void target_finish_recovery(struct obd_device *obd)
{
+ time_t elapsed_time = max_t(time_t, 1, cfs_time_current_sec() -
+ obd->obd_recovery_start);
ENTRY;
- LCONSOLE_INFO("%s: sending delayed replies to recovered clients\n",
- obd->obd_name);
+
+ LCONSOLE_INFO("%s: Recovery over after %d:%.02d, of %d clients "
+ "%d recovered and %d %s evicted.\n", obd->obd_name,
+ (int)elapsed_time / 60, (int)elapsed_time % 60,
+ obd->obd_max_recoverable_clients,
+ obd->obd_connected_clients, obd->obd_stale_clients,
+ obd->obd_stale_clients == 1 ? "was" : "were");
ldlm_reprocess_all_ns(obd->obd_namespace);
cfs_spin_lock(&obd->obd_recovery_task_lock);
env.le_ctx.lc_thread = thread;
thread->t_data = NULL;
- CERROR("%s: started recovery thread pid %d\n", obd->obd_name,
+ CDEBUG(D_HA, "%s: started recovery thread pid %d\n", obd->obd_name,
cfs_curproc_pid());
trd->trd_processing_task = cfs_curproc_pid();
cfs_init_completion(&trd->trd_finishing);
trd->trd_recovery_handler = handler;
- if (cfs_kernel_thread(target_recovery_thread, lut, 0) > 0) {
+ if (cfs_create_thread(target_recovery_thread, lut, 0) > 0) {
cfs_wait_for_completion(&trd->trd_starting);
LASSERT(obd->obd_recovering != 0);
} else
cfs_spin_unlock(&exp->exp_locks_list_guard);
}
#endif
+
+static int target_bulk_timeout(void *data)
+{
+ ENTRY;
+ /* We don't fail the connection here, because having the export
+ * killed makes the (vital) call to commitrw very sad.
+ */
+ RETURN(1);
+}
+
+static inline char *bulk2type(struct ptlrpc_bulk_desc *desc)
+{
+ return desc->bd_type == BULK_GET_SINK ? "GET" : "PUT";
+}
+
+int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
+ struct l_wait_info *lwi)
+{
+ struct ptlrpc_request *req = desc->bd_req;
+ int rc = 0;
+ ENTRY;
+
+ /* Check if there is eviction in progress, and if so, wait for
+ * it to finish */
+ if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
+ *lwi = LWI_INTR(NULL, NULL);
+ rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
+ !cfs_atomic_read(&exp->exp_obd->
+ obd_evict_inprogress),
+ lwi);
+ }
+
+ /* Check if client was evicted or tried to reconnect already */
+ if (exp->exp_failed || exp->exp_abort_active_req) {
+ rc = -ENOTCONN;
+ } else {
+ if (desc->bd_type == BULK_PUT_SINK)
+ rc = sptlrpc_svc_wrap_bulk(req, desc);
+ if (rc == 0)
+ rc = ptlrpc_start_bulk_transfer(desc);
+ }
+
+ if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) {
+ ptlrpc_abort_bulk(desc);
+ } else if (rc == 0) {
+ time_t start = cfs_time_current_sec();
+ do {
+ long timeoutl = req->rq_deadline - cfs_time_current_sec();
+ cfs_duration_t timeout = timeoutl <= 0 ?
+ CFS_TICK : cfs_time_seconds(timeoutl);
+ *lwi = LWI_TIMEOUT_INTERVAL(timeout,
+ cfs_time_seconds(1),
+ target_bulk_timeout,
+ desc);
+ rc = l_wait_event(desc->bd_waitq,
+ !ptlrpc_server_bulk_active(desc) ||
+ exp->exp_failed ||
+ exp->exp_abort_active_req,
+ lwi);
+ LASSERT(rc == 0 || rc == -ETIMEDOUT);
+ /* Wait again if we changed deadline */
+ } while ((rc == -ETIMEDOUT) &&
+ (req->rq_deadline > cfs_time_current_sec()));
+
+ if (rc == -ETIMEDOUT) {
+ DEBUG_REQ(D_ERROR, req,
+ "timeout on bulk %s after %ld%+lds",
+ bulk2type(desc),
+ req->rq_deadline - start,
+ cfs_time_current_sec() -
+ req->rq_deadline);
+ ptlrpc_abort_bulk(desc);
+ } else if (exp->exp_failed) {
+ DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
+ bulk2type(desc));
+ rc = -ENOTCONN;
+ ptlrpc_abort_bulk(desc);
+ } else if (exp->exp_abort_active_req) {
+ DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
+ bulk2type(desc));
+ /* we don't reply anyway */
+ rc = -ETIMEDOUT;
+ ptlrpc_abort_bulk(desc);
+ } else if (!desc->bd_success ||
+ desc->bd_nob_transferred != desc->bd_nob) {
+ DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
+ desc->bd_success ?
+ "truncated" : "network error on",
+ bulk2type(desc),
+ desc->bd_nob_transferred,
+ desc->bd_nob);
+ /* XXX should this be a different errno? */
+ rc = -ETIMEDOUT;
+ } else if (desc->bd_type == BULK_GET_SINK) {
+ rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ }
+ } else {
+ DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
+ bulk2type(desc), rc);
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(target_bulk_io);