* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2013, Intel Corporation.
+ * Copyright (c) 2010, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_LDLM
+#include <linux/kthread.h>
#include <libcfs/libcfs.h>
#include <obd.h>
#include <obd_class.h>
INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
INIT_LIST_HEAD(&cli->cl_loi_write_list);
INIT_LIST_HEAD(&cli->cl_loi_read_list);
- client_obd_list_lock_init(&cli->cl_loi_list_lock);
+ spin_lock_init(&cli->cl_loi_list_lock);
atomic_set(&cli->cl_pending_w_pages, 0);
atomic_set(&cli->cl_pending_r_pages, 0);
cli->cl_r_in_flight = 0;
atomic_long_set(&cli->cl_lru_busy, 0);
atomic_long_set(&cli->cl_lru_in_list, 0);
INIT_LIST_HEAD(&cli->cl_lru_list);
- client_obd_list_lock_init(&cli->cl_lru_list_lock);
+ spin_lock_init(&cli->cl_lru_list_lock);
atomic_long_set(&cli->cl_unstable_count, 0);
init_waitqueue_head(&cli->cl_destroy_waitq);
*exp = NULL;
down_write(&cli->cl_sem);
- if (cli->cl_conn_count > 0 )
+ if (cli->cl_conn_count > 0)
GOTO(out_sem, rc = -EALREADY);
rc = class_connect(&conn, obd, cluuid);
imp = cli->cl_import;
down_write(&cli->cl_sem);
- CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
- cli->cl_conn_count);
+ CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name,
+ cli->cl_conn_count);
- if (!cli->cl_conn_count) {
+ if (cli->cl_conn_count == 0) {
CERROR("disconnecting disconnected device (%s)\n",
obd->obd_name);
GOTO(out_disconnect, rc = -EINVAL);
}
cli->cl_conn_count--;
- if (cli->cl_conn_count)
+ if (cli->cl_conn_count != 0)
GOTO(out_disconnect, rc = 0);
/* Mark import deactivated now, so we don't try to reconnect if any
spin_unlock(&exp->exp_lock);
class_export_cb_put(exp);
}
-EXPORT_SYMBOL(target_client_add_cb);
static void
check_and_start_recovery_timer(struct obd_device *obd,
if (rc)
GOTO(out, rc);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+ /* Don't allow clients to connect that are using old 1.8 format
+ * protocol conventions (LUSTRE_MSG_MAGIC_v1, !MSGHDR_CKSUM_INCOMPAT18,
+ * ldlm_flock_policy_wire format, MDT_ATTR_xTIME_SET, etc). The
+ * FULL20 flag should be set on all connections since 2.0, but no
+ * longer affects behaviour.
+ *
+ * Later this check will be disabled and the flag can be retired
+ * completely once interop with 3.0 is no longer needed.
+ */
+ if (!(data->ocd_connect_flags & OBD_CONNECT_FULL20))
+ GOTO(out, rc = -EPROTO);
+#endif
+
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
if (data->ocd_version < LUSTRE_VERSION_CODE -
LUSTRE_VERSION_ALLOWED_OFFSET ||
mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0;
lw_client = (data->ocd_connect_flags &
OBD_CONNECT_LIGHTWEIGHT) != 0;
+
+ /* OBD_CONNECT_MNE_SWAB is defined as OBD_CONNECT_MDS_MDS
+ * for Imperative Recovery connection from MGC to MGS.
+ *
+ * Checking OBD_CONNECT_FID lets us distinguish whether
+ * the OBD_CONNECT_MDS_MDS/OBD_CONNECT_MNE_SWAB flag comes from
+ * an MGC or an MDT. */
+ if (!lw_client &&
+ (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
+ (data->ocd_connect_flags & OBD_CONNECT_FID) &&
+ (data->ocd_connect_flags & OBD_CONNECT_VERSION)) {
+ __u32 major = OBD_OCD_VERSION_MAJOR(data->ocd_version);
+ __u32 minor = OBD_OCD_VERSION_MINOR(data->ocd_version);
+ __u32 patch = OBD_OCD_VERSION_PATCH(data->ocd_version);
+
+ /* MDT-MDT interoperation between MDTs of different
+ * versions is not supported because of protocol changes. */
+ if (unlikely(major != LUSTRE_MAJOR ||
+ minor != LUSTRE_MINOR ||
+ abs(patch - LUSTRE_PATCH) > 3)) {
+ LCONSOLE_WARN("%s (%u.%u.%u.%u) refused the "
+ "connection from different version MDT "
+ "(%d.%d.%d.%d) %s %s\n",
+ target->obd_name, LUSTRE_MAJOR,
+ LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX,
+ major, minor, patch,
+ OBD_OCD_VERSION_FIX(data->ocd_version),
+ libcfs_nid2str(req->rq_peer.nid), str);
+
+ GOTO(out, rc = -EPROTO);
+ }
+ }
}
/* lctl gets a backstage, all-access pass. */
t = cfs_timer_deadline(&target->obd_recovery_timer);
t = cfs_time_sub(t, cfs_time_current());
t = cfs_duration_sec(t);
- LCONSOLE_WARN("%s: Denying connection for new client "
- "%s (at %s), waiting for all %d known "
- "clients (%d recovered, %d in progress, "
- "and %d evicted) to recover in %d:%.02d\n",
+ LCONSOLE_WARN("%s: Denying connection for new client "
+ "%s (at %s), waiting for %d known clients "
+ "(%d recovered, %d in progress, and %d "
+ "evicted) to recover in %d:%.02d\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid), k,
c - i, i, s, (int)t / 60,
(int)t % 60);
- rc = -EBUSY;
- } else {
+ rc = -EBUSY;
+ } else {
dont_check_exports:
- rc = obd_connect(req->rq_svc_thread->t_env,
- &export, target, &cluuid, data,
- client_nid);
+ rc = obd_connect(req->rq_svc_thread->t_env,
+ &export, target, &cluuid, data,
+ client_nid);
if (mds_conn && OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
lustre_msg_add_op_flags(req->rq_repmsg,
- MSG_CONNECT_RECOVERING);
- if (rc == 0)
- conn.cookie = export->exp_handle.h_cookie;
- }
- } else {
- rc = obd_reconnect(req->rq_svc_thread->t_env,
- export, target, &cluuid, data, client_nid);
- }
+ MSG_CONNECT_RECOVERING);
+ if (rc == 0)
+ conn.cookie = export->exp_handle.h_cookie;
+ }
+ } else {
+ rc = obd_reconnect(req->rq_svc_thread->t_env,
+ export, target, &cluuid, data, client_nid);
+ }
if (rc)
GOTO(out, rc);
* ptlrpc_handle_server_req_in->lustre_unpack_msg(). */
revimp->imp_msg_magic = req->rq_reqmsg->lm_magic;
- if ((data->ocd_connect_flags & OBD_CONNECT_AT) &&
- (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
+ if (data->ocd_connect_flags & OBD_CONNECT_AT)
revimp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
else
revimp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
- if ((data->ocd_connect_flags & OBD_CONNECT_FULL20) &&
- (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
- revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
- else
- revimp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+ revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
rc = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx, &req->rq_flvr);
if (rc) {
req->rq_status = rc;
RETURN(rc);
}
-EXPORT_SYMBOL(target_handle_connect);
int target_handle_disconnect(struct ptlrpc_request *req)
{
RETURN(0);
}
-EXPORT_SYMBOL(target_handle_disconnect);
void target_destroy_export(struct obd_export *exp)
{
spin_unlock(&obd->obd_recovery_task_lock);
list_for_each_entry_safe(req, n, &clean_list, rq_list) {
- LASSERT(req->rq_reply_state == 0);
+ LASSERT(req->rq_reply_state == NULL);
target_exp_dequeue_req_replay(req);
target_request_copy_put(req);
}
spin_unlock(&obd->obd_recovery_task_lock);
list_for_each_entry_safe(req, n, &clean_list, rq_list) {
- LASSERT(req->rq_reply_state == 0);
+ LASSERT(req->rq_reply_state == NULL);
target_request_copy_put(req);
}
CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
cfs_timer_disarm(&obd->obd_recovery_timer);
}
-EXPORT_SYMBOL(target_cancel_recovery_timer);
static void target_start_recovery_timer(struct obd_device *obd)
{
to += drt - left;
} else if (!extend && (drt > to)) {
to = drt;
- /* reduce drt by already passed time */
- drt -= obd->obd_recovery_timeout - left;
}
if (to > obd->obd_recovery_time_hard)
to = obd->obd_recovery_time_hard;
- if (obd->obd_recovery_timeout < to ||
- obd->obd_recovery_timeout == obd->obd_recovery_time_hard) {
+ if (obd->obd_recovery_timeout < to) {
obd->obd_recovery_timeout = to;
- cfs_timer_arm(&obd->obd_recovery_timer,
- cfs_time_shift(drt));
+ end = obd->obd_recovery_start + to;
+ cfs_timer_arm(&obd->obd_recovery_timer,
+ cfs_time_shift(end - now));
}
spin_unlock(&obd->obd_dev_lock);
CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n",
- obd->obd_name, (unsigned)drt);
+ obd->obd_name, (unsigned)cfs_time_sub(end, now));
}
/* Reset the timer with each new client connection */
int (*health_check)(struct obd_export *))
{
repeat:
- wait_event(obd->obd_next_transno_waitq, check_routine(obd));
+ if ((obd->obd_recovery_start != 0) && (cfs_time_current_sec() >=
+ (obd->obd_recovery_start + obd->obd_recovery_time_hard))) {
+ CWARN("recovery is aborted by hard timeout\n");
+ obd->obd_abort_recovery = 1;
+ }
+
+ while (wait_event_timeout(obd->obd_next_transno_waitq,
+ check_routine(obd),
+ msecs_to_jiffies(60 * MSEC_PER_SEC)) == 0)
+ /* wait indefinitely for event, but don't trigger watchdog */;
+
if (obd->obd_abort_recovery) {
CWARN("recovery is aborted, evict exports in recovery\n");
/** evict exports which didn't finish recovery yet */
obd->obd_next_recovery_transno);
CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+ /** The delay below is needed to extend the recovery window above
+ * recovery_time_soft. Extending is possible only at the end of the
+ * recovery window (see handle_recovery_req for more details).
+ */
+ CFS_FAIL_TIMEOUT_MS(OBD_FAIL_TGT_REPLAY_DELAY, 300);
if (target_recovery_overseer(obd, check_for_next_transno,
exp_req_replay_healthy)) {
wake_up(&obd->obd_next_transno_waitq);
RETURN(0);
}
-EXPORT_SYMBOL(target_queue_recovery_request);
int target_handle_ping(struct ptlrpc_request *req)
{
obd_ping(req->rq_svc_thread->t_env, req->rq_export);
return req_capsule_server_pack(&req->rq_pill);
}
-EXPORT_SYMBOL(target_handle_ping);
void target_committed_to_req(struct ptlrpc_request *req)
{
CDEBUG(D_INFO, "last_committed "LPU64", transno "LPU64", xid "LPU64"\n",
exp->exp_last_committed, req->rq_transno, req->rq_xid);
}
-EXPORT_SYMBOL(target_committed_to_req);
#endif /* HAVE_SERVER_SUPPORT */
RETURN(0);
}
-EXPORT_SYMBOL(target_pack_pool_reply);
-int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id)
+static int target_send_reply_msg(struct ptlrpc_request *req,
+ int rc, int fail_id)
{
if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
DEBUG_REQ(D_ERROR, req, "dropping reply");
spin_unlock(&svcpt->scp_rep_lock);
EXIT;
}
-EXPORT_SYMBOL(target_send_reply);
ldlm_mode_t lck_compat_array[] = {
[LCK_EX] = LCK_COMPAT_EX,
}
return error;
}
-EXPORT_SYMBOL(ldlm_errno2error);
#if LUSTRE_TRACKS_LOCK_EXP_REFS
void ldlm_dump_export_locks(struct obd_export *exp)
RETURN(1);
}
-static inline char *bulk2type(struct ptlrpc_bulk_desc *desc)
+/* Return a printable name for the bulk transfer direction of \a req:
+ * "READ" when rq_bulk_read is set, "WRITE" when rq_bulk_write is set,
+ * and "UNKNOWN" when neither flag is set. */
+static inline const char *bulk2type(struct ptlrpc_request *req)
{
- return desc->bd_type == BULK_GET_SINK ? "GET" : "PUT";
+	const char *dir = "UNKNOWN";
+
+	/* rq_bulk_read takes precedence if both flags happen to be set */
+	if (req->rq_bulk_read)
+		dir = "READ";
+	else if (req->rq_bulk_write)
+		dir = "WRITE";
+
+	return dir;
}
int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
exp->exp_conn_cnt > lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
rc = -ENOTCONN;
} else {
- if (desc->bd_type == BULK_PUT_SINK)
+ if (req->rq_bulk_read)
rc = sptlrpc_svc_wrap_bulk(req, desc);
if (rc == 0)
rc = ptlrpc_start_bulk_transfer(desc);
if (rc < 0) {
DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
- bulk2type(desc), rc);
+ bulk2type(req), rc);
RETURN(rc);
}
long timeoutl = deadline - cfs_time_current_sec();
cfs_duration_t timeout = timeoutl <= 0 ?
CFS_TICK : cfs_time_seconds(timeoutl);
+ time_t rq_deadline;
*lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
target_bulk_timeout, desc);
lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
/* Wait again if we changed rq_deadline. */
+ rq_deadline = ACCESS_ONCE(req->rq_deadline);
deadline = start + bulk_timeout;
- if (deadline > req->rq_deadline)
- deadline = req->rq_deadline;
+ if (deadline > rq_deadline)
+ deadline = rq_deadline;
} while ((rc == -ETIMEDOUT) &&
(deadline > cfs_time_current_sec()));
if (rc == -ETIMEDOUT) {
DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %ld%+lds",
- bulk2type(desc), deadline - start,
+ bulk2type(req), deadline - start,
cfs_time_current_sec() - deadline);
ptlrpc_abort_bulk(desc);
} else if (exp->exp_failed) {
DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
- bulk2type(desc));
+ bulk2type(req));
rc = -ENOTCONN;
ptlrpc_abort_bulk(desc);
} else if (exp->exp_conn_cnt >
lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
- bulk2type(desc));
+ bulk2type(req));
/* We don't reply anyway. */
rc = -ETIMEDOUT;
ptlrpc_abort_bulk(desc);
- } else if (desc->bd_failure ||
- desc->bd_nob_transferred != desc->bd_nob) {
- DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
- desc->bd_failure ? "network error on" : "truncated",
- bulk2type(desc), desc->bd_nob_transferred,
- desc->bd_nob);
- /* XXX Should this be a different errno? */
+ } else if (desc->bd_failure) {
+ DEBUG_REQ(D_ERROR, req, "network error on bulk %s",
+ bulk2type(req));
+ /* XXX should this be a different errno? */
rc = -ETIMEDOUT;
- } else if (desc->bd_type == BULK_GET_SINK) {
- rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ } else {
+ if (req->rq_bulk_write)
+ rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ if (rc == 0 && desc->bd_nob_transferred != desc->bd_nob) {
+ DEBUG_REQ(D_ERROR, req, "truncated bulk %s %d(%d)",
+ bulk2type(req), desc->bd_nob_transferred,
+ desc->bd_nob);
+ /* XXX should this be a different errno? */
+ rc = -ETIMEDOUT;
+ }
}
RETURN(rc);