* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2013, Intel Corporation.
+ * Copyright (c) 2010, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_LDLM
-#ifdef __KERNEL__
-# include <libcfs/libcfs.h>
-#else
-# include <liblustre.h>
-#endif
+#include <linux/kthread.h>
+#include <libcfs/libcfs.h>
#include <obd.h>
#include <obd_class.h>
#include <lustre_dlm.h>
}
spin_lock(&imp->imp_lock);
- cfs_list_for_each_entry(item, &imp->imp_conn_list, oic_item) {
+ list_for_each_entry(item, &imp->imp_conn_list, oic_item) {
if (obd_uuid_equals(uuid, &item->oic_uuid)) {
if (priority) {
- cfs_list_del(&item->oic_item);
- cfs_list_add(&item->oic_item,
+ list_del(&item->oic_item);
+ list_add(&item->oic_item,
&imp->imp_conn_list);
item->oic_last_attempt = 0;
}
imp_conn->oic_uuid = *uuid;
imp_conn->oic_last_attempt = 0;
if (priority)
- cfs_list_add(&imp_conn->oic_item, &imp->imp_conn_list);
+ list_add(&imp_conn->oic_item, &imp->imp_conn_list);
else
- cfs_list_add_tail(&imp_conn->oic_item,
+ list_add_tail(&imp_conn->oic_item,
&imp->imp_conn_list);
CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n",
imp, imp->imp_obd->obd_name, uuid->uuid,
ENTRY;
spin_lock(&imp->imp_lock);
- if (cfs_list_empty(&imp->imp_conn_list)) {
+ if (list_empty(&imp->imp_conn_list)) {
LASSERT(!imp->imp_connection);
GOTO(out, rc);
}
- cfs_list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
+ list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid))
continue;
LASSERT(imp_conn->oic_conn);
}
}
- cfs_list_del(&imp_conn->oic_item);
+ list_del(&imp_conn->oic_item);
ptlrpc_connection_put(imp_conn->oic_conn);
OBD_FREE(imp_conn, sizeof(*imp_conn));
CDEBUG(D_HA, "imp %p@%s: remove connection %s\n",
ENTRY;
spin_lock(&imp->imp_lock);
- cfs_list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
/* Check if conn UUID does have this peer NID. */
if (class_check_uuid(&conn->oic_uuid, peer)) {
*uuid = conn->oic_uuid;
min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
sizeof(server_uuid)));
- cli->cl_dirty = 0;
- cli->cl_avail_grant = 0;
- /* FIXME: Should limit this for the sum of all cl_dirty_max. */
- cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
- if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8)
- cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3);
- CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_write_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_read_list);
- client_obd_list_lock_init(&cli->cl_loi_list_lock);
+ cli->cl_dirty_pages = 0;
+ cli->cl_avail_grant = 0;
+ /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */
+ /* cl_dirty_max_pages may be changed at connect time in
+ * ptlrpc_connect_interpret(). */
+ client_adjust_max_dirty(cli);
+ INIT_LIST_HEAD(&cli->cl_cache_waiters);
+ INIT_LIST_HEAD(&cli->cl_loi_ready_list);
+ INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
+ INIT_LIST_HEAD(&cli->cl_loi_write_list);
+ INIT_LIST_HEAD(&cli->cl_loi_read_list);
+ spin_lock_init(&cli->cl_loi_list_lock);
atomic_set(&cli->cl_pending_w_pages, 0);
atomic_set(&cli->cl_pending_r_pages, 0);
cli->cl_r_in_flight = 0;
spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
/* lru for osc. */
- CFS_INIT_LIST_HEAD(&cli->cl_lru_osc);
+ INIT_LIST_HEAD(&cli->cl_lru_osc);
atomic_set(&cli->cl_lru_shrinkers, 0);
- atomic_set(&cli->cl_lru_busy, 0);
- atomic_set(&cli->cl_lru_in_list, 0);
- CFS_INIT_LIST_HEAD(&cli->cl_lru_list);
- client_obd_list_lock_init(&cli->cl_lru_list_lock);
- atomic_set(&cli->cl_unstable_count, 0);
+ atomic_long_set(&cli->cl_lru_busy, 0);
+ atomic_long_set(&cli->cl_lru_in_list, 0);
+ INIT_LIST_HEAD(&cli->cl_lru_list);
+ spin_lock_init(&cli->cl_lru_list_lock);
+ atomic_long_set(&cli->cl_unstable_count, 0);
init_waitqueue_head(&cli->cl_destroy_waitq);
atomic_set(&cli->cl_destroy_in_flight, 0);
*exp = NULL;
down_write(&cli->cl_sem);
- if (cli->cl_conn_count > 0 )
+ if (cli->cl_conn_count > 0)
GOTO(out_sem, rc = -EALREADY);
rc = class_connect(&conn, obd, cluuid);
imp = cli->cl_import;
down_write(&cli->cl_sem);
- CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
- cli->cl_conn_count);
+ CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name,
+ cli->cl_conn_count);
- if (!cli->cl_conn_count) {
+ if (cli->cl_conn_count == 0) {
CERROR("disconnecting disconnected device (%s)\n",
obd->obd_name);
GOTO(out_disconnect, rc = -EINVAL);
}
cli->cl_conn_count--;
- if (cli->cl_conn_count)
+ if (cli->cl_conn_count != 0)
GOTO(out_disconnect, rc = 0);
/* Mark import deactivated now, so we don't try to reconnect if any
/* complete all outstanding replies */
spin_lock(&exp->exp_lock);
- while (!cfs_list_empty(&exp->exp_outstanding_replies)) {
+ while (!list_empty(&exp->exp_outstanding_replies)) {
struct ptlrpc_reply_state *rs =
- cfs_list_entry(exp->exp_outstanding_replies.next,
+ list_entry(exp->exp_outstanding_replies.next,
struct ptlrpc_reply_state, rs_exp_list);
struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
spin_lock(&svcpt->scp_rep_lock);
- cfs_list_del_init(&rs->rs_exp_list);
+ list_del_init(&rs->rs_exp_list);
spin_lock(&rs->rs_lock);
ptlrpc_schedule_difficult_reply(rs);
spin_unlock(&rs->rs_lock);
spin_unlock(&exp->exp_lock);
class_export_cb_put(exp);
}
-EXPORT_SYMBOL(target_client_add_cb);
-#ifdef __KERNEL__
static void
check_and_start_recovery_timer(struct obd_device *obd,
struct ptlrpc_request *req, int new_client);
-#else
-static inline void
-check_and_start_recovery_timer(struct obd_device *obd,
- struct ptlrpc_request *req, int new_client)
-{
-}
-#endif
int target_handle_connect(struct ptlrpc_request *req)
{
if (rc)
GOTO(out, rc);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+ /* Don't allow clients to connect that are using old 1.8 format
+ * protocol conventions (LUSTRE_MSG_MAGIC_v1, !MSGHDR_CKSUM_INCOMPAT18,
+ * ldlm_flock_policy_wire format, MDT_ATTR_xTIME_SET, etc). The
+ * FULL20 flag should be set on all connections since 2.0, but no
+ * longer affects behaviour.
+ *
+ * Later this check will be disabled and the flag can be retired
+ * completely once interop with 3.0 is no longer needed.
+ */
+ if (!(data->ocd_connect_flags & OBD_CONNECT_FULL20))
+ GOTO(out, rc = -EPROTO);
+#endif
+
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
if (data->ocd_version < LUSTRE_VERSION_CODE -
LUSTRE_VERSION_ALLOWED_OFFSET ||
}
}
- if ((lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) &&
- (data->ocd_connect_flags & OBD_CONNECT_MDS))
- mds_conn = true;
-
- if ((data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) != 0)
- lw_client = true;
+ if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) {
+ mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0;
+ lw_client = (data->ocd_connect_flags &
+ OBD_CONNECT_LIGHTWEIGHT) != 0;
+
+ /* OBD_CONNECT_MNE_SWAB is defined as OBD_CONNECT_MDS_MDS
+ * for Imperative Recovery connection from MGC to MGS.
+ *
+ * Checking OBD_CONNECT_FID lets us distinguish whether
+ * the OBD_CONNECT_MDS_MDS/OBD_CONNECT_MNE_SWAB flag comes
+ * from an MGC or an MDT. */
+ if (!lw_client &&
+ (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
+ (data->ocd_connect_flags & OBD_CONNECT_FID) &&
+ (data->ocd_connect_flags & OBD_CONNECT_VERSION)) {
+ __u32 major = OBD_OCD_VERSION_MAJOR(data->ocd_version);
+ __u32 minor = OBD_OCD_VERSION_MINOR(data->ocd_version);
+ __u32 patch = OBD_OCD_VERSION_PATCH(data->ocd_version);
+
+ /* MDT-MDT interoperation between MDTs of different
+ * versions is not supported because of protocol changes. */
+ if (unlikely(major != LUSTRE_MAJOR ||
+ minor != LUSTRE_MINOR ||
+ abs(patch - LUSTRE_PATCH) > 3)) {
+ LCONSOLE_WARN("%s (%u.%u.%u.%u) refused the "
+ "connection from different version MDT "
+ "(%d.%d.%d.%d) %s %s\n",
+ target->obd_name, LUSTRE_MAJOR,
+ LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX,
+ major, minor, patch,
+ OBD_OCD_VERSION_FIX(data->ocd_version),
+ libcfs_nid2str(req->rq_peer.nid), str);
+
+ GOTO(out, rc = -EPROTO);
+ }
+ }
+ }
/* lctl gets a backstage, all-access pass. */
if (obd_uuid_equals(&cluuid, &target->obd_uuid))
class_export_put(export);
export = NULL;
rc = -EALREADY;
- } else if (mds_conn && export->exp_connection) {
+ } else if ((mds_conn || lw_client) && export->exp_connection != NULL) {
spin_unlock(&export->exp_lock);
- if (req->rq_peer.nid != export->exp_connection->c_peer.nid)
- /* MDS reconnected after failover. */
- LCONSOLE_WARN("%s: Received MDS connection from "
+ if (req->rq_peer.nid != export->exp_connection->c_peer.nid)
+ /* MDS or LWP reconnected after failover. */
+ LCONSOLE_WARN("%s: Received %s connection from "
"%s, removing former export from %s\n",
- target->obd_name, libcfs_nid2str(req->rq_peer.nid),
+ target->obd_name, mds_conn ? "MDS" : "LWP",
+ libcfs_nid2str(req->rq_peer.nid),
libcfs_nid2str(export->exp_connection->c_peer.nid));
else
/* New MDS connection from the same NID. */
- LCONSOLE_WARN("%s: Received new MDS connection from "
- "%s, removing former export from same NID\n",
- target->obd_name, libcfs_nid2str(req->rq_peer.nid));
+ LCONSOLE_WARN("%s: Received new %s connection from "
+ "%s, removing former export from same NID\n",
+ target->obd_name, mds_conn ? "MDS" : "LWP",
+ libcfs_nid2str(req->rq_peer.nid));
class_fail_export(export);
class_export_put(export);
export = NULL;
t = cfs_timer_deadline(&target->obd_recovery_timer);
t = cfs_time_sub(t, cfs_time_current());
t = cfs_duration_sec(t);
- LCONSOLE_WARN("%s: Denying connection for new client "
- "%s (at %s), waiting for all %d known "
- "clients (%d recovered, %d in progress, "
- "and %d evicted) to recover in %d:%.02d\n",
+ LCONSOLE_WARN("%s: Denying connection for new client %s"
+ "(at %s), waiting for %d known clients "
+ "(%d recovered, %d in progress, and %d "
+ "evicted) to recover in %d:%.02d\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid), k,
c - i, i, s, (int)t / 60,
(int)t % 60);
- rc = -EBUSY;
- } else {
+ rc = -EBUSY;
+ } else {
dont_check_exports:
- rc = obd_connect(req->rq_svc_thread->t_env,
- &export, target, &cluuid, data,
- client_nid);
+ rc = obd_connect(req->rq_svc_thread->t_env,
+ &export, target, &cluuid, data,
+ client_nid);
if (mds_conn && OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
lustre_msg_add_op_flags(req->rq_repmsg,
- MSG_CONNECT_RECOVERING);
- if (rc == 0)
- conn.cookie = export->exp_handle.h_cookie;
- }
- } else {
- rc = obd_reconnect(req->rq_svc_thread->t_env,
- export, target, &cluuid, data, client_nid);
- }
- if (rc)
- GOTO(out, rc);
-
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
- /* 2.2.0 clients always swab nidtbl entries due to a bug, so server
- * will do the swabbing for if the client is using the same endianness.
- *
- * This fixup is version-limited, because we don't want to carry the
- * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we need
- * interop with unpatched 2.2 clients. For newer clients, servers
- * will never do MNE swabbing, let the client handle that. LU-1644 */
- export->exp_need_mne_swab = !ptlrpc_req_need_swab(req) &&
- !(data->ocd_connect_flags & OBD_CONNECT_MNE_SWAB);
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and exp_need_mne_swab"
-#endif
+ MSG_CONNECT_RECOVERING);
+ if (rc == 0)
+ conn.cookie = export->exp_handle.h_cookie;
+ }
+ } else {
+ rc = obd_reconnect(req->rq_svc_thread->t_env,
+ export, target, &cluuid, data, client_nid);
+ }
+ if (rc)
+ GOTO(out, rc);
- LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
- data->ocd_instance = target->u.obt.obt_instance;
+ LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
+ data->ocd_instance = target->u.obt.obt_instance;
/* Return only the parts of obd_connect_data that we understand, so the
* client knows that we don't understand the rest. */
spin_unlock(&export->exp_lock);
spin_lock(&target->obd_dev_lock);
- cfs_list_del_init(&export->exp_obd_chain_timed);
+ list_del_init(&export->exp_obd_chain_timed);
spin_unlock(&target->obd_dev_lock);
} else {
spin_unlock(&export->exp_lock);
if (export->exp_connection != NULL) {
/* Check to see if connection came from another NID. */
if ((export->exp_connection->c_peer.nid != req->rq_peer.nid) &&
- !cfs_hlist_unhashed(&export->exp_nid_hash))
+ !hlist_unhashed(&export->exp_nid_hash))
cfs_hash_del(export->exp_obd->obd_nid_hash,
&export->exp_connection->c_peer.nid,
&export->exp_nid_hash);
export->exp_connection = ptlrpc_connection_get(req->rq_peer,
req->rq_self,
&remote_uuid);
- if (cfs_hlist_unhashed(&export->exp_nid_hash)) {
+ if (hlist_unhashed(&export->exp_nid_hash)) {
cfs_hash_add(export->exp_obd->obd_nid_hash,
&export->exp_connection->c_peer.nid,
&export->exp_nid_hash);
* ptlrpc_handle_server_req_in->lustre_unpack_msg(). */
revimp->imp_msg_magic = req->rq_reqmsg->lm_magic;
- if ((data->ocd_connect_flags & OBD_CONNECT_AT) &&
- (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
+ if (data->ocd_connect_flags & OBD_CONNECT_AT)
revimp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
else
revimp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
- if ((data->ocd_connect_flags & OBD_CONNECT_FULL20) &&
- (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
- revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
- else
- revimp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+ revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
rc = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx, &req->rq_flvr);
if (rc) {
req->rq_status = rc;
RETURN(rc);
}
-EXPORT_SYMBOL(target_handle_connect);
int target_handle_disconnect(struct ptlrpc_request *req)
{
RETURN(0);
}
-EXPORT_SYMBOL(target_handle_disconnect);
void target_destroy_export(struct obd_export *exp)
{
static void target_request_copy_get(struct ptlrpc_request *req)
{
class_export_rpc_inc(req->rq_export);
- LASSERT(cfs_list_empty(&req->rq_list));
- CFS_INIT_LIST_HEAD(&req->rq_replay_list);
+ LASSERT(list_empty(&req->rq_list));
+ INIT_LIST_HEAD(&req->rq_replay_list);
/* Increase refcount to keep request in queue. */
atomic_inc(&req->rq_refcount);
static void target_request_copy_put(struct ptlrpc_request *req)
{
- LASSERT(cfs_list_empty(&req->rq_replay_list));
+ LASSERT(list_empty(&req->rq_replay_list));
LASSERT_ATOMIC_POS(&req->rq_export->exp_replay_count);
atomic_dec(&req->rq_export->exp_replay_count);
LASSERT(exp);
spin_lock(&exp->exp_lock);
- cfs_list_for_each_entry(reqiter, &exp->exp_req_replay_queue,
+ list_for_each_entry(reqiter, &exp->exp_req_replay_queue,
rq_replay_list) {
if (lustre_msg_get_transno(reqiter->rq_reqmsg) == transno) {
dup = 1;
CERROR("invalid flags %x of resent replay\n",
lustre_msg_get_flags(req->rq_reqmsg));
} else {
- cfs_list_add_tail(&req->rq_replay_list,
+ list_add_tail(&req->rq_replay_list,
&exp->exp_req_replay_queue);
}
/*
 * Unlink req from the list it is on via rq_replay_list.
 *
 * Asserts that rq_replay_list is currently non-empty and that req has an
 * export, then performs the unlink while holding that export's exp_lock.
 */
static void target_exp_dequeue_req_replay(struct ptlrpc_request *req)
{
- LASSERT(!cfs_list_empty(&req->rq_replay_list));
+ LASSERT(!list_empty(&req->rq_replay_list)); /* req must still be linked */
LASSERT(req->rq_export);
spin_lock(&req->rq_export->exp_lock);
- cfs_list_del_init(&req->rq_replay_list);
+ list_del_init(&req->rq_replay_list); /* unlink and re-initialise the node */
spin_unlock(&req->rq_export->exp_lock);
}
-#ifdef __KERNEL__
static void target_finish_recovery(struct obd_device *obd)
{
ENTRY;
ldlm_reprocess_all_ns(obd->obd_namespace);
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_req_replay_queue) ||
- !cfs_list_empty(&obd->obd_lock_replay_queue) ||
- !cfs_list_empty(&obd->obd_final_req_queue)) {
+ if (!list_empty(&obd->obd_req_replay_queue) ||
+ !list_empty(&obd->obd_lock_replay_queue) ||
+ !list_empty(&obd->obd_final_req_queue)) {
CERROR("%s: Recovery queues ( %s%s%s) are not empty\n",
obd->obd_name,
- cfs_list_empty(&obd->obd_req_replay_queue) ? "" : "req ",
- cfs_list_empty(&obd->obd_lock_replay_queue) ? \
+ list_empty(&obd->obd_req_replay_queue) ? "" : "req ",
+ list_empty(&obd->obd_lock_replay_queue) ? \
"" : "lock ",
- cfs_list_empty(&obd->obd_final_req_queue) ? \
+ list_empty(&obd->obd_final_req_queue) ? \
"" : "final ");
spin_unlock(&obd->obd_recovery_task_lock);
LBUG();
static void abort_req_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- cfs_list_t abort_list;
+ struct list_head abort_list;
- CFS_INIT_LIST_HEAD(&abort_list);
+ INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
- cfs_list_splice_init(&obd->obd_req_replay_queue, &abort_list);
+ list_splice_init(&obd->obd_req_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &abort_list, rq_list) {
+ list_for_each_entry_safe(req, n, &abort_list, rq_list) {
DEBUG_REQ(D_WARNING, req, "aborted:");
req->rq_status = -ENOTCONN;
if (ptlrpc_error(req)) {
static void abort_lock_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- cfs_list_t abort_list;
+ struct list_head abort_list;
- CFS_INIT_LIST_HEAD(&abort_list);
+ INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
- cfs_list_splice_init(&obd->obd_lock_replay_queue, &abort_list);
+ list_splice_init(&obd->obd_lock_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &abort_list, rq_list){
+ list_for_each_entry_safe(req, n, &abort_list, rq_list) {
DEBUG_REQ(D_ERROR, req, "aborted:");
req->rq_status = -ENOTCONN;
if (ptlrpc_error(req)) {
void target_cleanup_recovery(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- cfs_list_t clean_list;
+ struct list_head clean_list;
ENTRY;
- CFS_INIT_LIST_HEAD(&clean_list);
+ INIT_LIST_HEAD(&clean_list);
spin_lock(&obd->obd_dev_lock);
if (!obd->obd_recovering) {
spin_unlock(&obd->obd_dev_lock);
spin_lock(&obd->obd_recovery_task_lock);
target_cancel_recovery_timer(obd);
- cfs_list_splice_init(&obd->obd_req_replay_queue, &clean_list);
+ list_splice_init(&obd->obd_req_replay_queue, &clean_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &clean_list, rq_list) {
- LASSERT(req->rq_reply_state == 0);
+ list_for_each_entry_safe(req, n, &clean_list, rq_list) {
+ LASSERT(req->rq_reply_state == NULL);
target_exp_dequeue_req_replay(req);
target_request_copy_put(req);
}
spin_lock(&obd->obd_recovery_task_lock);
- cfs_list_splice_init(&obd->obd_lock_replay_queue, &clean_list);
- cfs_list_splice_init(&obd->obd_final_req_queue, &clean_list);
+ list_splice_init(&obd->obd_lock_replay_queue, &clean_list);
+ list_splice_init(&obd->obd_final_req_queue, &clean_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &clean_list, rq_list){
- LASSERT(req->rq_reply_state == 0);
+ list_for_each_entry_safe(req, n, &clean_list, rq_list) {
+ LASSERT(req->rq_reply_state == NULL);
target_request_copy_put(req);
}
CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
cfs_timer_disarm(&obd->obd_recovery_timer);
}
-EXPORT_SYMBOL(target_cancel_recovery_timer);
static void target_start_recovery_timer(struct obd_device *obd)
{
to += drt - left;
} else if (!extend && (drt > to)) {
to = drt;
- /* reduce drt by already passed time */
- drt -= obd->obd_recovery_timeout - left;
}
if (to > obd->obd_recovery_time_hard)
to = obd->obd_recovery_time_hard;
- if (obd->obd_recovery_timeout < to ||
- obd->obd_recovery_timeout == obd->obd_recovery_time_hard) {
+ if (obd->obd_recovery_timeout < to) {
obd->obd_recovery_timeout = to;
- cfs_timer_arm(&obd->obd_recovery_timer,
- cfs_time_shift(drt));
+ end = obd->obd_recovery_start + to;
+ cfs_timer_arm(&obd->obd_recovery_timer,
+ cfs_time_shift(end - now));
}
spin_unlock(&obd->obd_dev_lock);
CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n",
- obd->obd_name, (unsigned)drt);
+ obd->obd_name, (unsigned)cfs_time_sub(end, now));
}
/* Reset the timer with each new client connection */
ENTRY;
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_req_replay_queue)) {
- req = cfs_list_entry(obd->obd_req_replay_queue.next,
+ if (!list_empty(&obd->obd_req_replay_queue)) {
+ req = list_entry(obd->obd_req_replay_queue.next,
struct ptlrpc_request, rq_list);
req_transno = lustre_msg_get_transno(req->rq_reqmsg);
} else {
int wake_up = 0;
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_lock_replay_queue)) {
+ if (!list_empty(&obd->obd_lock_replay_queue)) {
CDEBUG(D_HA, "waking for next lock\n");
wake_up = 1;
} else if (atomic_read(&obd->obd_lock_replay_clients) == 0) {
int (*health_check)(struct obd_export *))
{
repeat:
- wait_event(obd->obd_next_transno_waitq, check_routine(obd));
+ if ((obd->obd_recovery_start != 0) && (cfs_time_current_sec() >=
+ (obd->obd_recovery_start + obd->obd_recovery_time_hard))) {
+ CWARN("recovery is aborted by hard timeout\n");
+ obd->obd_abort_recovery = 1;
+ }
+
+ while (wait_event_timeout(obd->obd_next_transno_waitq,
+ check_routine(obd),
+ msecs_to_jiffies(60 * MSEC_PER_SEC)) == 0)
+ /* wait indefinitely for event, but don't trigger watchdog */;
+
if (obd->obd_abort_recovery) {
CWARN("recovery is aborted, evict exports in recovery\n");
/** evict exports which didn't finish recovery yet */
static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
{
- struct ptlrpc_request *req = NULL;
- ENTRY;
+ struct ptlrpc_request *req = NULL;
+ ENTRY;
- CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
- obd->obd_next_recovery_transno);
+ CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+ obd->obd_next_recovery_transno);
- if (target_recovery_overseer(obd, check_for_next_transno,
- exp_req_replay_healthy)) {
- abort_req_replay_queue(obd);
- abort_lock_replay_queue(obd);
- }
+ CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+ /** Needed to extend the recovery window above recovery_time_soft.
+ * Extending is possible only at the end of the recovery window
+ * (see handle_recovery_req for more details).
+ */
+ CFS_FAIL_TIMEOUT_MS(OBD_FAIL_TGT_REPLAY_DELAY, 300);
+
+ if (target_recovery_overseer(obd, check_for_next_transno,
+ exp_req_replay_healthy)) {
+ abort_req_replay_queue(obd);
+ abort_lock_replay_queue(obd);
+ }
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_req_replay_queue)) {
- req = cfs_list_entry(obd->obd_req_replay_queue.next,
+ if (!list_empty(&obd->obd_req_replay_queue)) {
+ req = list_entry(obd->obd_req_replay_queue.next,
struct ptlrpc_request, rq_list);
- cfs_list_del_init(&req->rq_list);
+ list_del_init(&req->rq_list);
obd->obd_requests_queued_for_recovery--;
spin_unlock(&obd->obd_recovery_task_lock);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
- LASSERT(cfs_list_empty(&obd->obd_req_replay_queue));
+ LASSERT(list_empty(&obd->obd_req_replay_queue));
LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
/** evict exports failed VBR */
class_disconnect_stale_exports(obd, exp_vbr_healthy);
abort_lock_replay_queue(obd);
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_lock_replay_queue)) {
- req = cfs_list_entry(obd->obd_lock_replay_queue.next,
+ if (!list_empty(&obd->obd_lock_replay_queue)) {
+ req = list_entry(obd->obd_lock_replay_queue.next,
struct ptlrpc_request, rq_list);
- cfs_list_del_init(&req->rq_list);
+ list_del_init(&req->rq_list);
spin_unlock(&obd->obd_recovery_task_lock);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
- LASSERT(cfs_list_empty(&obd->obd_lock_replay_queue));
+ LASSERT(list_empty(&obd->obd_lock_replay_queue));
LASSERT(atomic_read(&obd->obd_lock_replay_clients) == 0);
/** evict exports failed VBR */
class_disconnect_stale_exports(obd, exp_vbr_healthy);
struct ptlrpc_request *req = NULL;
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_final_req_queue)) {
- req = cfs_list_entry(obd->obd_final_req_queue.next,
+ if (!list_empty(&obd->obd_final_req_queue)) {
+ req = list_entry(obd->obd_final_req_queue.next,
struct ptlrpc_request, rq_list);
- cfs_list_del_init(&req->rq_list);
+ list_del_init(&req->rq_list);
spin_unlock(&obd->obd_recovery_task_lock);
if (req->rq_export->exp_in_recovery) {
spin_lock(&req->rq_export->exp_lock);
return req;
}
-static int handle_recovery_req(struct ptlrpc_thread *thread,
- struct ptlrpc_request *req,
- svc_handler_t handler)
+static void handle_recovery_req(struct ptlrpc_thread *thread,
+ struct ptlrpc_request *req,
+ svc_handler_t handler)
{
- int rc;
-
ENTRY;
/**
* it after that, discard such request silently
*/
if (req->rq_export->exp_disconnected)
- GOTO(reqcopy_put, rc = 0);
+ RETURN_EXIT;
req->rq_session.lc_thread = thread;
req->rq_svc_thread = thread;
}
extend_recovery_timer(class_exp2obd(req->rq_export), to, true);
}
-reqcopy_put:
- RETURN(rc);
+ EXIT;
}
static int target_recovery_thread(void *arg)
* The third stage: reply on final pings, at this moment all clients
* must have request in final queue
*/
+ CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_RECONNECT, cfs_fail_val);
CDEBUG(D_INFO, "3: final stage - process recovery completion pings\n");
/** Update server last boot epoch */
tgt_boot_epoch_update(lut);
libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
- target_request_copy_put(req);
- }
+ /* The waiting client cannot send pings to the server,
+ * so refresh last_request_time here to keep the
+ * export from being evicted. */
+ ptlrpc_update_export_timer(req->rq_export, 0);
+ target_request_copy_put(req);
+ }
- delta = (jiffies - delta) / HZ;
+ delta = jiffies_to_msecs(jiffies - delta) / MSEC_PER_SEC;
CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
if (delta > OBD_RECOVERY_TIME_SOFT) {
}
EXPORT_SYMBOL(target_recovery_init);
-#endif /* __KERNEL__ */
static int target_process_req_flags(struct obd_device *obd,
struct ptlrpc_request *req)
int target_queue_recovery_request(struct ptlrpc_request *req,
struct obd_device *obd)
{
- cfs_list_t *tmp;
- int inserted = 0;
__u64 transno = lustre_msg_get_transno(req->rq_reqmsg);
- ENTRY;
+ struct ptlrpc_request *reqiter;
+ int inserted = 0;
+ ENTRY;
if (obd->obd_recovery_data.trd_processing_task == current_pid()) {
/* Processing the queue right now, don't re-add. */
wake_up(&obd->obd_next_transno_waitq);
spin_lock(&obd->obd_recovery_task_lock);
if (obd->obd_recovering) {
- cfs_list_add_tail(&req->rq_list,
+ list_add_tail(&req->rq_list,
&obd->obd_final_req_queue);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
RETURN(-ENOTCONN);
}
LASSERT(req->rq_export->exp_lock_replay_needed);
- cfs_list_add_tail(&req->rq_list, &obd->obd_lock_replay_queue);
+ list_add_tail(&req->rq_list, &obd->obd_lock_replay_queue);
spin_unlock(&obd->obd_recovery_task_lock);
RETURN(0);
}
* buffers (eg mdt_body, ost_body etc) have NOT been swabbed. */
if (!transno) {
- CFS_INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_list);
DEBUG_REQ(D_HA, req, "not queueing");
RETURN(1);
}
spin_lock(&obd->obd_recovery_task_lock);
if (transno < obd->obd_next_recovery_transno) {
/* Processing the queue right now, don't re-add. */
- LASSERT(cfs_list_empty(&req->rq_list));
+ LASSERT(list_empty(&req->rq_list));
spin_unlock(&obd->obd_recovery_task_lock);
RETURN(1);
}
RETURN(0);
}
- /* XXX O(n^2) */
+ /* XXX O(n^2) */
spin_lock(&obd->obd_recovery_task_lock);
- LASSERT(obd->obd_recovering);
- cfs_list_for_each(tmp, &obd->obd_req_replay_queue) {
- struct ptlrpc_request *reqiter =
- cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
-
- if (lustre_msg_get_transno(reqiter->rq_reqmsg) > transno) {
- cfs_list_add_tail(&req->rq_list, &reqiter->rq_list);
- inserted = 1;
- break;
- }
+ LASSERT(obd->obd_recovering);
+ list_for_each_entry(reqiter, &obd->obd_req_replay_queue, rq_list) {
+ if (lustre_msg_get_transno(reqiter->rq_reqmsg) > transno) {
+ list_add_tail(&req->rq_list, &reqiter->rq_list);
+ inserted = 1;
+ goto added;
+ }
if (unlikely(lustre_msg_get_transno(reqiter->rq_reqmsg) ==
transno)) {
RETURN(0);
}
}
-
+added:
if (!inserted)
- cfs_list_add_tail(&req->rq_list, &obd->obd_req_replay_queue);
+ list_add_tail(&req->rq_list, &obd->obd_req_replay_queue);
obd->obd_requests_queued_for_recovery++;
spin_unlock(&obd->obd_recovery_task_lock);
wake_up(&obd->obd_next_transno_waitq);
RETURN(0);
}
-EXPORT_SYMBOL(target_queue_recovery_request);
/*
 * Invoke obd_ping() for the request's export, using the environment of the
 * service thread handling req, then call req_capsule_server_pack() on the
 * request's capsule (rq_pill).
 *
 * The value returned by obd_ping() is not examined; the function returns
 * whatever req_capsule_server_pack() returns.
 */
int target_handle_ping(struct ptlrpc_request *req)
{
obd_ping(req->rq_svc_thread->t_env, req->rq_export);
return req_capsule_server_pack(&req->rq_pill);
}
-EXPORT_SYMBOL(target_handle_ping);
void target_committed_to_req(struct ptlrpc_request *req)
{
CDEBUG(D_INFO, "last_committed "LPU64", transno "LPU64", xid "LPU64"\n",
exp->exp_last_committed, req->rq_transno, req->rq_xid);
}
-EXPORT_SYMBOL(target_committed_to_req);
#endif /* HAVE_SERVER_SUPPORT */
RETURN(0);
}
-EXPORT_SYMBOL(target_pack_pool_reply);
-int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id)
+static int target_send_reply_msg(struct ptlrpc_request *req,
+ int rc, int fail_id)
{
if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
DEBUG_REQ(D_ERROR, req, "dropping reply");
}
/* must be an export if locks saved */
- LASSERT (req->rq_export != NULL);
+ LASSERT(req->rq_export != NULL);
/* req/reply consistent */
LASSERT(rs->rs_svcpt == svcpt);
/* "fresh" reply */
- LASSERT (!rs->rs_scheduled);
- LASSERT (!rs->rs_scheduled_ever);
- LASSERT (!rs->rs_handled);
- LASSERT (!rs->rs_on_net);
- LASSERT (rs->rs_export == NULL);
- LASSERT (cfs_list_empty(&rs->rs_obd_list));
- LASSERT (cfs_list_empty(&rs->rs_exp_list));
+ LASSERT(!rs->rs_scheduled);
+ LASSERT(!rs->rs_scheduled_ever);
+ LASSERT(!rs->rs_handled);
+ LASSERT(!rs->rs_on_net);
+ LASSERT(rs->rs_export == NULL);
+ LASSERT(list_empty(&rs->rs_obd_list));
+ LASSERT(list_empty(&rs->rs_exp_list));
- exp = class_export_get (req->rq_export);
+ exp = class_export_get(req->rq_export);
/* disable reply scheduling while I'm setting up */
rs->rs_scheduled = 1;
rs->rs_transno, exp->exp_last_committed);
if (rs->rs_transno > exp->exp_last_committed) {
/* not committed already */
- cfs_list_add_tail(&rs->rs_obd_list,
+ list_add_tail(&rs->rs_obd_list,
&exp->exp_uncommitted_replies);
}
spin_unlock(&exp->exp_uncommitted_replies_lock);
spin_lock(&exp->exp_lock);
- cfs_list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies);
+ list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies);
spin_unlock(&exp->exp_lock);
netrc = target_send_reply_msg(req, rc, fail_id);
spin_lock(&rs->rs_lock);
if (rs->rs_transno <= exp->exp_last_committed ||
(!rs->rs_on_net && !rs->rs_no_ack) ||
- cfs_list_empty(&rs->rs_exp_list) || /* completed already */
- cfs_list_empty(&rs->rs_obd_list)) {
+ list_empty(&rs->rs_exp_list) || /* completed already */
+ list_empty(&rs->rs_obd_list)) {
CDEBUG(D_HA, "Schedule reply immediately\n");
ptlrpc_dispatch_difficult_reply(rs);
} else {
- cfs_list_add(&rs->rs_list, &svcpt->scp_rep_active);
+ list_add(&rs->rs_list, &svcpt->scp_rep_active);
rs->rs_scheduled = 0; /* allow notifier to schedule */
}
spin_unlock(&rs->rs_lock);
spin_unlock(&svcpt->scp_rep_lock);
EXIT;
}
-EXPORT_SYMBOL(target_send_reply);
ldlm_mode_t lck_compat_array[] = {
[LCK_EX] = LCK_COMPAT_EX,
switch (error) {
case ELDLM_OK:
+ case ELDLM_LOCK_MATCHED:
result = 0;
break;
case ELDLM_LOCK_CHANGED:
}
return error;
}
-EXPORT_SYMBOL(ldlm_errno2error);
#if LUSTRE_TRACKS_LOCK_EXP_REFS
void ldlm_dump_export_locks(struct obd_export *exp)
{
spin_lock(&exp->exp_locks_list_guard);
- if (!cfs_list_empty(&exp->exp_locks_list)) {
+ if (!list_empty(&exp->exp_locks_list)) {
struct ldlm_lock *lock;
CERROR("dumping locks for export %p,"
"ignore if the unmount doesn't hang\n", exp);
- cfs_list_for_each_entry(lock, &exp->exp_locks_list,
+ list_for_each_entry(lock, &exp->exp_locks_list,
l_exp_refs_link)
LDLM_ERROR(lock, "lock:");
}
RETURN(1);
}
-static inline char *bulk2type(struct ptlrpc_bulk_desc *desc)
+static inline const char *bulk2type(struct ptlrpc_request *req) /* constant label naming req's bulk direction */
{
- return desc->bd_type == BULK_GET_SINK ? "GET" : "PUT";
+ if (req->rq_bulk_read) /* tested first, so it wins if both flags are set */
+ return "READ";
+ if (req->rq_bulk_write)
+ return "WRITE";
+ return "UNKNOWN"; /* neither rq_bulk_read nor rq_bulk_write is set */
}
int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
{
struct ptlrpc_request *req = desc->bd_req;
time_t start = cfs_time_current_sec();
+ time_t deadline;
int rc = 0;
ENTRY;
*lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
!atomic_read(&exp->exp_obd->
- obd_evict_inprogress),
+ obd_evict_inprogress),
lwi);
}
exp->exp_conn_cnt > lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
rc = -ENOTCONN;
} else {
- if (desc->bd_type == BULK_PUT_SINK)
+ if (req->rq_bulk_read)
rc = sptlrpc_svc_wrap_bulk(req, desc);
if (rc == 0)
rc = ptlrpc_start_bulk_transfer(desc);
if (rc < 0) {
DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
- bulk2type(desc), rc);
+ bulk2type(req), rc);
RETURN(rc);
}
RETURN(0);
}
+ /* limit actual bulk transfer to bulk_timeout seconds */
+ deadline = start + bulk_timeout;
+ if (deadline > req->rq_deadline)
+ deadline = req->rq_deadline;
+
do {
- long timeoutl = req->rq_deadline - cfs_time_current_sec();
+ long timeoutl = deadline - cfs_time_current_sec();
cfs_duration_t timeout = timeoutl <= 0 ?
CFS_TICK : cfs_time_seconds(timeoutl);
+ time_t rq_deadline;
*lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
target_bulk_timeout, desc);
lustre_msg_get_conn_cnt(req->rq_reqmsg),
lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
- /* Wait again if we changed deadline. */
+ /* Wait again if we changed rq_deadline. */
+ rq_deadline = ACCESS_ONCE(req->rq_deadline);
+ deadline = start + bulk_timeout;
+ if (deadline > rq_deadline)
+ deadline = rq_deadline;
} while ((rc == -ETIMEDOUT) &&
- (req->rq_deadline > cfs_time_current_sec()));
+ (deadline > cfs_time_current_sec()));
if (rc == -ETIMEDOUT) {
DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %ld%+lds",
- bulk2type(desc), req->rq_deadline - start,
- cfs_time_current_sec() - req->rq_deadline);
+ bulk2type(req), deadline - start,
+ cfs_time_current_sec() - deadline);
ptlrpc_abort_bulk(desc);
} else if (exp->exp_failed) {
DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
- bulk2type(desc));
+ bulk2type(req));
rc = -ENOTCONN;
ptlrpc_abort_bulk(desc);
} else if (exp->exp_conn_cnt >
lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
- bulk2type(desc));
+ bulk2type(req));
/* We don't reply anyway. */
rc = -ETIMEDOUT;
ptlrpc_abort_bulk(desc);
- } else if (desc->bd_failure ||
- desc->bd_nob_transferred != desc->bd_nob) {
- DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
- desc->bd_failure ? "network error on" : "truncated",
- bulk2type(desc), desc->bd_nob_transferred,
- desc->bd_nob);
- /* XXX Should this be a different errno? */
+ } else if (desc->bd_failure) {
+ DEBUG_REQ(D_ERROR, req, "network error on bulk %s",
+ bulk2type(req));
+ /* XXX should this be a different errno? */
rc = -ETIMEDOUT;
- } else if (desc->bd_type == BULK_GET_SINK) {
- rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ } else {
+ if (req->rq_bulk_write)
+ rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ if (rc == 0 && desc->bd_nob_transferred != desc->bd_nob) {
+ DEBUG_REQ(D_ERROR, req, "truncated bulk %s %d(%d)",
+ bulk2type(req), desc->bd_nob_transferred,
+ desc->bd_nob);
+ /* XXX should this be a different errno? */
+ rc = -ETIMEDOUT;
+ }
}
RETURN(rc);