* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2013, Intel Corporation.
+ * Copyright (c) 2010, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_LDLM
-#ifdef __KERNEL__
-# include <libcfs/libcfs.h>
-#else
-# include <liblustre.h>
-#endif
+#include <linux/kthread.h>
+#include <libcfs/libcfs.h>
#include <obd.h>
#include <obd_class.h>
#include <lustre_dlm.h>
}
spin_lock(&imp->imp_lock);
- cfs_list_for_each_entry(item, &imp->imp_conn_list, oic_item) {
+ list_for_each_entry(item, &imp->imp_conn_list, oic_item) {
if (obd_uuid_equals(uuid, &item->oic_uuid)) {
if (priority) {
- cfs_list_del(&item->oic_item);
- cfs_list_add(&item->oic_item,
+ list_del(&item->oic_item);
+ list_add(&item->oic_item,
&imp->imp_conn_list);
item->oic_last_attempt = 0;
}
imp_conn->oic_uuid = *uuid;
imp_conn->oic_last_attempt = 0;
if (priority)
- cfs_list_add(&imp_conn->oic_item, &imp->imp_conn_list);
+ list_add(&imp_conn->oic_item, &imp->imp_conn_list);
else
- cfs_list_add_tail(&imp_conn->oic_item,
+ list_add_tail(&imp_conn->oic_item,
&imp->imp_conn_list);
CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n",
imp, imp->imp_obd->obd_name, uuid->uuid,
ENTRY;
spin_lock(&imp->imp_lock);
- if (cfs_list_empty(&imp->imp_conn_list)) {
+ if (list_empty(&imp->imp_conn_list)) {
LASSERT(!imp->imp_connection);
GOTO(out, rc);
}
- cfs_list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
+ list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) {
if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid))
continue;
LASSERT(imp_conn->oic_conn);
}
}
- cfs_list_del(&imp_conn->oic_item);
+ list_del(&imp_conn->oic_item);
ptlrpc_connection_put(imp_conn->oic_conn);
OBD_FREE(imp_conn, sizeof(*imp_conn));
CDEBUG(D_HA, "imp %p@%s: remove connection %s\n",
ENTRY;
spin_lock(&imp->imp_lock);
- cfs_list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
+ list_for_each_entry(conn, &imp->imp_conn_list, oic_item) {
/* Check if conn UUID does have this peer NID. */
if (class_check_uuid(&conn->oic_uuid, peer)) {
*uuid = conn->oic_uuid;
}
init_rwsem(&cli->cl_sem);
- sema_init(&cli->cl_mgc_sem, 1);
+ mutex_init(&cli->cl_mgc_mutex);
cli->cl_conn_count = 0;
memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2),
sizeof(server_uuid)));
- cli->cl_dirty = 0;
- cli->cl_avail_grant = 0;
- /* FIXME: Should limit this for the sum of all cl_dirty_max. */
- cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
- if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8)
- cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3);
- CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_write_list);
- CFS_INIT_LIST_HEAD(&cli->cl_loi_read_list);
- client_obd_list_lock_init(&cli->cl_loi_list_lock);
- cfs_atomic_set(&cli->cl_pending_w_pages, 0);
- cfs_atomic_set(&cli->cl_pending_r_pages, 0);
+ cli->cl_dirty_pages = 0;
+ cli->cl_avail_grant = 0;
+ /* FIXME: Should limit this for the sum of all cl_dirty_max_pages. */
+ /* cl_dirty_max_pages may be changed at connect time in
+ * ptlrpc_connect_interpret(). */
+ client_adjust_max_dirty(cli);
+ INIT_LIST_HEAD(&cli->cl_cache_waiters);
+ INIT_LIST_HEAD(&cli->cl_loi_ready_list);
+ INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
+ INIT_LIST_HEAD(&cli->cl_loi_write_list);
+ INIT_LIST_HEAD(&cli->cl_loi_read_list);
+ spin_lock_init(&cli->cl_loi_list_lock);
+ atomic_set(&cli->cl_pending_w_pages, 0);
+ atomic_set(&cli->cl_pending_r_pages, 0);
cli->cl_r_in_flight = 0;
cli->cl_w_in_flight = 0;
spin_lock_init(&cli->cl_write_offset_hist.oh_lock);
/* lru for osc. */
- CFS_INIT_LIST_HEAD(&cli->cl_lru_osc);
- cfs_atomic_set(&cli->cl_lru_shrinkers, 0);
- cfs_atomic_set(&cli->cl_lru_busy, 0);
- cfs_atomic_set(&cli->cl_lru_in_list, 0);
- CFS_INIT_LIST_HEAD(&cli->cl_lru_list);
- client_obd_list_lock_init(&cli->cl_lru_list_lock);
- cfs_atomic_set(&cli->cl_unstable_count, 0);
+ INIT_LIST_HEAD(&cli->cl_lru_osc);
+ atomic_set(&cli->cl_lru_shrinkers, 0);
+ atomic_long_set(&cli->cl_lru_busy, 0);
+ atomic_long_set(&cli->cl_lru_in_list, 0);
+ INIT_LIST_HEAD(&cli->cl_lru_list);
+ spin_lock_init(&cli->cl_lru_list_lock);
+ atomic_long_set(&cli->cl_unstable_count, 0);
init_waitqueue_head(&cli->cl_destroy_waitq);
- cfs_atomic_set(&cli->cl_destroy_in_flight, 0);
+ atomic_set(&cli->cl_destroy_in_flight, 0);
#ifdef ENABLE_CHECKSUM
/* Turn on checksumming by default. */
cli->cl_checksum = 1;
- /*
- * The supported checksum types will be worked out at connect time
- * Set cl_chksum* to CRC32 for now to avoid returning screwed info
- * through procfs.
- */
- cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
+ /*
+ * The supported checksum types will be worked out at connect time
+ * Set cl_chksum* to CRC32 for now to avoid returning screwed info
+ * through procfs.
+ */
+ cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32;
#endif
- cfs_atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
+ atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
/* This value may be reduced at connect time in
* ptlrpc_connect_interpret() . We initialize it to only
cli->cl_chunkbits = PAGE_CACHE_SHIFT;
if (!strcmp(name, LUSTRE_MDC_NAME)) {
- cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
+ cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) {
cli->cl_max_rpcs_in_flight = 2;
} else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) {
cli->cl_max_rpcs_in_flight = 4;
} else {
if (osc_on_mdt(obddev->obd_name))
- cli->cl_max_rpcs_in_flight = MDS_OSC_MAX_RIF_DEFAULT;
+ cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_MAX;
else
- cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
+ cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT;
}
rc = ldlm_get_ref();
if (rc) {
*exp = NULL;
down_write(&cli->cl_sem);
- if (cli->cl_conn_count > 0 )
+ if (cli->cl_conn_count > 0)
GOTO(out_sem, rc = -EALREADY);
rc = class_connect(&conn, obd, cluuid);
LASSERT (imp->imp_state == LUSTRE_IMP_DISCON);
GOTO(out_ldlm, rc);
}
- LASSERT((*exp)->exp_connection);
+ LASSERT(*exp != NULL && (*exp)->exp_connection);
if (data) {
LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) ==
imp = cli->cl_import;
down_write(&cli->cl_sem);
- CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
- cli->cl_conn_count);
+ CDEBUG(D_INFO, "disconnect %s - %zu\n", obd->obd_name,
+ cli->cl_conn_count);
- if (!cli->cl_conn_count) {
+ if (cli->cl_conn_count == 0) {
CERROR("disconnecting disconnected device (%s)\n",
obd->obd_name);
GOTO(out_disconnect, rc = -EINVAL);
}
cli->cl_conn_count--;
- if (cli->cl_conn_count)
+ if (cli->cl_conn_count != 0)
GOTO(out_disconnect, rc = 0);
/* Mark import deactivated now, so we don't try to reconnect if any
/* complete all outstanding replies */
spin_lock(&exp->exp_lock);
- while (!cfs_list_empty(&exp->exp_outstanding_replies)) {
+ while (!list_empty(&exp->exp_outstanding_replies)) {
struct ptlrpc_reply_state *rs =
- cfs_list_entry(exp->exp_outstanding_replies.next,
+ list_entry(exp->exp_outstanding_replies.next,
struct ptlrpc_reply_state, rs_exp_list);
struct ptlrpc_service_part *svcpt = rs->rs_svcpt;
spin_lock(&svcpt->scp_rep_lock);
- cfs_list_del_init(&rs->rs_exp_list);
+ list_del_init(&rs->rs_exp_list);
spin_lock(&rs->rs_lock);
ptlrpc_schedule_difficult_reply(rs);
spin_unlock(&rs->rs_lock);
spin_unlock(&exp->exp_lock);
class_export_cb_put(exp);
}
-EXPORT_SYMBOL(target_client_add_cb);
-#ifdef __KERNEL__
static void
check_and_start_recovery_timer(struct obd_device *obd,
struct ptlrpc_request *req, int new_client);
-#else
-static inline void
-check_and_start_recovery_timer(struct obd_device *obd,
- struct ptlrpc_request *req, int new_client)
-{
-}
-#endif
int target_handle_connect(struct ptlrpc_request *req)
{
if (rc)
GOTO(out, rc);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
+ /* Don't allow clients to connect that are using old 1.8 format
+ * protocol conventions (LUSTRE_MSG_MAGIC_v1, !MSGHDR_CKSUM_INCOMPAT18,
+ * ldlm_flock_policy_wire format, MDT_ATTR_xTIME_SET, etc). The
+ * FULL20 flag should be set on all connections since 2.0, but no
+ * longer affects behaviour.
+ *
+ * Later this check will be disabled and the flag can be retired
+ * completely once interop with 3.0 is no longer needed.
+ */
+ if (!(data->ocd_connect_flags & OBD_CONNECT_FULL20))
+ GOTO(out, rc = -EPROTO);
+#endif
+
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
if (data->ocd_version < LUSTRE_VERSION_CODE -
LUSTRE_VERSION_ALLOWED_OFFSET ||
}
}
- if ((lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) &&
- (data->ocd_connect_flags & OBD_CONNECT_MDS))
- mds_conn = true;
-
- if ((data->ocd_connect_flags & OBD_CONNECT_LIGHTWEIGHT) != 0)
- lw_client = true;
+ if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) {
+ mds_conn = (data->ocd_connect_flags & OBD_CONNECT_MDS) != 0;
+ lw_client = (data->ocd_connect_flags &
+ OBD_CONNECT_LIGHTWEIGHT) != 0;
+
+ /* OBD_CONNECT_MNE_SWAB is defined as OBD_CONNECT_MDS_MDS
+ * for Imperative Recovery connection from MGC to MGS.
+ *
+ * By checking OBD_CONNECT_FID, we can distinguish whether
+ * the OBD_CONNECT_MDS_MDS/OBD_CONNECT_MNE_SWAB flag comes
+ * from an MGC or an MDT. */
+ if (!lw_client &&
+ (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) &&
+ (data->ocd_connect_flags & OBD_CONNECT_FID) &&
+ (data->ocd_connect_flags & OBD_CONNECT_VERSION)) {
+ __u32 major = OBD_OCD_VERSION_MAJOR(data->ocd_version);
+ __u32 minor = OBD_OCD_VERSION_MINOR(data->ocd_version);
+ __u32 patch = OBD_OCD_VERSION_PATCH(data->ocd_version);
+
+ /* We do not support the MDT-MDT interoperations with
+ * different version MDT because of protocol changes. */
+ if (unlikely(major != LUSTRE_MAJOR ||
+ minor != LUSTRE_MINOR ||
+ abs(patch - LUSTRE_PATCH) > 3)) {
+ LCONSOLE_WARN("%s (%u.%u.%u.%u) refused the "
+ "connection from different version MDT "
+ "(%d.%d.%d.%d) %s %s\n",
+ target->obd_name, LUSTRE_MAJOR,
+ LUSTRE_MINOR, LUSTRE_PATCH, LUSTRE_FIX,
+ major, minor, patch,
+ OBD_OCD_VERSION_FIX(data->ocd_version),
+ libcfs_nid2str(req->rq_peer.nid), str);
+
+ GOTO(out, rc = -EPROTO);
+ }
+ }
+ }
/* lctl gets a backstage, all-access pass. */
if (obd_uuid_equals(&cluuid, &target->obd_uuid))
class_export_put(export);
export = NULL;
rc = -EALREADY;
- } else if (mds_conn && export->exp_connection) {
+ } else if ((mds_conn || lw_client) && export->exp_connection != NULL) {
spin_unlock(&export->exp_lock);
- if (req->rq_peer.nid != export->exp_connection->c_peer.nid)
- /* MDS reconnected after failover. */
- LCONSOLE_WARN("%s: Received MDS connection from "
+ if (req->rq_peer.nid != export->exp_connection->c_peer.nid)
+ /* MDS or LWP reconnected after failover. */
+ LCONSOLE_WARN("%s: Received %s connection from "
"%s, removing former export from %s\n",
- target->obd_name, libcfs_nid2str(req->rq_peer.nid),
+ target->obd_name, mds_conn ? "MDS" : "LWP",
+ libcfs_nid2str(req->rq_peer.nid),
libcfs_nid2str(export->exp_connection->c_peer.nid));
else
- /* New MDS connection from the same NID. */
+ /* New MDS or LWP connection from the same NID. */
- LCONSOLE_WARN("%s: Received new MDS connection from "
- "%s, removing former export from same NID\n",
- target->obd_name, libcfs_nid2str(req->rq_peer.nid));
+ LCONSOLE_WARN("%s: Received new %s connection from "
+ "%s, removing former export from same NID\n",
+ target->obd_name, mds_conn ? "MDS" : "LWP",
+ libcfs_nid2str(req->rq_peer.nid));
class_fail_export(export);
class_export_put(export);
export = NULL;
no_export:
OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 2 * obd_timeout);
} else if (req->rq_export == NULL &&
- cfs_atomic_read(&export->exp_rpc_count) > 0) {
+ atomic_read(&export->exp_rpc_count) > 0) {
LCONSOLE_WARN("%s: Client %s (at %s) refused connection, "
"still busy with %d references\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid),
- cfs_atomic_read(&export->exp_refcount));
+ atomic_read(&export->exp_refcount));
GOTO(out, rc = -EBUSY);
} else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) {
if (!strstr(cluuid.uuid, "mdt"))
int k; /* known */
int s; /* stale/evicted */
- c = cfs_atomic_read(&target->obd_connected_clients);
- i = cfs_atomic_read(&target->obd_lock_replay_clients);
+ c = atomic_read(&target->obd_connected_clients);
+ i = atomic_read(&target->obd_lock_replay_clients);
k = target->obd_max_recoverable_clients;
s = target->obd_stale_clients;
t = cfs_timer_deadline(&target->obd_recovery_timer);
t = cfs_time_sub(t, cfs_time_current());
t = cfs_duration_sec(t);
- LCONSOLE_WARN("%s: Denying connection for new client "
- "%s (at %s), waiting for all %d known "
- "clients (%d recovered, %d in progress, "
- "and %d evicted) to recover in %d:%.02d\n",
+ /* Keep a blank between the client UUID and "(at <nid>)" so the
+ * two fields do not run together in the console message. */
+ LCONSOLE_WARN("%s: Denying connection for new client %s "
+ "(at %s), waiting for %d known clients "
+ "(%d recovered, %d in progress, and %d "
+ "evicted) to recover in %d:%.02d\n",
target->obd_name, cluuid.uuid,
libcfs_nid2str(req->rq_peer.nid), k,
c - i, i, s, (int)t / 60,
(int)t % 60);
- rc = -EBUSY;
- } else {
+ rc = -EBUSY;
+ } else {
dont_check_exports:
- rc = obd_connect(req->rq_svc_thread->t_env,
- &export, target, &cluuid, data,
- client_nid);
+ rc = obd_connect(req->rq_svc_thread->t_env,
+ &export, target, &cluuid, data,
+ client_nid);
if (mds_conn && OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
lustre_msg_add_op_flags(req->rq_repmsg,
- MSG_CONNECT_RECOVERING);
- if (rc == 0)
- conn.cookie = export->exp_handle.h_cookie;
- }
- } else {
- rc = obd_reconnect(req->rq_svc_thread->t_env,
- export, target, &cluuid, data, client_nid);
- }
- if (rc)
- GOTO(out, rc);
-
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 50, 0)
- /* 2.2.0 clients always swab nidtbl entries due to a bug, so server
- * will do the swabbing for if the client is using the same endianness.
- *
- * This fixup is version-limited, because we don't want to carry the
- * OBD_CONNECT_MNE_SWAB flag around forever, just so long as we need
- * interop with unpatched 2.2 clients. For newer clients, servers
- * will never do MNE swabbing, let the client handle that. LU-1644 */
- export->exp_need_mne_swab = !ptlrpc_req_need_swab(req) &&
- !(data->ocd_connect_flags & OBD_CONNECT_MNE_SWAB);
-#else
-#warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and exp_need_mne_swab"
-#endif
+ MSG_CONNECT_RECOVERING);
+ if (rc == 0)
+ conn.cookie = export->exp_handle.h_cookie;
+ }
+ } else {
+ rc = obd_reconnect(req->rq_svc_thread->t_env,
+ export, target, &cluuid, data, client_nid);
+ }
+ if (rc)
+ GOTO(out, rc);
- LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
- data->ocd_instance = target->u.obt.obt_instance;
+ LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
+ data->ocd_instance = target->u.obt.obt_instance;
/* Return only the parts of obd_connect_data that we understand, so the
* client knows that we don't understand the rest. */
spin_unlock(&export->exp_lock);
spin_lock(&target->obd_dev_lock);
- cfs_list_del_init(&export->exp_obd_chain_timed);
+ list_del_init(&export->exp_obd_chain_timed);
spin_unlock(&target->obd_dev_lock);
} else {
spin_unlock(&export->exp_lock);
if (export->exp_connection != NULL) {
/* Check to see if connection came from another NID. */
if ((export->exp_connection->c_peer.nid != req->rq_peer.nid) &&
- !cfs_hlist_unhashed(&export->exp_nid_hash))
+ !hlist_unhashed(&export->exp_nid_hash))
cfs_hash_del(export->exp_obd->obd_nid_hash,
&export->exp_connection->c_peer.nid,
&export->exp_nid_hash);
export->exp_connection = ptlrpc_connection_get(req->rq_peer,
req->rq_self,
&remote_uuid);
- if (cfs_hlist_unhashed(&export->exp_nid_hash)) {
+ if (hlist_unhashed(&export->exp_nid_hash)) {
cfs_hash_add(export->exp_obd->obd_nid_hash,
&export->exp_connection->c_peer.nid,
&export->exp_nid_hash);
spin_unlock(&target->obd_recovery_task_lock);
}
- cfs_atomic_inc(&target->obd_req_replay_clients);
- cfs_atomic_inc(&target->obd_lock_replay_clients);
- if (cfs_atomic_inc_return(&target->obd_connected_clients) ==
+ atomic_inc(&target->obd_req_replay_clients);
+ atomic_inc(&target->obd_lock_replay_clients);
+ if (atomic_inc_return(&target->obd_connected_clients) ==
target->obd_max_recoverable_clients)
wake_up(&target->obd_next_transno_waitq);
}
* ptlrpc_handle_server_req_in->lustre_unpack_msg(). */
revimp->imp_msg_magic = req->rq_reqmsg->lm_magic;
- if ((data->ocd_connect_flags & OBD_CONNECT_AT) &&
- (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
+ if (data->ocd_connect_flags & OBD_CONNECT_AT)
revimp->imp_msghdr_flags |= MSGHDR_AT_SUPPORT;
else
revimp->imp_msghdr_flags &= ~MSGHDR_AT_SUPPORT;
- if ((data->ocd_connect_flags & OBD_CONNECT_FULL20) &&
- (revimp->imp_msg_magic != LUSTRE_MSG_MAGIC_V1))
- revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
- else
- revimp->imp_msghdr_flags &= ~MSGHDR_CKSUM_INCOMPAT18;
+ revimp->imp_msghdr_flags |= MSGHDR_CKSUM_INCOMPAT18;
rc = sptlrpc_import_sec_adapt(revimp, req->rq_svc_ctx, &req->rq_flvr);
if (rc) {
req->rq_status = rc;
RETURN(rc);
}
-EXPORT_SYMBOL(target_handle_connect);
int target_handle_disconnect(struct ptlrpc_request *req)
{
RETURN(0);
}
-EXPORT_SYMBOL(target_handle_disconnect);
void target_destroy_export(struct obd_export *exp)
{
/**
 * Take the references that keep a replay request alive while it is queued.
 *
 * Calls class_export_rpc_inc() on the request's export, asserts that the
 * request is not linked on rq_list, initialises rq_replay_list, then
 * increments the request refcount and the export's exp_replay_count.
 * target_request_copy_put() performs the matching decrements.
 */
static void target_request_copy_get(struct ptlrpc_request *req)
{
class_export_rpc_inc(req->rq_export);
- LASSERT(cfs_list_empty(&req->rq_list));
- CFS_INIT_LIST_HEAD(&req->rq_replay_list);
+ LASSERT(list_empty(&req->rq_list));
+ INIT_LIST_HEAD(&req->rq_replay_list);
/* Increase refcount to keep request in queue. */
- cfs_atomic_inc(&req->rq_refcount);
+ atomic_inc(&req->rq_refcount);
/* Let export know it has replays to be handled. */
- cfs_atomic_inc(&req->rq_export->exp_replay_count);
+ atomic_inc(&req->rq_export->exp_replay_count);
}
/**
 * Release what target_request_copy_get() took on a replay request.
 *
 * Asserts that the request is no longer linked on rq_replay_list and checks
 * the export's exp_replay_count with LASSERT_ATOMIC_POS before decrementing
 * it, then calls class_export_rpc_dec() on the export and hands the request
 * to ptlrpc_server_drop_request().
 */
static void target_request_copy_put(struct ptlrpc_request *req)
{
- LASSERT(cfs_list_empty(&req->rq_replay_list));
- LASSERT_ATOMIC_POS(&req->rq_export->exp_replay_count);
+ LASSERT(list_empty(&req->rq_replay_list));
+ LASSERT_ATOMIC_POS(&req->rq_export->exp_replay_count);
- cfs_atomic_dec(&req->rq_export->exp_replay_count);
+ atomic_dec(&req->rq_export->exp_replay_count);
class_export_rpc_dec(req->rq_export);
- ptlrpc_server_drop_request(req);
+ ptlrpc_server_drop_request(req);
}
static int target_exp_enqueue_req_replay(struct ptlrpc_request *req)
LASSERT(exp);
spin_lock(&exp->exp_lock);
- cfs_list_for_each_entry(reqiter, &exp->exp_req_replay_queue,
+ list_for_each_entry(reqiter, &exp->exp_req_replay_queue,
rq_replay_list) {
if (lustre_msg_get_transno(reqiter->rq_reqmsg) == transno) {
dup = 1;
CERROR("invalid flags %x of resent replay\n",
lustre_msg_get_flags(req->rq_reqmsg));
} else {
- cfs_list_add_tail(&req->rq_replay_list,
+ list_add_tail(&req->rq_replay_list,
&exp->exp_req_replay_queue);
}
/**
 * Unlink a request from its export's replay list.
 *
 * The request must currently be linked on rq_replay_list and must have an
 * export (both are asserted). The unlink itself is done with
 * list_del_init() while holding the export's exp_lock.
 */
static void target_exp_dequeue_req_replay(struct ptlrpc_request *req)
{
- LASSERT(!cfs_list_empty(&req->rq_replay_list));
+ LASSERT(!list_empty(&req->rq_replay_list));
LASSERT(req->rq_export);
spin_lock(&req->rq_export->exp_lock);
- cfs_list_del_init(&req->rq_replay_list);
+ list_del_init(&req->rq_replay_list);
spin_unlock(&req->rq_export->exp_lock);
}
-#ifdef __KERNEL__
static void target_finish_recovery(struct obd_device *obd)
{
ENTRY;
"%d recovered and %d %s evicted.\n", obd->obd_name,
(int)elapsed_time / 60, (int)elapsed_time % 60,
obd->obd_max_recoverable_clients,
- cfs_atomic_read(&obd->obd_connected_clients),
+ atomic_read(&obd->obd_connected_clients),
obd->obd_stale_clients,
obd->obd_stale_clients == 1 ? "was" : "were");
}
ldlm_reprocess_all_ns(obd->obd_namespace);
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_req_replay_queue) ||
- !cfs_list_empty(&obd->obd_lock_replay_queue) ||
- !cfs_list_empty(&obd->obd_final_req_queue)) {
+ if (!list_empty(&obd->obd_req_replay_queue) ||
+ !list_empty(&obd->obd_lock_replay_queue) ||
+ !list_empty(&obd->obd_final_req_queue)) {
CERROR("%s: Recovery queues ( %s%s%s) are not empty\n",
obd->obd_name,
- cfs_list_empty(&obd->obd_req_replay_queue) ? "" : "req ",
- cfs_list_empty(&obd->obd_lock_replay_queue) ? \
+ list_empty(&obd->obd_req_replay_queue) ? "" : "req ",
+ list_empty(&obd->obd_lock_replay_queue) ? \
"" : "lock ",
- cfs_list_empty(&obd->obd_final_req_queue) ? \
+ list_empty(&obd->obd_final_req_queue) ? \
"" : "final ");
spin_unlock(&obd->obd_recovery_task_lock);
LBUG();
static void abort_req_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- cfs_list_t abort_list;
+ struct list_head abort_list;
- CFS_INIT_LIST_HEAD(&abort_list);
+ INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
- cfs_list_splice_init(&obd->obd_req_replay_queue, &abort_list);
+ list_splice_init(&obd->obd_req_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &abort_list, rq_list) {
+ list_for_each_entry_safe(req, n, &abort_list, rq_list) {
DEBUG_REQ(D_WARNING, req, "aborted:");
req->rq_status = -ENOTCONN;
if (ptlrpc_error(req)) {
static void abort_lock_replay_queue(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- cfs_list_t abort_list;
+ struct list_head abort_list;
- CFS_INIT_LIST_HEAD(&abort_list);
+ INIT_LIST_HEAD(&abort_list);
spin_lock(&obd->obd_recovery_task_lock);
- cfs_list_splice_init(&obd->obd_lock_replay_queue, &abort_list);
+ list_splice_init(&obd->obd_lock_replay_queue, &abort_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &abort_list, rq_list){
+ list_for_each_entry_safe(req, n, &abort_list, rq_list) {
DEBUG_REQ(D_ERROR, req, "aborted:");
req->rq_status = -ENOTCONN;
if (ptlrpc_error(req)) {
void target_cleanup_recovery(struct obd_device *obd)
{
struct ptlrpc_request *req, *n;
- cfs_list_t clean_list;
+ struct list_head clean_list;
ENTRY;
- CFS_INIT_LIST_HEAD(&clean_list);
+ INIT_LIST_HEAD(&clean_list);
spin_lock(&obd->obd_dev_lock);
if (!obd->obd_recovering) {
spin_unlock(&obd->obd_dev_lock);
spin_lock(&obd->obd_recovery_task_lock);
target_cancel_recovery_timer(obd);
- cfs_list_splice_init(&obd->obd_req_replay_queue, &clean_list);
+ list_splice_init(&obd->obd_req_replay_queue, &clean_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &clean_list, rq_list) {
- LASSERT(req->rq_reply_state == 0);
+ list_for_each_entry_safe(req, n, &clean_list, rq_list) {
+ LASSERT(req->rq_reply_state == NULL);
target_exp_dequeue_req_replay(req);
target_request_copy_put(req);
}
spin_lock(&obd->obd_recovery_task_lock);
- cfs_list_splice_init(&obd->obd_lock_replay_queue, &clean_list);
- cfs_list_splice_init(&obd->obd_final_req_queue, &clean_list);
+ list_splice_init(&obd->obd_lock_replay_queue, &clean_list);
+ list_splice_init(&obd->obd_final_req_queue, &clean_list);
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_list_for_each_entry_safe(req, n, &clean_list, rq_list){
- LASSERT(req->rq_reply_state == 0);
+ list_for_each_entry_safe(req, n, &clean_list, rq_list) {
+ LASSERT(req->rq_reply_state == NULL);
target_request_copy_put(req);
}
CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
cfs_timer_disarm(&obd->obd_recovery_timer);
}
-EXPORT_SYMBOL(target_cancel_recovery_timer);
static void target_start_recovery_timer(struct obd_device *obd)
{
to += drt - left;
} else if (!extend && (drt > to)) {
to = drt;
- /* reduce drt by already passed time */
- drt -= obd->obd_recovery_timeout - left;
}
if (to > obd->obd_recovery_time_hard)
to = obd->obd_recovery_time_hard;
- if (obd->obd_recovery_timeout < to ||
- obd->obd_recovery_timeout == obd->obd_recovery_time_hard) {
+ if (obd->obd_recovery_timeout < to) {
obd->obd_recovery_timeout = to;
- cfs_timer_arm(&obd->obd_recovery_timer,
- cfs_time_shift(drt));
+ end = obd->obd_recovery_start + to;
+ cfs_timer_arm(&obd->obd_recovery_timer,
+ cfs_time_shift(end - now));
}
spin_unlock(&obd->obd_dev_lock);
CDEBUG(D_HA, "%s: recovery timer will expire in %u seconds\n",
- obd->obd_name, (unsigned)drt);
+ obd->obd_name, (unsigned)cfs_time_sub(end, now));
}
/* Reset the timer with each new client connection */
/**
 * Health check used during request replay.
 *
 * Returns non-zero if the export is done with request replay
 * (exp_req_replay_needed is clear) or still has replays in the queue
 * (exp_replay_count > 0); returns 0 otherwise.
 */
static inline int exp_req_replay_healthy(struct obd_export *exp)
{
- return (!exp->exp_req_replay_needed ||
- cfs_atomic_read(&exp->exp_replay_count) > 0);
+ return (!exp->exp_req_replay_needed ||
+ atomic_read(&exp->exp_replay_count) > 0);
}
/**
 * Health check used during lock replay.
 *
 * Returns non-zero if the export is done with lock replay
 * (exp_lock_replay_needed is clear) or still has replays in the queue
 * (exp_replay_count > 0, the same counter the request-replay check reads);
 * returns 0 otherwise.
 */
static inline int exp_lock_replay_healthy(struct obd_export *exp)
{
- return (!exp->exp_lock_replay_needed ||
- cfs_atomic_read(&exp->exp_replay_count) > 0);
+ return (!exp->exp_lock_replay_needed ||
+ atomic_read(&exp->exp_replay_count) > 0);
}
static inline int exp_vbr_healthy(struct obd_export *exp)
/** Checking routines for recovery */
/**
 * Recovery check routine based on the number of connected clients.
 *
 * Returns 1 immediately if recovery has been aborted or has expired.
 * Otherwise asserts that obd_connected_clients does not exceed
 * obd_max_recoverable_clients and returns non-zero only when the connected
 * clients plus obd_stale_clients add up to obd_max_recoverable_clients.
 */
static int check_for_clients(struct obd_device *obd)
{
- unsigned int clnts = cfs_atomic_read(&obd->obd_connected_clients);
+ unsigned int clnts = atomic_read(&obd->obd_connected_clients);
- if (obd->obd_abort_recovery || obd->obd_recovery_expired)
- return 1;
- LASSERT(clnts <= obd->obd_max_recoverable_clients);
+ if (obd->obd_abort_recovery || obd->obd_recovery_expired)
+ return 1;
+ LASSERT(clnts <= obd->obd_max_recoverable_clients);
return (clnts + obd->obd_stale_clients ==
obd->obd_max_recoverable_clients);
}
ENTRY;
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_req_replay_queue)) {
- req = cfs_list_entry(obd->obd_req_replay_queue.next,
- struct ptlrpc_request, rq_list);
- req_transno = lustre_msg_get_transno(req->rq_reqmsg);
- } else {
- req_transno = 0;
- }
+ if (!list_empty(&obd->obd_req_replay_queue)) {
+ req = list_entry(obd->obd_req_replay_queue.next,
+ struct ptlrpc_request, rq_list);
+ req_transno = lustre_msg_get_transno(req->rq_reqmsg);
+ } else {
+ req_transno = 0;
+ }
- connected = cfs_atomic_read(&obd->obd_connected_clients);
- completed = connected - cfs_atomic_read(&obd->obd_req_replay_clients);
- queue_len = obd->obd_requests_queued_for_recovery;
- next_transno = obd->obd_next_recovery_transno;
-
- CDEBUG(D_HA, "max: %d, connected: %d, completed: %d, queue_len: %d, "
- "req_transno: "LPU64", next_transno: "LPU64"\n",
- obd->obd_max_recoverable_clients, connected, completed,
- queue_len, req_transno, next_transno);
-
- if (obd->obd_abort_recovery) {
- CDEBUG(D_HA, "waking for aborted recovery\n");
- wake_up = 1;
- } else if (obd->obd_recovery_expired) {
- CDEBUG(D_HA, "waking for expired recovery\n");
- wake_up = 1;
- } else if (req_transno == next_transno) {
- CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno);
- wake_up = 1;
+ connected = atomic_read(&obd->obd_connected_clients);
+ completed = connected - atomic_read(&obd->obd_req_replay_clients);
+ queue_len = obd->obd_requests_queued_for_recovery;
+ next_transno = obd->obd_next_recovery_transno;
+
+ CDEBUG(D_HA, "max: %d, connected: %d, completed: %d, queue_len: %d, "
+ "req_transno: "LPU64", next_transno: "LPU64"\n",
+ obd->obd_max_recoverable_clients, connected, completed,
+ queue_len, req_transno, next_transno);
+
+ if (obd->obd_abort_recovery) {
+ CDEBUG(D_HA, "waking for aborted recovery\n");
+ wake_up = 1;
+ } else if (obd->obd_recovery_expired) {
+ CDEBUG(D_HA, "waking for expired recovery\n");
+ wake_up = 1;
+ } else if (req_transno == next_transno) {
+ CDEBUG(D_HA, "waking for next ("LPD64")\n", next_transno);
+ wake_up = 1;
} else if (queue_len > 0 &&
- queue_len == cfs_atomic_read(&obd->obd_req_replay_clients)) {
- int d_lvl = D_HA;
- /** handle gaps occured due to lost reply or VBR */
- LASSERTF(req_transno >= next_transno,
- "req_transno: "LPU64", next_transno: "LPU64"\n",
- req_transno, next_transno);
- if (req_transno > obd->obd_last_committed &&
- !obd->obd_version_recov)
- d_lvl = D_ERROR;
- CDEBUG(d_lvl,
- "%s: waking for gap in transno, VBR is %s (skip: "
- LPD64", ql: %d, comp: %d, conn: %d, next: "LPD64
- ", last_committed: "LPD64")\n",
- obd->obd_name, obd->obd_version_recov ? "ON" : "OFF",
- next_transno, queue_len, completed, connected,
- req_transno, obd->obd_last_committed);
- obd->obd_next_recovery_transno = req_transno;
- wake_up = 1;
- } else if (cfs_atomic_read(&obd->obd_req_replay_clients) == 0) {
+ queue_len == atomic_read(&obd->obd_req_replay_clients)) {
+ int d_lvl = D_HA;
+ /** handle gaps that occurred due to lost reply or VBR */
+ LASSERTF(req_transno >= next_transno,
+ "req_transno: "LPU64", next_transno: "LPU64"\n",
+ req_transno, next_transno);
+ if (req_transno > obd->obd_last_committed &&
+ !obd->obd_version_recov)
+ d_lvl = D_ERROR;
+ CDEBUG(d_lvl,
+ "%s: waking for gap in transno, VBR is %s (skip: "
+ LPD64", ql: %d, comp: %d, conn: %d, next: "LPD64
+ ", last_committed: "LPD64")\n",
+ obd->obd_name, obd->obd_version_recov ? "ON" : "OFF",
+ next_transno, queue_len, completed, connected,
+ req_transno, obd->obd_last_committed);
+ obd->obd_next_recovery_transno = req_transno;
+ wake_up = 1;
+ } else if (atomic_read(&obd->obd_req_replay_clients) == 0) {
CDEBUG(D_HA, "waking for completed recovery\n");
wake_up = 1;
- } else if (OBD_FAIL_CHECK(OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS)) {
- CDEBUG(D_HA, "accepting transno gaps is explicitly allowed"
- " by fail_lock, waking up ("LPD64")\n", next_transno);
- obd->obd_next_recovery_transno = req_transno;
- wake_up = 1;
- }
+ } else if (OBD_FAIL_CHECK(OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS)) {
+ CDEBUG(D_HA, "accepting transno gaps is explicitly allowed"
+ " by fail_lock, waking up ("LPD64")\n", next_transno);
+ obd->obd_next_recovery_transno = req_transno;
+ wake_up = 1;
+ }
spin_unlock(&obd->obd_recovery_task_lock);
return wake_up;
}
int wake_up = 0;
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_lock_replay_queue)) {
- CDEBUG(D_HA, "waking for next lock\n");
- wake_up = 1;
- } else if (cfs_atomic_read(&obd->obd_lock_replay_clients) == 0) {
- CDEBUG(D_HA, "waking for completed lock replay\n");
- wake_up = 1;
- } else if (obd->obd_abort_recovery) {
- CDEBUG(D_HA, "waking for aborted recovery\n");
- wake_up = 1;
- } else if (obd->obd_recovery_expired) {
- CDEBUG(D_HA, "waking for expired recovery\n");
- wake_up = 1;
- }
+ if (!list_empty(&obd->obd_lock_replay_queue)) {
+ CDEBUG(D_HA, "waking for next lock\n");
+ wake_up = 1;
+ } else if (atomic_read(&obd->obd_lock_replay_clients) == 0) {
+ CDEBUG(D_HA, "waking for completed lock replay\n");
+ wake_up = 1;
+ } else if (obd->obd_abort_recovery) {
+ CDEBUG(D_HA, "waking for aborted recovery\n");
+ wake_up = 1;
+ } else if (obd->obd_recovery_expired) {
+ CDEBUG(D_HA, "waking for expired recovery\n");
+ wake_up = 1;
+ }
spin_unlock(&obd->obd_recovery_task_lock);
return wake_up;
int (*health_check)(struct obd_export *))
{
repeat:
- wait_event(obd->obd_next_transno_waitq, check_routine(obd));
+ if ((obd->obd_recovery_start != 0) && (cfs_time_current_sec() >=
+ (obd->obd_recovery_start + obd->obd_recovery_time_hard))) {
+ CWARN("recovery is aborted by hard timeout\n");
+ obd->obd_abort_recovery = 1;
+ }
+
+ while (wait_event_timeout(obd->obd_next_transno_waitq,
+ check_routine(obd),
+ msecs_to_jiffies(60 * MSEC_PER_SEC)) == 0)
+ /* wait indefinitely for event, but don't trigger watchdog */;
+
if (obd->obd_abort_recovery) {
CWARN("recovery is aborted, evict exports in recovery\n");
/** evict exports which didn't finish recovery yet */
static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
{
- struct ptlrpc_request *req = NULL;
- ENTRY;
+ struct ptlrpc_request *req = NULL;
+ ENTRY;
- CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
- obd->obd_next_recovery_transno);
+ CDEBUG(D_HA, "Waiting for transno "LPD64"\n",
+ obd->obd_next_recovery_transno);
- if (target_recovery_overseer(obd, check_for_next_transno,
- exp_req_replay_healthy)) {
- abort_req_replay_queue(obd);
- abort_lock_replay_queue(obd);
- }
+ CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val);
+ /** This is needed to extend the recovery window above
+ * recovery_time_soft. Extending is possible only at the end of the
+ * recovery window (see handle_recovery_req for more details).
+ */
+ CFS_FAIL_TIMEOUT_MS(OBD_FAIL_TGT_REPLAY_DELAY, 300);
+
+ if (target_recovery_overseer(obd, check_for_next_transno,
+ exp_req_replay_healthy)) {
+ abort_req_replay_queue(obd);
+ abort_lock_replay_queue(obd);
+ }
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_req_replay_queue)) {
- req = cfs_list_entry(obd->obd_req_replay_queue.next,
+ if (!list_empty(&obd->obd_req_replay_queue)) {
+ req = list_entry(obd->obd_req_replay_queue.next,
struct ptlrpc_request, rq_list);
- cfs_list_del_init(&req->rq_list);
+ list_del_init(&req->rq_list);
obd->obd_requests_queued_for_recovery--;
spin_unlock(&obd->obd_recovery_task_lock);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
- LASSERT(cfs_list_empty(&obd->obd_req_replay_queue));
- LASSERT(cfs_atomic_read(&obd->obd_req_replay_clients) == 0);
+ LASSERT(list_empty(&obd->obd_req_replay_queue));
+ LASSERT(atomic_read(&obd->obd_req_replay_clients) == 0);
/** evict exports failed VBR */
class_disconnect_stale_exports(obd, exp_vbr_healthy);
}
static struct ptlrpc_request *target_next_replay_lock(struct obd_device *obd)
{
- struct ptlrpc_request *req = NULL;
+ struct ptlrpc_request *req = NULL;
- CDEBUG(D_HA, "Waiting for lock\n");
- if (target_recovery_overseer(obd, check_for_next_lock,
- exp_lock_replay_healthy))
- abort_lock_replay_queue(obd);
+ CDEBUG(D_HA, "Waiting for lock\n");
+ if (target_recovery_overseer(obd, check_for_next_lock,
+ exp_lock_replay_healthy))
+ abort_lock_replay_queue(obd);
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_lock_replay_queue)) {
- req = cfs_list_entry(obd->obd_lock_replay_queue.next,
+ if (!list_empty(&obd->obd_lock_replay_queue)) {
+ req = list_entry(obd->obd_lock_replay_queue.next,
struct ptlrpc_request, rq_list);
- cfs_list_del_init(&req->rq_list);
+ list_del_init(&req->rq_list);
spin_unlock(&obd->obd_recovery_task_lock);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
- LASSERT(cfs_list_empty(&obd->obd_lock_replay_queue));
- LASSERT(cfs_atomic_read(&obd->obd_lock_replay_clients) == 0);
- /** evict exports failed VBR */
- class_disconnect_stale_exports(obd, exp_vbr_healthy);
- }
- return req;
+ LASSERT(list_empty(&obd->obd_lock_replay_queue));
+ LASSERT(atomic_read(&obd->obd_lock_replay_clients) == 0);
+ /** evict exports failed VBR */
+ class_disconnect_stale_exports(obd, exp_vbr_healthy);
+ }
+ return req;
}
static struct ptlrpc_request *target_next_final_ping(struct obd_device *obd)
struct ptlrpc_request *req = NULL;
spin_lock(&obd->obd_recovery_task_lock);
- if (!cfs_list_empty(&obd->obd_final_req_queue)) {
- req = cfs_list_entry(obd->obd_final_req_queue.next,
+ if (!list_empty(&obd->obd_final_req_queue)) {
+ req = list_entry(obd->obd_final_req_queue.next,
struct ptlrpc_request, rq_list);
- cfs_list_del_init(&req->rq_list);
+ list_del_init(&req->rq_list);
spin_unlock(&obd->obd_recovery_task_lock);
if (req->rq_export->exp_in_recovery) {
spin_lock(&req->rq_export->exp_lock);
return req;
}
-static int handle_recovery_req(struct ptlrpc_thread *thread,
- struct ptlrpc_request *req,
- svc_handler_t handler)
+static void handle_recovery_req(struct ptlrpc_thread *thread,
+ struct ptlrpc_request *req,
+ svc_handler_t handler)
{
- int rc;
-
ENTRY;
/**
* it after that, discard such request silently
*/
if (req->rq_export->exp_disconnected)
- GOTO(reqcopy_put, rc = 0);
+ RETURN_EXIT;
req->rq_session.lc_thread = thread;
req->rq_svc_thread = thread;
}
extend_recovery_timer(class_exp2obd(req->rq_export), to, true);
}
-reqcopy_put:
- RETURN(rc);
+ EXIT;
}
static int target_recovery_thread(void *arg)
abort_lock_replay_queue(obd);
}
- /* next stage: replay requests */
- delta = jiffies;
- CDEBUG(D_INFO, "1: request replay stage - %d clients from t"LPU64"\n",
- cfs_atomic_read(&obd->obd_req_replay_clients),
- obd->obd_next_recovery_transno);
+ /* next stage: replay requests */
+ delta = jiffies;
+ CDEBUG(D_INFO, "1: request replay stage - %d clients from t"LPU64"\n",
+ atomic_read(&obd->obd_req_replay_clients),
+ obd->obd_next_recovery_transno);
while ((req = target_next_replay_req(obd))) {
LASSERT(trd->trd_processing_task == current_pid());
DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s",
obd->obd_replayed_requests++;
}
- /**
- * The second stage: replay locks
- */
- CDEBUG(D_INFO, "2: lock replay stage - %d clients\n",
- cfs_atomic_read(&obd->obd_lock_replay_clients));
+ /**
+ * The second stage: replay locks
+ */
+ CDEBUG(D_INFO, "2: lock replay stage - %d clients\n",
+ atomic_read(&obd->obd_lock_replay_clients));
while ((req = target_next_replay_lock(obd))) {
LASSERT(trd->trd_processing_task == current_pid());
DEBUG_REQ(D_HA, req, "processing lock from %s: ",
libcfs_nid2str(req->rq_peer.nid));
- handle_recovery_req(thread, req,
- trd->trd_recovery_handler);
- target_request_copy_put(req);
- obd->obd_replayed_locks++;
- }
+ handle_recovery_req(thread, req,
+ trd->trd_recovery_handler);
+ target_request_copy_put(req);
+ obd->obd_replayed_locks++;
+ }
/**
* The third stage: reply on final pings, at this moment all clients
* must have request in final queue
*/
+ CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_RECONNECT, cfs_fail_val);
CDEBUG(D_INFO, "3: final stage - process recovery completion pings\n");
/** Update server last boot epoch */
tgt_boot_epoch_update(lut);
libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
- target_request_copy_put(req);
- }
+ /* Because the waiting client cannot send a ping to the server,
+ * we need to refresh last_request_time to avoid the export
+ * being evicted */
+ ptlrpc_update_export_timer(req->rq_export, 0);
+ target_request_copy_put(req);
+ }
- delta = (jiffies - delta) / HZ;
+ delta = jiffies_to_msecs(jiffies - delta) / MSEC_PER_SEC;
CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
if (delta > OBD_RECOVERY_TIME_SOFT) {
struct obd_device *obd = (struct obd_device *)castmeharder;
CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery"
" after %lds (%d clients connected)\n",
- obd->obd_name, cfs_atomic_read(&obd->obd_lock_replay_clients),
+ obd->obd_name, atomic_read(&obd->obd_lock_replay_clients),
cfs_time_current_sec()- obd->obd_recovery_start,
- cfs_atomic_read(&obd->obd_connected_clients));
+ atomic_read(&obd->obd_connected_clients));
obd->obd_recovery_expired = 1;
wake_up(&obd->obd_next_transno_waitq);
}
EXPORT_SYMBOL(target_recovery_init);
-#endif /* __KERNEL__ */
static int target_process_req_flags(struct obd_device *obd,
struct ptlrpc_request *req)
spin_unlock(&exp->exp_lock);
LASSERT_ATOMIC_POS(&obd->obd_req_replay_clients);
- cfs_atomic_dec(&obd->obd_req_replay_clients);
+ atomic_dec(&obd->obd_req_replay_clients);
} else {
spin_unlock(&exp->exp_lock);
}
spin_unlock(&exp->exp_lock);
LASSERT_ATOMIC_POS(&obd->obd_lock_replay_clients);
- cfs_atomic_dec(&obd->obd_lock_replay_clients);
+ atomic_dec(&obd->obd_lock_replay_clients);
} else {
spin_unlock(&exp->exp_lock);
}
int target_queue_recovery_request(struct ptlrpc_request *req,
struct obd_device *obd)
{
- cfs_list_t *tmp;
- int inserted = 0;
__u64 transno = lustre_msg_get_transno(req->rq_reqmsg);
- ENTRY;
+ struct ptlrpc_request *reqiter;
+ int inserted = 0;
+ ENTRY;
if (obd->obd_recovery_data.trd_processing_task == current_pid()) {
/* Processing the queue right now, don't re-add. */
wake_up(&obd->obd_next_transno_waitq);
spin_lock(&obd->obd_recovery_task_lock);
if (obd->obd_recovering) {
- cfs_list_add_tail(&req->rq_list,
+ list_add_tail(&req->rq_list,
&obd->obd_final_req_queue);
} else {
spin_unlock(&obd->obd_recovery_task_lock);
RETURN(-ENOTCONN);
}
LASSERT(req->rq_export->exp_lock_replay_needed);
- cfs_list_add_tail(&req->rq_list, &obd->obd_lock_replay_queue);
+ list_add_tail(&req->rq_list, &obd->obd_lock_replay_queue);
spin_unlock(&obd->obd_recovery_task_lock);
RETURN(0);
}
* buffers (eg mdt_body, ost_body etc) have NOT been swabbed. */
if (!transno) {
- CFS_INIT_LIST_HEAD(&req->rq_list);
+ INIT_LIST_HEAD(&req->rq_list);
DEBUG_REQ(D_HA, req, "not queueing");
RETURN(1);
}
spin_lock(&obd->obd_recovery_task_lock);
if (transno < obd->obd_next_recovery_transno) {
/* Processing the queue right now, don't re-add. */
- LASSERT(cfs_list_empty(&req->rq_list));
+ LASSERT(list_empty(&req->rq_list));
spin_unlock(&obd->obd_recovery_task_lock);
RETURN(1);
}
RETURN(0);
}
- /* XXX O(n^2) */
+ /* XXX O(n^2) */
spin_lock(&obd->obd_recovery_task_lock);
- LASSERT(obd->obd_recovering);
- cfs_list_for_each(tmp, &obd->obd_req_replay_queue) {
- struct ptlrpc_request *reqiter =
- cfs_list_entry(tmp, struct ptlrpc_request, rq_list);
-
- if (lustre_msg_get_transno(reqiter->rq_reqmsg) > transno) {
- cfs_list_add_tail(&req->rq_list, &reqiter->rq_list);
- inserted = 1;
- break;
- }
+ LASSERT(obd->obd_recovering);
+ list_for_each_entry(reqiter, &obd->obd_req_replay_queue, rq_list) {
+ if (lustre_msg_get_transno(reqiter->rq_reqmsg) > transno) {
+ list_add_tail(&req->rq_list, &reqiter->rq_list);
+ inserted = 1;
+ goto added;
+ }
if (unlikely(lustre_msg_get_transno(reqiter->rq_reqmsg) ==
transno)) {
RETURN(0);
}
}
-
+added:
if (!inserted)
- cfs_list_add_tail(&req->rq_list, &obd->obd_req_replay_queue);
+ list_add_tail(&req->rq_list, &obd->obd_req_replay_queue);
obd->obd_requests_queued_for_recovery++;
spin_unlock(&obd->obd_recovery_task_lock);
wake_up(&obd->obd_next_transno_waitq);
RETURN(0);
}
-EXPORT_SYMBOL(target_queue_recovery_request);
int target_handle_ping(struct ptlrpc_request *req)
{
obd_ping(req->rq_svc_thread->t_env, req->rq_export);
return req_capsule_server_pack(&req->rq_pill);
}
-EXPORT_SYMBOL(target_handle_ping);
void target_committed_to_req(struct ptlrpc_request *req)
{
CDEBUG(D_INFO, "last_committed "LPU64", transno "LPU64", xid "LPU64"\n",
exp->exp_last_committed, req->rq_transno, req->rq_xid);
}
-EXPORT_SYMBOL(target_committed_to_req);
#endif /* HAVE_SERVER_SUPPORT */
RETURN(0);
}
-EXPORT_SYMBOL(target_pack_pool_reply);
-int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id)
+static int target_send_reply_msg(struct ptlrpc_request *req,
+ int rc, int fail_id)
{
if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) {
DEBUG_REQ(D_ERROR, req, "dropping reply");
}
/* must be an export if locks saved */
- LASSERT (req->rq_export != NULL);
+ LASSERT(req->rq_export != NULL);
/* req/reply consistent */
LASSERT(rs->rs_svcpt == svcpt);
/* "fresh" reply */
- LASSERT (!rs->rs_scheduled);
- LASSERT (!rs->rs_scheduled_ever);
- LASSERT (!rs->rs_handled);
- LASSERT (!rs->rs_on_net);
- LASSERT (rs->rs_export == NULL);
- LASSERT (cfs_list_empty(&rs->rs_obd_list));
- LASSERT (cfs_list_empty(&rs->rs_exp_list));
+ LASSERT(!rs->rs_scheduled);
+ LASSERT(!rs->rs_scheduled_ever);
+ LASSERT(!rs->rs_handled);
+ LASSERT(!rs->rs_on_net);
+ LASSERT(rs->rs_export == NULL);
+ LASSERT(list_empty(&rs->rs_obd_list));
+ LASSERT(list_empty(&rs->rs_exp_list));
- exp = class_export_get (req->rq_export);
+ exp = class_export_get(req->rq_export);
/* disable reply scheduling while I'm setting up */
rs->rs_scheduled = 1;
rs->rs_transno, exp->exp_last_committed);
if (rs->rs_transno > exp->exp_last_committed) {
/* not committed already */
- cfs_list_add_tail(&rs->rs_obd_list,
+ list_add_tail(&rs->rs_obd_list,
&exp->exp_uncommitted_replies);
}
spin_unlock(&exp->exp_uncommitted_replies_lock);
spin_lock(&exp->exp_lock);
- cfs_list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies);
+ list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies);
spin_unlock(&exp->exp_lock);
netrc = target_send_reply_msg(req, rc, fail_id);
spin_lock(&svcpt->scp_rep_lock);
- cfs_atomic_inc(&svcpt->scp_nreps_difficult);
+ atomic_inc(&svcpt->scp_nreps_difficult);
- if (netrc != 0) {
- /* error sending: reply is off the net. Also we need +1
- * reply ref until ptlrpc_handle_rs() is done
- * with the reply state (if the send was successful, there
- * would have been +1 ref for the net, which
- * reply_out_callback leaves alone) */
- rs->rs_on_net = 0;
- ptlrpc_rs_addref(rs);
- }
+ if (netrc != 0) {
+ /* error sending: reply is off the net. Also we need +1
+ * reply ref until ptlrpc_handle_rs() is done
+ * with the reply state (if the send was successful, there
+ * would have been +1 ref for the net, which
+ * reply_out_callback leaves alone) */
+ rs->rs_on_net = 0;
+ ptlrpc_rs_addref(rs);
+ }
spin_lock(&rs->rs_lock);
if (rs->rs_transno <= exp->exp_last_committed ||
(!rs->rs_on_net && !rs->rs_no_ack) ||
- cfs_list_empty(&rs->rs_exp_list) || /* completed already */
- cfs_list_empty(&rs->rs_obd_list)) {
+ list_empty(&rs->rs_exp_list) || /* completed already */
+ list_empty(&rs->rs_obd_list)) {
CDEBUG(D_HA, "Schedule reply immediately\n");
ptlrpc_dispatch_difficult_reply(rs);
} else {
- cfs_list_add(&rs->rs_list, &svcpt->scp_rep_active);
+ list_add(&rs->rs_list, &svcpt->scp_rep_active);
rs->rs_scheduled = 0; /* allow notifier to schedule */
}
spin_unlock(&rs->rs_lock);
spin_unlock(&svcpt->scp_rep_lock);
EXIT;
}
-EXPORT_SYMBOL(target_send_reply);
ldlm_mode_t lck_compat_array[] = {
- [LCK_EX] LCK_COMPAT_EX,
- [LCK_PW] LCK_COMPAT_PW,
- [LCK_PR] LCK_COMPAT_PR,
- [LCK_CW] LCK_COMPAT_CW,
- [LCK_CR] LCK_COMPAT_CR,
- [LCK_NL] LCK_COMPAT_NL,
- [LCK_GROUP] LCK_COMPAT_GROUP,
- [LCK_COS] LCK_COMPAT_COS,
+ [LCK_EX] = LCK_COMPAT_EX,
+ [LCK_PW] = LCK_COMPAT_PW,
+ [LCK_PR] = LCK_COMPAT_PR,
+ [LCK_CW] = LCK_COMPAT_CW,
+ [LCK_CR] = LCK_COMPAT_CR,
+ [LCK_NL] = LCK_COMPAT_NL,
+ [LCK_GROUP] = LCK_COMPAT_GROUP,
+ [LCK_COS] = LCK_COMPAT_COS,
};
/**
switch (error) {
case ELDLM_OK:
+ case ELDLM_LOCK_MATCHED:
result = 0;
break;
case ELDLM_LOCK_CHANGED:
}
return error;
}
-EXPORT_SYMBOL(ldlm_errno2error);
#if LUSTRE_TRACKS_LOCK_EXP_REFS
void ldlm_dump_export_locks(struct obd_export *exp)
{
spin_lock(&exp->exp_locks_list_guard);
- if (!cfs_list_empty(&exp->exp_locks_list)) {
+ if (!list_empty(&exp->exp_locks_list)) {
struct ldlm_lock *lock;
CERROR("dumping locks for export %p,"
"ignore if the unmount doesn't hang\n", exp);
- cfs_list_for_each_entry(lock, &exp->exp_locks_list,
+ list_for_each_entry(lock, &exp->exp_locks_list,
l_exp_refs_link)
LDLM_ERROR(lock, "lock:");
}
RETURN(1);
}
-static inline char *bulk2type(struct ptlrpc_bulk_desc *desc)
+static inline const char *bulk2type(struct ptlrpc_request *req)
{
- return desc->bd_type == BULK_GET_SINK ? "GET" : "PUT";
+ if (req->rq_bulk_read)
+ return "READ";
+ if (req->rq_bulk_write)
+ return "WRITE";
+ return "UNKNOWN";
}
int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
{
struct ptlrpc_request *req = desc->bd_req;
time_t start = cfs_time_current_sec();
+ time_t deadline;
int rc = 0;
ENTRY;
/* If there is eviction in progress, wait for it to finish. */
- if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
+ if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
*lwi = LWI_INTR(NULL, NULL);
rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
- !cfs_atomic_read(&exp->exp_obd->
- obd_evict_inprogress),
+ !atomic_read(&exp->exp_obd->
+ obd_evict_inprogress),
lwi);
}
exp->exp_conn_cnt > lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
rc = -ENOTCONN;
} else {
- if (desc->bd_type == BULK_PUT_SINK)
+ if (req->rq_bulk_read)
rc = sptlrpc_svc_wrap_bulk(req, desc);
if (rc == 0)
rc = ptlrpc_start_bulk_transfer(desc);
if (rc < 0) {
DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
- bulk2type(desc), rc);
+ bulk2type(req), rc);
RETURN(rc);
}
RETURN(0);
}
+ /* limit actual bulk transfer to bulk_timeout seconds */
+ deadline = start + bulk_timeout;
+ if (deadline > req->rq_deadline)
+ deadline = req->rq_deadline;
+
do {
- long timeoutl = req->rq_deadline - cfs_time_current_sec();
+ long timeoutl = deadline - cfs_time_current_sec();
cfs_duration_t timeout = timeoutl <= 0 ?
CFS_TICK : cfs_time_seconds(timeoutl);
+ time_t rq_deadline;
*lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
target_bulk_timeout, desc);
lustre_msg_get_conn_cnt(req->rq_reqmsg),
lwi);
LASSERT(rc == 0 || rc == -ETIMEDOUT);
- /* Wait again if we changed deadline. */
+ /* Wait again if we changed rq_deadline. */
+ rq_deadline = ACCESS_ONCE(req->rq_deadline);
+ deadline = start + bulk_timeout;
+ if (deadline > rq_deadline)
+ deadline = rq_deadline;
} while ((rc == -ETIMEDOUT) &&
- (req->rq_deadline > cfs_time_current_sec()));
+ (deadline > cfs_time_current_sec()));
if (rc == -ETIMEDOUT) {
DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %ld%+lds",
- bulk2type(desc), req->rq_deadline - start,
- cfs_time_current_sec() - req->rq_deadline);
+ bulk2type(req), deadline - start,
+ cfs_time_current_sec() - deadline);
ptlrpc_abort_bulk(desc);
} else if (exp->exp_failed) {
DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
- bulk2type(desc));
+ bulk2type(req));
rc = -ENOTCONN;
ptlrpc_abort_bulk(desc);
} else if (exp->exp_conn_cnt >
lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
- bulk2type(desc));
+ bulk2type(req));
/* We don't reply anyway. */
rc = -ETIMEDOUT;
ptlrpc_abort_bulk(desc);
- } else if (desc->bd_failure ||
- desc->bd_nob_transferred != desc->bd_nob) {
- DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
- desc->bd_failure ? "network error on" : "truncated",
- bulk2type(desc), desc->bd_nob_transferred,
- desc->bd_nob);
- /* XXX Should this be a different errno? */
+ } else if (desc->bd_failure) {
+ DEBUG_REQ(D_ERROR, req, "network error on bulk %s",
+ bulk2type(req));
+ /* XXX should this be a different errno? */
rc = -ETIMEDOUT;
- } else if (desc->bd_type == BULK_GET_SINK) {
- rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ } else {
+ if (req->rq_bulk_write)
+ rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ if (rc == 0 && desc->bd_nob_transferred != desc->bd_nob) {
+ DEBUG_REQ(D_ERROR, req, "truncated bulk %s %d(%d)",
+ bulk2type(req), desc->bd_nob_transferred,
+ desc->bd_nob);
+ /* XXX should this be a different errno? */
+ rc = -ETIMEDOUT;
+ }
}
RETURN(rc);