* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2012, Intel Corporation.
+ * Copyright (c) 2010, 2013, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
char *name = obddev->obd_type->typ_name;
ldlm_ns_type_t ns_type = LDLM_NS_TYPE_UNKNOWN;
int rc;
- char *cli_name = lustre_cfg_buf(lcfg, 0);
ENTRY;
/* In a more perfect world, we would hang a ptlrpc_client off of
* obd_type and just use the values from there. */
- if (!strcmp(name, LUSTRE_OSC_NAME) ||
- (!(strcmp(name, LUSTRE_OSP_NAME)) &&
- (is_osp_on_mdt(cli_name) &&
- strstr(lustre_cfg_buf(lcfg, 1), "OST") != NULL))) {
- /* OSC or OSP_on_MDT for OSTs */
- rq_portal = OST_REQUEST_PORTAL;
- rp_portal = OSC_REPLY_PORTAL;
- connect_op = OST_CONNECT;
- cli->cl_sp_me = LUSTRE_SP_CLI;
- cli->cl_sp_to = LUSTRE_SP_OST;
- ns_type = LDLM_NS_TYPE_OSC;
+ if (!strcmp(name, LUSTRE_OSC_NAME)) {
+ rq_portal = OST_REQUEST_PORTAL;
+ rp_portal = OSC_REPLY_PORTAL;
+ connect_op = OST_CONNECT;
+ cli->cl_sp_me = LUSTRE_SP_CLI;
+ cli->cl_sp_to = LUSTRE_SP_OST;
+ ns_type = LDLM_NS_TYPE_OSC;
} else if (!strcmp(name, LUSTRE_MDC_NAME) ||
- !strcmp(name, LUSTRE_LWP_NAME) ||
- (!strcmp(name, LUSTRE_OSP_NAME) &&
- (is_osp_on_mdt(cli_name) &&
- strstr(lustre_cfg_buf(lcfg, 1), "OST") == NULL))) {
- /* MDC or OSP_on_MDT for other MDTs */
- rq_portal = MDS_REQUEST_PORTAL;
- rp_portal = MDC_REPLY_PORTAL;
- connect_op = MDS_CONNECT;
- cli->cl_sp_me = LUSTRE_SP_CLI;
- cli->cl_sp_to = LUSTRE_SP_MDT;
- ns_type = LDLM_NS_TYPE_MDC;
+ !strcmp(name, LUSTRE_LWP_NAME)) {
+ rq_portal = MDS_REQUEST_PORTAL;
+ rp_portal = MDC_REPLY_PORTAL;
+ connect_op = MDS_CONNECT;
+ cli->cl_sp_me = LUSTRE_SP_CLI;
+ cli->cl_sp_to = LUSTRE_SP_MDT;
+ ns_type = LDLM_NS_TYPE_MDC;
+ } else if (!strcmp(name, LUSTRE_OSP_NAME)) {
+ if (strstr(lustre_cfg_buf(lcfg, 1), "OST") == NULL) {
+ /* OSP_on_MDT for other MDTs */
+ connect_op = MDS_CONNECT;
+ cli->cl_sp_to = LUSTRE_SP_MDT;
+ ns_type = LDLM_NS_TYPE_MDC;
+ rq_portal = OUT_PORTAL;
+ } else {
+ /* OSP on MDT for OST */
+ connect_op = OST_CONNECT;
+ cli->cl_sp_to = LUSTRE_SP_OST;
+ ns_type = LDLM_NS_TYPE_OSC;
+ rq_portal = OST_REQUEST_PORTAL;
+ }
+ rp_portal = OSC_REPLY_PORTAL;
+ cli->cl_sp_me = LUSTRE_SP_CLI;
} else if (!strcmp(name, LUSTRE_MGC_NAME)) {
rq_portal = MGS_REQUEST_PORTAL;
rp_portal = MGC_REPLY_PORTAL;
cli->cl_dirty = 0;
cli->cl_avail_grant = 0;
/* FIXME: Should limit this for the sum of all cl_dirty_max. */
- cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
- if (cli->cl_dirty_max >> CFS_PAGE_SHIFT > cfs_num_physpages / 8)
- cli->cl_dirty_max = cfs_num_physpages << (CFS_PAGE_SHIFT - 3);
+ cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
+ if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8)
+ cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3);
CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters);
CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list);
CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list);
cfs_atomic_set(&cli->cl_lru_in_list, 0);
CFS_INIT_LIST_HEAD(&cli->cl_lru_list);
client_obd_list_lock_init(&cli->cl_lru_list_lock);
+ cfs_atomic_set(&cli->cl_unstable_count, 0);
- cfs_waitq_init(&cli->cl_destroy_waitq);
- cfs_atomic_set(&cli->cl_destroy_in_flight, 0);
+ init_waitqueue_head(&cli->cl_destroy_waitq);
+ cfs_atomic_set(&cli->cl_destroy_in_flight, 0);
#ifdef ENABLE_CHECKSUM
- /* Turn on checksumming by default. */
- cli->cl_checksum = 1;
+ /* Turn on checksumming by default. */
+ cli->cl_checksum = 1;
/*
* The supported checksum types will be worked out at connect time
* Set cl_chksum* to CRC32 for now to avoid returning screwed info
#endif
cfs_atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS);
- /* This value may be changed at connect time in
- ptlrpc_connect_interpret. */
- cli->cl_max_pages_per_rpc = min((int)PTLRPC_MAX_BRW_PAGES,
- (int)(1024 * 1024 >> CFS_PAGE_SHIFT));
-
- if (!strcmp(name, LUSTRE_MDC_NAME)) {
- cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
- } else if (cfs_num_physpages >> (20 - CFS_PAGE_SHIFT) <= 128 /* MB */) {
- cli->cl_max_rpcs_in_flight = 2;
- } else if (cfs_num_physpages >> (20 - CFS_PAGE_SHIFT) <= 256 /* MB */) {
- cli->cl_max_rpcs_in_flight = 3;
- } else if (cfs_num_physpages >> (20 - CFS_PAGE_SHIFT) <= 512 /* MB */) {
- cli->cl_max_rpcs_in_flight = 4;
- } else {
+ /* This value may be reduced at connect time in
+ * ptlrpc_connect_interpret() . We initialize it to only
+ * 1MB until we know what the performance looks like.
+ * In the future this should likely be increased. LU-1431 */
+ cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES,
+ LNET_MTU >> PAGE_CACHE_SHIFT);
+
+ /* set cl_chunkbits default value to PAGE_CACHE_SHIFT,
+ * it will be updated at OSC connection time. */
+ cli->cl_chunkbits = PAGE_CACHE_SHIFT;
+
+ if (!strcmp(name, LUSTRE_MDC_NAME)) {
+ cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT;
+ } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 2;
+ } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 3;
+ } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) {
+ cli->cl_max_rpcs_in_flight = 4;
+ } else {
if (osc_on_mdt(obddev->obd_name))
cli->cl_max_rpcs_in_flight = MDS_OSC_MAX_RIF_DEFAULT;
else
GOTO(err_ldlm, rc = -ENOENT);
imp->imp_client = &obddev->obd_ldlm_client;
imp->imp_connect_op = connect_op;
- CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain);
memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
LUSTRE_CFG_BUFLEN(lcfg, 1));
class_import_put(imp);
/*
 * Final cleanup of a client obd device.
 *
 * Finishes freeing the device's LDLM namespace and clears the pointer,
 * calls obd_cleanup_client_import() and asserts that no import is left
 * attached to the client, then drops one ldlm reference.
 * Always returns 0.
 */
int client_obd_cleanup(struct obd_device *obddev)
{
- ENTRY;
+ ENTRY;
- ldlm_namespace_free_post(obddev->obd_namespace);
- obddev->obd_namespace = NULL;
+ ldlm_namespace_free_post(obddev->obd_namespace);
+ obddev->obd_namespace = NULL;
- LASSERT(obddev->u.cli.cl_import == NULL);
+ obd_cleanup_client_import(obddev);
+ LASSERT(obddev->u.cli.cl_import == NULL);
- ldlm_put_ref();
- RETURN(0);
+ ldlm_put_ref();
+ RETURN(0);
}
EXPORT_SYMBOL(client_obd_cleanup);
struct obd_device *obd, struct obd_uuid *cluuid,
struct obd_connect_data *data, void *localdata)
{
- struct client_obd *cli = &obd->u.cli;
- struct obd_import *imp = cli->cl_import;
- struct obd_connect_data *ocd;
- struct lustre_handle conn = { 0 };
- int rc;
- ENTRY;
+ struct client_obd *cli = &obd->u.cli;
+ struct obd_import *imp = cli->cl_import;
+ struct obd_connect_data *ocd;
+ struct lustre_handle conn = { 0 };
+ int rc;
+ ENTRY;
*exp = NULL;
down_write(&cli->cl_sem);
out_sem:
up_write(&cli->cl_sem);
- return rc;
+ return rc;
}
EXPORT_SYMBOL(client_connect_import);
if (!target) {
deuuidify(str, NULL, &target_start, &target_len);
- LCONSOLE_ERROR_MSG(0x137, "UUID '%s' is not available for "
- "connect (no target)\n", str);
+ LCONSOLE_ERROR_MSG(0x137, "%s: not available for connect "
+ "from %s (no target). If you are running "
+ "an HA pair check that the target is "
+ "mounted on the other server.\n", str,
+ libcfs_nid2str(req->rq_peer.nid));
GOTO(out, rc = -ENODEV);
}
spin_unlock(&target->obd_dev_lock);
deuuidify(str, NULL, &target_start, &target_len);
- LCONSOLE_ERROR_MSG(0x137, "%.*s: Not available for connect "
- "from %s (%s)\n", target_len, target_start,
- libcfs_nid2str(req->rq_peer.nid),
- (target->obd_stopping ?
- "stopping" : "not set up"));
+ LCONSOLE_INFO("%.*s: Not available for connect from %s (%s)\n",
+ target_len, target_start,
+ libcfs_nid2str(req->rq_peer.nid),
+ (target->obd_stopping ?
+ "stopping" : "not set up"));
GOTO(out, rc = -ENODEV);
}
/* Make sure the target isn't cleaned up while we're here. Yes,
* there's still a race between the above check and our incref here.
* Really, class_uuid2obd should take the ref. */
- targref = class_incref(target, __FUNCTION__, cfs_current());
+ targref = class_incref(target, __FUNCTION__, current);
target->obd_conn_inprogress++;
spin_unlock(&target->obd_dev_lock);
if (rc)
GOTO(out, rc);
- if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
- if (!data) {
- DEBUG_REQ(D_WARNING, req, "Refusing old (unversioned) "
- "libclient connection attempt");
- GOTO(out, rc = -EPROTO);
- } else if (data->ocd_version < LUSTRE_VERSION_CODE -
- LUSTRE_VERSION_ALLOWED_OFFSET ||
- data->ocd_version > LUSTRE_VERSION_CODE +
- LUSTRE_VERSION_ALLOWED_OFFSET) {
- DEBUG_REQ(D_WARNING, req, "Refusing %s (%d.%d.%d.%d) "
- "libclient connection attempt",
- data->ocd_version < LUSTRE_VERSION_CODE ?
- "old" : "new",
- OBD_OCD_VERSION_MAJOR(data->ocd_version),
- OBD_OCD_VERSION_MINOR(data->ocd_version),
- OBD_OCD_VERSION_PATCH(data->ocd_version),
- OBD_OCD_VERSION_FIX(data->ocd_version));
- data = req_capsule_server_sized_get(&req->rq_pill,
- &RMF_CONNECT_DATA,
- offsetof(typeof(*data), ocd_version) +
- sizeof(data->ocd_version));
- if (data) {
- data->ocd_connect_flags = OBD_CONNECT_VERSION;
- data->ocd_version = LUSTRE_VERSION_CODE;
- }
- GOTO(out, rc = -EPROTO);
- }
- }
+ if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
+ if (data->ocd_version < LUSTRE_VERSION_CODE -
+ LUSTRE_VERSION_ALLOWED_OFFSET ||
+ data->ocd_version > LUSTRE_VERSION_CODE +
+ LUSTRE_VERSION_ALLOWED_OFFSET) {
+ DEBUG_REQ(D_WARNING, req, "Refusing %s (%d.%d.%d.%d) "
+ "libclient connection attempt",
+ data->ocd_version < LUSTRE_VERSION_CODE ?
+ "old" : "new",
+ OBD_OCD_VERSION_MAJOR(data->ocd_version),
+ OBD_OCD_VERSION_MINOR(data->ocd_version),
+ OBD_OCD_VERSION_PATCH(data->ocd_version),
+ OBD_OCD_VERSION_FIX(data->ocd_version));
+ data = req_capsule_server_sized_get(&req->rq_pill,
+ &RMF_CONNECT_DATA,
+ offsetof(typeof(*data), ocd_version) +
+ sizeof(data->ocd_version));
+ if (data) {
+ data->ocd_connect_flags = OBD_CONNECT_VERSION;
+ data->ocd_version = LUSTRE_VERSION_CODE;
+ }
+ GOTO(out, rc = -EPROTO);
+ }
+ }
if ((lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_INITIAL) &&
(data->ocd_connect_flags & OBD_CONNECT_MDS))
libcfs_nid2str(req->rq_peer.nid),
cfs_atomic_read(&export->exp_refcount));
GOTO(out, rc = -EBUSY);
- } else if (req->rq_export != NULL &&
- (cfs_atomic_read(&export->exp_rpc_count) > 1)) {
- /* The current connect RPC has increased exp_rpc_count. */
- LCONSOLE_WARN("%s: Client %s (at %s) refused reconnection, "
- "still busy with %d active RPCs\n",
- target->obd_name, cluuid.uuid,
- libcfs_nid2str(req->rq_peer.nid),
- cfs_atomic_read(&export->exp_rpc_count) - 1);
- spin_lock(&export->exp_lock);
- if (req->rq_export->exp_conn_cnt <
- lustre_msg_get_conn_cnt(req->rq_reqmsg))
- /* try to abort active requests */
- req->rq_export->exp_abort_active_req = 1;
- spin_unlock(&export->exp_lock);
- GOTO(out, rc = -EBUSY);
} else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) {
if (!strstr(cluuid.uuid, "mdt"))
LCONSOLE_WARN("%s: Rejecting reconnect from the "
export, (long)cfs_time_current_sec(),
export ? (long)export->exp_last_request_time : 0);
- /* If this is the first time a client connects, reset the recovery
+ /* If this is the first time a client connects, reset the recovery
* timer. Discard lightweight connections which might be local. */
if (!lw_client && rc == 0 && target->obd_recovering)
check_and_start_recovery_timer(target, req, export == NULL);
rc = obd_connect(req->rq_svc_thread->t_env,
&export, target, &cluuid, data,
client_nid);
+ if (mds_conn && OBD_FAIL_CHECK(OBD_FAIL_TGT_RCVG_FLAG))
+ lustre_msg_add_op_flags(req->rq_repmsg,
+ MSG_CONNECT_RECOVERING);
if (rc == 0)
conn.cookie = export->exp_handle.h_cookie;
}
* XXX this will go away when shaver stops sending the "connect" handle
* in the real "remote handle" field of the request --phik 24 Apr 2003
*/
- if (req->rq_export != NULL)
- class_export_put(req->rq_export);
-
- /* Request takes one export reference. */
- req->rq_export = class_export_get(export);
+ ptlrpc_request_change_export(req, export);
spin_lock(&export->exp_lock);
if (export->exp_conn_cnt >= lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
}
LASSERT(lustre_msg_get_conn_cnt(req->rq_reqmsg) > 0);
export->exp_conn_cnt = lustre_msg_get_conn_cnt(req->rq_reqmsg);
- export->exp_abort_active_req = 0;
/* Don't evict liblustre clients for not pinging. */
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) {
spin_unlock(&target->obd_recovery_task_lock);
}
- cfs_atomic_inc(&target->obd_req_replay_clients);
- cfs_atomic_inc(&target->obd_lock_replay_clients);
- if (cfs_atomic_inc_return(&target->obd_connected_clients) ==
- target->obd_max_recoverable_clients)
- cfs_waitq_signal(&target->obd_next_transno_waitq);
- }
+ cfs_atomic_inc(&target->obd_req_replay_clients);
+ cfs_atomic_inc(&target->obd_lock_replay_clients);
+ if (cfs_atomic_inc_return(&target->obd_connected_clients) ==
+ target->obd_max_recoverable_clients)
+ wake_up(&target->obd_next_transno_waitq);
+ }
/* Tell the client we're in recovery, when client is involved in it. */
if (target->obd_recovering && !lw_client)
target->obd_conn_inprogress--;
spin_unlock(&target->obd_dev_lock);
- class_decref(targref, __func__, cfs_current());
+ class_decref(targref, __func__, current);
}
if (rc)
req->rq_status = rc;
*/
static void target_request_copy_get(struct ptlrpc_request *req)
{
- class_export_rpc_get(req->rq_export);
+ class_export_rpc_inc(req->rq_export);
LASSERT(cfs_list_empty(&req->rq_list));
CFS_INIT_LIST_HEAD(&req->rq_replay_list);
LASSERT_ATOMIC_POS(&req->rq_export->exp_replay_count);
cfs_atomic_dec(&req->rq_export->exp_replay_count);
- class_export_rpc_put(req->rq_export);
+ class_export_rpc_dec(req->rq_export);
ptlrpc_server_drop_request(req);
}
{
int service_time = lustre_msg_get_service_time(req->rq_reqmsg);
struct obd_device_target *obt = &obd->u.obt;
- struct lustre_sb_info *lsi;
if (!new_client && service_time)
/* Teach server about old server's estimates, as first guess
service_time += 2 * INITIAL_CONNECT_TIMEOUT;
LASSERT(obt->obt_magic == OBT_MAGIC);
- lsi = s2lsi(obt->obt_sb);
- if (!(lsi->lsi_flags | LDD_F_IR_CAPABLE))
- service_time += 2 * (CONNECTION_SWITCH_MAX +
- CONNECTION_SWITCH_INC);
- if (service_time > obd->obd_recovery_timeout && !new_client)
- extend_recovery_timer(obd, service_time, false);
+ service_time += 2 * (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC);
+ if (service_time > obd->obd_recovery_timeout && !new_client)
+ extend_recovery_timer(obd, service_time, false);
}
/** Health checking routines */
* evict dead clients via health_check
*/
static int target_recovery_overseer(struct obd_device *obd,
- int (*check_routine)(struct obd_device *),
- int (*health_check)(struct obd_export *))
+ int (*check_routine)(struct obd_device *),
+ int (*health_check)(struct obd_export *))
{
/* Sleep on obd_next_transno_waitq until check_routine(obd) is true, then
 * look at the abort/expired flags.  Returns 1 when recovery was aborted,
 * 0 otherwise; after an expiry the stale exports are evicted and the wait
 * is restarted. */
repeat:
- cfs_wait_event(obd->obd_next_transno_waitq, check_routine(obd));
- if (obd->obd_abort_recovery) {
- CWARN("recovery is aborted, evict exports in recovery\n");
- /** evict exports which didn't finish recovery yet */
- class_disconnect_stale_exports(obd, exp_finished);
- return 1;
- } else if (obd->obd_recovery_expired) {
- obd->obd_recovery_expired = 0;
- /** If some clients died being recovered, evict them */
- LCONSOLE_WARN("%s: recovery is timed out, "
- "evict stale exports\n", obd->obd_name);
- /** evict cexports with no replay in queue, they are stalled */
- class_disconnect_stale_exports(obd, health_check);
- /** continue with VBR */
+ wait_event(obd->obd_next_transno_waitq, check_routine(obd));
+ if (obd->obd_abort_recovery) {
+ CWARN("recovery is aborted, evict exports in recovery\n");
+ /** evict exports which didn't finish recovery yet */
+ class_disconnect_stale_exports(obd, exp_finished);
+ return 1;
+ } else if (obd->obd_recovery_expired) {
+ obd->obd_recovery_expired = 0;
+ /** If some clients died being recovered, evict them */
+ LCONSOLE_WARN("%s: recovery is timed out, "
+ "evict stale exports\n", obd->obd_name);
+ /** evict cexports with no replay in queue, they are stalled */
+ class_disconnect_stale_exports(obd, health_check);
+ /** continue with VBR */
/* obd_version_recov is flipped under obd_dev_lock */
spin_lock(&obd->obd_dev_lock);
obd->obd_version_recov = 1;
spin_unlock(&obd->obd_dev_lock);
- /**
- * reset timer, recovery will proceed with versions now,
- * timeout is set just to handle reconnection delays
- */
- extend_recovery_timer(obd, RECONNECT_DELAY_MAX, true);
- /** Wait for recovery events again, after evicting bad clients */
- goto repeat;
- }
- return 0;
+ /**
+ * reset timer, recovery will proceed with versions now,
+ * timeout is set just to handle reconnection delays
+ */
+ extend_recovery_timer(obd, RECONNECT_DELAY_MAX, true);
+ /** Wait for recovery events again, after evicting bad clients */
+ goto repeat;
+ }
+ return 0;
}
static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd)
if (req->rq_export->exp_disconnected)
GOTO(reqcopy_put, rc = 0);
- rc = lu_context_init(&req->rq_recov_session, LCT_SESSION);
+ rc = lu_context_init(&req->rq_recov_session, LCT_SERVER_SESSION);
if (rc) {
CERROR("Failure to initialize session: %d\n", rc);
GOTO(reqcopy_put, rc);
struct ptlrpc_request *req;
struct target_recovery_data *trd = &obd->obd_recovery_data;
unsigned long delta;
- unsigned long flags;
struct lu_env *env;
struct ptlrpc_thread *thread = NULL;
int rc = 0;
ENTRY;
- cfs_daemonize_ctxt("tgt_recov");
-
- SIGNAL_MASK_LOCK(current, flags);
- sigfillset(&current->blocked);
- RECALC_SIGPENDING;
- SIGNAL_MASK_UNLOCK(current, flags);
-
+ unshare_fs_struct();
OBD_ALLOC_PTR(thread);
if (thread == NULL)
RETURN(-ENOMEM);
thread->t_env = env;
thread->t_id = -1; /* force filter_iobuf_get/put to use local buffers */
env->le_ctx.lc_thread = thread;
- thread->t_data = NULL;
- thread->t_watchdog = NULL;
+ tgt_io_thread_init(thread); /* init thread_big_cache for IO requests */
+ thread->t_watchdog = NULL;
- CDEBUG(D_HA, "%s: started recovery thread pid %d\n", obd->obd_name,
- cfs_curproc_pid());
- trd->trd_processing_task = cfs_curproc_pid();
+ CDEBUG(D_HA, "%s: started recovery thread pid %d\n", obd->obd_name,
+ current_pid());
+ trd->trd_processing_task = current_pid();
spin_lock(&obd->obd_dev_lock);
obd->obd_recovering = 1;
CDEBUG(D_INFO, "1: request replay stage - %d clients from t"LPU64"\n",
cfs_atomic_read(&obd->obd_req_replay_clients),
obd->obd_next_recovery_transno);
- while ((req = target_next_replay_req(obd))) {
- LASSERT(trd->trd_processing_task == cfs_curproc_pid());
- DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s",
- lustre_msg_get_transno(req->rq_reqmsg),
- libcfs_nid2str(req->rq_peer.nid));
+ while ((req = target_next_replay_req(obd))) {
+ LASSERT(trd->trd_processing_task == current_pid());
+ DEBUG_REQ(D_HA, req, "processing t"LPD64" from %s",
+ lustre_msg_get_transno(req->rq_reqmsg),
+ libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
/**
*/
CDEBUG(D_INFO, "2: lock replay stage - %d clients\n",
cfs_atomic_read(&obd->obd_lock_replay_clients));
- while ((req = target_next_replay_lock(obd))) {
- LASSERT(trd->trd_processing_task == cfs_curproc_pid());
- DEBUG_REQ(D_HA, req, "processing lock from %s: ",
- libcfs_nid2str(req->rq_peer.nid));
+ while ((req = target_next_replay_lock(obd))) {
+ LASSERT(trd->trd_processing_task == current_pid());
+ DEBUG_REQ(D_HA, req, "processing lock from %s: ",
+ libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
target_request_copy_put(req);
spin_lock(&obd->obd_recovery_task_lock);
target_cancel_recovery_timer(obd);
spin_unlock(&obd->obd_recovery_task_lock);
- while ((req = target_next_final_ping(obd))) {
- LASSERT(trd->trd_processing_task == cfs_curproc_pid());
- DEBUG_REQ(D_HA, req, "processing final ping from %s: ",
- libcfs_nid2str(req->rq_peer.nid));
+ while ((req = target_next_final_ping(obd))) {
+ LASSERT(trd->trd_processing_task == current_pid());
+ DEBUG_REQ(D_HA, req, "processing final ping from %s: ",
+ libcfs_nid2str(req->rq_peer.nid));
handle_recovery_req(thread, req,
trd->trd_recovery_handler);
target_request_copy_put(req);
}
- delta = (jiffies - delta) / CFS_HZ;
- CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
- delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
- if (delta > OBD_RECOVERY_TIME_SOFT) {
- CWARN("too long recovery - read logs\n");
- libcfs_debug_dumplog();
- }
+ delta = (jiffies - delta) / HZ;
+ CDEBUG(D_INFO,"4: recovery completed in %lus - %d/%d reqs/locks\n",
+ delta, obd->obd_replayed_requests, obd->obd_replayed_locks);
+ if (delta > OBD_RECOVERY_TIME_SOFT) {
+ CWARN("too long recovery - read logs\n");
+ libcfs_debug_dumplog();
+ }
target_finish_recovery(obd);
trd->trd_processing_task = 0;
complete(&trd->trd_finishing);
- OBD_FREE_PTR(thread);
- OBD_FREE_PTR(env);
- RETURN(rc);
+ tgt_io_thread_done(thread);
+ OBD_FREE_PTR(thread);
+ OBD_FREE_PTR(env);
+ RETURN(rc);
}
/*
 * Start the recovery thread for the target behind \a lut.
 *
 * Zeroes the device's recovery data, initialises the trd_starting and
 * trd_finishing completions, records \a handler as the recovery handler
 * and launches target_recovery_thread() (kthread name "tgt_recov").
 * On a successful launch this blocks until trd_starting is completed and
 * asserts that obd_recovering is set.
 *
 * Returns 0 on success, -ECHILD if the thread could not be created.
 * NOTE(review): the error code carried by the kthread_run() result is
 * discarded and replaced with -ECHILD -- confirm that callers do not need
 * the real cause.
 */
static int target_start_recovery_thread(struct lu_target *lut,
svc_handler_t handler)
{
- struct obd_device *obd = lut->lut_obd;
- int rc = 0;
- struct target_recovery_data *trd = &obd->obd_recovery_data;
+ struct obd_device *obd = lut->lut_obd;
+ int rc = 0;
+ struct target_recovery_data *trd = &obd->obd_recovery_data;
- memset(trd, 0, sizeof(*trd));
+ memset(trd, 0, sizeof(*trd));
init_completion(&trd->trd_starting);
init_completion(&trd->trd_finishing);
- trd->trd_recovery_handler = handler;
+ trd->trd_recovery_handler = handler;
- if (cfs_create_thread(target_recovery_thread, lut, 0) > 0) {
+ if (!IS_ERR(kthread_run(target_recovery_thread,
+ lut, "tgt_recov"))) {
/* do not return before the new thread has signalled trd_starting */
wait_for_completion(&trd->trd_starting);
- LASSERT(obd->obd_recovering != 0);
- } else
- rc = -ECHILD;
+ LASSERT(obd->obd_recovering != 0);
+ } else {
+ rc = -ECHILD;
+ }
- return rc;
+ return rc;
}
void target_stop_recovery_thread(struct obd_device *obd)
if (obd->obd_recovering) {
CERROR("%s: Aborting recovery\n", obd->obd_name);
obd->obd_abort_recovery = 1;
- cfs_waitq_signal(&obd->obd_next_transno_waitq);
+ wake_up(&obd->obd_next_transno_waitq);
}
spin_unlock(&obd->obd_dev_lock);
wait_for_completion(&trd->trd_finishing);
/*
 * Mark recovery as expired for the obd device passed in \a castmeharder
 * (an obd_device pointer carried as an unsigned long; presumably this is
 * used as a timer callback -- confirm against where it is registered).
 *
 * Logs how many clients are still in recovery, sets obd_recovery_expired
 * and wakes up waiters on obd_next_transno_waitq.
 */
static void target_recovery_expired(unsigned long castmeharder)
{
- struct obd_device *obd = (struct obd_device *)castmeharder;
- CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery"
- " after %lds (%d clients connected)\n",
- obd->obd_name, cfs_atomic_read(&obd->obd_lock_replay_clients),
- cfs_time_current_sec()- obd->obd_recovery_start,
- cfs_atomic_read(&obd->obd_connected_clients));
-
- obd->obd_recovery_expired = 1;
- cfs_waitq_signal(&obd->obd_next_transno_waitq);
+ struct obd_device *obd = (struct obd_device *)castmeharder;
+ CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery"
+ " after %lds (%d clients connected)\n",
+ obd->obd_name, cfs_atomic_read(&obd->obd_lock_replay_clients),
+ cfs_time_current_sec()- obd->obd_recovery_start,
+ cfs_atomic_read(&obd->obd_connected_clients));
+
+ obd->obd_recovery_expired = 1;
+ wake_up(&obd->obd_next_transno_waitq);
}
void target_recovery_init(struct lu_target *lut, svc_handler_t handler)
__u64 transno = lustre_msg_get_transno(req->rq_reqmsg);
ENTRY;
- if (obd->obd_recovery_data.trd_processing_task == cfs_curproc_pid()) {
- /* Processing the queue right now, don't re-add. */
- RETURN(1);
- }
+ if (obd->obd_recovery_data.trd_processing_task == current_pid()) {
+ /* Processing the queue right now, don't re-add. */
+ RETURN(1);
+ }
target_process_req_flags(obd, req);
if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) {
/* client declares he's ready to complete recovery
* so, we put the request on the final queue */
- target_request_copy_get(req);
- DEBUG_REQ(D_HA, req, "queue final req");
- cfs_waitq_signal(&obd->obd_next_transno_waitq);
+ target_request_copy_get(req);
+ DEBUG_REQ(D_HA, req, "queue final req");
+ wake_up(&obd->obd_next_transno_waitq);
spin_lock(&obd->obd_recovery_task_lock);
if (obd->obd_recovering) {
cfs_list_add_tail(&req->rq_list,
/* client declares he's ready to replay locks */
target_request_copy_get(req);
DEBUG_REQ(D_HA, req, "queue lock replay req");
- cfs_waitq_signal(&obd->obd_next_transno_waitq);
+ wake_up(&obd->obd_next_transno_waitq);
spin_lock(&obd->obd_recovery_task_lock);
LASSERT(obd->obd_recovering);
/* usually due to recovery abort */
obd->obd_requests_queued_for_recovery++;
spin_unlock(&obd->obd_recovery_task_lock);
- cfs_waitq_signal(&obd->obd_next_transno_waitq);
+ wake_up(&obd->obd_next_transno_waitq);
RETURN(0);
}
EXPORT_SYMBOL(target_queue_recovery_request);
/*
 * Start the bulk transfer described by \a desc on behalf of \a exp and wait
 * for it to finish.
 *
 * If an eviction is in progress on the export's obd, first wait for it to
 * end.  The transfer is not started (-ENOTCONN) when the export has failed
 * or, in the new code, when exp_conn_cnt is greater than the connection
 * count in the request message.  Otherwise the bulk is wrapped (for
 * BULK_PUT_SINK) and started, and the caller sleeps on desc->bd_waitq,
 * waiting again as long as the request deadline is still in the future.
 *
 * \a lwi is caller-provided storage that is overwritten for each wait.
 *
 * Returns 0 on success (or the result of sptlrpc_svc_unwrap_bulk() for
 * BULK_GET_SINK), -ENOTCONN when the export has failed, -ETIMEDOUT on
 * timeout, reconnect, network error or truncated transfer, or the error
 * from an earlier step if the transfer could not be started.
 */
int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc,
struct l_wait_info *lwi)
{
- struct ptlrpc_request *req = desc->bd_req;
- int rc = 0;
- ENTRY;
+ struct ptlrpc_request *req = desc->bd_req;
+ time_t start = cfs_time_current_sec();
+ int rc = 0;
+
+ ENTRY;
/* If there is eviction in progress, wait for it to finish. */
- if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
- *lwi = LWI_INTR(NULL, NULL);
- rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
- !cfs_atomic_read(&exp->exp_obd->
- obd_evict_inprogress),
- lwi);
- }
+ if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
+ *lwi = LWI_INTR(NULL, NULL);
+ rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
+ !cfs_atomic_read(&exp->exp_obd->
+ obd_evict_inprogress),
+ lwi);
+ }
- /* Check if client was evicted or tried to reconnect already. */
- if (exp->exp_failed || exp->exp_abort_active_req) {
- rc = -ENOTCONN;
- } else {
- if (desc->bd_type == BULK_PUT_SINK)
- rc = sptlrpc_svc_wrap_bulk(req, desc);
- if (rc == 0)
- rc = ptlrpc_start_bulk_transfer(desc);
- }
-
- if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) {
- ptlrpc_abort_bulk(desc);
- } else if (rc == 0) {
- time_t start = cfs_time_current_sec();
- do {
- long timeoutl = req->rq_deadline - cfs_time_current_sec();
- cfs_duration_t timeout = timeoutl <= 0 ?
- CFS_TICK : cfs_time_seconds(timeoutl);
- *lwi = LWI_TIMEOUT_INTERVAL(timeout,
- cfs_time_seconds(1),
- target_bulk_timeout,
- desc);
- rc = l_wait_event(desc->bd_waitq,
- !ptlrpc_server_bulk_active(desc) ||
- exp->exp_failed ||
- exp->exp_abort_active_req,
- lwi);
- LASSERT(rc == 0 || rc == -ETIMEDOUT);
- /* Wait again if we changed deadline. */
- } while ((rc == -ETIMEDOUT) &&
- (req->rq_deadline > cfs_time_current_sec()));
-
- if (rc == -ETIMEDOUT) {
- DEBUG_REQ(D_ERROR, req,
- "timeout on bulk %s after %ld%+lds",
- bulk2type(desc),
- req->rq_deadline - start,
- cfs_time_current_sec() -
- req->rq_deadline);
- ptlrpc_abort_bulk(desc);
- } else if (exp->exp_failed) {
- DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
- bulk2type(desc));
- rc = -ENOTCONN;
- ptlrpc_abort_bulk(desc);
- } else if (exp->exp_abort_active_req) {
- DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
- bulk2type(desc));
- /* We don't reply anyway. */
- rc = -ETIMEDOUT;
- ptlrpc_abort_bulk(desc);
- } else if (!desc->bd_success ||
- desc->bd_nob_transferred != desc->bd_nob) {
- DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
- desc->bd_success ?
- "truncated" : "network error on",
- bulk2type(desc),
- desc->bd_nob_transferred,
- desc->bd_nob);
- /* XXX Should this be a different errno? */
- rc = -ETIMEDOUT;
- } else if (desc->bd_type == BULK_GET_SINK) {
- rc = sptlrpc_svc_unwrap_bulk(req, desc);
- }
- } else {
- DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
- bulk2type(desc), rc);
- }
+ /* Check if client was evicted or reconnected already. */
+ if (exp->exp_failed ||
+ exp->exp_conn_cnt > lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
+ rc = -ENOTCONN;
+ } else {
+ if (desc->bd_type == BULK_PUT_SINK)
+ rc = sptlrpc_svc_wrap_bulk(req, desc);
+ if (rc == 0)
+ rc = ptlrpc_start_bulk_transfer(desc);
+ }
- RETURN(rc);
+ if (rc < 0) {
+ DEBUG_REQ(D_ERROR, req, "bulk %s failed: rc %d",
+ bulk2type(desc), rc);
+ RETURN(rc);
+ }
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) {
+ ptlrpc_abort_bulk(desc);
+ RETURN(0);
+ }
+
+ do {
+ long timeoutl = req->rq_deadline - cfs_time_current_sec();
+ cfs_duration_t timeout = timeoutl <= 0 ?
+ CFS_TICK : cfs_time_seconds(timeoutl);
+
+ *lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
+ target_bulk_timeout, desc);
+ rc = l_wait_event(desc->bd_waitq,
+ !ptlrpc_server_bulk_active(desc) ||
+ exp->exp_failed ||
+ exp->exp_conn_cnt >
+ lustre_msg_get_conn_cnt(req->rq_reqmsg),
+ lwi);
+ LASSERT(rc == 0 || rc == -ETIMEDOUT);
+ /* Wait again if we changed deadline. */
+ } while ((rc == -ETIMEDOUT) &&
+ (req->rq_deadline > cfs_time_current_sec()));
+
+ if (rc == -ETIMEDOUT) {
+ DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %ld%+lds",
+ bulk2type(desc), req->rq_deadline - start,
+ cfs_time_current_sec() - req->rq_deadline);
+ ptlrpc_abort_bulk(desc);
+ } else if (exp->exp_failed) {
+ DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",
+ bulk2type(desc));
+ rc = -ENOTCONN;
+ ptlrpc_abort_bulk(desc);
+ } else if (exp->exp_conn_cnt >
+ lustre_msg_get_conn_cnt(req->rq_reqmsg)) {
+ DEBUG_REQ(D_ERROR, req, "Reconnect on bulk %s",
+ bulk2type(desc));
+ /* We don't reply anyway. */
+ rc = -ETIMEDOUT;
+ ptlrpc_abort_bulk(desc);
+ } else if (desc->bd_failure ||
+ desc->bd_nob_transferred != desc->bd_nob) {
+ DEBUG_REQ(D_ERROR, req, "%s bulk %s %d(%d)",
+ desc->bd_failure ? "network error on" : "truncated",
+ bulk2type(desc), desc->bd_nob_transferred,
+ desc->bd_nob);
+ /* XXX Should this be a different errno? */
+ rc = -ETIMEDOUT;
+ } else if (desc->bd_type == BULK_GET_SINK) {
+ rc = sptlrpc_svc_unwrap_bulk(req, desc);
+ }
+
+ RETURN(rc);
}
EXPORT_SYMBOL(target_bulk_io);