add_wait_queue(&wq, &__wait); \
__state = TASK_UNINTERRUPTIBLE; \
for (;;) { \
- set_current_state(__state); \
- if (condition) \
+ set_current_state(__state); \
+ if (condition) \
+ break; \
+ /* We only become INTERRUPTIBLE if a timeout has fired, and \
+ * the caller has given us some signals to care about. \
+ * \
+ * XXXshaver we should check against info->lwi_signals here, \
+ * XXXshaver instead of just using l_killable_pending, perhaps. \
+ */ \
+ if (__state == TASK_INTERRUPTIBLE && \
+ l_killable_pending(current)) { \
+ CERROR("lwe: interrupt for %d\n", current->pid); \
+ if (info->lwi_on_signal) \
+ info->lwi_on_signal(info->lwi_cb_data); \
+ ret = -EINTR; \
+ break; \
+ } \
+ if (info->lwi_timeout) { \
+ if (schedule_timeout(info->lwi_timeout) == 0) { \
+ CERROR("lwe: timeout for %d\n", current->pid); \
+ if (!info->lwi_on_timeout || \
+ info->lwi_on_timeout(info->lwi_cb_data)) { \
+ ret = -ETIMEDOUT; \
break; \
- /* We only become INTERRUPTIBLE if a timeout has fired, and \
- * the caller has given us some signals to care about. \
- * \
- * XXXshaver we should check against info->wli_signals here, \
- * XXXshaver instead of just using l_killable_pending, perhaps. \
- */ \
- if (__state == TASK_INTERRUPTIBLE && \
- l_killable_pending(current)) { \
- if (info->lwi_on_signal) \
- info->lwi_on_signal(info->lwi_cb_data); \
- ret = -EINTR; \
- break; \
- } \
- if (info->lwi_timeout) { \
- if (schedule_timeout(info->lwi_timeout) == 0) { \
- /* We'll take signals only after a timeout. */ \
- if (info->lwi_signals) \
- __state = TASK_INTERRUPTIBLE; \
- if (info->lwi_on_timeout && \
- info->lwi_on_timeout(info->lwi_cb_data)) { \
- ret = -ETIMEDOUT; \
- break; \
- } \
+ } \
+ /* We'll take signals only after a timeout. */ \
+ if (info->lwi_signals) { \
+ __state = TASK_INTERRUPTIBLE; \
+ /* Check for a pending interrupt. */ \
+ if (info->lwi_signals && \
+ l_killable_pending(current)) { \
+ CERROR("lwe: pending interrupt for %d\n", \
+ current->pid); \
+ if (info->lwi_on_signal) \
+ info->lwi_on_signal(info->lwi_cb_data); \
+ ret = -EINTR; \
+ break; \
} \
- } else { \
- schedule(); \
+ } \
} \
+ } else { \
+ schedule(); \
+ } \
} \
current->state = TASK_RUNNING; \
remove_wait_queue(&wq, &__wait); \
int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
int class_multi_cleanup(struct obd_device *obddev);
+extern void (*class_signal_client_failure)(struct ptlrpc_client *);
#endif
#define OBD_FAIL_OST_PUNCH_NET 0x20b
#define OBD_FAIL_OST_STATFS_NET 0x20c
#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d
+#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e
+#define OBD_FAIL_OST_BRW_READ_BULK 0x20f
#define OBB_FAIL_LDLM 0x300
#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301
/* XXXshaver Do we need a resend strategy, or do we just
* XXXshaver return -ERESTARTSYS and punt it?
*/
-#if 0
- recovd_cli_fail(desc->b_client);
-#endif
+ CERROR("signalling failure of client %p\n", desc->b_client);
+ /* The hook is a file-scope pointer that stays NULL until
+  * recovd_setup() installs recovd_cli_fail; do not call through
+  * it before then.
+  */
+ if (class_signal_client_failure != NULL)
+ class_signal_client_failure(desc->b_client);
}
/* We go back to sleep, until we're resumed or interrupted. */
ENTRY;
if (phase == CB_PHASE_START) {
-#warning shaver hardcoded timeout
+#warning shaver hardcoded timeout (/proc/sys/lustre/timeout)
struct l_wait_info lwi;
- lwi = LWI_TIMEOUT_INTR(100, sync_io_timeout,
+ lwi = LWI_TIMEOUT_INTR(100 * HZ, sync_io_timeout,
SIGTERM | SIGKILL | SIGINT, sync_io_intr,
data);
ret = l_wait_event(data->waitq, data->complete, &lwi);
&obd_psdev_fops
};
+void (*class_signal_client_failure)(struct ptlrpc_client *);
EXPORT_SYMBOL(obd_dev);
EXPORT_SYMBOL(obdo_cachep);
//EXPORT_SYMBOL(class_multi_setup);
//EXPORT_SYMBOL(class_multi_cleanup);
+EXPORT_SYMBOL(class_signal_client_failure);
+
static int __init init_obdclass(void)
{
int err;
if (req->rq_status)
GOTO(out_free, rc = 0); /* XXX is this correct? */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK))
+ GOTO(fail_preprw, rc = 0);
+
desc = ptlrpc_prep_bulk(req->rq_connection);
if (desc == NULL)
GOTO(fail_preprw, rc = -ENOMEM);
GOTO(out, rc = 1);
}
-#if 0
- if (req->rq_flags & PTL_RPC_FL_RESEND) {
- if (l_killable_pending(current)) {
- CERROR("-- INTR --\n");
- req->rq_flags |= PTL_RPC_FL_INTR;
- GOTO(out, rc = 1);
- }
- CERROR("-- RESEND --\n");
- GOTO(out, rc = 1);
- }
-#endif
-
if (req->rq_flags & PTL_RPC_FL_RECOVERY) {
CERROR("-- RESTART --\n");
GOTO(out, rc = 1);
}
- if (req->rq_flags & PTL_RPC_FL_TIMEOUT && l_killable_pending(current)) {
- req->rq_flags |= PTL_RPC_FL_INTR;
- GOTO(out, rc = 1);
- }
-
- if (req->rq_timeout &&
- (CURRENT_TIME - req->rq_time >= req->rq_timeout)) {
- CERROR("-- REQ TIMEOUT ON CONNID %d XID %Ld --\n",
- req->rq_connid, (unsigned long long)req->rq_xid);
- /* clear the timeout */
- req->rq_timeout = 0;
- req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
- req->rq_flags |= PTL_RPC_FL_TIMEOUT;
- if (req->rq_client && req->rq_client->cli_recovd)
- recovd_cli_fail(req->rq_client);
- if (req->rq_level < LUSTRE_CONN_FULL) {
- rc = 1;
- } else if (l_killable_pending(current)) {
- req->rq_flags |= PTL_RPC_FL_INTR;
- rc = 1;
- } else {
- rc = 0;
- }
- GOTO(out, rc);
- }
-
out:
CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
return rc;
struct ptlrpc_request *req = data;
ENTRY;
+ CERROR("req timeout on connid %d xid %Ld\n", req->rq_connid,
+ (unsigned long long)req->rq_xid);
req->rq_timeout = 0;
req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
req->rq_flags |= PTL_RPC_FL_TIMEOUT;
int recovd_setup(struct recovd_obd *recovd)
{
int rc;
+ extern void (*class_signal_client_failure)(struct ptlrpc_client *);
+
ENTRY;
INIT_LIST_HEAD(&recovd->recovd_clients_lh);
}
wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & RECOVD_IDLE);
+ /* exported and called by obdclass timeout handlers */
+ class_signal_client_failure = recovd_cli_fail;
+
RETURN(0);
}