extern unsigned int obd_timeout;
extern char obd_lustre_upcall[128];
extern unsigned int obd_sync_filter;
+extern wait_queue_head_t obd_race_waitq;
#define OBD_FAIL_MDS 0x100
#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101
#define OBD_FAIL_OBD_LOGD_NET 0x602
#define OBD_FAIL_TGT_REPLY_NET 0x700
+#define OBD_FAIL_TGT_CONN_RACE 0x701
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
} \
} while(0)
+/* The idea here is to synchronise two threads to force a race. The
+ * first thread that calls this with a matching fail_loc is put to
+ * sleep. The next thread that calls with the same fail_loc wakes up
+ * the first and continues.
+ *
+ * Expansion, in order:
+ * - if OBD_FAIL_CHECK_ONCE(id) is true: log, sleep on obd_race_waitq,
+ * then log again once woken;
+ * - otherwise, if obd_fail_loc and id agree after masking both with
+ * OBD_FAIL_MASK_LOC: wake up obd_race_waitq and carry on.
+ *
+ * id is expanded several times, so pass a constant without side effects.
+ *
+ * NOTE(review): sleep_on() does not re-check any condition, so a
+ * wake_up() issued before the sleeper is queued would presumably be
+ * missed -- confirm this is acceptable for a test-only hook. */
+#define OBD_RACE(id) \
+do { \
+ if (OBD_FAIL_CHECK_ONCE(id)) { \
+ CERROR("obd_race id %x sleeping\n", (id)); \
+ sleep_on(&obd_race_waitq); \
+ CERROR("obd_fail_race id %x awake\n", (id)); \
+ } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \
+ ((id) & OBD_FAIL_MASK_LOC)) { \
+ wake_up(&obd_race_waitq); \
+ } \
+} while(0)
+
#define fixme() CDEBUG(D_OTHER, "FIXME\n");
#ifdef __KERNEL__
struct list_head *p;
char *str, *tmp;
int rc = 0, abort_recovery;
+ unsigned long flags;
ENTRY;
+ OBD_RACE(OBD_FAIL_TGT_CONN_RACE);
+
LASSERT_REQSWAB (req, 0);
str = lustre_msg_string(req->rq_reqmsg, 0, sizeof(tgtuuid) - 1);
if (str == NULL) {
if (!target) {
target = class_name2obd(str);
}
-
+
if (!target || target->obd_stopping || !target->obd_set_up) {
CERROR("UUID '%s' is not available for connect\n", str);
GOTO(out, rc = -ENODEV);
export = req->rq_export = class_conn2export(&conn);
LASSERT(export != NULL);
+ spin_lock_irqsave(&export->exp_lock, flags);
+ if (export->exp_conn_cnt >= req->rq_reqmsg->conn_cnt) {
+ CERROR("%s: already connected at a higher conn_cnt: %d > %d\n",
+ cluuid.uuid, export->exp_conn_cnt,
+ req->rq_reqmsg->conn_cnt);
+ spin_unlock_irqrestore(&export->exp_lock, flags);
+ GOTO(out, rc = -EALREADY);
+ }
+ export->exp_conn_cnt = req->rq_reqmsg->conn_cnt;
+ spin_unlock_irqrestore(&export->exp_lock, flags);
+
/* request from liblustre? */
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT)
export->exp_libclient = 1;
export->exp_connection = ptlrpc_get_connection(&req->rq_peer,
&remote_uuid);
- LASSERT(export->exp_conn_cnt < req->rq_reqmsg->conn_cnt);
- export->exp_conn_cnt = req->rq_reqmsg->conn_cnt;
-
if (rc == EALREADY) {
/* We indicate the reconnection in a flag, not an error code. */
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT);
char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall */
unsigned int obd_sync_filter; /* = 0, don't sync by default */
+DECLARE_WAIT_QUEUE_HEAD(obd_race_waitq);
+
#ifdef __KERNEL__
/* opening /dev/obd */
static int obd_class_open(struct inode * inode, struct file * file)
EXPORT_SYMBOL(obd_dev);
EXPORT_SYMBOL(obdo_cachep);
EXPORT_SYMBOL(obd_fail_loc);
+EXPORT_SYMBOL(obd_race_waitq);
EXPORT_SYMBOL(obd_timeout);
EXPORT_SYMBOL(obd_lustre_upcall);
EXPORT_SYMBOL(obd_sync_filter);
LASSERT(root != NULL);
parent = root->parent;
LASSERT(parent != NULL);
-
+
while (1) {
while (temp->subdir != NULL)
temp = temp->subdir;
OBD_SYNCFILTER, /* XXX temporary, as we play with sync osts.. */
};
+int proc_fail_loc(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp);
+
static ctl_table obd_table[] = {
{OBD_FAIL_LOC, "fail_loc", &obd_fail_loc, sizeof(int), 0644, NULL,
&proc_dointvec},
{OBD_TIMEOUT, "timeout", &obd_timeout, sizeof(int), 0644, NULL,
- &proc_dointvec},
+ &proc_fail_loc},
/* XXX need to lock so we avoid update races with recovery upcall! */
{OBD_UPCALL, "upcall", obd_lustre_upcall, 128, 0644, NULL,
&proc_dostring, &sysctl_string },
obd_table_header = NULL;
#endif
}
+
+/* ctl_table handler wrapping proc_dointvec().
+ *
+ * Snapshots obd_fail_loc, lets proc_dointvec() process the request, and
+ * wakes every waiter on obd_race_waitq if obd_fail_loc ended up with a
+ * different value. Returns whatever proc_dointvec() returned. */
+int proc_fail_loc(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ const int prev_loc = obd_fail_loc;
+ int ret = proc_dointvec(table, write, filp, buffer, lenp);
+
+ /* Only a real change of value releases the sleepers. */
+ if (obd_fail_loc != prev_loc)
+ wake_up(&obd_race_waitq);
+
+ return ret;
+}
}
run_test 43 "mds osc import failure during recovery; don't LBUG"
+# Sets fail_loc 0x80000701 on the mds facet, forces a recover on the
+# matching device, touches the mount with df, then clears fail_loc.
+# Always returns 0.
+test_44() {
+ # First column of the /proc/fs/lustre/devices line matching mds_svc_MNT.
+ mdcdev=$(awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices)
+ # Low bits 0x701 are OBD_FAIL_TGT_CONN_RACE; the 0x80000000 bit is
+ # presumably the "fail once" flag -- TODO confirm.
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
+ $LCTL --device $mdcdev recover
+ df $MOUNT
+ # Disarm the failure location again.
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ return 0
+}
+run_test 44 "race in target handle connect"
+
equals_msg test complete, cleaning up
$CLEANUP