#define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721
#define OBD_FAIL_TGT_REPLY_DATA_RACE 0x722
#define OBD_FAIL_TGT_NO_GRANT 0x725
+#define OBD_FAIL_TGT_TXN_NO_CANCEL 0x726 /* return before cancelling update logs on other MDTs */
#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801
#define OBD_FAIL_LLOG_PURGE_DELAY 0x1318
#define OBD_FAIL_CATLIST 0x131b
#define OBD_FAIL_LLOG_PAUSE_AFTER_PAD 0x131c
+#define OBD_FAIL_LLOG_ADD_GAP 0x131d /* extra lgh_last_idx++ once fail_val counts down to 0 */
#define OBD_FAIL_LLITE 0x1400
#define OBD_FAIL_LLITE_FAULT_TRUNC_RACE 0x1401
continue;
}
- if (rec->lrh_index != index) {
- /*
- * the last time we couldn't parse the block due
- * to corruption, thus has no idea about the
- * next index, take it from the block, once.
- */
- if (refresh_idx) {
- refresh_idx = false;
- index = rec->lrh_index;
- } else {
- CERROR("%s: "DFID" Invalid record: index"
- " %u but expected %u\n",
- loghandle2name(loghandle),
- PFID(&loghandle->lgh_id.lgl_oi.oi_fid),
- rec->lrh_index, index);
- GOTO(out, rc = -ERANGE);
- }
+ if (rec->lrh_index > index) {
+ /* the record itself looks good, but we met a
+ * gap which can be result of old bugs, just
+ * keep going */
+ CERROR("%s: "DFID" index %u, expected %u\n",
+ loghandle2name(loghandle),
+ PFID(&loghandle->lgh_id.lgl_oi.oi_fid),
+ rec->lrh_index, index);
+ index = rec->lrh_index;
}
CDEBUG(D_OTHER,
down_write(&loghandle->lgh_last_sem);
/* increment the last_idx along with llh_tail index, they should
* be equal for a llog lifetime */
+ if (OBD_FAIL_CHECK(OBD_FAIL_LLOG_ADD_GAP) && --cfs_fail_val == 0)
+ loghandle->lgh_last_idx++;
loghandle->lgh_last_idx++;
index = loghandle->lgh_last_idx;
LLOG_HDR_TAIL(llh)->lrt_index = index;
ENTRY;
LASSERT(oth != NULL);
+ LASSERT(osp->opd_obd);
if (ou && ou->ou_generation != our->our_generation) {
const struct lnet_process_id *peer =
struct sub_thandle *st;
ENTRY;
+ if (OBD_FAIL_CHECK(OBD_FAIL_TGT_TXN_NO_CANCEL))
+ RETURN(0);
+
top_multiple_thandle_dump(tmt, D_INFO);
/* Cancel update logs on other MDTs */
list_for_each_entry(st, &tmt->tmt_sub_thandle_list, st_sub_list) {
}
run_test 28 "lock replay should be ordered: waiting after granted"
+# Check that a gap in the update llog index sequence does not abort
+# recovery: inject a gap on the 10th llog record, keep the update llog
+# from being cancelled, restart both MDSes and make sure the client
+# was not evicted while they recovered.
+test_32() {
+	(( $MDSCOUNT < 2 )) && skip_env "needs >= 2 MDTs"
+
+	local mdts=$(comma_list $(mdts_nodes))
+	local testid
+	local i
+
+	# inject a gap with 10th transaction
+#define OBD_FAIL_LLOG_ADD_GAP 0x131d
+	do_facet $SINGLEMDS $LCTL set_param fail_loc=0x0000131d fail_val=10
+	for ((i = 0; i < 20; i++)); do
+		$LFS setdirstripe -i1 $DIR/$tdir-$i ||
+			error "can't mkdir $DIR/$tdir-$i"
+	done
+
+	# prevent update llog cancellation, so that on the next boot the
+	# MDS has to process the update llog with the gap injected.
+	# The fail_loc is checked by server-side code, so it must be set
+	# on the MDS nodes, not on the local (client) node.
+#define OBD_FAIL_TGT_TXN_NO_CANCEL 0x726
+	do_nodes $mdts $LCTL set_param fail_loc=0x726
+
+	stop mds2
+	stop mds1
+
+	# clear the fail_loc on every MDS node before remounting, this
+	# also drops the gap-injection fail_loc set on $SINGLEMDS above
+	do_nodes $mdts $LCTL set_param fail_loc=0
+
+	mount_facet mds1
+	mount_facet mds2
+
+	$LFS df $DIR
+
+	# only look at the kernel messages logged since this test started
+	testid=$(echo $TESTNAME | tr '_' ' ')
+	dmesg | tac | sed "/$testid/,$ d" | grep "This client was evicted" &&
+		error "client got evicted due to aborted recovery"
+	# grep finding nothing is the success case, do not leak its status
+	return 0
+}
+run_test 32 "gap in update llog shouldn't break recovery"
+
complete $SECONDS
SLEEP=$((SECONDS - $NOW))
[ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP