From b9e1bb635039c6d2d985754a9a029c9d5c20b569 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Tue, 29 Mar 2016 14:39:47 +0300 Subject: [PATCH] LU-7934 osp: fix tr->otr_next_id overflow The tracker next_id and current_id u32 type was based on max llog records. But llog use cyclic store for records, so llog could store infinite number of records and limited by max number at moument of time. The u32 type could be overflowed easy if a server isn`t rebooted. osp_sync_id_get()) snx11126-OST0045-osc-MDT0000: next 0, last synced 4294967205 This fix changes type u32 to u64 for *id. Now, we store only low part current_id to llog record header id. And restore the full u64 from record header later. It is possible because llog catalog can store 64768^2 and it is less than u32 max value. The patch adds test to check u32 overflow for otr_next_id field of osp_id_tracker. Recreate the next assertion LustreError: 185667:0:(osp_sync.c:1544:osp_sync_id_get()) snx11126-OST0045-osc-MDT0000: next 0, last synced 4294967205 LustreError: 231396:0:(osp_sync.c:1545:osp_sync_id_get()) LBUG Seagate-bug-id: MRP-3367 Signed-off-by: Alexander Boyko Change-Id: I89d70ecb068f8d0b5a1e1ac35b85a4b6e53211e5 Reviewed-on: http://review.whamcloud.com/19190 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/osp/osp_dev.c | 2 +- lustre/osp/osp_internal.h | 12 +++++------ lustre/osp/osp_sync.c | 47 ++++++++++++++++++++++++++++++++----------- lustre/tests/replay-single.sh | 17 ++++++++++++++++ 5 files changed, 60 insertions(+), 19 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 81a32c7..a6a83d9 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -250,6 +250,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b #define OBD_FAIL_MDS_FLD_LOOKUP 0x15c #define OBD_FAIL_MDS_INTENT_DELAY 0x160 +#define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162 /* layout lock */ #define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c index 505e484..329da7b 100644 --- a/lustre/osp/osp_dev.c +++ b/lustre/osp/osp_dev.c @@ -811,7 +811,7 @@ static int osp_sync(const struct lu_env *env, struct dt_device *dev) if (rc != 0) GOTO(out, rc); - CDEBUG(D_CACHE, "%s: id: used %lu, processed %lu\n", + CDEBUG(D_CACHE, "%s: id: used %lu, processed "LPU64"\n", d->opd_obd->obd_name, id, d->opd_syn_last_processed_id); /* wait till all-in-line are processed */ diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index 88e2777..3714dc2 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -55,8 +55,8 @@ */ struct osp_id_tracker { spinlock_t otr_lock; - __u32 otr_next_id; - __u32 otr_committed_id; + __u64 otr_next_id; + __u64 otr_committed_id; /* callback is register once per diskfs -- that's the whole point */ struct dt_txn_callback otr_tx_cb; /* single node can run many clusters */ @@ -215,12 +215,12 @@ struct osp_device { /* osd api's commit cb control structure */ struct dt_txn_callback opd_syn_txn_cb; /* last used change number -- semantically similar to transno */ - unsigned long opd_syn_last_used_id; + __u64 opd_syn_last_used_id; /* last committed change number -- semantically similar to * last_committed */ - unsigned long opd_syn_last_committed_id; + __u64 opd_syn_last_committed_id; /* last processed (taken from llog) id */ - unsigned long opd_syn_last_processed_id; + __u64 opd_syn_last_processed_id; struct osp_id_tracker *opd_syn_tracker; struct list_head opd_syn_ontrack; /* stop processing new requests until barrier=0 */ @@ -420,7 +420,7 @@ static inline struct osp_thread_info *osp_env_info(const struct lu_env *env) } struct osp_txn_info { - __u32 oti_current_id; + __u64 oti_current_id; }; extern struct lu_context_key osp_txn_key; diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index 2c74443..582296f 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -50,7 +50,7 @@ static int osp_sync_id_traction_init(struct osp_device *d); static void osp_sync_id_traction_fini(struct osp_device *d); -static __u32 osp_sync_id_get(struct osp_device *d, __u32 id); +static __u64 osp_sync_id_get(struct osp_device *d, __u64 id); static void osp_sync_remove_from_tracker(struct osp_device *d); /* @@ -238,6 +238,25 @@ void __osp_sync_check_for_work(struct osp_device *d) osp_sync_check_for_work(d); } +static inline __u64 osp_sync_correct_id(struct osp_device *d, + struct llog_rec_hdr *rec) +{ + /* + * llog use cyclic store with 32 bit lrh_id + * so overflow lrh_id is possible. Range between + * last_processed and last_committed is less than + * 64745 ^ 2 and less than 2^32 - 1 + */ + __u64 correct_id = d->opd_syn_last_committed_id; + + if ((correct_id & 0xffffffffULL) < rec->lrh_id) + correct_id -= 0x100000000ULL; + + correct_id &= ~0xffffffffULL; + correct_id |= rec->lrh_id; + + return correct_id; +} /** * Check and return ready-for-new status. * @@ -271,7 +290,8 @@ static inline int osp_sync_can_process_new(struct osp_device *d, return 1; if (d->opd_syn_changes == 0) return 0; - if (rec == NULL || rec->lrh_id <= d->opd_syn_last_committed_id) + if (rec == NULL || + osp_sync_correct_id(d, rec) <= d->opd_syn_last_committed_id) return 1; return 0; } @@ -402,8 +422,7 @@ static int osp_sync_add_rec(const struct lu_env *env, struct osp_device *d, LASSERT(txn); txn->oti_current_id = osp_sync_id_get(d, txn->oti_current_id); - osi->osi_hdr.lrh_id = txn->oti_current_id; - + osi->osi_hdr.lrh_id = (txn->oti_current_id & 0xffffffffULL); ctxt = llog_get_context(d->opd_obd, LLOG_MDS_OST_ORIG_CTXT); if (ctxt == NULL) RETURN(-ENOMEM); @@ -915,13 +934,14 @@ static void osp_sync_process_record(const struct lu_env *env, * we should decrease changes and bump last_processed_id. */ if (d->opd_syn_prev_done) { + __u64 correct_id = osp_sync_correct_id(d, rec); LASSERT(d->opd_syn_changes > 0); - LASSERT(rec->lrh_id <= d->opd_syn_last_committed_id); + LASSERT(correct_id <= d->opd_syn_last_committed_id); /* NOTE: it's possible to meet same id if * OST stores few stripes of same file */ - if (rec->lrh_id > d->opd_syn_last_processed_id) { - d->opd_syn_last_processed_id = rec->lrh_id; + if (correct_id > d->opd_syn_last_processed_id) { + d->opd_syn_last_processed_id = correct_id; wake_up(&d->opd_syn_barrier_waitq); } d->opd_syn_changes--; @@ -1487,7 +1507,7 @@ static void osp_sync_tracker_commit_cb(struct thandle *th, void *cookie) spin_lock(&tr->otr_lock); if (likely(txn->oti_current_id > tr->otr_committed_id)) { - CDEBUG(D_OTHER, "committed: %u -> %u\n", + CDEBUG(D_OTHER, "committed: "LPU64" -> "LPU64"\n", tr->otr_committed_id, txn->oti_current_id); tr->otr_committed_id = txn->oti_current_id; @@ -1607,14 +1627,14 @@ static void osp_sync_id_traction_fini(struct osp_device *d) * Generates a new ID using the tracker associated with the given OSP device * \a d, if the given ID \a id is non-zero. Unconditially adds OSP device to * the wakeup list, so OSP won't miss when a transaction using the ID is - * committed. Notice ID is 32bit, but llog doesn't support >2^32 records anyway. + * committed. * * \param[in] d OSP device * \param[in] id 0 or ID generated previously * * \retval ID the caller should use */ -static __u32 osp_sync_id_get(struct osp_device *d, __u32 id) +static __u64 osp_sync_id_get(struct osp_device *d, __u64 id) { struct osp_id_tracker *tr; @@ -1623,9 +1643,12 @@ static __u32 osp_sync_id_get(struct osp_device *d, __u32 id) /* XXX: we can improve this introducing per-cpu preallocated ids? */ spin_lock(&tr->otr_lock); + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_TRACK_OVERFLOW)) + tr->otr_next_id = 0xfffffff0; + if (unlikely(tr->otr_next_id <= d->opd_syn_last_used_id)) { spin_unlock(&tr->otr_lock); - CERROR("%s: next %u, last synced %lu\n", + CERROR("%s: next "LPU64", last synced "LPU64"\n", d->opd_obd->obd_name, tr->otr_next_id, d->opd_syn_last_used_id); LBUG(); @@ -1638,7 +1661,7 @@ static __u32 osp_sync_id_get(struct osp_device *d, __u32 id) if (list_empty(&d->opd_syn_ontrack)) list_add(&d->opd_syn_ontrack, &tr->otr_wakeup_list); spin_unlock(&tr->otr_lock); - CDEBUG(D_OTHER, "new id %u\n", (unsigned) id); + CDEBUG(D_OTHER, "new id "LPU64"\n", id); return id; } diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 0df8804..31586b2 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -3598,6 +3598,23 @@ test_102d() { } run_test 102d "check replay & reconstruction with multiple mod RPCs in flight" +test_103() { + remote_mds_nodsh && skip "remote MDS with nodsh" && return +#define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162 + do_facet mds1 $LCTL set_param fail_loc=0x80000162 + + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/t- 30 || + error "create files on remote directory failed" + sync + rm -rf $DIR/$tdir/t-* + sync +#MDS should crash with tr->otr_next_id overflow + fail mds1 +} +run_test 103 "Check otr_next_id overflow" + + check_striped_dir_110() { $CHECKSTAT -t dir $DIR/$tdir/striped_dir || -- 1.8.3.1