From: Di Wang Date: Wed, 14 Dec 2016 22:13:30 +0000 (-0500) Subject: LU-8753 osp: add rpc generation X-Git-Tag: 2.9.52~31 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=0844905a308d614c86b56df70c8f03e5d59ee286 LU-8753 osp: add rpc generation Add rpc generation to make sure current update request will not be sent until the remote llog object got refresh. Signed-off-by: Di Wang Change-Id: Iae678686b522d545b69510444805a1e411acfcfe Reviewed-on: https://review.whamcloud.com/24364 Reviewed-by: Fan Yong Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index cbd3b92b..588d0c3 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1859,9 +1859,11 @@ struct thandle { unsigned int th_sync:1, /* local transation, no need to inform other layers */ th_local:1, - /* Whether we need wait the transaction to be submitted */ + /* Whether we need wait the transaction to be submitted + * (send to remote target) */ th_wait_submit:1, - /* complex transaction which will track updates on all targets */ + /* complex transaction which will track updates on all targets, + * including OSTs */ th_complex:1; }; diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 80e4f12..7251144 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -538,9 +538,13 @@ repeat: } if (rec->lrh_len == 0 || rec->lrh_len > chunk_size) { - CWARN("invalid length %d in llog record for " - "index %d/%d\n", rec->lrh_len, - rec->lrh_index, index); + CWARN("%s: invalid length %d in llog "DOSTID + "record for index %d/%d\n", + loghandle->lgh_ctxt->loc_obd->obd_name, + rec->lrh_len, + POSTID(&loghandle->lgh_id.lgl_oi), + rec->lrh_index, index); + GOTO(out, rc = -EINVAL); } @@ -551,9 +555,10 @@ repeat: } if (rec->lrh_index != index) { - CERROR("%s: Invalid record: index %u but " - "expected %u\n", + CERROR("%s: "DOSTID" Invalid 
record: index %u" + " but expected %u\n", loghandle->lgh_ctxt->loc_obd->obd_name, + POSTID(&loghandle->lgh_id.lgl_oi), rec->lrh_index, index); GOTO(out, rc = -ERANGE); } diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c index 7b5d36f..138bb43 100644 --- a/lustre/osp/osp_dev.c +++ b/lustre/osp/osp_dev.c @@ -510,6 +510,7 @@ static int osp_update_init(struct osp_device *osp) INIT_LIST_HEAD(&osp->opd_update->ou_list); osp->opd_update->ou_rpc_version = 1; osp->opd_update->ou_version = 1; + osp->opd_update->ou_generation = 0; /* start thread handling sending updates to the remote MDT */ task = kthread_run(osp_send_update_thread, osp, diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index f61b2f5..481932d 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -115,6 +115,7 @@ struct osp_update_request { struct osp_thandle *our_th; __u64 our_version; + __u64 our_generation; /* protect our_list and flag */ spinlock_t our_list_lock; /* linked to the list(ou_list) in osp_updates */ @@ -128,9 +129,22 @@ struct osp_updates { struct list_head ou_list; spinlock_t ou_lock; wait_queue_head_t ou_waitq; - /* wait for next updates */ + + /* The next rpc version which is supposed to be sent in + * osp_send_update_thread(). */ __u64 ou_rpc_version; + + /* The rpc version assigned to the osp thandle during (osp_md_write()), + * which will be sent in this order. Note: the osp_thandle has to be sent + * in this order to make sure the remote update log will follow the + * llog format rule. 
XXX: these probably should be removed once we + * invent new llog format */ __u64 ou_version; + + /* The generation of current osp update RPC, which is used to make sure + * that stale RPCs (with an older generation) will not be sent, otherwise it + * will cause update llog corruption */ + __u64 ou_generation; }; struct osp_device { diff --git a/lustre/osp/osp_trans.c b/lustre/osp/osp_trans.c index 545fefd..ebecd8a 100644 --- a/lustre/osp/osp_trans.c +++ b/lustre/osp/osp_trans.c @@ -1258,7 +1258,7 @@ struct thandle *osp_get_storage_thandle(const struct lu_env *env, * * Set the version for the transaction and add the request to * the sending list, then after transaction stop, the request - * will be picked in the order of version, by sending thread. + * will be sent in the order of version by the sending thread. * * \param [in] oth osp thandle to be set version. * @@ -1288,6 +1288,7 @@ int osp_check_and_set_rpc_version(struct osp_thandle *oth, /* Assign the version and add it to the sending list */ osp_thandle_get(oth); oth->ot_our->our_version = ou->ou_version++; + oth->ot_our->our_generation = ou->ou_generation; list_add_tail(&oth->ot_our->our_list, &osp->opd_update->ou_list); oth->ot_our->our_req_ready = 0; @@ -1295,8 +1296,8 @@ int osp_check_and_set_rpc_version(struct osp_thandle *oth, spin_unlock(&ou->ou_lock); LASSERT(oth->ot_super.th_wait_submit == 1); - CDEBUG(D_INFO, "%s: version %llu oth:version %p:%llu\n", - osp->opd_obd->obd_name, ou->ou_version, oth, + CDEBUG(D_INFO, "%s: version %llu gen %llu oth:version %p:%llu\n", + osp->opd_obd->obd_name, ou->ou_version, ou->ou_generation, oth, oth->ot_our->our_version); return 0; @@ -1372,6 +1373,11 @@ void osp_invalidate_request(struct osp_device *osp) if (rc < 0) { CERROR("%s: init env error: rc = %d\n", osp->opd_obd->obd_name, rc); + + spin_lock(&ou->ou_lock); + ou->ou_generation++; + spin_unlock(&ou->ou_lock); + return; } @@ -1397,6 +1403,9 @@ void osp_invalidate_request(struct osp_device *osp) our); } + /* 
Increase the generation, then the update request with old generation + * will fail with -EIO. */ + ou->ou_generation++; spin_unlock(&ou->ou_lock); /* invalidate all of request in the sending list */ @@ -1464,7 +1473,8 @@ int osp_send_update_thread(void *arg) osp_trans_callback(&env, our->our_th, our->our_th->ot_super.th_result); rc = our->our_th->ot_super.th_result; - } else if (OBD_FAIL_CHECK(OBD_FAIL_INVALIDATE_UPDATE)) { + } else if (ou->ou_generation != our->our_generation || + OBD_FAIL_CHECK(OBD_FAIL_INVALIDATE_UPDATE)) { rc = -EIO; osp_trans_callback(&env, our->our_th, rc); } else {