From: Alexey Lyashkov Date: Wed, 1 Aug 2018 15:52:28 +0000 (+0300) Subject: LU-11020 osp: fix race during lov_objids update X-Git-Tag: 2.12.0-RC1~39 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=8cd4760536d7f423db87c67bdc8214f13ede3ca8 LU-11020 osp: fix race during lov_objids update The first thread can be delayed due to reading from disk, so it may complete after the second thread and overwrite the on-disk lov_objids data with an older OID for that OST. If the transaction commits during this window and then the MDS crashes, it is possible that the stale lov_objids results in an OST object being deleted during MDS->OSS recovery that should have been kept. Use a single buffer shared between threads to store lov_objids so that even if multiple threads are updating the lov_objids file at once, the latest OID will be written to disk even if the threads commit their transactions out of order. Cray-bug-id: LUS-5841 Change-Id: I0984e5f55d569260c1219bf87c82423cc5b8589b Signed-off-by: Alexey Lyashkov Reviewed-on: https://review.whamcloud.com/32867 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c index bc5c7ed..cb475de 100644 --- a/lustre/osp/osp_dev.c +++ b/lustre/osp/osp_dev.c @@ -258,25 +258,23 @@ static int osp_init_last_objid(const struct lu_env *env, struct osp_device *osp) if (IS_ERR(dto)) RETURN(PTR_ERR(dto)); + osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osp->opd_last_id, + osp->opd_index); + /* object will be released in device cleanup path */ - if (osi->osi_attr.la_size >= - sizeof(osi->osi_id) * (osp->opd_index + 1)) { - osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id, - osp->opd_index); + if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) { rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off); if (rc != 0 && rc != -EFAULT) GOTO(out, rc); /* In case of idif bits 
32-48 go to f_seq * (see osp_init_last_seq). So don't care * about u64->u32 convertion. */ - fid->f_oid = osi->osi_id; + fid->f_oid = osp->opd_last_id; } if (rc == -EFAULT) { /* fresh LAST_ID */ - osi->osi_id = 0; + osp->opd_last_id = 0; fid->f_oid = 0; - osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id, - osp->opd_index); rc = osp_write_local_file(env, osp, dto, &osi->osi_lb, osi->osi_off); if (rc != 0) @@ -320,22 +318,21 @@ static int osp_init_last_seq(const struct lu_env *env, struct osp_device *osp) if (IS_ERR(dto)) RETURN(PTR_ERR(dto)); + osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq, + osp->opd_index); + /* object will be released in device cleanup path */ - if (osi->osi_attr.la_size >= - sizeof(osi->osi_id) * (osp->opd_index + 1)) { - osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq, - osp->opd_index); + if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) { rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off); if (rc != 0 && rc != -EFAULT) GOTO(out, rc); if (fid_is_idif(fid)) - fid->f_seq = fid_idif_seq(osi->osi_id, osp->opd_index); + fid->f_seq = fid_idif_seq(osp->opd_last_id, + osp->opd_index); } if (rc == -EFAULT) { /* fresh OSP */ fid->f_seq = 0; - osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq, - osp->opd_index); rc = osp_write_local_file(env, osp, dto, &osi->osi_lb, osi->osi_off); if (rc != 0) @@ -412,6 +409,7 @@ static int osp_last_used_init(const struct lu_env *env, struct osp_device *osp) GOTO(out, rc = -EINVAL); } + osp_fid_to_obdid(&osp->opd_last_used_fid, &osp->opd_last_id); CDEBUG(D_INFO, "%s: Init last used fid "DFID"\n", osp->opd_obd->obd_name, PFID(&osp->opd_last_used_fid)); out: diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index 09ddfac..64ef6e3 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -169,6 +169,8 @@ struct osp_device { * and required le64_to_cpu() conversion before use. 
* Protected by opd_pre_lock */ struct lu_fid opd_last_used_fid; + /* on disk copy last_used_fid.f_oid or idif */ + u64 opd_last_id; struct lu_fid opd_gap_start_fid; int opd_gap_count; /* connection to OST */ @@ -335,7 +337,6 @@ struct osp_thread_info { struct lu_attr osi_attr; struct ost_id osi_oi; struct ost_id osi_oi2; - u64 osi_id; loff_t osi_off; union { struct llog_rec_hdr osi_hdr; @@ -606,17 +607,26 @@ static inline int osp_fid_diff(const struct lu_fid *fid1, fid_idif_id(fid2->f_seq, fid2->f_oid, 0); } - LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID - ", fid2:"DFID"\n", PFID(fid1), PFID(fid2)); + LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n", + PFID(fid1), PFID(fid2)); return fid_oid(fid1) - fid_oid(fid2); } +static inline void osp_fid_to_obdid(struct lu_fid *last_fid, u64 *osi_id) +{ + if (fid_is_idif((last_fid))) + *osi_id = fid_idif_id(fid_seq(last_fid), fid_oid(last_fid), + fid_ver(last_fid)); + else + *osi_id = fid_oid(last_fid); +} static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid) { int diff = osp_fid_diff(fid, &d->opd_last_used_fid); struct lu_fid *gap_start = &d->opd_gap_start_fid; + /* * we might have lost precreated objects due to VBR and precreate * orphans, the gap in objid can be calculated properly only here @@ -636,6 +646,7 @@ static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid) PFID(&d->opd_gap_start_fid), d->opd_gap_count); } d->opd_last_used_fid = *fid; + osp_fid_to_obdid(fid, &d->opd_last_id); } } diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index e327cbb..336eb54 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -1434,10 +1434,8 @@ static int osp_declare_create(const struct lu_env *env, struct dt_object *dt, if (unlikely(!fid_is_zero(fid))) { /* replay case: caller knows fid */ - osi->osi_off = sizeof(osi->osi_id) * d->opd_index; - osi->osi_lb.lb_len = sizeof(osi->osi_id); - osi->osi_lb.lb_buf = NULL; - + 
osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, NULL, + d->opd_index); rc = dt_declare_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb, osi->osi_off, local_th); @@ -1461,9 +1459,8 @@ static int osp_declare_create(const struct lu_env *env, struct dt_object *dt, o->opo_reserved = 1; /* common for all OSPs file hystorically */ - osi->osi_off = sizeof(osi->osi_id) * d->opd_index; - osi->osi_lb.lb_len = sizeof(osi->osi_id); - osi->osi_lb.lb_buf = NULL; + osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, NULL, + d->opd_index); rc = dt_declare_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb, osi->osi_off, local_th); @@ -1506,7 +1503,6 @@ static int osp_create(const struct lu_env *env, struct dt_object *dt, int rc = 0; struct lu_fid *fid = &osi->osi_fid; struct thandle *local_th; - struct lu_fid *last_fid = &d->opd_last_used_fid; ENTRY; if (is_only_remote_trans(th) && @@ -1586,13 +1582,8 @@ static int osp_create(const struct lu_env *env, struct dt_object *dt, /* Only need update last_used oid file, seq file will only be update * during seq rollover */ - if (fid_is_idif((last_fid))) - osi->osi_id = fid_idif_id(fid_seq(last_fid), - fid_oid(last_fid), fid_ver(last_fid)); - else - osi->osi_id = fid_oid(last_fid); osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, - &osi->osi_id, d->opd_index); + &d->opd_last_id, d->opd_index); rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb, &osi->osi_off, local_th); diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 937e9ac..e9ea3b9 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -385,6 +385,7 @@ int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp, struct lu_buf *lb_oid = &oti->osi_lb; struct lu_buf *lb_oseq = &oti->osi_lb2; loff_t oid_off; + u64 oid; loff_t oseq_off; struct thandle *th; int rc; @@ -395,13 +396,12 @@ int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp, /* Note: through f_oid is only 32 bits, it 
will also write 64 bits * for oid to keep compatibility with the previous version. */ - lb_oid->lb_buf = &fid->f_oid; - lb_oid->lb_len = sizeof(u64); - oid_off = sizeof(u64) * osp->opd_index; + oid = fid->f_oid; + osp_objid_buf_prep(lb_oid, &oid_off, + &oid, osp->opd_index); - lb_oseq->lb_buf = &fid->f_seq; - lb_oseq->lb_len = sizeof(u64); - oseq_off = sizeof(u64) * osp->opd_index; + osp_objseq_buf_prep(lb_oseq, &oseq_off, + &fid->f_seq, osp->opd_index); th = dt_trans_create(env, osp->opd_storage); if (IS_ERR(th)) @@ -494,6 +494,7 @@ static int osp_precreate_rollover_new_seq(struct lu_env *env, /* Update last_xxx to the new seq */ spin_lock(&osp->opd_pre_lock); osp->opd_last_used_fid = *fid; + osp_fid_to_obdid(fid, &osp->opd_last_id); osp->opd_gap_start_fid = *fid; osp->opd_pre_used_fid = *fid; osp->opd_pre_last_created_fid = *fid; @@ -1703,6 +1704,7 @@ int osp_init_precreate(struct osp_device *d) d->opd_pre_used_fid.f_oid = 1; fid_zero(&d->opd_pre_last_created_fid); d->opd_pre_last_created_fid.f_oid = 1; + d->opd_last_id = 0; d->opd_pre_reserved = 0; d->opd_got_disconnected = 1; d->opd_pre_create_slow = 0;