Whamcloud - gitweb
LU-11020 osp: fix race during lov_objids update 67/32867/11
authorAlexey Lyashkov <c17817@cray.com>
Wed, 1 Aug 2018 15:52:28 +0000 (18:52 +0300)
committerOleg Drokin <green@whamcloud.com>
Sat, 17 Nov 2018 01:25:48 +0000 (01:25 +0000)
The first thread can be delayed due to reading from disk, so it
will complete after the second thread and overwrite the on-disk
lov_objids data with an older OID for that OST.

If the transaction commits during this window and then the
MDS crashes, it is possible that the stale lov_objids results
in an OST object being deleted during MDS->OSS recovery that
should have been kept.

Use a single buffer shared between threads to store lov_objids
so that even if multiple threads are updating the lov_objids
file at once, the latest OID will be written to disk even if
the threads commit their transactions out of order.

Cray-bug-id: LUS-5841
Change-Id: I0984e5f55d569260c1219bf87c82423cc5b8589b
Signed-off-by: Alexey Lyashkov <c17817@cray.com>
Reviewed-on: https://review.whamcloud.com/32867
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_object.c
lustre/osp/osp_precreate.c

index bc5c7ed..cb475de 100644 (file)
@@ -258,25 +258,23 @@ static int osp_init_last_objid(const struct lu_env *env, struct osp_device *osp)
        if (IS_ERR(dto))
                RETURN(PTR_ERR(dto));
 
        if (IS_ERR(dto))
                RETURN(PTR_ERR(dto));
 
+       osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osp->opd_last_id,
+                          osp->opd_index);
+
        /* object will be released in device cleanup path */
        /* object will be released in device cleanup path */
-       if (osi->osi_attr.la_size >=
-           sizeof(osi->osi_id) * (osp->opd_index + 1)) {
-               osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id,
-                                  osp->opd_index);
+       if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) {
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
                if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
                /* In case of idif bits 32-48 go to f_seq
                 * (see osp_init_last_seq). So don't care
                 * about u64->u32 convertion. */
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
                if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
                /* In case of idif bits 32-48 go to f_seq
                 * (see osp_init_last_seq). So don't care
                 * about u64->u32 convertion. */
-               fid->f_oid = osi->osi_id;
+               fid->f_oid = osp->opd_last_id;
        }
 
        if (rc == -EFAULT) { /* fresh LAST_ID */
        }
 
        if (rc == -EFAULT) { /* fresh LAST_ID */
-               osi->osi_id = 0;
+               osp->opd_last_id = 0;
                fid->f_oid = 0;
                fid->f_oid = 0;
-               osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id,
-                                  osp->opd_index);
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
                if (rc != 0)
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
                if (rc != 0)
@@ -320,22 +318,21 @@ static int osp_init_last_seq(const struct lu_env *env, struct osp_device *osp)
        if (IS_ERR(dto))
                RETURN(PTR_ERR(dto));
 
        if (IS_ERR(dto))
                RETURN(PTR_ERR(dto));
 
+       osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
+                          osp->opd_index);
+
        /* object will be released in device cleanup path */
        /* object will be released in device cleanup path */
-       if (osi->osi_attr.la_size >=
-           sizeof(osi->osi_id) * (osp->opd_index + 1)) {
-               osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
-                                  osp->opd_index);
+       if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) {
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
                if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
                if (fid_is_idif(fid))
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
                if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
                if (fid_is_idif(fid))
-                       fid->f_seq = fid_idif_seq(osi->osi_id, osp->opd_index);
+                       fid->f_seq = fid_idif_seq(osp->opd_last_id,
+                                                 osp->opd_index);
        }
 
        if (rc == -EFAULT) { /* fresh OSP */
                fid->f_seq = 0;
        }
 
        if (rc == -EFAULT) { /* fresh OSP */
                fid->f_seq = 0;
-               osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
-                                   osp->opd_index);
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
                if (rc != 0)
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
                if (rc != 0)
@@ -412,6 +409,7 @@ static int osp_last_used_init(const struct lu_env *env, struct osp_device *osp)
                GOTO(out, rc = -EINVAL);
        }
 
                GOTO(out, rc = -EINVAL);
        }
 
+       osp_fid_to_obdid(&osp->opd_last_used_fid, &osp->opd_last_id);
        CDEBUG(D_INFO, "%s: Init last used fid "DFID"\n",
               osp->opd_obd->obd_name, PFID(&osp->opd_last_used_fid));
 out:
        CDEBUG(D_INFO, "%s: Init last used fid "DFID"\n",
               osp->opd_obd->obd_name, PFID(&osp->opd_last_used_fid));
 out:
index 09ddfac..64ef6e3 100644 (file)
@@ -169,6 +169,8 @@ struct osp_device {
         * and required le64_to_cpu() conversion before use.
         * Protected by opd_pre_lock */
        struct lu_fid                   opd_last_used_fid;
         * and required le64_to_cpu() conversion before use.
         * Protected by opd_pre_lock */
        struct lu_fid                   opd_last_used_fid;
+       /* on disk copy last_used_fid.f_oid or idif */
+       u64                             opd_last_id;
        struct lu_fid                   opd_gap_start_fid;
        int                              opd_gap_count;
        /* connection to OST */
        struct lu_fid                   opd_gap_start_fid;
        int                              opd_gap_count;
        /* connection to OST */
@@ -335,7 +337,6 @@ struct osp_thread_info {
        struct lu_attr           osi_attr;
        struct ost_id            osi_oi;
        struct ost_id            osi_oi2;
        struct lu_attr           osi_attr;
        struct ost_id            osi_oi;
        struct ost_id            osi_oi2;
-       u64                      osi_id;
        loff_t                   osi_off;
        union {
                struct llog_rec_hdr             osi_hdr;
        loff_t                   osi_off;
        union {
                struct llog_rec_hdr             osi_hdr;
@@ -606,17 +607,26 @@ static inline int osp_fid_diff(const struct lu_fid *fid1,
                       fid_idif_id(fid2->f_seq, fid2->f_oid, 0);
        }
 
                       fid_idif_id(fid2->f_seq, fid2->f_oid, 0);
        }
 
-       LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID
-                ", fid2:"DFID"\n", PFID(fid1), PFID(fid2));
+       LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n",
+                PFID(fid1), PFID(fid2));
 
        return fid_oid(fid1) - fid_oid(fid2);
 }
 
 
        return fid_oid(fid1) - fid_oid(fid2);
 }
 
+static inline void osp_fid_to_obdid(struct lu_fid *last_fid, u64 *osi_id)
+{
+       if (fid_is_idif((last_fid)))
+               *osi_id = fid_idif_id(fid_seq(last_fid), fid_oid(last_fid),
+                                     fid_ver(last_fid));
+       else
+               *osi_id = fid_oid(last_fid);
+}
 
 static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid)
 {
        int diff = osp_fid_diff(fid, &d->opd_last_used_fid);
        struct lu_fid *gap_start = &d->opd_gap_start_fid;
 
 static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid)
 {
        int diff = osp_fid_diff(fid, &d->opd_last_used_fid);
        struct lu_fid *gap_start = &d->opd_gap_start_fid;
+
        /*
         * we might have lost precreated objects due to VBR and precreate
         * orphans, the gap in objid can be calculated properly only here
        /*
         * we might have lost precreated objects due to VBR and precreate
         * orphans, the gap in objid can be calculated properly only here
@@ -636,6 +646,7 @@ static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid)
                               PFID(&d->opd_gap_start_fid), d->opd_gap_count);
                }
                d->opd_last_used_fid = *fid;
                               PFID(&d->opd_gap_start_fid), d->opd_gap_count);
                }
                d->opd_last_used_fid = *fid;
+               osp_fid_to_obdid(fid, &d->opd_last_id);
        }
 }
 
        }
 }
 
index e327cbb..336eb54 100644 (file)
@@ -1434,10 +1434,8 @@ static int osp_declare_create(const struct lu_env *env, struct dt_object *dt,
 
        if (unlikely(!fid_is_zero(fid))) {
                /* replay case: caller knows fid */
 
        if (unlikely(!fid_is_zero(fid))) {
                /* replay case: caller knows fid */
-               osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
-               osi->osi_lb.lb_len = sizeof(osi->osi_id);
-               osi->osi_lb.lb_buf = NULL;
-
+               osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, NULL,
+                                  d->opd_index);
                rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
                                             &osi->osi_lb, osi->osi_off,
                                             local_th);
                rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
                                             &osi->osi_lb, osi->osi_off,
                                             local_th);
@@ -1461,9 +1459,8 @@ static int osp_declare_create(const struct lu_env *env, struct dt_object *dt,
                o->opo_reserved = 1;
 
                /* common for all OSPs file hystorically */
                o->opo_reserved = 1;
 
                /* common for all OSPs file hystorically */
-               osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
-               osi->osi_lb.lb_len = sizeof(osi->osi_id);
-               osi->osi_lb.lb_buf = NULL;
+               osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, NULL,
+                                  d->opd_index);
                rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
                                             &osi->osi_lb, osi->osi_off,
                                             local_th);
                rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
                                             &osi->osi_lb, osi->osi_off,
                                             local_th);
@@ -1506,7 +1503,6 @@ static int osp_create(const struct lu_env *env, struct dt_object *dt,
        int                     rc = 0;
        struct lu_fid           *fid = &osi->osi_fid;
        struct thandle          *local_th;
        int                     rc = 0;
        struct lu_fid           *fid = &osi->osi_fid;
        struct thandle          *local_th;
-       struct lu_fid           *last_fid = &d->opd_last_used_fid;
        ENTRY;
 
        if (is_only_remote_trans(th) &&
        ENTRY;
 
        if (is_only_remote_trans(th) &&
@@ -1586,13 +1582,8 @@ static int osp_create(const struct lu_env *env, struct dt_object *dt,
 
        /* Only need update last_used oid file, seq file will only be update
         * during seq rollover */
 
        /* Only need update last_used oid file, seq file will only be update
         * during seq rollover */
-       if (fid_is_idif((last_fid)))
-               osi->osi_id = fid_idif_id(fid_seq(last_fid),
-                                         fid_oid(last_fid), fid_ver(last_fid));
-       else
-               osi->osi_id = fid_oid(last_fid);
        osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off,
        osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off,
-                          &osi->osi_id, d->opd_index);
+                          &d->opd_last_id, d->opd_index);
 
        rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb,
                             &osi->osi_off, local_th);
 
        rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb,
                             &osi->osi_off, local_th);
index 937e9ac..e9ea3b9 100644 (file)
@@ -385,6 +385,7 @@ int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
        struct lu_buf      *lb_oid = &oti->osi_lb;
        struct lu_buf      *lb_oseq = &oti->osi_lb2;
        loff_t             oid_off;
        struct lu_buf      *lb_oid = &oti->osi_lb;
        struct lu_buf      *lb_oseq = &oti->osi_lb2;
        loff_t             oid_off;
+       u64                oid;
        loff_t             oseq_off;
        struct thandle    *th;
        int                   rc;
        loff_t             oseq_off;
        struct thandle    *th;
        int                   rc;
@@ -395,13 +396,12 @@ int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
 
        /* Note: through f_oid is only 32 bits, it will also write 64 bits
         * for oid to keep compatibility with the previous version. */
 
        /* Note: through f_oid is only 32 bits, it will also write 64 bits
         * for oid to keep compatibility with the previous version. */
-       lb_oid->lb_buf = &fid->f_oid;
-       lb_oid->lb_len = sizeof(u64);
-       oid_off = sizeof(u64) * osp->opd_index;
+       oid = fid->f_oid;
+       osp_objid_buf_prep(lb_oid, &oid_off,
+                          &oid, osp->opd_index);
 
 
-       lb_oseq->lb_buf = &fid->f_seq;
-       lb_oseq->lb_len = sizeof(u64);
-       oseq_off = sizeof(u64) * osp->opd_index;
+       osp_objseq_buf_prep(lb_oseq, &oseq_off,
+                           &fid->f_seq, osp->opd_index);
 
        th = dt_trans_create(env, osp->opd_storage);
        if (IS_ERR(th))
 
        th = dt_trans_create(env, osp->opd_storage);
        if (IS_ERR(th))
@@ -494,6 +494,7 @@ static int osp_precreate_rollover_new_seq(struct lu_env *env,
        /* Update last_xxx to the new seq */
        spin_lock(&osp->opd_pre_lock);
        osp->opd_last_used_fid = *fid;
        /* Update last_xxx to the new seq */
        spin_lock(&osp->opd_pre_lock);
        osp->opd_last_used_fid = *fid;
+       osp_fid_to_obdid(fid, &osp->opd_last_id);
        osp->opd_gap_start_fid = *fid;
        osp->opd_pre_used_fid = *fid;
        osp->opd_pre_last_created_fid = *fid;
        osp->opd_gap_start_fid = *fid;
        osp->opd_pre_used_fid = *fid;
        osp->opd_pre_last_created_fid = *fid;
@@ -1703,6 +1704,7 @@ int osp_init_precreate(struct osp_device *d)
        d->opd_pre_used_fid.f_oid = 1;
        fid_zero(&d->opd_pre_last_created_fid);
        d->opd_pre_last_created_fid.f_oid = 1;
        d->opd_pre_used_fid.f_oid = 1;
        fid_zero(&d->opd_pre_last_created_fid);
        d->opd_pre_last_created_fid.f_oid = 1;
+       d->opd_last_id = 0;
        d->opd_pre_reserved = 0;
        d->opd_got_disconnected = 1;
        d->opd_pre_create_slow = 0;
        d->opd_pre_reserved = 0;
        d->opd_got_disconnected = 1;
        d->opd_pre_create_slow = 0;