The first thread can be delayed by reading from disk, so it
will complete after the second thread and overwrite the on-disk
lov_objids data with an older OID for that OST.
If the transaction commits during this window and then the
MDS crashes, it is possible that the stale lov_objids results
in an OST object being deleted during MDS->OSS recovery that
should have been kept.
Use a single buffer shared between threads to store lov_objids
so that even if multiple threads are updating the lov_objids
file at once, the latest OID will be written to disk even if
the threads commit their transactions out of order.
Cray-bug-id: LUS-5841
Change-Id: I0984e5f55d569260c1219bf87c82423cc5b8589b
Signed-off-by: Alexey Lyashkov <c17817@cray.com>
Reviewed-on: https://review.whamcloud.com/32867
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
if (IS_ERR(dto))
RETURN(PTR_ERR(dto));
if (IS_ERR(dto))
RETURN(PTR_ERR(dto));
+ osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osp->opd_last_id,
+ osp->opd_index);
+
/* object will be released in device cleanup path */
/* object will be released in device cleanup path */
- if (osi->osi_attr.la_size >=
- sizeof(osi->osi_id) * (osp->opd_index + 1)) {
- osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id,
- osp->opd_index);
+ if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) {
rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
if (rc != 0 && rc != -EFAULT)
GOTO(out, rc);
/* In case of idif bits 32-48 go to f_seq
* (see osp_init_last_seq). So don't care
* about u64->u32 convertion. */
rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
if (rc != 0 && rc != -EFAULT)
GOTO(out, rc);
/* In case of idif bits 32-48 go to f_seq
* (see osp_init_last_seq). So don't care
* about u64->u32 convertion. */
- fid->f_oid = osi->osi_id;
+ fid->f_oid = osp->opd_last_id;
}
if (rc == -EFAULT) { /* fresh LAST_ID */
}
if (rc == -EFAULT) { /* fresh LAST_ID */
- osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &osi->osi_id,
- osp->opd_index);
rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
osi->osi_off);
if (rc != 0)
rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
osi->osi_off);
if (rc != 0)
if (IS_ERR(dto))
RETURN(PTR_ERR(dto));
if (IS_ERR(dto))
RETURN(PTR_ERR(dto));
+ osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
+ osp->opd_index);
+
/* object will be released in device cleanup path */
/* object will be released in device cleanup path */
- if (osi->osi_attr.la_size >=
- sizeof(osi->osi_id) * (osp->opd_index + 1)) {
- osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
- osp->opd_index);
+ if (osi->osi_attr.la_size >= (osi->osi_off + osi->osi_lb.lb_len)) {
rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
if (rc != 0 && rc != -EFAULT)
GOTO(out, rc);
if (fid_is_idif(fid))
rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
if (rc != 0 && rc != -EFAULT)
GOTO(out, rc);
if (fid_is_idif(fid))
- fid->f_seq = fid_idif_seq(osi->osi_id, osp->opd_index);
+ fid->f_seq = fid_idif_seq(osp->opd_last_id,
+ osp->opd_index);
}
if (rc == -EFAULT) { /* fresh OSP */
fid->f_seq = 0;
}
if (rc == -EFAULT) { /* fresh OSP */
fid->f_seq = 0;
- osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
- osp->opd_index);
rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
osi->osi_off);
if (rc != 0)
rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
osi->osi_off);
if (rc != 0)
GOTO(out, rc = -EINVAL);
}
GOTO(out, rc = -EINVAL);
}
+ osp_fid_to_obdid(&osp->opd_last_used_fid, &osp->opd_last_id);
CDEBUG(D_INFO, "%s: Init last used fid "DFID"\n",
osp->opd_obd->obd_name, PFID(&osp->opd_last_used_fid));
out:
CDEBUG(D_INFO, "%s: Init last used fid "DFID"\n",
osp->opd_obd->obd_name, PFID(&osp->opd_last_used_fid));
out:
* and required le64_to_cpu() conversion before use.
* Protected by opd_pre_lock */
struct lu_fid opd_last_used_fid;
* and required le64_to_cpu() conversion before use.
* Protected by opd_pre_lock */
struct lu_fid opd_last_used_fid;
+ /* on-disk copy of opd_last_used_fid: its f_oid, or the IDIF object ID */
+ u64 opd_last_id;
struct lu_fid opd_gap_start_fid;
int opd_gap_count;
/* connection to OST */
struct lu_fid opd_gap_start_fid;
int opd_gap_count;
/* connection to OST */
struct lu_attr osi_attr;
struct ost_id osi_oi;
struct ost_id osi_oi2;
struct lu_attr osi_attr;
struct ost_id osi_oi;
struct ost_id osi_oi2;
loff_t osi_off;
union {
struct llog_rec_hdr osi_hdr;
loff_t osi_off;
union {
struct llog_rec_hdr osi_hdr;
fid_idif_id(fid2->f_seq, fid2->f_oid, 0);
}
fid_idif_id(fid2->f_seq, fid2->f_oid, 0);
}
- LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID
- ", fid2:"DFID"\n", PFID(fid1), PFID(fid2));
+ LASSERTF(fid_seq(fid1) == fid_seq(fid2), "fid1:"DFID", fid2:"DFID"\n",
+ PFID(fid1), PFID(fid2));
return fid_oid(fid1) - fid_oid(fid2);
}
return fid_oid(fid1) - fid_oid(fid2);
}
+static inline void osp_fid_to_obdid(struct lu_fid *last_fid, u64 *osi_id)
+{ /* store in *osi_id the 64-bit object ID derived from last_fid */
+ if (fid_is_idif((last_fid))) /* IDIF FID: ID is computed by fid_idif_id() */
+ *osi_id = fid_idif_id(fid_seq(last_fid), fid_oid(last_fid),
+ fid_ver(last_fid)); /* inputs: the FID's seq, oid and ver fields */
+ else /* any other FID: the ID is just the FID's oid field */
+ *osi_id = fid_oid(last_fid);
+}
static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid)
{
int diff = osp_fid_diff(fid, &d->opd_last_used_fid);
struct lu_fid *gap_start = &d->opd_gap_start_fid;
static inline void osp_update_last_fid(struct osp_device *d, struct lu_fid *fid)
{
int diff = osp_fid_diff(fid, &d->opd_last_used_fid);
struct lu_fid *gap_start = &d->opd_gap_start_fid;
/*
* we might have lost precreated objects due to VBR and precreate
* orphans, the gap in objid can be calculated properly only here
/*
* we might have lost precreated objects due to VBR and precreate
* orphans, the gap in objid can be calculated properly only here
PFID(&d->opd_gap_start_fid), d->opd_gap_count);
}
d->opd_last_used_fid = *fid;
PFID(&d->opd_gap_start_fid), d->opd_gap_count);
}
d->opd_last_used_fid = *fid;
+ osp_fid_to_obdid(fid, &d->opd_last_id);
if (unlikely(!fid_is_zero(fid))) {
/* replay case: caller knows fid */
if (unlikely(!fid_is_zero(fid))) {
/* replay case: caller knows fid */
- osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
- osi->osi_lb.lb_len = sizeof(osi->osi_id);
- osi->osi_lb.lb_buf = NULL;
-
+ osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, NULL,
+ d->opd_index);
rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
&osi->osi_lb, osi->osi_off,
local_th);
rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
&osi->osi_lb, osi->osi_off,
local_th);
o->opo_reserved = 1;
/* common for all OSPs file hystorically */
o->opo_reserved = 1;
/* common for all OSPs file hystorically */
- osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
- osi->osi_lb.lb_len = sizeof(osi->osi_id);
- osi->osi_lb.lb_buf = NULL;
+ osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, NULL,
+ d->opd_index);
rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
&osi->osi_lb, osi->osi_off,
local_th);
rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
&osi->osi_lb, osi->osi_off,
local_th);
int rc = 0;
struct lu_fid *fid = &osi->osi_fid;
struct thandle *local_th;
int rc = 0;
struct lu_fid *fid = &osi->osi_fid;
struct thandle *local_th;
- struct lu_fid *last_fid = &d->opd_last_used_fid;
ENTRY;
if (is_only_remote_trans(th) &&
ENTRY;
if (is_only_remote_trans(th) &&
/* Only need update last_used oid file, seq file will only be update
* during seq rollover */
/* Only need update last_used oid file, seq file will only be update
* during seq rollover */
- if (fid_is_idif((last_fid)))
- osi->osi_id = fid_idif_id(fid_seq(last_fid),
- fid_oid(last_fid), fid_ver(last_fid));
- else
- osi->osi_id = fid_oid(last_fid);
osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off,
osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off,
- &osi->osi_id, d->opd_index);
+ &d->opd_last_id, d->opd_index);
rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb,
&osi->osi_off, local_th);
rc = dt_record_write(env, d->opd_last_used_oid_file, &osi->osi_lb,
&osi->osi_off, local_th);
struct lu_buf *lb_oid = &oti->osi_lb;
struct lu_buf *lb_oseq = &oti->osi_lb2;
loff_t oid_off;
struct lu_buf *lb_oid = &oti->osi_lb;
struct lu_buf *lb_oseq = &oti->osi_lb2;
loff_t oid_off;
loff_t oseq_off;
struct thandle *th;
int rc;
loff_t oseq_off;
struct thandle *th;
int rc;
/* Note: through f_oid is only 32 bits, it will also write 64 bits
* for oid to keep compatibility with the previous version. */
/* Note: through f_oid is only 32 bits, it will also write 64 bits
* for oid to keep compatibility with the previous version. */
- lb_oid->lb_buf = &fid->f_oid;
- lb_oid->lb_len = sizeof(u64);
- oid_off = sizeof(u64) * osp->opd_index;
+ oid = fid->f_oid;
+ osp_objid_buf_prep(lb_oid, &oid_off,
+ &oid, osp->opd_index);
- lb_oseq->lb_buf = &fid->f_seq;
- lb_oseq->lb_len = sizeof(u64);
- oseq_off = sizeof(u64) * osp->opd_index;
+ osp_objseq_buf_prep(lb_oseq, &oseq_off,
+ &fid->f_seq, osp->opd_index);
th = dt_trans_create(env, osp->opd_storage);
if (IS_ERR(th))
th = dt_trans_create(env, osp->opd_storage);
if (IS_ERR(th))
/* Update last_xxx to the new seq */
spin_lock(&osp->opd_pre_lock);
osp->opd_last_used_fid = *fid;
/* Update last_xxx to the new seq */
spin_lock(&osp->opd_pre_lock);
osp->opd_last_used_fid = *fid;
+ osp_fid_to_obdid(fid, &osp->opd_last_id);
osp->opd_gap_start_fid = *fid;
osp->opd_pre_used_fid = *fid;
osp->opd_pre_last_created_fid = *fid;
osp->opd_gap_start_fid = *fid;
osp->opd_pre_used_fid = *fid;
osp->opd_pre_last_created_fid = *fid;
d->opd_pre_used_fid.f_oid = 1;
fid_zero(&d->opd_pre_last_created_fid);
d->opd_pre_last_created_fid.f_oid = 1;
d->opd_pre_used_fid.f_oid = 1;
fid_zero(&d->opd_pre_last_created_fid);
d->opd_pre_last_created_fid.f_oid = 1;
d->opd_pre_reserved = 0;
d->opd_got_disconnected = 1;
d->opd_pre_create_slow = 0;
d->opd_pre_reserved = 0;
d->opd_got_disconnected = 1;
d->opd_pre_create_slow = 0;