if (idx == LLOG_HEADER_IDX) {
/* llog header update */
- LASSERT(reclen >= sizeof(struct llog_log_hdr));
- LASSERT(rec == &llh->llh_hdr);
+ __u32 *bitmap = LLOG_HDR_BITMAP(llh);
lgi->lgi_off = 0;
- lgi->lgi_buf.lb_len = reclen;
- lgi->lgi_buf.lb_buf = rec;
+
+ /* If the bitmap index is not indicated
+ * (reccookie == NULL), update the whole
+ * llog header. Otherwise only the header
+ * and the affected bitmap word need to be
+ * updated, which in DNE cases significantly
+ * shrinks the RPC size.
+ * See distribute_txn_cancel_records(). */
+ if (reccookie == NULL) {
+ lgi->lgi_buf.lb_len = reclen;
+ lgi->lgi_buf.lb_buf = rec;
+ rc = dt_record_write(env, o, &lgi->lgi_buf,
+ &lgi->lgi_off, th);
+ RETURN(rc);
+ }
+
+ /* update the header */
+ lgi->lgi_buf.lb_len = llh->llh_bitmap_offset;
+ lgi->lgi_buf.lb_buf = llh;
+ rc = dt_record_write(env, o, &lgi->lgi_buf,
+ &lgi->lgi_off, th);
+ if (rc != 0)
+ RETURN(rc);
+
+ /* update the bitmap */
+ index = reccookie->lgc_index;
+ lgi->lgi_off = llh->llh_bitmap_offset +
+ (index / (sizeof(*bitmap) * 8)) *
+ sizeof(*bitmap);
+ lgi->lgi_buf.lb_len = sizeof(*bitmap);
+ lgi->lgi_buf.lb_buf =
+ &bitmap[index/(sizeof(*bitmap)*8)];
rc = dt_record_write(env, o, &lgi->lgi_buf,
&lgi->lgi_off, th);
+
RETURN(rc);
} else if (loghandle->lgh_cur_idx > 0) {
/**
"len:%u offset %llu\n",
POSTID(&loghandle->lgh_id.lgl_oi), idx,
rec->lrh_len, (long long)lgi->lgi_off);
- } else if (llh->llh_size > 0) {
- if (llh->llh_size != rec->lrh_len) {
+ } else if (llh->llh_flags & LLOG_F_IS_FIXSIZE) {
+ if (llh->llh_size == 0 ||
+ llh->llh_size != rec->lrh_len) {
CERROR("%s: wrong record size, llh_size is %u"
" but record size is %u\n",
o->do_lu.lo_dev->ld_obd->obd_name,
}
llh->llh_count++;
+ if (llh->llh_flags & LLOG_F_IS_FIXSIZE) {
+ LASSERT(llh->llh_size == reclen);
+ } else {
+ /* Update the minimum size of the llog record */
+ if (llh->llh_size == 0)
+ llh->llh_size = reclen;
+ else if (reclen < llh->llh_size)
+ llh->llh_size = reclen;
+ }
+
if (lgi->lgi_attr.la_size == 0) {
lgi->lgi_off = 0;
lgi->lgi_buf.lb_len = llh->llh_hdr.lrh_len;
* the RPC (1MB limit), if we write 8K for each operation, which
* will cost a lot space, and keep us adding more updates to one
* update log.*/
- lgi->lgi_off = offsetof(typeof(*llh), llh_count);
- lgi->lgi_buf.lb_len = sizeof(llh->llh_count);
- lgi->lgi_buf.lb_buf = &llh->llh_count;
+ lgi->lgi_off = 0;
+ lgi->lgi_buf.lb_len = llh->llh_bitmap_offset;
+ lgi->lgi_buf.lb_buf = &llh->llh_hdr;
rc = dt_record_write(env, o, &lgi->lgi_buf, &lgi->lgi_off, th);
if (rc != 0)
GOTO(out_unlock, rc);
* that we are not far enough along the log (because the
* actual records are larger than minimum size) we just skip
* some more records.
+ *
+ * Note: llog_process_thread uses the bitmap offset as the index to
+ * locate a record, and that index also counts pad records, whose
+ * record size is very small. Pad records are not considered when
+ * recording the minimum record size (otherwise the minimum record
+ * size might be too small), so in some rare cases this function
+ * might skip too many records for @goal, see llog_osd_next_block().
+ *
+ * When force_mini_rec is true, LLOG_MIN_REC_SIZE must be used as the
+ * minimum record size to skip over, usually because the previous
+ * try skipped too many records, see llog_osd_next(prev)_block().
*/
-static inline void llog_skip_over(struct llog_log_hdr *llh, __u64 *off,
- int curr, int goal, __u32 chunk_size)
+static inline void llog_skip_over(struct llog_handle *lgh, __u64 *off,
+ int curr, int goal, __u32 chunk_size,
+ bool force_mini_rec)
{
+ struct llog_log_hdr *llh = lgh->lgh_hdr;
+
+ /* Goal should not be bigger than the record count */
+ if (goal > lgh->lgh_last_idx)
+ goal = lgh->lgh_last_idx;
+
if (goal > curr) {
- if (llh->llh_size == 0) {
- /* variable size records */
- *off = *off + (goal - curr - 1) * LLOG_MIN_REC_SIZE;
- } else {
+ if (llh->llh_flags & LLOG_F_IS_FIXSIZE) {
*off = chunk_size + (goal - 1) * llh->llh_size;
+ } else {
+ __u64 min_rec_size = LLOG_MIN_REC_SIZE;
+
+ if (llh->llh_size > 0 && !force_mini_rec)
+ min_rec_size = llh->llh_size;
+
+ *off = *off + (goal - curr - 1) * min_rec_size;
}
}
/* always align with lower chunk boundary*/
struct dt_device *dt;
int rc;
__u32 chunk_size;
+ int last_idx = *cur_idx;
+ __u64 last_offset = *cur_offset;
+ bool force_mini_rec = false;
ENTRY;
struct llog_rec_hdr *rec, *last_rec;
struct llog_rec_tail *tail;
- llog_skip_over(loghandle->lgh_hdr, cur_offset, *cur_idx,
- next_idx, chunk_size);
+ llog_skip_over(loghandle, cur_offset, *cur_idx,
+ next_idx, chunk_size, force_mini_rec);
/* read up to next llog chunk_size block */
lgi->lgi_buf.lb_len = chunk_size -
rc = dt_read(env, o, &lgi->lgi_buf, cur_offset);
if (rc < 0) {
+ if (rc == -EBADR && !force_mini_rec)
+ goto retry;
+
CERROR("%s: can't read llog block from log "DFID
" offset "LPU64": rc = %d\n",
o->do_lu.lo_dev->ld_obd->obd_name,
memset(buf + rc, 0, len - rc);
}
- if (rc == 0) /* end of file, nothing to do */
+ if (rc == 0) { /* end of file, nothing to do */
+ if (!force_mini_rec)
+ goto retry;
GOTO(out, rc);
+ }
if (rc < sizeof(*tail)) {
+ if (!force_mini_rec)
+ goto retry;
+
CERROR("%s: invalid llog block at log id "DOSTID"/%u "
"offset "LPU64"\n",
o->do_lu.lo_dev->ld_obd->obd_name,
loghandle->lgh_id.lgl_ogen, *cur_offset);
GOTO(out, rc = -EINVAL);
}
- if (tail->lrt_index < next_idx)
+ if (tail->lrt_index < next_idx) {
+ last_idx = *cur_idx;
+ last_offset = *cur_offset;
continue;
+ }
/* sanity check that the start of the new buffer is no farther
* than the record that we wanted. This shouldn't happen. */
if (rec->lrh_index > next_idx) {
+ if (!force_mini_rec && next_idx > last_idx)
+ goto retry;
+
CERROR("%s: missed desired record? %u > %u\n",
o->do_lu.lo_dev->ld_obd->obd_name,
rec->lrh_index, next_idx);
CLF_VERSION | CLF_RENAME);
GOTO(out, rc = 0);
+
+retry:
+ /* Note: because there are some pad records in the
+ * llog, llog_skip_over() might skip too many records;
+ * try skipping again with the minimum record size */
+ force_mini_rec = true;
+ *cur_offset = last_offset;
+ *cur_idx = last_idx;
}
GOTO(out, rc = -EIO);
out:
dt = lu2dt_dev(o->do_lu.lo_dev);
LASSERT(dt);
+ /* Let's only use mini record size for previous block read
+ * for now XXX */
cur_offset = chunk_size;
- llog_skip_over(loghandle->lgh_hdr, &cur_offset, 0, prev_idx,
- chunk_size);
+ llog_skip_over(loghandle, &cur_offset, 0, prev_idx,
+ chunk_size, true);
rc = dt_attr_get(env, o, &lgi->lgi_attr);
if (rc)
dt = ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt;
LASSERT(dt);
if (ctxt->loc_flags & LLOG_CTXT_FLAG_NORMAL_FID) {
+ struct lu_object_conf conf = { 0 };
if (logid != NULL) {
logid_to_fid(logid, &lgi->lgi_fid);
} else {
if (rc < 0)
RETURN(rc);
rc = 0;
+ conf.loc_flags = LOC_F_NEW;
}
- o = dt_locate(env, dt, &lgi->lgi_fid);
+ o = dt_locate_at(env, dt, &lgi->lgi_fid,
+ dt->dd_lu_dev.ld_site->ls_top_dev, &conf);
if (IS_ERR(o))
RETURN(PTR_ERR(o));
RETURN(rc);
}
-
/**
- * Implementation of the llog_operations::lop_destroy
+ * Implementation of the llog_operations::lop_declare_destroy
*
- * This function destroys the llog and deletes also entry in the
+ * This function declares the destruction of the llog and of its entry in the
* llog directory in case of named llog. Llog should be opened prior that.
- * Destroy method is not part of external transaction and does everything
- * inside.
*
* \param[in] env execution environment
* \param[in] loghandle llog handle of the current llog
* \retval 0 on successful destroy
* \retval negative value on error
*/
-static int llog_osd_destroy(const struct lu_env *env,
- struct llog_handle *loghandle)
+static int llog_osd_declare_destroy(const struct lu_env *env,
+ struct llog_handle *loghandle,
+ struct thandle *th)
{
struct llog_ctxt *ctxt;
struct dt_object *o, *llog_dir = NULL;
- struct dt_device *d;
- struct thandle *th;
- char *name = NULL;
int rc;
ENTRY;
o = loghandle->lgh_obj;
LASSERT(o);
- d = lu2dt_dev(o->do_lu.lo_dev);
- LASSERT(d);
- LASSERT(d == ctxt->loc_exp->exp_obd->obd_lvfs_ctxt.dt);
-
- th = dt_trans_create(env, d);
- if (IS_ERR(th))
- RETURN(PTR_ERR(th));
-
if (loghandle->lgh_name) {
llog_dir = llog_osd_dir_get(env, ctxt);
if (IS_ERR(llog_dir))
- GOTO(out_trans, rc = PTR_ERR(llog_dir));
+ RETURN(PTR_ERR(llog_dir));
- name = loghandle->lgh_name;
rc = dt_declare_delete(env, llog_dir,
- (struct dt_key *)name, th);
- if (rc)
- GOTO(out_trans, rc);
+ (struct dt_key *)loghandle->lgh_name,
+ th);
+ if (rc < 0)
+ GOTO(out_put, rc);
}
rc = dt_declare_ref_del(env, o, th);
if (rc < 0)
- GOTO(out_trans, rc);
+ GOTO(out_put, rc);
rc = dt_declare_destroy(env, o, th);
- if (rc)
- GOTO(out_trans, rc);
+ if (rc < 0)
+ GOTO(out_put, rc);
if (loghandle->lgh_ctxt->loc_flags & LLOG_CTXT_FLAG_NORMAL_FID) {
rc = llog_osd_regular_fid_del_name_entry(env, o, th, true);
if (rc < 0)
- GOTO(out_trans, rc);
+ GOTO(out_put, rc);
}
- rc = dt_trans_start_local(env, d, th);
- if (rc)
- GOTO(out_trans, rc);
+out_put:
+ if (!(IS_ERR_OR_NULL(llog_dir)))
+ lu_object_put(env, &llog_dir->do_lu);
- th->th_wait_submit = 1;
+ RETURN(rc);
+}
+
+
+/**
+ * Implementation of the llog_operations::lop_destroy
+ *
+ * This function destroys the llog and deletes also entry in the
+ * llog directory in case of named llog. Llog should be opened prior that.
+ * The destroy is done within the transaction \a th supplied by the caller;
+ * this function neither creates nor stops a transaction itself.
+ *
+ * \param[in] env execution environment
+ * \param[in] loghandle llog handle of the current llog
+ * \param[in] th transaction handle passed to the delete/destroy operations
+ *
+ * \retval 0 on successful destroy, or if the object does not exist
+ * \retval negative value on error
+ */
+static int llog_osd_destroy(const struct lu_env *env,
+ struct llog_handle *loghandle, struct thandle *th)
+{
+ struct llog_ctxt *ctxt;
+ struct dt_object *o, *llog_dir = NULL;
+ int rc;
+
+ ENTRY;
+
+ ctxt = loghandle->lgh_ctxt;
+ LASSERT(ctxt != NULL);
+
+ o = loghandle->lgh_obj;
+ LASSERT(o != NULL);
dt_write_lock(env, o, 0);
- if (dt_object_exists(o)) {
- if (name) {
- dt_read_lock(env, llog_dir, 0);
- rc = dt_delete(env, llog_dir,
- (struct dt_key *) name,
- th);
- dt_read_unlock(env, llog_dir);
- if (rc) {
- CERROR("%s: can't remove llog %s: rc = %d\n",
- o->do_lu.lo_dev->ld_obd->obd_name,
- name, rc);
- GOTO(out_unlock, rc);
- }
- }
- dt_ref_del(env, o, th);
- rc = dt_destroy(env, o, th);
- if (rc)
- GOTO(out_unlock, rc);
+ if (!dt_object_exists(o))
+ GOTO(out_unlock, rc = 0);
- if (loghandle->lgh_ctxt->loc_flags &
- LLOG_CTXT_FLAG_NORMAL_FID) {
- rc = llog_osd_regular_fid_del_name_entry(env, o, th,
- false);
- if (rc < 0)
- GOTO(out_unlock, rc);
+ if (loghandle->lgh_name) {
+ llog_dir = llog_osd_dir_get(env, ctxt);
+ /* The write lock on the llog object is held here, so leave
+ * through out_unlock instead of returning directly. */
+ if (IS_ERR(llog_dir))
+ GOTO(out_unlock, rc = PTR_ERR(llog_dir));
+
+ dt_read_lock(env, llog_dir, 0);
+ rc = dt_delete(env, llog_dir,
+ (struct dt_key *)loghandle->lgh_name,
+ th);
+ dt_read_unlock(env, llog_dir);
+ if (rc) {
+ CERROR("%s: can't remove llog %s: rc = %d\n",
+ o->do_lu.lo_dev->ld_obd->obd_name,
+ loghandle->lgh_name, rc);
+ GOTO(out_unlock, rc);
}
}
+
+ dt_ref_del(env, o, th);
+ rc = dt_destroy(env, o, th);
+ if (rc < 0)
+ GOTO(out_unlock, rc);
+
+ if (loghandle->lgh_ctxt->loc_flags & LLOG_CTXT_FLAG_NORMAL_FID) {
+ rc = llog_osd_regular_fid_del_name_entry(env, o, th, false);
+ if (rc < 0)
+ GOTO(out_unlock, rc);
+ }
+
out_unlock:
dt_write_unlock(env, o);
-out_trans:
- dt_trans_stop(env, d, th);
if (!(IS_ERR_OR_NULL(llog_dir)))
lu_object_put(env, &llog_dir->do_lu);
RETURN(rc);
.lop_next_block = llog_osd_next_block,
.lop_prev_block = llog_osd_prev_block,
.lop_read_header = llog_osd_read_header,
+ .lop_declare_destroy = llog_osd_declare_destroy,
.lop_destroy = llog_osd_destroy,
.lop_setup = llog_osd_setup,
.lop_cleanup = llog_osd_cleanup,
.lop_next_block = llog_osd_next_block,
.lop_prev_block = llog_osd_prev_block,
.lop_read_header = llog_osd_read_header,
+ .lop_declare_destroy = llog_osd_declare_destroy,
.lop_destroy = llog_osd_destroy,
.lop_setup = llog_osd_setup,
.lop_cleanup = llog_osd_cleanup,
lgi->lgi_buf.lb_buf = idarray;
rc = dt_declare_record_write(env, o, &lgi->lgi_buf, lgi->lgi_off, th);
if (rc)
- GOTO(out, rc);
+ GOTO(out_trans, rc);
/* For update log, this happens during initialization,
* see lod_sub_prep_llog(), and we need make sure catlog