After ENOSPC errors during dt_write() or thread races, the changelog
file can become sparse, with runs of zeros inside. The current processing
logic skips the remaining records and jumps to the next chunk.
The patch adds the ability to skip only the zeros in the buffer and
resume from the next valid record.
The fix also changes llog_test 8 so that it uses non-zero bytes for
corruption.
Fixes: cb1290768df9 ("LU-18218 mdd: changelog specific write function")
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I7263764ba6a89f226995b8967631eaa6d5bdd4dd
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/59267
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
#define OBD_FAIL_MDS_CHANGELOG_ENOSPC 0x18c
#define OBD_FAIL_MDS_BATCH_NET 0x18d
#define OBD_FAIL_MDS_HSM_DATA_VERSION_NET 0x18e
+#define OBD_FAIL_MDS_CHANGELOG_FAIL_WRITE 0x18f /* -EIO instead of writing a changelog rec whose cr_index % (cfs_fail_val + 1) == 0 */
/* OI scrub */
#define OBD_FAIL_OSD_SCRUB_DELAY 0x190
return 0;
}
-static int llog_zeroes(const struct lu_env *env, struct dt_object *o,
- __u64 start, __u64 end)
+static int llog_fill_bytes(const struct lu_env *env, struct dt_object *o,
+ __u64 start, __u64 end, char byte)
{
struct lu_attr la;
struct thandle *th;
if (!buf)
RETURN(-ENOMEM);
+ memset(buf, byte, end - start);
+
LASSERT(o);
d = lu2dt_dev(o->do_lu.lo_dev);
LASSERT(d);
/* must lost all 20 records */
CWARN("8b: clean first llog record in catalog\n");
- llog_zeroes(env, llh->lgh_obj, 8192 + plain_pos,
- 8192 + plain_pos + sizeof(struct llog_logid_rec));
+ llog_fill_bytes(env, llh->lgh_obj, 8192 + plain_pos,
+ 8192 + plain_pos + sizeof(struct llog_logid_rec), 0x5a);
rc2 = llog_cat_close(env, llh);
if (rc2) {
/* lost 28 records, from 5 to 32 in block */
CWARN("8c: corrupt first chunk in the middle\n");
- llog_zeroes(env, obj, 8192 + reclen * 4, 8192 + reclen * 10);
+ llog_fill_bytes(env, obj, 8192 + reclen * 4, 8192 + reclen * 10, 0xff);
/* lost whole chunk - 32 records */
CWARN("8c: corrupt second chunk at start\n");
- llog_zeroes(env, obj, 16384, 16384 + reclen);
+ llog_fill_bytes(env, obj, 16384, 16384 + reclen, 0x01);
CWARN("8d: count survived records\n");
rc = llog_open(env, ctxt, &llh, &cat_logid, NULL, LLOG_OPEN_EXISTS);
lgi_buf.lb_len = rec->cr_hdr.lrh_len;
lgi_buf.lb_buf = rec;
- rc = dt_record_write(env, o, &lgi_buf, &offset, th);
+ if (CFS_FAIL_CHECK(OBD_FAIL_MDS_CHANGELOG_FAIL_WRITE) &&
+ (rec->cr.cr_index % (cfs_fail_val + 1)) == 0)
+ rc = -EIO;
+ else
+ rc = dt_record_write(env, o, &lgi_buf, &offset, th);
if (rc) {
CERROR("%s: failed to write changelog record file "DFID" rec idx %u off %llu chnlg idx %llu: rc = %d\n",
return !test_bit_le(idx, LLOG_HDR_BITMAP(llh));
}
+/*
+ * Skip a gap of zeroes that starts at @start and locate the next record
+ * header.
+ *
+ * The scan advances in 4-byte steps for as long as the cursor is below
+ * @end and either lrh_index or lrh_len at the cursor reads as zero.
+ *
+ * \param[in] start	header at which the scan begins
+ * \param[in] end	upper bound for the position of a record header
+ *
+ * \retval		byte distance from @start to the header that was found
+ *			(0 when the scan did not move at all)
+ * \retval -ENOENT	the scan went past @end, or the header it stopped at
+ *			does not carry LLOG_OP_MAGIC in either byte order
+ */
+static inline int llog_skip_gap(struct llog_rec_hdr *start, char *end)
+{
+	struct llog_rec_hdr *rec = start;
+
+	/*
+	 * skipping zero gap; the bound is tested first so that the loop
+	 * reads header fields only while the cursor is below @end
+	 */
+	while ((char *)rec < end &&
+	       (rec->lrh_index == 0 || rec->lrh_len == 0))
+		rec = (typeof(rec))((char *)rec + 4);
+
+	if ((char *)rec > end ||
+	    !((rec->lrh_type & LLOG_OP_MASK) == LLOG_OP_MAGIC ||
+	      ((rec->lrh_type & __swab32(LLOG_OP_MASK)) ==
+	       __swab32(LLOG_OP_MAGIC))))
+		return -ENOENT;
+
+	return (int)((char *)rec - (char *)start);
+}
+
static int llog_process_thread(void *arg)
{
struct llog_process_info *lpi = arg;
rc = llog_verify_record(loghandle, rec);
if (rc) {
+ int gap_size;
+
CDEBUG(D_OTHER, "invalid record at index %d\n",
index);
/*
* for fixed-sized llogs we can skip one record
* by using llh_size from llog header.
- * Otherwise skip the next llog chunk.
*/
rc = 0;
if (llh->llh_flags & LLOG_F_IS_FIXSIZE) {
rec->lrh_len = llh->llh_size;
goto next_rec;
}
+ /*
+ * for zero gap we can find a next record.
+ * Otherwise skip the next llog chunk.
+ */
+ gap_size = llog_skip_gap(rec, buf + chunk_size -
+ LLOG_MIN_REC_SIZE);
+ if (gap_size > 0) {
+ rec->lrh_len = gap_size;
+ goto next_rec;
+ }
/* make sure that is always next block */
cur_offset = chunk_offset + chunk_size;
/* no goal to find, just next block to read */
}
run_test 162 "File attributes should be persisted after MDS failover"
+# Verify that changelog processing steps over failed record writes inside a
+# chunk without jumping to the next chunk.
+#
+# OBD_FAIL_MDS_CHANGELOG_FAIL_WRITE with fail_val=30 is set on the MDT that
+# holds $tdir, several createmany jobs generate changelog records, and the
+# changelog dump is then checked for index jumps larger than 2.
+test_163() {
+	remote_mds_nodsh && skip "remote MDS with nodsh"
+	(( "$MDS1_VERSION" >= $(version_code 2.16.54) )) ||
+		skip "Need MDS version at least 2.16.54 to skip llog holes"
+
+	local mdtidx
+	local mdtsvc
+	local cl_mask
+	local pids=()
+	local i
+
+	changelog_register || error "changelog_register failed"
+	stack_trap changelog_deregister EXIT
+	test_mkdir -c 0 $DIR/$tdir || error "mkdir $tdir failed"
+	mdtidx=$(($($LFS getdirstripe -i $DIR/$tdir) + 1))
+	mdtsvc=$(facet_svc mds$mdtidx)
+	echo mds$mdtidx $mdtsvc
+
+	# remember the current mask so it can be restored on exit
+	cl_mask=$(do_facet mds$mdtidx $LCTL get_param mdd.$mdtsvc.changelog_mask -n)
+	changelog_chmask "ALL"
+	stack_trap "do_facet mds$mdtidx \
+		$LCTL set_param mdd.$mdtsvc.changelog_mask=\'$cl_mask\' -n" EXIT
+
+	#define OBD_FAIL_MDS_CHANGELOG_FAIL_WRITE 0x18f
+	do_facet mds$mdtidx $LCTL set_param fail_loc=0x18f fail_val=30
+	# do not leak the fail_loc into whatever runs after this test
+	stack_trap "do_facet mds$mdtidx $LCTL set_param fail_loc=0 fail_val=0" EXIT
+
+	# generate some changelog records to create a gap every 31 index
+	for (( i = 0; i < 10; i++)); do
+		# braces are required: "$tfile_" would expand the unset "tfile_"
+		createmany -m $DIR/$tdir/${tfile}_$i 40 &
+		pids+=($!)
+	done
+	# let every background createmany finish before dumping the changelog
+	wait "${pids[@]}"
+
+	# Check changelog gap processing without a jump to a next chunk
+	changelog_dump | awk -F'[ .]' '{if(prev != "" && $2 - prev > 2) \
+		{print "Error between "prev" and "$2; exit 1}prev=$2}' ||
+		error "Found a gap"
+}
+run_test 163 "changelog check for fail write and processing records"
+
complete_test $SECONDS
check_and_cleanup_lustre
exit_status