From: Alexander Boyko Date: Thu, 8 Apr 2021 08:23:54 +0000 (-0400) Subject: LU-14598 ofd: fix for IDIF sequence at ofd_preprw_write X-Git-Tag: 2.14.52~121 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=747fed818be5a4e09281ab1d9fd5b3a13763ab40 LU-14598 ofd: fix for IDIF sequence at ofd_preprw_write During recovery, a write operation could create and load a sequence if it arrives before the create request from MDT0. ofd_preprw_write() uses the wrong logic to obtain the sequence for IDIF FIDs. If the OID overflows 32 bits and spills into the IDIF sequence, the write request loads the wrong OFD sequence, which is then used for other IO. The next create from MDT0 causes an error: Too many FIDs to precreate OST replaced or reformatted... Test 122b reproduces the issue where the OST uses a wrong sequence for the MDT0 IDIF. This error requires an object ID greater than 32 bits and a write request during recovery that is processed before the create request from MDT0. For the error to be visible on the console, the last object ID should be 1<<32 + (OST_MAX_PRECREATE * 5). Error is lustre-OST0000: Too many FIDs to precreate OST replaced or reformatted: LFSCK will clean up HPE-bug-id: LUS-9595 Signed-off-by: Alexander Boyko Change-Id: I09e6f88b1f0d03fec59b24ef096cbc7baa5388ae Reviewed-on: https://review.whamcloud.com/43248 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Sergey Cheremencev --- diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 97ca99c..5440c05 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1663,18 +1663,12 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) * (possibly filling the OST), only precreate the last batch. * LFSCK will eventually clean up any orphans. LU-14 */ if (diff > 5 * OST_MAX_PRECREATE) { + /* Message below is checked in conf-sanity test_122b */ + LCONSOLE_WARN("%s: precreate FID "DOSTID" is over %lld higher than LAST_ID "DOSTID", only precreating the last %u objects. 
OST replaced or reformatted?\n", + ofd_name(ofd), POSTID(&oa->o_oi), diff, + POSTID(&oseq->os_oi), + OST_MAX_PRECREATE / 2); diff = OST_MAX_PRECREATE / 2; - LCONSOLE_WARN("%s: Too many FIDs to precreate " - "OST replaced or reformatted: " - "LFSCK will clean up", - ofd_name(ofd)); - - CDEBUG(D_HA, "%s: precreate FID "DOSTID" is over " - "%u larger than the LAST_ID "DOSTID", only " - "precreating the last %lld objects.\n", - ofd_name(ofd), POSTID(&oa->o_oi), - 5 * OST_MAX_PRECREATE, - POSTID(&oseq->os_oi), diff); ofd_seq_last_oid_set(oseq, ostid_id(&oa->o_oi) - diff); } diff --git a/lustre/ofd/ofd_fs.c b/lustre/ofd/ofd_fs.c index 25b82f5..062bb05 100644 --- a/lustre/ofd/ofd_fs.c +++ b/lustre/ofd/ofd_fs.c @@ -377,8 +377,11 @@ struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd, /* if seq is already initialized */ oseq = ofd_seq_get(ofd, seq); - if (oseq != NULL) + if (oseq != NULL) { + CDEBUG(D_TRACE, "%s: got sequence %#llx "DOSTID"\n", + ofd_name(ofd), seq, POSTID(&oseq->os_oi)); RETURN(oseq); + } OBD_ALLOC_PTR(oseq); if (oseq == NULL) @@ -439,6 +442,8 @@ struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd, GOTO(cleanup, rc = -EINVAL); } + CDEBUG(D_HA, "%s: adding sequence %#llx\n", ofd_name(ofd), seq); + oseq = ofd_seq_add(env, ofd, oseq); RETURN((oseq != NULL) ? 
oseq : ERR_PTR(-ENOENT)); cleanup: diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 4ae58a1..a15de42 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -700,8 +700,8 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, LASSERT(objcount == 1); if (unlikely(exp->exp_obd->obd_recovering)) { - u64 seq = fid_seq(fid); - u64 oid = fid_oid(fid); + u64 seq = ostid_seq(&oa->o_oi); + u64 oid = ostid_id(&oa->o_oi); struct ofd_seq *oseq; oseq = ofd_seq_load(env, ofd, seq); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 174e87d..19eae10 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -8631,7 +8631,7 @@ test_120() { # LU-11130 } run_test 120 "cross-target rename should not create bad symlinks" -test_122() { +test_122a() { [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" [[ "$OST1_VERSION" -ge $(version_code 2.11.53) ]] || skip "Need OST version at least 2.11.53" @@ -8650,7 +8650,68 @@ test_122() { cleanup } -run_test 122 "Check OST sequence update" +run_test 122a "Check OST sequence update" + +test_122b() { + [[ "$OST1_VERSION" -ge $(version_code 2.11.53) ]] || + skip "Need OST version at least 2.11.53" + local err + + reformat + LOAD_MODULES_REMOTE=true load_modules +#define OBD_FAIL_OFD_SET_OID 0x1e0 + do_facet ost1 $LCTL set_param fail_loc=0x00001e0 + + stack_trap cleanup EXIT + setup_noconfig + do_facet ost1 $LCTL set_param obdfilter.*.precreate_batch=256 + $LFS mkdir -i0 -c1 $DIR/$tdir || error "failed to create directory" + $LFS setstripe -i0 -c1 $DIR/$tdir || error "failed to setstripe" + do_facet ost1 $LCTL set_param fail_loc=0 + # overflow IDIF 32bit and create > OST_MAX_PRECREATE*5 + # so a new wrong sequence would differ from an original with error + #define OST_MAX_PRECREATE 20000 + local ost_max_precreate=20100 + local num_create=$(( ost_max_precreate * 5 )) + + # Check the number of inodes available on OST0 + local files=0 + local ifree=$($LFS df -i $MOUNT | 
awk '/OST0000/ { print $4 }') + + log "On OST0, $ifree inodes available. Want $num_create." + + if [ $ifree -lt 10000 ]; then + files=$(( ifree - 50 )) + else + files=10000 + fi + + local j=$((num_create / files + 1)) + + for i in $(seq 1 $j); do + createmany -o $DIR/$tdir/$tfile-$i- $files || + error "createmany fail create $files files: $?" + unlinkmany $DIR/$tdir/$tfile-$i- $files || + error "unlinkmany failed unlink $files files" + done + sync + do_facet ost1 sync + #we need a write req during recovery for ofd_seq_load + replay_barrier ost1 + dd if=/dev/urandom of=$DIR/$tdir/$tfile bs=1024k count=1 oflag=sync || + error "failed to write file" + + # OBD_FAIL_OST_CREATE_NET 0x204 + do_facet ost1 $LCTL set_param fail_loc=0x80000204 + fail ost1 + createmany -o $DIR/$tdir/file_ 100 + sync + + err=$(do_facet ost1 dmesg | tac | sed "/Recovery over/,$ d" | + grep "OST replaced or reformatted") + [ -z "$err" ] || error $err +} +run_test 122b "Check OST sequence wouldn't change when IDIF 32bit overflows" test_123aa() { remote_mgs_nodsh && skip "remote MGS with nodsh"