From: Alexey Lyashkov Date: Mon, 18 Jul 2016 14:28:18 +0000 (+0300) Subject: LU-8411 ofd: handle last_rcvd file can't update properly X-Git-Tag: 2.9.53~43 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=6a81ffa1e9e44231d812e331c73cfa9df67746ed LU-8411 ofd: handle last_rcvd file can't update properly A last_rcvd update may fail, yet a "no fail" return code is still sent to the client. In that case a DIO request may be replayed instead of resent, but because the client received a success return code, the user application will already have freed its buffer, so the replay will be sent with incorrect data. The write should fail if last_rcvd cannot be updated properly. This patch either causes sanity test 407 to fail or has exposed an existing bug in Lustre, so sanity test 407 is added to the ALWAYS_EXCEPT list. Seagate-bug-id: MRP-3609 Signed-off-by: Artem Blagodarenko Signed-off-by: James Nunez Change-Id: Idcbff5fd990edbc84539197da9876748b33795dd Reviewed-on: https://review.whamcloud.com/21398 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index cc2506d..d1d5b47 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -455,6 +455,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_TGT_REPLAY_TIMEOUT 0x717 #define OBD_FAIL_TGT_CLIENT_DEL 0x718 #define OBD_FAIL_TGT_SLUGGISH_NET 0x719 +#define OBD_FAIL_TGT_RCVD_EIO 0x720 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 5c3a95e..8846786 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -380,7 +380,7 @@ struct thandle *ofd_trans_create(const struct lu_env *env, int ofd_trans_start(const struct lu_env *env, struct ofd_device *ofd, struct ofd_object *fo, struct thandle *th); -void ofd_trans_stop(const struct lu_env *env, struct ofd_device *ofd, +int ofd_trans_stop(const struct 
lu_env *env, struct ofd_device *ofd, struct thandle *th, int rc); int ofd_txn_stop_cb(const struct lu_env *env, struct thandle *txn, void *cookie); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 44deb0c..f1e9222 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -1013,6 +1013,7 @@ ofd_commitrw_write(const struct lu_env *env, struct obd_export *exp, struct dt_object *o; struct thandle *th; int rc = 0; + int rc2 = 0; int retries = 0; int i; bool soft_sync = false; @@ -1136,7 +1137,9 @@ out_stop: granted = 0; } - ofd_trans_stop(env, ofd, th, rc); + rc2 = ofd_trans_stop(env, ofd, th, rc); + if (!rc) + rc = rc2; if (rc == -ENOSPC && retries++ < 3) { CDEBUG(D_INODE, "retry after force commit, retries:%d\n", retries); diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 07f7dc8..fd0e74a 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -212,11 +212,12 @@ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, struct thandle *th; struct ofd_object **batch; struct lu_fid *fid = &info->fti_fid; - u64 tmp; - int rc; - int i; - int objects = 0; - int nr_saved = nr; + u64 tmp; + int rc; + int rc2; + int i; + int objects = 0; + int nr_saved = nr; ENTRY; @@ -396,7 +397,12 @@ int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, } trans_stop: - ofd_trans_stop(env, ofd, th, rc); + rc2 = ofd_trans_stop(env, ofd, th, rc); + if (rc2) + CERROR("%s: failed to stop transaction: rc = %d\n", + ofd_name(ofd), rc2); + if (!rc) + rc = rc2; out: for (i = 0; i < nr_saved; i++) { fo = batch[i]; @@ -495,8 +501,9 @@ int ofd_attr_set(const struct lu_env *env, struct ofd_object *fo, struct ofd_device *ofd = ofd_obj2dev(fo); struct thandle *th; struct ofd_mod_data *fmd; - int ff_needed = 0; - int rc; + int ff_needed = 0; + int rc; + int rc2; ENTRY; ofd_write_lock(env, fo); @@ -571,7 +578,13 @@ int ofd_attr_set(const struct lu_env *env, struct ofd_object *fo, GOTO(stop, rc); stop: - 
ofd_trans_stop(env, ofd, th, rc); + rc2 = ofd_trans_stop(env, ofd, th, rc); + if (rc2) + CERROR("%s: failed to stop transaction: rc = %d\n", + ofd_name(ofd), rc2); + if (!rc) + rc = rc2; + unlock: ofd_write_unlock(env, fo); @@ -606,8 +619,9 @@ int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo, struct ofd_mod_data *fmd; struct dt_object *dob = ofd_object_child(fo); struct thandle *th; - int ff_needed = 0; - int rc; + int ff_needed = 0; + int rc; + int rc2; ENTRY; @@ -698,7 +712,12 @@ int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo, GOTO(stop, rc); stop: - ofd_trans_stop(env, ofd, th, rc); + rc2 = ofd_trans_stop(env, ofd, th, rc); + if (rc2 != 0) + CERROR("%s: failed to stop transaction: rc = %d\n", + ofd_name(ofd), rc2); + if (!rc) + rc = rc2; unlock: ofd_write_unlock(env, fo); @@ -724,7 +743,8 @@ int ofd_object_destroy(const struct lu_env *env, struct ofd_object *fo, { struct ofd_device *ofd = ofd_obj2dev(fo); struct thandle *th; - int rc = 0; + int rc = 0; + int rc2; ENTRY; @@ -756,7 +776,12 @@ int ofd_object_destroy(const struct lu_env *env, struct ofd_object *fo, dt_ref_del(env, ofd_object_child(fo), th); dt_destroy(env, ofd_object_child(fo), th); stop: - ofd_trans_stop(env, ofd, th, rc); + rc2 = ofd_trans_stop(env, ofd, th, rc); + if (rc2) + CERROR("%s failed to stop transaction: %d\n", + ofd_name(ofd), rc2); + if (!rc) + rc = rc2; unlock: ofd_write_unlock(env, fo); RETURN(rc); diff --git a/lustre/ofd/ofd_trans.c b/lustre/ofd/ofd_trans.c index 15be800..41e0971 100644 --- a/lustre/ofd/ofd_trans.c +++ b/lustre/ofd/ofd_trans.c @@ -106,10 +106,13 @@ int ofd_trans_start(const struct lu_env *env, struct ofd_device *ofd, * \param[in] ofd OFD device * \param[in] th transaction handle * \param[in] rc result code of whole operation + * + * \retval 0 if successful + * \retval negative value if case of error */ -void ofd_trans_stop(const struct lu_env *env, struct ofd_device *ofd, +int ofd_trans_stop(const struct lu_env *env, struct 
ofd_device *ofd, struct thandle *th, int rc) { th->th_result = rc; - dt_trans_stop(env, ofd->ofd_osd, th); + return dt_trans_stop(env, ofd->ofd_osd, th); } diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 4c2e495..9265aec 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1804,6 +1804,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, oh->ot_quota_trans = NULL; if (oh->ot_handle != NULL) { + int rc2; handle_t *hdl = oh->ot_handle; /* @@ -1827,10 +1828,12 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, hdl->h_sync = th->th_sync; oh->ot_handle = NULL; - OSD_CHECK_SLOW_TH(oh, osd, rc = ldiskfs_journal_stop(hdl)); - if (rc != 0) + OSD_CHECK_SLOW_TH(oh, osd, rc2 = ldiskfs_journal_stop(hdl)); + if (rc2 != 0) CERROR("%s: failed to stop transaction: rc = %d\n", - osd_name(osd), rc); + osd_name(osd), rc2); + if (!rc) + rc = rc2; } else { osd_trans_stop_cb(oh, th->th_result); OBD_FREE_PTR(oh); diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index f9c070b..6ee01de 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -1343,7 +1343,11 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, if (!lw_client) { tti->tti_off = ted->ted_lr_off; - rc = tgt_client_data_write(env, tgt, ted->ted_lcd, &tti->tti_off, th); + if (CFS_FAIL_CHECK(OBD_FAIL_TGT_RCVD_EIO)) + rc = -EIO; + else + rc = tgt_client_data_write(env, tgt, ted->ted_lcd, + &tti->tti_off, th); if (rc < 0) { mutex_unlock(&ted->ted_lcd_lock); RETURN(rc); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 38ba211..98474bc 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13,8 +13,8 @@ ALWAYS_EXCEPT=" 42a 42b 42c 42d 45 68b $SANITY_EXCEPT" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
# with LOD/OSP landing -# bug number for skipped tests: LU-2036 LU-8139 -ALWAYS_EXCEPT=" 76 101g $ALWAYS_EXCEPT" +# bug number for skipped tests: LU-2036 LU-8139 LU-8411 +ALWAYS_EXCEPT=" 76 101g 407 $ALWAYS_EXCEPT" is_sles11() # LU-4341 { @@ -15424,6 +15424,20 @@ test_312() { # LU-4856 } run_test 312 "make sure ZFS adjusts its block size by write pattern" +test_313() { + local file=$DIR/$tfile + rm -f $file + $SETSTRIPE -c 1 -i 0 $file || error "setstripe failed" + + # define OBD_FAIL_TGT_RCVD_EIO 0x720 + do_facet ost1 "$LCTL set_param fail_loc=0x720" + dd if=/dev/zero of=$file bs=4096 oflag=direct count=1 && + error "write should failed" + do_facet ost1 "$LCTL set_param fail_loc=0" + rm -f $file +} +run_test 313 "io should fail after last_rcvd update fail" + test_399() { # LU-7655 for OST fake write # turn off debug for performance testing local saved_debug=$($LCTL get_param -n debug)