osd_trans_stop() should call osd_trans_stop_cb() if the transaction was
not successfully started.
Improve debug messages for distributed transactions.
Add sanity test 416 for this.
Get rid of ot_write_commit, which is useless.
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I35da81ebd2c9e97c12ae52bd4faed60393cd67d6
Reviewed-on: https://review.whamcloud.com/33248
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Jenkins
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
#define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
#define OBD_FAIL_OSD_INDEX_CRASH 0x199
#define OBD_FAIL_OSD_NO_OI_ENTRY 0x198
#define OBD_FAIL_OSD_INDEX_CRASH 0x199
+#define OBD_FAIL_OSD_TXN_START 0x19a
+
#define OBD_FAIL_OFD_SET_OID 0x1e0
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OFD_SET_OID 0x1e0
#define OBD_FAIL_OST 0x200
oh->ot_credits = osd_transaction_size(dev);
}
oh->ot_credits = osd_transaction_size(dev);
}
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_TXN_START))
+ GOTO(out, rc = -EIO);
+
/*
* XXX temporary stuff. Some abstraction layer should
* be used.
/*
* XXX temporary stuff. Some abstraction layer should
* be used.
static int osd_trans_start(const struct lu_env *env, struct dt_device *d,
struct thandle *th)
{
static int osd_trans_start(const struct lu_env *env, struct dt_device *d,
struct thandle *th)
{
- struct osd_thandle *oh;
- int rc;
+ struct osd_device *osd = osd_dt_dev(d);
+ struct osd_thandle *oh;
+ int rc;
+
ENTRY;
oh = container_of0(th, struct osd_thandle, ot_super);
ENTRY;
oh = container_of0(th, struct osd_thandle, ot_super);
LASSERT(oh->ot_tx);
rc = dt_txn_hook_start(env, d, th);
LASSERT(oh->ot_tx);
rc = dt_txn_hook_start(env, d, th);
+ if (rc != 0) {
+ CERROR("%s: dt_txn_hook_start failed: rc = %d\n",
+ osd->od_svname, rc);
- if (oh->ot_write_commit && OBD_FAIL_CHECK(OBD_FAIL_OST_MAPBLK_ENOSPC))
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_TXN_START))
/* Unlike ldiskfs, ZFS checks for available space and returns
* -ENOSPC when assigning txg */
/* Unlike ldiskfs, ZFS checks for available space and returns
* -ENOSPC when assigning txg */
rc = -dmu_tx_assign(oh->ot_tx, TXG_WAIT);
if (unlikely(rc != 0)) {
rc = -dmu_tx_assign(oh->ot_tx, TXG_WAIT);
if (unlikely(rc != 0)) {
- struct osd_device *osd = osd_dt_dev(d);
/* dmu will call commit callback with error code during abort */
if (!lu_device_is_md(&d->dd_lu_dev) && rc == -ENOSPC)
CERROR("%s: failed to start transaction due to ENOSPC"
/* dmu will call commit callback with error code during abort */
if (!lu_device_is_md(&d->dd_lu_dev) && rc == -ENOSPC)
CERROR("%s: failed to start transaction due to ENOSPC"
if (oh->ot_assigned == 0) {
LASSERT(oh->ot_tx);
if (oh->ot_assigned == 0) {
LASSERT(oh->ot_tx);
+ CDEBUG(D_OTHER, "%s: transaction is aborted\n", osd->od_svname);
+ osd_trans_stop_cb(oh, th->th_result);
dmu_tx_abort(oh->ot_tx);
osd_object_sa_dirty_rele(env, oh);
osd_unlinked_list_emptify(env, osd, &unlinked, false);
dmu_tx_abort(oh->ot_tx);
osd_object_sa_dirty_rele(env, oh);
osd_unlinked_list_emptify(env, osd, &unlinked, false);
struct list_head ot_sa_list;
dmu_tx_t *ot_tx;
struct lquota_trans ot_quota_trans;
struct list_head ot_sa_list;
dmu_tx_t *ot_tx;
struct lquota_trans ot_quota_trans;
- __u32 ot_write_commit:1,
- ot_assigned:1;
};
#define OSD_OI_NAME_SIZE 24
};
#define OSD_OI_NAME_SIZE 24
space += osd_roundup2blocksz(size, offset, blksz);
}
space += osd_roundup2blocksz(size, offset, blksz);
}
- oh->ot_write_commit = 1; /* used in osd_trans_start() for fail_loc */
-
/* backend zfs filesystem might be configured to store multiple data
* copies */
space *= osd->od_os->os_copies;
/* backend zfs filesystem might be configured to store multiple data
* copies */
space *= osd->od_os->os_copies;
osd->od_readcache_max_filesize)
drop_cache = 1;
osd->od_readcache_max_filesize)
drop_cache = 1;
+ if (OBD_FAIL_CHECK(OBD_FAIL_OST_MAPBLK_ENOSPC))
+ RETURN(-ENOSPC);
+
/* LU-8791: take oo_guard to avoid the deadlock that changing block
* size and assigning arcbuf take place at the same time.
*
/* LU-8791: take oo_guard to avoid the deadlock that changing block
* size and assigning arcbuf take place at the same time.
*
list_for_each_entry(st, &tmt->tmt_sub_thandle_list, st_sub_list) {
struct sub_thandle_cookie *stc;
list_for_each_entry(st, &tmt->tmt_sub_thandle_list, st_sub_list) {
struct sub_thandle_cookie *stc;
- CDEBUG(mask, "st %p obd %s committed %d stopped %d sub_th %p\n",
+ CDEBUG(mask, "st %p obd %s committed %d started %d stopped %d "
+ "result %d sub_th %p\n",
st, st->st_dt->dd_lu_dev.ld_obd->obd_name,
st, st->st_dt->dd_lu_dev.ld_obd->obd_name,
- st->st_committed, st->st_stopped, st->st_sub_th);
+ st->st_committed, st->st_started, st->st_stopped,
+ st->st_result, st->st_sub_th);
list_for_each_entry(stc, &st->st_cookie_list, stc_list) {
CDEBUG(mask, " cookie "DFID".%u\n",
list_for_each_entry(stc, &st->st_cookie_list, stc_list) {
CDEBUG(mask, " cookie "DFID".%u\n",
sub_trans_commit_cb_internal(tmt,
master_st->st_sub_th, rc);
if (rc < 0) {
sub_trans_commit_cb_internal(tmt,
master_st->st_sub_th, rc);
if (rc < 0) {
+ CERROR("%s: stop trans failed: rc = %d\n",
+ master_dev->dd_lu_dev.ld_obd->obd_name, rc);
th->th_result = rc;
GOTO(stop_other_trans, rc);
} else if (tur != NULL && tur->tur_update_records != NULL) {
th->th_result = rc;
GOTO(stop_other_trans, rc);
} else if (tur != NULL && tur->tur_update_records != NULL) {
rc = sub_updates_write(env, lur, st);
if (rc < 0) {
rc = sub_updates_write(env, lur, st);
if (rc < 0) {
+ CERROR("%s: write updates failed: rc = %d\n",
+ st->st_dt->dd_lu_dev.ld_obd->obd_name,
+ rc);
th->th_result = rc;
break;
}
th->th_result = rc;
break;
}
st->st_sub_th->th_result = th->th_result;
rc = dt_trans_stop(env, st->st_sub_th->th_dev,
st->st_sub_th);
st->st_sub_th->th_result = th->th_result;
rc = dt_trans_stop(env, st->st_sub_th->th_dev,
st->st_sub_th);
- if (unlikely(rc < 0 && th->th_result == 0))
- th->th_result = rc;
+ if (rc < 0) {
+ CERROR("%s: stop trans failed: rc = %d\n",
+ st->st_dt->dd_lu_dev.ld_obd->obd_name, rc);
+ if (th->th_result == 0)
+ th->th_result = rc;
+ }
}
rc = top_trans_wait_result(top_th);
}
rc = top_trans_wait_result(top_th);
}
run_test 415 "lock revoke is not missing"
}
run_test 415 "lock revoke is not missing"
+
+test_416() {
+ [ $(lustre_version_code mds1) -lt $(version_code 2.11.55) ] &&
+ skip "Need server version at least 2.11.55" # presumably older servers lack fail_loc 0x19a -- confirm
+
+ # define OBD_FAIL_OSD_TXN_START 0x19a
+ do_facet mds1 lctl set_param fail_loc=0x19a # arm the OSD transaction-start failure injection on mds1
+
+ lfs mkdir -c $MDSCOUNT $DIR/$tdir # exit status is deliberately not checked; it may fail under the injected error
+
+ true # the test passes as long as the mkdir above returned rather than hanging
+}
+run_test 416 "transaction start failure won't cause system hung"
+
prep_801() {
[[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
[[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] &&
prep_801() {
[[ $(lustre_version_code mds1) -lt $(version_code 2.9.55) ]] ||
[[ $(lustre_version_code ost1) -lt $(version_code 2.9.55) ]] &&