From 28fc8fcc8bfe092c9a1a8c192ab6fe22d92820e7 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Thu, 10 Jan 2013 23:17:38 +0800 Subject: [PATCH] LU-2640 osd: enhance transaction debug to trace rollback Some modifications may need to be rolled back because of failures during a transaction. The original transaction trace mechanism did not consider rollback, so it could not correctly account for sub-operations that are executed as part of a rollback. With this patch, each declared sub-operation is traced and, when it is executed, its possible rollback operation is recorded as well. For example: when ref_add is declared, the sub-operation is recorded in "osd_thread_info::oti_declare_ops[OSD_OT_REF_ADD]"; when the ref_add credit is consumed, the possible rollback is recorded in "osd_thread_info::oti_declare_ops_rb[OSD_OT_REF_ADD]"; if the ref_add later needs to be rolled back (by ref_del), the rollback is accounted against "osd_thread_info::oti_declare_ops_rb[OSD_OT_REF_ADD]"; and once rollback has started, only "osd_thread_info::oti_declare_ops_rb[X]" can be consumed. 
Signed-off-by: Fan Yong Change-Id: Iade214eff468c2298f84920b997040e06f1a204f Reviewed-on: http://review.whamcloud.com/5138 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/mdd/mdd_dir.c | 28 +++---- lustre/osd-ldiskfs/osd_handler.c | 138 +++++++++++++++++++++++------------ lustre/osd-ldiskfs/osd_internal.h | 150 +++++++++++++++++++++++++------------- lustre/osd-ldiskfs/osd_io.c | 10 +-- lustre/osd-ldiskfs/osd_quota.c | 6 +- 5 files changed, 215 insertions(+), 117 deletions(-) diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 68e1162..b69327d 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -970,17 +970,17 @@ static int mdd_link(const struct lu_env *env, struct md_object *tgt_obj, if (rc) GOTO(out_unlock, rc); - rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj), - name, handle, - mdd_object_capa(env, mdd_tobj)); - if (rc) - GOTO(out_unlock, rc); - rc = mdo_ref_add(env, mdd_sobj, handle); + if (rc) + GOTO(out_unlock, rc); + + + rc = __mdd_index_insert_only(env, mdd_tobj, mdo2fid(mdd_sobj), + name, handle, + mdd_object_capa(env, mdd_tobj)); if (rc != 0) { - __mdd_index_delete_only(env, mdd_tobj, name, handle, - mdd_object_capa(env, mdd_tobj)); - GOTO(out_unlock, rc); + mdo_ref_del(env, mdd_sobj, handle); + GOTO(out_unlock, rc); } LASSERT(ma->ma_attr.la_valid & LA_CTIME); @@ -1168,6 +1168,11 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, if (rc) GOTO(cleanup, rc); + rc = __mdd_index_delete(env, mdd_pobj, name, is_dir, handle, + mdd_object_capa(env, mdd_pobj)); + if (rc) + GOTO(cleanup, rc); + rc = mdo_ref_del(env, mdd_cobj, handle); if (rc != 0) { __mdd_index_insert_only(env, mdd_pobj, mdo2fid(mdd_cobj), @@ -1176,11 +1181,6 @@ static int mdd_unlink(const struct lu_env *env, struct md_object *pobj, GOTO(cleanup, rc); } - rc = __mdd_index_delete(env, mdd_pobj, name, is_dir, handle, - mdd_object_capa(env, mdd_pobj)); - if (rc) - GOTO(cleanup, rc); - if (is_dir) /* unlink dot */ 
mdo_ref_del(env, mdd_cobj, handle); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index aa1dcf6..8278c6b 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -89,6 +89,22 @@ static const struct dt_object_operations osd_obj_otable_it_ops; static const struct dt_index_operations osd_index_iam_ops; static const struct dt_index_operations osd_index_ea_ops; +#ifdef OSD_TRACK_DECLARES +int osd_trans_declare_op2rb[] = { + [OSD_OT_ATTR_SET] = OSD_OT_ATTR_SET, + [OSD_OT_PUNCH] = OSD_OT_MAX, + [OSD_OT_XATTR_SET] = OSD_OT_XATTR_SET, + [OSD_OT_CREATE] = OSD_OT_DESTROY, + [OSD_OT_DESTROY] = OSD_OT_CREATE, + [OSD_OT_REF_ADD] = OSD_OT_REF_DEL, + [OSD_OT_REF_DEL] = OSD_OT_REF_ADD, + [OSD_OT_WRITE] = OSD_OT_WRITE, + [OSD_OT_INSERT] = OSD_OT_DELETE, + [OSD_OT_DELETE] = OSD_OT_INSERT, + [OSD_OT_QUOTA] = OSD_OT_MAX, +}; +#endif + static int osd_has_index(const struct osd_object *obj) { return obj->oo_dt.do_index_ops != NULL; @@ -662,6 +678,11 @@ static struct thandle *osd_trans_create(const struct lu_env *env, oti->oti_dev = osd_dt_dev(d); CFS_INIT_LIST_HEAD(&oh->ot_dcb_list); osd_th_alloced(oh); + + memset(oti->oti_declare_ops, 0, sizeof(oti->oti_declare_ops)); + memset(oti->oti_declare_ops_rb, 0, sizeof(oti->oti_declare_ops_rb)); + memset(oti->oti_declare_ops_cred, 0, sizeof(oti->oti_declare_ops_cred)); + oti->oti_rollback = false; } RETURN(th); } @@ -700,22 +721,34 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d, osd_journal(dev)->j_max_transaction_buffers); #ifdef OSD_TRACK_DECLARES CWARN(" create: %u/%u, delete: %u/%u, destroy: %u/%u\n", - oh->ot_declare_create, oh->ot_declare_create_cred, - oh->ot_declare_delete, oh->ot_declare_delete_cred, - oh->ot_declare_destroy, oh->ot_declare_destroy_cred); + oti->oti_declare_ops[OSD_OT_CREATE], + oti->oti_declare_ops_cred[OSD_OT_CREATE], + oti->oti_declare_ops[OSD_OT_DELETE], + oti->oti_declare_ops_cred[OSD_OT_DELETE], + oti->oti_declare_ops[OSD_OT_DESTROY], + oti->oti_declare_ops_cred[OSD_OT_DESTROY]); CWARN(" 
attr_set: %u/%u, xattr_set: %u/%u\n", - oh->ot_declare_attr_set, oh->ot_declare_attr_set_cred, - oh->ot_declare_xattr_set, oh->ot_declare_xattr_set_cred); + oti->oti_declare_ops[OSD_OT_ATTR_SET], + oti->oti_declare_ops_cred[OSD_OT_ATTR_SET], + oti->oti_declare_ops[OSD_OT_XATTR_SET], + oti->oti_declare_ops_cred[OSD_OT_XATTR_SET]); CWARN(" write: %u/%u, punch: %u/%u, quota %u/%u\n", - oh->ot_declare_write, oh->ot_declare_write_cred, - oh->ot_declare_punch, oh->ot_declare_punch_cred, - oh->ot_declare_quota, oh->ot_declare_quota_cred); + oti->oti_declare_ops[OSD_OT_WRITE], + oti->oti_declare_ops_cred[OSD_OT_WRITE], + oti->oti_declare_ops[OSD_OT_PUNCH], + oti->oti_declare_ops_cred[OSD_OT_PUNCH], + oti->oti_declare_ops[OSD_OT_QUOTA], + oti->oti_declare_ops_cred[OSD_OT_QUOTA]); CWARN(" insert: %u/%u, delete: %u/%u\n", - oh->ot_declare_insert, oh->ot_declare_insert_cred, - oh->ot_declare_delete, oh->ot_declare_destroy_cred); + oti->oti_declare_ops[OSD_OT_INSERT], + oti->oti_declare_ops_cred[OSD_OT_INSERT], + oti->oti_declare_ops[OSD_OT_DELETE], + oti->oti_declare_ops_cred[OSD_OT_DELETE]); CWARN(" ref_add: %u/%u, ref_del: %u/%u\n", - oh->ot_declare_ref_add, oh->ot_declare_ref_add_cred, - oh->ot_declare_ref_del, oh->ot_declare_ref_del_cred); + oti->oti_declare_ops[OSD_OT_REF_ADD], + oti->oti_declare_ops_cred[OSD_OT_REF_ADD], + oti->oti_declare_ops[OSD_OT_REF_DEL], + oti->oti_declare_ops_cred[OSD_OT_REF_DEL]); if (last_credits != oh->ot_credits && time_after(jiffies, last_printed + 60 * HZ)) { @@ -1395,8 +1428,8 @@ static int osd_declare_attr_set(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, attr_set, - osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); + osd_trans_declare_op(env, oh, OSD_OT_ATTR_SET, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); if (attr == NULL || obj->oo_inode == NULL) RETURN(rc); @@ -1601,7 +1634,7 @@ static int osd_attr_set(const struct lu_env *env, if 
(osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) return -EACCES; - OSD_EXEC_OP(handle, attr_set); + osd_trans_exec_op(env, handle, OSD_OT_ATTR_SET); inode = obj->oo_inode; ll_vfs_dq_init(inode); @@ -1969,7 +2002,8 @@ static int osd_declare_object_create(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, create, osd_dto_credits_noquota[DTO_OBJECT_CREATE]); + osd_trans_declare_op(env, oh, OSD_OT_CREATE, + osd_dto_credits_noquota[DTO_OBJECT_CREATE]); /* XXX: So far, only normal fid needs be inserted into the oi, * things could be changed later. Revise following code then. */ if (fid_is_norm(lu_object_fid(&dt->do_lu)) && @@ -1977,8 +2011,8 @@ static int osd_declare_object_create(const struct lu_env *env, lu_object_fid(&dt->do_lu))) { /* Reuse idle OI block may cause additional one OI block * to be changed. */ - OSD_DECLARE_OP(oh, insert, - osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); + osd_trans_declare_op(env, oh, OSD_OT_INSERT, + osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); } /* If this is directory, then we expect . and .. to be inserted as * well. The one directory block always needs to be created for the @@ -1986,9 +2020,9 @@ static int osd_declare_object_create(const struct lu_env *env, * block), there is no danger of needing a tree for the first block. 
*/ if (attr && S_ISDIR(attr->la_mode)) { - OSD_DECLARE_OP(oh, insert, - osd_dto_credits_noquota[DTO_WRITE_BASE]); - OSD_DECLARE_OP(oh, insert, 0); + osd_trans_declare_op(env, oh, OSD_OT_INSERT, + osd_dto_credits_noquota[DTO_WRITE_BASE]); + osd_trans_declare_op(env, oh, OSD_OT_INSERT, 0); } if (!attr) @@ -2035,7 +2069,8 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, * 'tune2fs -O quota' will take care of creating them */ RETURN(-EPERM); - OSD_EXEC_OP(th, create); + osd_trans_exec_op(env, th, OSD_OT_CREATE); + osd_trans_declare_rb(env, th, OSD_OT_REF_ADD); result = __osd_object_create(info, obj, attr, hint, dof, th); if (result == 0) @@ -2065,13 +2100,15 @@ static int osd_declare_object_destroy(const struct lu_env *env, LASSERT(oh->ot_handle == NULL); LASSERT(inode); - OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_OBJECT_DELETE]); + osd_trans_declare_op(env, oh, OSD_OT_DELETE, + osd_dto_credits_noquota[DTO_OBJECT_DELETE]); /* XXX: So far, only normal fid needs to be inserted into the OI, * so only normal fid needs to be removed from the OI also. * Recycle idle OI leaf may cause additional three OI blocks * to be changed. */ - OSD_DECLARE_OP(oh, destroy, fid_is_norm(lu_object_fid(&dt->do_lu)) ? - osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0); + osd_trans_declare_op(env, oh, OSD_OT_DESTROY, + fid_is_norm(lu_object_fid(&dt->do_lu)) ? 
+ osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0); /* one less inode */ rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh, @@ -2116,7 +2153,7 @@ static int osd_object_destroy(const struct lu_env *env, inode->i_sb->s_op->dirty_inode(inode); } - OSD_EXEC_OP(th, destroy); + osd_trans_exec_op(env, th, OSD_OT_DESTROY); result = osd_oi_delete(osd_oti_get(env), osd, fid, th); mutex_unlock(&inode->i_mutex); @@ -2257,7 +2294,8 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, * 'tune2fs -O quota' will take care of creating them */ RETURN(-EPERM); - OSD_EXEC_OP(th, create); + osd_trans_exec_op(env, th, OSD_OT_CREATE); + osd_trans_declare_rb(env, th, OSD_OT_REF_ADD); result = __osd_object_create(info, obj, attr, hint, dof, th); /* objects under osd root shld have igif fid, so dont add fid EA */ @@ -2286,7 +2324,8 @@ static int osd_declare_object_ref_add(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, ref_add, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); + osd_trans_declare_op(env, oh, OSD_OT_REF_ADD, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); return 0; } @@ -2305,7 +2344,7 @@ static int osd_object_ref_add(const struct lu_env *env, LASSERT(osd_write_locked(env, obj)); LASSERT(th != NULL); - OSD_EXEC_OP(th, ref_add); + osd_trans_exec_op(env, th, OSD_OT_REF_ADD); /* * DIR_NLINK feature is set for compatibility reasons if: @@ -2348,7 +2387,8 @@ static int osd_declare_object_ref_del(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, ref_del, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); + osd_trans_declare_op(env, oh, OSD_OT_REF_DEL, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); return 0; } @@ -2367,7 +2407,7 @@ static int osd_object_ref_del(const struct lu_env *env, struct dt_object *dt, LASSERT(osd_write_locked(env, obj)); LASSERT(th != NULL); - 
OSD_EXEC_OP(th, ref_del); + osd_trans_exec_op(env, th, OSD_OT_REF_DEL); spin_lock(&obj->oo_guard); LASSERT(inode->i_nlink > 0); @@ -2442,9 +2482,10 @@ static int osd_declare_xattr_set(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, xattr_set, strcmp(name, XATTR_NAME_VERSION) == 0 ? - osd_dto_credits_noquota[DTO_ATTR_SET_BASE] : - osd_dto_credits_noquota[DTO_XATTR_SET]); + osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET, + strcmp(name, XATTR_NAME_VERSION) == 0 ? + osd_dto_credits_noquota[DTO_ATTR_SET_BASE] : + osd_dto_credits_noquota[DTO_XATTR_SET]); return 0; } @@ -2488,8 +2529,8 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) return -EACCES; - OSD_EXEC_OP(handle, xattr_set); - return __osd_xattr_set(env, dt, buf, name, fl); + osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET); + return __osd_xattr_set(env, dt, buf, name, fl); } /* @@ -2526,7 +2567,8 @@ static int osd_declare_xattr_del(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, xattr_set, osd_dto_credits_noquota[DTO_XATTR_SET]); + osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET, + osd_dto_credits_noquota[DTO_XATTR_SET]); return 0; } @@ -2552,7 +2594,7 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, if (osd_object_auth(env, dt, capa, CAPA_OPC_META_WRITE)) return -EACCES; - OSD_EXEC_OP(handle, xattr_set); + osd_trans_exec_op(env, handle, OSD_OT_XATTR_SET); ll_vfs_dq_init(inode); dentry->d_inode = inode; @@ -2898,7 +2940,8 @@ static int osd_index_declare_iam_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]); + osd_trans_declare_op(env, oh, OSD_OT_DELETE, + 
osd_dto_credits_noquota[DTO_INDEX_DELETE]); return 0; } @@ -2937,7 +2980,7 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_DELETE)) RETURN(-EACCES); - OSD_EXEC_OP(handle, delete); + osd_trans_exec_op(env, handle, OSD_OT_DELETE); ipd = osd_idx_ipd_get(env, bag); if (unlikely(ipd == NULL)) @@ -2975,7 +3018,8 @@ static int osd_index_declare_ea_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]); + osd_trans_declare_op(env, oh, OSD_OT_DELETE, + osd_dto_credits_noquota[DTO_INDEX_DELETE]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -3028,7 +3072,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, LASSERT(dt_object_exists(dt)); LASSERT(handle != NULL); - OSD_EXEC_OP(handle, delete); + osd_trans_exec_op(env, handle, OSD_OT_DELETE); oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle != NULL); @@ -3150,7 +3194,8 @@ static int osd_index_declare_iam_insert(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]); + osd_trans_declare_op(env, oh, OSD_OT_INSERT, + osd_dto_credits_noquota[DTO_INDEX_INSERT]); return 0; } @@ -3189,7 +3234,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT)) RETURN(-EACCES); - OSD_EXEC_OP(th, insert); + osd_trans_exec_op(env, th, OSD_OT_INSERT); ipd = osd_idx_ipd_get(env, bag); if (unlikely(ipd == NULL)) @@ -3582,7 +3627,8 @@ static int osd_index_declare_ea_insert(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert, 
osd_dto_credits_noquota[DTO_INDEX_INSERT]); + osd_trans_declare_op(env, oh, OSD_OT_INSERT, + osd_dto_credits_noquota[DTO_INDEX_INSERT]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -3634,6 +3680,8 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt, LASSERT(dt_object_exists(dt)); LASSERT(th != NULL); + osd_trans_exec_op(env, th, OSD_OT_INSERT); + if (osd_object_auth(env, dt, capa, CAPA_OPC_INDEX_INSERT)) RETURN(-EACCES); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 6722a2f..3d5f893 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -272,26 +272,6 @@ struct osd_device { struct qsd_instance *od_quota_slave; }; -#define OSD_TRACK_DECLARES -#ifdef OSD_TRACK_DECLARES -#define OSD_DECLARE_OP(oh, op, credits) \ -do { \ - LASSERT((oh)->ot_handle == NULL); \ - ((oh)->ot_declare_ ##op)++; \ - ((oh)->ot_declare_ ##op ##_cred) += (credits); \ - (oh)->ot_credits += (credits); \ -} while (0) -#define OSD_EXEC_OP(handle, op) \ -do { \ - struct osd_thandle *oh = container_of(handle, typeof(*oh), ot_super); \ - LASSERT((oh)->ot_declare_ ##op > 0); \ - ((oh)->ot_declare_ ##op)--; \ -} while (0) -#else -#define OSD_DECLARE_OP(oh, op, credits) (oh)->ot_credits += (credits) -#define OSD_EXEC_OP(oh, op) -#endif - /* There are at most 10 uid/gids are affected in a transaction, and * that's rename case: * - 2 for source parent uid & gid; @@ -305,6 +285,23 @@ do { \ */ #define OSD_MAX_UGID_CNT 10 +enum { + OSD_OT_ATTR_SET = 0, + OSD_OT_PUNCH = 1, + OSD_OT_XATTR_SET = 2, + OSD_OT_CREATE = 3, + OSD_OT_DESTROY = 4, + OSD_OT_REF_ADD = 5, + OSD_OT_REF_DEL = 6, + OSD_OT_WRITE = 7, + OSD_OT_INSERT = 8, + OSD_OT_DELETE = 9, + OSD_OT_QUOTA = 10, + OSD_OT_MAX = 11 +}; + +#define OSD_TRACK_DECLARES + struct osd_thandle { struct thandle ot_super; handle_t *ot_handle; @@ -317,36 +314,6 @@ struct osd_thandle { unsigned short ot_id_type; uid_t ot_id_array[OSD_MAX_UGID_CNT]; struct 
lquota_trans *ot_quota_trans; - -#ifdef OSD_TRACK_DECLARES - /* Tracking for transaction credits, to allow debugging and optimizing - * cases where a large number of credits are being allocated for - * single transaction. */ - unsigned char ot_declare_attr_set; - unsigned char ot_declare_punch; - unsigned char ot_declare_xattr_set; - unsigned char ot_declare_create; - unsigned char ot_declare_destroy; - unsigned char ot_declare_ref_add; - unsigned char ot_declare_ref_del; - unsigned char ot_declare_write; - unsigned char ot_declare_insert; - unsigned char ot_declare_delete; - unsigned char ot_declare_quota; - - unsigned short ot_declare_attr_set_cred; - unsigned short ot_declare_punch_cred; - unsigned short ot_declare_xattr_set_cred; - unsigned short ot_declare_create_cred; - unsigned short ot_declare_destroy_cred; - unsigned short ot_declare_ref_add_cred; - unsigned short ot_declare_ref_del_cred; - unsigned short ot_declare_write_cred; - unsigned short ot_declare_insert_cred; - unsigned short ot_declare_delete_cred; - unsigned short ot_declare_quota_cred; -#endif - #if OSD_THANDLE_STATS /** time when this handle was allocated */ cfs_time_t oth_alloced; @@ -598,6 +565,17 @@ struct osd_thread_info { union lquota_rec oti_quota_rec; __u64 oti_quota_id; struct lu_seq_range oti_seq_range; + +#ifdef OSD_TRACK_DECLARES + /* Tracking for transaction credits, to allow debugging and optimizing + * cases where a large number of credits are being allocated for + * single transaction. 
*/ + unsigned char oti_declare_ops[OSD_OT_MAX]; + unsigned char oti_declare_ops_rb[OSD_OT_MAX]; + unsigned short oti_declare_ops_cred[OSD_OT_MAX]; + bool oti_rollback; +#endif + }; extern int ldiskfs_pdo; @@ -853,6 +831,78 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, return child_dentry; } +#ifdef OSD_TRACK_DECLARES +extern int osd_trans_declare_op2rb[]; + +static inline void osd_trans_declare_op(const struct lu_env *env, + struct osd_thandle *oh, + unsigned int op, int credits) +{ + struct osd_thread_info *oti = osd_oti_get(env); + + LASSERT(oh->ot_handle == NULL); + LASSERT(op < OSD_OT_MAX); + + oti->oti_declare_ops[op]++; + oti->oti_declare_ops_cred[op] += credits; + oh->ot_credits += credits; +} + +static inline void osd_trans_exec_op(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct osd_thandle *oh = container_of(th, struct osd_thandle, + ot_super); + unsigned int rb; + + LASSERT(oh->ot_handle != NULL); + LASSERT(op < OSD_OT_MAX); + + if (likely(!oti->oti_rollback && oti->oti_declare_ops[op] > 0)) { + oti->oti_declare_ops[op]--; + oti->oti_declare_ops_rb[op]++; + } else { + /* all future updates are considered rollback */ + oti->oti_rollback = true; + rb = osd_trans_declare_op2rb[op]; + LASSERTF(rb < OSD_OT_MAX, "op = %u\n", op); + LASSERTF(oti->oti_declare_ops_rb[rb] > 0, "rb = %u\n", rb); + oti->oti_declare_ops_rb[rb]--; + } +} + +static inline void osd_trans_declare_rb(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct osd_thandle *oh = container_of(th, struct osd_thandle, + ot_super); + + LASSERT(oh->ot_handle != NULL); + LASSERT(op < OSD_OT_MAX); + + oti->oti_declare_ops_rb[op]++; +} +#else +static inline void osd_trans_declare_op(const struct lu_env *env, + struct osd_thandle *oh, + unsigned int op, int credits) +{ + oh->ot_credits += credits; +} + +static inline void 
osd_trans_exec_op(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ +} + +static inline void osd_trans_declare_rb(const struct lu_env *env, + struct thandle *th, unsigned int op) +{ +} +#endif + /** * Helper function to pack the fid, ldiskfs stores fid in packed format. */ diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index a6003cd..af8afe1 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1012,7 +1012,7 @@ static ssize_t osd_declare_write(const struct lu_env *env, struct dt_object *dt, else credits = osd_dto_credits_noquota[DTO_WRITE_BLOCK]; - OSD_DECLARE_OP(oh, write, credits); + osd_trans_declare_op(env, oh, OSD_OT_WRITE, credits); inode = osd_dt_obj(dt)->oo_inode; @@ -1140,7 +1140,7 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, ll_vfs_dq_init(inode); /* XXX: don't check: one declared chunk can be used many times */ - /* OSD_EXEC_OP(handle, write); */ + /* osd_trans_exec_op(env, handle, OSD_OT_WRITE); */ oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle->h_transaction != NULL); @@ -1180,8 +1180,8 @@ static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, * orphan list. 
if needed truncate will extend or restart * transaction */ - OSD_DECLARE_OP(oh, punch, - osd_dto_credits_noquota[DTO_ATTR_SET_BASE] + 3); + osd_trans_declare_op(env, oh, OSD_OT_PUNCH, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE] + 3); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -1213,7 +1213,7 @@ static int osd_punch(const struct lu_env *env, struct dt_object *dt, oh = container_of(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle->h_transaction != NULL); - OSD_EXEC_OP(th, punch); + osd_trans_exec_op(env, th, OSD_OT_PUNCH); tid = oh->ot_handle->h_transaction->t_tid; diff --git a/lustre/osd-ldiskfs/osd_quota.c b/lustre/osd-ldiskfs/osd_quota.c index acf5557..8c17b13 100644 --- a/lustre/osd-ldiskfs/osd_quota.c +++ b/lustre/osd-ldiskfs/osd_quota.c @@ -511,9 +511,9 @@ int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh, RETURN(-EOVERFLOW); } - OSD_DECLARE_OP(oh, quota, - (allocated || qi->lqi_id.qid_uid == 0) ? - 1 : LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev))); + osd_trans_declare_op(env, oh, OSD_OT_QUOTA, + (allocated || qi->lqi_id.qid_uid == 0) ? + 1: LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev))); oh->ot_id_array[i] = qi->lqi_id.qid_uid; osd_qid_set_type(oh, i, qi->lqi_type); -- 1.8.3.1