From: Andreas Dilger Date: Wed, 3 Oct 2012 21:26:50 +0000 (-0600) Subject: LU-1131 osd-ldiskfs: better journal credit tracking X-Git-Tag: 2.3.54~21 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=1f14f0da5098e9ad2fec1fa283659f48b8f827a7 LU-1131 osd-ldiskfs: better journal credit tracking When running with a small MDT device during testing, it is possible to overflow the reserved credit maximum for the journal. Improve the ldiskfs debugging for transaction credits so that it is possible to better understand where the reserved credits are being used. Signed-off-by: Andreas Dilger Change-Id: Iea90b771a5e19190cc95cbf8f2f725bede500c1e Reviewed-on: http://review.whamcloud.com/4282 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 1ebf741..d209956 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -591,12 +591,12 @@ static void __osd_th_check_slow(void *oth, struct osd_device *dev, /* * Concurrency: doesn't access mutable data. */ -static int osd_param_is_sane(const struct osd_device *dev, - const struct thandle *th) +static int osd_param_is_not_sane(const struct osd_device *dev, + const struct thandle *th) { - struct osd_thandle *oh; - oh = container_of0(th, struct osd_thandle, ot_super); - return oh->ot_credits <= osd_journal(dev)->j_max_transaction_buffers; + struct osd_thandle *oh = container_of(th, typeof(*oh), ot_super); + + return oh->ot_credits > osd_journal(dev)->j_max_transaction_buffers; } /* @@ -692,30 +692,48 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d, if (rc != 0) GOTO(out, rc); - if (!osd_param_is_sane(dev, th)) { + if (unlikely(osd_param_is_not_sane(dev, th))) { + static unsigned long last_printed; + static int last_credits; + CWARN("%.16s: too many transaction credits (%d > %d)\n", LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, oh->ot_credits, osd_journal(dev)->j_max_transaction_buffers); - /* XXX Limit the credits to 'max_transaction_buffers', and - * let the underlying filesystem to catch the error if - * we really need so many credits. - * - * This should be removed when we can calculate the - * credits precisely. */ - oh->ot_credits = osd_journal(dev)->j_max_transaction_buffers; #ifdef OSD_TRACK_DECLARES - CERROR(" attr_set: %d, punch: %d, xattr_set: %d,\n", - oh->ot_declare_attr_set, oh->ot_declare_punch, - oh->ot_declare_xattr_set); - CERROR(" create: %d, ref_add: %d, ref_del: %d, write: %d\n", - oh->ot_declare_create, oh->ot_declare_ref_add, - oh->ot_declare_ref_del, oh->ot_declare_write); - CERROR(" insert: %d, delete: %d, destroy: %d\n", - oh->ot_declare_insert, oh->ot_declare_delete, - oh->ot_declare_destroy); + CWARN(" create: %u/%u, delete: %u/%u, destroy: %u/%u\n", + oh->ot_declare_create, oh->ot_declare_create_cred, + oh->ot_declare_delete, oh->ot_declare_delete_cred, + oh->ot_declare_destroy, oh->ot_declare_destroy_cred); + CWARN(" attr_set: %u/%u, xattr_set: %u/%u\n", + oh->ot_declare_attr_set, oh->ot_declare_attr_set_cred, + oh->ot_declare_xattr_set, oh->ot_declare_xattr_set_cred); + CWARN(" write: %u/%u, punch: %u/%u, quota %u/%u\n", + oh->ot_declare_write, oh->ot_declare_write_cred, + oh->ot_declare_punch, oh->ot_declare_punch_cred, + oh->ot_declare_quota, oh->ot_declare_quota_cred); + CWARN(" insert: %u/%u, delete: %u/%u\n", + oh->ot_declare_insert, oh->ot_declare_insert_cred, + oh->ot_declare_delete, oh->ot_declare_destroy_cred); + CWARN(" ref_add: %u/%u, ref_del: %u/%u\n", + oh->ot_declare_ref_add, oh->ot_declare_ref_add_cred, + oh->ot_declare_ref_del, oh->ot_declare_ref_del_cred); + + if (last_credits != oh->ot_credits && + time_after(jiffies, last_printed + 60 * HZ)) { + libcfs_debug_dumpstack(NULL); + last_credits = oh->ot_credits; + last_printed = jiffies; + } #endif - } + /* XXX Limit the credits to 'max_transaction_buffers', and + * let the underlying filesystem to catch the error if + * we really need so many credits. + * + * This should be removed when we can calculate the + * credits precisely. */ + oh->ot_credits = osd_journal(dev)->j_max_transaction_buffers; + } /* * XXX temporary stuff. Some abstraction layer should @@ -1379,8 +1397,8 @@ static int osd_declare_attr_set(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, attr_set); - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + OSD_DECLARE_OP(oh, attr_set, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); if (attr == NULL || obj->oo_inode == NULL) RETURN(rc); @@ -1916,42 +1934,40 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, } static int osd_declare_object_create(const struct lu_env *env, - struct dt_object *dt, - struct lu_attr *attr, - struct dt_allocation_hint *hint, - struct dt_object_format *dof, - struct thandle *handle) + struct dt_object *dt, + struct lu_attr *attr, + struct dt_allocation_hint *hint, + struct dt_object_format *dof, + struct thandle *handle) { struct osd_thandle *oh; int rc; ENTRY; - LASSERT(handle != NULL); + LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, create); - oh->ot_credits += osd_dto_credits_noquota[DTO_OBJECT_CREATE]; - /* XXX: So far, only normal fid needs be inserted into the oi, - * things could be changed later. Revise following code then. */ - if (fid_is_norm(lu_object_fid(&dt->do_lu))) { - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + OSD_DECLARE_OP(oh, create, osd_dto_credits_noquota[DTO_OBJECT_CREATE]); + /* XXX: So far, only normal fid needs be inserted into the oi, + * things could be changed later. Revise following code then. */ + if (fid_is_norm(lu_object_fid(&dt->do_lu))) { /* Reuse idle OI block may cause additional one OI block * to be changed. */ - oh->ot_credits += 1; - } - /* If this is directory, then we expect . and .. to be inserted as - * well. The one directory block always needs to be created for the - * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap, - * block), there is no danger of needing a tree for the first block. - */ - if (attr && S_ISDIR(attr->la_mode)) { - OSD_DECLARE_OP(oh, insert); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_WRITE_BASE]; - } + OSD_DECLARE_OP(oh, insert, + osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); + } + /* If this is directory, then we expect . and .. to be inserted as + * well. The one directory block always needs to be created for the + * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap, + * block), there is no danger of needing a tree for the first block. + */ + if (attr && S_ISDIR(attr->la_mode)) { + OSD_DECLARE_OP(oh, insert, + osd_dto_credits_noquota[DTO_WRITE_BASE]); + OSD_DECLARE_OP(oh, insert, 0); + } if (!attr) RETURN(0); @@ -2001,40 +2017,36 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, * Concurrency: must be locked */ static int osd_declare_object_destroy(const struct lu_env *env, - struct dt_object *dt, - struct thandle *th) + struct dt_object *dt, + struct thandle *th) { - struct osd_object *obj = osd_dt_obj(dt); - struct inode *inode = obj->oo_inode; - struct osd_thandle *oh; - int rc; - ENTRY; + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_thandle *oh; + int rc; + ENTRY; - oh = container_of0(th, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); - LASSERT(inode); + oh = container_of0(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); + LASSERT(inode); - OSD_DECLARE_OP(oh, destroy); - OSD_DECLARE_OP(oh, delete); - oh->ot_credits += osd_dto_credits_noquota[DTO_OBJECT_DELETE]; - /* XXX: So far, only normal fid needs to be inserted into the OI, - * so only normal fid needs to be removed from the OI also. */ - if (fid_is_norm(lu_object_fid(&dt->do_lu))) { - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; - /* Recycle idle OI leaf may cause additional three OI blocks - * to be changed. */ - oh->ot_credits += 3; - } + OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_OBJECT_DELETE]); + /* XXX: So far, only normal fid needs to be inserted into the OI, + * so only normal fid needs to be removed from the OI also. + * Recycle idle OI leaf may cause additional three OI blocks + * to be changed. */ + OSD_DECLARE_OP(oh, destroy, fid_is_norm(lu_object_fid(&dt->do_lu)) ? + osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3 : 0); /* one less inode */ - rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh, + rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, -1, oh, false, true, NULL, false); if (rc) RETURN(rc); /* data to be truncated */ - rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0, oh, true, - true, NULL, false); - RETURN(rc); + rc = osd_declare_inode_qid(env, inode->i_uid, inode->i_gid, 0, oh, + true, true, NULL, false); + RETURN(rc); } static int osd_object_destroy(const struct lu_env *env, @@ -2239,10 +2251,9 @@ static int osd_declare_object_ref_add(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, ref_add); - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + OSD_DECLARE_OP(oh, ref_add, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); - return 0; + return 0; } /* @@ -2298,10 +2309,9 @@ static int osd_declare_object_ref_del(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, ref_del); - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + OSD_DECLARE_OP(oh, ref_del, osd_dto_credits_noquota[DTO_ATTR_SET_BASE]); - return 0; + return 0; } /* @@ -2393,11 +2403,9 @@ static int osd_declare_xattr_set(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, xattr_set); - if (strcmp(name, XATTR_NAME_VERSION) == 0) - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; - else - oh->ot_credits += osd_dto_credits_noquota[DTO_XATTR_SET]; + OSD_DECLARE_OP(oh, xattr_set, strcmp(name, XATTR_NAME_VERSION) == 0 ? + osd_dto_credits_noquota[DTO_ATTR_SET_BASE] : + osd_dto_credits_noquota[DTO_XATTR_SET]); return 0; } @@ -2479,10 +2487,9 @@ static int osd_declare_xattr_del(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, xattr_set); - oh->ot_credits += osd_dto_credits_noquota[DTO_XATTR_SET]; + OSD_DECLARE_OP(oh, xattr_set, osd_dto_credits_noquota[DTO_XATTR_SET]); - return 0; + return 0; } /* @@ -2853,10 +2860,9 @@ static int osd_index_declare_iam_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, delete); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]); - return 0; + return 0; } /** @@ -2916,23 +2922,22 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, } static int osd_index_declare_ea_delete(const struct lu_env *env, - struct dt_object *dt, - const struct dt_key *key, - struct thandle *handle) + struct dt_object *dt, + const struct dt_key *key, + struct thandle *handle) { - struct osd_thandle *oh; + struct osd_thandle *oh; struct inode *inode; int rc; ENTRY; - LASSERT(dt_object_exists(dt)); - LASSERT(handle != NULL); + LASSERT(dt_object_exists(dt)); + LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, delete); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + OSD_DECLARE_OP(oh, delete, osd_dto_credits_noquota[DTO_INDEX_DELETE]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -3107,10 +3112,9 @@ static int osd_index_declare_iam_insert(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]); - return 0; + return 0; } /** @@ -3523,24 +3527,23 @@ static inline void osd_object_put(const struct lu_env *env, } static int osd_index_declare_ea_insert(const struct lu_env *env, - struct dt_object *dt, - const struct dt_rec *rec, - const struct dt_key *key, - struct thandle *handle) + struct dt_object *dt, + const struct dt_rec *rec, + const struct dt_key *key, + struct thandle *handle) { - struct osd_thandle *oh; + struct osd_thandle *oh; struct inode *inode; int rc; ENTRY; - LASSERT(dt_object_exists(dt)); - LASSERT(handle != NULL); + LASSERT(dt_object_exists(dt)); + LASSERT(handle != NULL); - oh = container_of0(handle, struct osd_thandle, ot_super); - LASSERT(oh->ot_handle == NULL); + oh = container_of0(handle, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + OSD_DECLARE_OP(oh, insert, osd_dto_credits_noquota[DTO_INDEX_INSERT]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index c806e7f..9fda286 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -304,26 +304,28 @@ struct osd_device { #define OSD_TRACK_DECLARES #ifdef OSD_TRACK_DECLARES -#define OSD_DECLARE_OP(oh, op) { \ - LASSERT(oh->ot_handle == NULL); \ - ((oh)->ot_declare_ ##op)++; } -#define OSD_EXEC_OP(handle,op) { \ - struct osd_thandle *oh; \ - oh = container_of0(handle, struct osd_thandle, ot_super);\ - if (((oh)->ot_declare_ ##op) > 0) { \ - ((oh)->ot_declare_ ##op)--; \ - } \ - } +#define OSD_DECLARE_OP(oh, op, credits) \ +do { \ + LASSERT((oh)->ot_handle == NULL); \ + ((oh)->ot_declare_ ##op)++; \ + ((oh)->ot_declare_ ##op ##_cred) += (credits); \ + (oh)->ot_credits += (credits); \ +} while (0) +#define OSD_EXEC_OP(handle, op) \ +do { \ + struct osd_thandle *oh = container_of(handle, typeof(*oh), ot_super); \ + LASSERT((oh)->ot_declare_ ##op > 0); \ + ((oh)->ot_declare_ ##op)--; \ +} while (0) #else -#define OSD_DECLARE_OP(oh, op) +#define OSD_DECLARE_OP(oh, op, credits) (oh)->ot_credits += (credits) #define OSD_EXEC_OP(oh, op) #endif /* There are at most 10 uid/gids are affected in a transaction, and * that's rename case: * - 2 for source parent uid & gid; - * - 2 for source child uid & gid ('..' entry update when the child - * is directory); + * - 2 for source child uid & gid ('..' entry update when child is directory); * - 2 for target parent uid & gid; * - 2 for target child uid & gid (if the target child exists); * - 2 for root uid & gid (last_rcvd, llog, etc); @@ -347,16 +349,32 @@ struct osd_thandle { struct lquota_trans *ot_quota_trans; #ifdef OSD_TRACK_DECLARES - unsigned char ot_declare_attr_set; - unsigned char ot_declare_punch; - unsigned char ot_declare_xattr_set; - unsigned char ot_declare_create; - unsigned char ot_declare_destroy; - unsigned char ot_declare_ref_add; - unsigned char ot_declare_ref_del; - unsigned char ot_declare_write; - unsigned char ot_declare_insert; - unsigned char ot_declare_delete; + /* Tracking for transaction credits, to allow debugging and optimizing + * cases where a large number of credits are being allocated for + * single transaction. */ + unsigned char ot_declare_attr_set; + unsigned char ot_declare_punch; + unsigned char ot_declare_xattr_set; + unsigned char ot_declare_create; + unsigned char ot_declare_destroy; + unsigned char ot_declare_ref_add; + unsigned char ot_declare_ref_del; + unsigned char ot_declare_write; + unsigned char ot_declare_insert; + unsigned char ot_declare_delete; + unsigned char ot_declare_quota; + + unsigned short ot_declare_attr_set_cred; + unsigned short ot_declare_punch_cred; + unsigned short ot_declare_xattr_set_cred; + unsigned short ot_declare_create_cred; + unsigned short ot_declare_destroy_cred; + unsigned short ot_declare_ref_add_cred; + unsigned short ot_declare_ref_del_cred; + unsigned short ot_declare_write_cred; + unsigned short ot_declare_insert_cred; + unsigned short ot_declare_delete_cred; + unsigned short ot_declare_quota_cred; #endif #if OSD_THANDLE_STATS diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 5626f5f..2630fe8 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1013,8 +1013,7 @@ static ssize_t osd_declare_write(const struct lu_env *env, struct dt_object *dt, else credits = osd_dto_credits_noquota[DTO_WRITE_BLOCK]; - OSD_DECLARE_OP(oh, write); - oh->ot_credits += credits; + OSD_DECLARE_OP(oh, write, credits); inode = osd_dt_obj(dt)->oo_inode; @@ -1174,8 +1173,6 @@ static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, LASSERT(th); oh = container_of(th, struct osd_thandle, ot_super); - OSD_DECLARE_OP(oh, punch); - /* * we don't need to reserve credits for whole truncate * it's not possible as truncate may need to free too many @@ -1184,8 +1181,8 @@ static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, * orphan list. if needed truncate will extend or restart * transaction */ - oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; - oh->ot_credits += 3; + OSD_DECLARE_OP(oh, punch, + osd_dto_credits_noquota[DTO_ATTR_SET_BASE] + 3); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); diff --git a/lustre/osd-ldiskfs/osd_quota.c b/lustre/osd-ldiskfs/osd_quota.c index 414284e..2bf0b5d 100644 --- a/lustre/osd-ldiskfs/osd_quota.c +++ b/lustre/osd-ldiskfs/osd_quota.c @@ -511,8 +511,9 @@ int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh, RETURN(-EOVERFLOW); } - oh->ot_credits += (allocated || qi->lqi_id.qid_uid == 0) ? - 1 : LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev)); + OSD_DECLARE_OP(oh, quota, + (allocated || qi->lqi_id.qid_uid == 0) ? + 1 : LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev))); oh->ot_id_array[i] = qi->lqi_id.qid_uid; osd_qid_set_type(oh, i, qi->lqi_type);