From: Niu Yawei Date: Thu, 2 Feb 2012 14:32:02 +0000 (-0800) Subject: LU-1051 osd: improve quota credits calculation X-Git-Tag: 2.1.56~28 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=83eeeb72c1081ec85b2b38440730143323e9209e LU-1051 osd: improve quota credits calculation Instead of reserving constant LDISKFS_QUOTA_INIT_BLOCKS for all kinds of transactions, this patch tracks how many/which uid/gids are affected in each declare function, then calculates the total quota credits according to the affected uid/gids in osd_trans_start(). This patch also includes following credits improvement: - For object creation, reserve less credits for the '.' and '..' insertion, and don't reserve for file creation; - Only reserve the credits for oi insertion for normal fids; - Limit the credits to j_max_transaction_buffers in the osd_trans_start(); Signed-off-by: Niu Yawei Change-Id: Ifbcc26b670c4965f22cc96ae7db16154927f32a4 Reviewed-on: http://review.whamcloud.com/2082 Tested-by: Hudson Reviewed-by: Johann Lombardi Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index dfc5b15..1098ae4 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -159,6 +159,20 @@ static const struct dt_index_operations osd_index_ea_ops; #define OSD_EXEC_OP(oh, op) #endif +/* There are at most 10 uid/gids are affected in a transaction, and + * that's rename case: + * - 2 for source parent uid & gid; + * - 2 for source child uid & gid ('..' entry update when the child + * is directory); + * - 2 for target parent uid & gid; + * - 2 for target child uid & gid (if the target child exists); + * - 2 for root uid & gid (last_rcvd, llog, etc); + * + * The 0 to (OSD_MAX_UGID_CNT - 1) bits of ot_id_type is for indicating + * the id type of each id in the ot_id_array. + */ +#define OSD_MAX_UGID_CNT 10 + struct osd_thandle { struct thandle ot_super; handle_t *ot_handle; @@ -166,7 +180,10 @@ struct osd_thandle { cfs_list_t ot_dcb_list; /* Link to the device, for debugging. */ struct lu_ref_link *ot_dev_link; - int ot_credits; + unsigned short ot_credits; + unsigned short ot_id_cnt; + unsigned short ot_id_type; + uid_t ot_id_array[OSD_MAX_UGID_CNT]; #ifdef OSD_TRACK_DECLARES unsigned char ot_declare_attr_set; @@ -345,6 +362,51 @@ static int osd_root_get(const struct lu_env *env, return 0; } +static inline int osd_qid_type(struct osd_thandle *oh, int i) +{ + return (oh->ot_id_type & (1 << i)) ? GRPQUOTA : USRQUOTA; +} + +static inline void osd_qid_set_type(struct osd_thandle *oh, int i, int type) +{ + oh->ot_id_type |= ((type == GRPQUOTA) ? (1 << i) : 0); +} + +static void osd_declare_qid(struct dt_object *dt, struct osd_thandle *oh, + int type, uid_t id, struct inode *inode) +{ +#ifdef CONFIG_QUOTA + int i, allocated = 0; + struct osd_object *obj; + + LASSERT(dt != NULL); + LASSERT(oh != NULL); + LASSERTF(oh->ot_id_cnt <= OSD_MAX_UGID_CNT, "count=%u", + oh->ot_id_cnt); + + /* id entry is allocated in the quota file */ + if (inode && inode->i_dquot[type] && inode->i_dquot[type]->dq_off) + allocated = 1; + + for (i = 0; i < oh->ot_id_cnt; i++) { + if (oh->ot_id_array[i] == id && osd_qid_type(oh, i) == type) + return; + } + + if (unlikely(i >= OSD_MAX_UGID_CNT)) { + CERROR("more than %d uid/gids for a transaction?\n", i); + return; + } + + oh->ot_id_array[i] = id; + osd_qid_set_type(oh, i, type); + oh->ot_id_cnt++; + obj = osd_dt_obj(dt); + oh->ot_credits += (allocated || id == 0) ? + 1 : LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(osd_obj2dev(obj))); +#endif +} + /* * OSD object methods. */ @@ -780,12 +842,17 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d, if (rc != 0) GOTO(out, rc); - oh->ot_credits += LDISKFS_QUOTA_INIT_BLOCKS(osd_sb(dev)); - if (!osd_param_is_sane(dev, th)) { CWARN("%s: too many transaction credits (%d > %d)\n", d->dd_lu_dev.ld_obd->obd_name, oh->ot_credits, osd_journal(dev)->j_max_transaction_buffers); + /* XXX Limit the credits to 'max_transaction_buffers', and + * let the underlying filesystem to catch the error if + * we really need so many credits. + * + * This should be removed when we can calculate the + * credits precisely. */ + oh->ot_credits = osd_journal(dev)->j_max_transaction_buffers; #ifdef OSD_TRACK_DECLARES CERROR(" attr_set: %d, punch: %d, xattr_set: %d,\n", oh->ot_declare_attr_set, oh->ot_declare_punch, @@ -793,8 +860,9 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d, CERROR(" create: %d, ref_add: %d, ref_del: %d, write: %d\n", oh->ot_declare_create, oh->ot_declare_ref_add, oh->ot_declare_ref_del, oh->ot_declare_write); - CERROR(" insert: %d, delete: %d\n", - oh->ot_declare_insert, oh->ot_declare_delete); + CERROR(" insert: %d, delete: %d, destroy: %d\n", + oh->ot_declare_insert, oh->ot_declare_delete, + oh->ot_declare_destroy); #endif } @@ -1164,73 +1232,6 @@ static const int osd_dto_credits_noquota[DTO_NR] = { [DTO_ATTR_SET_CHOWN]= 0 }; -/** - * Note: we count into QUOTA here. - * If we mount with --data_journal we may need more. - */ -static const int osd_dto_credits_quota[DTO_NR] = { - /** - * INDEX_EXTRA_TRANS_BLOCKS(8) + - * SINGLEDATA_TRANS_BLOCKS(8) + - * 2 * QUOTA_TRANS_BLOCKS(2) - */ - [DTO_INDEX_INSERT] = 20, - /** - * INDEX_EXTRA_TRANS_BLOCKS(8) + - * SINGLEDATA_TRANS_BLOCKS(8) + - * 2 * QUOTA_TRANS_BLOCKS(2) - */ - [DTO_INDEX_DELETE] = 20, - /** - * Unused now. - */ - [DTO_INDEX_UPDATE] = 16, - /* - * Create a object. Same as create object in EXT3 filesystem. - * DATA_TRANS_BLOCKS(16) + - * INDEX_EXTRA_BLOCKS(8) + - * 3(inode bits, groups, GDT) + - * 2 * QUOTA_INIT_BLOCKS(25) - */ - [DTO_OBJECT_CREATE] = 77, - /* - * Unused now. - * DATA_TRANS_BLOCKS(16) + - * INDEX_EXTRA_BLOCKS(8) + - * 3(inode bits, groups, GDT) + - * QUOTA(?) - */ - [DTO_OBJECT_DELETE] = 27, - /** - * Attr set credits. - * 3 (inode bit, group, GDT) + - */ - [DTO_ATTR_SET_BASE] = 3, - /** - * Xattr set. The same as xattr of EXT3. - * DATA_TRANS_BLOCKS(16) - * XXX Note: in original MDS implmentation INDEX_EXTRA_TRANS_BLOCKS are - * also counted in. Do not know why? - */ - [DTO_XATTR_SET] = 16, - [DTO_LOG_REC] = 16, - /** - * creadits for inode change during write. - */ - [DTO_WRITE_BASE] = 3, - /** - * credits for single block write. - */ - [DTO_WRITE_BLOCK] = 16, - /** - * Attr set credits for chown. - * It is added to already set setattr credits - * 2 * QUOTA_INIT_BLOCKS(25) + - * 2 * QUOTA_DEL_BLOCKS(9) - */ - [DTO_ATTR_SET_CHOWN]= 68, -}; - static const struct dt_device_operations osd_dt_ops = { .dt_root_get = osd_root_get, .dt_statfs = osd_statfs, @@ -1475,8 +1476,12 @@ static int osd_declare_attr_set(const struct lu_env *env, struct thandle *handle) { struct osd_thandle *oh; + struct osd_object *obj; + LASSERT(dt != NULL); LASSERT(handle != NULL); + + obj = osd_dt_obj(dt); LASSERT(osd_invariant(obj)); oh = container_of0(handle, struct osd_thandle, ot_super); @@ -1485,6 +1490,19 @@ static int osd_declare_attr_set(const struct lu_env *env, OSD_DECLARE_OP(oh, attr_set); oh->ot_credits += osd_dto_credits_noquota[DTO_ATTR_SET_BASE]; + if (attr && attr->la_valid & LA_UID) { + if (obj->oo_inode) + osd_declare_qid(dt, oh, USRQUOTA, obj->oo_inode->i_uid, + obj->oo_inode); + osd_declare_qid(dt, oh, USRQUOTA, attr->la_uid, NULL); + } + if (attr && attr->la_valid & LA_GID) { + if (obj->oo_inode) + osd_declare_qid(dt, oh, GRPQUOTA, obj->oo_inode->i_gid, + obj->oo_inode); + osd_declare_qid(dt, oh, GRPQUOTA, attr->la_gid, NULL); + } + return 0; } @@ -1906,16 +1924,29 @@ static int osd_declare_object_create(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - OSD_DECLARE_OP(oh, insert); OSD_DECLARE_OP(oh, create); oh->ot_credits += osd_dto_credits_noquota[DTO_OBJECT_CREATE]; - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + /* XXX: So far, only normal fid needs be inserted into the oi, + * things could be changed later. Revise following code then. */ + if (fid_is_norm(lu_object_fid(&dt->do_lu))) { + OSD_DECLARE_OP(oh, insert); + oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + } + /* If this is directory, then we expect . and .. to be inserted as + * well. The one directory block always needs to be created for the + * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap, + * block), there is no danger of needing a tree for the first block. + */ + if (attr && S_ISDIR(attr->la_mode)) { + OSD_DECLARE_OP(oh, insert); + OSD_DECLARE_OP(oh, insert); + oh->ot_credits += osd_dto_credits_noquota[DTO_WRITE_BASE]; + } - /* if this is directory, then we expect . and .. - * to be inserted as well */ - OSD_DECLARE_OP(oh, insert); - OSD_DECLARE_OP(oh, insert); - oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + if (attr) { + osd_declare_qid(dt, oh, USRQUOTA, attr->la_uid, NULL); + osd_declare_qid(dt, oh, GRPQUOTA, attr->la_gid, NULL); + } return 0; } @@ -1972,6 +2003,9 @@ static int osd_declare_object_destroy(const struct lu_env *env, oh->ot_credits += osd_dto_credits_noquota[DTO_OBJECT_DELETE]; oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + osd_declare_qid(dt, oh, USRQUOTA, inode->i_uid, inode); + osd_declare_qid(dt, oh, GRPQUOTA, inode->i_gid, inode); + RETURN(0); } @@ -3027,6 +3061,13 @@ static ssize_t osd_declare_write(const struct lu_env *env, struct dt_object *dt, OSD_DECLARE_OP(oh, write); oh->ot_credits += osd_dto_credits_noquota[DTO_WRITE_BLOCK]; + if (osd_dt_obj(dt)->oo_inode == NULL) + return 0; + + osd_declare_qid(dt, oh, USRQUOTA, osd_dt_obj(dt)->oo_inode->i_uid, + osd_dt_obj(dt)->oo_inode); + osd_declare_qid(dt, oh, GRPQUOTA, osd_dt_obj(dt)->oo_inode->i_gid, + osd_dt_obj(dt)->oo_inode); return 0; } @@ -3168,6 +3209,12 @@ static int osd_index_declare_ea_delete(const struct lu_env *env, OSD_DECLARE_OP(oh, delete); oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE]; + LASSERT(osd_dt_obj(dt)->oo_inode); + osd_declare_qid(dt, oh, USRQUOTA, osd_dt_obj(dt)->oo_inode->i_uid, + osd_dt_obj(dt)->oo_inode); + osd_declare_qid(dt, oh, GRPQUOTA, osd_dt_obj(dt)->oo_inode->i_gid, + osd_dt_obj(dt)->oo_inode); + return 0; } @@ -3674,6 +3721,12 @@ static int osd_index_declare_ea_insert(const struct lu_env *env, OSD_DECLARE_OP(oh, insert); oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + LASSERT(osd_dt_obj(dt)->oo_inode); + osd_declare_qid(dt, oh, USRQUOTA, osd_dt_obj(dt)->oo_inode->i_uid, + osd_dt_obj(dt)->oo_inode); + osd_declare_qid(dt, oh, GRPQUOTA, osd_dt_obj(dt)->oo_inode->i_gid, + osd_dt_obj(dt)->oo_inode); + return 0; }