From 92d28a7f35bcdc718e2bdcec48bcdeacd0e8f4fe Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Thu, 2 Oct 2014 14:10:44 +0400 Subject: [PATCH] LU-5794 osd: additional checks to verify credits calculation every operation counts credits consumed, so we can check when credits are exceeded. now the number of declarations doesn't matter for the checks, it's only credits. i.e. it's OK to declare a single write for a specific range and then make many writes to that range within a single transaction. the patch fixes a few issues on osd-zfs that made it impossible to declare a change to an object being created in the same transaction. Signed-off-by: Alex Zhuravlev Change-Id: I714ccae7ce957e639938df3bed0c4e8fb49d3454 Reviewed-on: http://review.whamcloud.com/12169 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Fan Yong Reviewed-by: Oleg Drokin --- lustre/obdclass/local_storage.c | 21 +++- lustre/osd-ldiskfs/osd_handler.c | 196 ++++++++++++++++++++++---------------- lustre/osd-ldiskfs/osd_internal.h | 178 +++++++++++++++++++++------------- lustre/osd-ldiskfs/osd_io.c | 58 +++++++---- lustre/osd-zfs/osd_index.c | 24 ++--- 5 files changed, 292 insertions(+), 185 deletions(-) diff --git a/lustre/obdclass/local_storage.c b/lustre/obdclass/local_storage.c index d183adf..463da93 100644 --- a/lustre/obdclass/local_storage.c +++ b/lustre/obdclass/local_storage.c @@ -345,6 +345,25 @@ static struct dt_object *__local_file_create(const struct lu_env *env, if (rc) GOTO(trans_stop, rc); + if (dti->dti_dof.dof_type == DFT_DIR) { + if (!dt_try_as_dir(env, dto)) + GOTO(trans_stop, rc = -ENOTDIR); + + rc = dt_declare_insert(env, dto, (const struct dt_rec *)rec, + (const struct dt_key *)".", th); + if (rc != 0) + GOTO(trans_stop, rc); + + rc = dt_declare_insert(env, dto, (const struct dt_rec *)rec, + (const struct dt_key *)"..", th); + if (rc != 0) + GOTO(trans_stop, rc); + + rc = dt_declare_ref_add(env, dto, th); + if (rc != 0) + GOTO(trans_stop, rc); + } + rc = 
dt_trans_start_local(env, ls->ls_osd, th); if (rc) GOTO(trans_stop, rc); @@ -361,8 +380,6 @@ static struct dt_object *__local_file_create(const struct lu_env *env, LASSERT(dt_object_exists(dto)); if (dti->dti_dof.dof_type == DFT_DIR) { - if (!dt_try_as_dir(env, dto)) - GOTO(destroy, rc = -ENOTDIR); rec->rec_type = S_IFDIR; rec->rec_fid = fid; diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index c44c8c9..9ce5774 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -876,16 +876,63 @@ static struct thandle *osd_trans_create(const struct lu_env *env, osd_th_alloced(oh); memset(oti->oti_declare_ops, 0, - sizeof(oti->oti_declare_ops)); - memset(oti->oti_declare_ops_rb, 0, - sizeof(oti->oti_declare_ops_rb)); + sizeof(oti->oti_declare_ops)); memset(oti->oti_declare_ops_cred, 0, - sizeof(oti->oti_declare_ops_cred)); - oti->oti_rollback = false; + sizeof(oti->oti_declare_ops_cred)); + memset(oti->oti_declare_ops_used, 0, + sizeof(oti->oti_declare_ops_used)); } RETURN(th); } +void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th) +{ + struct osd_thread_info *oti = osd_oti_get(env); + struct osd_thandle *oh; + + oh = container_of0(th, struct osd_thandle, ot_super); + LASSERT(oh != NULL); + + CWARN(" create: %u/%u/%u, destroy: %u/%u/%u\n", + oti->oti_declare_ops[OSD_OT_CREATE], + oti->oti_declare_ops_cred[OSD_OT_CREATE], + oti->oti_declare_ops_used[OSD_OT_CREATE], + oti->oti_declare_ops[OSD_OT_DESTROY], + oti->oti_declare_ops_cred[OSD_OT_DESTROY], + oti->oti_declare_ops_used[OSD_OT_DESTROY]); + CWARN(" attr_set: %u/%u/%u, xattr_set: %u/%u/%u\n", + oti->oti_declare_ops[OSD_OT_ATTR_SET], + oti->oti_declare_ops_cred[OSD_OT_ATTR_SET], + oti->oti_declare_ops_used[OSD_OT_ATTR_SET], + oti->oti_declare_ops[OSD_OT_XATTR_SET], + oti->oti_declare_ops_cred[OSD_OT_XATTR_SET], + oti->oti_declare_ops_used[OSD_OT_XATTR_SET]); + CWARN(" write: %u/%u/%u, punch: %u/%u/%u, quota %u/%u/%u\n", + 
oti->oti_declare_ops[OSD_OT_WRITE], + oti->oti_declare_ops_cred[OSD_OT_WRITE], + oti->oti_declare_ops_used[OSD_OT_WRITE], + oti->oti_declare_ops[OSD_OT_PUNCH], + oti->oti_declare_ops_cred[OSD_OT_PUNCH], + oti->oti_declare_ops_used[OSD_OT_PUNCH], + oti->oti_declare_ops[OSD_OT_QUOTA], + oti->oti_declare_ops_cred[OSD_OT_QUOTA], + oti->oti_declare_ops_used[OSD_OT_QUOTA]); + CWARN(" insert: %u/%u/%u, delete: %u/%u/%u\n", + oti->oti_declare_ops[OSD_OT_INSERT], + oti->oti_declare_ops_cred[OSD_OT_INSERT], + oti->oti_declare_ops_used[OSD_OT_INSERT], + oti->oti_declare_ops[OSD_OT_DELETE], + oti->oti_declare_ops_cred[OSD_OT_DELETE], + oti->oti_declare_ops_used[OSD_OT_DELETE]); + CWARN(" ref_add: %u/%u/%u, ref_del: %u/%u/%u\n", + oti->oti_declare_ops[OSD_OT_REF_ADD], + oti->oti_declare_ops_cred[OSD_OT_REF_ADD], + oti->oti_declare_ops_used[OSD_OT_REF_ADD], + oti->oti_declare_ops[OSD_OT_REF_DEL], + oti->oti_declare_ops_cred[OSD_OT_REF_DEL], + oti->oti_declare_ops_used[OSD_OT_REF_DEL]); +} + /* * Concurrency: shouldn't matter. 
*/ @@ -918,33 +965,8 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, oh->ot_credits, osd_journal(dev)->j_max_transaction_buffers); - CWARN(" create: %u/%u, destroy: %u/%u\n", - oti->oti_declare_ops[OSD_OT_CREATE], - oti->oti_declare_ops_cred[OSD_OT_CREATE], - oti->oti_declare_ops[OSD_OT_DESTROY], - oti->oti_declare_ops_cred[OSD_OT_DESTROY]); - CWARN(" attr_set: %u/%u, xattr_set: %u/%u\n", - oti->oti_declare_ops[OSD_OT_ATTR_SET], - oti->oti_declare_ops_cred[OSD_OT_ATTR_SET], - oti->oti_declare_ops[OSD_OT_XATTR_SET], - oti->oti_declare_ops_cred[OSD_OT_XATTR_SET]); - CWARN(" write: %u/%u, punch: %u/%u, quota %u/%u\n", - oti->oti_declare_ops[OSD_OT_WRITE], - oti->oti_declare_ops_cred[OSD_OT_WRITE], - oti->oti_declare_ops[OSD_OT_PUNCH], - oti->oti_declare_ops_cred[OSD_OT_PUNCH], - oti->oti_declare_ops[OSD_OT_QUOTA], - oti->oti_declare_ops_cred[OSD_OT_QUOTA]); - CWARN(" insert: %u/%u, delete: %u/%u\n", - oti->oti_declare_ops[OSD_OT_INSERT], - oti->oti_declare_ops_cred[OSD_OT_INSERT], - oti->oti_declare_ops[OSD_OT_DELETE], - oti->oti_declare_ops_cred[OSD_OT_DELETE]); - CWARN(" ref_add: %u/%u, ref_del: %u/%u\n", - oti->oti_declare_ops[OSD_OT_REF_ADD], - oti->oti_declare_ops_cred[OSD_OT_REF_ADD], - oti->oti_declare_ops[OSD_OT_REF_DEL], - oti->oti_declare_ops_cred[OSD_OT_REF_DEL]); + + osd_trans_dump_creds(env, th); if (last_credits != oh->ot_credits && time_after(jiffies, last_printed + @@ -1833,6 +1855,9 @@ static int osd_attr_set(const struct lu_env *env, if (!rc) ll_dirty_inode(inode, I_DIRTY_DATASYNC); + + osd_trans_exec_check(env, handle, OSD_OT_ATTR_SET); + return rc; } @@ -2101,6 +2126,8 @@ static int __osd_object_create(struct osd_thread_info *info, int result; __u32 umask; + osd_trans_exec_op(info->oti_env, th, OSD_OT_CREATE); + /* we drop umask so that permissions we pass are not affected */ umask = current->fs->umask; current->fs->umask = 0; @@ -2123,6 +2150,8 @@ static int 
__osd_object_create(struct osd_thread_info *info, /* restore previous umask value */ current->fs->umask = umask; + osd_trans_exec_check(info->oti_env, th, OSD_OT_CREATE); + return result; } @@ -2138,14 +2167,19 @@ static int __osd_oi_insert(const struct lu_env *env, struct osd_object *obj, struct osd_inode_id *id = &info->oti_id; struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oh; + int rc; LASSERT(obj->oo_inode != NULL); oh = container_of0(th, struct osd_thandle, ot_super); LASSERT(oh->ot_handle); + osd_trans_exec_op(env, th, OSD_OT_INSERT); osd_id_gen(id, obj->oo_inode->i_ino, obj->oo_inode->i_generation); - return osd_oi_insert(info, osd, fid, id, oh->ot_handle, OI_CHECK_FLD); + rc = osd_oi_insert(info, osd, fid, id, oh->ot_handle, OI_CHECK_FLD); + osd_trans_exec_check(env, th, OSD_OT_INSERT); + + return rc; } int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd, @@ -2200,17 +2234,6 @@ static int osd_declare_object_create(const struct lu_env *env, osd_trans_declare_op(env, oh, OSD_OT_INSERT, osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); - /* If this is directory, then we expect . and .. to be inserted as - * well. The one directory block always needs to be created for the - * directory, so we could use DTO_WRITE_BASE here (GDT, block bitmap, - * block), there is no danger of needing a tree for the first block. 
- */ - if (attr && S_ISDIR(attr->la_mode)) { - osd_trans_declare_op(env, oh, OSD_OT_INSERT, - osd_dto_credits_noquota[DTO_WRITE_BASE]); - osd_trans_declare_op(env, oh, OSD_OT_INSERT, 0); - } - if (!attr) RETURN(0); @@ -2246,18 +2269,15 @@ static int osd_object_create(const struct lu_env *env, struct dt_object *dt, * 'tune2fs -O quota' will take care of creating them */ RETURN(-EPERM); - osd_trans_exec_op(env, th, OSD_OT_CREATE); - osd_trans_declare_rb(env, th, OSD_OT_REF_ADD); - - result = __osd_object_create(info, obj, attr, hint, dof, th); - if (result == 0) - result = __osd_oi_insert(env, obj, fid, th); + result = __osd_object_create(info, obj, attr, hint, dof, th); + if (result == 0) + result = __osd_oi_insert(env, obj, fid, th); LASSERT(ergo(result == 0, - dt_object_exists(dt) && !dt_object_remote(dt))); + dt_object_exists(dt) && !dt_object_remote(dt))); - LASSERT(osd_invariant(obj)); - RETURN(result); + LASSERT(osd_invariant(obj)); + RETURN(result); } /** @@ -2337,13 +2357,14 @@ static int osd_object_destroy(const struct lu_env *env, result = osd_oi_delete(osd_oti_get(env), osd, fid, oh->ot_handle, OI_CHECK_FLD); - /* XXX: add to ext3 orphan list */ - /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */ + osd_trans_exec_check(env, th, OSD_OT_DESTROY); + /* XXX: add to ext3 orphan list */ + /* rc = ext3_orphan_add(handle_t *handle, struct inode *inode) */ - /* not needed in the cache anymore */ - set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags); + /* not needed in the cache anymore */ + set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags); - RETURN(0); + RETURN(0); } /** @@ -2583,9 +2604,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, * 'tune2fs -O quota' will take care of creating them */ RETURN(-EPERM); - osd_trans_exec_op(env, th, OSD_OT_CREATE); - osd_trans_declare_rb(env, th, OSD_OT_REF_ADD); - result = __osd_object_create(info, obj, attr, hint, dof, th); if (result == 0) { if 
(fid_is_idif(fid) && @@ -2683,6 +2701,8 @@ static int osd_object_ref_add(const struct lu_env *env, ll_dirty_inode(inode, I_DIRTY_DATASYNC); LINVRNT(osd_invariant(obj)); + osd_trans_exec_check(env, th, OSD_OT_REF_ADD); + return rc; } @@ -2750,6 +2770,8 @@ static int osd_object_ref_del(const struct lu_env *env, struct dt_object *dt, ll_dirty_inode(inode, I_DIRTY_DATASYNC); LINVRNT(osd_invariant(obj)); + osd_trans_exec_check(env, th, OSD_OT_REF_DEL); + return 0; } @@ -2836,7 +2858,7 @@ static int osd_declare_xattr_set(const struct lu_env *env, * xattr set may involve inode quota change, reserve credits for * dquot_initialize() */ - oh->ot_credits += LDISKFS_MAXQUOTAS_INIT_BLOCKS(sb); + credits += LDISKFS_MAXQUOTAS_INIT_BLOCKS(sb); } osd_trans_declare_op(env, oh, OSD_OT_XATTR_SET, credits); @@ -2873,6 +2895,7 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, struct inode *inode = obj->oo_inode; struct osd_thread_info *info = osd_oti_get(env); int fs_flags = 0; + int rc; ENTRY; LASSERT(handle != NULL); @@ -2916,8 +2939,11 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt, strcmp(name, XATTR_NAME_LINK) == 0) return -ENOSPC; - return __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len, + rc = __osd_xattr_set(info, inode, name, buf->lb_buf, buf->lb_len, fs_flags); + osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET); + + return rc; } /* @@ -2993,6 +3019,7 @@ static int osd_xattr_del(const struct lu_env *env, struct dt_object *dt, dentry->d_inode = inode; dentry->d_sb = inode->i_sb; rc = inode->i_op->removexattr(dentry, name); + osd_trans_exec_check(env, handle, OSD_OT_XATTR_SET); return rc; } @@ -3292,6 +3319,7 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt, rc = iam_delete(oh->ot_handle, bag, (const struct iam_key *)key, ipd); osd_ipd_put(env, bag, ipd); LINVRNT(osd_invariant(obj)); + osd_trans_exec_check(env, handle, OSD_OT_DELETE); RETURN(rc); } @@ -3311,8 +3339,10 @@ static 
int osd_index_declare_ea_delete(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); + /* due to DNE we may need to remove an agent inode */ osd_trans_declare_op(env, oh, OSD_OT_DELETE, - osd_dto_credits_noquota[DTO_INDEX_DELETE]); + osd_dto_credits_noquota[DTO_INDEX_DELETE] + + osd_dto_credits_noquota[DTO_OBJECT_DELETE]); inode = osd_dt_obj(dt)->oo_inode; LASSERT(inode); @@ -3511,6 +3541,7 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt, out: LASSERT(osd_invariant(obj)); + osd_trans_exec_check(env, handle, OSD_OT_DELETE); RETURN(rc); } @@ -3663,6 +3694,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt, iam_rec, ipd); osd_ipd_put(env, bag, ipd); LINVRNT(osd_invariant(obj)); + osd_trans_exec_check(env, th, OSD_OT_INSERT); RETURN(rc); } @@ -4189,7 +4221,7 @@ static int osd_index_declare_ea_insert(const struct lu_env *env, struct osd_thandle *oh; struct osd_device *osd = osd_dev(dt->do_lu.lo_dev); struct lu_fid *fid = (struct lu_fid *)rec; - int rc; + int credits, rc = 0; ENTRY; LASSERT(!dt_object_remote(dt)); @@ -4198,8 +4230,21 @@ static int osd_index_declare_ea_insert(const struct lu_env *env, oh = container_of0(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle == NULL); - osd_trans_declare_op(env, oh, OSD_OT_INSERT, - osd_dto_credits_noquota[DTO_INDEX_INSERT]); + credits = osd_dto_credits_noquota[DTO_INDEX_INSERT]; + if (fid != NULL) { + rc = osd_remote_fid(env, osd, fid); + if (unlikely(rc < 0)) + RETURN(rc); + if (rc > 0) { + /* a reference to remote inode is represented by an + * agent inode which we have to create */ + credits += osd_dto_credits_noquota[DTO_OBJECT_CREATE]; + credits += osd_dto_credits_noquota[DTO_INDEX_INSERT]; + } + rc = 0; + } + + osd_trans_declare_op(env, oh, OSD_OT_INSERT, credits); if (osd_dt_obj(dt)->oo_inode != NULL) { struct inode *inode = osd_dt_obj(dt)->oo_inode; @@ -4212,22 +4257,6 @@ static 
int osd_index_declare_ea_insert(const struct lu_env *env, osd_dt_obj(dt), true, NULL, false); } - if (fid == NULL) - RETURN(0); - - rc = osd_remote_fid(env, osd, fid); - if (rc <= 0) - RETURN(rc); - - rc = 0; - - osd_trans_declare_op(env, oh, OSD_OT_CREATE, - osd_dto_credits_noquota[DTO_OBJECT_CREATE]); - osd_trans_declare_op(env, oh, OSD_OT_INSERT, - osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); - osd_trans_declare_op(env, oh, OSD_OT_INSERT, - osd_dto_credits_noquota[DTO_INDEX_INSERT] + 1); - RETURN(rc); } @@ -4323,6 +4352,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt, if (child != NULL) osd_object_put(env, child); LASSERT(osd_invariant(obj)); + osd_trans_exec_check(env, th, OSD_OT_INSERT); RETURN(rc); } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 0caaf0d..a9f7319 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -594,10 +594,10 @@ struct osd_thread_info { /* Tracking for transaction credits, to allow debugging and optimizing * cases where a large number of credits are being allocated for * single transaction. 
*/ + unsigned int oti_credits_before; unsigned short oti_declare_ops[OSD_OT_MAX]; - unsigned short oti_declare_ops_rb[OSD_OT_MAX]; unsigned short oti_declare_ops_cred[OSD_OT_MAX]; - bool oti_rollback; + unsigned short oti_declare_ops_used[OSD_OT_MAX]; }; extern int ldiskfs_pdo; @@ -749,6 +749,31 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) } #endif +#ifdef LDISKFS_HT_MISC +# define osd_journal_start_sb(sb, type, nblock) \ + ldiskfs_journal_start_sb(sb, type, nblock) +# define osd_ldiskfs_append(handle, inode, nblock, err) \ + ldiskfs_append(handle, inode, nblock) +# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \ + __ldiskfs_find_entry(dir, name, de, inlined, lock) +# define osd_journal_start(inode, type, nblocks) \ + ldiskfs_journal_start(inode, type, nblocks) +# define osd_transaction_size(dev) \ + (osd_journal(dev)->j_max_transaction_buffers / 2) +#else +# define LDISKFS_HT_MISC 0 +# define osd_journal_start_sb(sb, type, nblock) \ + ldiskfs_journal_start_sb(sb, nblock) +# define osd_ldiskfs_append(handle, inode, nblock, err) \ + ldiskfs_append(handle, inode, nblock, err) +# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \ + __ldiskfs_find_entry(dir, name, de, lock) +# define osd_journal_start(inode, type, nblocks) \ + ldiskfs_journal_start(inode, nblocks) +# define osd_transaction_size(dev) \ + (osd_journal(dev)->j_max_transaction_buffers) +#endif + /* * Invariants, assertions. 
*/ @@ -924,6 +949,7 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, extern int osd_trans_declare_op2rb[]; extern int ldiskfs_track_declares_assert; +void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th); static inline void osd_trans_declare_op(const struct lu_env *env, struct osd_thandle *oh, @@ -953,7 +979,7 @@ static inline void osd_trans_exec_op(const struct lu_env *env, struct osd_thread_info *oti = osd_oti_get(env); struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super); - unsigned int rb; + unsigned int rb, left; LASSERT(oh->ot_handle != NULL); if (unlikely(op >= OSD_OT_MAX)) { @@ -967,58 +993,99 @@ static inline void osd_trans_exec_op(const struct lu_env *env, } } - if (likely(!oti->oti_rollback && oti->oti_declare_ops[op] > 0)) { - oti->oti_declare_ops[op]--; - oti->oti_declare_ops_rb[op]++; - } else { - /* all future updates are considered rollback */ - oti->oti_rollback = true; - rb = osd_trans_declare_op2rb[op]; - if (unlikely(rb >= OSD_OT_MAX)) { - if (unlikely(ldiskfs_track_declares_assert)) - LASSERTF(rb < OSD_OT_MAX, "rb = %u\n", rb); - else { - CWARN("%s: Invalid rollback index %d\n", - osd_name(osd_dt_dev(th->th_dev)), rb); - libcfs_debug_dumpstack(NULL); - return; - } - } - if (unlikely(oti->oti_declare_ops_rb[rb] == 0)) { + /* find rollback (or reverse) operation for the given one + * such an operation doesn't require additional credits + * as the same set of blocks are modified */ + rb = osd_trans_declare_op2rb[op]; + + /* check whether credits for this operation were reserved at all */ + if (unlikely(oti->oti_declare_ops_cred[op] == 0 && + oti->oti_declare_ops_cred[rb] == 0)) { + /* the API is not perfect yet: CREATE does REF_ADD internally + * while DESTROY does not. 
To rollback CREATE the callers + * needs to call REF_DEL+DESTROY which is hard to detect using + * a simple table of rollback operations */ + if (op == OSD_OT_REF_DEL && + oti->oti_declare_ops_cred[OSD_OT_CREATE] > 0) + goto proceed; + if (op == OSD_OT_REF_ADD && + oti->oti_declare_ops_cred[OSD_OT_DESTROY] > 0) + goto proceed; + osd_trans_dump_creds(env, th); + CERROR("%s: op = %d, rb = %d\n", + osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb); + if (unlikely(ldiskfs_track_declares_assert)) + LBUG(); + } + +proceed: + /* remember how many credits we have unused before the operation */ + oti->oti_credits_before = oh->ot_handle->h_buffer_credits; + left = oti->oti_declare_ops_cred[op] - oti->oti_declare_ops_used[op]; + if (unlikely(oti->oti_credits_before < left)) { + osd_trans_dump_creds(env, th); + CERROR("%s: op = %d, rb = %d\n", + osd_name(osd_dt_dev(oh->ot_super.th_dev)), op, rb); + /* on a very small fs (testing?) it's possible that + * the transaction can't fit 1/4 of journal, so we + * just request less credits (see osd_trans_start()). 
+ * ignore the same case here */ + rb = osd_transaction_size(osd_dt_dev(th->th_dev)); + if (unlikely(oh->ot_credits < rb)) { if (unlikely(ldiskfs_track_declares_assert)) - LASSERTF(oti->oti_declare_ops_rb[rb] > 0, - "rb = %u\n", rb); - else { - CWARN("%s: Overflow in tracking declares for " - "index, rb = %d\n", - osd_name(osd_dt_dev(th->th_dev)), rb); - libcfs_debug_dumpstack(NULL); - return; - } + LBUG(); } - oti->oti_declare_ops_rb[rb]--; } } -static inline void osd_trans_declare_rb(const struct lu_env *env, - struct thandle *th, unsigned int op) +static inline void osd_trans_exec_check(const struct lu_env *env, + struct thandle *th, + unsigned int op) { struct osd_thread_info *oti = osd_oti_get(env); struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super); + int used, over, quota; - LASSERT(oh->ot_handle != NULL); - if (unlikely(op >= OSD_OT_MAX)) { - if (unlikely(ldiskfs_track_declares_assert)) - LASSERT(op < OSD_OT_MAX); - else { - CWARN("%s: Invalid operation index %d\n", - osd_name(osd_dt_dev(th->th_dev)), op); - libcfs_debug_dumpstack(NULL); - } + /* how many credits have been used by the operation */ + used = oti->oti_credits_before - oh->ot_handle->h_buffer_credits; + if (unlikely(used < 0)) { + /* if some block was allocated and released in the same + * transaction, then it won't be a part of the transaction + * and delta can be negative */ + return; + } + + if (used == 0) { + /* rollback operations (e.g. when we destroy just created + * object) should not consume any credits. 
there is no point + * to confuse the checks below */ + return; + } + + oti->oti_declare_ops_used[op] += used; + if (oti->oti_declare_ops_used[op] <= oti->oti_declare_ops_cred[op]) + return; + + /* we account quota for a whole transaction and any operation can + * consume corresponding credits */ + over = oti->oti_declare_ops_used[op] - + oti->oti_declare_ops_cred[op]; + quota = oti->oti_declare_ops_cred[OSD_OT_QUOTA] - + oti->oti_declare_ops_used[OSD_OT_QUOTA]; + if (over <= quota) { + /* probably that credits were consumed by + * quota indirectly (in the depths of ldiskfs) */ + oti->oti_declare_ops_used[OSD_OT_QUOTA] += over; + oti->oti_declare_ops_used[op] -= over; } else { - oti->oti_declare_ops_rb[op]++; + CWARN("op %d: used %u, used now %u, reserved %u\n", + op, oti->oti_declare_ops_used[op], used, + oti->oti_declare_ops_cred[op]); + osd_trans_dump_creds(env, th); + if (unlikely(ldiskfs_track_declares_assert)) + LBUG(); } } @@ -1088,31 +1155,6 @@ static inline unsigned long osd_remote_parent_ino(struct osd_device *dev) return dev->od_mdt_map->omm_remote_parent->d_inode->i_ino; } -#ifdef LDISKFS_HT_MISC -# define osd_journal_start_sb(sb, type, nblock) \ - ldiskfs_journal_start_sb(sb, type, nblock) -# define osd_ldiskfs_append(handle, inode, nblock, err) \ - ldiskfs_append(handle, inode, nblock) -# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \ - __ldiskfs_find_entry(dir, name, de, inlined, lock) -# define osd_journal_start(inode, type, nblocks) \ - ldiskfs_journal_start(inode, type, nblocks) -# define osd_transaction_size(dev) \ - (osd_journal(dev)->j_max_transaction_buffers / 2) -#else -# define LDISKFS_HT_MISC 0 -# define osd_journal_start_sb(sb, type, nblock) \ - ldiskfs_journal_start_sb(sb, nblock) -# define osd_ldiskfs_append(handle, inode, nblock, err) \ - ldiskfs_append(handle, inode, nblock, err) -# define osd_ldiskfs_find_entry(dir, name, de, inlined, lock) \ - __ldiskfs_find_entry(dir, name, de, lock) -# define osd_journal_start(inode, 
type, nblocks) \ - ldiskfs_journal_start(inode, nblocks) -# define osd_transaction_size(dev) \ - (osd_journal(dev)->j_max_transaction_buffers) -#endif - void ldiskfs_inc_count(handle_t *handle, struct inode *inode); void ldiskfs_dec_count(handle_t *handle, struct inode *inode); diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 3657495..065f2e4 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1082,6 +1082,7 @@ static int osd_declare_write_commit(const struct lu_env *env, int newblocks; int rc = 0; int flags = 0; + int credits = 0; bool ignore_quota = false; long long quota_space = 0; ENTRY; @@ -1129,14 +1130,14 @@ static int osd_declare_write_commit(const struct lu_env *env, depth = ext_depth(inode); depth = max(depth, 1) + 1; newblocks += depth; - oh->ot_credits++; /* inode */ - oh->ot_credits += depth * 2 * extents; - } else { - depth = 3; - newblocks += depth; - oh->ot_credits++; /* inode */ - oh->ot_credits += depth * extents; - } + credits++; /* inode */ + credits += depth * 2 * extents; + } else { + depth = 3; + newblocks += depth; + credits++; /* inode */ + credits += depth * extents; + } /* quota space for metadata blocks */ quota_space += depth * extents * LDISKFS_BLOCK_SIZE(osd_sb(osd)); @@ -1148,15 +1149,17 @@ static int osd_declare_write_commit(const struct lu_env *env, /* we can't dirty more bitmap blocks than exist */ if (newblocks > LDISKFS_SB(osd_sb(osd))->s_groups_count) - oh->ot_credits += LDISKFS_SB(osd_sb(osd))->s_groups_count; + credits += LDISKFS_SB(osd_sb(osd))->s_groups_count; else - oh->ot_credits += newblocks; + credits += newblocks; - /* we can't dirty more gd blocks than exist */ - if (newblocks > LDISKFS_SB(osd_sb(osd))->s_gdb_count) - oh->ot_credits += LDISKFS_SB(osd_sb(osd))->s_gdb_count; - else - oh->ot_credits += newblocks; + /* we can't dirty more gd blocks than exist */ + if (newblocks > LDISKFS_SB(osd_sb(osd))->s_gdb_count) + credits += LDISKFS_SB(osd_sb(osd))->s_gdb_count; 
+ else + credits += newblocks; + + osd_trans_declare_op(env, oh, OSD_OT_WRITE, credits); /* make sure the over quota flags were not set */ lnb[0].lnb_flags &= ~(OBD_BRW_OVER_USRQUOTA | OBD_BRW_OVER_GRPQUOTA); @@ -1232,6 +1235,8 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, osd_iobuf_add_page(iobuf, lnb[i].lnb_page); } + osd_trans_exec_op(env, thandle, OSD_OT_WRITE); + if (OBD_FAIL_CHECK(OBD_FAIL_OST_MAPBLK_ENOSPC)) { rc = -ENOSPC; } else if (iobuf->dr_npages > 0) { @@ -1257,9 +1262,11 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, osd_fini_iobuf(osd, iobuf); } - if (unlikely(rc != 0)) { - /* if write fails, we should drop pages from the cache */ - for (i = 0; i < npages; i++) { + osd_trans_exec_check(env, thandle, OSD_OT_WRITE); + + if (unlikely(rc != 0)) { + /* if write fails, we should drop pages from the cache */ + for (i = 0; i < npages; i++) { if (lnb[i].lnb_page == NULL) continue; LASSERT(PageLocked(lnb[i].lnb_page)); @@ -1704,6 +1711,8 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, oh = container_of(handle, struct osd_thandle, ot_super); LASSERT(oh->ot_handle->h_transaction != NULL); + osd_trans_exec_op(env, handle, OSD_OT_WRITE); + /* Write small symlink to inode body as we need to maintain correct * on-disk symlinks for ldiskfs. 
* Note: the buf->lb_buf contains a NUL terminator while buf->lb_len @@ -1716,9 +1725,12 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, result = osd_ldiskfs_write_record(inode, buf->lb_buf, buf->lb_len, is_link, pos, oh->ot_handle); - if (result == 0) - result = buf->lb_len; - return result; + if (result == 0) + result = buf->lb_len; + + osd_trans_exec_check(env, handle, OSD_OT_WRITE); + + return result; } static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, @@ -1796,6 +1808,10 @@ static int osd_punch(const struct lu_env *env, struct dt_object *dt, LASSERT(h != NULL); LASSERT(h == oh->ot_handle); + /* do not check credits with osd_trans_exec_check() as the truncate + * can restart the transaction internally and we restart the + * transaction in this case */ + if (tid != h->h_transaction->t_tid) { int credits = oh->ot_credits; /* diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index 261ac90..027b3c6 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -669,8 +669,9 @@ static int osd_declare_dir_delete(const struct lu_env *env, const struct dt_key *key, struct thandle *th) { - struct osd_object *obj = osd_dt_obj(dt); + struct osd_object *obj = osd_dt_obj(dt); struct osd_thandle *oh; + uint64_t dnode; ENTRY; LASSERT(dt_object_exists(dt)); @@ -679,10 +680,14 @@ static int osd_declare_dir_delete(const struct lu_env *env, LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); - LASSERT(obj->oo_db); - LASSERT(osd_object_is_zap(obj->oo_db)); - - dmu_tx_hold_zap(oh->ot_tx, obj->oo_db->db_object, TRUE, (char *)key); + if (dt_object_exists(dt)) { + LASSERT(obj->oo_db); + LASSERT(osd_object_is_zap(obj->oo_db)); + dnode = obj->oo_db->db_object; + } else { + dnode = DMU_NEW_OBJECT; + } + dmu_tx_hold_zap(oh->ot_tx, dnode, TRUE, (char *)key); RETURN(0); } @@ -1608,8 +1613,6 @@ int osd_index_try(const struct lu_env *env, struct dt_object *dt, struct osd_object *obj = 
osd_dt_obj(dt); ENTRY; - LASSERT(dt_object_exists(dt)); - /* * XXX: implement support for fixed-size keys sorted with natural * numerical way (not using internal hash value) @@ -1622,17 +1625,16 @@ int osd_index_try(const struct lu_env *env, struct dt_object *dt, RETURN(0); } - LASSERT(obj->oo_db != NULL); + LASSERT(!dt_object_exists(dt) || obj->oo_db != NULL); if (likely(feat == &dt_directory_features)) { - if (osd_object_is_zap(obj->oo_db)) + if (!dt_object_exists(dt) || osd_object_is_zap(obj->oo_db)) dt->do_index_ops = &osd_dir_ops; else RETURN(-ENOTDIR); } else if (unlikely(feat == &dt_acct_features)) { LASSERT(fid_is_acct(lu_object_fid(&dt->do_lu))); dt->do_index_ops = &osd_acct_index_ops; - } else if (osd_object_is_zap(obj->oo_db) && - dt->do_index_ops == NULL) { + } else if (dt->do_index_ops == NULL) { /* For index file, we don't support variable key & record sizes * and the key has to be unique */ if ((feat->dif_flags & ~DT_IND_UPDATE) != 0) -- 1.8.3.1