From 9f4c54a56a8f0d3b177a76c3786c54f7d243a690 Mon Sep 17 00:00:00 2001 From: yury Date: Tue, 31 Oct 2006 12:53:28 +0000 Subject: [PATCH] - fixed bug with possible concurrent split; - comments, cleanups. --- lustre/cmm/cmm_object.c | 54 ++++++++++++++++++++++++++++++++++--------- lustre/include/md_object.h | 3 ++- lustre/mdt/mdt_handler.c | 57 +++++++++++++++++++--------------------------- lustre/mdt/mdt_internal.h | 15 ++++++++++++ lustre/mdt/mdt_open.c | 4 ++++ lustre/mdt/mdt_reint.c | 3 +++ 6 files changed, 90 insertions(+), 46 deletions(-) diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c index 2137e4f..90f0890 100644 --- a/lustre/cmm/cmm_object.c +++ b/lustre/cmm/cmm_object.c @@ -393,31 +393,63 @@ static int cml_create(const struct lu_env *env, ENTRY; #ifdef HAVE_SPLIT_SUPPORT - /* - * Try to split @mo_p. If split is ok, -ERESTART is returned and current - * thread will not peoceed with create. Instead it sends -ERESTART to - * client to let it know that correct MDT should be choosen. + + /* Lock mode always should be sane. */ + LASSERT(spec->sp_cr_mode != MDL_MINMODE); + + /* + * Sigh... This is long story. MDT may have race with detecting if split + * is possible in cmm. We know this race and let it live, because + * getting rid of it (with some sem or spinlock) will also mean that + * PDIROPS for create will not work, what is really bad for performance + * and makes no sense. So, we better allow the race but split dir only + * if some of concurrent threads takes EX lock. So that, say, two + * concurrent threads may have different lock modes on directory (CW and + * EX) and not first one which comes here should split dir, but only + * that which has EX lock. And we do not care that in this case, split + * will happen a bit later may be (when dir size will not be mandatory + * <= 64K, but may be larger). So that, we allow concurrent creates and + * protect split by EX lock. 
*/ - rc = cmm_split_try(env, mo_p); - if (rc) { + if (spec->sp_cr_mode == MDL_EX) { + /* + * Try to split @mo_p. If split is ok, -ERESTART is returned and + * current thread will not proceed with create. Instead it sends + * -ERESTART to client to let it know that correct MDT should be + * chosen. + */ + rc = cmm_split_try(env, mo_p); if (rc == -EALREADY) { /* - * Dir is split and we would like to check if name came - * to correct MDT. If not -ERESTART is returned by - * cmm_split_check() + * Dir is already split and we would like to check if + * name came to correct MDT. If not -ERESTART is + * returned by cmm_split_check() */ rc = cmm_split_check(env, mo_p, child_name); if (rc) RETURN(rc); - } else { + } else if (rc) { /* * -ERESTART or some split error is returned, we can't * proceed with create. */ RETURN(rc); } - } + /* + * Proceed with create as nothing happened, split is + * not yet expected. + */ + } else { + /* + * Check for possible split directory and let caller know that + * it should tell client that directory is split and operation + * should repeat to correct MDT. 
+ */ + rc = cmm_split_check(env, mo_p, child_name); + if (rc) + RETURN(rc); + } #endif rc = mdo_create(env, md_object_next(mo_p), child_name, diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 183096f..c24074c 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -151,7 +151,8 @@ struct md_create_spec { } sp_ea; } u; /* create flag from client: such as MDS_OPEN_CREAT, and others */ - __u32 sp_cr_flags; + __u32 sp_cr_flags; + mdl_mode_t sp_cr_mode; }; /* diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 3ae0025..8d38f3c 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -59,6 +59,29 @@ #include "mdt_internal.h" #include #include + +mdl_mode_t mdt_mdl_lock_modes[] = { + [LCK_MINMODE] = MDL_MINMODE, + [LCK_EX] = MDL_EX, + [LCK_PW] = MDL_PW, + [LCK_PR] = MDL_PR, + [LCK_CW] = MDL_CW, + [LCK_CR] = MDL_CR, + [LCK_NL] = MDL_NL, + [LCK_GROUP] = MDL_GROUP +}; + +ldlm_mode_t mdt_dlm_lock_modes[] = { + [MDL_MINMODE] = LCK_MINMODE, + [MDL_EX] = LCK_EX, + [MDL_PW] = LCK_PW, + [MDL_PR] = LCK_PR, + [MDL_CW] = LCK_CW, + [MDL_CR] = LCK_CR, + [MDL_NL] = LCK_NL, + [MDL_GROUP] = LCK_GROUP +}; + /* * Initialized in mdt_mod_init(). 
*/ @@ -155,40 +178,6 @@ void mdt_set_disposition(struct mdt_thread_info *info, rep->lock_policy_res1 |= flag; } -static mdl_mode_t mdt_mdl_lock_modes[] = { - [LCK_MINMODE] = MDL_MINMODE, - [LCK_EX] = MDL_EX, - [LCK_PW] = MDL_PW, - [LCK_PR] = MDL_PR, - [LCK_CW] = MDL_CW, - [LCK_CR] = MDL_CR, - [LCK_NL] = MDL_NL, - [LCK_GROUP] = MDL_GROUP -}; - -static ldlm_mode_t mdt_dlm_lock_modes[] = { - [MDL_MINMODE] = LCK_MINMODE, - [MDL_EX] = LCK_EX, - [MDL_PW] = LCK_PW, - [MDL_PR] = LCK_PR, - [MDL_CW] = LCK_CW, - [MDL_CR] = LCK_CR, - [MDL_NL] = LCK_NL, - [MDL_GROUP] = LCK_GROUP -}; - -static inline mdl_mode_t mdt_dlm_mode2mdl_mode(ldlm_mode_t mode) -{ - LASSERT(IS_PO2(mode)); - return mdt_mdl_lock_modes[mode]; -} - -static inline ldlm_mode_t mdt_mdl_mode2dlm_mode(mdl_mode_t mode) -{ - LASSERT(IS_PO2(mode)); - return mdt_dlm_lock_modes[mode]; -} - void mdt_lock_reg_init(struct mdt_lock_handle *lh, ldlm_mode_t lm) { lh->mlh_pdo_hash = 0; diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 8ef055a..58df070 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -685,6 +685,21 @@ static inline void mdt_fid_unlock(struct lustre_handle *lh, ldlm_lock_decref(lh, mode); } +extern mdl_mode_t mdt_mdl_lock_modes[]; +extern ldlm_mode_t mdt_dlm_lock_modes[]; + +static inline mdl_mode_t mdt_dlm_mode2mdl_mode(ldlm_mode_t mode) +{ + LASSERT(IS_PO2(mode)); + return mdt_mdl_lock_modes[mode]; +} + +static inline ldlm_mode_t mdt_mdl_mode2dlm_mode(mdl_mode_t mode) +{ + LASSERT(IS_PO2(mode)); + return mdt_dlm_lock_modes[mode]; +} + /* * Capability */ diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index f52d3a9..9927f56 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -831,6 +831,10 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) if (result == -ENOENT) { /* Not found and with MDS_OPEN_CREAT: let's create it. 
*/ mdt_set_disposition(info, ldlm_rep, DISP_OPEN_CREATE); + + /* Let lower layers know what is lock mode on directory. */ + info->mti_spec.sp_cr_mode = + mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode); result = mdo_create(info->mti_env, mdt_object_child(parent), rr->rr_name, diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index c355021..31231eb 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -70,6 +70,9 @@ static int mdt_md_create(struct mdt_thread_info *info) mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_CREATE_WRITE); + info->mti_spec.sp_cr_mode = + mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode); + rc = mdo_create(info->mti_env, next, rr->rr_name, mdt_object_child(child), &info->mti_spec, ma); -- 1.8.3.1