Whamcloud - gitweb
- fixed bug with possible concurrent split;
author yury <yury>
Tue, 31 Oct 2006 12:53:28 +0000 (12:53 +0000)
committer yury <yury>
Tue, 31 Oct 2006 12:53:28 +0000 (12:53 +0000)
- comments, cleanups.

lustre/cmm/cmm_object.c
lustre/include/md_object.h
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_open.c
lustre/mdt/mdt_reint.c

index 2137e4f..90f0890 100644 (file)
@@ -393,31 +393,63 @@ static int cml_create(const struct lu_env *env,
         ENTRY;
 
 #ifdef HAVE_SPLIT_SUPPORT
-        /* 
-         * Try to split @mo_p. If split is ok, -ERESTART is returned and current
-         * thread will not peoceed with create. Instead it sends -ERESTART to
-         * client to let it know that correct MDT should be choosen.
+        
+        /* Lock mode always should be sane. */
+        LASSERT(spec->sp_cr_mode != MDL_MINMODE);
+
+        /*
+         * Sigh... This is a long story. MDT may have a race with detecting
+         * whether a split is possible in cmm. We know about this race and let
+         * it live, because getting rid of it (with some sem or spinlock) would
+         * also mean that PDIROPS for create will not work, which is really bad
+         * for performance and makes no sense. So we allow the race, but split
+         * the dir only if one of the concurrent threads holds the EX lock.
+         * That is, two concurrent threads may have different lock modes on the
+         * directory (CW and EX), and it is not the first one to get here that
+         * splits the dir, but only the one which has the EX lock. We do not
+         * care that in this case the split may happen a bit later (when the dir
+         * size is not necessarily <= 64K, but may be larger). In short: we
+         * allow concurrent creates and protect the split with the EX lock.
          */
-        rc = cmm_split_try(env, mo_p);
-        if (rc) {
+        if (spec->sp_cr_mode == MDL_EX) {
+                /* 
+                 * Try to split @mo_p. If split is ok, -ERESTART is returned and
+                 * current thread will not proceed with create. Instead it sends
+                 * -ERESTART to client to let it know that correct MDT should be
+                 * chosen.
+                 */
+                rc = cmm_split_try(env, mo_p);
                 if (rc == -EALREADY) {
                         /* 
-                         * Dir is split and we would like to check if name came
-                         * to correct MDT. If not -ERESTART is returned by
-                         * cmm_split_check()
+                         * Dir is already split and we would like to check if
+                         * name came to correct MDT. If not -ERESTART is
+                         * returned by cmm_split_check()
                          */
                         rc = cmm_split_check(env, mo_p, child_name);
                         if (rc)
                                 RETURN(rc);
-                } else {
+                } else if (rc) {
                         /* 
                          * -ERESTART or some split error is returned, we can't
                          * proceed with create.
                          */
                         RETURN(rc);
                 }
-        }
 
+                /* 
+                 * Proceed with create as if nothing happened; split is not
+                 * yet expected.
+                 */
+        } else {
+                /* 
+                 * Check for possible split directory and let caller know that
+                 * it should tell client that directory is split and operation
+                 * should be repeated on the correct MDT.
+                 */
+                rc = cmm_split_check(env, mo_p, child_name);
+                if (rc)
+                        RETURN(rc);
+        }
 #endif
 
         rc = mdo_create(env, md_object_next(mo_p), child_name,
index 183096f..c24074c 100644 (file)
@@ -151,7 +151,8 @@ struct md_create_spec {
                 } sp_ea;
         } u;
         /* create flag from client: such as MDS_OPEN_CREAT, and others */
-        __u32 sp_cr_flags;
+        __u32      sp_cr_flags;
+        mdl_mode_t sp_cr_mode;
 };
 
 /*
index 3ae0025..8d38f3c 100644 (file)
 #include "mdt_internal.h"
 #include <linux/lustre_acl.h>
 #include <lustre_param.h>
+
+mdl_mode_t mdt_mdl_lock_modes[] = {
+        [LCK_MINMODE] = MDL_MINMODE,
+        [LCK_EX]      = MDL_EX,
+        [LCK_PW]      = MDL_PW,
+        [LCK_PR]      = MDL_PR,
+        [LCK_CW]      = MDL_CW,
+        [LCK_CR]      = MDL_CR,
+        [LCK_NL]      = MDL_NL,
+        [LCK_GROUP]   = MDL_GROUP
+};
+
+ldlm_mode_t mdt_dlm_lock_modes[] = {
+        [MDL_MINMODE] = LCK_MINMODE,
+        [MDL_EX]      = LCK_EX,
+        [MDL_PW]      = LCK_PW,
+        [MDL_PR]      = LCK_PR,
+        [MDL_CW]      = LCK_CW,
+        [MDL_CR]      = LCK_CR,
+        [MDL_NL]      = LCK_NL,
+        [MDL_GROUP]   = LCK_GROUP
+};
+
 /*
  * Initialized in mdt_mod_init().
  */
@@ -155,40 +178,6 @@ void mdt_set_disposition(struct mdt_thread_info *info,
                 rep->lock_policy_res1 |= flag;
 }
 
-static mdl_mode_t mdt_mdl_lock_modes[] = {
-        [LCK_MINMODE] = MDL_MINMODE,
-        [LCK_EX]      = MDL_EX,
-        [LCK_PW]      = MDL_PW,
-        [LCK_PR]      = MDL_PR,
-        [LCK_CW]      = MDL_CW,
-        [LCK_CR]      = MDL_CR,
-        [LCK_NL]      = MDL_NL,
-        [LCK_GROUP]   = MDL_GROUP
-};
-
-static ldlm_mode_t mdt_dlm_lock_modes[] = {
-        [MDL_MINMODE] = LCK_MINMODE,
-        [MDL_EX]      = LCK_EX,
-        [MDL_PW]      = LCK_PW,
-        [MDL_PR]      = LCK_PR,
-        [MDL_CW]      = LCK_CW,
-        [MDL_CR]      = LCK_CR,
-        [MDL_NL]      = LCK_NL,
-        [MDL_GROUP]   = LCK_GROUP
-};
-
-static inline mdl_mode_t mdt_dlm_mode2mdl_mode(ldlm_mode_t mode)
-{
-        LASSERT(IS_PO2(mode));
-        return mdt_mdl_lock_modes[mode];
-}
-
-static inline ldlm_mode_t mdt_mdl_mode2dlm_mode(mdl_mode_t mode)
-{
-        LASSERT(IS_PO2(mode));
-        return mdt_dlm_lock_modes[mode];
-}
-
 void mdt_lock_reg_init(struct mdt_lock_handle *lh, ldlm_mode_t lm)
 {
         lh->mlh_pdo_hash = 0;
index 8ef055a..58df070 100644 (file)
@@ -685,6 +685,21 @@ static inline void mdt_fid_unlock(struct lustre_handle *lh,
         ldlm_lock_decref(lh, mode);
 }
 
+extern mdl_mode_t mdt_mdl_lock_modes[];
+extern ldlm_mode_t mdt_dlm_lock_modes[];
+
+static inline mdl_mode_t mdt_dlm_mode2mdl_mode(ldlm_mode_t mode)
+{
+        LASSERT(IS_PO2(mode));
+        return mdt_mdl_lock_modes[mode];
+}
+
+static inline ldlm_mode_t mdt_mdl_mode2dlm_mode(mdl_mode_t mode)
+{
+        LASSERT(IS_PO2(mode));
+        return mdt_dlm_lock_modes[mode];
+}
+
 /*
  * Capability
  */
index f52d3a9..9927f56 100644 (file)
@@ -831,6 +831,10 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
         if (result == -ENOENT) {
                 /* Not found and with MDS_OPEN_CREAT: let's create it. */
                 mdt_set_disposition(info, ldlm_rep, DISP_OPEN_CREATE);
+
+                /* Let lower layers know what is lock mode on directory. */
+                info->mti_spec.sp_cr_mode =
+                        mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode);
                 result = mdo_create(info->mti_env,
                                     mdt_object_child(parent),
                                     rr->rr_name,
index c355021..31231eb 100644 (file)
@@ -70,6 +70,9 @@ static int mdt_md_create(struct mdt_thread_info *info)
                 mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                                OBD_FAIL_MDS_REINT_CREATE_WRITE);
 
+                info->mti_spec.sp_cr_mode =
+                        mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode);
+                
                 rc = mdo_create(info->mti_env, next, rr->rr_name,
                                 mdt_object_child(child),
                                 &info->mti_spec, ma);