Whamcloud - gitweb
LU-1187 mdt: Add MDS_INODELOCK_PERM lock for remote dir
authorWang Di <di.wang@whamcloud.com>
Thu, 26 Jul 2012 11:02:58 +0000 (04:02 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 5 Feb 2013 18:15:10 +0000 (13:15 -0500)
Add MDS_INODELOCK_PERM for remote directory on remote MDT to
protect permission changes on remote MDT. So, Master MDT, where
the parent is, will grant LOOKUP lock to the client. Remote MDT,
where the child is, will grant UPDATE|PERM lock to the client.

And client will clear the dcache during, when either PERM or LOOKUP
lock is being revoked.

Change-Id: Ie11d14bbe7c706a76f7c4bd8f75b07807e2dac02
Signed-off-by: Wang Di <di.wang@intel.com>
Reviewed-on: http://review.whamcloud.com/4346
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/include/liblustre.h
lustre/include/linux/lustre_intent.h
lustre/include/lustre/lustre_idl.h
lustre/llite/dcache.c
lustre/llite/llite_internal.h
lustre/llite/namei.c
lustre/lmv/lmv_intent.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_reint.c
lustre/mdt/mdt_xattr.c

index 0c5762a..ac4414e 100644 (file)
@@ -206,9 +206,14 @@ struct lustre_intent_data {
         int       it_disposition;
         int       it_status;
         __u64     it_lock_handle;
         int       it_disposition;
         int       it_status;
         __u64     it_lock_handle;
-        void     *it_data;
         int       it_lock_mode;
         int       it_lock_mode;
+       int       it_remote_lock_mode;
+       __u64     it_remote_lock_handle;
+        void     *it_data;
+
+       unsigned int    it_lock_set:1;
 };
 };
+
 struct lookup_intent {
         int     it_magic;
         void    (*it_op_release)(struct lookup_intent *);
 struct lookup_intent {
         int     it_magic;
         void    (*it_op_release)(struct lookup_intent *);
index a1793f2..549d14d 100644 (file)
 
 /* intent IT_XXX are defined in lustre/include/obd.h */
 struct lustre_intent_data {
 
 /* intent IT_XXX are defined in lustre/include/obd.h */
 struct lustre_intent_data {
-        int             it_disposition;
-        int             it_status;
-        __u64           it_lock_handle;
-        __u64           it_lock_bits;
-        void           *it_data;
-        int             it_lock_mode;
-        unsigned int    it_lock_set:1;
+       int             it_disposition;
+       int             it_status;
+       __u64           it_lock_handle;
+       __u64           it_lock_bits;
+       int             it_lock_mode;
+       int             it_remote_lock_mode;
+       __u64           it_remote_lock_handle;
+       void           *it_data;
+       unsigned int    it_lock_set:1;
 };
 
 struct lookup_intent {
 };
 
 struct lookup_intent {
index 92613ef..1929d24 100644 (file)
@@ -1912,8 +1912,9 @@ extern void lustre_swab_generic_32s (__u32 *val);
 #define MDS_INODELOCK_UPDATE 0x000002       /* size, links, timestamps */
 #define MDS_INODELOCK_OPEN   0x000004       /* For opened files */
 #define MDS_INODELOCK_LAYOUT 0x000008       /* for layout */
 #define MDS_INODELOCK_UPDATE 0x000002       /* size, links, timestamps */
 #define MDS_INODELOCK_OPEN   0x000004       /* For opened files */
 #define MDS_INODELOCK_LAYOUT 0x000008       /* for layout */
+#define MDS_INODELOCK_PERM   0x000010       /* for permission */
 
 
-#define MDS_INODELOCK_MAXSHIFT 3
+#define MDS_INODELOCK_MAXSHIFT 4
 /* This FULL lock is useful to take on unlink sort of operations */
 #define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
 
 /* This FULL lock is useful to take on unlink sort of operations */
 #define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
 
index 55a1654..1a8d156 100644 (file)
@@ -271,7 +271,16 @@ void ll_intent_drop_lock(struct lookup_intent *it)
                 /* bug 494: intent_release may be called multiple times, from
                  * this thread and we don't want to double-decref this lock */
                 it->d.lustre.it_lock_mode = 0;
                 /* bug 494: intent_release may be called multiple times, from
                  * this thread and we don't want to double-decref this lock */
                 it->d.lustre.it_lock_mode = 0;
-        }
+               if (it->d.lustre.it_remote_lock_mode != 0) {
+                       handle.cookie = it->d.lustre.it_remote_lock_handle;
+
+                       CDEBUG(D_DLMTRACE, "releasing remote lock with cookie"
+                              LPX64" from it %p\n", handle.cookie, it);
+                       ldlm_lock_decref(&handle,
+                                        it->d.lustre.it_remote_lock_mode);
+                       it->d.lustre.it_remote_lock_mode = 0;
+               }
+       }
 }
 
 void ll_intent_release(struct lookup_intent *it)
 }
 
 void ll_intent_release(struct lookup_intent *it)
@@ -551,16 +560,33 @@ out:
                ll_invalidate_aliases(de->d_inode);
        } else {
                __u64 bits = 0;
                ll_invalidate_aliases(de->d_inode);
        } else {
                __u64 bits = 0;
+               __u64 matched_bits = 0;
 
                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       d_refcount(de));
 
                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       d_refcount(de));
+
                ll_set_lock_data(exp, de->d_inode, it, &bits);
                ll_set_lock_data(exp, de->d_inode, it, &bits);
-               if ((bits & MDS_INODELOCK_LOOKUP) && d_lustre_invalid(de))
+
+               /* Note: We have to match both LOOKUP and PERM lock
+                * here to make sure the dentry is valid and no one
+                * changing the permission.
+                * But if the client connects < 2.4 server, which will
+                * only grant LOOKUP lock, so we can only Match LOOKUP
+                * lock for old server */
+               if (exp_connect_flags(ll_i2mdexp(de->d_inode)) &&
+                                                       OBD_CONNECT_LVB_TYPE)
+                       matched_bits =
+                               MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM;
+               else
+                       matched_bits = MDS_INODELOCK_LOOKUP;            
+
+               if (((bits & matched_bits) == matched_bits) &&
+                   d_lustre_invalid(de))
                        d_lustre_revalidate(de);
                        d_lustre_revalidate(de);
-                ll_lookup_finish_locks(it, de);
-        }
+               ll_lookup_finish_locks(it, de);
+       }
 
 mark:
         if (it != NULL && it->it_op == IT_GETATTR && rc > 0)
 
 mark:
         if (it != NULL && it->it_op == IT_GETATTR && rc > 0)
index 3460658..a38ca9e 100644 (file)
@@ -1494,16 +1494,38 @@ static inline int ll_file_nolock(const struct file *file)
 static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
                                     struct lookup_intent *it, __u64 *bits)
 {
 static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
                                     struct lookup_intent *it, __u64 *bits)
 {
-        if (!it->d.lustre.it_lock_set) {
-                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
-                       inode, inode->i_ino, inode->i_generation);
-                md_set_lock_data(exp, &it->d.lustre.it_lock_handle,
-                                 inode, &it->d.lustre.it_lock_bits);
-                it->d.lustre.it_lock_set = 1;
-        }
-
-        if (bits != NULL)
-                *bits = it->d.lustre.it_lock_bits;
+       if (!it->d.lustre.it_lock_set) {
+               struct lustre_handle handle;
+
+               /* If this inode is a remote object, it will get two
+                * separate locks in different namespaces, Master MDT,
+                * where the name entry is, will grant LOOKUP lock,
+                * remote MDT, where the object is, will grant
+                * UPDATE|PERM lock. The inode will be attched to both
+                * LOOKUP and PERM locks, so revoking either locks will
+                * case the dcache being cleared */
+               if (it->d.lustre.it_remote_lock_mode) {
+                       handle.cookie = it->d.lustre.it_remote_lock_handle;
+                       CDEBUG(D_DLMTRACE, "setting l_data to inode %p"
+                              "(%lu/%u) for remote lock "LPX64"\n", inode,
+                              inode->i_ino, inode->i_generation,
+                              handle.cookie);
+                       md_set_lock_data(exp, &handle.cookie, inode, NULL);
+               }
+               
+               handle.cookie = it->d.lustre.it_lock_handle;
+
+               CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)"
+                      " for lock "LPX64"\n", inode, inode->i_ino,
+                      inode->i_generation, handle.cookie);
+
+               md_set_lock_data(exp, &handle.cookie, inode,
+                                &it->d.lustre.it_lock_bits);
+               it->d.lustre.it_lock_set = 1;
+       }
+
+       if (bits != NULL)
+               *bits = it->d.lustre.it_lock_bits;
 }
 
 static inline void ll_lock_dcache(struct inode *inode)
 }
 
 static inline void ll_lock_dcache(struct inode *inode)
index 9073f85..3bcf3ff 100644 (file)
@@ -226,7 +226,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                 /* For OPEN locks we differentiate between lock modes
                 * LCK_CR, LCK_CW, LCK_PR - bug 22891 */
                if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
                 /* For OPEN locks we differentiate between lock modes
                 * LCK_CR, LCK_CW, LCK_PR - bug 22891 */
                if (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
-                           MDS_INODELOCK_LAYOUT))
+                           MDS_INODELOCK_LAYOUT | MDS_INODELOCK_PERM))
                         ll_have_md_lock(inode, &bits, LCK_MINMODE);
 
                 if (bits & MDS_INODELOCK_OPEN)
                         ll_have_md_lock(inode, &bits, LCK_MINMODE);
 
                 if (bits & MDS_INODELOCK_OPEN)
@@ -284,7 +284,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 
                if (inode->i_sb->s_root &&
                    inode != inode->i_sb->s_root->d_inode &&
 
                if (inode->i_sb->s_root &&
                    inode != inode->i_sb->s_root->d_inode &&
-                   (bits & MDS_INODELOCK_LOOKUP))
+                   (bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)))
                        ll_invalidate_aliases(inode);
                 iput(inode);
                 break;
                        ll_invalidate_aliases(inode);
                 iput(inode);
                 break;
index b1f071d..207f0d9 100644 (file)
@@ -120,29 +120,30 @@ int lmv_intent_remote(struct obd_export *exp, void *lmm,
         if (rc)
                 GOTO(out_free_op_data, rc);
 
         if (rc)
                 GOTO(out_free_op_data, rc);
 
-        /*
-         * LLite needs LOOKUP lock to track dentry revocation in order to
-         * maintain dcache consistency. Thus drop UPDATE lock here and put
-         * LOOKUP in request.
-         */
-        if (it->d.lustre.it_lock_mode != 0) {
-                ldlm_lock_decref((void *)&it->d.lustre.it_lock_handle,
-                                 it->d.lustre.it_lock_mode);
-                it->d.lustre.it_lock_mode = 0;
-        }
-        it->d.lustre.it_lock_handle = plock.cookie;
-        it->d.lustre.it_lock_mode = pmode;
-
-        EXIT;
+       /*
+        * LLite needs LOOKUP lock to track dentry revocation in order to
+        * maintain dcache consistency. Thus drop UPDATE|PERM lock here
+        * and put LOOKUP in request.
+        */
+       if (it->d.lustre.it_lock_mode != 0) {
+               it->d.lustre.it_remote_lock_handle =
+                                       it->d.lustre.it_lock_handle;
+               it->d.lustre.it_remote_lock_mode = it->d.lustre.it_lock_mode;
+       }
+
+       it->d.lustre.it_lock_handle = plock.cookie;
+       it->d.lustre.it_lock_mode = pmode;
+
+       EXIT;
 out_free_op_data:
 out_free_op_data:
-        OBD_FREE_PTR(op_data);
+       OBD_FREE_PTR(op_data);
 out:
 out:
-        if (rc && pmode)
-                ldlm_lock_decref(&plock, pmode);
+       if (rc && pmode)
+               ldlm_lock_decref(&plock, pmode);
 
 
-        ptlrpc_req_finished(*reqp);
-        *reqp = req;
-        return rc;
+       ptlrpc_req_finished(*reqp);
+       *reqp = req;
+       return rc;
 }
 
 /*
 }
 
 /*
index 87c2fc7..3ed1784 100644 (file)
@@ -1160,9 +1160,9 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                          * needed here but update is.
                          */
                         child_bits &= ~MDS_INODELOCK_LOOKUP;
                          * needed here but update is.
                          */
                         child_bits &= ~MDS_INODELOCK_LOOKUP;
-                        child_bits |= MDS_INODELOCK_UPDATE;
+                       child_bits |= MDS_INODELOCK_PERM | MDS_INODELOCK_UPDATE;
 
 
-                        rc = mdt_object_lock(info, child, lhc, child_bits,
+                       rc = mdt_object_lock(info, child, lhc, child_bits,
                                              MDT_LOCAL_LOCK);
                 }
                 if (rc == 0) {
                                              MDT_LOCAL_LOCK);
                 }
                 if (rc == 0) {
@@ -2450,10 +2450,14 @@ static int mdt_object_lock0(struct mdt_thread_info *info, struct mdt_object *o,
 
        if (mdt_object_remote(o)) {
                 if (locality == MDT_CROSS_LOCK) {
 
        if (mdt_object_remote(o)) {
                 if (locality == MDT_CROSS_LOCK) {
-                        ibits &= ~MDS_INODELOCK_UPDATE;
+                       ibits &= ~(MDS_INODELOCK_UPDATE | MDS_INODELOCK_PERM);
                         ibits |= MDS_INODELOCK_LOOKUP;
                 } else {
                         ibits |= MDS_INODELOCK_LOOKUP;
                 } else {
-                        LASSERT(!(ibits & MDS_INODELOCK_UPDATE));
+                       LASSERTF(!(ibits &
+                                 (MDS_INODELOCK_UPDATE | MDS_INODELOCK_PERM)),
+                               "%s: wrong bit "LPX64" for remote obj "DFID"\n",
+                               mdt_obd_name(info->mti_mdt), ibits,
+                               PFID(mdt_object_fid(o)));
                         LASSERT(ibits & MDS_INODELOCK_LOOKUP);
                 }
                 /* No PDO lock on remote object */
                         LASSERT(ibits & MDS_INODELOCK_LOOKUP);
                 }
                 /* No PDO lock on remote object */
@@ -3565,10 +3569,11 @@ static int mdt_intent_getattr(enum mdt_it_code opcode,
 
         switch (opcode) {
         case MDT_IT_LOOKUP:
 
         switch (opcode) {
         case MDT_IT_LOOKUP:
-                child_bits = MDS_INODELOCK_LOOKUP;
+               child_bits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM;
                 break;
         case MDT_IT_GETATTR:
                 break;
         case MDT_IT_GETATTR:
-                child_bits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE;
+               child_bits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
+                            MDS_INODELOCK_PERM;
                 break;
         default:
                 CERROR("Unsupported intent (%d)\n", opcode);
                 break;
         default:
                 CERROR("Unsupported intent (%d)\n", opcode);
index 5b458ce..b5c5cb9 100644 (file)
@@ -392,8 +392,12 @@ int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
         lh = &info->mti_lh[MDT_LH_PARENT];
         mdt_lock_reg_init(lh, LCK_PW);
 
         lh = &info->mti_lh[MDT_LH_PARENT];
         mdt_lock_reg_init(lh, LCK_PW);
 
-        if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
-                lockpart |= MDS_INODELOCK_LOOKUP;
+       /* Even though the new MDT will grant PERM lock to the old
+        * client, but the old client will almost ignore that during
+        * So it needs to revoke both LOOKUP and PERM lock here, so
+        * both new and old client can cancel the dcache */
+       if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
+               lockpart |= MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM;
 
         rc = mdt_object_lock(info, mo, lh, lockpart, MDT_LOCAL_LOCK);
         if (rc != 0)
 
         rc = mdt_object_lock(info, mo, lh, lockpart, MDT_LOCAL_LOCK);
         if (rc != 0)
index 1e2f157..6164810 100644 (file)
@@ -337,8 +337,9 @@ int mdt_reint_setxattr(struct mdt_thread_info *info,
          * not change the access permissions of this inode, nor any
          * other existing inodes. It is setting the ACLs inherited
          * by new directories/files at create time. */
          * not change the access permissions of this inode, nor any
          * other existing inodes. It is setting the ACLs inherited
          * by new directories/files at create time. */
+       /* We need revoke both LOOKUP|PERM lock here, see mdt_attr_set. */
         if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
         if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
-                lockpart |= MDS_INODELOCK_LOOKUP;
+               lockpart |= MDS_INODELOCK_PERM | MDS_INODELOCK_LOOKUP;
 
         lh = &info->mti_lh[MDT_LH_PARENT];
         /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX
 
         lh = &info->mti_lh[MDT_LH_PARENT];
         /* ACLs were sent to clients under LCK_CR locks, so taking LCK_EX