Whamcloud - gitweb
LU-13759 dom: lock cancel to drop pages 01/39401/7
authorMikhail Pershin <mpershin@whamcloud.com>
Wed, 15 Jul 2020 05:12:55 +0000 (08:12 +0300)
committerOleg Drokin <green@whamcloud.com>
Thu, 13 Aug 2020 05:58:38 +0000 (05:58 +0000)
Prevent stale pages after lock cancel by creating
cl_page connection for read-on-open pages.

This reverts 02e766f5ed to fix the problem.
Since VM pages are connected to cl_object they can be
found and discarded by CLIO properly.

Fixes: 02e766f5ed ("LU-11427 llite: optimize read on open pages")
Test-Parameters: mdssizegb=20 testlist=dom-performance
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Iba8c87c934c442b4c0b45d7d3821ceede1a6e68f
Reviewed-on: https://review.whamcloud.com/39401
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Vitaly Fertman <vitaly.fertman@hpe.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/namei.c
lustre/mdc/mdc_dev.c
lustre/tests/sanity-dom.sh

index d158d1a..0bb845f 100644 (file)
@@ -453,9 +453,10 @@ static inline int ll_dom_readpage(void *data, struct page *page)
        return rc;
 }
 
        return rc;
 }
 
-void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
-                       struct lookup_intent *it)
+void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req)
 {
 {
+       struct lu_env *env;
+       struct cl_io *io;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct cl_object *obj = lli->lli_clob;
        struct address_space *mapping = inode->i_mapping;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct cl_object *obj = lli->lli_clob;
        struct address_space *mapping = inode->i_mapping;
@@ -465,6 +466,8 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
        char *data;
        unsigned long index, start;
        struct niobuf_local lnb;
        char *data;
        unsigned long index, start;
        struct niobuf_local lnb;
+       __u16 refcheck;
+       int rc;
 
        ENTRY;
 
 
        ENTRY;
 
@@ -499,6 +502,16 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
                RETURN_EXIT;
        }
 
                RETURN_EXIT;
        }
 
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env))
+               RETURN_EXIT;
+       io = vvp_env_thread_io(env);
+       io->ci_obj = obj;
+       io->ci_ignore_layout = 1;
+       rc = cl_io_init(env, io, CIT_MISC, obj);
+       if (rc)
+               GOTO(out_io, rc);
+
        CDEBUG(D_INFO, "Get data along with open at %llu len %i, size %llu\n",
               rnb->rnb_offset, rnb->rnb_len, body->mbo_dom_size);
 
        CDEBUG(D_INFO, "Get data along with open at %llu len %i, size %llu\n",
               rnb->rnb_offset, rnb->rnb_len, body->mbo_dom_size);
 
@@ -510,6 +523,8 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
        LASSERT((lnb.lnb_file_offset & ~PAGE_MASK) == 0);
        lnb.lnb_page_offset = 0;
        do {
        LASSERT((lnb.lnb_file_offset & ~PAGE_MASK) == 0);
        lnb.lnb_page_offset = 0;
        do {
+               struct cl_page *page;
+
                lnb.lnb_data = data + (index << PAGE_SHIFT);
                lnb.lnb_len = rnb->rnb_len - (index << PAGE_SHIFT);
                if (lnb.lnb_len > PAGE_SIZE)
                lnb.lnb_data = data + (index << PAGE_SHIFT);
                lnb.lnb_len = rnb->rnb_len - (index << PAGE_SHIFT);
                if (lnb.lnb_len > PAGE_SIZE)
@@ -525,9 +540,33 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
                              PTR_ERR(vmpage));
                        break;
                }
                              PTR_ERR(vmpage));
                        break;
                }
+               lock_page(vmpage);
+               if (vmpage->mapping == NULL) {
+                       unlock_page(vmpage);
+                       put_page(vmpage);
+                       /* page was truncated */
+                       break;
+               }
+               /* attach VM page to CL page cache */
+               page = cl_page_find(env, obj, vmpage->index, vmpage,
+                                   CPT_CACHEABLE);
+               if (IS_ERR(page)) {
+                       ClearPageUptodate(vmpage);
+                       unlock_page(vmpage);
+                       put_page(vmpage);
+                       break;
+               }
+               cl_page_export(env, page, 1);
+               cl_page_put(env, page);
+               unlock_page(vmpage);
                put_page(vmpage);
                index++;
        } while (rnb->rnb_len > (index << PAGE_SHIFT));
                put_page(vmpage);
                index++;
        } while (rnb->rnb_len > (index << PAGE_SHIFT));
+
+out_io:
+       cl_io_fini(env, io);
+       cl_env_put(env, &refcheck);
+
        EXIT;
 }
 
        EXIT;
 }
 
@@ -609,27 +648,21 @@ retry:
        rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
 
        if (!rc && itp->it_lock_mode) {
        rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
 
        if (!rc && itp->it_lock_mode) {
-               struct lustre_handle handle = {.cookie = itp->it_lock_handle};
-               struct ldlm_lock *lock;
-               bool has_dom_bit = false;
+               __u64 bits = 0;
 
                /* If we got a lock back and it has a LOOKUP bit set,
                 * make sure the dentry is marked as valid so we can find it.
                 * We don't need to care about actual hashing since other bits
                 * of kernel will deal with that later.
                 */
 
                /* If we got a lock back and it has a LOOKUP bit set,
                 * make sure the dentry is marked as valid so we can find it.
                 * We don't need to care about actual hashing since other bits
                 * of kernel will deal with that later.
                 */
-               lock = ldlm_handle2lock(&handle);
-               if (lock) {
-                       has_dom_bit = ldlm_has_dom(lock);
-                       if (lock->l_policy_data.l_inodebits.bits &
-                           MDS_INODELOCK_LOOKUP)
-                               d_lustre_revalidate(de);
-
-                       LDLM_LOCK_PUT(lock);
-               }
-               ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
-               if (has_dom_bit)
-                       ll_dom_finish_open(de->d_inode, req, itp);
+               ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, &bits);
+               if (bits & MDS_INODELOCK_LOOKUP)
+                       d_lustre_revalidate(de);
+               /* if DoM bit returned along with LAYOUT bit then there
+                * can be read-on-open data returned.
+                */
+               if (bits & MDS_INODELOCK_DOM && bits & MDS_INODELOCK_LAYOUT)
+                       ll_dom_finish_open(de->d_inode, req);
        }
 
 out:
        }
 
 out:
index 6a1c658..4363d09 100644 (file)
@@ -1197,8 +1197,7 @@ ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
                        struct lov_user_md **kbuf);
 void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
 
                        struct lov_user_md **kbuf);
 void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
 
-void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
-                       struct lookup_intent *it);
+void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req);
 
 /* Compute expected user md size when passing in a md from user space */
 static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
 
 /* Compute expected user md size when passing in a md from user space */
 static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
index 9b7241a..0fd9d76 100644 (file)
@@ -190,13 +190,6 @@ static int ll_dom_lock_cancel(struct inode *inode, struct ldlm_lock *lock)
        int rc;
        ENTRY;
 
        int rc;
        ENTRY;
 
-       if (!lli->lli_clob) {
-               /* due to DoM read on open, there may exist pages for Lustre
-                * regular file even though cl_object is not set up yet. */
-               truncate_inode_pages(inode->i_mapping, 0);
-               RETURN(0);
-       }
-
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));
        env = cl_env_get(&refcheck);
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));
@@ -664,10 +657,11 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                        }
                }
 
                        }
                }
 
-               if (it->it_op & IT_OPEN)
-                       ll_dom_finish_open(inode, request, it);
-
                ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
                ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
+               /* OPEN can return data if lock has DoM+LAYOUT bits set */
+               if (it->it_op & IT_OPEN &&
+                   bits & MDS_INODELOCK_DOM && bits & MDS_INODELOCK_LAYOUT)
+                       ll_dom_finish_open(inode, request);
 
                /* We used to query real size from OSTs here, but actually
                 * this is not needed. For stat() calls size would be updated
 
                /* We used to query real size from OSTs here, but actually
                 * this is not needed. For stat() calls size would be updated
index 69d1f07..72cf45c 100644 (file)
@@ -1434,6 +1434,11 @@ int mdc_object_prune(const struct lu_env *env, struct cl_object *obj)
 static int mdc_object_flush(const struct lu_env *env, struct cl_object *obj,
                            struct ldlm_lock *lock)
 {
 static int mdc_object_flush(const struct lu_env *env, struct cl_object *obj,
                            struct ldlm_lock *lock)
 {
+       /* if lock cancel is initiated from llite then it is combined
+        * lock with DOM bit and it may have no l_ast_data initialized yet,
+        * so init it here with given osc_object.
+        */
+       mdc_set_dom_lock_data(lock, cl2osc(obj));
        RETURN(mdc_dlm_blocking_ast0(env, lock, LDLM_CB_CANCELING));
 }
 
        RETURN(mdc_dlm_blocking_ast0(env, lock, LDLM_CB_CANCELING));
 }
 
index 2cfe0ee..a94494d 100644 (file)
@@ -155,6 +155,19 @@ test_6() {
 }
 run_test 6 "Race two writes, check file size"
 
 }
 run_test 6 "Race two writes, check file size"
 
+test_7() {
+       dd if=/dev/zero of=$DIR1/$tfile bs=1000 count=1
+       cancel_lru_locks
+
+       $MULTIOP $DIR1/$tfile or1000c
+       dd if=/dev/urandom of=$DIR2/$tfile bs=1000 count=1
+       local md5_1=$(md5sum $DIR/$tfile | awk '{ print $1 }')
+       local md5_2=$(md5sum $DIR2/$tfile | awk '{ print $1 }')
+       [[ $md5_1 == $md5_2 ]] ||
+               error "Client reads stale page"
+}
+run_test 7 "Stale pages after read-on-open"
+
 test_fsx() {
        local file1=$DIR1/$tfile
        local file2=$DIR2/$tfile
 test_fsx() {
        local file1=$DIR1/$tfile
        local file2=$DIR2/$tfile