Prevent stale pages after lock cancel by creating
cl_page connection for read-on-open pages.
This reverts commit 02e766f5ed ("LU-11427 llite: optimize read on open pages")
to fix the problem.
Since VM pages are connected to cl_object they can be
found and discarded by CLIO properly.
Fixes:
02e766f5ed ("LU-11427 llite: optimize read on open pages")
Test-Parameters: mdssizegb=20 testlist=dom-performance
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Iba8c87c934c442b4c0b45d7d3821ceede1a6e68f
Reviewed-on: https://review.whamcloud.com/39401
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Vitaly Fertman <vitaly.fertman@hpe.com>
Reviewed-by: Alexey Lyashkov <alexey.lyashkov@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
-void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
- struct lookup_intent *it)
+void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req)
+ struct lu_env *env;
+ struct cl_io *io;
struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *obj = lli->lli_clob;
struct address_space *mapping = inode->i_mapping;
struct ll_inode_info *lli = ll_i2info(inode);
struct cl_object *obj = lli->lli_clob;
struct address_space *mapping = inode->i_mapping;
char *data;
unsigned long index, start;
struct niobuf_local lnb;
char *data;
unsigned long index, start;
struct niobuf_local lnb;
+ __u16 refcheck;
+ int rc;
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN_EXIT;
+ io = vvp_env_thread_io(env);
+ io->ci_obj = obj;
+ io->ci_ignore_layout = 1;
+ rc = cl_io_init(env, io, CIT_MISC, obj);
+ if (rc)
+ GOTO(out_io, rc);
+
CDEBUG(D_INFO, "Get data along with open at %llu len %i, size %llu\n",
rnb->rnb_offset, rnb->rnb_len, body->mbo_dom_size);
CDEBUG(D_INFO, "Get data along with open at %llu len %i, size %llu\n",
rnb->rnb_offset, rnb->rnb_len, body->mbo_dom_size);
LASSERT((lnb.lnb_file_offset & ~PAGE_MASK) == 0);
lnb.lnb_page_offset = 0;
do {
LASSERT((lnb.lnb_file_offset & ~PAGE_MASK) == 0);
lnb.lnb_page_offset = 0;
do {
+ struct cl_page *page;
+
lnb.lnb_data = data + (index << PAGE_SHIFT);
lnb.lnb_len = rnb->rnb_len - (index << PAGE_SHIFT);
if (lnb.lnb_len > PAGE_SIZE)
lnb.lnb_data = data + (index << PAGE_SHIFT);
lnb.lnb_len = rnb->rnb_len - (index << PAGE_SHIFT);
if (lnb.lnb_len > PAGE_SIZE)
PTR_ERR(vmpage));
break;
}
PTR_ERR(vmpage));
break;
}
+ lock_page(vmpage);
+ if (vmpage->mapping == NULL) {
+ unlock_page(vmpage);
+ put_page(vmpage);
+ /* page was truncated */
+ break;
+ }
+ /* attach VM page to CL page cache */
+ page = cl_page_find(env, obj, vmpage->index, vmpage,
+ CPT_CACHEABLE);
+ if (IS_ERR(page)) {
+ ClearPageUptodate(vmpage);
+ unlock_page(vmpage);
+ put_page(vmpage);
+ break;
+ }
+ cl_page_export(env, page, 1);
+ cl_page_put(env, page);
+ unlock_page(vmpage);
put_page(vmpage);
index++;
} while (rnb->rnb_len > (index << PAGE_SHIFT));
put_page(vmpage);
index++;
} while (rnb->rnb_len > (index << PAGE_SHIFT));
+
+out_io:
+ cl_io_fini(env, io);
+ cl_env_put(env, &refcheck);
+
rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
if (!rc && itp->it_lock_mode) {
rc = ll_prep_inode(&de->d_inode, req, NULL, itp);
if (!rc && itp->it_lock_mode) {
- struct lustre_handle handle = {.cookie = itp->it_lock_handle};
- struct ldlm_lock *lock;
- bool has_dom_bit = false;
/* If we got a lock back and it has a LOOKUP bit set,
* make sure the dentry is marked as valid so we can find it.
* We don't need to care about actual hashing since other bits
* of kernel will deal with that later.
*/
/* If we got a lock back and it has a LOOKUP bit set,
* make sure the dentry is marked as valid so we can find it.
* We don't need to care about actual hashing since other bits
* of kernel will deal with that later.
*/
- lock = ldlm_handle2lock(&handle);
- if (lock) {
- has_dom_bit = ldlm_has_dom(lock);
- if (lock->l_policy_data.l_inodebits.bits &
- MDS_INODELOCK_LOOKUP)
- d_lustre_revalidate(de);
-
- LDLM_LOCK_PUT(lock);
- }
- ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, NULL);
- if (has_dom_bit)
- ll_dom_finish_open(de->d_inode, req, itp);
+ ll_set_lock_data(sbi->ll_md_exp, de->d_inode, itp, &bits);
+ if (bits & MDS_INODELOCK_LOOKUP)
+ d_lustre_revalidate(de);
+ /* if DoM bit returned along with LAYOUT bit then there
+ * can be read-on-open data returned.
+ */
+ if (bits & MDS_INODELOCK_DOM && bits & MDS_INODELOCK_LAYOUT)
+ ll_dom_finish_open(de->d_inode, req);
struct lov_user_md **kbuf);
void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
struct lov_user_md **kbuf);
void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req);
-void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req,
- struct lookup_intent *it);
+void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req);
/* Compute expected user md size when passing in a md from user space */
static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
/* Compute expected user md size when passing in a md from user space */
static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
- if (!lli->lli_clob) {
- /* due to DoM read on open, there may exist pages for Lustre
- * regular file even though cl_object is not set up yet. */
- truncate_inode_pages(inode->i_mapping, 0);
- RETURN(0);
- }
-
env = cl_env_get(&refcheck);
if (IS_ERR(env))
RETURN(PTR_ERR(env));
env = cl_env_get(&refcheck);
if (IS_ERR(env))
RETURN(PTR_ERR(env));
- if (it->it_op & IT_OPEN)
- ll_dom_finish_open(inode, request, it);
-
ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
ll_set_lock_data(ll_i2sbi(parent)->ll_md_exp, inode, it, &bits);
+ /* OPEN can return data if lock has DoM+LAYOUT bits set */
+ if (it->it_op & IT_OPEN &&
+ bits & MDS_INODELOCK_DOM && bits & MDS_INODELOCK_LAYOUT)
+ ll_dom_finish_open(inode, request);
/* We used to query real size from OSTs here, but actually
* this is not needed. For stat() calls size would be updated
/* We used to query real size from OSTs here, but actually
* this is not needed. For stat() calls size would be updated
static int mdc_object_flush(const struct lu_env *env, struct cl_object *obj,
struct ldlm_lock *lock)
{
static int mdc_object_flush(const struct lu_env *env, struct cl_object *obj,
struct ldlm_lock *lock)
{
+ /* if lock cancel is initiated from llite then it is combined
+ * lock with DOM bit and it may have no l_ast_data initialized yet,
+ * so init it here with given osc_object.
+ */
+ mdc_set_dom_lock_data(lock, cl2osc(obj));
RETURN(mdc_dlm_blocking_ast0(env, lock, LDLM_CB_CANCELING));
}
RETURN(mdc_dlm_blocking_ast0(env, lock, LDLM_CB_CANCELING));
}
}
run_test 6 "Race two writes, check file size"
}
run_test 6 "Race two writes, check file size"
test_7() {
	# Regression test: a client must not serve stale cached pages that
	# were populated via DoM read-on-open after the covering lock was
	# cancelled and the file was modified from a second mount point.

	# Create the file from the first client, then drop all locks so the
	# subsequent open has to fetch data (and a lock) from the MDS.
	dd if=/dev/zero of=$DIR1/$tfile bs=1000 count=1 ||
		error "initial dd write failed"
	cancel_lru_locks

	# Open+read on client 1: read-on-open returns the data inline and
	# caches it in the page cache.
	$MULTIOP $DIR1/$tfile or1000c || error "multiop read failed"
	# Overwrite from the second mount point; this must cancel client 1's
	# DoM lock and discard its cached pages.
	dd if=/dev/urandom of=$DIR2/$tfile bs=1000 count=1 ||
		error "dd overwrite from second mount failed"
	# Both clients must now see identical content; a mismatch means
	# client 1 served a stale read-on-open page.
	local md5_1=$(md5sum $DIR/$tfile | awk '{ print $1 }')
	local md5_2=$(md5sum $DIR2/$tfile | awk '{ print $1 }')
	[[ $md5_1 == $md5_2 ]] ||
		error "Client reads stale page"
}
run_test 7 "Stale pages after read-on-open"
+
test_fsx() {
local file1=$DIR1/$tfile
local file2=$DIR2/$tfile
test_fsx() {
local file1=$DIR1/$tfile
local file2=$DIR2/$tfile