From a0b283dcd3846f5ab5a7ba9e2e83b7f5d6db40c1 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Wed, 22 Aug 2018 21:19:48 +0800 Subject: [PATCH] LU-10918 pcc: auto RO-PCC caching when O_RDONLY open files During the file open() operation, if the file is being opened with O_RDONLY flags, and the file matches the predefined rule, it will be prefetched and attached into RO-PCC automatically. Test-Parameters: clientcount=3 testlist=sanity-pcc,sanity-pcc,sanity-pcc Signed-off-by: Qian Yingjin Change-Id: Ib2c2ab51d67aed84eb7676c8df191faa33dfad39 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/38346 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Li Xi --- lustre/llite/file.c | 11 ++-- lustre/llite/namei.c | 16 +++++ lustre/llite/pcc.c | 147 +++++++++++++++++++++++++++++++++++++++------ lustre/llite/pcc.h | 1 + lustre/tests/sanity-pcc.sh | 132 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 282 insertions(+), 25 deletions(-) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index b6800b9..b50c94b 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1049,12 +1049,15 @@ restart: GOTO(out_och_free, rc); } - rc = pcc_file_open(inode, file); - if (rc) - GOTO(out_och_free, rc); - mutex_unlock(&lli->lli_och_mutex); + /* It is not from atomic_open(). 
*/ + if (it == &oit) { + rc = pcc_file_open(inode, file); + if (rc) + GOTO(out_och_free, rc); + } + fd = NULL; /* Must do this outside lli_och_mutex lock to prevent deadlock where diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index b707de8..8074eb6 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1224,6 +1224,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, item.pm_projid = ll_i2info(dir)->lli_projid; item.pm_name = &dentry->d_name; dataset = pcc_dataset_match_get(&sbi->ll_pcc_super, + LU_PCC_READWRITE, &item); pca.pca_dataset = dataset; } @@ -1346,6 +1347,20 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, */ if (de != NULL) dput(de); + + if (rc) + GOTO(out_release, rc); + + /* Auto PCC-RO attach during PCC open will try + * to change the layout to read-only state. If + * the intent open returns the lock with + * MDS_INODELOCK_LAYOUT bit set, it may cause + * a deadlock. Thus it is better to release + * the intent lock first before calling PCC open. 
+ */ + ll_intent_release(it); + rc = pcc_file_open(dentry->d_inode, file); + GOTO(out_free, rc); } } else { rc = finish_no_open(file, de); @@ -1356,6 +1371,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, out_release: ll_intent_release(it); +out_free: OBD_FREE(it, sizeof(*it)); clear: ll_clear_inode_lock_owner(dir); diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index e9a4cc3..95c9a63 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -643,18 +643,29 @@ pcc_cond_match(struct pcc_match_rule *rule, struct pcc_matcher *matcher) return 0; } +static inline bool +pcc_dataset_attach_allowed(struct pcc_dataset *dataset, enum lu_pcc_type type) +{ + if (type == LU_PCC_READWRITE && dataset->pccd_flags & PCC_DATASET_PCCRW) + return true; + + if (type == LU_PCC_READONLY && dataset->pccd_flags & PCC_DATASET_PCCRO) + return true; + + return false; +} + struct pcc_dataset* -pcc_dataset_match_get(struct pcc_super *super, struct pcc_matcher *matcher) +pcc_dataset_match_get(struct pcc_super *super, enum lu_pcc_type type, + struct pcc_matcher *matcher) { struct pcc_dataset *dataset; struct pcc_dataset *selected = NULL; down_read(&super->pccs_rw_sem); list_for_each_entry(dataset, &super->pccs_datasets, pccd_linkage) { - if (!(dataset->pccd_flags & PCC_DATASET_PCCRW)) - continue; - - if (pcc_cond_match(&dataset->pccd_rule, matcher)) { + if (pcc_dataset_attach_allowed(dataset, type) && + pcc_cond_match(&dataset->pccd_rule, matcher)) { atomic_inc(&dataset->pccd_refcount); selected = dataset; break; @@ -1072,15 +1083,20 @@ void pcc_file_init(struct pcc_file *pccf) pccf->pccf_type = LU_PCC_NONE; } -static inline bool pcc_auto_attach_enabled(enum pcc_dataset_flags flags, +static inline bool pcc_auto_attach_enabled(struct pcc_dataset *dataset, + enum lu_pcc_type type, enum pcc_io_type iot) { - if (iot == PIT_OPEN) - return flags & PCC_DATASET_OPEN_ATTACH; - if (iot == PIT_GETATTR) - return flags & PCC_DATASET_STAT_ATTACH; - else - return flags & 
PCC_DATASET_AUTO_ATTACH; + if (pcc_dataset_attach_allowed(dataset, type)) { + if (iot == PIT_OPEN) + return dataset->pccd_flags & PCC_DATASET_OPEN_ATTACH; + if (iot == PIT_GETATTR) + return dataset->pccd_flags & PCC_DATASET_STAT_ATTACH; + else + return dataset->pccd_flags & PCC_DATASET_AUTO_ATTACH; + } + + return false; } static const char pcc_xattr_layout[] = XATTR_USER_PREFIX "PCC.layout"; @@ -1333,7 +1349,7 @@ static int pcc_try_datasets_attach(struct inode *inode, enum pcc_io_type iot, down_read(&super->pccs_rw_sem); list_for_each_entry_safe(dataset, tmp, &super->pccs_datasets, pccd_linkage) { - if (!pcc_auto_attach_enabled(dataset->pccd_flags, iot)) + if (!pcc_auto_attach_enabled(dataset, type, iot)) break; rc = pcc_try_dataset_attach(inode, gen, type, dataset, cached); @@ -1385,6 +1401,53 @@ static int pcc_try_datasets_attach(struct inode *inode, enum pcc_io_type iot, RETURN(rc); } +static int pcc_readonly_ioctl_attach(struct file *file, struct inode *inode, + __u32 roid); + +/* Call with pcci_mutex hold */ +static int pcc_try_readonly_open_attach(struct inode *inode, struct file *file, + bool *cached) +{ + struct dentry *dentry = file->f_path.dentry; + struct pcc_dataset *dataset; + struct pcc_matcher item; + struct pcc_inode *pcci; + int rc = 0; + + ENTRY; + + if (!((file->f_flags & O_ACCMODE) == O_RDONLY)) + RETURN(0); + + item.pm_uid = from_kuid(&init_user_ns, current_uid()); + item.pm_gid = from_kgid(&init_user_ns, current_gid()); + item.pm_projid = ll_i2info(inode)->lli_projid; + item.pm_name = &dentry->d_name; + dataset = pcc_dataset_match_get(&ll_i2sbi(inode)->ll_pcc_super, + LU_PCC_READONLY, &item); + if (dataset == NULL) + RETURN(0); + + if ((dataset->pccd_flags & PCC_DATASET_PCC_ALL) == PCC_DATASET_PCCRO) { + pcc_inode_unlock(inode); + rc = pcc_readonly_ioctl_attach(file, inode, dataset->pccd_roid); + pcc_inode_lock(inode); + pcci = ll_i2pcci(inode); + if (pcci && pcc_inode_has_layout(pcci)) + *cached = true; + if (rc) { + CDEBUG(D_CACHE, + "Failed 
to try PCC-RO attach "DFID", rc = %d\n", + PFID(&ll_i2info(inode)->lli_fid), rc); + /* ignore the error during auto PCC-RO attach. */ + rc = 0; + } + } + + pcc_dataset_put(dataset); + RETURN(rc); +} + /* * TODO: For RW-PCC, it is desirable to store HSM info as a layout (LU-10606). * Thus the client can get archive ID from the layout directly. When try to @@ -1513,6 +1576,9 @@ int pcc_file_open(struct inode *inode, struct file *file) if (pcc_may_auto_attach(inode, PIT_OPEN)) rc = pcc_try_auto_attach(inode, &cached, PIT_OPEN); + if (rc == 0 && !cached) + rc = pcc_try_readonly_open_attach(inode, file, &cached); + if (rc < 0 || !cached) GOTO(out_unlock, rc); @@ -1581,8 +1647,14 @@ static bool pcc_io_tolerate(struct pcc_inode *pcci, * in pcc_page_mkwrite(). */ } else if (pcci->pcci_type == LU_PCC_READONLY) { + /* + * For async I/O engine such as libaio and io_uring, PCC read + * should not tolerate -EAGAIN/-EIOCBQUEUED errors, return + * the error code to the caller directly. + */ if ((iot == PIT_READ || iot == PIT_GETATTR || - iot == PIT_SPLICE_READ) && rc < 0 && rc != -ENOMEM) + iot == PIT_SPLICE_READ) && rc < 0 && rc != -ENOMEM && + rc != -EAGAIN && rc != -EIOCBQUEUED) return false; if (iot == PIT_FAULT && (rc & VM_FAULT_SIGBUS) && !(rc & VM_FAULT_OOM)) @@ -2497,6 +2569,15 @@ static ssize_t pcc_copy_data(struct file *src, struct file *dst) ENTRY; +#ifdef FMODE_CAN_READ + /* Need to add FMODE_CAN_READ flags here, otherwise the check in + * kernel_read() during open() for auto PCC-RO attach will fail. 
+ */ + if ((src->f_mode & FMODE_READ) && + likely(src->f_op->read || src->f_op->read_iter)) + src->f_mode |= FMODE_CAN_READ; +#endif + OBD_ALLOC_LARGE(buf, buf_len); if (buf == NULL) RETURN(-ENOMEM); @@ -2553,6 +2634,7 @@ static int pcc_attach_data_archive(struct file *file, struct inode *inode, { const struct cred *old_cred; struct file *pcc_filp; + bool direct = false; struct path path; ssize_t ret; int rc; @@ -2577,7 +2659,23 @@ static int pcc_attach_data_archive(struct file *file, struct inode *inode, if (rc) GOTO(out_fput, rc); + /* + * When attaching a file at file open() time in direct I/O mode, the + * data copy from Lustre OSTs to the PCC copy in the kernel will report + * an -EFAULT error as the buffer is allocated in kernel space, not + * in user space. + * Thus the O_DIRECT flag must be masked out of the file handle during + * the data copy. After the data copy has finished, restore the flag in + * the file handle. + */ + if (file->f_flags & O_DIRECT) { + file->f_flags &= ~O_DIRECT; + direct = true; + } + ret = pcc_copy_data(file, pcc_filp); + if (direct) + file->f_flags |= O_DIRECT; if (ret < 0) GOTO(out_fput, rc = ret); @@ -2649,7 +2747,6 @@ out_unlock: } out_dataset_put: pcc_dataset_put(dataset); - RETURN(rc); } @@ -2772,9 +2869,16 @@ repeat: RETURN(rc); } +static void pcc_readonly_attach_fini(struct inode *inode) +{ + pcc_inode_lock(inode); + ll_i2info(inode)->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING; + pcc_inode_unlock(inode); +} + static int pcc_readonly_ioctl_attach(struct file *file, struct inode *inode, - struct lu_pcc_attach *attach) + __u32 roid) { struct ll_sb_info *sbi = ll_i2sbi(inode); struct pcc_super *super = ll_i2pccs(inode); @@ -2799,12 +2903,12 @@ static int pcc_readonly_ioctl_attach(struct file *file, rc = pcc_layout_rdonly_set(inode, &gen); if (rc) - RETURN(rc); + GOTO(out_fini, rc); dataset = pcc_dataset_get(&ll_s2sbi(inode->i_sb)->ll_pcc_super, - LU_PCC_READONLY, attach->pcca_id); + LU_PCC_READONLY, roid); if (dataset == NULL) - 
RETURN(-ENOENT); + GOTO(out_fini, rc = -ENOENT); rc = pcc_attach_data_archive(file, inode, dataset, &dentry); if (rc) @@ -2855,6 +2959,8 @@ out_put_unlock: mutex_unlock(&lli->lli_layout_mutex); out_dataset_put: pcc_dataset_put(dataset); +out_fini: + pcc_readonly_attach_fini(inode); RETURN(rc); } @@ -2871,7 +2977,8 @@ int pcc_ioctl_attach(struct file *file, struct inode *inode, rc = -EOPNOTSUPP; break; case LU_PCC_READONLY: - rc = pcc_readonly_ioctl_attach(file, inode, attach); + rc = pcc_readonly_ioctl_attach(file, inode, + attach->pcca_id); break; default: rc = -EINVAL; diff --git a/lustre/llite/pcc.h b/lustre/llite/pcc.h index d13b1b0..a6ab770 100644 --- a/lustre/llite/pcc.h +++ b/lustre/llite/pcc.h @@ -265,6 +265,7 @@ int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca); void pcc_create_attach_cleanup(struct super_block *sb, struct pcc_create_attach *pca); struct pcc_dataset *pcc_dataset_match_get(struct pcc_super *super, + enum lu_pcc_type type, struct pcc_matcher *matcher); void pcc_dataset_put(struct pcc_dataset *dataset); void pcc_inode_free(struct inode *inode); diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 221919b..a852860 100755 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -213,7 +213,7 @@ setup_loopdev() { local mntpt=$3 local size=${4:-50} - do_facet $facet mkdir -p $mntpt || error "mkdir -p $hsm_root failed" + do_facet $facet mkdir -p $mntpt || error "mkdir -p $mntpt failed" stack_trap "do_facet $facet rm -rf $mntpt" EXIT do_facet $facet dd if=/dev/zero of=$file bs=1M count=$size stack_trap "do_facet $facet rm -f $file" EXIT @@ -1078,6 +1078,62 @@ test_10d() { } run_test 10d "Test RO-PCC with group quota on loop PCC device" +test_usrgrp_edquot() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tfile + local id=$RUNAS_ID + local ug=$1 + + $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || + skip "Server 
does not support PCC-RO" + + [[ $ug == "g" ]] && id=$RUNAS_GID + setup_loopdev $SINGLEAGT $loopfile $mntpt 50 + do_facet $SINGLEAGT quotacheck -c$ug $mntpt || + error "quotacheck -c$ug $mntpt failed" + do_facet $SINGLEAGT quotaon -$ug $mntpt || + error "quotaon -$ug $mntpt failed" + do_facet $SINGLEAGT setquota -$ug $id 0 4096 0 0 $mntpt || + error "setquota -$ug $id on $mntpt failed" + do_facet $SINGLEAGT repquota -${ug}vs $mntpt + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "${ug}id={$id}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + + dd if=/dev/zero of=$file bs=1M count=2 || + error "dd write $file failed" + chown $RUNAS_ID:$RUNAS_GID $file || + error "chown $RUNAS_ID:$RUNAS_GID $file failed" + do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1M count=2 || + error "dd read $file failed" + check_lpcc_state $file "readonly" + $LFS getstripe -v $file + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=5 || + error "dd write $file failed" + check_lpcc_state $file "none" + do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1M count=5 || + error "dd read $file failed" + do_facet $SINGLEAGT $LFS pcc state $file + $LFS getstripe -v $file + do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file || + error "PCC-RO attach $file failed" + + do_facet $SINGLEAGT $LFS pcc detach $file || error "detach $file failed" +} + +test_10e() { + test_usrgrp_edquot "u" +} +run_test 10e "Tolerate -EDQUOT failure when auto PCC-RO attach with user quota" + +test_10f() { + test_usrgrp_edquot "g" +} +run_test 10f "Tolerate -EDQUOT failure when auto PCC-RO attach with group quota" + test_11() { local loopfile="$TMP/$tfile" local mntpt="/mnt/pcc.$tdir" @@ -2511,6 +2567,80 @@ test_28() { } run_test 28 "RW-PCC attach should fail when the file has cluster-wide openers" +test_29a() { + local project_id=100 + local agt_facet=$SINGLEAGT + local loopfile="$TMP/$tfile" + local 
mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + + $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || + skip "Server does not support PCC-RO" + + is_project_quota_supported || skip "project quota is not supported" + + enable_project_quota + setup_loopdev $SINGLEAGT $loopfile $mntpt 50 + copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER" + setup_pcc_mapping $SINGLEAGT \ + "projid={$project_id}\ rwid=$HSM_ARCHIVE_NUMBER\ pccro=1" + $LCTL pcc list $MOUNT + + do_facet $SINGLEAGT mkdir -p $DIR/$tdir || + error "mkdir $DIR/$tdir failed" + do_facet $SINGLEAGT "echo -n ro_uptodate > $file" || + error "failed to write $file" + check_lpcc_state $file "none" + $LFS project -sp $project_id $file || + error "failed to set project for $file" + $LFS project -d $file + check_lpcc_state $file "readonly" + check_file_data $SINGLEAGT $file "ro_uptodate" + + echo -n Update_ro_data > $file2 + check_lpcc_state $file "readonly" + check_file_data $SINGLEAGT $file "Update_ro_data" + + do_facet $SINGLEAGT $LFS pcc detach $file || + error "failed to detach $file" +} +run_test 29a "Auto readonly caching on RO-PCC backend for O_RDONLY open" + +test_29b() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/myfile.dat + + $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || + skip "Server does not support PCC-RO" + + setup_loopdev $SINGLEAGT $loopfile $mntpt 50 + do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed" + setup_pcc_mapping $SINGLEAGT \ + "fname={*.dat}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1" + do_facet $SINGLEAGT $LCTL pcc list $MOUNT + + do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=4k count=1 || + error "Write $file failed" + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=4k count=1 || + error "Read $file failed" + do_facet $SINGLEAGT $LFS pcc state $file + check_lpcc_state $file "readonly" + do_facet $SINGLEAGT dd 
if=/dev/zero of=$file bs=4k count=1 || + error "Write $file failed" + sysctl vm.drop_caches=3 + do_facet $SINGLEAGT dd if=$file of=/dev/null bs=4k count=1 || + error "Read $file failed" + do_facet $SINGLEAGT $LFS pcc state $file + check_lpcc_state $file "readonly" + + do_facet $SINGLEAGT $LFS pcc detach $file || error "detach $file failed" +} +run_test 29b "Auto PCC-RO attach in atomic_open" + #test 101: containers and PCC #LU-15170: Test mount namespaces with PCC #This tests the cases where the PCC mount is not present in the container by -- 1.8.3.1