Whamcloud - gitweb
LU-10918 pcc: auto RO-PCC caching when O_RDONLY open files 46/38346/33
author Qian Yingjin <qian@ddn.com>
Wed, 22 Aug 2018 13:19:48 +0000 (21:19 +0800)
committer Oleg Drokin <green@whamcloud.com>
Wed, 5 Jun 2024 04:47:41 +0000 (04:47 +0000)
During the file open() operation, if the file is being opened with
O_RDONLY flags, and the file matches the predefined rule, it will
be prefetched and attached into RO-PCC automatically.

Test-Parameters: clientcount=3 testlist=sanity-pcc,sanity-pcc,sanity-pcc
Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: Ib2c2ab51d67aed84eb7676c8df191faa33dfad39
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/38346
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Li Xi <lixi@ddn.com>
lustre/llite/file.c
lustre/llite/namei.c
lustre/llite/pcc.c
lustre/llite/pcc.h
lustre/tests/sanity-pcc.sh

index b6800b9..b50c94b 100644 (file)
@@ -1049,12 +1049,15 @@ restart:
                        GOTO(out_och_free, rc);
        }
 
-       rc = pcc_file_open(inode, file);
-       if (rc)
-               GOTO(out_och_free, rc);
-
        mutex_unlock(&lli->lli_och_mutex);
 
+       /* It is not from atomic_open(). */
+       if (it == &oit) {
+               rc = pcc_file_open(inode, file);
+               if (rc)
+                       GOTO(out_och_free, rc);
+       }
+
        fd = NULL;
 
        /* Must do this outside lli_och_mutex lock to prevent deadlock where
index b707de8..8074eb6 100644 (file)
@@ -1224,6 +1224,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                        item.pm_projid = ll_i2info(dir)->lli_projid;
                        item.pm_name = &dentry->d_name;
                        dataset = pcc_dataset_match_get(&sbi->ll_pcc_super,
+                                                       LU_PCC_READWRITE,
                                                        &item);
                        pca.pca_dataset = dataset;
                }
@@ -1346,6 +1347,20 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
                                 */
                                if (de != NULL)
                                        dput(de);
+
+                               if (rc)
+                                       GOTO(out_release, rc);
+
+                               /* Auto PCC-RO attach during PCC open will try
+                                * to change the layout to read-only state. If
+                                * the intent open returns the lock with
+                                * MDS_INODELOCK_LAYOUT bit set, it may cause
+                                * dead lock. Thus it would better to release
+                                * the intent lock first before call PCC open.
+                                */
+                               ll_intent_release(it);
+                               rc = pcc_file_open(dentry->d_inode, file);
+                               GOTO(out_free, rc);
                        }
                } else {
                        rc = finish_no_open(file, de);
@@ -1356,6 +1371,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 
 out_release:
        ll_intent_release(it);
+out_free:
        OBD_FREE(it, sizeof(*it));
 clear:
        ll_clear_inode_lock_owner(dir);
index e9a4cc3..95c9a63 100644 (file)
@@ -643,18 +643,29 @@ pcc_cond_match(struct pcc_match_rule *rule, struct pcc_matcher *matcher)
        return 0;
 }
 
+static inline bool
+pcc_dataset_attach_allowed(struct pcc_dataset *dataset, enum lu_pcc_type type)
+{
+       if (type == LU_PCC_READWRITE && dataset->pccd_flags & PCC_DATASET_PCCRW)
+               return true;
+
+       if (type == LU_PCC_READONLY && dataset->pccd_flags & PCC_DATASET_PCCRO)
+               return true;
+
+       return false;
+}
+
 struct pcc_dataset*
-pcc_dataset_match_get(struct pcc_super *super, struct pcc_matcher *matcher)
+pcc_dataset_match_get(struct pcc_super *super, enum lu_pcc_type type,
+                     struct pcc_matcher *matcher)
 {
        struct pcc_dataset *dataset;
        struct pcc_dataset *selected = NULL;
 
        down_read(&super->pccs_rw_sem);
        list_for_each_entry(dataset, &super->pccs_datasets, pccd_linkage) {
-               if (!(dataset->pccd_flags & PCC_DATASET_PCCRW))
-                       continue;
-
-               if (pcc_cond_match(&dataset->pccd_rule, matcher)) {
+               if (pcc_dataset_attach_allowed(dataset, type) &&
+                   pcc_cond_match(&dataset->pccd_rule, matcher)) {
                        atomic_inc(&dataset->pccd_refcount);
                        selected = dataset;
                        break;
@@ -1072,15 +1083,20 @@ void pcc_file_init(struct pcc_file *pccf)
        pccf->pccf_type = LU_PCC_NONE;
 }
 
-static inline bool pcc_auto_attach_enabled(enum pcc_dataset_flags flags,
+static inline bool pcc_auto_attach_enabled(struct pcc_dataset *dataset,
+                                          enum lu_pcc_type type,
                                           enum pcc_io_type iot)
 {
-       if (iot == PIT_OPEN)
-               return flags & PCC_DATASET_OPEN_ATTACH;
-       if (iot == PIT_GETATTR)
-               return flags & PCC_DATASET_STAT_ATTACH;
-       else
-               return flags & PCC_DATASET_AUTO_ATTACH;
+       if (pcc_dataset_attach_allowed(dataset, type)) {
+               if (iot == PIT_OPEN)
+                       return dataset->pccd_flags & PCC_DATASET_OPEN_ATTACH;
+               if (iot == PIT_GETATTR)
+                       return dataset->pccd_flags & PCC_DATASET_STAT_ATTACH;
+               else
+                       return dataset->pccd_flags & PCC_DATASET_AUTO_ATTACH;
+       }
+
+       return false;
 }
 
 static const char pcc_xattr_layout[] = XATTR_USER_PREFIX "PCC.layout";
@@ -1333,7 +1349,7 @@ static int pcc_try_datasets_attach(struct inode *inode, enum pcc_io_type iot,
        down_read(&super->pccs_rw_sem);
        list_for_each_entry_safe(dataset, tmp,
                                 &super->pccs_datasets, pccd_linkage) {
-               if (!pcc_auto_attach_enabled(dataset->pccd_flags, iot))
+               if (!pcc_auto_attach_enabled(dataset, type, iot))
                        break;
 
                rc = pcc_try_dataset_attach(inode, gen, type, dataset, cached);
@@ -1385,6 +1401,53 @@ static int pcc_try_datasets_attach(struct inode *inode, enum pcc_io_type iot,
        RETURN(rc);
 }
 
+static int pcc_readonly_ioctl_attach(struct file *file, struct inode *inode,
+                                    __u32 roid);
+
+/* Call with pcci_mutex hold */
+static int pcc_try_readonly_open_attach(struct inode *inode, struct file *file,
+                                       bool *cached)
+{
+       struct dentry *dentry = file->f_path.dentry;
+       struct pcc_dataset *dataset;
+       struct pcc_matcher item;
+       struct pcc_inode *pcci;
+       int rc = 0;
+
+       ENTRY;
+
+       if (!((file->f_flags & O_ACCMODE) == O_RDONLY))
+               RETURN(0);
+
+       item.pm_uid = from_kuid(&init_user_ns, current_uid());
+       item.pm_gid = from_kgid(&init_user_ns, current_gid());
+       item.pm_projid = ll_i2info(inode)->lli_projid;
+       item.pm_name = &dentry->d_name;
+       dataset = pcc_dataset_match_get(&ll_i2sbi(inode)->ll_pcc_super,
+                                       LU_PCC_READONLY, &item);
+       if (dataset == NULL)
+               RETURN(0);
+
+       if ((dataset->pccd_flags & PCC_DATASET_PCC_ALL) == PCC_DATASET_PCCRO) {
+               pcc_inode_unlock(inode);
+               rc = pcc_readonly_ioctl_attach(file, inode, dataset->pccd_roid);
+               pcc_inode_lock(inode);
+               pcci = ll_i2pcci(inode);
+               if (pcci && pcc_inode_has_layout(pcci))
+                       *cached = true;
+               if (rc) {
+                       CDEBUG(D_CACHE,
+                              "Failed to try PCC-RO attach "DFID", rc = %d\n",
+                              PFID(&ll_i2info(inode)->lli_fid), rc);
+                       /* ignore the error during auto PCC-RO attach. */
+                       rc = 0;
+               }
+       }
+
+       pcc_dataset_put(dataset);
+       RETURN(rc);
+}
+
 /*
  * TODO: For RW-PCC, it is desirable to store HSM info as a layout (LU-10606).
  * Thus the client can get archive ID from the layout directly. When try to
@@ -1513,6 +1576,9 @@ int pcc_file_open(struct inode *inode, struct file *file)
                if (pcc_may_auto_attach(inode, PIT_OPEN))
                        rc = pcc_try_auto_attach(inode, &cached, PIT_OPEN);
 
+               if (rc == 0 && !cached)
+                       rc = pcc_try_readonly_open_attach(inode, file, &cached);
+
                if (rc < 0 || !cached)
                        GOTO(out_unlock, rc);
 
@@ -1581,8 +1647,14 @@ static bool pcc_io_tolerate(struct pcc_inode *pcci,
                 * in pcc_page_mkwrite().
                 */
        } else if (pcci->pcci_type == LU_PCC_READONLY) {
+               /*
+                * For async I/O engine such as libaio and io_uring, PCC read
+                * should not tolerate -EAGAIN/-EIOCBQUEUED errors, return
+                * the error code to the caller directly.
+                */
                if ((iot == PIT_READ || iot == PIT_GETATTR ||
-                    iot == PIT_SPLICE_READ) && rc < 0 && rc != -ENOMEM)
+                    iot == PIT_SPLICE_READ) && rc < 0 && rc != -ENOMEM &&
+                    rc != -EAGAIN && rc != -EIOCBQUEUED)
                        return false;
                if (iot == PIT_FAULT && (rc & VM_FAULT_SIGBUS) &&
                    !(rc & VM_FAULT_OOM))
@@ -2497,6 +2569,15 @@ static ssize_t pcc_copy_data(struct file *src, struct file *dst)
 
        ENTRY;
 
+#ifdef FMODE_CAN_READ
+       /* Need to add FMODE_CAN_READ flags here, otherwise the check in
+        * kernel_read() during open() for auto PCC-RO attach will fail.
+        */
+       if ((src->f_mode & FMODE_READ) &&
+           likely(src->f_op->read || src->f_op->read_iter))
+               src->f_mode |= FMODE_CAN_READ;
+#endif
+
        OBD_ALLOC_LARGE(buf, buf_len);
        if (buf == NULL)
                RETURN(-ENOMEM);
@@ -2553,6 +2634,7 @@ static int pcc_attach_data_archive(struct file *file, struct inode *inode,
 {
        const struct cred *old_cred;
        struct file *pcc_filp;
+       bool direct = false;
        struct path path;
        ssize_t ret;
        int rc;
@@ -2577,7 +2659,23 @@ static int pcc_attach_data_archive(struct file *file, struct inode *inode,
        if (rc)
                GOTO(out_fput, rc);
 
+       /*
+        * When attach a file at file open() time with direct I/O mode, the
+        * data copy from Lustre OSTs to PCC copy in kernel will report
+        * -EFAULT error as the buffer is allocated in the kernel space, not
+        * from the user space.
+        * Thus it needs to unmask O_DIRECT flag from the file handle during
+        * data copy. After finished data copying, restore the flag in the
+        * file handle.
+        */
+       if (file->f_flags & O_DIRECT) {
+               file->f_flags &= ~O_DIRECT;
+               direct = true;
+       }
+
        ret = pcc_copy_data(file, pcc_filp);
+       if (direct)
+               file->f_flags |= O_DIRECT;
        if (ret < 0)
                GOTO(out_fput, rc = ret);
 
@@ -2649,7 +2747,6 @@ out_unlock:
        }
 out_dataset_put:
        pcc_dataset_put(dataset);
-
        RETURN(rc);
 }
 
@@ -2772,9 +2869,16 @@ repeat:
        RETURN(rc);
 }
 
+static void pcc_readonly_attach_fini(struct inode *inode)
+{
+       pcc_inode_lock(inode);
+       ll_i2info(inode)->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING;
+       pcc_inode_unlock(inode);
+}
+
 static int pcc_readonly_ioctl_attach(struct file *file,
                                     struct inode *inode,
-                                    struct lu_pcc_attach *attach)
+                                    __u32 roid)
 {
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct pcc_super *super = ll_i2pccs(inode);
@@ -2799,12 +2903,12 @@ static int pcc_readonly_ioctl_attach(struct file *file,
 
        rc = pcc_layout_rdonly_set(inode, &gen);
        if (rc)
-               RETURN(rc);
+               GOTO(out_fini, rc);
 
        dataset = pcc_dataset_get(&ll_s2sbi(inode->i_sb)->ll_pcc_super,
-                                 LU_PCC_READONLY, attach->pcca_id);
+                                 LU_PCC_READONLY, roid);
        if (dataset == NULL)
-               RETURN(-ENOENT);
+               GOTO(out_fini, rc = -ENOENT);
 
        rc = pcc_attach_data_archive(file, inode, dataset, &dentry);
        if (rc)
@@ -2855,6 +2959,8 @@ out_put_unlock:
        mutex_unlock(&lli->lli_layout_mutex);
 out_dataset_put:
        pcc_dataset_put(dataset);
+out_fini:
+       pcc_readonly_attach_fini(inode);
 
        RETURN(rc);
 }
@@ -2871,7 +2977,8 @@ int pcc_ioctl_attach(struct file *file, struct inode *inode,
                rc = -EOPNOTSUPP;
                break;
        case LU_PCC_READONLY:
-               rc = pcc_readonly_ioctl_attach(file, inode, attach);
+               rc = pcc_readonly_ioctl_attach(file, inode,
+                                              attach->pcca_id);
                break;
        default:
                rc = -EINVAL;
index d13b1b0..a6ab770 100644 (file)
@@ -265,6 +265,7 @@ int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca);
 void pcc_create_attach_cleanup(struct super_block *sb,
                               struct pcc_create_attach *pca);
 struct pcc_dataset *pcc_dataset_match_get(struct pcc_super *super,
+                                         enum lu_pcc_type type,
                                          struct pcc_matcher *matcher);
 void pcc_dataset_put(struct pcc_dataset *dataset);
 void pcc_inode_free(struct inode *inode);
index 221919b..a852860 100755 (executable)
@@ -213,7 +213,7 @@ setup_loopdev() {
        local mntpt=$3
        local size=${4:-50}
 
-       do_facet $facet mkdir -p $mntpt || error "mkdir -p $hsm_root failed"
+       do_facet $facet mkdir -p $mntpt || error "mkdir -p $mntpt failed"
        stack_trap "do_facet $facet rm -rf $mntpt" EXIT
        do_facet $facet dd if=/dev/zero of=$file bs=1M count=$size
        stack_trap "do_facet $facet rm -f $file" EXIT
@@ -1078,6 +1078,62 @@ test_10d() {
 }
 run_test 10d "Test RO-PCC with group quota on loop PCC device"
 
+test_usrgrp_edquot() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local id=$RUNAS_ID
+       local ug=$1
+
+       $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
+               skip "Server does not support PCC-RO"
+
+       [[ $ug == "g" ]] && id=$RUNAS_GID
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       do_facet $SINGLEAGT quotacheck -c$ug $mntpt ||
+               error "quotacheck -c$ug $mntpt failed"
+       do_facet $SINGLEAGT quotaon -$ug $mntpt ||
+               error "quotaon -$ug $mntpt failed"
+       do_facet $SINGLEAGT setquota -$ug $id 0 4096 0 0 $mntpt ||
+               error "setquota -$ug $id on $mntpt failed"
+       do_facet $SINGLEAGT repquota -${ug}vs $mntpt
+       do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed"
+       setup_pcc_mapping $SINGLEAGT \
+               "${ug}id={$id}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1"
+       do_facet $SINGLEAGT $LCTL pcc list $MOUNT
+
+       dd if=/dev/zero of=$file bs=1M count=2 ||
+               error "dd write $file failed"
+       chown $RUNAS_ID:$RUNAS_GID $file ||
+               error "chown $RUNAS_ID:$RUNAS_GID $file failed"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1M count=2 ||
+               error "dd read $file failed"
+       check_lpcc_state $file "readonly"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=5 ||
+               error "dd write $file failed"
+       check_lpcc_state $file "none"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1M count=5 ||
+               error "dd read $file failed"
+       do_facet $SINGLEAGT $LFS pcc state $file
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "PCC-RO attach $file failed"
+
+       do_facet $SINGLEAGT $LFS pcc detach $file || error "detach $file failed"
+}
+
+test_10e() {
+       test_usrgrp_edquot "u"
+}
+run_test 10e "Tolerate -EDQUOT failure when auto PCC-RO attach with user quota"
+
+test_10f() {
+       test_usrgrp_edquot "g"
+}
+run_test 10f "Tolerate -EDQUOT failure when auto PCC-RO attach with group quota"
+
 test_11() {
        local loopfile="$TMP/$tfile"
        local mntpt="/mnt/pcc.$tdir"
@@ -2511,6 +2567,80 @@ test_28() {
 }
 run_test 28 "RW-PCC attach should fail when the file has cluster-wide openers"
 
+test_29a() {
+       local project_id=100
+       local agt_facet=$SINGLEAGT
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tdir/$tfile
+       local file2=$DIR2/$tdir/$tfile
+
+       $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
+               skip "Server does not support PCC-RO"
+
+       is_project_quota_supported || skip "project quota is not supported"
+
+       enable_project_quota
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={$project_id}\ rwid=$HSM_ARCHIVE_NUMBER\ pccro=1"
+       $LCTL pcc list $MOUNT
+
+       do_facet $SINGLEAGT mkdir -p $DIR/$tdir ||
+               error "mkdir $DIR/$tdir failed"
+       do_facet $SINGLEAGT "echo -n ro_uptodate > $file" ||
+               error "failed to write $file"
+       check_lpcc_state $file "none"
+       $LFS project -sp $project_id $file ||
+               error "failed to set project for $file"
+       $LFS project -d $file
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "ro_uptodate"
+
+       echo -n Update_ro_data > $file2
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "Update_ro_data"
+
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "failed to detach $file"
+}
+run_test 29a "Auto readonly caching on RO-PCC backend for O_RDONLY open"
+
+test_29b() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/myfile.dat
+
+       $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
+               skip "Server does not support PCC-RO"
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed"
+       setup_pcc_mapping $SINGLEAGT \
+               "fname={*.dat}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1"
+       do_facet $SINGLEAGT $LCTL pcc list $MOUNT
+
+       do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=4k count=1 ||
+               error "Write $file failed"
+       do_facet $SINGLEAGT dd if=$file of=/dev/null bs=4k count=1 ||
+               error "Read $file failed"
+       do_facet $SINGLEAGT $LFS pcc state $file
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=4k count=1 ||
+               error "Write $file failed"
+       sysctl vm.drop_caches=3
+       do_facet $SINGLEAGT dd if=$file of=/dev/null bs=4k count=1 ||
+               error "Read $file failed"
+       do_facet $SINGLEAGT $LFS pcc state $file
+       check_lpcc_state $file "readonly"
+
+       do_facet $SINGLEAGT $LFS pcc detach $file || error "detach $file failed"
+}
+run_test 29b "Auto PCC-RO attach in atomic_open"
+
 #test 101: containers and PCC
 #LU-15170: Test mount namespaces with PCC
 #This tests the cases where the PCC mount is not present in the container by