From f3be560031cc7022a22f4c661f94ac7aa6f617f1 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 8 Dec 2021 15:19:31 -0500 Subject: [PATCH] LU-15170 llite: Switch pcc to lookup_one_len Using kern_path to lookup files in the PCC cache means we are subject to user namespaces, so the PCC volume must be mapped in to a container or the cached files cannot be found. One solution is to switch to using lookup_one_len - this is what the code which *creates* PCC files does. This manually walks the path from the root, which avoids namespace issues. This is appropriate because PCC is kernel functionality - the user should not be able to directly access the volume, but it should be accessible as a cache. Signed-off-by: Patrick Farrell Change-Id: Idd15574ace29543bed1a9937cb35404781714791 Reviewed-on: https://review.whamcloud.com/45436 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Yingjin Qian Reviewed-by: Oleg Drokin --- lustre/llite/pcc.c | 122 ++++++++++++++++++++++++++++++------------- lustre/tests/sanity-pcc.sh | 127 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 210 insertions(+), 39 deletions(-) diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index ab451c4..f60ab53 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -1057,7 +1057,7 @@ void pcc_inode_free(struct inode *inode) * reduce overhead: * (fid->f_oid >> 16 & oxFFFF)/FID */ -#define MAX_PCC_DATABASE_PATH (6 * 5 + FID_NOBRACE_LEN + 1) +#define PCC_DATASET_MAX_PATH (6 * 5 + FID_NOBRACE_LEN + 1) static int pcc_fid2dataset_path(char *buf, int sz, struct lu_fid *fid) { return scnprintf(buf, sz, "%04x/%04x/%04x/%04x/%04x/%04x/" @@ -1137,21 +1137,6 @@ static int pcc_get_layout_info(struct inode *inode, struct cl_layout *clt) RETURN(rc < 0 ? 
rc : 0); } -static int pcc_fid2dataset_fullpath(char *buf, int sz, struct lu_fid *fid, - struct pcc_dataset *dataset) -{ - return scnprintf(buf, sz, "%s/%04x/%04x/%04x/%04x/%04x/%04x/" - DFID_NOBRACE, - dataset->pccd_pathname, - (fid)->f_oid & 0xFFFF, - (fid)->f_oid >> 16 & 0xFFFF, - (unsigned int)((fid)->f_seq & 0xFFFF), - (unsigned int)((fid)->f_seq >> 16 & 0xFFFF), - (unsigned int)((fid)->f_seq >> 32 & 0xFFFF), - (unsigned int)((fid)->f_seq >> 48 & 0xFFFF), - PFID(fid)); -} - /* Must be called with pcci->pcci_lock held */ static void pcc_inode_attach_init(struct pcc_dataset *dataset, struct pcc_inode *pcci, @@ -1198,6 +1183,72 @@ static inline bool pcc_inode_has_layout(struct pcc_inode *pcci) return pcci->pcci_layout_gen != CL_LAYOUT_GEN_NONE; } +static struct dentry *pcc_lookup(struct dentry *base, char *pathname) +{ + char *ptr = NULL, *component; + struct dentry *parent; + struct dentry *child = ERR_PTR(-ENOENT); + + ptr = pathname; + + /* move past any initial '/' to the start of the first path component */ + while (*ptr == '/') + ptr++; + + /* store the start of the first path component */ + component = ptr; + + parent = dget(base); + while (ptr) { + /* find the start of the next component - if we don't find it, + * the current component is the last component + */ + ptr = strchr(ptr, '/'); + /* put a NUL char in place of the '/' before the next component + * so we can treat this component as a string; note the full + * path string is NUL terminated so this is not needed for the + * last component + */ + if (ptr) + *ptr = '\0'; + + /* look up the current component */ + inode_lock(parent->d_inode); + child = lookup_one_len(component, parent, strlen(component)); + inode_unlock(parent->d_inode); + + /* repair the path string: put '/' back in place of the NUL */ + if (ptr) + *ptr = '/'; + + dput(parent); + + if (IS_ERR_OR_NULL(child)) + break; + + /* we may find a cached negative dentry */ + if (!d_is_positive(child)) { + dput(child); + child = NULL; + break; + } 
+ + /* descend in to the next level of the path */ + parent = child; + + /* move the pointer past the '/' to the next component */ + if (ptr) + ptr++; + component = ptr; + } + + /* NULL child means we didn't find anything */ + if (!child) + child = ERR_PTR(-ENOENT); + + return child; +} + static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, enum lu_pcc_type type, struct pcc_dataset *dataset, @@ -1206,9 +1257,8 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, struct ll_inode_info *lli = ll_i2info(inode); struct pcc_inode *pcci = lli->lli_pcc_inode; const struct cred *old_cred; - struct dentry *pcc_dentry; - struct path path; - char *pathname; + struct dentry *pcc_dentry = NULL; + char pathname[PCC_DATASET_MAX_PATH]; __u32 pcc_gen; int rc; @@ -1218,24 +1268,25 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, !(dataset->pccd_flags & PCC_DATASET_RWPCC)) RETURN(0); - OBD_ALLOC(pathname, PATH_MAX); - if (pathname == NULL) - RETURN(-ENOMEM); - - pcc_fid2dataset_fullpath(pathname, PATH_MAX, &lli->lli_fid, dataset); + rc = pcc_fid2dataset_path(pathname, PCC_DATASET_MAX_PATH, + &lli->lli_fid); old_cred = override_creds(pcc_super_cred(inode->i_sb)); - rc = kern_path(pathname, LOOKUP_FOLLOW, &path); - if (rc) + pcc_dentry = pcc_lookup(dataset->pccd_path.dentry, pathname); + if (IS_ERR(pcc_dentry)) { + rc = PTR_ERR(pcc_dentry); + CDEBUG(D_CACHE, "%s: path lookup error on "DFID":%s: rc = %d\n", + ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid), + pathname, rc); /* ignore this error */ GOTO(out, rc = 0); + } - pcc_dentry = path.dentry; rc = ll_vfs_getxattr(pcc_dentry, pcc_dentry->d_inode, pcc_xattr_layout, &pcc_gen, sizeof(pcc_gen)); if (rc < 0) /* ignore this error */ - GOTO(out_put_path, rc = 0); + GOTO(out_put_pcc_dentry, rc = 0); rc = 0; /* The file is still valid cached in PCC, attach it immediately. 
*/ @@ -1245,7 +1296,7 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, if (!pcci) { OBD_SLAB_ALLOC_PTR_GFP(pcci, pcc_inode_slab, GFP_NOFS); if (pcci == NULL) - GOTO(out_put_path, rc = -ENOMEM); + GOTO(out_put_pcc_dentry, rc = -ENOMEM); pcc_inode_init(pcci, lli); dget(pcc_dentry); @@ -1267,11 +1318,10 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen, pcc_layout_gen_set(pcci, gen); *cached = true; } -out_put_path: - path_put(&path); +out_put_pcc_dentry: + dput(pcc_dentry); out: revert_creds(old_cred); - OBD_FREE(pathname, PATH_MAX); RETURN(rc); } @@ -2186,11 +2236,11 @@ static int __pcc_inode_create(struct pcc_dataset *dataset, struct dentry *child; int rc = 0; - OBD_ALLOC(path, MAX_PCC_DATABASE_PATH); + OBD_ALLOC(path, PCC_DATASET_MAX_PATH); if (path == NULL) return -ENOMEM; - pcc_fid2dataset_path(path, MAX_PCC_DATABASE_PATH, fid); + pcc_fid2dataset_path(path, PCC_DATASET_MAX_PATH, fid); base = pcc_mkdir_p(dataset->pccd_path.dentry, path, 0); if (IS_ERR(base)) { @@ -2198,7 +2248,7 @@ static int __pcc_inode_create(struct pcc_dataset *dataset, GOTO(out, rc); } - snprintf(path, MAX_PCC_DATABASE_PATH, DFID_NOBRACE, PFID(fid)); + snprintf(path, PCC_DATASET_MAX_PATH, DFID_NOBRACE, PFID(fid)); child = pcc_create(base, path, 0); if (IS_ERR(child)) { rc = PTR_ERR(child); @@ -2209,7 +2259,7 @@ static int __pcc_inode_create(struct pcc_dataset *dataset, out_base: dput(base); out: - OBD_FREE(path, MAX_PCC_DATABASE_PATH); + OBD_FREE(path, PCC_DATASET_MAX_PATH); return rc; } diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 36c0714..53da75b 100644 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -91,10 +91,12 @@ check_file_data() local client="$1" local path="$2" local expected_data="$3" + local pid=$4 - path_data=$(do_facet $client cat $path) - [[ "x$path_data" == "x$expected_data" ]] || error \ - "expected $path: $expected_data, got: $path_data" + # if $pid is set, then run command within 
namespace for that process + path_data=$(do_facet $client ${pid:+nsenter -t $pid -U -m} cat $path) + [[ "x$path_data" == "x$expected_data" ]] || + error "expected $path: $expected_data, got: $path_data" } check_lpcc_data() @@ -1489,6 +1491,125 @@ test_20() { } run_test 20 "Auto attach works after the inode was once evicted from cache" +#test 101: containers and PCC +#LU-15170: Test mount namespaces with PCC +#This tests the cases where the PCC mount is not present in the container by +#creating a mount namespace without the PCC mount in it (this is probably the +#standard config for most containers) +test_101a() { + local loopfile="$TMP/$tfile" + local mntpt="/mnt/pcc.$tdir" + local hsm_root="$mntpt/$tdir" + local file=$DIR/$tdir/$tfile + + # Some kernels such as RHEL7 default to 0 user namespaces + local maxuserns=$(do_facet $SINGLEAGT cat /proc/sys/user/max_user_namespaces) + do_facet $SINGLEAGT "echo 10 > /proc/sys/user/max_user_namespaces" + stack_trap "do_facet $SINGLEAGT 'echo $maxuserns > /proc/sys/user/max_user_namespaces'" + + echo "creating user namespace for $RUNAS_ID" + # Create a mount and user namespace with this command, and leave the + # process running so we can do the rest of our steps + do_facet $SINGLEAGT $RUNAS unshare -Um sleep 600 & + # Let the child start... 
+ sleep 0.2 + # Get the sleep PID so we can find its namespace and kill it later + PID=$(do_facet $SINGLEAGT pgrep sleep) + stack_trap "do_facet $SINGLEAGT kill -9 $PID" EXIT + echo "Created NS: child (sleep) pid $PID" + # Map 'RUNAS' to root in the namespace, so it has rights to do whatever + # This is handled by '-r' in unshare in newer versions + do_facet $SINGLEAGT $RUNAS newuidmap $PID 0 $RUNAS_ID 1 || + error "could not map uid $RUNAS_ID to root in namespace" + do_facet $SINGLEAGT $RUNAS newgidmap $PID 0 $RUNAS_GID 1 || + error "could not map gid $RUNAS_GID to root in namespace" + + # Create PCC after creating namespace; namespace will not have PCC + # mount + setup_loopdev $SINGLEAGT $loopfile $mntpt 50 + + # Create a temp file inside the PCC mount to verify mount namespace + do_facet $SINGLEAGT touch $mntpt/$tfile.tmp + stack_trap "do_facet $SINGLEAGT rm -f $mntpt/$tfile.tmp" EXIT + echo "Check for temp file in PCC mount" + do_facet $SINGLEAGT test -f $mntpt/$tfile.tmp || + error "Should see $mntpt/$tfile.tmp" + echo "Check for temp file in PCC mount from inside namespace" + do_facet $SINGLEAGT nsenter -t $PID -U -m test -f $mntpt/$tfile.tmp && + error "Should not see $mntpt/$tfile.tmp from namespace" + rm -f $mntpt/$tfile.tmp + + # Finish PCC setup + copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER" + setup_pcc_mapping $SINGLEAGT "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER" + + mkdir_on_mdt0 $DIR/$tdir || error "mkdir $DIR/$tdir failed" + chmod 777 $DIR/$tdir || error "chmod 777 $DIR/$tdir failed" + + echo "Verify open attach from inside mount namespace" + do_facet $SINGLEAGT nsenter -t $PID -U -m dd if=/dev/zero of=$file bs=1024 count=1 || + error "failed to dd write to $file" + do_facet $SINGLEAGT nsenter -t $PID -U -m $LFS pcc attach \ + -i $HSM_ARCHIVE_NUMBER $file || error "cannot attach $file" + do_facet $SINGLEAGT nsenter -t $PID -U -m $LFS pcc state $file + + check_lpcc_state $file "readwrite" $SINGLEAGT "$RUNAS" + # Revoke the layout lock, the 
PCC-cached file will be + # detached automatically. + do_facet $SINGLEAGT $LCTL set_param ldlm.namespaces.*mdc*.lru_size=clear + check_lpcc_state $file "readwrite" $SINGLEAGT "$RUNAS" + # Detach the file but keep the cache, as the file layout generation + # is not changed, so the file is still valid cached in PCC, and can + # be reused from PCC cache directly. + do_facet $SINGLEAGT nsenter -t $PID -U -m $LFS pcc detach -k $file || + error "PCC detach $file failed" + check_lpcc_state $file "readwrite" $SINGLEAGT "$RUNAS" + do_facet $SINGLEAGT nsenter -t $PID -U -m $LFS pcc detach $file || + error "PCC detach $file failed" + do_facet $SINGLEAGT nsenter -t $PID -U -m dd if=/dev/zero of=$file bs=1024 count=1 || + error "failed to dd write to $file" + rm -f $file || error "rm $file failed" + + echo "Verify auto attach at open from inside NS for RW-PCC" + # nsenter has strange behavior with echo, which means we have to place + # this in a script so we can use sh, otherwise it doesn't execute echo + # in the namespace + # NB: using /bin/echo instead of the shell built in does not help + echo "echo -n autoattach_data > $file" > $DIR/$tdir/$tfile.shell + # File is owned by root, make it accessible to RUNAS user + chmod a+rw $DIR/$tdir/$tfile.shell + stack_trap 'rm -f $DIR/$tdir/$tfile.shell' EXIT + do_facet $SINGLEAGT nsenter -t $PID -U -m "sh $DIR/$tdir/$tfile.shell" + do_facet $SINGLEAGT nsenter -t $PID -U -m $LFS pcc attach -i $HSM_ARCHIVE_NUMBER \ + $file || error "RW-PCC attach $file failed" + check_lpcc_state $file "readwrite" + + # Revoke the layout lock, the PCC-cached file will be + # detached automatically. + do_facet $SINGLEAGT $LCTL set_param ldlm.namespaces.*mdc*.lru_size=clear + check_file_data $SINGLEAGT $file "autoattach_data" $PID + check_lpcc_state $file "readwrite" + + # Detach the file with -k option, as the file layout generation + # is not changed, so the file is still valid cached in PCC, + # and can be reused from PCC cache directly. 
+ do_facet $SINGLEAGT $LFS pcc detach -k $file || + error "RW-PCC detach $file failed" + check_lpcc_state $file "readwrite" + # HSM released exists archived status + check_hsm_flags $file "0x0000000d" + check_file_data $SINGLEAGT $file "autoattach_data" $PID + + # HSM restore the PCC cached file, the layout generation + # was changed, so the file can not be auto attached. + $LFS hsm_restore $file || error "failed to restore $file" + wait_request_state $(path2fid $file) RESTORE SUCCEED + check_lpcc_state $file "none" + # HSM exists archived status + check_hsm_flags $file "0x00000009" +} +run_test 101a "Test auto attach in mount namespace (simulated container)" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1