From 8bc4b26453fb9d3296475ab3b9c9f3a9bf905afc Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Sat, 3 Jun 2017 11:20:10 -0400 Subject: [PATCH] LU-8656 vvp: Add S_NOSEC support We must use the i_mutex to protect permission changes, which means we need to take it when we write to a file with the setuid or setgid bit set (as this removes those bits). LU-8025 attempted to use IS_NOSEC to check for this case, but did not actually add support for the S_NOSEC flag to Lustre. S_NOSEC was added in upstream kernel commit: 69b4573296469fd3f70cf7044693074980517067 But a key change requiring parallel filesystems to opt in with a superblock flag was added in: 9e1f1de02c2275d7172e18dc4e7c2065777611bf This patch adds the required support. Specifically, Lustre should set S_NOSEC correctly when an inode is created (ll_iget), but only for new inodes. Setting it for existing inodes requires taking the i_mutex, creating an unacceptable metadata performance impact in the lookup code. Existing inodes get S_NOSEC set either by a setattr call (see below), or by the first writer to write to the node, in file_remove_privs/file_remove_suid. So it's OK not to set S_NOSEC on all inodes in ll_iget. Also, Lustre must clear S_NOSEC when it gets a metadata update because another node could have changed permissions. i_flags is already cleared in ll_update_inode, but we would prefer to have S_NOSEC set whenever possible, so we want to re-do the check after the update. This requires holding the i_mutex to avoid a check/set race with permissions changes. We cannot easily take the i_mutex in ll_update_inode (it is called from too many places, some of which already hold the i_mutex). It is acceptable to sometimes not have S_NOSEC set (since occasionally taking the i_mutex when not needed is OK), so we look at getting it set most of the time. It looks like the primary concern is ll_md_setattr. The caller (ll_setattr_raw) takes the i_mutex before returning, so we do the relevant call there. This opens a window during which S_NOSEC is not set, but again, this merely creates a situation where we take the i_mutex unnecessarily, and should be rare in practice. Testing with multiple writers shows that we only very rarely attempt to take the i_mutex, so the performance impact is minimal. This commit also adds a test to verify the S_NOSEC/i_mutex behavior. Signed-off-by: Patrick Farrell Change-Id: If481366283ceaeb247e50cc4d536fb10e9c329ee Reviewed-on: https://review.whamcloud.com/22853 Tested-by: Jenkins Reviewed-by: James Simmons Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/lustre_compat.h | 7 +++++++ lustre/include/obd_support.h | 3 ++- lustre/llite/llite_lib.c | 18 ++++++++++++++++-- lustre/llite/namei.c | 1 + lustre/llite/vvp_io.c | 7 +++++++ lustre/tests/sanity.sh | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 3 deletions(-) diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index 498d27b..bdd3d78 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -462,6 +462,13 @@ static inline bool is_sxid(umode_t mode) #define IS_NOSEC(inode) (!is_sxid(inode->i_mode)) #endif +#ifndef MS_NOSEC +static inline void inode_has_no_xattr(struct inode *inode) +{ + return; +} +#endif + #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index b130242..0a3813b 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -532,7 +532,8 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_SETDIRSTRIPE_PAUSE 0x140b #define OBD_FAIL_LLITE_CREATE_NODE_PAUSE 0x140c #define OBD_FAIL_LLITE_PTASK_IO_FAIL 0x140d - +#define OBD_FAIL_LLITE_IMUTEX_SEC 0x140e +#define OBD_FAIL_LLITE_IMUTEX_NOSEC 0x140f #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index c3c149c..e739193 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -241,6 +241,13 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, if (sbi->ll_flags & LL_SBI_USER_XATTR) data->ocd_connect_flags |= OBD_CONNECT_XATTR; +#ifdef MS_NOSEC + /* Setting this indicates we correctly support S_NOSEC (See kernel + * commit 9e1f1de02c2275d7172e18dc4e7c2065777611bf) + */ + sb->s_flags |= MS_NOSEC; +#endif + if (sbi->ll_flags & LL_SBI_FLOCK) sbi->ll_fop = &ll_file_operations_flock; else if (sbi->ll_flags & LL_SBI_LOCALFLOCK) @@ -1636,6 +1643,12 @@ out: inode_lock(inode); if ((attr->ia_valid & ATTR_SIZE) && !hsm_import) inode_dio_wait(inode); + /* Once we've got the i_mutex, it's safe to set the S_NOSEC + * flag. ll_update_inode (called from ll_md_setattr), clears + * inode flags, so there is a gap where S_NOSEC is not set. + * This can cause a writer to take the i_mutex unnecessarily, + * but this is safe to do and should be rare. */ + inode_has_no_xattr(inode); } ll_stats_ops_tally(ll_i2sbi(inode), (attr->ia_valid & ATTR_SIZE) ? @@ -1838,6 +1851,9 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) lli->lli_ctime = body->mbo_ctime; } + /* Clear i_flags to remove S_NOSEC before permissions are updated */ + if (body->mbo_valid & OBD_MD_FLFLAGS) + inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags); if (body->mbo_valid & OBD_MD_FLMODE) inode->i_mode = (inode->i_mode & S_IFMT) | (body->mbo_mode & ~S_IFMT); @@ -1859,8 +1875,6 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid); if (body->mbo_valid & OBD_MD_FLPROJID) lli->lli_projid = body->mbo_projid; - if (body->mbo_valid & OBD_MD_FLFLAGS) - inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags); if (body->mbo_valid & OBD_MD_FLNLINK) set_nlink(inode, body->mbo_nlink); if (body->mbo_valid & OBD_MD_FLRDEV) diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 0d4b4b3..c8bfe98 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -136,6 +136,7 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, iput(inode); inode = ERR_PTR(rc); } else { + inode_has_no_xattr(inode); unlock_new_inode(inode); } } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) { diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 35a5c3d..a6c355a 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1056,6 +1056,13 @@ static int vvp_io_write_start(const struct lu_env *env, RETURN(-EFBIG); } + /* Tests to verify we take the i_mutex correctly */ + if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_IMUTEX_SEC) && !lock_inode) + RETURN(-EINVAL); + + if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_IMUTEX_NOSEC) && lock_inode) + RETURN(-EINVAL); + /* * When using the locked AIO function (generic_file_aio_write()) * testing has shown the inode mutex to be a limiting factor diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index c69aa4a..c120a2e 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -14982,6 +14982,39 @@ test_257() { } run_test 257 "xattr locks are not lost" +# Verify we take the i_mutex when security requires it +test_258a() { +#define OBD_FAIL_IMUTEX_SEC 0x141c + $LCTL set_param fail_loc=0x141c + touch $DIR/$tfile + chmod u+s $DIR/$tfile + chmod a+rwx $DIR/$tfile + $RUNAS dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 oflag=append + RC=$? + if [ $RC -ne 0 ]; then + error "error, failed to take i_mutex, rc=$?" + fi + rm -f $DIR/$tfile +} +run_test 258a + +# Verify we do NOT take the i_mutex in the normal case +test_258b() { +#define OBD_FAIL_IMUTEX_NOSEC 0x141d + $LCTL set_param fail_loc=0x141d + touch $DIR/$tfile + chmod a+rwx $DIR + chmod a+rw $DIR/$tfile + $RUNAS dd if=/dev/zero of=$DIR/$tfile bs=4k count=1 oflag=append + RC=$? + if [ $RC -ne 0 ]; then + error "error, took i_mutex unnecessarily, rc=$?" + fi + rm -f $DIR/$tfile + +} +run_test 258b "verify i_mutex security behavior" + test_260() { #define OBD_FAIL_MDC_CLOSE 0x806 $LCTL set_param fail_loc=0x80000806 -- 1.8.3.1