From aba68250a67a10104c534bd726f67b31a7f35692 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Wed, 14 Aug 2019 05:06:14 -0400 Subject: [PATCH] LU-9964 llite: prevent mulitple group locks The patch adds mutex for group lock enqueue. It also adds waiting of group lock users on a client side for a same node. This prevents mulitple locks on the same resource and fixes a bugs when two locks cover the same dirty pages. The patch adds test sanity 244b. It creates threads which opens file, takes group lock, writes data, puts group lock, closes. It recreates the problem when a client has two or more group locks for a single file. This leads to a wrong behaviour for a flush etc. osc_cache_writeback_range()) ASSERTION( hp == 0 && discard == 0 ) failed One more test for group lock with open file and fork. It checks that childs doesn't unlock file until the last close. Cray-bug-id: LUS-7232 Signed-off-by: Alexander Boyko Change-Id: Iaf65cf26219211b76daaa1a91eeb80092c328f2d Reviewed-on: https://review.whamcloud.com/35791 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Patrick Farrell Reviewed-by: Andriy Skulysh Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_request.c | 3 +- lustre/llite/file.c | 73 ++++++++++++++++++++++++++---------------- lustre/llite/llite_internal.h | 3 ++ lustre/llite/llite_lib.c | 3 ++ lustre/osc/osc_lock.c | 2 ++ lustre/tests/group_lock_test.c | 60 ++++++++++++++++++++++++++++++++++ lustre/tests/sanity.sh | 23 +++++++++++-- 7 files changed, 136 insertions(+), 31 deletions(-) diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index e2e194d..34b7d93 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1008,7 +1008,8 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, lock->l_conn_export = exp; lock->l_export = NULL; lock->l_blocking_ast = einfo->ei_cb_bl; - lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL)); + lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL | + LDLM_FL_ATOMIC_CB)); lock->l_activity = ktime_get_real_seconds(); /* lock not sent to server yet */ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 81decaf..9e82a2c 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -2230,6 +2230,7 @@ out: RETURN(rc); } + static int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) { @@ -2245,18 +2246,28 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) RETURN(-EINVAL); } - if (ll_file_nolock(file)) - RETURN(-EOPNOTSUPP); + if (ll_file_nolock(file)) + RETURN(-EOPNOTSUPP); +retry: + if (file->f_flags & O_NONBLOCK) { + if (!mutex_trylock(&lli->lli_group_mutex)) + RETURN(-EAGAIN); + } else + mutex_lock(&lli->lli_group_mutex); - spin_lock(&lli->lli_lock); if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { CWARN("group lock already existed with gid %lu\n", fd->fd_grouplock.lg_gid); - spin_unlock(&lli->lli_lock); - RETURN(-EINVAL); + GOTO(out, rc = -EINVAL); + } + if (arg != lli->lli_group_gid && lli->lli_group_users != 0) { + if (file->f_flags & O_NONBLOCK) + GOTO(out, rc = -EAGAIN); + mutex_unlock(&lli->lli_group_mutex); + wait_var_event(&lli->lli_group_users, !lli->lli_group_users); + GOTO(retry, rc = 0); } LASSERT(fd->fd_grouplock.lg_lock == NULL); - spin_unlock(&lli->lli_lock); /** * XXX: group lock needs to protect all OST objects while PFL @@ -2276,7 +2287,7 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) env = cl_env_get(&refcheck); if (IS_ERR(env)) - RETURN(PTR_ERR(env)); + GOTO(out, rc = PTR_ERR(env)); rc = cl_object_layout_get(env, obj, &cl); if (!rc && cl.cl_is_composite) @@ -2285,28 +2296,26 @@ ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg) cl_env_put(env, &refcheck); if (rc) - RETURN(rc); + GOTO(out, rc); } rc = cl_get_grouplock(ll_i2info(inode)->lli_clob, arg, (file->f_flags & O_NONBLOCK), &grouplock); - if (rc) - RETURN(rc); - spin_lock(&lli->lli_lock); - if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { - spin_unlock(&lli->lli_lock); - CERROR("another thread just won the race\n"); - cl_put_grouplock(&grouplock); - RETURN(-EINVAL); - } + if (rc) + GOTO(out, rc); fd->fd_flags |= LL_FILE_GROUP_LOCKED; fd->fd_grouplock = grouplock; - spin_unlock(&lli->lli_lock); + if (lli->lli_group_users == 0) + lli->lli_group_gid = grouplock.lg_gid; + lli->lli_group_users++; CDEBUG(D_INFO, "group lock %lu obtained\n", arg); - RETURN(0); +out: + mutex_unlock(&lli->lli_group_mutex); + + RETURN(rc); } static int ll_put_grouplock(struct inode *inode, struct file *file, @@ -2315,32 +2324,40 @@ static int ll_put_grouplock(struct inode *inode, struct file *file, struct ll_inode_info *lli = ll_i2info(inode); struct ll_file_data *fd = LUSTRE_FPRIVATE(file); struct ll_grouplock grouplock; + int rc; ENTRY; - spin_lock(&lli->lli_lock); + mutex_lock(&lli->lli_group_mutex); if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - spin_unlock(&lli->lli_lock); - CWARN("no group lock held\n"); - RETURN(-EINVAL); - } + CWARN("no group lock held\n"); + GOTO(out, rc = -EINVAL); + } LASSERT(fd->fd_grouplock.lg_lock != NULL); if (fd->fd_grouplock.lg_gid != arg) { CWARN("group lock %lu doesn't match current id %lu\n", arg, fd->fd_grouplock.lg_gid); - spin_unlock(&lli->lli_lock); - RETURN(-EINVAL); + GOTO(out, rc = -EINVAL); } grouplock = fd->fd_grouplock; memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock)); fd->fd_flags &= ~LL_FILE_GROUP_LOCKED; - spin_unlock(&lli->lli_lock); cl_put_grouplock(&grouplock); + + lli->lli_group_users--; + if (lli->lli_group_users == 0) { + lli->lli_group_gid = 0; + wake_up_var(&lli->lli_group_users); + } CDEBUG(D_INFO, "group lock %lu released\n", arg); - RETURN(0); + GOTO(out, rc = 0); +out: + mutex_unlock(&lli->lli_group_mutex); + + RETURN(rc); } /** diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 9ac3f5d..1ae04bb 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -214,6 +214,9 @@ struct ll_inode_info { struct mutex lli_pcc_lock; enum lu_pcc_state_flags lli_pcc_state; struct pcc_inode *lli_pcc_inode; + struct mutex lli_group_mutex; + __u64 lli_group_users; + unsigned long lli_group_gid; }; }; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 187d57e..0129b1c 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1006,6 +1006,9 @@ void ll_lli_init(struct ll_inode_info *lli) mutex_init(&lli->lli_pcc_lock); lli->lli_pcc_state = PCC_STATE_FL_NONE; lli->lli_pcc_inode = NULL; + mutex_init(&lli->lli_group_mutex); + lli->lli_group_users = 0; + lli->lli_group_gid = 0; } mutex_init(&lli->lli_layout_mutex); memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid)); diff --git a/lustre/osc/osc_lock.c b/lustre/osc/osc_lock.c index 10849d0..90811af 100644 --- a/lustre/osc/osc_lock.c +++ b/lustre/osc/osc_lock.c @@ -1200,6 +1200,8 @@ int osc_lock_init(const struct lu_env *env, oscl->ols_flags = osc_enq2ldlm_flags(enqflags); oscl->ols_speculative = !!(enqflags & CEF_SPECULATIVE); + if (lock->cll_descr.cld_mode == CLM_GROUP) + oscl->ols_flags |= LDLM_FL_ATOMIC_CB; if (oscl->ols_flags & LDLM_FL_HAS_INTENT) { oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED; diff --git a/lustre/tests/group_lock_test.c b/lustre/tests/group_lock_test.c index 35bde28..f20010d 100644 --- a/lustre/tests/group_lock_test.c +++ b/lustre/tests/group_lock_test.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include #include @@ -420,6 +422,63 @@ static void test30(void) close(fd2); } +/* Test locking between several fds. */ +static void test40(void) +{ + int rc; + int fd; + int gid; + char buf[10000]; + int i, j, threads = 40; + + /* Create the test file. */ + fd = open(mainpath, O_RDWR | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); + ASSERTF(fd >= 0, "creat failed for '%s': %s", + mainpath, strerror(errno)); + /* Lock */ + gid = 1234; + rc = ioctl(fd, LL_IOC_GROUP_LOCK, gid); + ASSERTF(rc == 0, "cannot lock '%s' gid %d: %s", mainpath, gid, + strerror(errno)); + + for (i = 0; i < threads; i++) { + rc = fork(); + ASSERTF(rc >= 0, "fork failed %s", strerror(errno)); + + if (rc == 0) { + struct iovec io = { .iov_base = buf, + .iov_len = sizeof(buf)}; + + /* writing to a fixed offset */ + memset(buf, i, sizeof(buf)); + rc = pwritev(fd, &io, 1, sizeof(buf) * i); + ASSERTF(rc == sizeof(buf), "write failed for '%s' block %d: %s", + mainpath, i, strerror(errno)); + + close(fd); + exit(0); + } + } + + while ((i = wait(&rc)) > 0); + + /* Check data */ + for (i = 0; i < threads; i++) { + rc = read(fd, buf, sizeof(buf)); + ASSERTF(rc == sizeof(buf), "read failed for '%s': %s", + mainpath, strerror(errno)); + for (j = 0; j < rc; j++) + ASSERTF(i == buf[j], "wrong data at off %d %d != %d", + j, i, buf[j]); + } + + /* close unlock group lock */ + rc = close(fd); + ASSERTF(rc == 0, "close failed '%s': %s", + mainpath, strerror(errno)); +} + static void usage(char *prog) { fprintf(stderr, "Usage: %s [-d lustre_dir]\n", prog); @@ -476,6 +535,7 @@ int main(int argc, char *argv[]) PERFORM(test12); PERFORM(test20); PERFORM(test30); + PERFORM(test40); return EXIT_SUCCESS; } diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 67f6563..68ca2fc 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -16841,7 +16841,7 @@ test_243() } run_test 243 "various group lock tests" -test_244() +test_244a() { test_mkdir $DIR/$tdir dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=35 @@ -16849,7 +16849,26 @@ test_244() error "sendfile+grouplock failed" rm -rf $DIR/$tdir } -run_test 244 "sendfile with group lock tests" +run_test 244a "sendfile with group lock tests" + +test_244b() +{ + [ $PARALLEL == "yes" ] && skip "skip parallel run" && return + + local threads=50 + local size=$((1024*1024)) + + test_mkdir $DIR/$tdir + for i in $(seq 1 $threads); do + local file=$DIR/$tdir/file_$((i / 10)) + $MULTIOP $file OG1234w$size_$((i % 3))w$size_$((i % 4))g1234c & + local pids[$i]=$! + done + for i in $(seq 1 $threads); do + wait ${pids[$i]} + done +} +run_test 244b "multi-threaded write with group lock" test_245() { local flagname="multi_mod_rpcs" -- 1.8.3.1