From 72c1f7095203cc1badadf581c66f9546476438ab Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Mon, 16 Aug 2021 22:54:59 -0400 Subject: [PATCH] LU-14949 llite: Always do lookup on ENOENT in open When there is no valid dentry found for a file we want to open, we perform a full lookup, which goes to the server and looks up the file by name. When we find an existing dentry in cache *but the file is not open on the node*, we do not do a full lookup. We move directly to opening the file. When we open files, we use the FID of the file. The problem occurs when a new file is renamed *over* the file we were trying to open. This removes the FID we are trying to open, but the file *name* userspace called open() on is still present. In this case, we will return ENOENT, even though there is a file matching the name used in the open() call. The solution is when we get an ENOENT on open (indicating our open raced with an unlink), we always send ESTALE back to the VFS, which restarts the open and forces a lookup to the server (by forcing Lustre to consider the dentry invalid, see comments in ll_intent_file_open and code in ll_revalidate_dentry). This causes a lookup by name, which will correctly handle the rename, allowing the open to proceed normally. This should only generate extra retries in the case where a positive dentry exists on the client but the file has been removed on the server, ie, open racing with unlink. This should hopefully be rare. Signed-off-by: Patrick Farrell Signed-off-by: Oleg Drokin Change-Id: If9157cac901c81d6ad3f15997d419d3907fe88b8 Reviewed-on: https://review.whamcloud.com/44675 Reviewed-by: Hongchao Zhang Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo --- lustre/include/obd_support.h | 1 + lustre/llite/file.c | 22 +++++++++++++++------- lustre/tests/sanity.sh | 22 ++++++++++++++++++++++ lustre/tests/sanityn.sh | 24 ++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index e813f28..5852bf2 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -600,6 +600,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2 0x1416 #define OBD_FAIL_LLITE_RACE_MOUNT 0x1417 #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418 +#define OBD_FAIL_LLITE_OPEN_DELAY 0x1419 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 934be16..0c08757 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -636,6 +636,8 @@ retry: op_data->op_data = lmm; op_data->op_data_size = lmmsize; + OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_OPEN_DELAY, cfs_fail_val); + rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req, &ll_md_blocking_ast, 0); kfree(name); @@ -687,14 +689,20 @@ out: ptlrpc_req_finished(req); ll_intent_drop_lock(itp); - /* We did open by fid, but by the time we got to the server, - * the object disappeared. If this is a create, we cannot really - * tell the userspace that the file it was trying to create - * does not exist. Instead let's return -ESTALE, and the VFS will - * retry the create with LOOKUP_REVAL that we are going to catch - * in ll_revalidate_dentry() and use lookup then. + /* We did open by fid, but by the time we got to the server, the object + * disappeared. This is possible if the object was unlinked, but it's + * also possible if the object was unlinked by a rename. In the case + * of an object renamed over our existing one, we can't fail this open. + * O_CREAT also goes through this path if we had an existing dentry, + * and it's obviously wrong to return ENOENT for O_CREAT. + * + * Instead let's return -ESTALE, and the VFS will retry the open with + * LOOKUP_REVAL, which we catch in ll_revalidate_dentry and fail to + * revalidate, causing a lookup. This causes extra lookups in the case + * where we had a dentry in cache but the file is being unlinked and we + * lose the race with unlink, but this should be very rare. */ - if (rc == -ENOENT && itp->it_op & IT_CREAT) + if (rc == -ENOENT) rc = -ESTALE; RETURN(rc); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b000065..c0aac29 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -3721,6 +3721,28 @@ test_31q() { } run_test 31q "create striped directory on specific MDTs" +#LU-14949 +test_31r() { + touch $DIR/$tfile.target + touch $DIR/$tfile.source + + #OBD_FAIL_LLITE_OPEN_DELAY 0x1419 + $LCTL set_param fail_loc=0x1419 fail_val=3 + cat $DIR/$tfile.target & + CATPID=$! + + # Guarantee open is waiting before we get here + sleep 1 + mv $DIR/$tfile.source $DIR/$tfile.target + + wait $CATPID + RC=$? + if [[ $RC -ne 0 ]]; then + error "open with cat failed, rc=$RC" + fi +} +run_test 31r "open-rename(replace) race" + cleanup_test32_mount() { local rc=0 trap 0 diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index d433294..636ef021 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -866,6 +866,30 @@ test_31b() { } run_test 31b "voluntary OST cancel / blocking ast race==============" +#LU-14949 - multi-client version of the test 31r in sanity. +test_31r() { + touch $DIR/$tfile.target + touch $DIR/$tfile.source + + ls -l $DIR/$tfile.target # cache it for sure + + #OBD_FAIL_LLITE_OPEN_DELAY 0x1419 + $LCTL set_param fail_loc=0x1419 fail_val=3 + cat $DIR/$tfile.target & + CATPID=$! + + # Guarantee open is waiting before we get here + sleep 1 + mv $DIR2/$tfile.source $DIR2/$tfile.target + + wait $CATPID + RC=$? + if [[ $RC -ne 0 ]]; then + error "open with cat failed, rc=$RC" + fi +} +run_test 31r "open-rename(replace) race" + test_32b() { # bug 11270 remote_ost_nodsh && skip "remote OST with nodsh" && return -- 1.8.3.1