From 204d19c3f378486ec4f01dc42a5fe769e996f7a5 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Mon, 23 Aug 2021 13:32:07 -0400 Subject: [PATCH] LU-14949 llite: Always do lookup on ENOENT in open When there is no valid dentry found for a file we want to open, we perform a full lookup, which goes to the server and looks up the file by name. When we find an existing dentry in cache *but the file is not open on the node*, we do not do a full lookup. We move directly to opening the file. When we open files, we use the FID of the file. The problem occurs when a new file is renamed *over* the file we were trying to open. This removes the FID we are trying to open, but the file *name* userspace called open() on is still present. In this case, we will return ENOENT, even though there is a file matching the name used in the open() call. The solution is when we get an ENOENT on open (indicating our open raced with an unlink), we always send ESTALE back to the VFS, which restarts the open and forces a lookup to the server (by forcing Lustre to consider the dentry invalid, see comments in ll_intent_file_open and code in ll_revalidate_dentry). This causes a lookup by name, which will correctly handle the rename, allowing the open to proceed normally. This should only generate extra retries in the case where a positive dentry exists on the client but the file has been removed on the server, ie, open racing with unlink. This should hopefully be rare. Lustre-change: https://review.whamcloud.com/44675 Lustre-commit: 72c1f7095203cc1badadf581c66f9546476438ab Signed-off-by: Patrick Farrell Change-Id: Ice19f4bdbea9a2cbeb337336f7e7098afa6b4be3 Reviewed-by: Andreas Dilger Reviewed-by: Li Xi Reviewed-on: https://review.whamcloud.com/44949 Tested-by: jenkins Tested-by: Andreas Dilger --- lustre/include/obd_support.h | 1 + lustre/llite/file.c | 22 +++++++++++++++------- lustre/tests/sanity.sh | 22 ++++++++++++++++++++++ lustre/tests/sanityn.sh | 24 ++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 57892a1..dbe9b51 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -594,6 +594,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_LLITE_SHORT_COMMIT 0x1415 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE2 0x1416 #define OBD_FAIL_LLITE_PAGE_ALLOC 0x1418 +#define OBD_FAIL_LLITE_OPEN_DELAY 0x1419 #define OBD_FAIL_FID_INDIR 0x1501 #define OBD_FAIL_FID_INLMA 0x1502 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index b57c179..804b58f 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -637,6 +637,8 @@ retry: op_data->op_data = lmm; op_data->op_data_size = lmmsize; + OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_OPEN_DELAY, cfs_fail_val); + rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req, &ll_md_blocking_ast, 0); kfree(name); @@ -688,14 +690,20 @@ out: ptlrpc_req_finished(req); ll_intent_drop_lock(itp); - /* We did open by fid, but by the time we got to the server, - * the object disappeared. If this is a create, we cannot really - * tell the userspace that the file it was trying to create - * does not exist. Instead let's return -ESTALE, and the VFS will - * retry the create with LOOKUP_REVAL that we are going to catch - * in ll_revalidate_dentry() and use lookup then. + /* We did open by fid, but by the time we got to the server, the object + * disappeared. This is possible if the object was unlinked, but it's + * also possible if the object was unlinked by a rename. In the case + * of an object renamed over our existing one, we can't fail this open. + * O_CREAT also goes through this path if we had an existing dentry, + * and it's obviously wrong to return ENOENT for O_CREAT. + * + * Instead let's return -ESTALE, and the VFS will retry the open with + * LOOKUP_REVAL, which we catch in ll_revalidate_dentry and fail to + * revalidate, causing a lookup. This causes extra lookups in the case + * where we had a dentry in cache but the file is being unlinked and we + * lose the race with unlink, but this should be very rare. */ - if (rc == -ENOENT && itp->it_op & IT_CREAT) + if (rc == -ENOENT) rc = -ESTALE; RETURN(rc); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 1354800..032ad91 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -3453,6 +3453,28 @@ test_31q() { } run_test 31q "create striped directory on specific MDTs" +#LU-14949 +test_31r() { + touch $DIR/$tfile.target + touch $DIR/$tfile.source + + #OBD_FAIL_LLITE_OPEN_DELAY 0x1419 + $LCTL set_param fail_loc=0x1419 fail_val=3 + cat $DIR/$tfile.target & + CATPID=$! + + # Guarantee open is waiting before we get here + sleep 1 + mv $DIR/$tfile.source $DIR/$tfile.target + + wait $CATPID + RC=$? + if [[ $RC -ne 0 ]]; then + error "open with cat failed, rc=$RC" + fi +} +run_test 31r "open-rename(replace) race" + cleanup_test32_mount() { local rc=0 trap 0 diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 23a8471..8a31bae 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -858,6 +858,30 @@ test_31b() { } run_test 31b "voluntary OST cancel / blocking ast race==============" +#LU-14949 - multi-client version of the test 31r in sanity. +test_31r() { + touch $DIR/$tfile.target + touch $DIR/$tfile.source + + ls -l $DIR/$tfile.target # cache it for sure + + #OBD_FAIL_LLITE_OPEN_DELAY 0x1419 + $LCTL set_param fail_loc=0x1419 fail_val=3 + cat $DIR/$tfile.target & + CATPID=$! + + # Guarantee open is waiting before we get here + sleep 1 + mv $DIR2/$tfile.source $DIR2/$tfile.target + + wait $CATPID + RC=$? + if [[ $RC -ne 0 ]]; then + error "open with cat failed, rc=$RC" + fi +} +run_test 31r "open-rename(replace) race" + test_32b() { # bug 11270 remote_ost_nodsh && skip "remote OST with nodsh" && return -- 1.8.3.1