From e3cfb688ed7116a57b2c7f89a3e4f28291a0b69f Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Mon, 20 Mar 2023 17:49:06 -0400 Subject: [PATCH] Revert "LU-14541 llite: Check vmpage in releasepage" This reverts commit c524079f4f59a39b99467d9868ee4aafdcf033e9, because it breaks releasepage for Lustre and does not completely fix the data consistency issue in LU-14541. Breaking releasepage matters because it prevents direct I/O from working if there is page cache data present, and because it causes similar issues with GDS, which must be able to flush page cache pages before doing I/O. Revert "LU-14541 llite: Check vmpage in releasepage" This reverts commit c524079f4f59a39b99467d9868ee4aafdcf033e9, because it breaks releasepage for Lustre and does not completely fix the data consistency issue in LU-14541. Breaking releasepage matters because it prevents direct I/O from working if there is page cache data present, and because it causes similar issues with GDS, which must be able to flush page cache pages before doing I/O. With patches: "LU-16160 llite: SIGBUS is possible on a race with page reclaim"/ d9c23a7934747eb19e23470b30806482a1aa60f8 and "LU-14541 llite: Check for page deletion after fault"/ 19678e30147f50f813e72e8216cfb0453fe0ca6e LU-14541 is fully resolved, so we can revert this patch. Signed-off-by: Patrick Farrell Change-Id: I613bdb4f27161ffc3638d1d8ea38827af5a7bd47 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/49654 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 8 -------- lustre/llite/rw26.c | 23 ++++++++--------------- lustre/osc/osc_page.c | 9 ++------- lustre/tests/sanity.sh | 16 ++++++---------- 4 files changed, 16 insertions(+), 40 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index c6607cd..dc371ce 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -987,14 +987,6 @@ static inline bool __page_in_use(const struct cl_page *page, int refc) */ #define cl_page_in_use_noref(pg) __page_in_use(pg, 0) -/* references: cl_page, page cache, optional + refcount for caller reference - * (always 0 or 1 currently) - */ -static inline int vmpage_in_use(struct page *vmpage, int refcount) -{ - return (page_count(vmpage) - page_mapcount(vmpage) > 2 + refcount); -} - /** @} cl_page */ /** \addtogroup cl_lock cl_lock diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 63c0f8f..1a39142 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -168,10 +168,10 @@ static void ll_invalidatepage(struct page *vmpage, static bool do_release_page(struct page *vmpage, gfp_t wait) { - struct lu_env *env; - struct cl_object *obj; - struct cl_page *clpage; struct address_space *mapping; + struct cl_object *obj; + struct cl_page *page; + struct lu_env *env; int result = 0; LASSERT(PageLocked(vmpage)); @@ -186,23 +186,16 @@ static bool do_release_page(struct page *vmpage, gfp_t wait) if (obj == NULL) return 1; - clpage = cl_vmpage_page(vmpage, obj); - if (clpage == NULL) + page = cl_vmpage_page(vmpage, obj); + if (page == NULL) return 1; env = cl_env_percpu_get(); LASSERT(!IS_ERR(env)); - /* we must not delete the cl_page if the vmpage is in use, otherwise we - * disconnect the vmpage from Lustre while it's still alive(!), which - * means we won't find it to discard on lock cancellation. - * - * References here are: caller + cl_page + page cache. - * Any other references are potentially transient and must be ignored. - */ - if (!cl_page_in_use(clpage) && !vmpage_in_use(vmpage, 1)) { + if (!cl_page_in_use(page)) { result = 1; - cl_page_delete(env, clpage); + cl_page_delete(env, page); } /* To use percpu env array, the call path can not be rescheduled; @@ -219,7 +212,7 @@ static bool do_release_page(struct page *vmpage, gfp_t wait) * that we won't get into object delete path. */ LASSERT(cl_object_refc(obj) > 1); - cl_page_put(env, clpage); + cl_page_put(env, page); cl_env_percpu_put(env); return result; diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index 4467cd3..c5928a0 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -521,13 +521,8 @@ static inline bool lru_page_busy(struct client_obd *cli, struct cl_page *page) if (cli->cl_cache->ccc_unstable_check) { struct page *vmpage = cl_page_vmpage(page); - /* this check is racy because the vmpage is not locked, but - * that's OK - the code which does the actual page release - * checks this again before releasing - * - * vmpage have two known users: cl_page and VM page cache - */ - if (vmpage_in_use(vmpage, 0)) + /* vmpage have two known users: cl_page and VM page cache */ + if (page_count(vmpage) - page_mapcount(vmpage) > 2) return true; } return false; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index f58f69f..47a2f77 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -41,7 +41,6 @@ ALWAYS_EXCEPT="$SANITY_EXCEPT " always_except LU-9693 42a 42c always_except LU-6493 42b always_except LU-16515 118c 118d -always_except LU-14541 277 always_except LU-8411 407 if $SHARED_KEY; then @@ -25602,21 +25601,18 @@ test_398a() { # LU-4198 $LFS setstripe -c 1 -i 0 $DIR/$tfile $LCTL set_param ldlm.namespaces.*.lru_size=clear - # Disabled: DIO does not push out buffered I/O pages, see LU-12587 # request a new lock on client - #dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 - #dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc - #local lock_count=$($LCTL get_param -n \ - # ldlm.namespaces.$imp_name.lru_size) - #[[ $lock_count -eq 0 ]] || error "lock should be cancelled by direct IO" + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc + local lock_count=$($LCTL get_param -n \ + ldlm.namespaces.$imp_name.lru_size) + [[ $lock_count -eq 0 ]] || error "lock should be cancelled by direct IO" $LCTL set_param ldlm.namespaces.*-OST0000-osc-ffff*.lru_size=clear # no lock cached, should use lockless DIO and not enqueue new lock - dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct \ - conv=notrunc || - error "dio write failed" + dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc lock_count=$($LCTL get_param -n \ ldlm.namespaces.$imp_name.lru_size) [[ $lock_count -eq 0 ]] || error "no lock should be held by direct IO" -- 1.8.3.1