We cannot release a page if the vmpage reference count is
>1, otherwise we will detach a vmpage from Lustre when the
page is still referenced in the VM.
This creates a situation where page discard for lock
cancellation will not find the page, so we can get stale
data reads.
This re-introduces the LU-12587 issue where direct I/O on
a client falls back to buffered I/O if there are pages in
cache, since it cannot flush them. This is annoying but
not a huge problem.
Fixes:
e59f0c9a245f ("LU-12587 llite: don't check vmpage refcount in ll_releasepage()")
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I3aa1cd7330f5e7d1ba2ddb0c12779aa22f3d70b7
Reviewed-on: https://review.whamcloud.com/47262
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
*/
#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
*/
#define cl_page_in_use_noref(pg) __page_in_use(pg, 0)
+/* references: cl_page, page cache, optional + refcount for caller reference
+ * (always 0 or 1 currently)
+ */
+static inline int vmpage_in_use(struct page *vmpage, int refcount)
+{
+ return (page_count(vmpage) - page_mapcount(vmpage) > 2 + refcount);
+}
+
/** @} cl_page */
/** \addtogroup cl_lock cl_lock
/** @} cl_page */
/** \addtogroup cl_lock cl_lock
{
struct lu_env *env;
struct cl_object *obj;
{
struct lu_env *env;
struct cl_object *obj;
+ struct cl_page *clpage;
struct address_space *mapping;
int result = 0;
struct address_space *mapping;
int result = 0;
if (obj == NULL)
return 1;
if (obj == NULL)
return 1;
- page = cl_vmpage_page(vmpage, obj);
- if (page == NULL)
+ clpage = cl_vmpage_page(vmpage, obj);
+ if (clpage == NULL)
return 1;
env = cl_env_percpu_get();
LASSERT(!IS_ERR(env));
return 1;
env = cl_env_percpu_get();
LASSERT(!IS_ERR(env));
- if (!cl_page_in_use(page)) {
+ /* we must not delete the cl_page if the vmpage is in use, otherwise we
+ * disconnect the vmpage from Lustre while it's still alive(!), which
+ * means we won't find it to discard on lock cancellation.
+ *
+ * References here are: caller + cl_page + page cache.
+ * Any other references are potentially transient and must be ignored.
+ */
+ if (!cl_page_in_use(clpage) && !vmpage_in_use(vmpage, 1)) {
- cl_page_delete(env, page);
+ cl_page_delete(env, clpage);
}
/* To use percpu env array, the call path can not be rescheduled;
}
/* To use percpu env array, the call path can not be rescheduled;
* that we won't get into object delete path.
*/
LASSERT(cl_object_refc(obj) > 1);
* that we won't get into object delete path.
*/
LASSERT(cl_object_refc(obj) > 1);
- cl_page_put(env, page);
+ cl_page_put(env, clpage);
cl_env_percpu_put(env);
return result;
cl_env_percpu_put(env);
return result;
if (cli->cl_cache->ccc_unstable_check) {
struct page *vmpage = cl_page_vmpage(page);
if (cli->cl_cache->ccc_unstable_check) {
struct page *vmpage = cl_page_vmpage(page);
- /* vmpage have two known users: cl_page and VM page cache */
- if (page_count(vmpage) - page_mapcount(vmpage) > 2)
+ /* this check is racy because the vmpage is not locked, but
+ * that's OK - the code which does the actual page release
+ * checks this again before releasing
+ *
+ * vmpage have two known users: cl_page and VM page cache
+ */
+ if (vmpage_in_use(vmpage, 0))
return true;
}
return false;
return true;
}
return false;
ALWAYS_EXCEPT="$SANITY_EXCEPT "
# bug number for skipped test: LU-9693 LU-6493 LU-9693
ALWAYS_EXCEPT+=" 42a 42b 42c "
ALWAYS_EXCEPT="$SANITY_EXCEPT "
# bug number for skipped test: LU-9693 LU-6493 LU-9693
ALWAYS_EXCEPT+=" 42a 42b 42c "
-# bug number: LU-8411 LU-9054
-ALWAYS_EXCEPT+=" 407 312 "
+# bug number: LU-8411 LU-9054 LU-14541
+ALWAYS_EXCEPT+=" 407 312 277 "
if $SHARED_KEY; then
# bug number: LU-14181 LU-14181
if $SHARED_KEY; then
# bug number: LU-14181 LU-14181
$LFS setstripe -c 1 -i 0 $DIR/$tfile
$LCTL set_param ldlm.namespaces.*.lru_size=clear
$LFS setstripe -c 1 -i 0 $DIR/$tfile
$LCTL set_param ldlm.namespaces.*.lru_size=clear
+ # Disabled: DIO does not push out buffered I/O pages, see LU-12587
# request a new lock on client
# request a new lock on client
- dd if=/dev/zero of=$DIR/$tfile bs=1M count=1
+ #dd if=/dev/zero of=$DIR/$tfile bs=1M count=1
- dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc
- local lock_count=$($LCTL get_param -n \
- ldlm.namespaces.$imp_name.lru_size)
- [[ $lock_count -eq 0 ]] || error "lock should be cancelled by direct IO"
+ #dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc
+ #local lock_count=$($LCTL get_param -n \
+ # ldlm.namespaces.$imp_name.lru_size)
+ #[[ $lock_count -eq 0 ]] || error "lock should be cancelled by direct IO"
$LCTL set_param ldlm.namespaces.*-OST0000-osc-ffff*.lru_size=clear
# no lock cached, should use lockless DIO and not enqueue new lock
$LCTL set_param ldlm.namespaces.*-OST0000-osc-ffff*.lru_size=clear
# no lock cached, should use lockless DIO and not enqueue new lock
- dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct conv=notrunc
+ dd if=/dev/zero of=$DIR/$tfile bs=1M count=1 oflag=direct \
+ conv=notrunc ||
+ error "dio write failed"
lock_count=$($LCTL get_param -n \
ldlm.namespaces.$imp_name.lru_size)
[[ $lock_count -eq 0 ]] || error "no lock should be held by direct IO"
lock_count=$($LCTL get_param -n \
ldlm.namespaces.$imp_name.lru_size)
[[ $lock_count -eq 0 ]] || error "no lock should be held by direct IO"
init_logging
ALWAYS_EXCEPT="$SANITYN_EXCEPT "
init_logging
ALWAYS_EXCEPT="$SANITYN_EXCEPT "
-# bug number for skipped test: LU-7105
-ALWAYS_EXCEPT+=" 28 "
-# bug number for skipped test: LU-14541
-ALWAYS_EXCEPT+=" 16f"
+# bug number for skipped test: LU-7105 LU-14541
+ALWAYS_EXCEPT+=" 28 16f"
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# skip tests for PPC until they are fixed
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# skip tests for PPC until they are fixed