Whamcloud - gitweb
LU-921 llite: warning in case of discarding dirty pages
author: Emoly Liu <emoly.liu@intel.com>
Tue, 4 Dec 2012 05:00:10 +0000 (13:00 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 5 Dec 2012 02:33:32 +0000 (21:33 -0500)
When a client is evicted, its dirty pages may be silently discarded:
the caller of a successful write(2) will not know that the data it
wrote was discarded due to the eviction before it could be flushed
to the OSS.
Test 24 of recovery-small is now re-enabled.

port of master patch 624a3ac233938153e889ae81f639cf46c0a8312b
port of master patch 4cfcf91efa06decc155d8f12eafc8ede3e850fd5

Signed-off-by: Hongchao Zhang <hongchao.zhang@whamcloud.com>
Signed-off-by: Liu Ying <emoly.liu@intel.com>
Change-Id: If8aeed507890a656cbb8231879d0e31aa9f4ea02
Reviewed-on: http://review.whamcloud.com/4716
Tested-by: Hudson
Reviewed-by: Jinshan Xiong <jinshan.xiong@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lclient.h
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/vvp_page.c
lustre/tests/recovery-small.sh

index abbcbb0..31164ba 100644 (file)
@@ -190,6 +190,18 @@ struct ccc_object {
          * \see ll_vm_open(), ll_vm_close().
          */
         cfs_atomic_t            cob_mmap_cnt;
+
+        /**
+         * various flags
+         * cob_discard_page_warned
+         *      set when the first dirty page of this object is discarded
+         * because the client was evicted and a warning has been printed;
+         * while it is set, further discarded pages do not warn again, so
+         * the log is not flooded when many pages are discarded.
+         *
+         * \see ll_dirty_page_discard_warn.
+         */
+        int                     cob_discard_page_warned:1;
 };
 
 /**
index 2f1b604..c6db9f9 100644 (file)
@@ -750,6 +750,7 @@ void ll_umount_begin(struct super_block *sb);
 #endif
 int ll_remount_fs(struct super_block *sb, int *flags, char *data);
 int ll_show_options(struct seq_file *seq, struct vfsmount *vfs);
+void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret);
 int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
                   struct super_block *);
 void lustre_dump_dentry(struct dentry *, int recur);
index fe062ee..14d4693 100644 (file)
@@ -2245,3 +2245,48 @@ int ll_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
         RETURN(0);
 }
+
+/*
+ * Render \a dentry as a path string into \a buf (\a bufsize bytes), using the
+ * vfsmount of the current task's root; returns whatever d_path() returns.
+ */
+static char *ll_d_path(struct dentry *dentry, char *buf, int bufsize)
+{
+#ifdef HAVE_FS_STRUCT_USE_PATH
+        struct path where = { .mnt = current->fs->root.mnt, .dentry = dentry };
+        char *name;
+
+        path_get(&where);
+        name = d_path(&where, buf, bufsize);
+        path_put(&where);
+        return name;
+#else
+        return d_path(dentry, current->fs->rootmnt, buf, bufsize);
+#endif
+}
+
+/* Warn that a dirty \a page was discarded; \a ioret is the I/O error code. */
+void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret)
+{
+        char *buf, *path = NULL;
+        struct dentry *dentry = NULL;
+        struct ccc_object *obj = cl_inode2ccc(page->mapping->host);
+
+        /* callers may be atomic (I/O completion), so never sleep here */
+        buf = (char *)__get_free_page(GFP_ATOMIC);
+        if (buf != NULL) {
+                dentry = d_find_alias(page->mapping->host);
+                if (dentry != NULL)
+                        path = ll_d_path(dentry, buf, PAGE_SIZE);
+        }
+
+        CWARN("dirty page discard: %s/fid: "DFID"/%s may get corrupted "
+              "(rc %d)\n", s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
+              PFID(&obj->cob_header.coh_lu.loh_fid),
+              (path && !IS_ERR(path)) ? path : "", ioret);
+
+        if (dentry != NULL)
+                dput(dentry);
+        if (buf != NULL)
+                free_page((unsigned long)buf);
+}
index deae49f..fe04232 100644 (file)
@@ -245,14 +245,23 @@ static int vvp_page_prep_write(const struct lu_env *env,
  */
 static void vvp_vmpage_error(struct inode *inode, cfs_page_t *vmpage, int ioret)
 {
-        if (ioret == 0)
+        struct ccc_object *obj = cl_inode2ccc(inode);
+
+        if (ioret == 0) {
                 ClearPageError(vmpage);
-        else if (ioret != -EINTR) {
+                obj->cob_discard_page_warned = 0; /* re-arm the warning */
+        } else { /* every non-zero ioret, -EINTR included */
                 SetPageError(vmpage);
                 if (ioret == -ENOSPC)
                         set_bit(AS_ENOSPC, &inode->i_mapping->flags);
                 else
                         set_bit(AS_EIO, &inode->i_mapping->flags);
+                /* warn once per object until an I/O succeeds again */
+                if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
+                     obj->cob_discard_page_warned == 0) {
+                        obj->cob_discard_page_warned = 1;
+                        ll_dirty_page_discard_warn(vmpage, ioret);
+                }
         }
 }
 
index 45fa7b1..3d461b0 100755 (executable)
@@ -2,8 +2,8 @@
 
 set -e
 
-#         bug  5494 5493
-ALWAYS_EXCEPT="24   52 $RECOVERY_SMALL_EXCEPT"
+#         bug  5493
+ALWAYS_EXCEPT="52 $RECOVERY_SMALL_EXCEPT"
 
 PTLDEBUG=${PTLDEBUG:--1}
 LUSTRE=${LUSTRE:-`dirname $0`/..}
@@ -621,7 +621,7 @@ test_23() { #b=4561
 }
 run_test 23 "client hang when close a file after mds crash"
 
-test_24() { # bug 11710 details correct fsync() behavior
+test_24a() { # bug 11710 details correct fsync() behavior
        remote_ost_nodsh && skip "remote OST with nodsh" && return 0
 
        mkdir -p $DIR/$tdir
@@ -637,7 +637,7 @@ test_24() { # bug 11710 details correct fsync() behavior
        client_reconnect
        [ $rc -eq 0 ] && error_ignore 5494 "multiop didn't fail fsync: rc $rc" || true
 }
-run_test 24 "fsync error (should return error)"
+run_test 24a "fsync error (should return error)"
 
 wait_client_evicted () {
        local facet=$1
@@ -645,10 +645,43 @@ wait_client_evicted () {
        local varsvc=${facet}_svc
 
        wait_update $(facet_active_host $facet) \
-                "lctl get_param -n *.${!varsvc}.num_exports | cut -d' ' -f2" \
-                $((exports - 1)) $3
+               "lctl get_param -n *.${!varsvc}.num_exports | cut -d' ' -f2" \
+               $((exports - 1)) $3
 }
 
+# Evict the client while two files hold dirty pages, then check that the
+# kernel log contains the "dirty page discard" warning.
+test_24b() {
+       remote_ost_nodsh && skip "remote OST with nodsh" && return 0
+
+       dmesg -c        # print and clear the kernel log before the test
+       mkdir -p $DIR/$tdir
+       lfs setstripe $DIR/$tdir -s 0 -i 0 -c 1
+       cancel_lru_locks osc
+       multiop_bg_pause $DIR/$tdir/$tfile-1 Ow8192_yc ||
+               error "multiop Ow8192_yc failed"
+       MULTI_PID1=$!   # presumably resumes with fsync + close on SIGUSR1
+       multiop_bg_pause $DIR/$tdir/$tfile-2 Ow8192_c ||
+               error "multiop Ow8192_c failed"
+       MULTI_PID2=$!   # presumably resumes with close only on SIGUSR1
+       ost_evict_client
+
+       kill -USR1 $MULTI_PID1
+       wait $MULTI_PID1
+       rc1=$?
+       kill -USR1 $MULTI_PID2
+       wait $MULTI_PID2
+       rc2=$?
+       lctl set_param fail_loc=0x0
+       client_reconnect
+       [ $rc1 -eq 0 -o $rc2 -eq 0 ] &&
+       error_ignore 5494 "multiop didn't fail fsync: $rc1 or close: $rc2" || true
+
+       dmesg | grep "dirty page discard:" ||
+               error "no discarded dirty page found!"
+}
+run_test 24b "test dirty page discard due to client eviction"
+
 test_26a() {      # was test_26 bug 5921 - evict dead exports by pinger
 # this test can only run from a client on a separate node.
        remote_ost || { skip "local OST" && return 0; }