From 624a3ac233938153e889ae81f639cf46c0a8312b Mon Sep 17 00:00:00 2001 From: Hongchao Zhang Date: Tue, 23 Oct 2012 20:00:17 +0800 Subject: [PATCH 1/1] LU-921 llite: warning in case of discarding dirty pages When a client is evicted, dirty pages may get silently discarded; the caller of a successful write(2) will not know that the data it wrote has been discarded due to eviction before it could be flushed to the OSS. Signed-off-by: Hongchao Zhang Change-Id: Iecfbf096548ff08cdd6064d53ad8c688343fcddc Reviewed-on: http://review.whamcloud.com/1908 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Jinshan Xiong Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lclient.h | 12 +++++++++++ lustre/llite/llite_internal.h | 1 + lustre/llite/llite_lib.c | 45 ++++++++++++++++++++++++++++++++++++++++++ lustre/llite/vvp_page.c | 27 ++++++++++++++++--------- lustre/tests/recovery-small.sh | 41 ++++++++++++++++++++++++++++++++++---- 5 files changed, 113 insertions(+), 13 deletions(-) diff --git a/lustre/include/lclient.h b/lustre/include/lclient.h index 9c56770..a1bc9aa 100644 --- a/lustre/include/lclient.h +++ b/lustre/include/lclient.h @@ -204,6 +204,18 @@ struct ccc_object { * \see ll_vm_open(), ll_vm_close(). */ cfs_atomic_t cob_mmap_cnt; + + /** + * various flags + * cob_discard_page_warned + * if pages belonging to this object are discarded when a client + * is evicted, some debug info will be printed, this flag will be set + * during processing the first discarded page, then avoid flooding + * debug message for lots of discarded pages. + * + * \see ll_dirty_page_discard_warn. 
+ */ + int cob_discard_page_warned:1; }; /** diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 70f1e44..58ff340 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -819,6 +819,7 @@ void ll_umount_begin(struct super_block *sb); #endif int ll_remount_fs(struct super_block *sb, int *flags, char *data); int ll_show_options(struct seq_file *seq, struct vfsmount *vfs); +void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret); int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, struct super_block *); void lustre_dump_dentry(struct dentry *, int recur); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 0566ab2..f1aa2af 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -2407,3 +2407,48 @@ char *ll_get_fsname(struct super_block *sb, char *buf, int buflen) return buf; } + +static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize) +{ + char *path = NULL; + +#ifdef HAVE_FS_STRUCT_USE_PATH + struct path p; + + p.dentry = dentry; + p.mnt = current->fs->root.mnt; + path_get(&p); + path = d_path(&p, buf, bufsize); + path_put(&p); +#else + path = d_path(dentry, current->fs->rootmnt, buf, bufsize); +#endif + + return path; +} + +void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret) +{ + char *buf, *path = NULL; + struct dentry *dentry = NULL; + struct ccc_object *obj = cl_inode2ccc(page->mapping->host); + + buf = (char *)__get_free_page(GFP_KERNEL); + if (buf != NULL) { + dentry = d_find_alias(page->mapping->host); + if (dentry != NULL) + path = ll_d_path(dentry, buf, PAGE_SIZE); + } + + CWARN("%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted " + "(rc %d)\n", ll_get_fsname(page->mapping->host->i_sb, NULL, 0), + s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev, + PFID(&obj->cob_header.coh_lu.loh_fid), + (path && !IS_ERR(path)) ? 
path : "", ioret); + + if (dentry != NULL) + dput(dentry); + + if (buf != NULL) + free_page((unsigned long)buf); +} diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index c07cb29..ffb4464 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -245,15 +245,24 @@ static int vvp_page_prep_write(const struct lu_env *env, */ static void vvp_vmpage_error(struct inode *inode, cfs_page_t *vmpage, int ioret) { - if (ioret == 0) - ClearPageError(vmpage); - else if (ioret != -EINTR) { - SetPageError(vmpage); - if (ioret == -ENOSPC) - set_bit(AS_ENOSPC, &inode->i_mapping->flags); - else - set_bit(AS_EIO, &inode->i_mapping->flags); - } + struct ccc_object *obj = cl_inode2ccc(inode); + + if (ioret == 0) { + ClearPageError(vmpage); + obj->cob_discard_page_warned = 0; + } else { + SetPageError(vmpage); + if (ioret == -ENOSPC) + set_bit(AS_ENOSPC, &inode->i_mapping->flags); + else + set_bit(AS_EIO, &inode->i_mapping->flags); + + if ((ioret == -ESHUTDOWN || ioret == -EINTR) && + obj->cob_discard_page_warned == 0) { + obj->cob_discard_page_warned = 1; + ll_dirty_page_discard_warn(vmpage, ioret); + } + } } static void vvp_page_completion_read(const struct lu_env *env, diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 5c19142..637419e 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -690,7 +690,7 @@ test_23() { #b=4561 } run_test 23 "client hang when close a file after mds crash" -test_24() { # bug 11710 details correct fsync() behavior +test_24a() { # bug 11710 details correct fsync() behavior remote_ost_nodsh && skip "remote OST with nodsh" && return 0 mkdir -p $DIR/$tdir @@ -706,7 +706,7 @@ test_24() { # bug 11710 details correct fsync() behavior client_reconnect [ $rc -eq 0 ] && error_ignore 5494 "multiop didn't fail fsync: rc $rc" || true } -run_test 24 "fsync error (should return error)" +run_test 24a "fsync error (should return error)" wait_client_evicted () { local facet=$1 @@ 
-714,10 +714,43 @@ wait_client_evicted () { local varsvc=${facet}_svc wait_update $(facet_active_host $facet) \ - "lctl get_param -n *.${!varsvc}.num_exports | cut -d' ' -f2" \ - $((exports - 1)) $3 + "lctl get_param -n *.${!varsvc}.num_exports | cut -d' ' -f2" \ + $((exports - 1)) $3 } +test_24b() { + remote_ost_nodsh && skip "remote OST with nodsh" && return 0 + + dmesg -c + mkdir -p $DIR/$tdir + lfs setstripe $DIR/$tdir -s 0 -i 0 -c 1 + cancel_lru_locks osc + multiop_bg_pause $DIR/$tdir/$tfile-1 Ow8192_yc || + error "mulitop Ow8192_yc failed" + + MULTI_PID1=$! + multiop_bg_pause $DIR/$tdir/$tfile-2 Ow8192_c || + error "mulitop Ow8192_c failed" + + MULTI_PID2=$! + ost_evict_client + + kill -USR1 $MULTI_PID1 + wait $MULTI_PID1 + rc1=$? + kill -USR1 $MULTI_PID2 + wait $MULTI_PID2 + rc2=$? + lctl set_param fail_loc=0x0 + client_reconnect + [ $rc1 -eq 0 -o $rc2 -eq 0 ] && + error_ignore "multiop didn't fail fsync: $rc1 or close: $rc2" || true + + dmesg | grep "dirty page discard:" || \ + error "no discarded dirty page found!" +} +run_test 24b "test dirty page discard due to client eviction" + test_26a() { # was test_26 bug 5921 - evict dead exports by pinger # this test can only run from a client on a separate node. remote_ost || { skip "local OST" && return 0; } -- 1.8.3.1