From 72372486a5e96dd2078e01a697025da41bbf03c2 Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Wed, 25 Jul 2018 14:24:27 +0400 Subject: [PATCH] LU-11347 osd: do not use pagecache for I/O for testing purposes cache is constantly disabled. - with non-rotational storage - when both read and write caches are disabled - sanityn/16c to run fsx with cache disabled Change-Id: If6ea9186485cd0aceb0372b68f4860de3a4fb124 Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/32875 Reviewed-by: Patrick Farrell Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/osd-ldiskfs/osd_handler.c | 16 ++++++ lustre/osd-ldiskfs/osd_internal.h | 3 ++ lustre/osd-ldiskfs/osd_io.c | 104 +++++++++++++++++++++++++++++++++----- lustre/tests/sanityn.sh | 36 +++++++++++++ 4 files changed, 145 insertions(+), 14 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 85aea9b..90af137 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -7308,6 +7308,16 @@ static void osd_key_fini(const struct lu_context *ctx, struct ldiskfs_inode_info *lli = LDISKFS_I(info->oti_inode); struct osd_idmap_cache *idc = info->oti_ins_cache; + if (info->oti_dio_pages) { + int i; + for (i = 0; i < PTLRPC_MAX_BRW_PAGES; i++) { + if (info->oti_dio_pages[i]) + __free_page(info->oti_dio_pages[i]); + } + OBD_FREE(info->oti_dio_pages, + sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES); + } + if (info->oti_inode != NULL) OBD_FREE_PTR(lli); if (info->oti_hlock != NULL) @@ -7618,6 +7628,12 @@ static int osd_mount(const struct lu_env *env, if (lmd_flags & LMD_FLG_NOSCRUB) o->od_auto_scrub_interval = AS_NEVER; + if (blk_queue_nonrot(bdev_get_queue(osd_sb(o)->s_bdev))) { + /* do not use pagecache with flash-backed storage */ + o->od_writethrough_cache = 0; + o->od_read_cache = 0; + } + GOTO(out, rc = 0); out_mnt: diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h 
index c73b489..0306fc3 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -643,6 +643,9 @@ struct osd_thread_info { unsigned int oti_declare_ops_cred[OSD_OT_MAX]; unsigned int oti_declare_ops_used[OSD_OT_MAX]; struct osd_directory oti_iam; + + struct page **oti_dio_pages; + int oti_dio_pages_used; }; extern int ldiskfs_pdo; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index b04c6a5..856695e 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -56,6 +56,15 @@ /* ext_depth() */ #include +static inline bool osd_use_page_cache(struct osd_device *d) +{ + /* do not use pagecache if write and read caching are disabled */ + if (d->od_writethrough_cache + d->od_read_cache == 0) + return false; + /* use pagecache by default */ + return true; +} + static int __osd_init_iobuf(struct osd_device *d, struct osd_iobuf *iobuf, int rw, int line, int pages) { @@ -485,22 +494,54 @@ static int osd_map_remote_to_local(loff_t offset, ssize_t len, int *nrpages, RETURN(0); } -static struct page *osd_get_page(struct dt_object *dt, loff_t offset, - gfp_t gfp_mask) +static struct page *osd_get_page(const struct lu_env *env, struct dt_object *dt, + loff_t offset, gfp_t gfp_mask) { + struct osd_thread_info *oti = osd_oti_get(env); struct inode *inode = osd_dt_obj(dt)->oo_inode; struct osd_device *d = osd_obj2dev(osd_dt_obj(dt)); struct page *page; + int cur = oti->oti_dio_pages_used; LASSERT(inode); - page = find_or_create_page(inode->i_mapping, offset >> PAGE_SHIFT, - gfp_mask); + if (osd_use_page_cache(d)) { + page = find_or_create_page(inode->i_mapping, + offset >> PAGE_SHIFT, + gfp_mask); + + if (likely(page)) + LASSERT(!test_bit(PG_private_2, &page->flags)); + else + lprocfs_counter_add(d->od_stats, LPROC_OSD_NO_PAGE, 1); + } else { + + LASSERT(oti->oti_dio_pages); - if (unlikely(page == NULL)) - lprocfs_counter_add(d->od_stats, LPROC_OSD_NO_PAGE, 1); + if (unlikely(!oti->oti_dio_pages[cur])) { + 
LASSERT(cur < PTLRPC_MAX_BRW_PAGES); + page = alloc_page(gfp_mask); + if (!page) + return NULL; + oti->oti_dio_pages[cur] = page; + } + + page = oti->oti_dio_pages[cur]; + LASSERT(!test_bit(PG_private_2, &page->flags)); + set_bit(PG_private_2, &page->flags); + oti->oti_dio_pages_used++; + + LASSERT(!PageLocked(page)); + lock_page(page); + + LASSERT(!page->mapping); + LASSERT(!PageWriteback(page)); + ClearPageUptodate(page); + + page->index = offset >> PAGE_SHIFT; + } - return page; + return page; } /* @@ -537,6 +578,7 @@ static struct page *osd_get_page(struct dt_object *dt, loff_t offset, static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt, struct niobuf_local *lnb, int npages) { + struct osd_thread_info *oti = osd_oti_get(env); struct pagevec pvec; int i; @@ -547,16 +589,31 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt, #endif for (i = 0; i < npages; i++) { - if (lnb[i].lnb_page == NULL) + struct page *page = lnb[i].lnb_page; + + if (page == NULL) continue; - LASSERT(PageLocked(lnb[i].lnb_page)); - unlock_page(lnb[i].lnb_page); - if (pagevec_add(&pvec, lnb[i].lnb_page) == 0) - pagevec_release(&pvec); + LASSERT(PageLocked(page)); + + /* if the page isn't cached, then reset uptodate + * to prevent reuse */ + if (test_bit(PG_private_2, &page->flags)) { + clear_bit(PG_private_2, &page->flags); + ClearPageUptodate(page); + unlock_page(page); + oti->oti_dio_pages_used--; + } else { + unlock_page(page); + if (pagevec_add(&pvec, page) == 0) + pagevec_release(&pvec); + } dt_object_put(env, dt); + lnb[i].lnb_page = NULL; } + LASSERTF(oti->oti_dio_pages_used == 0, "%d\n", oti->oti_dio_pages_used); + /* Release any partial pagevec */ pagevec_release(&pvec); @@ -591,19 +648,29 @@ static int osd_bufs_get(const struct lu_env *env, struct dt_object *dt, loff_t pos, ssize_t len, struct niobuf_local *lnb, enum dt_bufs_type rw) { + struct osd_thread_info *oti = osd_oti_get(env); struct osd_object *obj = osd_dt_obj(dt); int npages, i, 
rc = 0; gfp_t gfp_mask; LASSERT(obj->oo_inode); + if (!osd_use_page_cache(osd_obj2dev(obj))) { + if (unlikely(!oti->oti_dio_pages)) { + OBD_ALLOC(oti->oti_dio_pages, + sizeof(struct page *) * PTLRPC_MAX_BRW_PAGES); + if (!oti->oti_dio_pages) + return -ENOMEM; + } + } + osd_map_remote_to_local(pos, len, &npages, lnb); /* this could also try less hard for DT_BUFS_TYPE_READAHEAD pages */ gfp_mask = rw & DT_BUFS_TYPE_LOCAL ? (GFP_NOFS | __GFP_HIGHMEM) : GFP_HIGHUSER; for (i = 0; i < npages; i++, lnb++) { - lnb->lnb_page = osd_get_page(dt, lnb->lnb_file_offset, + lnb->lnb_page = osd_get_page(env, dt, lnb->lnb_file_offset, gfp_mask); if (lnb->lnb_page == NULL) GOTO(cleanup, rc = -ENOMEM); @@ -2170,6 +2237,7 @@ void osd_trunc_unlock_all(struct list_head *list) void osd_execute_truncate(struct osd_object *obj) { + struct osd_device *d = osd_obj2dev(obj); struct inode *inode = obj->oo_inode; __u64 size; @@ -2196,8 +2264,16 @@ void osd_execute_truncate(struct osd_object *obj) * avoid data corruption during direct disk write. 
b=17397 */ size = i_size_read(inode); - if ((size & ~PAGE_MASK) != 0) + if ((size & ~PAGE_MASK) == 0) + return; + if (osd_use_page_cache(d)) { filemap_fdatawrite_range(inode->i_mapping, size, size + 1); + } else { + /* Notice we use "wait" version to ensure I/O is complete */ + filemap_write_and_wait_range(inode->i_mapping, size, size + 1); + invalidate_mapping_pages(inode->i_mapping, size >> PAGE_SHIFT, + size >> PAGE_SHIFT); + } } void osd_process_truncates(struct list_head *list) diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 61817b9..011b5f5 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -444,6 +444,42 @@ test_16b() { } run_test 16b "$FSXNUM iterations of dual-mount fsx at small size" +test_16c() { + local file1=$DIR1/$tfile + local file2=$DIR2/$tfile + local stripe_size=$(do_facet $SINGLEMDS \ + "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + + [ $(facet_fstype ost1) != ldiskfs ] && skip "dio on ldiskfs only" + + # to allocate grant because it may run out due to test_15. + $LFS setstripe -c -1 $file1 + dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync + dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync + rm -f $file1 + wait_delete_completed + + local list=$(comma_list $(osts_nodes)) + if ! get_osd_param $list '' read_cache_enable >/dev/null; then + skip "not cache-capable obdfilter" + fi + + set_osd_param $list '' read_cache_enable 0 + set_osd_param $list '' writethrough_cache_enable 0 + + $LFS setstripe -c -1 $file1 # b=10919 + fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 \ + || error "fsx failed" + rm -f $file1 + + set_osd_param $list '' read_cache_enable 1 + set_osd_param $list '' writethrough_cache_enable 1 + + return 0 +} +run_test 16c "verify data consistency on ldiskfs with cache disabled (b=17397)" + + test_17() { # bug 3513, 3667 remote_ost_nodsh && skip "remote OST with nodsh" && return -- 1.8.3.1