From e46545c5af0b582e292b658cf741c47fdde343e9 Mon Sep 17 00:00:00 2001 From: jxiong Date: Tue, 20 Jan 2009 04:30:21 +0000 Subject: [PATCH] b=5498 r=nikita,adilger Porting lloop driver to HEAD, and add a new test to verify the basic function of lloop driver in sanity --- lustre/llite/llite_internal.h | 21 +++ lustre/llite/lloop.c | 384 ++++++++++++++++++++++++++---------------- lustre/llite/rw26.c | 39 ++++- lustre/llite/vvp_page.c | 7 +- lustre/obdclass/cl_page.c | 7 +- lustre/tests/sanity.sh | 38 ++++- lustre/utils/obd.c | 7 +- 7 files changed, 336 insertions(+), 167 deletions(-) diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index a03e1bf..f34c5d3 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -1216,4 +1216,25 @@ static inline int cl_merge_lvb(struct inode *inode) struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt); +/** direct write pages */ +struct ll_dio_pages { + /** page array to be written. we don't support + * partial pages except the last one. */ + struct page **ldp_pages; + /* offset of each page */ + loff_t *ldp_offsets; + /** if ldp_offsets is NULL, it means a sequential + * pages to be written, then this is the file offset + * of the * first page. */ + loff_t ldp_start_offset; + /** how many bytes are to be written. */ + size_t ldp_size; + /** # of pages in the array. */ + int ldp_nr; +}; + +extern ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, + int rw, struct inode *inode, + struct ll_dio_pages *pv); + #endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/lloop.c b/lustre/llite/lloop.c index 05026f1..0a1b98e 100644 --- a/lustre/llite/lloop.c +++ b/lustre/llite/lloop.c @@ -42,9 +42,6 @@ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is * permitted under the GNU General Public License. * - * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 - * more DES encryption plus IDEA encryption by Nicholas J. 
Leon, June 20, 1996 - * * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 * @@ -56,10 +53,6 @@ * * Loadable modules and other fixes by AK, 1998 * - * Make real block number available to downstream transfer functions, enables - * CBC (and relatives) mode encryption requiring unique IVs per data block. - * Reed H. Petty, rhp@draper.net - * * Maximum number of loop devices now dynamic via max_loop module parameter. * Russell Kroll 19990701 * @@ -129,37 +122,40 @@ enum { }; struct lloop_device { - int lo_number; - int lo_refcnt; - loff_t lo_offset; - loff_t lo_sizelimit; - int lo_flags; + int lo_number; + int lo_refcnt; + loff_t lo_offset; + loff_t lo_sizelimit; + int lo_flags; int (*ioctl)(struct lloop_device *, int cmd, - unsigned long arg); + unsigned long arg); - struct file * lo_backing_file; + struct file *lo_backing_file; struct block_device *lo_device; - unsigned lo_blocksize; + unsigned lo_blocksize; - int old_gfp_mask; + int old_gfp_mask; - spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; - int lo_state; - struct semaphore lo_sem; - struct semaphore lo_ctl_mutex; - struct semaphore lo_bh_mutex; - atomic_t lo_pending; + spinlock_t lo_lock; + struct bio *lo_bio; + struct bio *lo_biotail; + int lo_state; + struct semaphore lo_sem; + struct semaphore lo_ctl_mutex; + atomic_t lo_pending; + wait_queue_head_t lo_bh_wait; - request_queue_t *lo_queue; + request_queue_t *lo_queue; + + const struct lu_env *lo_env; + struct cl_io lo_io; + struct ll_dio_pages lo_pvec; /* data to handle bio for lustre. 
*/ struct lo_request_data { - struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS]; - struct obdo lrd_oa; + struct page *lrd_pages[LLOOP_MAX_SEGMENTS]; + loff_t lrd_offsets[LLOOP_MAX_SEGMENTS]; } lo_requests[1]; - }; /* @@ -170,7 +166,8 @@ enum { }; static int lloop_major; -static int max_loop = 8; +#define MAX_LOOP_DEFAULT 16 +static int max_loop = MAX_LOOP_DEFAULT; static struct lloop_device *loop_dev; static struct gendisk **disks; static struct semaphore lloop_mutex; @@ -194,63 +191,88 @@ static loff_t get_loop_size(struct lloop_device *lo, struct file *file) return loopsize >> 9; } -static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio) +static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) { - struct inode *inode = lo->lo_backing_file->f_dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct obd_info oinfo = {{{ 0 }}}; - struct brw_page *pg = lo->lo_requests[0].lrd_pages; - struct obdo *oa = &lo->lo_requests[0].lrd_oa; - pgoff_t offset; - int ret, cmd, i, opc; - struct bio_vec *bvec; - - BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS); - - offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset; - bio_for_each_segment(bvec, bio, i) { - BUG_ON(bvec->bv_offset != 0); - BUG_ON(bvec->bv_len != CFS_PAGE_SIZE); - - pg->pg = bvec->bv_page; - pg->off = offset; - pg->count = bvec->bv_len; - pg->flag = OBD_BRW_SRVLOCK; - - pg++; - offset += bvec->bv_len; + const struct lu_env *env = lo->lo_env; + struct cl_io *io = &lo->lo_io; + struct inode *inode = lo->lo_backing_file->f_dentry->d_inode; + struct cl_object *obj = ll_i2info(inode)->lli_clob; + pgoff_t offset; + int ret; + int i; + int rw; + obd_count page_count = 0; + struct bio_vec *bvec; + struct bio *bio; + ssize_t bytes; + + struct ll_dio_pages *pvec = &lo->lo_pvec; + struct page **pages = pvec->ldp_pages; + loff_t *offsets = pvec->ldp_offsets; + + truncate_inode_pages(inode->i_mapping, 0); + + /* initialize the IO */ + 
memset(io, 0, sizeof(*io)); + io->ci_obj = obj; + ret = cl_io_init(env, io, CIT_MISC, obj); + if (ret) + return io->ci_result; + io->ci_lockreq = CILR_NEVER; + + LASSERT(head != NULL); + rw = head->bi_rw; + for (bio = head; bio != NULL; bio = bio->bi_next) { + LASSERT(rw == bio->bi_rw); + + offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset; + bio_for_each_segment(bvec, bio, i) { + BUG_ON(bvec->bv_offset != 0); + BUG_ON(bvec->bv_len != CFS_PAGE_SIZE); + + pages[page_count] = bvec->bv_page; + offsets[page_count] = offset; + page_count++; + offset += bvec->bv_len; + } + LASSERT(page_count <= LLOOP_MAX_SEGMENTS); } - oa->o_mode = inode->i_mode; - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE | - OBD_MD_FLTYPE |OBD_MD_FLGROUP; - obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER); - - cmd = OBD_BRW_READ; - if (bio_rw(bio) == WRITE) - cmd = OBD_BRW_WRITE; - - if (cmd == OBD_BRW_WRITE) - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size); - else - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size); - oinfo.oi_oa = oa; - oinfo.oi_md = lsm; - opc = cmd & OBD_BRW_WRITE ? CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW; - oinfo.oi_capa = ll_osscapa_get(inode, opc); - ret = obd_brw(cmd, ll_i2dtexp(inode), &oinfo, - (obd_count)(i - bio->bi_idx), - lo->lo_requests[0].lrd_pages, NULL); - capa_put(oinfo.oi_capa); - if (ret == 0) - obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS); - return ret; + ll_stats_ops_tally(ll_i2sbi(inode), + (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ, + page_count << PAGE_CACHE_SHIFT); + + pvec->ldp_size = page_count << PAGE_CACHE_SHIFT; + pvec->ldp_nr = page_count; + + /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to + * write those pages into OST. Even worse case is that more pages + * would be asked to write out to swap space, and then finally get here + * again. + * Unfortunately this is NOT easy to fix. 
+ * Thoughts on solution: + * 0. Define a reserved pool for cl_pages, which could be a list of + * pre-allocated cl_pages from cl_page_kmem; + * 1. Define a new operation in cl_object_operations{}, says clo_depth, + * which measures how many layers for this lustre object. Generally + * speaking, the depth would be 2, one for llite, and one for lovsub. + * However, for SNS, there will be more since we need additional page + * to store parity; + * 2. Reserve the # of (page_count * depth) cl_pages from the reserved + * pool. Afterwards, the clio would allocate the pages from reserved + * pool, this guarantees we needn't allocate the cl_pages from + * generic cl_page slab cache. + * Of course, if there is NOT enough pages in the pool, we might + * be asked to write less pages once, this purely depends on + * implementation. Anyway, we should be careful to avoid deadlocking. + */ + LOCK_INODE_MUTEX(inode); + bytes = ll_direct_rw_pages(env, io, rw, inode, pvec); + UNLOCK_INODE_MUTEX(inode); + cl_io_fini(env, io); + return (bytes == pvec->ldp_size) ? 
0 : (int)bytes; } - /* * Add bio to back of pending list */ @@ -266,41 +288,77 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio) lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); - up(&lo->lo_bh_mutex); + atomic_inc(&lo->lo_pending); + if (waitqueue_active(&lo->lo_bh_wait)) + wake_up(&lo->lo_bh_wait); } /* * Grab first pending buffer */ -static struct bio *loop_get_bio(struct lloop_device *lo) +static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) { - struct bio *bio; + struct bio *first; + struct bio **bio; + unsigned int count = 0; + unsigned int page_count = 0; + int rw; spin_lock_irq(&lo->lo_lock); - if ((bio = lo->lo_bio)) { - if (bio == lo->lo_biotail) - lo->lo_biotail = NULL; - lo->lo_bio = bio->bi_next; - bio->bi_next = NULL; + first = lo->lo_bio; + if (unlikely(first == NULL)) { + spin_unlock_irq(&lo->lo_lock); + return 0; } - spin_unlock_irq(&lo->lo_lock); - return bio; + /* TODO: need to split the bio, too bad. */ + LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS); + + rw = first->bi_rw; + bio = &lo->lo_bio; + while (*bio && (*bio)->bi_rw == rw) { + CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n", + (unsigned long long)(*bio)->bi_sector, (*bio)->bi_size, + page_count, (*bio)->bi_vcnt); + if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS) + break; + + + page_count += (*bio)->bi_vcnt; + count++; + bio = &(*bio)->bi_next; + } + if (*bio) { + /* Some of bios can't be mergable. 
*/ + lo->lo_bio = *bio; + *bio = NULL; + } else { + /* Hit the end of queue */ + lo->lo_biotail = NULL; + lo->lo_bio = NULL; + } + *req = first; + spin_unlock_irq(&lo->lo_lock); + return count; } static int loop_make_request(request_queue_t *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; int rw = bio_rw(old_bio); + int inactive; if (!lo) - goto out; + goto err; + + CDEBUG(D_INFO, "submit bio sector %llu size %u\n", + (unsigned long long)old_bio->bi_sector, old_bio->bi_size); spin_lock_irq(&lo->lo_lock); - if (lo->lo_state != LLOOP_BOUND) - goto inactive; - atomic_inc(&lo->lo_pending); + inactive = (lo->lo_state != LLOOP_BOUND); spin_unlock_irq(&lo->lo_lock); + if (inactive) + goto err; if (rw == WRITE) { if (lo->lo_flags & LO_FLAGS_READ_ONLY) @@ -314,14 +372,8 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) loop_add_bio(lo, old_bio); return 0; err: - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); -out: bio_io_error(old_bio, old_bio->bi_size); return 0; -inactive: - spin_unlock_irq(&lo->lo_lock); - goto out; } /* @@ -338,27 +390,50 @@ static void loop_unplug(request_queue_t *q) static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) { int ret; - ret = do_bio_filebacked(lo, bio); - bio_endio(bio, bio->bi_size, ret); + ret = do_bio_lustrebacked(lo, bio); + while (bio) { + struct bio *tmp = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio, bio->bi_size, ret); + bio = tmp; + } +} + +static inline int loop_active(struct lloop_device *lo) +{ + return atomic_read(&lo->lo_pending) || (lo->lo_state == LLOOP_RUNDOWN); } /* * worker thread that handles reads/writes to file backed loop devices, - * to avoid blocking in our make_request_fn. it also does loop decrypting - * on reads for block backed loop, as that is too heavy to do from - * b_end_io context where irqs may be disabled. + * to avoid blocking in our make_request_fn. 
*/ static int loop_thread(void *data) { struct lloop_device *lo = data; struct bio *bio; + unsigned int count; + unsigned long times = 0; + unsigned long total_count = 0; + + struct lu_env *env; + int refcheck; + int ret = 0; daemonize("lloop%d", lo->lo_number); set_user_nice(current, -20); lo->lo_state = LLOOP_BOUND; - atomic_inc(&lo->lo_pending); + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + GOTO(out, ret = PTR_ERR(env)); + + lo->lo_env = env; + memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec)); + lo->lo_pvec.ldp_pages = lo->lo_requests[0].lrd_pages; + lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets; /* * up sem, we are running @@ -366,40 +441,54 @@ static int loop_thread(void *data) up(&lo->lo_sem); for (;;) { - down_interruptible(&lo->lo_bh_mutex); - /* - * could be upped because of tear-down, not because of - * pending work - */ - if (!atomic_read(&lo->lo_pending)) - break; + wait_event(lo->lo_bh_wait, loop_active(lo)); + if (!atomic_read(&lo->lo_pending)) { + int exiting = 0; + spin_lock_irq(&lo->lo_lock); + exiting = (lo->lo_state == LLOOP_RUNDOWN); + spin_unlock_irq(&lo->lo_lock); + if (exiting) + break; + } - bio = loop_get_bio(lo); - if (!bio) { + bio = NULL; + count = loop_get_bio(lo, &bio); + if (!count) { CWARN("lloop(minor: %d): missing bio\n", lo->lo_number); continue; } - loop_handle_bio(lo, bio); - /* - * upped both for pending work and tear-down, lo_pending - * will hit zero then - */ - if (atomic_dec_and_test(&lo->lo_pending)) - break; + total_count += count; + if (total_count < count) { /* overflow */ + total_count = count; + times = 1; + } else { + times++; + } + if ((times & 127) == 0) { + CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n", + total_count, times, total_count / times); + } + + LASSERT(bio != NULL); + LASSERT(count <= atomic_read(&lo->lo_pending)); + loop_handle_bio(lo, bio); + atomic_sub(count, &lo->lo_pending); } + cl_env_put(env, &refcheck); +out: up(&lo->lo_sem); - return 0; + return ret; } static int 
loop_set_fd(struct lloop_device *lo, struct file *unused, struct block_device *bdev, struct file *file) { - struct inode *inode; + struct inode *inode; struct address_space *mapping; - int lo_flags = 0; - int error; + int lo_flags = 0; + int error; loff_t size; if (!try_module_get(THIS_MODULE)) @@ -452,8 +541,10 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, /* queue parameters */ blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE); - blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS); + blk_queue_max_sectors(lo->lo_queue, + LLOOP_MAX_SEGMENTS << (CFS_PAGE_SHIFT - 9)); blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS); + blk_queue_max_hw_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS); set_capacity(disks[lo->lo_number], size); bd_set_size(bdev, size << 9); @@ -487,9 +578,8 @@ static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev, spin_lock_irq(&lo->lo_lock); lo->lo_state = LLOOP_RUNDOWN; - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); spin_unlock_irq(&lo->lo_lock); + wake_up(&lo->lo_bh_wait); down(&lo->lo_sem); lo->lo_backing_file = NULL; @@ -533,7 +623,7 @@ static int lo_release(struct inode *inode, struct file *file) /* lloop device node's ioctl function. */ static int lo_ioctl(struct inode *inode, struct file *unused, - unsigned int cmd, unsigned long arg) + unsigned int cmd, unsigned long arg) { struct lloop_device *lo = inode->i_bdev->bd_disk->private_data; struct block_device *bdev = inode->i_bdev; @@ -578,12 +668,13 @@ static struct block_device_operations lo_fops = { /* dynamic iocontrol callback. * This callback is registered in lloop_init and will be called by * ll_iocontrol_call. + * * This is a llite regular file ioctl function. It takes the responsibility - * of attaching a file, and detaching a file by a lloop's device numner. + * of attaching or detaching a file by a lloop's device number. 
*/ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file, - unsigned int cmd, unsigned long arg, - void *magic, int *rcp) + unsigned int cmd, unsigned long arg, + void *magic, int *rcp) { struct lloop_device *lo = NULL; struct block_device *bdev = NULL; @@ -684,25 +775,27 @@ static int __init lloop_init(void) }; if (max_loop < 1 || max_loop > 256) { + max_loop = MAX_LOOP_DEFAULT; CWARN("lloop: invalid max_loop (must be between" - " 1 and 256), using default (8)\n"); - max_loop = 8; + " 1 and 256), using default (%u)\n", max_loop); } lloop_major = register_blkdev(0, "lloop"); if (lloop_major < 0) return -EIO; + CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n", + lloop_major, max_loop); + ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist); if (ll_iocontrol_magic == NULL) goto out_mem1; - loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL); + OBD_ALLOC_WAIT(loop_dev, max_loop * sizeof(*loop_dev)); if (!loop_dev) goto out_mem1; - memset(loop_dev, 0, max_loop * sizeof(struct lloop_device)); - disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL); + OBD_ALLOC_WAIT(disks, max_loop * sizeof(*disks)); if (!disks) goto out_mem2; @@ -718,14 +811,13 @@ static int __init lloop_init(void) struct lloop_device *lo = &loop_dev[i]; struct gendisk *disk = disks[i]; - memset(lo, 0, sizeof(*lo)); lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) goto out_mem4; init_MUTEX(&lo->lo_ctl_mutex); init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); + init_waitqueue_head(&lo->lo_bh_wait); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = lloop_major; @@ -748,9 +840,9 @@ out_mem4: out_mem3: while (i--) put_disk(disks[i]); - kfree(disks); + OBD_FREE(disks, max_loop * sizeof(*disks)); out_mem2: - kfree(loop_dev); + OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev)); out_mem1: unregister_blkdev(lloop_major, "lloop"); ll_iocontrol_unregister(ll_iocontrol_magic); @@ -770,9 
+862,11 @@ static void lloop_exit(void) } if (ll_unregister_blkdev(lloop_major, "lloop")) CWARN("lloop: cannot unregister blkdev\n"); + else + CDEBUG(D_CONFIG, "unregistered lloop major %d\n", lloop_major); - kfree(disks); - kfree(loop_dev); + OBD_FREE(disks, max_loop * sizeof(*disks)); + OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev)); } module_init(lloop_init); diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 031b1ab..fac56d7 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -216,11 +216,9 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty) OBD_FREE(pages, npages * sizeof(*pages)); } -static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, - int rw, struct inode *inode, - struct address_space *mapping, - size_t size, loff_t file_offset, - struct page **pages, int page_count) +ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, + int rw, struct inode *inode, + struct ll_dio_pages *pv) { struct cl_page *clp; struct ccc_page *clup; @@ -229,8 +227,11 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, struct cl_sync_io *anchor = &ccc_env_info(env)->cti_sync_io; int i; ssize_t rc = 0; - ssize_t size_orig = size; - size_t page_size = cl_page_size(obj); + loff_t file_offset = pv->ldp_start_offset; + size_t size = pv->ldp_size; + int page_count = pv->ldp_nr; + struct page **pages = pv->ldp_pages; + size_t page_size = cl_page_size(obj); ENTRY; cl_sync_io_init(anchor, page_count); @@ -238,8 +239,11 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, queue = &io->ci_queue; cl_2queue_init(queue); for (i = 0; i < page_count; i++) { + if (pv->ldp_offsets) + file_offset = pv->ldp_offsets[i]; + LASSERT(!(file_offset & (page_size - 1))); clp = cl_page_find(env, obj, cl_index(obj, file_offset), - pages[i], CPT_TRANSIENT); + pv->ldp_pages[i], CPT_TRANSIENT); if (IS_ERR(clp)) { rc = PTR_ERR(clp); break; } @@ -319,7 +323,7 @@ static 
ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, cl_sync_io_note(anchor, +1); /* wait for the IO to be finished. */ rc = cl_sync_io_wait(env, io, &queue->c2_qout, - anchor) ?: size_orig; + anchor) ?: pv->ldp_size; } } @@ -328,6 +332,23 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, cl_2queue_fini(env, queue); RETURN(rc); } +EXPORT_SYMBOL(ll_direct_rw_pages); + +static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, + int rw, struct inode *inode, + struct address_space *mapping, + size_t size, loff_t file_offset, + struct page **pages, int page_count) +{ + struct ll_dio_pages pvec = { .ldp_pages = pages, + .ldp_nr = page_count, + .ldp_size = size, + .ldp_offsets = NULL, + .ldp_start_offset = file_offset + }; + + return ll_direct_rw_pages(env, io, rw, inode, &pvec); +} /* This is the maximum size of a single O_DIRECT request, based on a 128kB * kmalloc limit. We need to fit all of the brw_page structs, each one diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index d199ad6..b698f52 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -243,7 +243,12 @@ static void vvp_page_completion_common(const struct lu_env *env, struct cl_sync_io *anchor = cp->cpg_sync_io; LINVRNT(cl_page_is_vmlocked(env, clp)); - KLASSERT(!PageWriteback(vmpage)); + + /* Don't assert the page writeback bit here because the lustre file + * may be as a backend of swap space. in this case, the page writeback + * is set by VM, and obvious we shouldn't clear it at all. Fortunately + * this type of pages are all TRANSIENT pages. 
*/ + KLASSERT(ergo(clp->cp_type == CPT_CACHEABLE, !PageWriteback(vmpage))); vvp_vmpage_error(inode, vmpage, ioret); diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index feac1ff..e88427b 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -1259,7 +1259,12 @@ void cl_page_completion(const struct lu_env *env, (const struct lu_env *, const struct cl_page_slice *, int), ioret); - KLASSERT(!PageWriteback(cl_page_vmpage(env, pg))); + /* Don't assert the page writeback bit here because the lustre file + * may be as a backend of swap space. in this case, the page writeback + * is set by VM, and obvious we shouldn't clear it at all. Fortunately + * this type of pages are all TRANSIENT pages. */ + KLASSERT(ergo(pg->cp_type == CPT_CACHEABLE, + !PageWriteback(cl_page_vmpage(env, pg)))); EXIT; } EXPORT_SYMBOL(cl_page_completion); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index fd20f97..c199621 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8,7 +8,7 @@ set -e ONLY=${ONLY:-"$*"} # bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 12653 12653 5188 10764 16260 -ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 65a 65e 68 75 119d $SANITY_EXCEPT" +ALWAYS_EXCEPT=" 27u 42a 42b 42c 42d 45 51d 65a 65e 68b 75 119d $SANITY_EXCEPT" # bug number for skipped test: 2108 9789 3637 9789 3561 5188/5749 1443 #ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27m 42a 42b 42c 42d 45 68 76"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! @@ -3075,12 +3075,15 @@ LLOOP= cleanup_68() { trap 0 if [ ! 
-z "$LLOOP" ]; then - swapoff $LLOOP || error "swapoff failed" + if swapon -s | grep -q $LLOOP; then + swapoff $LLOOP || error "swapoff failed" + fi + $LCTL blockdev_detach $LLOOP || error "detach failed" rm -f $LLOOP unset LLOOP fi - rm -f $DIR/f68 + rm -f $DIR/f68* } meminfo() { @@ -3091,10 +3094,29 @@ swap_used() { swapon -s | awk '($1 == "'$1'") { print $4 }' } +# test case for lloop driver, basic function +test_68a() { + [ "$UID" != 0 ] && skip "must run as root" && return + + grep -q llite_lloop /proc/modules + [ $? -ne 0 ] && skip "can't find module llite_lloop" && return + + LLOOP=$TMP/lloop.`date +%s`.`date +%N` + dd if=/dev/zero of=$DIR/f68a bs=4k count=1024 + $LCTL blockdev_attach $DIR/f68a $LLOOP || error "attach failed" + + trap cleanup_68 EXIT + + directio rdwr $LLOOP 0 1024 4096 || error "direct write failed" + directio rdwr $LLOOP 0 1025 4096 && error "direct write should fail" + + cleanup_68 +} +run_test 68a "lloop driver - basic test ========================" # excercise swapping to lustre by adding a high priority swapfile entry # and then consuming memory until it is used. -test_68() { +test_68b() { # was test_68 [ "$UID" != 0 ] && skip "must run as root" && return lctl get_param -n devices | grep -q obdfilter && \ skip "local OST" && return @@ -3110,10 +3132,10 @@ test_68() { [[ $NR_BLOCKS -le 2048 ]] && NR_BLOCKS=2048 LLOOP=$TMP/lloop.`date +%s`.`date +%N` - dd if=/dev/zero of=$DIR/f68 bs=64k seek=$NR_BLOCKS count=1 - mkswap $DIR/f68 + dd if=/dev/zero of=$DIR/f68b bs=64k seek=$NR_BLOCKS count=1 + mkswap $DIR/f68b - $LCTL blockdev_attach $DIR/f68 $LLOOP || error "attach failed" + $LCTL blockdev_attach $DIR/f68b $LLOOP || error "attach failed" trap cleanup_68 EXIT @@ -3128,7 +3150,7 @@ test_68() { [ $SWAPUSED -eq 0 ] && echo "no swap used???" 
|| true } -run_test 68 "support swapping to Lustre ========================" +run_test 68b "support swapping to Lustre ========================" # bug5265, obdfilter oa2dentry return -ENOENT # #define OBD_FAIL_OST_ENOENT 0x217 diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index a408a9d..63b2757 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -2142,14 +2142,15 @@ static int jt_blockdev_find_module(const char *module) { FILE *fp; int found = 0; - char modname[256]; + char buf[1024]; fp = fopen("/proc/modules", "r"); if (fp == NULL) return -1; - while (fscanf(fp, "%s %*s %*s %*s %*s %*s", modname) == 1) { - if (strcmp(module, modname) == 0) { + while (fgets(buf, 1024, fp) != NULL) { + *strchr(buf, ' ') = 0; + if (strcmp(module, buf) == 0) { found = 1; break; } -- 1.8.3.1