X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fllite%2Flloop.c;h=4c581814ff26c65ceed4b7bfd0ca86287700e238;hb=9ae3b7e83cd19f4ffdc1e111496ca90971f12061;hp=3b13aabd4d91b4eb78a35a249b1e4e8c75644d84;hpb=90bde110350493b1d07b12f692a829b4d2cab617;p=fs%2Flustre-release.git diff --git a/lustre/llite/lloop.c b/lustre/llite/lloop.c index 3b13aab..4c58181 100644 --- a/lustre/llite/lloop.c +++ b/lustre/llite/lloop.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -28,6 +26,8 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -79,9 +79,6 @@ * */ -#ifndef AUTOCONF_INCLUDED -#include -#endif #include #include @@ -94,7 +91,6 @@ #include #include #include -#include #include #include #include @@ -112,7 +108,7 @@ #include #include "llite_internal.h" -#define LLOOP_MAX_SEGMENTS PTLRPC_MAX_BRW_PAGES +#define LLOOP_MAX_SEGMENTS LNET_MAX_IOV /* Possible states of device */ enum { @@ -136,12 +132,12 @@ struct lloop_device { int old_gfp_mask; - cfs_spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; - int lo_state; - cfs_semaphore_t lo_sem; - cfs_semaphore_t lo_ctl_mutex; + spinlock_t lo_lock; + struct bio *lo_bio; + struct bio *lo_biotail; + int lo_state; + struct semaphore lo_sem; + struct mutex lo_ctl_mutex; cfs_atomic_t lo_pending; cfs_waitq_t lo_bh_wait; @@ -170,7 +166,7 @@ static int lloop_major; static int max_loop = MAX_LOOP_DEFAULT; static struct lloop_device *loop_dev; static struct gendisk **disks; -static cfs_semaphore_t lloop_mutex; +static struct mutex lloop_mutex; static void *ll_iocontrol_magic = NULL; static loff_t get_loop_size(struct lloop_device *lo, struct file *file) @@ -240,37 +236,37 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) ll_stats_ops_tally(ll_i2sbi(inode), (rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ, - page_count << PAGE_CACHE_SHIFT); + page_count); pvec->ldp_size = page_count << PAGE_CACHE_SHIFT; pvec->ldp_nr = page_count; - /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to - * write those pages into OST. Even worse case is that more pages - * would be asked to write out to swap space, and then finally get here - * again. - * Unfortunately this is NOT easy to fix. - * Thoughts on solution: - * 0. Define a reserved pool for cl_pages, which could be a list of - * pre-allocated cl_pages from cl_page_kmem; - * 1. Define a new operation in cl_object_operations{}, says clo_depth, - * which measures how many layers for this lustre object. Generally - * speaking, the depth would be 2, one for llite, and one for lovsub. - * However, for SNS, there will be more since we need additional page - * to store parity; - * 2. Reserve the # of (page_count * depth) cl_pages from the reserved - * pool. Afterwards, the clio would allocate the pages from reserved - * pool, this guarantees we neeedn't allocate the cl_pages from - * generic cl_page slab cache. - * Of course, if there is NOT enough pages in the pool, we might - * be asked to write less pages once, this purely depends on - * implementation. Anyway, we should be careful to avoid deadlocking. 
- */ - LOCK_INODE_MUTEX(inode); - bytes = ll_direct_rw_pages(env, io, rw, inode, pvec); - UNLOCK_INODE_MUTEX(inode); - cl_io_fini(env, io); - return (bytes == pvec->ldp_size) ? 0 : (int)bytes; + /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to + * write those pages into OST. Even worse case is that more pages + * would be asked to write out to swap space, and then finally get here + * again. + * Unfortunately this is NOT easy to fix. + * Thoughts on solution: + * 0. Define a reserved pool for cl_pages, which could be a list of + * pre-allocated cl_pages; + * 1. Define a new operation in cl_object_operations{}, says clo_depth, + * which measures how many layers for this lustre object. Generally + * speaking, the depth would be 2, one for llite, and one for lovsub. + * However, for SNS, there will be more since we need additional page + * to store parity; + * 2. Reserve the # of (page_count * depth) cl_pages from the reserved + * pool. Afterwards, the clio would allocate the pages from reserved + * pool, this guarantees we neeedn't allocate the cl_pages from + * generic cl_page slab cache. + * Of course, if there is NOT enough pages in the pool, we might + * be asked to write less pages once, this purely depends on + * implementation. Anyway, we should be careful to avoid deadlocking. + */ + mutex_lock(&inode->i_mutex); + bytes = ll_direct_rw_pages(env, io, rw, inode, pvec); + mutex_unlock(&inode->i_mutex); + cl_io_fini(env, io); + return (bytes == pvec->ldp_size) ? 0 : (int)bytes; } /* @@ -278,19 +274,19 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) */ static void loop_add_bio(struct lloop_device *lo, struct bio *bio) { - unsigned long flags; - - cfs_spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_biotail) { - lo->lo_biotail->bi_next = bio; - lo->lo_biotail = bio; - } else - lo->lo_bio = lo->lo_biotail = bio; - cfs_spin_unlock_irqrestore(&lo->lo_lock, flags); - - cfs_atomic_inc(&lo->lo_pending); - if (cfs_waitq_active(&lo->lo_bh_wait)) - cfs_waitq_signal(&lo->lo_bh_wait); + unsigned long flags; + + spin_lock_irqsave(&lo->lo_lock, flags); + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; + } else + lo->lo_bio = lo->lo_biotail = bio; + spin_unlock_irqrestore(&lo->lo_lock, flags); + + cfs_atomic_inc(&lo->lo_pending); + if (cfs_waitq_active(&lo->lo_bh_wait)) + cfs_waitq_signal(&lo->lo_bh_wait); } /* @@ -298,18 +294,18 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio) */ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) { - struct bio *first; - struct bio **bio; - unsigned int count = 0; - unsigned int page_count = 0; - int rw; - - cfs_spin_lock_irq(&lo->lo_lock); - first = lo->lo_bio; - if (unlikely(first == NULL)) { - cfs_spin_unlock_irq(&lo->lo_lock); - return 0; - } + struct bio *first; + struct bio **bio; + unsigned int count = 0; + unsigned int page_count = 0; + int rw; + + spin_lock_irq(&lo->lo_lock); + first = lo->lo_bio; + if (unlikely(first == NULL)) { + spin_unlock_irq(&lo->lo_lock); + return 0; + } /* TODO: need to split the bio, too bad. 
*/ LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS); @@ -338,11 +334,12 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) lo->lo_bio = NULL; } *req = first; - cfs_spin_unlock_irq(&lo->lo_lock); - return count; + spin_unlock_irq(&lo->lo_lock); + return count; } -static int loop_make_request(struct request_queue *q, struct bio *old_bio) +static ll_mrf_ret +loop_make_request(struct request_queue *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; int rw = bio_rw(old_bio); @@ -354,9 +351,9 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio) CDEBUG(D_INFO, "submit bio sector %llu size %u\n", (unsigned long long)old_bio->bi_sector, old_bio->bi_size); - cfs_spin_lock_irq(&lo->lo_lock); - inactive = (lo->lo_state != LLOOP_BOUND); - cfs_spin_unlock_irq(&lo->lo_lock); + spin_lock_irq(&lo->lo_lock); + inactive = (lo->lo_state != LLOOP_BOUND); + spin_unlock_irq(&lo->lo_lock); if (inactive) goto err; @@ -370,12 +367,13 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio) goto err; } loop_add_bio(lo, old_bio); - return 0; + LL_MRF_RETURN(0); err: - cfs_bio_io_error(old_bio, old_bio->bi_size); - return 0; + cfs_bio_io_error(old_bio, old_bio->bi_size); + LL_MRF_RETURN(0); } +#ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN /* * kick off io on the underlying address space */ @@ -386,6 +384,7 @@ static void loop_unplug(struct request_queue *q) clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); blk_run_address_space(lo->lo_backing_file->f_mapping); } +#endif static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) { @@ -439,15 +438,15 @@ static int loop_thread(void *data) /* * up sem, we are running */ - cfs_up(&lo->lo_sem); - - for (;;) { - cfs_wait_event(lo->lo_bh_wait, loop_active(lo)); - if (!cfs_atomic_read(&lo->lo_pending)) { - int exiting = 0; - cfs_spin_lock_irq(&lo->lo_lock); - exiting = (lo->lo_state == LLOOP_RUNDOWN); - cfs_spin_unlock_irq(&lo->lo_lock); + up(&lo->lo_sem); + + for (;;) { + cfs_wait_event(lo->lo_bh_wait, loop_active(lo)); + if (!cfs_atomic_read(&lo->lo_pending)) { + int exiting = 0; + spin_lock_irq(&lo->lo_lock); + exiting = (lo->lo_state == LLOOP_RUNDOWN); + spin_unlock_irq(&lo->lo_lock); if (exiting) break; } @@ -479,7 +478,7 @@ static int loop_thread(void *data) cl_env_put(env, &refcheck); out: - cfs_up(&lo->lo_sem); + up(&lo->lo_sem); return ret; } @@ -538,7 +537,9 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, */ blk_queue_make_request(lo->lo_queue, loop_make_request); lo->lo_queue->queuedata = lo; +#ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN lo->lo_queue->unplug_fn = loop_unplug; +#endif /* queue parameters */ CLASSERT(CFS_PAGE_SIZE < (1 << (sizeof(unsigned short) * 8))); @@ -554,7 +555,7 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, set_blocksize(bdev, lo->lo_blocksize); cfs_create_thread(loop_thread, lo, CLONE_KERNEL); - cfs_down(&lo->lo_sem); + down(&lo->lo_sem); return 0; out: @@ -578,12 +579,12 @@ static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev, if (filp == NULL) return -EINVAL; - cfs_spin_lock_irq(&lo->lo_lock); - lo->lo_state = LLOOP_RUNDOWN; - cfs_spin_unlock_irq(&lo->lo_lock); - cfs_waitq_signal(&lo->lo_bh_wait); + spin_lock_irq(&lo->lo_lock); + lo->lo_state = LLOOP_RUNDOWN; + spin_unlock_irq(&lo->lo_lock); + cfs_waitq_signal(&lo->lo_bh_wait); - cfs_down(&lo->lo_sem); + down(&lo->lo_sem); lo->lo_backing_file = NULL; lo->ioctl = NULL; lo->lo_device = NULL; @@ -611,9 +612,9 @@ static int lo_open(struct 
inode *inode, struct file *file) struct lloop_device *lo = inode->i_bdev->bd_disk->private_data; #endif - cfs_down(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; - cfs_up(&lo->lo_ctl_mutex); + mutex_unlock(&lo->lo_ctl_mutex); return 0; } @@ -628,9 +629,9 @@ static int lo_release(struct inode *inode, struct file *file) struct lloop_device *lo = inode->i_bdev->bd_disk->private_data; #endif - cfs_down(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_ctl_mutex); --lo->lo_refcnt; - cfs_up(&lo->lo_ctl_mutex); + mutex_unlock(&lo->lo_ctl_mutex); return 0; } @@ -641,17 +642,18 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct lloop_device *lo = bdev->bd_disk->private_data; - struct inode *inode = lo->lo_backing_file->f_dentry->d_inode; + struct inode *inode = NULL; + int err = 0; #else static int lo_ioctl(struct inode *inode, struct file *unused, unsigned int cmd, unsigned long arg) { struct lloop_device *lo = inode->i_bdev->bd_disk->private_data; struct block_device *bdev = inode->i_bdev; -#endif int err = 0; +#endif - cfs_down(&lloop_mutex); + mutex_lock(&lloop_mutex); switch (cmd) { case LL_IOC_LLOOP_DETACH: { err = loop_clr_fd(lo, bdev, 2); @@ -663,6 +665,9 @@ static int lo_ioctl(struct inode *inode, struct file *unused, case LL_IOC_LLOOP_INFO: { struct lu_fid fid; + LASSERT(lo->lo_backing_file != NULL); + if (inode == NULL) + inode = lo->lo_backing_file->f_dentry->d_inode; if (lo->lo_state == LLOOP_BOUND) fid = ll_i2info(inode)->lli_fid; else @@ -677,7 +682,7 @@ static int lo_ioctl(struct inode *inode, struct file *unused, err = -EINVAL; break; } - cfs_up(&lloop_mutex); + mutex_unlock(&lloop_mutex); return err; } @@ -713,7 +718,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file, CWARN("Enter llop_ioctl\n"); - cfs_down(&lloop_mutex); + mutex_lock(&lloop_mutex); switch (cmd) { case LL_IOC_LLOOP_ATTACH: { struct lloop_device *lo_free = NULL; @@ -740,7 +745,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file, if (put_user((long)old_encode_dev(dev), (long*)arg)) GOTO(out, err = -EFAULT); - bdev = open_by_devnum(dev, file->f_mode); + bdev = blkdev_get_by_dev(dev, file->f_mode, NULL); if (IS_ERR(bdev)) GOTO(out, err = PTR_ERR(bdev)); @@ -783,7 +788,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file, } out: - cfs_up(&lloop_mutex); + mutex_unlock(&lloop_mutex); out1: if (rcp) *rcp = err; @@ -829,7 +834,7 @@ static int __init lloop_init(void) goto out_mem3; } - cfs_init_mutex(&lloop_mutex); + mutex_init(&lloop_mutex); for (i = 0; i < max_loop; i++) { struct lloop_device *lo = &loop_dev[i]; @@ -839,11 +844,11 @@ static int __init lloop_init(void) if (!lo->lo_queue) goto out_mem4; - cfs_init_mutex(&lo->lo_ctl_mutex); - cfs_init_mutex_locked(&lo->lo_sem); - cfs_waitq_init(&lo->lo_bh_wait); - lo->lo_number = i; - cfs_spin_lock_init(&lo->lo_lock); + mutex_init(&lo->lo_ctl_mutex); + sema_init(&lo->lo_sem, 0); + cfs_waitq_init(&lo->lo_bh_wait); + lo->lo_number = i; + spin_lock_init(&lo->lo_lock); disk->major = lloop_major; disk->first_minor = i; disk->fops = &lo_fops;
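
The queuing logic this patch touches (loop_add_bio(), loop_get_bio(), loop_thread(), and the lo_sem handshake that sema_init(&lo->lo_sem, 0) now sets up in place of cfs_init_mutex_locked()) follows one pattern: a lock-protected singly linked request list, a wakeup for the service thread, and a semaphore initialized to zero that acts as a start/stop completion. The sketch below is an illustrative userspace analog of that pattern, not part of the patch and not Lustre code; every name in it is hypothetical, and POSIX threads stand in for the kernel primitives.

/*
 * Illustrative userspace analog (NOT part of the patch): a producer/consumer
 * queue guarded by a lock, plus a semaphore initialized to 0 used as a
 * start/stop handshake, mirroring sema_init(&lo->lo_sem, 0) replacing
 * cfs_init_mutex_locked().  All names are hypothetical.
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <stdlib.h>

struct req {                            /* stand-in for struct bio */
        int             nr;
        struct req     *next;
};

static struct req      *q_head, *q_tail;                     /* lo_bio / lo_biotail */
static pthread_mutex_t  q_lock = PTHREAD_MUTEX_INITIALIZER;  /* lo_lock   */
static pthread_cond_t   q_wait = PTHREAD_COND_INITIALIZER;   /* lo_bh_wait */
static sem_t            q_sem;                               /* lo_sem     */
static int              q_rundown;                           /* LLOOP_RUNDOWN */

static void add_req(struct req *r)      /* loop_add_bio() analog */
{
        pthread_mutex_lock(&q_lock);
        if (q_tail)
                q_tail->next = r;
        else
                q_head = r;
        q_tail = r;
        pthread_cond_signal(&q_wait);   /* cfs_waitq_signal() analog */
        pthread_mutex_unlock(&q_lock);
}

static void *service_thread(void *arg)  /* loop_thread() analog */
{
        sem_post(&q_sem);               /* up(&lo->lo_sem): "we are running" */
        pthread_mutex_lock(&q_lock);
        for (;;) {
                while (q_head == NULL && !q_rundown)
                        pthread_cond_wait(&q_wait, &q_lock);
                if (q_head == NULL && q_rundown)
                        break;          /* queue drained and rundown requested */
                struct req *r = q_head; /* loop_get_bio() analog */
                q_head = r->next;
                if (q_head == NULL)
                        q_tail = NULL;
                pthread_mutex_unlock(&q_lock);
                printf("handled request %d\n", r->nr);
                free(r);
                pthread_mutex_lock(&q_lock);
        }
        pthread_mutex_unlock(&q_lock);
        sem_post(&q_sem);               /* up(&lo->lo_sem): "we have exited" */
        return NULL;
}

int main(void)
{
        pthread_t tid;

        sem_init(&q_sem, 0, 0);         /* sema_init(&lo->lo_sem, 0) analog */
        pthread_create(&tid, NULL, service_thread, NULL);
        sem_wait(&q_sem);               /* loop_set_fd(): wait for thread start */

        for (int i = 0; i < 4; i++) {
                struct req *r = calloc(1, sizeof(*r));
                r->nr = i;
                add_req(r);             /* loop_make_request() analog */
        }

        pthread_mutex_lock(&q_lock);    /* loop_clr_fd() analog */
        q_rundown = 1;
        pthread_cond_signal(&q_wait);
        pthread_mutex_unlock(&q_lock);
        sem_wait(&q_sem);               /* wait for drain + exit handshake */
        pthread_join(tid, NULL);
        return 0;
}

Built with cc -pthread, the sketch queues a few requests, drains them in the service thread, and tears down in the same order loop_clr_fd() relies on: flag rundown, wake the thread, then wait on the zero-initialized semaphore for the exit handshake.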