X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Flloop.c;h=5f4dec3f3f5db880783ba53f09767d44e5d9214d;hp=0a1b98eb4d74f0a5fa0a01f92711953c6666233a;hb=69b4763882b4515ff4a1b9b223e522172fdb27f4;hpb=e46545c5af0b582e292b658cf741c47fdde343e9

diff --git a/lustre/llite/lloop.c b/lustre/llite/lloop.c
index 0a1b98e..5f4dec3 100644
--- a/lustre/llite/lloop.c
+++ b/lustre/llite/lloop.c
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -26,8 +24,10 @@
  * GPL HEADER END
  */
 /*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -79,9 +79,6 @@
  *
  */
 
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
 #include <linux/module.h>
 
 #include <linux/sched.h>
@@ -94,7 +91,6 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/suspend.h>
@@ -136,16 +132,16 @@ struct lloop_device {
 
         int                  old_gfp_mask;
 
-        spinlock_t           lo_lock;
+        cfs_spinlock_t       lo_lock;
         struct bio          *lo_bio;
         struct bio          *lo_biotail;
         int                  lo_state;
-        struct semaphore     lo_sem;
-        struct semaphore     lo_ctl_mutex;
-        atomic_t             lo_pending;
-        wait_queue_head_t    lo_bh_wait;
+        cfs_semaphore_t      lo_sem;
+        cfs_mutex_t          lo_ctl_mutex;
+        cfs_atomic_t         lo_pending;
+        cfs_waitq_t          lo_bh_wait;
 
-        request_queue_t     *lo_queue;
+        struct request_queue *lo_queue;
 
         const struct lu_env *lo_env;
         struct cl_io         lo_io;
@@ -170,7 +166,7 @@ static int lloop_major;
 static int max_loop = MAX_LOOP_DEFAULT;
 static struct lloop_device *loop_dev;
 static struct gendisk **disks;
-static struct semaphore lloop_mutex;
+static cfs_mutex_t lloop_mutex;
 static void *ll_iocontrol_magic = NULL;
 
 static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
@@ -240,37 +236,37 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
         ll_stats_ops_tally(ll_i2sbi(inode),
                            (rw == WRITE) ? LPROC_LL_BRW_WRITE :
                                            LPROC_LL_BRW_READ,
-                           page_count << PAGE_CACHE_SHIFT);
+                           page_count);
 
         pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
         pvec->ldp_nr = page_count;
 
-        /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
-         * write those pages into OST. Even worse case is that more pages
-         * would be asked to write out to swap space, and then finally get here
-         * again.
-         * Unfortunately this is NOT easy to fix.
-         * Thoughts on solution:
-         * 0. Define a reserved pool for cl_pages, which could be a list of
-         *    pre-allocated cl_pages from cl_page_kmem;
-         * 1. Define a new operation in cl_object_operations{}, says clo_depth,
-         *    which measures how many layers for this lustre object. Generally
-         *    speaking, the depth would be 2, one for llite, and one for lovsub.
-         *    However, for SNS, there will be more since we need additional page
-         *    to store parity;
-         * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
-         *    pool. Afterwards, the clio would allocate the pages from reserved
-         *    pool, this guarantees we neeedn't allocate the cl_pages from
-         *    generic cl_page slab cache.
-         *    Of course, if there is NOT enough pages in the pool, we might
-         *    be asked to write less pages once, this purely depends on
-         *    implementation. Anyway, we should be careful to avoid deadlocking.
-         */
-        LOCK_INODE_MUTEX(inode);
-        bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
-        UNLOCK_INODE_MUTEX(inode);
-        cl_io_fini(env, io);
-        return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
+        /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
+         * write those pages into OST. Even worse case is that more pages
+         * would be asked to write out to swap space, and then finally get here
+         * again.
+         * Unfortunately this is NOT easy to fix.
+         * Thoughts on solution:
+         * 0. Define a reserved pool for cl_pages, which could be a list of
+         *    pre-allocated cl_pages from cl_page_kmem;
+         * 1. Define a new operation in cl_object_operations{}, says clo_depth,
+         *    which measures how many layers for this lustre object. Generally
+         *    speaking, the depth would be 2, one for llite, and one for lovsub.
+         *    However, for SNS, there will be more since we need additional page
+         *    to store parity;
+         * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
+         *    pool. Afterwards, the clio would allocate the pages from reserved
+         *    pool, this guarantees we neeedn't allocate the cl_pages from
+         *    generic cl_page slab cache.
+         *    Of course, if there is NOT enough pages in the pool, we might
+         *    be asked to write less pages once, this purely depends on
+         *    implementation. Anyway, we should be careful to avoid deadlocking.
+         */
+        mutex_lock(&inode->i_mutex);
+        bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
+        mutex_unlock(&inode->i_mutex);
+        cl_io_fini(env, io);
+        return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
 }
 
 /*
@@ -280,17 +276,17 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
 {
         unsigned long flags;
 
-        spin_lock_irqsave(&lo->lo_lock, flags);
+        cfs_spin_lock_irqsave(&lo->lo_lock, flags);
         if (lo->lo_biotail) {
                 lo->lo_biotail->bi_next = bio;
                 lo->lo_biotail = bio;
         } else
                 lo->lo_bio = lo->lo_biotail = bio;
-        spin_unlock_irqrestore(&lo->lo_lock, flags);
+        cfs_spin_unlock_irqrestore(&lo->lo_lock, flags);
 
-        atomic_inc(&lo->lo_pending);
-        if (waitqueue_active(&lo->lo_bh_wait))
-                wake_up(&lo->lo_bh_wait);
+        cfs_atomic_inc(&lo->lo_pending);
+        if (cfs_waitq_active(&lo->lo_bh_wait))
+                cfs_waitq_signal(&lo->lo_bh_wait);
 }
 
 /*
@@ -304,10 +300,10 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
         unsigned int page_count = 0;
         int rw;
 
-        spin_lock_irq(&lo->lo_lock);
+        cfs_spin_lock_irq(&lo->lo_lock);
         first = lo->lo_bio;
         if (unlikely(first == NULL)) {
-                spin_unlock_irq(&lo->lo_lock);
+                cfs_spin_unlock_irq(&lo->lo_lock);
                 return 0;
         }
 
@@ -338,11 +334,11 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
                 lo->lo_bio = NULL;
         }
         *req = first;
-        spin_unlock_irq(&lo->lo_lock);
+        cfs_spin_unlock_irq(&lo->lo_lock);
         return count;
 }
 
-static int loop_make_request(request_queue_t *q, struct bio *old_bio)
+static int loop_make_request(struct request_queue *q, struct bio *old_bio)
 {
         struct lloop_device *lo = q->queuedata;
         int rw = bio_rw(old_bio);
@@ -354,9 +350,9 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
         CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
                (unsigned long long)old_bio->bi_sector, old_bio->bi_size);
 
-        spin_lock_irq(&lo->lo_lock);
+        cfs_spin_lock_irq(&lo->lo_lock);
         inactive = (lo->lo_state != LLOOP_BOUND);
-        spin_unlock_irq(&lo->lo_lock);
+        cfs_spin_unlock_irq(&lo->lo_lock);
         if (inactive)
                 goto err;
 
@@ -372,20 +368,22 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
         loop_add_bio(lo, old_bio);
         return 0;
 err:
-        bio_io_error(old_bio, old_bio->bi_size);
+        cfs_bio_io_error(old_bio, old_bio->bi_size);
         return 0;
 }
 
+#ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN
 /*
  * kick off io on the underlying address space
  */
-static void loop_unplug(request_queue_t *q)
+static void loop_unplug(struct request_queue *q)
 {
         struct lloop_device *lo = q->queuedata;
 
         clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
         blk_run_address_space(lo->lo_backing_file->f_mapping);
 }
+#endif
 
 static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
 {
@@ -394,14 +392,15 @@ static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio)
         while (bio) {
                 struct bio *tmp = bio->bi_next;
                 bio->bi_next = NULL;
-                bio_endio(bio, bio->bi_size, ret);
+                cfs_bio_endio(bio, bio->bi_size, ret);
                 bio = tmp;
         }
 }
 
 static inline int loop_active(struct lloop_device *lo)
 {
-        return atomic_read(&lo->lo_pending) || (lo->lo_state == LLOOP_RUNDOWN);
+        return cfs_atomic_read(&lo->lo_pending) ||
+               (lo->lo_state == LLOOP_RUNDOWN);
 }
 
 /*
@@ -438,15 +437,15 @@ static int loop_thread(void *data)
         /*
          * up sem, we are running
          */
-        up(&lo->lo_sem);
+        cfs_up(&lo->lo_sem);
 
         for (;;) {
-                wait_event(lo->lo_bh_wait, loop_active(lo));
-                if (!atomic_read(&lo->lo_pending)) {
+                cfs_wait_event(lo->lo_bh_wait, loop_active(lo));
+                if (!cfs_atomic_read(&lo->lo_pending)) {
                         int exiting = 0;
-                        spin_lock_irq(&lo->lo_lock);
+                        cfs_spin_lock_irq(&lo->lo_lock);
                         exiting = (lo->lo_state == LLOOP_RUNDOWN);
-                        spin_unlock_irq(&lo->lo_lock);
+                        cfs_spin_unlock_irq(&lo->lo_lock);
                         if (exiting)
                                 break;
                 }
@@ -471,14 +470,14 @@ static int loop_thread(void *data)
                 }
 
                 LASSERT(bio != NULL);
-                LASSERT(count <= atomic_read(&lo->lo_pending));
+                LASSERT(count <= cfs_atomic_read(&lo->lo_pending));
                 loop_handle_bio(lo, bio);
-                atomic_sub(count, &lo->lo_pending);
+                cfs_atomic_sub(count, &lo->lo_pending);
         }
 
         cl_env_put(env, &refcheck);
 
 out:
-        up(&lo->lo_sem);
+        cfs_up(&lo->lo_sem);
         return ret;
 }
 
@@ -491,7 +490,7 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused,
         int error;
         loff_t size;
 
-        if (!try_module_get(THIS_MODULE))
+        if (!cfs_try_module_get(THIS_MODULE))
                 return -ENODEV;
 
         error = -EBUSY;
@@ -537,27 +536,30 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused,
          */
         blk_queue_make_request(lo->lo_queue, loop_make_request);
         lo->lo_queue->queuedata = lo;
+#ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN
         lo->lo_queue->unplug_fn = loop_unplug;
+#endif
 
         /* queue parameters */
-        blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE);
-        blk_queue_max_sectors(lo->lo_queue,
-                              LLOOP_MAX_SEGMENTS << (CFS_PAGE_SHIFT - 9));
-        blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
-        blk_queue_max_hw_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
+        CLASSERT(CFS_PAGE_SIZE < (1 << (sizeof(unsigned short) * 8)));
+        blk_queue_logical_block_size(lo->lo_queue,
+                                     (unsigned short)CFS_PAGE_SIZE);
+        blk_queue_max_hw_sectors(lo->lo_queue,
+                                 LLOOP_MAX_SEGMENTS << (CFS_PAGE_SHIFT - 9));
+        blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS);
 
         set_capacity(disks[lo->lo_number], size);
         bd_set_size(bdev, size << 9);
 
         set_blocksize(bdev, lo->lo_blocksize);
 
-        kernel_thread(loop_thread, lo, CLONE_KERNEL);
-        down(&lo->lo_sem);
+        cfs_create_thread(loop_thread, lo, CLONE_KERNEL);
+        cfs_down(&lo->lo_sem);
         return 0;
 
 out:
         /* This is safe: open() is still holding a reference. */
-        module_put(THIS_MODULE);
+        cfs_module_put(THIS_MODULE);
         return error;
 }
 
@@ -576,12 +578,12 @@ static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
         if (filp == NULL)
                 return -EINVAL;
 
-        spin_lock_irq(&lo->lo_lock);
+        cfs_spin_lock_irq(&lo->lo_lock);
         lo->lo_state = LLOOP_RUNDOWN;
-        spin_unlock_irq(&lo->lo_lock);
-        wake_up(&lo->lo_bh_wait);
+        cfs_spin_unlock_irq(&lo->lo_lock);
+        cfs_waitq_signal(&lo->lo_bh_wait);
 
-        down(&lo->lo_sem);
+        cfs_down(&lo->lo_sem);
         lo->lo_backing_file = NULL;
         lo->ioctl = NULL;
         lo->lo_device = NULL;
@@ -595,56 +597,82 @@ static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
         lo->lo_state = LLOOP_UNBOUND;
         fput(filp);
         /* This is safe: open() is still holding a reference. */
-        module_put(THIS_MODULE);
+        cfs_module_put(THIS_MODULE);
         return 0;
 }
 
+#ifdef HAVE_BLKDEV_PUT_2ARGS
+static int lo_open(struct block_device *bdev, fmode_t mode)
+{
+        struct lloop_device *lo = bdev->bd_disk->private_data;
+#else
 static int lo_open(struct inode *inode, struct file *file)
 {
         struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
+#endif
 
-        down(&lo->lo_ctl_mutex);
+        cfs_mutex_lock(&lo->lo_ctl_mutex);
         lo->lo_refcnt++;
-        up(&lo->lo_ctl_mutex);
+        cfs_mutex_unlock(&lo->lo_ctl_mutex);
 
         return 0;
 }
 
+#ifdef HAVE_BLKDEV_PUT_2ARGS
+static int lo_release(struct gendisk *disk, fmode_t mode)
+{
+        struct lloop_device *lo = disk->private_data;
+#else
 static int lo_release(struct inode *inode, struct file *file)
 {
         struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
+#endif
 
-        down(&lo->lo_ctl_mutex);
+        cfs_mutex_lock(&lo->lo_ctl_mutex);
         --lo->lo_refcnt;
-        up(&lo->lo_ctl_mutex);
+        cfs_mutex_unlock(&lo->lo_ctl_mutex);
 
         return 0;
 }
 
 /* lloop device node's ioctl function. */
+#ifdef HAVE_BLKDEV_PUT_2ARGS
+static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+                    unsigned int cmd, unsigned long arg)
+{
+        struct lloop_device *lo = bdev->bd_disk->private_data;
+        struct inode *inode = NULL;
+        int err = 0;
+#else
 static int lo_ioctl(struct inode *inode, struct file *unused,
                     unsigned int cmd, unsigned long arg)
 {
         struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
         struct block_device *bdev = inode->i_bdev;
         int err = 0;
+#endif
 
-        down(&lloop_mutex);
+        cfs_mutex_lock(&lloop_mutex);
         switch (cmd) {
         case LL_IOC_LLOOP_DETACH: {
                 err = loop_clr_fd(lo, bdev, 2);
                 if (err == 0)
-                        blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
+                        ll_blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
                 break;
         }
 
         case LL_IOC_LLOOP_INFO: {
-                __u64 ino = 0;
+                struct lu_fid fid;
 
+                LASSERT(lo->lo_backing_file != NULL);
+                if (inode == NULL)
+                        inode = lo->lo_backing_file->f_dentry->d_inode;
                 if (lo->lo_state == LLOOP_BOUND)
-                        ino = lo->lo_backing_file->f_dentry->d_inode->i_ino;
+                        fid = ll_i2info(inode)->lli_fid;
+                else
+                        fid_zero(&fid);
 
-                if (put_user(ino, (__u64 *)arg))
+                if (copy_to_user((struct lu_fid *)arg, &fid, sizeof(fid)))
                         err = -EFAULT;
                 break;
         }
@@ -653,7 +681,7 @@ static int lo_ioctl(struct inode *inode, struct file *unused,
                 err = -EINVAL;
                 break;
         }
-        up(&lloop_mutex);
+        cfs_mutex_unlock(&lloop_mutex);
 
         return err;
 }
@@ -689,7 +717,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
 
         CWARN("Enter llop_ioctl\n");
 
-        down(&lloop_mutex);
+        cfs_mutex_lock(&lloop_mutex);
         switch (cmd) {
         case LL_IOC_LLOOP_ATTACH: {
                 struct lloop_device *lo_free = NULL;
@@ -716,7 +744,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
                 if (put_user((long)old_encode_dev(dev), (long*)arg))
                         GOTO(out, err = -EFAULT);
 
-                bdev = open_by_devnum(dev, file->f_mode);
+                bdev = blkdev_get_by_dev(dev, file->f_mode, NULL);
                 if (IS_ERR(bdev))
                         GOTO(out, err = PTR_ERR(bdev));
 
@@ -724,7 +752,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
                 err = loop_set_fd(lo, NULL, bdev, file);
                 if (err) {
                         fput(file);
-                        blkdev_put(bdev);
+                        ll_blkdev_put(bdev, 0);
                 }
 
                 break;
@@ -748,7 +776,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
                 bdev = lo->lo_device;
                 err = loop_clr_fd(lo, bdev, 1);
                 if (err == 0)
-                        blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
+                        ll_blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
                 break;
         }
 
@@ -759,7 +787,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
         }
 
 out:
-        up(&lloop_mutex);
+        cfs_mutex_unlock(&lloop_mutex);
 out1:
         if (rcp)
                 *rcp = err;
@@ -805,7 +833,7 @@ static int __init lloop_init(void)
                 goto out_mem3;
         }
 
-        init_MUTEX(&lloop_mutex);
+        cfs_mutex_init(&lloop_mutex);
 
         for (i = 0; i < max_loop; i++) {
                 struct lloop_device *lo = &loop_dev[i];
@@ -815,11 +843,11 @@ static int __init lloop_init(void)
                 if (!lo->lo_queue)
                         goto out_mem4;
 
-                init_MUTEX(&lo->lo_ctl_mutex);
-                init_MUTEX_LOCKED(&lo->lo_sem);
-                init_waitqueue_head(&lo->lo_bh_wait);
+                cfs_mutex_init(&lo->lo_ctl_mutex);
+                cfs_sema_init(&lo->lo_sem, 0);
+                cfs_waitq_init(&lo->lo_bh_wait);
                 lo->lo_number = i;
-                spin_lock_init(&lo->lo_lock);
+                cfs_spin_lock_init(&lo->lo_lock);
                 disk->major = lloop_major;
                 disk->first_minor = i;
                 disk->fops = &lo_fops;
@@ -835,7 +863,7 @@ static int __init lloop_init(void)
 
 out_mem4:
         while (i--)
-                blk_put_queue(loop_dev[i].lo_queue);
+                blk_cleanup_queue(loop_dev[i].lo_queue);
         i = max_loop;
 out_mem3:
         while (i--)
@@ -857,7 +885,7 @@ static void lloop_exit(void)
         ll_iocontrol_unregister(ll_iocontrol_magic);
         for (i = 0; i < max_loop; i++) {
                 del_gendisk(disks[i]);
-                blk_put_queue(loop_dev[i].lo_queue);
+                blk_cleanup_queue(loop_dev[i].lo_queue);
                 put_disk(disks[i]);
         }
         if (ll_unregister_blkdev(lloop_major, "lloop"))
@@ -866,7 +894,7 @@ static void lloop_exit(void)
         CDEBUG(D_CONFIG, "unregistered lloop major %d\n", lloop_major);
 
         OBD_FREE(disks, max_loop * sizeof(*disks));
-        OBD_FREE(loop_dev, max_loop * sizeof(loop_dev));
+        OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
 }
 
 module_init(lloop_init);
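
Aside from the patch itself: the loop_add_bio()/loop_get_bio() hunks above manage an intrusive singly linked queue with separate head (lo_bio) and tail (lo_biotail) pointers, chained through bi_next. Below is a minimal userspace C sketch of that queue shape. The names (struct node, q_add, q_take_all) are invented for illustration; the real code holds lo_lock around every queue operation, and loop_get_bio() only dequeues a run of bios with the same read/write direction capped at LLOOP_MAX_SEGMENTS pages, neither of which this sketch models.

#include <stdio.h>

/* Stand-in for struct bio: an intrusive singly linked node. */
struct node {
        struct node *next;
        int          id;
};

/* Mirrors lo_bio (head) and lo_biotail (tail) in struct lloop_device. */
struct queue {
        struct node *head;
        struct node *tail;
};

/* Same shape as loop_add_bio(): O(1) append via the tail pointer.
 * The real code does this under cfs_spin_lock_irqsave(&lo->lo_lock). */
static void q_add(struct queue *q, struct node *n)
{
        n->next = NULL;
        if (q->tail) {
                q->tail->next = n;
                q->tail = n;
        } else {
                q->head = q->tail = n;
        }
}

/* Simplified loop_get_bio(): detach the whole chain in one step. */
static struct node *q_take_all(struct queue *q)
{
        struct node *first = q->head;

        q->head = q->tail = NULL;
        return first;
}

int main(void)
{
        struct queue q = { NULL, NULL };
        struct node a = { NULL, 1 }, b = { NULL, 2 }, c = { NULL, 3 };
        struct node *n;

        q_add(&q, &a);
        q_add(&q, &b);
        q_add(&q, &c);

        /* Drain in FIFO order, like loop_handle_bio() walking bi_next. */
        for (n = q_take_all(&q); n != NULL; n = n->next)
                printf("node %d\n", n->id);
        return 0;
}

Keeping a tail pointer is what lets loop_make_request() enqueue in O(1) from the block layer's submission path, while the loop_thread() consumer drains the accumulated chain in FIFO order after a single lock acquisition.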