X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Flloop.c;h=b8ae0b50f440e6643c1639e67ed7f48367c4f67f;hp=32413077f788d6ef0c56e9c3aac37dd8eb479da9;hb=5e5e4ae2be4bc377f0f896163ae59bf338c4250c;hpb=6869932b552ac705f411de3362f01bd50c1f6f7d diff --git a/lustre/llite/lloop.c b/lustre/llite/lloop.c index 3241307..b8ae0b5 100644 --- a/lustre/llite/lloop.c +++ b/lustre/llite/lloop.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,9 +42,6 @@ * Copyright 1993 by Theodore Ts'o. Redistribution of this file is * permitted under the GNU General Public License. * - * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993 - * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996 - * * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996 * @@ -56,10 +53,6 @@ * * Loadable modules and other fixes by AK, 1998 * - * Make real block number available to downstream transfer functions, enables - * CBC (and relatives) mode encryption requiring unique IVs per data block. - * Reed H. Petty, rhp@draper.net - * * Maximum number of loop devices now dynamic via max_loop module parameter. * Russell Kroll 19990701 * @@ -86,12 +79,10 @@ * */ -#ifndef AUTOCONF_INCLUDED -#include -#endif #include #include +#include #include #include #include @@ -101,7 +92,6 @@ #include #include #include -#include #include #include #include @@ -116,10 +106,9 @@ #include #include -#include #include "llite_internal.h" -#define LLOOP_MAX_SEGMENTS PTLRPC_MAX_BRW_PAGES +#define LLOOP_MAX_SEGMENTS LNET_MAX_IOV /* Possible states of device */ enum { @@ -129,37 +118,37 @@ enum { }; struct lloop_device { - int lo_number; - int lo_refcnt; - loff_t lo_offset; - loff_t lo_sizelimit; - int lo_flags; - int (*ioctl)(struct lloop_device *, int cmd, - unsigned long arg); - - struct file * lo_backing_file; - struct block_device *lo_device; - unsigned lo_blocksize; - - int old_gfp_mask; - - spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; - int lo_state; - struct semaphore lo_sem; - struct semaphore lo_ctl_mutex; - struct semaphore lo_bh_mutex; - atomic_t lo_pending; - - request_queue_t *lo_queue; - - /* data to handle bio for lustre. */ - struct lo_request_data { - struct brw_page lrd_pages[LLOOP_MAX_SEGMENTS]; - struct obdo lrd_oa; - } lo_requests[1]; - + int lo_number; + int lo_refcnt; + loff_t lo_offset; + loff_t lo_sizelimit; + int lo_flags; + struct file *lo_backing_file; + struct block_device *lo_device; + unsigned lo_blocksize; + + gfp_t old_gfp_mask; + + spinlock_t lo_lock; + struct bio *lo_bio; + struct bio *lo_biotail; + int lo_state; + struct semaphore lo_sem; + struct mutex lo_ctl_mutex; + atomic_t lo_pending; + wait_queue_head_t lo_bh_wait; + + struct request_queue *lo_queue; + + const struct lu_env *lo_env; + struct cl_io lo_io; + struct ll_dio_pages lo_pvec; + + /* data to handle bio for lustre. 
*/ + struct lo_request_data { + struct page *lrd_pages[LLOOP_MAX_SEGMENTS]; + loff_t lrd_offsets[LLOOP_MAX_SEGMENTS]; + } lo_requests[1]; }; /* @@ -170,10 +159,11 @@ enum { }; static int lloop_major; -static int max_loop = 8; +#define MAX_LOOP_DEFAULT 16 +static int max_loop = MAX_LOOP_DEFAULT; static struct lloop_device *loop_dev; static struct gendisk **disks; -static struct semaphore lloop_mutex; +static struct mutex lloop_mutex; static void *ll_iocontrol_magic = NULL; static loff_t get_loop_size(struct lloop_device *lo, struct file *file) @@ -194,113 +184,204 @@ static loff_t get_loop_size(struct lloop_device *lo, struct file *file) return loopsize >> 9; } -static int do_bio_filebacked(struct lloop_device *lo, struct bio *bio) +static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head) { - struct inode *inode = lo->lo_backing_file->f_dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct obd_info oinfo = {{{ 0 }}}; - struct brw_page *pg = lo->lo_requests[0].lrd_pages; - struct obdo *oa = &lo->lo_requests[0].lrd_oa; - pgoff_t offset; - int ret, cmd, i, opc; - struct bio_vec *bvec; - - BUG_ON(bio->bi_hw_segments > LLOOP_MAX_SEGMENTS); - - offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset; - bio_for_each_segment(bvec, bio, i) { - BUG_ON(bvec->bv_offset != 0); - BUG_ON(bvec->bv_len != CFS_PAGE_SIZE); - - pg->pg = bvec->bv_page; - pg->off = offset; - pg->count = bvec->bv_len; - pg->flag = OBD_BRW_SRVLOCK; - - pg++; - offset += bvec->bv_len; - } - - oa->o_mode = inode->i_mode; - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLMODE | - OBD_MD_FLTYPE |OBD_MD_FLGROUP; - obdo_from_inode(oa, inode, OBD_MD_FLFID | OBD_MD_FLGENER); - - cmd = OBD_BRW_READ; - if (bio_rw(bio) == WRITE) - cmd = OBD_BRW_WRITE; - - if (cmd == OBD_BRW_WRITE) - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_WRITE, bio->bi_size); - else - ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_BRW_READ, bio->bi_size); - oinfo.oi_oa = oa; - oinfo.oi_md = lsm; - opc = cmd & OBD_BRW_WRITE ? 
CAPA_OPC_OSS_WRITE : CAPA_OPC_OSS_RW;
-        oinfo.oi_capa = ll_osscapa_get(inode, opc);
-        ret = obd_brw(cmd, ll_i2dtexp(inode), &oinfo,
-                      (obd_count)(i - bio->bi_idx),
-                      lo->lo_requests[0].lrd_pages, NULL);
-        capa_put(oinfo.oi_capa);
-        if (ret == 0)
-                obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
-        return ret;
+	const struct lu_env *env = lo->lo_env;
+	struct cl_io *io = &lo->lo_io;
+	struct dentry *de = lo->lo_backing_file->f_path.dentry;
+	struct inode *inode = de->d_inode;
+	struct cl_object *obj = ll_i2info(inode)->lli_clob;
+	pgoff_t offset;
+	int ret;
+#ifdef HAVE_BVEC_ITER
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+#else
+	int iter;
+	struct bio_vec *bvec;
+#endif
+	int rw;
+	size_t page_count = 0;
+	struct bio *bio;
+	ssize_t bytes;
+
+	struct ll_dio_pages *pvec = &lo->lo_pvec;
+	struct page **pages = pvec->ldp_pages;
+	loff_t *offsets = pvec->ldp_offsets;
+
+	truncate_inode_pages(inode->i_mapping, 0);
+
+	/* initialize the IO */
+	memset(io, 0, sizeof(*io));
+	io->ci_obj = obj;
+	ret = cl_io_init(env, io, CIT_MISC, obj);
+	if (ret)
+		return io->ci_result;
+	io->ci_lockreq = CILR_NEVER;
+
+	LASSERT(head != NULL);
+	rw = head->bi_rw;
+	for (bio = head; bio != NULL; bio = bio->bi_next) {
+		LASSERT(rw == bio->bi_rw);
+
+#ifdef HAVE_BVEC_ITER
+		offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset;
+		bio_for_each_segment_all(bvec, bio, iter) {
+			BUG_ON(bvec.bv_offset != 0);
+			BUG_ON(bvec.bv_len != PAGE_CACHE_SIZE);
+
+			pages[page_count] = bvec.bv_page;
+			offsets[page_count] = offset;
+			page_count++;
+			offset += bvec.bv_len;
+#else
+		offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
+		bio_for_each_segment_all(bvec, bio, iter) {
+			BUG_ON(bvec->bv_offset != 0);
+			BUG_ON(bvec->bv_len != PAGE_CACHE_SIZE);
+
+			pages[page_count] = bvec->bv_page;
+			offsets[page_count] = offset;
+			page_count++;
+			offset += bvec->bv_len;
+#endif
+		}
+		LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
+	}
+
+	ll_stats_ops_tally(ll_i2sbi(inode),
+			(rw == WRITE) ? LPROC_LL_BRW_WRITE : LPROC_LL_BRW_READ,
+			page_count);
+
+	pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
+	pvec->ldp_nr = page_count;
+
+	/* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
+	 * write those pages into OST. An even worse case is that more pages
+	 * would be asked to write out to swap space, and then finally get here
+	 * again.
+	 * Unfortunately this is NOT easy to fix.
+	 * Thoughts on solution:
+	 * 0. Define a reserved pool for cl_pages, which could be a list of
+	 *    pre-allocated cl_pages;
+	 * 1. Define a new operation in cl_object_operations{}, say clo_depth,
+	 *    which measures how many layers this lustre object has. Generally
+	 *    speaking, the depth would be 2: one for llite, and one for lovsub.
+	 *    However, for SNS, there will be more since we need an additional
+	 *    page to store parity;
+	 * 2. Reserve (page_count * depth) cl_pages from the reserved pool.
+	 *    Afterwards, the clio would allocate the pages from the reserved
+	 *    pool; this guarantees we needn't allocate the cl_pages from the
+	 *    generic cl_page slab cache.
+	 *    Of course, if there are NOT enough pages in the pool, we might
+	 *    be asked to write fewer pages at once; this purely depends on the
+	 *    implementation. Anyway, we should be careful to avoid deadlocking.
+	 */
+	mutex_lock(&inode->i_mutex);
+	bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
+	mutex_unlock(&inode->i_mutex);
+	cl_io_fini(env, io);
+	return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
 }
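The FIXME above describes the reservation scheme only in prose. What follows
is a minimal sketch of idea (0), a pool of pre-allocated descriptors drained
and refilled under a spinlock; the names (lloop_page_pool, pool_reserve) and
the flat pool depth are hypothetical and not part of this patch:

	#include <linux/list.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>

	/* Hypothetical reserved pool (idea 0 in the FIXME above): entries are
	 * allocated once at setup, so the write-out path never has to go back
	 * to the slab allocator under memory pressure. */
	struct pool_entry {
		struct list_head pe_link;
		/* the payload, e.g. an embedded cl_page, would live here */
	};

	struct lloop_page_pool {
		spinlock_t	 pp_lock;
		struct list_head pp_free;
	};

	static int pool_init(struct lloop_page_pool *pp, unsigned int depth)
	{
		unsigned int i;

		spin_lock_init(&pp->pp_lock);
		INIT_LIST_HEAD(&pp->pp_free);
		for (i = 0; i < depth; i++) {
			/* error unwinding omitted for brevity */
			struct pool_entry *pe = kzalloc(sizeof(*pe), GFP_KERNEL);

			if (pe == NULL)
				return -ENOMEM;
			list_add(&pe->pe_link, &pp->pp_free);
		}
		return 0;
	}

	/* Grab up to @count entries onto @out (an initialized list head).
	 * Returning fewer than requested is how the caller ends up writing
	 * fewer pages at once, which is what the FIXME means by being
	 * careful to avoid deadlocking when the pool runs short. */
	static unsigned int pool_reserve(struct lloop_page_pool *pp,
					 struct list_head *out,
					 unsigned int count)
	{
		unsigned int got = 0;

		spin_lock(&pp->pp_lock);
		while (got < count && !list_empty(&pp->pp_free)) {
			list_move(pp->pp_free.next, out);
			got++;
		}
		spin_unlock(&pp->pp_lock);
		return got;
	}

Step (2) of the FIXME would then become a pool_reserve() call sized by
page_count times the object's clo_depth, with the caller shrinking the batch
when fewer entries come back.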
0 : (int)bytes; } - /* * Add bio to back of pending list */ static void loop_add_bio(struct lloop_device *lo, struct bio *bio) { - unsigned long flags; - - spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_biotail) { - lo->lo_biotail->bi_next = bio; - lo->lo_biotail = bio; - } else - lo->lo_bio = lo->lo_biotail = bio; - spin_unlock_irqrestore(&lo->lo_lock, flags); - - up(&lo->lo_bh_mutex); + unsigned long flags; + + spin_lock_irqsave(&lo->lo_lock, flags); + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; + } else + lo->lo_bio = lo->lo_biotail = bio; + spin_unlock_irqrestore(&lo->lo_lock, flags); + + atomic_inc(&lo->lo_pending); + if (waitqueue_active(&lo->lo_bh_wait)) + wake_up(&lo->lo_bh_wait); } /* * Grab first pending buffer */ -static struct bio *loop_get_bio(struct lloop_device *lo) +static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req) { - struct bio *bio; + struct bio *first; + struct bio **bio; + unsigned int count = 0; + unsigned int page_count = 0; + int rw; + + spin_lock_irq(&lo->lo_lock); + first = lo->lo_bio; + if (unlikely(first == NULL)) { + spin_unlock_irq(&lo->lo_lock); + return 0; + } + + /* TODO: need to split the bio, too bad. */ + LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS); + + rw = first->bi_rw; + bio = &lo->lo_bio; + while (*bio && (*bio)->bi_rw == rw) { +#ifdef HAVE_BVEC_ITER + CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n", + (unsigned long long)(*bio)->bi_iter.bi_sector, + (*bio)->bi_iter.bi_size, page_count, (*bio)->bi_vcnt); +#else + CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n", + (unsigned long long)(*bio)->bi_sector, (*bio)->bi_size, + page_count, (*bio)->bi_vcnt); +#endif + if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS) + break; - spin_lock_irq(&lo->lo_lock); - if ((bio = lo->lo_bio)) { - if (bio == lo->lo_biotail) - lo->lo_biotail = NULL; - lo->lo_bio = bio->bi_next; - bio->bi_next = NULL; + page_count += (*bio)->bi_vcnt; + count++; + bio = &(*bio)->bi_next; } - spin_unlock_irq(&lo->lo_lock); - - return bio; + if (*bio) { + /* Some of bios can't be mergable. 
*/ + lo->lo_bio = *bio; + *bio = NULL; + } else { + /* Hit the end of queue */ + lo->lo_biotail = NULL; + lo->lo_bio = NULL; + } + *req = first; + spin_unlock_irq(&lo->lo_lock); + return count; } -static int loop_make_request(request_queue_t *q, struct bio *old_bio) +static ll_mrf_ret +loop_make_request(struct request_queue *q, struct bio *old_bio) { struct lloop_device *lo = q->queuedata; int rw = bio_rw(old_bio); + int inactive; if (!lo) - goto out; + goto err; - spin_lock_irq(&lo->lo_lock); - if (lo->lo_state != LLOOP_BOUND) - goto inactive; - atomic_inc(&lo->lo_pending); - spin_unlock_irq(&lo->lo_lock); +#ifdef HAVE_BVEC_ITER + CDEBUG(D_INFO, "submit bio sector %llu size %u\n", + (unsigned long long)old_bio->bi_iter.bi_sector, + old_bio->bi_iter.bi_size); +#else + CDEBUG(D_INFO, "submit bio sector %llu size %u\n", + (unsigned long long)old_bio->bi_sector, old_bio->bi_size); +#endif + + spin_lock_irq(&lo->lo_lock); + inactive = (lo->lo_state != LLOOP_BOUND); + spin_unlock_irq(&lo->lo_lock); + if (inactive) + goto err; if (rw == WRITE) { if (lo->lo_flags & LO_FLAGS_READ_ONLY) @@ -312,98 +393,137 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) goto err; } loop_add_bio(lo, old_bio); - return 0; + LL_MRF_RETURN(0); err: - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); -out: - bio_io_error(old_bio, old_bio->bi_size); - return 0; -inactive: - spin_unlock_irq(&lo->lo_lock); - goto out; + bio_io_error(old_bio); + LL_MRF_RETURN(0); } +#ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN /* * kick off io on the underlying address space */ -static void loop_unplug(request_queue_t *q) +static void loop_unplug(struct request_queue *q) { struct lloop_device *lo = q->queuedata; clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); blk_run_address_space(lo->lo_backing_file->f_mapping); } +#endif static inline void loop_handle_bio(struct lloop_device *lo, struct bio *bio) { - int ret; - ret = do_bio_filebacked(lo, bio); - bio_endio(bio, bio->bi_size, ret); + int ret; + + ret = do_bio_lustrebacked(lo, bio); + while (bio) { + struct bio *tmp = bio->bi_next; + + bio->bi_next = NULL; +#ifdef HAVE_BIO_ENDIO_USES_ONE_ARG + bio->bi_error = ret; + bio_endio(bio); +#else + bio_endio(bio, ret); +#endif + bio = tmp; + } +} + +static inline int loop_active(struct lloop_device *lo) +{ + return atomic_read(&lo->lo_pending) || + (lo->lo_state == LLOOP_RUNDOWN); } /* * worker thread that handles reads/writes to file backed loop devices, - * to avoid blocking in our make_request_fn. it also does loop decrypting - * on reads for block backed loop, as that is too heavy to do from - * b_end_io context where irqs may be disabled. + * to avoid blocking in our make_request_fn. 
*/ static int loop_thread(void *data) { struct lloop_device *lo = data; struct bio *bio; + unsigned int count; + unsigned long times = 0; + unsigned long total_count = 0; - daemonize("lloop%d", lo->lo_number); + struct lu_env *env; + __u16 refcheck; + int ret = 0; set_user_nice(current, -20); lo->lo_state = LLOOP_BOUND; - atomic_inc(&lo->lo_pending); + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + GOTO(out, ret = PTR_ERR(env)); + + lo->lo_env = env; + memset(&lo->lo_pvec, 0, sizeof(lo->lo_pvec)); + lo->lo_pvec.ldp_pages = lo->lo_requests[0].lrd_pages; + lo->lo_pvec.ldp_offsets = lo->lo_requests[0].lrd_offsets; /* * up sem, we are running */ - up(&lo->lo_sem); - - for (;;) { - down_interruptible(&lo->lo_bh_mutex); - /* - * could be upped because of tear-down, not because of - * pending work - */ - if (!atomic_read(&lo->lo_pending)) - break; - - bio = loop_get_bio(lo); - if (!bio) { + up(&lo->lo_sem); + + for (;;) { + wait_event(lo->lo_bh_wait, loop_active(lo)); + if (!atomic_read(&lo->lo_pending)) { + int exiting = 0; + spin_lock_irq(&lo->lo_lock); + exiting = (lo->lo_state == LLOOP_RUNDOWN); + spin_unlock_irq(&lo->lo_lock); + if (exiting) + break; + } + + bio = NULL; + count = loop_get_bio(lo, &bio); + if (!count) { CWARN("lloop(minor: %d): missing bio\n", lo->lo_number); continue; } - loop_handle_bio(lo, bio); - - /* - * upped both for pending work and tear-down, lo_pending - * will hit zero then - */ - if (atomic_dec_and_test(&lo->lo_pending)) - break; - } - up(&lo->lo_sem); - return 0; + total_count += count; + if (total_count < count) { /* overflow */ + total_count = count; + times = 1; + } else { + times++; + } + if ((times & 127) == 0) { + CDEBUG(D_INFO, "total: %lu, count: %lu, avg: %lu\n", + total_count, times, total_count / times); + } + + LASSERT(bio != NULL); + LASSERT(count <= atomic_read(&lo->lo_pending)); + loop_handle_bio(lo, bio); + atomic_sub(count, &lo->lo_pending); + } + cl_env_put(env, &refcheck); + +out: + up(&lo->lo_sem); + return ret; } static int loop_set_fd(struct lloop_device *lo, struct file *unused, struct block_device *bdev, struct file *file) { - struct inode *inode; + struct inode *inode; struct address_space *mapping; - int lo_flags = 0; - int error; + int lo_flags = 0; + int error; loff_t size; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; + if (!try_module_get(THIS_MODULE)) + return -ENODEV; error = -EBUSY; if (lo->lo_state != LLOOP_UNBOUND) @@ -431,11 +551,10 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); - lo->lo_blocksize = CFS_PAGE_SIZE; + lo->lo_blocksize = PAGE_CACHE_SIZE; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; - lo->ioctl = NULL; lo->lo_sizelimit = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); @@ -448,33 +567,35 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused, */ blk_queue_make_request(lo->lo_queue, loop_make_request); lo->lo_queue->queuedata = lo; +#ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN lo->lo_queue->unplug_fn = loop_unplug; +#endif - /* queue parameters */ - blk_queue_hardsect_size(lo->lo_queue, CFS_PAGE_SIZE); - blk_queue_max_sectors(lo->lo_queue, LLOOP_MAX_SEGMENTS); - blk_queue_max_phys_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS); + /* queue parameters */ + blk_queue_max_hw_sectors(lo->lo_queue, + LLOOP_MAX_SEGMENTS << (PAGE_CACHE_SHIFT - 9)); + blk_queue_max_segments(lo->lo_queue, LLOOP_MAX_SEGMENTS); 
set_capacity(disks[lo->lo_number], size); bd_set_size(bdev, size << 9); - set_blocksize(bdev, lo->lo_blocksize); + set_blocksize(bdev, lo->lo_blocksize); - kernel_thread(loop_thread, lo, CLONE_KERNEL); - down(&lo->lo_sem); - return 0; + kthread_run(loop_thread, lo, "lloop%d", lo->lo_number); + down(&lo->lo_sem); + return 0; - out: - /* This is safe: open() is still holding a reference. */ - module_put(THIS_MODULE); - return error; +out: + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + return error; } -static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev, +static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev, int count) { struct file *filp = lo->lo_backing_file; - int gfp = lo->old_gfp_mask; + gfp_t gfp = lo->old_gfp_mask; if (lo->lo_state != LLOOP_BOUND) return -ENXIO; @@ -485,85 +606,97 @@ static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev, if (filp == NULL) return -EINVAL; - spin_lock_irq(&lo->lo_lock); - lo->lo_state = LLOOP_RUNDOWN; - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); - spin_unlock_irq(&lo->lo_lock); + spin_lock_irq(&lo->lo_lock); + lo->lo_state = LLOOP_RUNDOWN; + spin_unlock_irq(&lo->lo_lock); + wake_up(&lo->lo_bh_wait); - down(&lo->lo_sem); + down(&lo->lo_sem); lo->lo_backing_file = NULL; - lo->ioctl = NULL; lo->lo_device = NULL; lo->lo_offset = 0; lo->lo_sizelimit = 0; lo->lo_flags = 0; - ll_invalidate_bdev(bdev, 0); + invalidate_bdev(bdev); set_capacity(disks[lo->lo_number], 0); bd_set_size(bdev, 0); mapping_set_gfp_mask(filp->f_mapping, gfp); lo->lo_state = LLOOP_UNBOUND; - fput(filp); - /* This is safe: open() is still holding a reference. */ - module_put(THIS_MODULE); - return 0; + fput(filp); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + return 0; } -static int lo_open(struct inode *inode, struct file *file) +static int lo_open(struct block_device *bdev, fmode_t mode) { - struct lloop_device *lo = inode->i_bdev->bd_disk->private_data; + struct lloop_device *lo = bdev->bd_disk->private_data; - down(&lo->lo_ctl_mutex); + mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; - up(&lo->lo_ctl_mutex); + mutex_unlock(&lo->lo_ctl_mutex); return 0; } -static int lo_release(struct inode *inode, struct file *file) +#ifdef HAVE_BLKDEV_RELEASE_RETURN_INT +static int +#else +static void +#endif +lo_release(struct gendisk *disk, fmode_t mode) { - struct lloop_device *lo = inode->i_bdev->bd_disk->private_data; - - down(&lo->lo_ctl_mutex); - --lo->lo_refcnt; - up(&lo->lo_ctl_mutex); + struct lloop_device *lo = disk->private_data; - return 0; + mutex_lock(&lo->lo_ctl_mutex); + --lo->lo_refcnt; + mutex_unlock(&lo->lo_ctl_mutex); +#ifdef HAVE_BLKDEV_RELEASE_RETURN_INT + return 0; +#endif } /* lloop device node's ioctl function. 
*/
-static int lo_ioctl(struct inode *inode, struct file *unused,
-                    unsigned int cmd, unsigned long arg)
+static int lo_ioctl(struct block_device *bdev, fmode_t mode,
+		    unsigned int cmd, unsigned long arg)
 {
-        struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
-        struct block_device *bdev = inode->i_bdev;
-        int err = 0;
+	struct lloop_device *lo = bdev->bd_disk->private_data;
+	int err = 0;

-        down(&lloop_mutex);
+	mutex_lock(&lloop_mutex);
 	switch (cmd) {
 	case LL_IOC_LLOOP_DETACH: {
 		err = loop_clr_fd(lo, bdev, 2);
 		if (err == 0)
-                        blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
+			blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */
 		break;
 	}

-        case LL_IOC_LLOOP_INFO: {
-                __u64 ino = 0;
-
-                if (lo->lo_state == LLOOP_BOUND)
-                        ino = lo->lo_backing_file->f_dentry->d_inode->i_ino;
-
-                if (put_user(ino, (__u64 *)arg))
-                        err = -EFAULT;
-                break;
+	case LL_IOC_LLOOP_INFO: {
+		struct inode *inode;
+		struct lu_fid fid;
+
+		if (lo->lo_backing_file == NULL) {
+			err = -ENOENT;
+			break;
+		}
+		inode = lo->lo_backing_file->f_path.dentry->d_inode;
+		if (inode != NULL && lo->lo_state == LLOOP_BOUND)
+			fid = ll_i2info(inode)->lli_fid;
+		else
+			fid_zero(&fid);
+
+		if (copy_to_user((struct lu_fid __user *)arg,
+				 &fid, sizeof(fid)))
+			err = -EFAULT;
+		break;
 	}

 	default:
 		err = -EINVAL;
 		break;
 	}
-        up(&lloop_mutex);
+	mutex_unlock(&lloop_mutex);

 	return err;
 }
@@ -575,15 +708,16 @@ static struct block_device_operations lo_fops = {
 	.ioctl = lo_ioctl,
 };

-/* dynamic iocontrol callback.
- * This callback is registered in lloop_init and will be called by
- * ll_iocontrol_call.
- * This is a llite regular file ioctl function. It takes the responsibility
- * of attaching a file, and detaching a file by a lloop's device numner.
+/* dynamic iocontrol callback.
+ * This callback is registered in lloop_init and will be called by
+ * ll_iocontrol_call.
+ *
+ * This is a llite regular file ioctl function. It takes the responsibility
+ * of attaching or detaching a file by a lloop's device number.
+ */
-static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
-                                   unsigned int cmd, unsigned long arg,
-                                   void *magic, int *rcp)
+static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
+				   unsigned int cmd, unsigned long arg,
+				   void *magic, int *rcp)
 {
 	struct lloop_device *lo = NULL;
 	struct block_device *bdev = NULL;
@@ -598,11 +732,12 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
 	CWARN("Enter lloop_ioctl\n");

-        down(&lloop_mutex);
-        switch (cmd) {
-        case LL_IOC_LLOOP_ATTACH: {
-                struct lloop_device *lo_free = NULL;
-                int i;
+	mutex_lock(&lloop_mutex);
+	switch (cmd) {
+	case LL_IOC_LLOOP_ATTACH: {
+		struct inode *inode = file->f_path.dentry->d_inode;
+		struct lloop_device *lo_free = NULL;
+		int i;

 		for (i = 0; i < max_loop; i++, lo = NULL) {
 			lo = &loop_dev[i];
 			if (lo->lo_state == LLOOP_UNBOUND) {
 				if (!lo_free)
 					lo_free = lo;
 				continue;
 			}
-                        if (lo->lo_backing_file->f_dentry->d_inode ==
-                            file->f_dentry->d_inode)
-                                break;
-                }
-                if (lo || !lo_free)
-                        GOTO(out, err = -EBUSY);
+			if (lo->lo_backing_file->f_path.dentry->d_inode ==
+			    inode)
+				break;
+		}
+		if (lo || !lo_free)
+			GOTO(out, err = -EBUSY);

 		lo = lo_free;
 		dev = MKDEV(lloop_major, lo->lo_number);

 		/* quit if the used pointer is writable */
-                if (put_user((long)old_encode_dev(dev), (long*)arg))
-                        GOTO(out, err = -EFAULT);
+		if (put_user((long)old_encode_dev(dev), (long __user *)arg))
+			GOTO(out, err = -EFAULT);

-                bdev = open_by_devnum(dev, file->f_mode);
+		bdev = blkdev_get_by_dev(dev, file->f_mode, NULL);
 		if (IS_ERR(bdev))
 			GOTO(out, err = PTR_ERR(bdev));
@@ -633,7 +768,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
 		err = loop_set_fd(lo, NULL, bdev, file);
 		if (err) {
 			fput(file);
-                        blkdev_put(bdev);
+			blkdev_put(bdev, 0);
 		}

 		break;
@@ -641,7 +776,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,

 	case LL_IOC_LLOOP_DETACH_BYDEV: {
 		int minor;
-
+
 		dev = old_decode_dev(arg);
 		if (MAJOR(dev) != lloop_major)
 			GOTO(out, err = -EINVAL);
@@ -657,7 +792,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
 		bdev = lo->lo_device;
 		err = loop_clr_fd(lo, bdev, 1);
 		if (err == 0)
-                        blkdev_put(bdev); /* grabbed in LLOOP_ATTACH */
+			blkdev_put(bdev, 0); /* grabbed in LLOOP_ATTACH */

 		break;
 	}
@@ -668,7 +803,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
 	}

 out:
-        up(&lloop_mutex);
+	mutex_unlock(&lloop_mutex);
 out1:
 	if (rcp)
 		*rcp = err;
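For reference, the attach/detach paths above are driven from userspace
through ioctls on a regular Lustre file, not on the block device node. A
minimal usage sketch follows; it is not part of this patch and assumes the
LL_IOC_LLOOP_* ioctl numbers are exported to userspace by the Lustre headers
(e.g. <lustre/lustre_user.h>):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <lustre/lustre_user.h>

	int main(int argc, char *argv[])
	{
		long dev = 0;
		int fd;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <lustre file>\n", argv[0]);
			return 1;
		}

		fd = open(argv[1], O_RDWR);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* The handler picks a free lloop minor, stores the old-style
		 * encoded device number through the pointer argument, and
		 * binds the file to that device. */
		if (ioctl(fd, LL_IOC_LLOOP_ATTACH, &dev) < 0) {
			perror("LL_IOC_LLOOP_ATTACH");
			close(fd);
			return 1;
		}
		printf("attached as device 0x%lx\n", dev);

		/* ... use the block device here ... */

		/* Detach takes the encoded device number by value; any llite
		 * file descriptor will do, since the handler only uses the
		 * minor to look the device up. */
		if (ioctl(fd, LL_IOC_LLOOP_DETACH_BYDEV, dev) < 0)
			perror("LL_IOC_LLOOP_DETACH_BYDEV");

		close(fd);
		return 0;
	}

Note the asymmetry visible in the handler: LL_IOC_LLOOP_ATTACH writes the
device number back through a long pointer, while LL_IOC_LLOOP_DETACH_BYDEV
receives it as the argument value itself.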
@@ -684,25 +819,27 @@ static int __init lloop_init(void)
 	};

 	if (max_loop < 1 || max_loop > 256) {
+		max_loop = MAX_LOOP_DEFAULT;
 		CWARN("lloop: invalid max_loop (must be between"
-                      " 1 and 256), using default (8)\n");
-                max_loop = 8;
+		      " 1 and 256), using default (%u)\n", max_loop);
 	}

 	lloop_major = register_blkdev(0, "lloop");
 	if (lloop_major < 0)
 		return -EIO;

+	CDEBUG(D_CONFIG, "registered lloop major %d with %u minors\n",
+	       lloop_major, max_loop);
+
 	ll_iocontrol_magic = ll_iocontrol_register(lloop_ioctl, 2, cmdlist);
 	if (ll_iocontrol_magic == NULL)
 		goto out_mem1;

-        loop_dev = kmalloc(max_loop * sizeof(struct lloop_device), GFP_KERNEL);
+	OBD_ALLOC_WAIT(loop_dev, max_loop * sizeof(*loop_dev));
 	if (!loop_dev)
 		goto out_mem1;
-        memset(loop_dev, 0, max_loop * sizeof(struct lloop_device));

-        disks = kmalloc(max_loop * sizeof(struct gendisk *), GFP_KERNEL);
+	OBD_ALLOC_WAIT(disks, max_loop * sizeof(*disks));
 	if (!disks)
 		goto out_mem2;
@@ -712,22 +849,21 @@ static int __init lloop_init(void)
 			goto out_mem3;
 	}

-        init_MUTEX(&lloop_mutex);
+	mutex_init(&lloop_mutex);

 	for (i = 0; i < max_loop; i++) {
 		struct lloop_device *lo = &loop_dev[i];
 		struct gendisk *disk = disks[i];

-                memset(lo, 0, sizeof(*lo));
 		lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
 		if (!lo->lo_queue)
 			goto out_mem4;

-                init_MUTEX(&lo->lo_ctl_mutex);
-                init_MUTEX_LOCKED(&lo->lo_sem);
-                init_MUTEX_LOCKED(&lo->lo_bh_mutex);
-                lo->lo_number = i;
-                spin_lock_init(&lo->lo_lock);
+		mutex_init(&lo->lo_ctl_mutex);
+		sema_init(&lo->lo_sem, 0);
+		init_waitqueue_head(&lo->lo_bh_wait);
+		lo->lo_number = i;
+		spin_lock_init(&lo->lo_lock);

 		disk->major = lloop_major;
 		disk->first_minor = i;
 		disk->fops = &lo_fops;
@@ -743,14 +879,14 @@ static int __init lloop_init(void)

 out_mem4:
 	while (i--)
-                blk_put_queue(loop_dev[i].lo_queue);
+		blk_cleanup_queue(loop_dev[i].lo_queue);
 	i = max_loop;
 out_mem3:
 	while (i--)
 		put_disk(disks[i]);
-        kfree(disks);
+	OBD_FREE(disks, max_loop * sizeof(*disks));
 out_mem2:
-        kfree(loop_dev);
+	OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
 out_mem1:
 	unregister_blkdev(lloop_major, "lloop");
 	ll_iocontrol_unregister(ll_iocontrol_magic);
@@ -758,27 +894,28 @@ out_mem1:
 	return -ENOMEM;
 }

-static void lloop_exit(void)
+static void __exit lloop_exit(void)
 {
-        int i;
-
-        ll_iocontrol_unregister(ll_iocontrol_magic);
-        for (i = 0; i < max_loop; i++) {
-                del_gendisk(disks[i]);
-                blk_put_queue(loop_dev[i].lo_queue);
-                put_disk(disks[i]);
-        }
-        if (ll_unregister_blkdev(lloop_major, "lloop"))
-                CWARN("lloop: cannot unregister blkdev\n");
-
-        kfree(disks);
-        kfree(loop_dev);
+	int i;
+
+	ll_iocontrol_unregister(ll_iocontrol_magic);
+	for (i = 0; i < max_loop; i++) {
+		del_gendisk(disks[i]);
+		blk_cleanup_queue(loop_dev[i].lo_queue);
+		put_disk(disks[i]);
+	}
+	unregister_blkdev(lloop_major, "lloop");
+
+	OBD_FREE(disks, max_loop * sizeof(*disks));
+	OBD_FREE(loop_dev, max_loop * sizeof(*loop_dev));
 }

-module_init(lloop_init);
-module_exit(lloop_exit);
-
-CFS_MODULE_PARM(max_loop, "i", int, 0444, "maximum of lloop_device");
-MODULE_AUTHOR("Sun Microsystems, Inc. ");
+module_param(max_loop, int, 0444);
+MODULE_PARM_DESC(max_loop, "maximum number of lloop devices");
+MODULE_AUTHOR("OpenSFS, Inc. ");
 MODULE_DESCRIPTION("Lustre virtual block device");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
 MODULE_LICENSE("GPL");
+
+module_init(lloop_init);
+module_exit(lloop_exit);
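A note on the lo_bh_mutex to lo_bh_wait conversion that runs through this
patch: the new scheme is a conventional wait-queue producer/consumer pattern.
The following distilled, self-contained sketch of the same mechanism uses
hypothetical names (worker_state, ws_produce) and is not part of the patch:

	#include <linux/atomic.h>
	#include <linux/spinlock.h>
	#include <linux/wait.h>

	struct worker_state {
		spinlock_t		ws_lock;
		wait_queue_head_t	ws_wait;
		atomic_t		ws_pending;	/* queued work items */
		int			ws_rundown;	/* shutdown requested */
	};

	/* Mirrors loop_active(): the worker may run when work is queued or
	 * when rundown is requested, so the final wake-up is never missed. */
	static int ws_active(struct worker_state *ws)
	{
		return atomic_read(&ws->ws_pending) || ws->ws_rundown;
	}

	/* Producer side, as in loop_add_bio(): account for the work first,
	 * then wake the worker only if it is actually sleeping. */
	static void ws_produce(struct worker_state *ws)
	{
		atomic_inc(&ws->ws_pending);
		if (waitqueue_active(&ws->ws_wait))
			wake_up(&ws->ws_wait);
	}

	/* Consumer side, as in loop_thread(): drain all pending work before
	 * honoring a rundown request, sampling the flag under the lock. */
	static int ws_worker(void *data)
	{
		struct worker_state *ws = data;

		for (;;) {
			wait_event(ws->ws_wait, ws_active(ws));
			if (!atomic_read(&ws->ws_pending)) {
				int exiting;

				spin_lock_irq(&ws->ws_lock);
				exiting = ws->ws_rundown;
				spin_unlock_irq(&ws->ws_lock);
				if (exiting)
					break;
				continue;	/* spurious wake-up */
			}
			/* ... handle one batch of work ... */
			atomic_dec(&ws->ws_pending);
		}
		return 0;
	}

Because the rundown flag is only honored once the pending count has drained,
loop_clr_fd() above can simply mark the device LLOOP_RUNDOWN, poke
lo_bh_wait, and block on lo_sem until the thread has finished the queue.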