From 164bf598e3e4f738cb31e8422904de6dc614a2f0 Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 24 Feb 2004 01:47:59 +0000 Subject: [PATCH] Land b_smallfix onto HEAD (20040223_1817) --- lnet/include/linux/kp30.h | 6 - lnet/klnds/qswlnd/qswlnd_cb.c | 12 +- lustre/ChangeLog | 9 + lustre/include/linux/lustre_compat25.h | 7 +- lustre/include/linux/lustre_idl.h | 1 + lustre/include/linux/lustre_net.h | 2 +- lustre/include/linux/obd.h | 65 ++++---- lustre/include/linux/obd_class.h | 50 +++--- lustre/include/linux/obd_support.h | 3 + .../kernel_patches/patches/2.6.0-test6-mm4.patch | 96 +---------- lustre/ldlm/ldlm_lib.c | 4 +- lustre/liblustre/llite_lib.h | 2 +- lustre/liblustre/rw.c | 39 +++-- lustre/llite/file.c | 7 +- lustre/llite/llite_internal.h | 26 ++- lustre/llite/rw.c | 183 +++++++++++---------- lustre/llite/rw24.c | 2 +- lustre/lov/lov_obd.c | 34 ++-- lustre/lov/lov_pack.c | 8 +- lustre/lvfs/lvfs_linux.c | 22 ++- lustre/mdc/mdc_locks.c | 38 +++-- lustre/mds/handler.c | 3 +- lustre/mds/mds_lov.c | 10 +- lustre/mds/mds_open.c | 29 +++- lustre/mds/mds_reint.c | 4 +- lustre/obdclass/class_obd.c | 10 +- lustre/obdclass/genops.c | 127 +++++++------- lustre/obdclass/lprocfs_status.c | 8 +- lustre/obdfilter/filter.c | 34 ++-- lustre/obdfilter/filter_io_24.c | 3 +- lustre/obdfilter/filter_io_26.c | 4 +- lustre/osc/osc_internal.h | 4 +- lustre/osc/osc_request.c | 87 +++++----- lustre/portals/include/linux/kp30.h | 6 - lustre/portals/knals/qswnal/qswnal_cb.c | 12 +- lustre/ptlrpc/client.c | 32 +++- lustre/ptlrpc/import.c | 6 +- lustre/ptlrpc/pack_generic.c | 1 + lustre/ptlrpc/ptlrpc_internal.h | 2 +- lustre/ptlrpc/recover.c | 31 ++-- lustre/scripts/cvs-modified-files.pl | 47 ------ lustre/scripts/cvsdiffclient | 26 --- lustre/scripts/land1.sh | 93 ----------- lustre/scripts/land2.sh | 28 ---- lustre/tests/acceptance-small.sh | 12 +- lustre/tests/llmountcleanup.sh | 2 +- lustre/tests/recovery-small.sh | 7 + lustre/tests/replay-single.sh | 31 +++- lustre/tests/sanity.sh | 4 
+- lustre/tests/test-framework.sh | 4 +- lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 1 + 52 files changed, 583 insertions(+), 702 deletions(-) delete mode 100755 lustre/scripts/cvs-modified-files.pl delete mode 100755 lustre/scripts/cvsdiffclient delete mode 100755 lustre/scripts/land1.sh delete mode 100755 lustre/scripts/land2.sh diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 6596d33..75e83b4 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -184,8 +184,6 @@ do { \ PREPARE_TQUEUE((wq), (cb), (cbdata)); \ } while (0) -#define ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode) -#define ll_truncate_complete_page(page) truncate_complete_page(page) #define PageUptodate Page_Uptodate #define our_recalc_sigpending(current) recalc_sigpending(current) #define num_online_cpus() smp_num_cpus @@ -202,10 +200,6 @@ static inline void our_cond_resched(void) do { \ INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ } while (0) -#define ll_invalidate_inode_pages(inode) \ - invalidate_inode_pages((inode)->i_mapping) -#define ll_truncate_complete_page(page) \ - truncate_complete_page((page)->mapping, page) #define wait_on_page wait_on_page_locked #define our_recalc_sigpending(current) recalc_sigpending() #define strtok(a,b) strpbrk(a, b) diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c index 4c2bd6a..478c25f 100644 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ b/lnet/klnds/qswlnd/qswlnd_cb.c @@ -1556,7 +1556,7 @@ kqswnal_recvmsg (nal_cb_t *nal, LASSERT (mlen <= rlen); if (krx->krx_nob < KQSW_HDR_SIZE + mlen) { CERROR("Bad message size: have %d, need %d + %d\n", - krx->krx_nob, KQSW_HDR_SIZE, mlen); + krx->krx_nob, (int)KQSW_HDR_SIZE, (int)mlen); return (PTL_FAIL); } @@ -1564,16 +1564,15 @@ kqswnal_recvmsg (nal_cb_t *nal, LASSERT (kiov == NULL || !in_interrupt ()); /* Either all pages or all vaddrs */ LASSERT (!(kiov != NULL && iov != NULL)); - - if (mlen != 0) - { + + if (mlen != 0) { page 
= 0; page_ptr = ((char *) page_address(krx->krx_pages[0])) + KQSW_HDR_SIZE; page_nob = PAGE_SIZE - KQSW_HDR_SIZE; LASSERT (niov > 0); - + if (kiov != NULL) { /* skip complete frags */ while (offset >= kiov->kiov_len) { @@ -1582,7 +1581,8 @@ kqswnal_recvmsg (nal_cb_t *nal, niov--; LASSERT (niov > 0); } - iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; + iov_ptr = ((char *)kmap (kiov->kiov_page)) + + kiov->kiov_offset + offset; iov_nob = kiov->kiov_len - offset; } else { /* skip complete frags */ diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 8807642..0a5e0de 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -28,6 +28,15 @@ tbd Cluster File Systems, Inc. - reset bulk XID's when resending them (caught by 1138 test) - unregister_bulk after timeout - fix lconf error (2694) + - handle write after unfinished setstripe, stripe-only getstripe (2388) + - readahead locks pages, leaves pending causing memory pressure (2673) + - increase OST request buffers to 4096 on large machines (2729) + - fix up permission of existing directories in simple_mkdir (2661) + - init deleted item, add assertions ptlrpc_abort_inflight() (2725) + - don't assign transno to errored transactions (2742) + - don't delete objects on OST if given a bogus objid from MDS (2751) + - handle large client PAGE_SIZE readdir on small PAGE_SIZE MDS (2777) + - if rq_no_resend, then timeout request after recovery (2432) * miscellania - return LL_SUPER_MAGIC from statfs for the filesystem type (1972) diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index a0cafd9..df59db4 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -57,9 +57,11 @@ #define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock) #define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock) #define ll_call_writepage(inode, page) \ - (inode)->i_mapping->a_ops->writepage(page, NULL) + 
(inode)->i_mapping->a_ops->writepage(page, NULL) +#define ll_invalidate_inode_pages(inode) \ + invalidate_inode_pages((inode)->i_mapping) #define ll_truncate_complete_page(page) \ - truncate_complete_page(page->mapping, page) + truncate_complete_page(page->mapping, page) #define ll_vfs_create(a,b,c,d) vfs_create(a,b,c,d) @@ -142,6 +144,7 @@ typedef long sector_t; #define ll_pgcache_unlock(mapping) spin_unlock(&pagecache_lock) #define ll_call_writepage(inode, page) \ (inode)->i_mapping->a_ops->writepage(page) +#define ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode) #define ll_truncate_complete_page(page) truncate_complete_page(page) static inline void __d_drop(struct dentry *dentry) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 7de8875..b2dbd86 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -180,6 +180,7 @@ struct lustre_msg { #define MSG_GEN_FLAG_MASK 0x0000ffff #define MSG_LAST_REPLAY 1 #define MSG_RESENT 2 +#define MSG_REPLAY 4 static inline int lustre_msg_get_flags(struct lustre_msg *msg) { diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 860c6b8..8b34ada 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -87,7 +87,7 @@ * except in the open case where there are a large number of OSTs in a LOV. 
*/ #define MDS_MAXREQSIZE (5 * 1024) -#define MDS_MAXMEM (num_physpages*(PAGE_SIZE/512)) +#define MDS_MAXMEM (num_physpages*(PAGE_SIZE/128)) #define MDS_NBUFS min(MDS_MAXMEM/MDS_BUFSIZE, MDS_NBUF_MAX) #define OST_MAX_THREADS 36UL diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index c43c62d..e911174 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -40,7 +40,7 @@ struct loi_oap_pages { struct list_head lop_pending; int lop_num_pending; struct list_head lop_urgent; - struct list_head lop_pending_sync; + struct list_head lop_pending_group; }; struct lov_oinfo { /* per-stripe data structure */ @@ -65,10 +65,10 @@ static inline void loi_init(struct lov_oinfo *loi) { INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending); INIT_LIST_HEAD(&loi->loi_read_lop.lop_urgent); - INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending_sync); + INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending_group); INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending); INIT_LIST_HEAD(&loi->loi_write_lop.lop_urgent); - INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_sync); + INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_group); INIT_LIST_HEAD(&loi->loi_cli_item); INIT_LIST_HEAD(&loi->loi_write_item); } @@ -103,9 +103,16 @@ struct brw_page { }; enum async_flags { - ASYNC_READY = 0x1, + ASYNC_READY = 0x1, /* ap_make_ready will not be called before this + page is added to an rpc */ ASYNC_URGENT = 0x2, - ASYNC_COUNT_STABLE = 0x4, + ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called + to give the caller a chance to update + or cancel the size of the io */ + ASYNC_GROUP_SYNC = 0x8, /* ap_completion will not be called, instead + the page is accounted for in the + obd_io_group given to + obd_queue_group_io */ }; struct obd_async_page_ops { @@ -115,26 +122,26 @@ struct obd_async_page_ops { void (*ap_completion)(void *data, int cmd, int rc); }; -/* the `osic' is passed down from a caller of obd rw methods. 
the callee - * records enough state such that the caller can sleep on the osic and +/* the `oig' is passed down from a caller of obd rw methods. the callee + * records enough state such that the caller can sleep on the oig and * be woken when all the callees have finished their work */ -struct obd_sync_io_container { - spinlock_t osic_lock; - atomic_t osic_refcount; - int osic_pending; - int osic_rc; - struct list_head osic_occ_list; - wait_queue_head_t osic_waitq; +struct obd_io_group { + spinlock_t oig_lock; + atomic_t oig_refcount; + int oig_pending; + int oig_rc; + struct list_head oig_occ_list; + wait_queue_head_t oig_waitq; }; -/* the osic callback context lets the callee of obd rw methods register +/* the oig callback context lets the callee of obd rw methods register * for callbacks from the caller. */ -struct osic_callback_context { - struct list_head occ_osic_item; +struct oig_callback_context { + struct list_head occ_oig_item; /* called when the caller has received a signal while sleeping. * callees of this method are encouraged to abort their state - * in the osic. This may be called multiple times. */ - void (*occ_interrupted)(struct osic_callback_context *occ); + * in the oig. This may be called multiple times. 
*/ + void (*occ_interrupted)(struct oig_callback_context *occ); }; /* if we find more consumers this could be generalized */ @@ -570,16 +577,16 @@ struct obd_ops { struct lov_oinfo *loi, void *cookie, int cmd, obd_off off, int count, obd_flag brw_flags, obd_flag async_flags); - int (*o_queue_sync_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic, - void *cookie, int cmd, obd_off off, int count, - obd_flag brw_flags); - int (*o_trigger_sync_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic); + int (*o_queue_group_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, int count, + obd_flag brw_flags, obd_flag async_flags); + int (*o_trigger_group_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig); int (*o_set_async_flags)(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_oinfo *loi, void *cookie, diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 71790e8..90a521b 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -68,13 +68,13 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next); -void osic_init(struct obd_sync_io_container **osic); -void osic_add_one(struct obd_sync_io_container *osic, - struct osic_callback_context *occ); -void osic_complete_one(struct obd_sync_io_container *osic, - struct osic_callback_context *occ, int rc); -void osic_release(struct obd_sync_io_container *osic); -int osic_wait(struct obd_sync_io_container *osic); +int oig_init(struct obd_io_group **oig); +void oig_add_one(struct obd_io_group *oig, + struct oig_callback_context *occ); +void oig_complete_one(struct obd_io_group 
*oig, + struct oig_callback_context *occ, int rc); +void oig_release(struct obd_io_group *oig); +int oig_wait(struct obd_io_group *oig); /* config.c */ int class_process_config(struct lustre_cfg *lcfg); @@ -758,37 +758,39 @@ static inline int obd_set_async_flags(struct obd_export *exp, RETURN(rc); } -static inline int obd_queue_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic, - void *cookie, int cmd, obd_off off, - int count, obd_flag brw_flags) +static inline int obd_queue_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, + int count, obd_flag brw_flags, + obd_flag async_flags) { int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, queue_sync_io, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, queue_sync_io); + OBD_CHECK_OP(exp->exp_obd, queue_group_io, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, queue_group_io); LASSERT(cmd & OBD_BRW_RWMASK); - rc = OBP(exp->exp_obd, queue_sync_io)(exp, lsm, loi, osic, cookie, - cmd, off, count, brw_flags); + rc = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig, cookie, + cmd, off, count, brw_flags, + async_flags); RETURN(rc); } -static inline int obd_trigger_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic) +static inline int obd_trigger_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig) { int rc; ENTRY; - OBD_CHECK_OP(exp->exp_obd, trigger_sync_io, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, trigger_sync_io); + OBD_CHECK_OP(exp->exp_obd, trigger_group_io, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, trigger_group_io); - rc = OBP(exp->exp_obd, trigger_sync_io)(exp, lsm, loi, osic); + rc = OBP(exp->exp_obd, trigger_group_io)(exp, lsm, loi, oig); RETURN(rc); } diff --git 
a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index c1a7d13..7b232ea 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -80,6 +80,7 @@ extern unsigned int obd_sync_filter; #define OBD_FAIL_MDS_SYNC_PACK 0x125 #define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 #define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 +#define OBD_FAIL_MDS_ALLOC_OBDO 0x128 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -126,6 +127,8 @@ extern unsigned int obd_sync_filter; #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 #define OBD_FAIL_OBD_LOGD_NET 0x602 +#define OBD_FAIL_TGT_REPLY_NET 0x700 + /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) diff --git a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch index a32f010..2de9932 100644 --- a/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch +++ b/lustre/kernel_patches/patches/2.6.0-test6-mm4.patch @@ -14428,9 +14428,7 @@ switch (MAJOR_OP(regs->iir)) --- linux-2.6.0-test6/arch/parisc/lib/checksum.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/arch/parisc/lib/checksum.c 2003-10-05 00:33:23.000000000 -0700 -@@ -16,8 +16,10 @@ - * - * $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ +@@ -18,6 +18,8 @@ */ -#include +#include @@ -31510,9 +31508,7 @@ #include --- linux-2.6.0-test6/drivers/char/ftape/compressor/zftape-compress.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/drivers/char/ftape/compressor/zftape-compress.c 2003-10-05 00:33:24.000000000 -0700 -@@ -31,6 +31,7 @@ - char zftc_rev[] = "$Revision: 1.5 $"; - char zftc_dat[] = "$Date: 2004/02/23 23:37:02 $"; +@@ -33,4 +33,5 @@ +#include #include @@ -37168,32 +37164,15 @@ --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divamnt.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/divamnt.c 2003-10-05 
00:33:24.000000000 -0700 -@@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ -+/* Id: 2.6.0-test6-mm4.patch,v 1.3.2.1 2004/02/14 07:21:32 nic Exp $ - * - * Driver for Eicon DIVA Server ISDN cards. - * Maint module -@@ -24,10 +24,9 @@ +@@ -24,6 +24,5 @@ #include "platform.h" #include "di_defs.h" #include "divasync.h" -#include "di_defs.h" #include "debug_if.h" --static char *main_revision = "$Revision: 1.5 $"; -+static char *main_revision = "$Revision: 1.5 $"; - - static int major; - --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/divasmain.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/divasmain.c 2003-10-05 00:33:24.000000000 -0700 -@@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ -+/* Id: 2.6.0-test6-mm4.patch,v 1.3.2.1 2004/02/14 07:21:32 nic Exp $ - * - * Low level driver for Eicon DIVA Server ISDN cards. - * @@ -9,13 +9,11 @@ * of the GNU General Public License, incorporated herein by reference. */ @@ -37208,39 +37187,8 @@ #include #include #include -@@ -43,7 +41,7 @@ - #include "diva_dma.h" - #include "diva_pci.h" - --static char *main_revision = "$Revision: 1.5 $"; -+static char *main_revision = "$Revision: 1.5 $"; - - static int major; - ---- linux-2.6.0-test6/drivers/isdn/hardware/eicon/dqueue.c 2003-06-14 12:18:22.000000000 -0700 -+++ 25/drivers/isdn/hardware/eicon/dqueue.c 2003-10-05 00:33:24.000000000 -0700 -@@ -1,10 +1,10 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ -+/* Id: 2.6.0-test6-mm4.patch,v 1.3.2.1 2004/02/14 07:21:32 nic Exp $ - * - * Driver for Eicon DIVA Server ISDN cards. 
- * User Mode IDI Interface - * -- * Copyright 2000,2001 by Armin Schindler (mac@melware.de) -- * Copyright 2000,2001 Cytronics & Melware (info@melware.de) -+ * Copyright 2000-2003 by Armin Schindler (mac@melware.de) -+ * Copyright 2000-2003 Cytronics & Melware (info@melware.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. --- linux-2.6.0-test6/drivers/isdn/hardware/eicon/mntfunc.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/mntfunc.c 2003-10-05 00:33:24.000000000 -0700 -@@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ -+/* Id: 2.6.0-test6-mm4.patch,v 1.4 2004/02/14 03:14:33 rread Exp $ - * - * Driver for Eicon DIVA Server ISDN cards. - * Maint module @@ -14,7 +14,6 @@ #include "platform.h" #include "di_defs.h" @@ -37249,30 +37197,8 @@ #include "debug_if.h" extern char *DRIVERRELEASE_MNT; ---- linux-2.6.0-test6/drivers/isdn/hardware/eicon/os_capi.h 2003-06-14 12:18:25.000000000 -0700 -+++ 25/drivers/isdn/hardware/eicon/os_capi.h 2003-10-05 00:33:24.000000000 -0700 -@@ -1,10 +1,10 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ -+/* Id: 2.6.0-test6-mm4.patch,v 1.3.2.1 2004/02/14 07:21:32 nic Exp $ - * - * ISDN interface module for Eicon active cards DIVA. - * CAPI Interface OS include files - * -- * Copyright 2000-2002 by Armin Schindler (mac@melware.de) -- * Copyright 2000-2002 Cytronics & Melware (info@melware.de) -+ * Copyright 2000-2003 by Armin Schindler (mac@melware.de) -+ * Copyright 2000-2003 Cytronics & Melware (info@melware.de) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. 
--- linux-2.6.0-test6/drivers/isdn/hardware/eicon/platform.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/isdn/hardware/eicon/platform.h 2003-10-05 00:33:24.000000000 -0700 -@@ -1,4 +1,4 @@ --/* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ -+/* Id: 2.6.0-test6-mm4.patch,v 1.3.2.1 2004/02/14 07:21:32 nic Exp $ - * - * platform.h - * @@ -19,7 +19,6 @@ #endif @@ -37752,9 +37678,7 @@ /* Motion JPEG chip registers */ --- linux-2.6.0-test6/drivers/media/video/planb.c 2003-09-08 13:58:57.000000000 -0700 +++ 25/drivers/media/video/planb.c 2003-10-05 00:33:24.000000000 -0700 -@@ -27,7 +27,6 @@ - - /* $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ */ +@@ -29,5 +29,4 @@ -#include #include @@ -38068,8 +37992,7 @@ static struct mtd_info *check_cmd_set(struct map_info *, int); --- linux-2.6.0-test6/drivers/mtd/chips/map_rom.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/drivers/mtd/chips/map_rom.c 2003-10-05 00:33:24.000000000 -0700 -@@ -4,7 +4,6 @@ - * $Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ +@@ -5,6 +5,7 @@ */ -#include @@ -42155,15 +42078,6 @@ add_timer((struct timer_list *)&board->modemline_timer); --- linux-2.6.0-test6/drivers/net/wan/dscc4.c 2003-09-27 18:57:45.000000000 -0700 +++ 25/drivers/net/wan/dscc4.c 2003-10-05 00:36:13.000000000 -0700 -@@ -107,7 +107,7 @@ - #include - - /* Version */ --static const char version[] = "$Id: 2.6.0-test6-mm4.patch,v 1.5 2004/02/23 23:37:02 phil Exp $ for Linux\n"; -+static const char version[] = "Id: 2.6.0-test6-mm4.patch,v 1.3.2.1 2004/02/14 07:21:32 nic Exp $ for Linux\n"; - static int debug; - static int quartz; - @@ -592,6 +592,7 @@ static inline int dscc4_xpr_ack(struct d return (i >= 0 ) ? 
i : -EAGAIN; } diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index d1e2b49..2787619 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -93,8 +93,8 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) sema_init(&cli->cl_sem, 1); cli->cl_conn_count = 0; - memcpy(server_uuid.uuid, lcfg->lcfg_inlbuf2, min(lcfg->lcfg_inllen2, - sizeof(server_uuid))); + memcpy(server_uuid.uuid, lcfg->lcfg_inlbuf2, + min_t(unsigned int, lcfg->lcfg_inllen2, sizeof(server_uuid))); cli->cl_dirty = 0; cli->cl_avail_grant = 0; diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 043be49..5908ef5 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -114,7 +114,7 @@ struct llu_inode_info { sizeof(struct page) * (x)) struct llu_sysio_cookie { - struct obd_sync_io_container *lsc_osic; + struct obd_io_group *lsc_oig; struct inode *lsc_inode; int lsc_maxpages; int lsc_npages; diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index c07409e..307dd46 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -265,18 +265,25 @@ static struct llu_sysio_cookie* get_sysio_cookie(struct inode *inode, int maxpages) { struct llu_sysio_cookie *cookie; + int rc; OBD_ALLOC(cookie, LLU_SYSIO_COOKIE_SIZE(maxpages)); - if (cookie) { - I_REF(inode); - cookie->lsc_inode = inode; - cookie->lsc_maxpages = maxpages; - cookie->lsc_llap = (struct ll_async_page *)(cookie + 1); - cookie->lsc_pages = (struct page *) (cookie->lsc_llap + maxpages); - - osic_init(&cookie->lsc_osic); + if (cookie == NULL) + goto out; + + I_REF(inode); + cookie->lsc_inode = inode; + cookie->lsc_maxpages = maxpages; + cookie->lsc_llap = (struct ll_async_page *)(cookie + 1); + cookie->lsc_pages = (struct page *) (cookie->lsc_llap + maxpages); + + rc = oig_init(&cookie->lsc_oig); + if (rc) { + OBD_FREE(cookie, LLU_SYSIO_COOKIE_SIZE(maxpages)); + cookie = NULL; } +out: return cookie; } @@ -305,7 +312,7 @@ void 
put_sysio_cookie(struct llu_sysio_cookie *cookie) I_RELE(cookie->lsc_inode); - osic_release(cookie->lsc_osic); + oig_release(cookie->lsc_oig); OBD_FREE(cookie, LLU_SYSIO_COOKIE_SIZE(cookie->lsc_maxpages)); } @@ -463,9 +470,11 @@ int llu_prep_async_io(struct llu_sysio_cookie *cookie, int cmd, llap[i].llap_page = &pages[i]; llap[i].llap_inode = cookie->lsc_inode; - rc = obd_queue_sync_io(exp, lsm, NULL, cookie->lsc_osic, - llap[i].llap_cookie, cmd, - pages[i]._offset, pages[i]._count, 0); + rc = obd_queue_group_io(exp, lsm, NULL, cookie->lsc_oig, + llap[i].llap_cookie, cmd, + pages[i]._offset, pages[i]._count, 0, + ASYNC_READY | ASYNC_URGENT | + ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC); if (rc) RETURN(rc); @@ -481,7 +490,7 @@ int llu_start_async_io(struct llu_sysio_cookie *cookie) struct lov_stripe_md *lsm = llu_i2info(cookie->lsc_inode)->lli_smd; struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode); - return obd_trigger_sync_io(exp, lsm, NULL, cookie->lsc_osic); + return obd_trigger_group_io(exp, lsm, NULL, cookie->lsc_oig); } /* @@ -509,7 +518,7 @@ llu_rw(int cmd, struct inode *inode, char *buf, size_t count, loff_t pos) GOTO(out_cleanup, rc); /* - rc = osic_wait(&osic); + rc = oig_wait(&oig); if (rc) { CERROR("file i/o error!\n"); rw_count = rc; @@ -730,7 +739,7 @@ int llu_iop_iodone(struct ioctx *ioctxp) for (i = 0; i < lsca->ncookies; i++) { cookie = lsca->cookies[i]; if (cookie) { - err = osic_wait(cookie->lsc_osic); + err = oig_wait(cookie->lsc_oig); if (err && !rc) rc = err; if (!rc) diff --git a/lustre/llite/file.c b/lustre/llite/file.c index b98a066..bac31cb 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -154,7 +154,7 @@ static int ll_local_open(struct file *file, struct lookup_intent *it) memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; file->private_data = fd; - ll_readahead_init(&fd->fd_ras); + ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras); 
lli->lli_io_epoch = body->io_epoch; @@ -760,6 +760,11 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, if (count == 0) RETURN(0); + /* If file was opened for LL_IOC_LOV_SETSTRIPE but the ioctl wasn't + * called on the file, don't fail the below assertion (bug 2388). */ + if (file->f_flags & O_LOV_DELAY_CREATE && lsm == NULL) + RETURN(-EBADF); + LASSERT(lsm); if (file->f_flags & O_APPEND) { diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 799dabe..c0ca902 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -21,7 +21,7 @@ struct ll_sb_info { struct obd_uuid ll_mds_uuid; struct obd_uuid ll_mds_peer_uuid; struct lustre_mount_data *ll_lmd; - char *ll_instance; + char *ll_instance; int ll_flags; wait_queue_head_t ll_commitcbd_waitq; @@ -38,7 +38,7 @@ struct ll_sb_info { struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ - spinlock_t ll_pglist_lock; + spinlock_t ll_pglist_lock; unsigned long ll_pglist_gen; struct list_head ll_pglist; }; @@ -65,7 +65,7 @@ extern struct proc_dir_entry *proc_lustre_fs_root; #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) # define hlist_del_init list_del_init -#endif +#endif static inline struct inode *ll_info2i(struct ll_inode_info *lli) { @@ -92,20 +92,16 @@ static inline void ll_i2uctxt(struct ll_uctxt *ctxt, struct inode *i1, ctxt->gid2 = i2->i_gid; else ctxt->gid2 = -1; - } else + } else ctxt->gid2 = 0; } struct it_cb_data { - struct inode *icbd_parent; - struct dentry **icbd_childp; - obd_id hash; + struct inode *icbd_parent; + struct dentry **icbd_childp; + obd_id hash; }; -#define LL_PRIVBITS_READ 0 -#define LL_PRIVBITS__LAST 1 -#define LL_PRIVBITS_MASK ((1 << LL_PRIVBITS__LAST) - 1) - #define LLAP_MAGIC 98764321 struct ll_async_page { @@ -152,7 +148,7 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa); #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #define ll_ap_completion ll_ap_completion_24 void 
ll_ap_completion_24(void *data, int cmd, int rc); -#else +#else #define ll_ap_completion ll_ap_completion_26 void ll_ap_completion_26(void *data, int cmd, int rc); #endif @@ -161,7 +157,7 @@ int ll_sync_page(struct page *page); int ll_readpage(struct file *file, struct page *page); struct ll_async_page *llap_from_cookie(void *cookie); struct ll_async_page *llap_from_page(struct page *page); -void ll_readahead_init(struct ll_readahead_state *ras); +void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); void ll_truncate(struct inode *inode); @@ -221,7 +217,7 @@ void ll_read_inode2(struct inode *inode, void *opaque); int ll_iocontrol(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); void ll_umount_begin(struct super_block *sb); -int ll_prep_inode(struct obd_export *exp, struct inode **inode, +int ll_prep_inode(struct obd_export *exp, struct inode **inode, struct ptlrpc_request *req, int offset, struct super_block *); __u32 get_uuid2int(const char *name, int len); struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len, @@ -267,7 +263,7 @@ static inline __u64 ll_ts2u64(time_t *time) { return *time; } -#endif +#endif /* don't need an addref as the sb_info should be holding one */ static inline struct obd_export *ll_s2obdexp(struct super_block *sb) diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index bf0594c..c9ee1db 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -55,7 +55,7 @@ #endif /* SYNCHRONOUS I/O to object storage for an inode */ -static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, +static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, struct page *page, int flags) { struct ll_inode_info *lli = ll_i2info(inode); @@ -243,13 +243,8 @@ static int ll_ap_make_ready(void *data, int cmd) page = llap->llap_page; - if (cmd == OBD_BRW_READ) { - /* _sync_page beat us to it and is about to call - * _set_async_flags which will fire off rpcs again */ - if 
(!test_and_clear_bit(LL_PRIVBITS_READ, &page->private)) - RETURN(-EAGAIN); + if (cmd == OBD_BRW_READ) RETURN(0); - } /* we're trying to write, but the page is locked.. come back later */ if (TryLockPage(page)) @@ -327,9 +322,6 @@ static struct obd_async_page_ops ll_async_page_ops = { .ap_completion = ll_ap_completion, }; -#define page_llap(page) \ - ((struct ll_async_page *)((page)->private & ~LL_PRIVBITS_MASK)) - /* XXX have the exp be an argument? */ struct ll_async_page *llap_from_page(struct page *page) { @@ -340,12 +332,12 @@ struct ll_async_page *llap_from_page(struct page *page) int rc; ENTRY; - llap = page_llap(page); + llap = (struct ll_async_page *)page->private; if (llap != NULL) { if (llap->llap_magic != LLAP_MAGIC) RETURN(ERR_PTR(-EINVAL)); RETURN(llap); - } + } exp = ll_i2obdexp(page->mapping->host); if (exp == NULL) @@ -355,8 +347,7 @@ struct ll_async_page *llap_from_page(struct page *page) if (llap == NULL) RETURN(ERR_PTR(-ENOMEM)); llap->llap_magic = LLAP_MAGIC; - rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, - NULL, page, + rc = obd_prep_async_page(exp, ll_i2info(inode)->lli_smd, NULL, page, (obd_off)page->index << PAGE_SHIFT, &ll_async_page_ops, llap, &llap->llap_cookie); if (rc) { @@ -364,9 +355,9 @@ struct ll_async_page *llap_from_page(struct page *page) RETURN(ERR_PTR(rc)); } - CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap, + CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", llap, page, llap->llap_cookie, (obd_off)page->index << PAGE_SHIFT); - /* also zeroing the PRIVBITS low order bitflags */ + /* also zeroing the PRIVBITS low order bitflags */ page->private = (unsigned long)llap; llap->llap_page = page; @@ -423,23 +414,29 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, rc = obd_queue_async_io(exp, lsm, NULL, llap->llap_cookie, OBD_BRW_WRITE, 0, 0, 0, 0); if (rc != 0) { /* async failed, try sync.. 
*/ - struct obd_sync_io_container *osic; - osic_init(&osic); + struct obd_io_group *oig; + rc = oig_init(&oig); + if (rc) + GOTO(out, rc); llap->llap_write_queued = 0; - rc = obd_queue_sync_io(exp, lsm, NULL, osic, - llap->llap_cookie, - OBD_BRW_WRITE, 0, to, 0); + rc = obd_queue_group_io(exp, lsm, NULL, oig, + llap->llap_cookie, + OBD_BRW_WRITE, 0, to, 0, + ASYNC_READY | ASYNC_URGENT | + ASYNC_COUNT_STABLE | + ASYNC_GROUP_SYNC); + if (rc) - GOTO(free_osic, rc); + GOTO(free_oig, rc); - rc = obd_trigger_sync_io(exp, lsm, NULL, osic); + rc = obd_trigger_group_io(exp, lsm, NULL, oig); if (rc) - GOTO(free_osic, rc); + GOTO(free_oig, rc); - rc = osic_wait(osic); -free_osic: - osic_release(osic); + rc = oig_wait(oig); +free_oig: + oig_release(oig); GOTO(out, rc); } LL_CDEBUG_PAGE(page, "write queued\n"); @@ -449,7 +446,7 @@ free_osic: LPROC_LL_DIRTY_HITS); } - /* put the page in the page cache, from now on ll_removepage is + /* put the page in the page cache, from now on ll_removepage is * responsible for cleaning up the llap */ set_page_dirty(page); @@ -490,22 +487,21 @@ void ll_removepage(struct page *page) exp = ll_i2obdexp(inode); if (exp == NULL) { - CERROR("page %p ind %lu gave null export\n", page, - page->index); + CERROR("page %p ind %lu gave null export\n", page, page->index); EXIT; return; } llap = llap_from_page(page); if (IS_ERR(llap)) { - CERROR("page %p ind %lu couldn't find llap: %ld\n", page, + CERROR("page %p ind %lu couldn't find llap: %ld\n", page, page->index, PTR_ERR(llap)); EXIT; return; } //llap_write_complete(inode, llap); - rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL, + rc = obd_teardown_async_page(exp, ll_i2info(inode)->lli_smd, NULL, llap->llap_cookie); if (rc != 0) CERROR("page %p ind %lu failed: %d\n", page, page->index, rc); @@ -535,46 +531,45 @@ static int ll_page_matches(struct page *page) page_extent.l_extent.end = page_extent.l_extent.start + PAGE_CACHE_SIZE - 1; flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED; - 
matches = obd_match(ll_i2sbi(inode)->ll_osc_exp, - ll_i2info(inode)->lli_smd, LDLM_EXTENT, + matches = obd_match(ll_i2sbi(inode)->ll_osc_exp, + ll_i2info(inode)->lli_smd, LDLM_EXTENT, &page_extent, LCK_PR, &flags, inode, &match_lockh); if (matches < 0) { LL_CDEBUG_PAGE(page, "lock match failed\n"); RETURN(matches); - } + } if (matches) { - obd_cancel(ll_i2sbi(inode)->ll_osc_exp, + obd_cancel(ll_i2sbi(inode)->ll_osc_exp, ll_i2info(inode)->lli_smd, LCK_PR, &match_lockh); } RETURN(matches); } static int ll_issue_page_read(struct obd_export *exp, - struct ll_async_page *llap, int defer_uptodate) + struct ll_async_page *llap, + struct obd_io_group *oig, int defer) { struct page *page = llap->llap_page; int rc; - /* we don't issue this page as URGENT so that it can be batched - * with other pages by the kernel's read-ahead. We have a strong - * requirement that readpage() callers must call wait_on_page() - * or lock_page() to get into ->sync_page() to trigger the IO */ - llap->llap_defer_uptodate = defer_uptodate; page_cache_get(page); - set_bit(LL_PRIVBITS_READ, &page->private); /* see ll_sync_page() */ - rc = obd_queue_async_io(exp, ll_i2info(page->mapping->host)->lli_smd, - NULL, llap->llap_cookie, OBD_BRW_READ, 0, + llap->llap_defer_uptodate = defer; + rc = obd_queue_group_io(exp, ll_i2info(page->mapping->host)->lli_smd, + NULL, oig, llap->llap_cookie, OBD_BRW_READ, 0, PAGE_SIZE, 0, ASYNC_COUNT_STABLE); if (rc) { LL_CDEBUG_PAGE(page, "read queueing failed\n"); - clear_bit(LL_PRIVBITS_READ, &page->private); page_cache_release(page); } RETURN(rc); } -static void ll_readahead(struct ll_readahead_state *ras, - struct obd_export *exp, struct address_space *mapping) +#define LL_RA_MIN(inode) ((unsigned long)PTL_MD_MAX_PAGES / 2) +#define LL_RA_MAX(inode) (inode->i_blksize * 3) + +static void ll_readahead(struct ll_readahead_state *ras, + struct obd_export *exp, struct address_space *mapping, + struct obd_io_group *oig) { unsigned long i, start, end; struct ll_async_page 
*llap; @@ -602,7 +597,7 @@ static void ll_readahead(struct ll_readahead_state *ras, CDEBUG(D_READA, "ni %lu last %lu win %lu: reading from %lu to %lu\n", ras->ras_next_index, ras->ras_last, ras->ras_window, - start, end); + start, end); spin_unlock(&ras->ras_lock); @@ -615,8 +610,8 @@ static void ll_readahead(struct ll_readahead_state *ras, * truncating the page (page->mapping == NULL) */ page = grab_cache_page_nowait(mapping, i); if (page == NULL) - continue; - + break; + /* the book-keeping above promises that we've tried * all the indices from start to end, so we don't * stop if anyone returns an error. This may not be good. */ @@ -627,7 +622,7 @@ static void ll_readahead(struct ll_readahead_state *ras, if (IS_ERR(llap) || llap->llap_defer_uptodate) goto next_page; - rc = ll_issue_page_read(exp, llap, 1); + rc = ll_issue_page_read(exp, llap, oig, 1); if (rc == 0) LL_CDEBUG_PAGE(page, "started read-ahead\n"); if (rc) { @@ -640,18 +635,15 @@ static void ll_readahead(struct ll_readahead_state *ras, } } -/* XXX this should really bubble up somehow. 
*/ -#define LL_RA_MIN ((unsigned long)PTL_MD_MAX_PAGES / 2) -#define LL_RA_MAX ((unsigned long)(32 * PTL_MD_MAX_PAGES)) - /* called with the ras_lock held or from places where it doesn't matter */ -static void ll_readahead_set(struct ll_readahead_state *ras, +static void ll_readahead_set(struct inode *inode, + struct ll_readahead_state *ras, unsigned long index) { ras->ras_next_index = index; if (ras->ras_next_index != ~0UL) ras->ras_next_index++; - ras->ras_window = LL_RA_MIN; + ras->ras_window = LL_RA_MIN(inode); ras->ras_last = ras->ras_next_index + ras->ras_window; if (ras->ras_last < ras->ras_next_index) ras->ras_last = ~0UL; @@ -660,63 +652,64 @@ static void ll_readahead_set(struct ll_readahead_state *ras, index); } -void ll_readahead_init(struct ll_readahead_state *ras) +void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) { spin_lock_init(&ras->ras_lock); - ll_readahead_set(ras, 0); + ll_readahead_set(inode, ras, 0); } -static void ll_readahead_update(struct ll_readahead_state *ras, +static void ll_readahead_update(struct inode *inode, + struct ll_readahead_state *ras, unsigned long index, int hit) { unsigned long issued_start, new_last; spin_lock(&ras->ras_lock); - /* we're interested in noticing the index's relation to the + /* we're interested in noticing the index's relation to the * previously issued read-ahead pages */ issued_start = ras->ras_next_index - ras->ras_window - 1; if (issued_start > ras->ras_next_index) issued_start = 0; - CDEBUG(D_READA, "ni %lu last %lu win %lu: %s ind %lu start %lu\n", + CDEBUG(D_READA, "ni %lu last %lu win %lu: %s ind %lu start %lu\n", ras->ras_next_index, ras->ras_last, ras->ras_window, hit ? "hit" : "miss", index, issued_start); - if (!hit && + if (!hit && index == ras->ras_next_index && index == ras->ras_last + 1) { /* special case the kernel's read-ahead running into the * page just beyond our read-ahead window as an extension * of our read-ahead. sigh. 
wishing it was easier to * turn off 2.4's read-ahead. */ - ras->ras_window = min(LL_RA_MAX, ras->ras_window + 1); + ras->ras_window = min(LL_RA_MAX(inode), ras->ras_window + 1); if (index != ~0UL) ras->ras_next_index = index + 1; ras->ras_last = index; - } else if (!hit && + } else if (!hit && (index > issued_start || ras->ras_next_index >= index)) { /* deal with a miss way out of the window. we interpret * this as a seek and restart the window */ - ll_readahead_set(ras, index); + ll_readahead_set(inode, ras, index); - } else if (!hit && + } else if (!hit && issued_start <= index && index < ras->ras_next_index) { /* a miss inside the window? surely its memory pressure * evicting our read pages before the app can see them. * we shrink the window aggressively */ unsigned long old_window = ras->ras_window; - ras->ras_window = max(ras->ras_window / 2, LL_RA_MIN); + ras->ras_window = max(ras->ras_window / 2, LL_RA_MIN(inode)); ras->ras_last -= old_window - ras->ras_window; if (ras->ras_next_index > ras->ras_last) ras->ras_next_index = ras->ras_last + 1; CDEBUG(D_READA, "ni %lu last %lu win %lu: miss inside\n", ras->ras_next_index, ras->ras_last, ras->ras_window); - } else if (hit && + } else if (hit && issued_start <= index && index < ras->ras_next_index) { - /* a hit inside the window. grow the window by twice the + /* a hit inside the window. grow the window by twice the * number of pages that are satisified within the window. 
*/ - ras->ras_window = min(LL_RA_MAX, ras->ras_window + 2); + ras->ras_window = min(LL_RA_MAX(inode), ras->ras_window + 2); /* we want the next readahead pass to issue a windows worth * beyond where the app currently is */ @@ -745,8 +738,9 @@ int ll_readpage(struct file *filp, struct page *page) struct ll_file_data *fd = filp->private_data; struct inode *inode = page->mapping->host; struct obd_export *exp; - int rc; struct ll_async_page *llap; + struct obd_io_group *oig = NULL; + int rc; ENTRY; LASSERT(PageLocked(page)); @@ -756,6 +750,10 @@ int ll_readpage(struct file *filp, struct page *page) (((obd_off)page->index) << PAGE_SHIFT)); LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0); + rc = oig_init(&oig); + if (rc < 0) + GOTO(out, rc); + exp = ll_i2obdexp(inode); if (exp == NULL) GOTO(out, rc = -EINVAL); @@ -765,15 +763,17 @@ int ll_readpage(struct file *filp, struct page *page) GOTO(out, rc = PTR_ERR(llap)); if (llap->llap_defer_uptodate) { - ll_readahead_update(&fd->fd_ras, page->index, 1); + ll_readahead_update(inode, &fd->fd_ras, page->index, 1); + ll_readahead(&fd->fd_ras, exp, page->mapping, oig); + obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, + oig); LL_CDEBUG_PAGE(page, "marking uptodate from defer\n"); SetPageUptodate(page); - ll_readahead(&fd->fd_ras, exp, page->mapping); unlock_page(page); - RETURN(0); + GOTO(out_oig, rc = 0); } - ll_readahead_update(&fd->fd_ras, page->index, 0); + ll_readahead_update(inode, &fd->fd_ras, page->index, 0); rc = ll_page_matches(page); if (rc < 0) @@ -789,18 +789,26 @@ int ll_readpage(struct file *filp, struct page *page) } } - rc = ll_issue_page_read(exp, llap, 0); - if (rc == 0) { - LL_CDEBUG_PAGE(page, "queued readpage\n"); - if ((ll_i2sbi(inode)->ll_flags & LL_SBI_READAHEAD)) - ll_readahead(&fd->fd_ras, exp, page->mapping); - } + rc = ll_issue_page_read(exp, llap, oig, 0); + if (rc) + GOTO(out, rc); + + LL_CDEBUG_PAGE(page, "queued readpage\n"); + if ((ll_i2sbi(inode)->ll_flags & 
LL_SBI_READAHEAD)) + ll_readahead(&fd->fd_ras, exp, page->mapping, oig); + + rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig); + out: - if (rc) + if (rc) unlock_page(page); +out_oig: + if (oig != NULL) + oig_release(oig); RETURN(rc); } +#if 0 /* this is for read pages. we issue them as ready but not urgent. when * someone waits on them we fire them off, hopefully merged with adjacent * reads that were queued by read-ahead. */ @@ -821,15 +829,16 @@ int ll_sync_page(struct page *page) exp = ll_i2obdexp(page->mapping->host); if (exp == NULL) RETURN(-EINVAL); - + llap = llap_from_page(page); if (IS_ERR(llap)) RETURN(PTR_ERR(llap)); LL_CDEBUG_PAGE(page, "setting ready|urgent\n"); - rc = obd_set_async_flags(exp, ll_i2info(page->mapping->host)->lli_smd, - NULL, llap->llap_cookie, + rc = obd_set_async_flags(exp, ll_i2info(page->mapping->host)->lli_smd, + NULL, llap->llap_cookie, ASYNC_READY|ASYNC_URGENT); return rc; } +#endif diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index cda014e..23be231 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -221,6 +221,6 @@ struct address_space_operations ll_aops = { prepare_write: ll_prepare_write, commit_write: ll_commit_write, removepage: ll_removepage, - sync_page: ll_sync_page, + sync_page: NULL, bmap: NULL }; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 47a447e..e9c8c12 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1918,12 +1918,12 @@ static int lov_set_async_flags(struct obd_export *exp, RETURN(rc); } -static int lov_queue_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic, void *cookie, - int cmd, obd_off off, int count, - obd_flag brw_flags) +static int lov_queue_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, void *cookie, + int cmd, obd_off off, int count, + obd_flag brw_flags, obd_flag async_flags) { struct 
lov_obd *lov = &exp->exp_obd->u.lov; struct lov_async_page *lap; @@ -1939,19 +1939,19 @@ static int lov_queue_sync_io(struct obd_export *exp, RETURN(PTR_ERR(lap)); loi = &lsm->lsm_oinfo[lap->lap_stripe]; - rc = obd_queue_sync_io(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, - osic, lap->lap_sub_cookie, cmd, off, count, - brw_flags); + rc = obd_queue_group_io(lov->tgts[loi->loi_ost_idx].ltd_exp, lsm, loi, + oig, lap->lap_sub_cookie, cmd, off, count, + brw_flags, async_flags); RETURN(rc); } /* this isn't exactly optimal. we may have queued sync io in oscs on * all stripes, but we don't record that fact at queue time. so we * trigger sync io on all stripes. */ -static int lov_trigger_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic) +static int lov_trigger_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig) { struct lov_obd *lov = &exp->exp_obd->u.lov; int rc = 0, i, err; @@ -1963,8 +1963,8 @@ static int lov_trigger_sync_io(struct obd_export *exp, for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++) { - err = obd_trigger_sync_io(lov->tgts[loi->loi_ost_idx].ltd_exp, - lsm, loi, osic); + err = obd_trigger_group_io(lov->tgts[loi->loi_ost_idx].ltd_exp, + lsm, loi, oig); if (rc == 0 && err != 0) rc = err; }; @@ -2826,8 +2826,8 @@ struct obd_ops lov_obd_ops = { .o_prep_async_page = lov_prep_async_page, .o_queue_async_io = lov_queue_async_io, .o_set_async_flags = lov_set_async_flags, - .o_queue_sync_io = lov_queue_sync_io, - .o_trigger_sync_io = lov_trigger_sync_io, + .o_queue_group_io = lov_queue_group_io, + .o_trigger_group_io = lov_trigger_group_io, .o_teardown_async_page lov_teardown_async_page, o_punch: lov_punch, o_sync: lov_sync, diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index ef04e68..6a4ac6b 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -573,10 +573,14 @@ int 
lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm, LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lmmk->lmm_objects[0])); /* User wasn't expecting this many OST entries */ - if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) + if (lum.lmm_stripe_count == 0) { + if (copy_to_user(lump, lmmk, sizeof(lum))) + rc = -EFAULT; + } else if (lum.lmm_stripe_count < lmmk->lmm_stripe_count) { rc = -EOVERFLOW; - else if (copy_to_user(lump, lmmk, lmm_size)) + } else if (copy_to_user(lump, lmmk, lmm_size)) { rc = -EFAULT; + } obd_free_diskmd(exp, &lmmk); diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 2855b52..7e34fce 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -193,9 +193,18 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) GOTO(out_up, dchild); if (dchild->d_inode) { - if (!S_ISREG(dchild->d_inode->i_mode)) + int old_mode = dchild->d_inode->i_mode; + if (!S_ISREG(old_mode)) GOTO(out_err, err = -EEXIST); + /* Fixup file permissions if necessary */ + if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { + CWARN("fixing permissions on %s from %o to %o\n", + name, old_mode, mode); + dchild->d_inode->i_mode = (mode & S_IALLUGO) | + (old_mode & ~S_IALLUGO); + mark_inode_dirty(dchild->d_inode); + } GOTO(out_up, dchild); } @@ -228,9 +237,18 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode) GOTO(out_up, dchild); if (dchild->d_inode) { - if (!S_ISDIR(dchild->d_inode->i_mode)) + int old_mode = dchild->d_inode->i_mode; + if (!S_ISDIR(old_mode)) GOTO(out_err, err = -ENOTDIR); + /* Fixup directory permissions if necessary */ + if ((old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { + CWARN("fixing permissions on %s from %o to %o\n", + name, old_mode, mode); + dchild->d_inode->i_mode = (mode & S_IALLUGO) | + (old_mode & ~S_IALLUGO); + mark_inode_dirty(dchild->d_inode); + } GOTO(out_up, dchild); } diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index f102439..e6701e2 100644 --- 
a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -333,6 +333,14 @@ int mdc_enqueue(struct obd_export *exp, it->d.lustre.it_lock_mode = lock_mode; it->d.lustre.it_data = req; + if (it->d.lustre.it_status < 0 && req->rq_replay) { + LASSERT(req->rq_transno == 0); + /* Don't hold error requests for replay. */ + spin_lock(&req->rq_lock); + req->rq_replay = 0; + spin_unlock(&req->rq_lock); + } + /* We know what to expect, so we do any byte flipping required here */ LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1); if (reply_buffers >= 3) { @@ -399,8 +407,8 @@ EXPORT_SYMBOL(mdc_enqueue); int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, struct ll_fid *pfid, const char *name, int len, void *lmm, int lmmsize, - struct ll_fid *cfid, struct lookup_intent *it, int flags, - struct ptlrpc_request **reqp, + struct ll_fid *cfid, struct lookup_intent *it, + int lookup_flags, struct ptlrpc_request **reqp, ldlm_blocking_callback cb_blocking) { struct lustre_handle lockh; @@ -413,28 +421,28 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, LASSERT(it); CDEBUG(D_DLMTRACE, "name: %*s in %ld, intent: %s\n", len, name, - (unsigned long) pfid->id, ldlm_it2str(it->it_op)); + (unsigned long)pfid->id, ldlm_it2str(it->it_op)); if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) { /* We could just return 1 immediately, but since we should only * be called in revalidate_it if we already have a lock, let's * verify that. 
*/ - struct ldlm_res_id res_id ={.name = {cfid->id, + struct ldlm_res_id res_id ={.name = {cfid->id, cfid->generation}}; struct lustre_handle lockh; - int mode, flags = LDLM_FL_BLOCK_GRANTED; + int mode = LCK_PR; - mode = LCK_PR; - rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags, - &res_id, LDLM_PLAIN, NULL, LCK_PR, &lockh); + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED, &res_id, + LDLM_PLAIN, NULL, LCK_PR, &lockh); if (!rc) { mode = LCK_PW; - rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags, - &res_id, LDLM_PLAIN, NULL, LCK_PW, - &lockh); + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED, &res_id, + LDLM_PLAIN, NULL, LCK_PW, &lockh); } if (rc) { - memcpy(&it->d.lustre.it_lock_handle, &lockh, + memcpy(&it->d.lustre.it_lock_handle, &lockh, sizeof(lockh)); it->d.lustre.it_lock_mode = mode; } @@ -492,11 +500,11 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, if (it->it_op & IT_OPEN) { if (!it_disposition(it, DISP_OPEN_OPEN) || it->d.lustre.it_status != 0) { - unsigned long flags; + unsigned long irqflags; - spin_lock_irqsave(&request->rq_lock, flags); + spin_lock_irqsave(&request->rq_lock, irqflags); request->rq_replay = 0; - spin_unlock_irqrestore(&request->rq_lock, flags); + spin_unlock_irqrestore(&request->rq_lock, irqflags); } } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 5d8e2e2..fe28761 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -79,7 +79,8 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, if (!pages) GOTO(out, rc = -ENOMEM); - desc = ptlrpc_prep_bulk_exp (req, 1, BULK_PUT_SOURCE, MDS_BULK_PORTAL); + desc = ptlrpc_prep_bulk_exp(req, npages, BULK_PUT_SOURCE, + MDS_BULK_PORTAL); if (desc == NULL) GOTO(out_free, rc = -ENOMEM); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 9cd8bbc..97deb7d 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -493,10 +493,9 @@ int 
mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, RETURN(0); } -int mds_notify(struct obd_device *obd, struct obd_device *watched, - int active) +int mds_notify(struct obd_device *obd, struct obd_device *watched, int active) { - struct obd_uuid *uuid; + struct obd_uuid *uuid; int rc = 0; if (!active) @@ -504,16 +503,15 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, if (strcmp(watched->obd_type->typ_name, "osc")) { CERROR("unexpected notification of %s %s!\n", - watched->obd_type->typ_name, - watched->obd_name); + watched->obd_type->typ_name, watched->obd_name); RETURN(-EINVAL); } + uuid = &watched->u.cli.cl_import->imp_target_uuid; if (obd->obd_recovering) { CWARN("MDS %s: in recovery, not resetting orphans on %s\n", obd->obd_name, uuid->uuid); } else { - uuid = &watched->u.cli.cl_import->imp_target_uuid; CWARN("MDS %s: %s now active, resetting orphans\n", obd->obd_name, uuid->uuid); rc = mds_lov_clearorphans(&obd->u.mds, uuid); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 80c89e5..8d49420 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -339,7 +339,8 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, } /* replay case */ - if (rec->ur_fid2->id) { + if(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + LASSERT (rec->ur_fid2->id); body->valid |= OBD_MD_FLBLKSZ | OBD_MD_FLEASIZE; lmm_size = rec->ur_eadatalen; lmm = rec->ur_eadata; @@ -358,6 +359,10 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, RETURN(0); } + + if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_ALLOC_OBDO)) + GOTO(out_ids, rc = -ENOMEM); + oa = obdo_alloc(); if (oa == NULL) GOTO(out_ids, rc = -ENOMEM); @@ -784,15 +789,23 @@ int mds_open(struct mds_update_record *rec, int offset, /* Step 0: If we are passed a fid, then we assume the client already * opened this file and is only replaying the RPC, so we open the * inode by fid (at some large expense in security). 
*/ - if (rec->ur_fid2->id) { + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) { + DEBUG_REQ(D_HA, req, "open replay, disp: "LPX64"\n", + rep->lock_policy_res1); + + LASSERT(rec->ur_fid2->id); + rc = mds_open_by_fid(req, rec->ur_fid2, body, rec->ur_flags, rec, rep); - if (rc != -ENOENT) + if (rc != -ENOENT) RETURN(rc); /* We didn't find the correct inode on disk either, so we * need to re-create it via a regular replay. */ LASSERT(rec->ur_flags & MDS_OPEN_CREAT); + } else { + LASSERT(!rec->ur_fid2->id); } + LASSERT(offset == 2); /* If we got here, we must be called via intent */ med = &req->rq_export->exp_mds_data; @@ -938,6 +951,16 @@ int mds_open(struct mds_update_record *rec, int offset, if ((rec->ur_flags & MDS_OPEN_DIRECTORY) && !S_ISDIR(dchild->d_inode->i_mode)) GOTO(cleanup, rc = -ENOTDIR); + + if (S_ISDIR(dchild->d_inode->i_mode)) { + if (rec->ur_flags & MDS_OPEN_CREAT || rec->ur_flags & FMODE_WRITE) { + /* we are trying to create or write an existing dir */ + GOTO(cleanup, rc = -EISDIR); + } + if (ll_permission(dchild->d_inode, acc_mode, NULL)) { + GOTO(cleanup, rc = -EACCES); + } + } /* Step 5: mds_open it */ rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec, diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 3227f34..40c49f3 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -129,7 +129,9 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, off = med->med_off; transno = req->rq_reqmsg->transno; - if (transno == 0) { + if (rc != 0) { + LASSERT(transno == 0); + } else if (transno == 0) { spin_lock(&mds->mds_transno_lock); transno = ++mds->mds_last_transno; spin_unlock(&mds->mds_transno_lock); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index bc171b2..fdbfb91 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -408,11 +408,11 @@ EXPORT_SYMBOL(class_conn2cliimp); EXPORT_SYMBOL(class_disconnect); 
EXPORT_SYMBOL(class_disconnect_exports); -EXPORT_SYMBOL(osic_init); -EXPORT_SYMBOL(osic_release); -EXPORT_SYMBOL(osic_add_one); -EXPORT_SYMBOL(osic_wait); -EXPORT_SYMBOL(osic_complete_one); +EXPORT_SYMBOL(oig_init); +EXPORT_SYMBOL(oig_release); +EXPORT_SYMBOL(oig_add_one); +EXPORT_SYMBOL(oig_wait); +EXPORT_SYMBOL(oig_complete_one); /* uuid.c */ EXPORT_SYMBOL(class_uuid_unparse); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index d3b86bf..9ee9c4d 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -654,118 +654,125 @@ void class_disconnect_exports(struct obd_device *obd, int flags) EXIT; } -void osic_init(struct obd_sync_io_container **osic_out) -{ - struct obd_sync_io_container *osic; - OBD_ALLOC(osic, sizeof(*osic)); - spin_lock_init(&osic->osic_lock); - osic->osic_rc = 0; - osic->osic_pending = 0; - atomic_set(&osic->osic_refcount, 1); - init_waitqueue_head(&osic->osic_waitq); - INIT_LIST_HEAD(&osic->osic_occ_list); - *osic_out = osic; +int oig_init(struct obd_io_group **oig_out) +{ + struct obd_io_group *oig; + ENTRY; + + OBD_ALLOC(oig, sizeof(*oig)); + if (oig == NULL) + RETURN(-ENOMEM); + + spin_lock_init(&oig->oig_lock); + oig->oig_rc = 0; + oig->oig_pending = 0; + atomic_set(&oig->oig_refcount, 1); + init_waitqueue_head(&oig->oig_waitq); + INIT_LIST_HEAD(&oig->oig_occ_list); + + *oig_out = oig; + RETURN(0); }; -static inline void osic_grab(struct obd_sync_io_container *osic) +static inline void oig_grab(struct obd_io_group *oig) { - atomic_inc(&osic->osic_refcount); + atomic_inc(&oig->oig_refcount); } -void osic_release(struct obd_sync_io_container *osic) +void oig_release(struct obd_io_group *oig) { - if (atomic_dec_and_test(&osic->osic_refcount)) - OBD_FREE(osic, sizeof(*osic)); + if (atomic_dec_and_test(&oig->oig_refcount)) + OBD_FREE(oig, sizeof(*oig)); } -void osic_add_one(struct obd_sync_io_container *osic, - struct osic_callback_context *occ) +void oig_add_one(struct obd_io_group *oig, + struct 
oig_callback_context *occ) { unsigned long flags; - CDEBUG(D_CACHE, "osic %p ready to roll\n", osic); - spin_lock_irqsave(&osic->osic_lock, flags); - osic->osic_pending++; + CDEBUG(D_CACHE, "oig %p ready to roll\n", oig); + spin_lock_irqsave(&oig->oig_lock, flags); + oig->oig_pending++; if (occ != NULL) - list_add_tail(&occ->occ_osic_item, &osic->osic_occ_list); - spin_unlock_irqrestore(&osic->osic_lock, flags); - osic_grab(osic); + list_add_tail(&occ->occ_oig_item, &oig->oig_occ_list); + spin_unlock_irqrestore(&oig->oig_lock, flags); + oig_grab(oig); } -void osic_complete_one(struct obd_sync_io_container *osic, - struct osic_callback_context *occ, int rc) +void oig_complete_one(struct obd_io_group *oig, + struct oig_callback_context *occ, int rc) { unsigned long flags; wait_queue_head_t *wake = NULL; int old_rc; - spin_lock_irqsave(&osic->osic_lock, flags); + spin_lock_irqsave(&oig->oig_lock, flags); if (occ != NULL) - list_del_init(&occ->occ_osic_item); + list_del_init(&occ->occ_oig_item); - old_rc = osic->osic_rc; - if (osic->osic_rc == 0 && rc != 0) - osic->osic_rc = rc; + old_rc = oig->oig_rc; + if (oig->oig_rc == 0 && rc != 0) + oig->oig_rc = rc; - if (--osic->osic_pending <= 0) - wake = &osic->osic_waitq; + if (--oig->oig_pending <= 0) + wake = &oig->oig_waitq; - spin_unlock_irqrestore(&osic->osic_lock, flags); + spin_unlock_irqrestore(&oig->oig_lock, flags); - CDEBUG(D_CACHE, "osic %p completed, rc %d -> %d via %d, %d now " - "pending (racey)\n", osic, old_rc, osic->osic_rc, rc, - osic->osic_pending); + CDEBUG(D_CACHE, "oig %p completed, rc %d -> %d via %d, %d now " + "pending (racey)\n", oig, old_rc, oig->oig_rc, rc, + oig->oig_pending); if (wake) wake_up(wake); - osic_release(osic); + oig_release(oig); } -static int osic_done(struct obd_sync_io_container *osic) +static int oig_done(struct obd_io_group *oig) { unsigned long flags; int rc = 0; - spin_lock_irqsave(&osic->osic_lock, flags); - if (osic->osic_pending <= 0) + spin_lock_irqsave(&oig->oig_lock, 
flags); + if (oig->oig_pending <= 0) rc = 1; - spin_unlock_irqrestore(&osic->osic_lock, flags); + spin_unlock_irqrestore(&oig->oig_lock, flags); return rc; } -static void interrupted_osic(void *data) +static void interrupted_oig(void *data) { - struct obd_sync_io_container *osic = data; + struct obd_io_group *oig = data; struct list_head *pos; - struct osic_callback_context *occ; + struct oig_callback_context *occ; unsigned long flags; - spin_lock_irqsave(&osic->osic_lock, flags); - list_for_each(pos, &osic->osic_occ_list) { - occ = list_entry(pos, struct osic_callback_context, - occ_osic_item); + spin_lock_irqsave(&oig->oig_lock, flags); + list_for_each(pos, &oig->oig_occ_list) { + occ = list_entry(pos, struct oig_callback_context, + occ_oig_item); occ->occ_interrupted(occ); } - spin_unlock_irqrestore(&osic->osic_lock, flags); + spin_unlock_irqrestore(&oig->oig_lock, flags); } -int osic_wait(struct obd_sync_io_container *osic) +int oig_wait(struct obd_io_group *oig) { - struct l_wait_info lwi = LWI_INTR(interrupted_osic, osic); + struct l_wait_info lwi = LWI_INTR(interrupted_oig, oig); int rc; - CDEBUG(D_CACHE, "waiting for osic %p\n", osic); + CDEBUG(D_CACHE, "waiting for oig %p\n", oig); do { - rc = l_wait_event(osic->osic_waitq, osic_done(osic), &lwi); + rc = l_wait_event(oig->oig_waitq, oig_done(oig), &lwi); LASSERTF(rc == 0 || rc == -EINTR, "rc: %d\n", rc); - /* we can't continue until the osic has emptied and stopped + /* we can't continue until the oig has emptied and stopped * referencing state that the caller will free upon return */ if (rc == -EINTR) lwi = (struct l_wait_info){ 0, }; } while (rc == -EINTR); - LASSERTF(osic->osic_pending == 0, - "exiting osic_wait(osic = %p) with %d pending\n", osic, - osic->osic_pending); + LASSERTF(oig->oig_pending == 0, + "exiting oig_wait(oig = %p) with %d pending\n", oig, + oig->oig_pending); - CDEBUG(D_CACHE, "done waiting on osic %p rc %d\n", osic, osic->osic_rc); - return osic->osic_rc; + CDEBUG(D_CACHE, "done 
waiting on oig %p rc %d\n", oig, oig->oig_rc); + return oig->oig_rc; } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 41af093..54a1d7b 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -149,8 +149,8 @@ void lprocfs_remove(struct proc_dir_entry *root) /* Memory corruption once caused this to fail, and without this LASSERT we would loop here forever. */ LASSERTF(strlen(rm_entry->name) == rm_entry->namelen, - "0x%p %s/%s len %d\n", rm_entry, - temp->name, rm_entry->name, strlen(rm_entry->name)); + "0x%p %s/%s len %d\n", rm_entry, temp->name, + rm_entry->name, (int)strlen(rm_entry->name)); remove_proc_entry(rm_entry->name, rm_entry->parent); if (temp == parent) @@ -624,8 +624,8 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, prep_async_page); LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_async_io); LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_sync_io); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_sync_io); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io); LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page); LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch); LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 1cc0a6e..d3785d4 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1708,6 +1708,12 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, (oa->o_flags & OBD_FL_DELORPHAN)) { if (diff >= 0) RETURN(diff); + if (-diff > 10000) { /* XXX make this smarter */ + CERROR("ignoring bogus orphan destroy request: obdid " + LPU64" last_id "LPU64"\n", + oa->o_id, filter_last_id(filter, oa)); + RETURN(-EINVAL); + } 
filter_destroy_precreated(exp, oa, filter); rc = filter_update_last_objid(obd, group, 0); if (rc) @@ -1723,7 +1729,6 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, LASSERT(diff >= 0); RETURN(diff); } - } /* We rely on the fact that only one thread will be creating files in a given @@ -1874,8 +1879,14 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_RECREATE_OBJS)) { - diff = 1; - rc = filter_precreate(obd, oa, group, &diff); + if (oa->o_id > filter_last_id(&obd->u.filter, oa)) { + CERROR("recreate objid "LPU64" > last id "LPU64"\n", + oa->o_id, filter_last_id(&obd->u.filter, oa)); + rc = -EINVAL; + } else { + diff = 1; + rc = filter_precreate(obd, oa, group, &diff); + } } else { diff = filter_should_precreate(exp, oa, group); if (diff > 0) { @@ -2094,11 +2105,11 @@ static void filter_grant_total_exports(struct obd_device *obd, spin_lock(&obd->obd_dev_lock); list_for_each_entry(exp_pos, &obd->obd_exports, exp_obd_chain) { fed = &exp_pos->exp_filter_data; - LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64, + LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64"\n", exp_pos->exp_client_uuid.uuid, exp_pos, fed->fed_dirty, maxsize); LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, - "cli %s/%p %lu+%lu > "LPU64, + "cli %s/%p %lu+%lu > "LPU64"\n", exp_pos->exp_client_uuid.uuid, exp_pos, fed->fed_grant, fed->fed_pending, maxsize); *tot_dirty += fed->fed_dirty; @@ -2114,16 +2125,17 @@ static void filter_grant_sanity_check(obd_size tot_dirty, obd_size tot_pending, obd_size fo_tot_pending, obd_size fo_tot_granted, obd_size maxsize) { - LASSERTF(tot_dirty == fo_tot_dirty, LPU64" != "LPU64, + LASSERTF(tot_dirty == fo_tot_dirty, LPU64" != "LPU64"\n", tot_dirty, fo_tot_dirty); - LASSERTF(tot_pending == fo_tot_pending, LPU64" != "LPU64, + LASSERTF(tot_pending == fo_tot_pending, LPU64" != "LPU64"\n", tot_pending, fo_tot_pending); - 
LASSERTF(tot_granted == fo_tot_granted, LPU64" != "LPU64, + LASSERTF(tot_granted == fo_tot_granted, LPU64" != "LPU64"\n", tot_granted, fo_tot_granted); - LASSERTF(tot_dirty <= maxsize, LPU64" > "LPU64, tot_dirty, maxsize); - LASSERTF(tot_pending <= tot_granted, LPU64" > "LPU64, tot_pending, + LASSERTF(tot_dirty <= maxsize, LPU64" > "LPU64"\n", tot_dirty, maxsize); + LASSERTF(tot_pending <= tot_granted, LPU64" > "LPU64"\n", tot_pending, tot_granted); - LASSERTF(tot_granted <= maxsize, LPU64" > "LPU64, tot_granted, maxsize); + LASSERTF(tot_granted <= maxsize, LPU64" > "LPU64"\n", + tot_granted, maxsize); } static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index eced509..8c0ad36 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -311,7 +311,6 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (time_after(jiffies, now + 15 * HZ)) CERROR("slow direct_io %lus\n", (jiffies - now) / HZ); - filter_grant_commit(exp, niocount, res); err = fsfilt_commit_wait(obd, inode, wait_handle); if (err) rc = err; @@ -321,6 +320,8 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ); cleanup: + filter_grant_commit(exp, niocount, res); + switch (cleanup_phase) { case 2: pop_ctxt(&saved, &obd->obd_ctxt, NULL); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index e5ee245..95c96e6 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -202,8 +202,6 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, submit_bio(WRITE, bio); } - filter_grant_commit(exp, niocount, res); - /* time to wait for I/O completion */ wait_event(dreq->wait, atomic_read(&dreq->numreqs) == 0); @@ -238,6 +236,8 @@ int filter_commitrw_write(struct obd_export *exp, struct 
obdo *oa, int objcount, CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ); cleanup: + filter_grant_commit(exp, niocount, res); + switch (cleanup_phase) { case 2: pop_ctxt(&saved, &obd->obd_ctxt, NULL); diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 68a2d35..ae3e010 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -34,8 +34,8 @@ struct osc_async_page { enum async_flags oap_async_flags; unsigned long oap_interrupted:1; - struct obd_sync_io_container *oap_osic; - struct osic_callback_context oap_occ; + struct obd_io_group *oap_oig; + struct oig_callback_context oap_occ; struct ptlrpc_request *oap_request; struct client_obd *oap_cli; struct lov_oinfo *oap_loi; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 3635a7b..b0686b3 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -784,7 +784,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, LASSERT((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE); LASSERTF(i == 0 || pg->off > pg_prev->off, "i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64 - " prev_pg %p [pri %lu ind %lu] off "LPU64, + " prev_pg %p [pri %lu ind %lu] off "LPU64"\n", i, page_count, pg->pg, pg->pg->private, pg->pg->index, pg->off, pg_prev->pg, pg_prev->pg->private, pg_prev->pg->index, @@ -1154,7 +1154,7 @@ static void lop_update_pending(struct client_obd *cli, * rpc yet it can dequeue immediately. 
Otherwise it has to mark the rpc as * desiring interruption which will forcefully complete the rpc once the rpc * has timed out */ -static void osc_occ_interrupted(struct osic_callback_context *occ) +static void osc_occ_interrupted(struct oig_callback_context *occ) { struct osc_async_page *oap; struct loi_oap_pages *lop; @@ -1188,8 +1188,8 @@ static void osc_occ_interrupted(struct osic_callback_context *occ) lop_update_pending(oap->oap_cli, lop, oap->oap_cmd, -1); loi_list_maint(oap->oap_cli, oap->oap_loi); - osic_complete_one(oap->oap_osic, &oap->oap_occ, 0); - oap->oap_osic = NULL; + oig_complete_one(oap->oap_oig, &oap->oap_occ, 0); + oap->oap_oig = NULL; } unlock: @@ -1210,9 +1210,9 @@ static void osc_complete_oap(struct client_obd *cli, oap->oap_request = NULL; } - if (oap->oap_osic) { - osic_complete_one(oap->oap_osic, &oap->oap_occ, rc); - oap->oap_osic = NULL; + if (oap->oap_oig) { + oig_complete_one(oap->oap_oig, &oap->oap_occ, rc); + oap->oap_oig = NULL; EXIT; return; } @@ -1918,11 +1918,12 @@ out: RETURN(rc); } -static int osc_queue_sync_io(struct obd_export *exp, struct lov_stripe_md *lsm, +static int osc_queue_group_io(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_oinfo *loi, - struct obd_sync_io_container *osic, void *cookie, + struct obd_io_group *oig, void *cookie, int cmd, obd_off off, int count, - obd_flag brw_flags) + obd_flag brw_flags, + obd_flag async_flags) { struct client_obd *cli = &exp->exp_obd->u.cli; struct osc_async_page *oap; @@ -1950,34 +1951,35 @@ static int osc_queue_sync_io(struct obd_export *exp, struct lov_stripe_md *lsm, oap->oap_page_off = off; oap->oap_count = count; oap->oap_brw_flags = brw_flags; + oap->oap_async_flags = async_flags; if (cmd == OBD_BRW_WRITE) lop = &loi->loi_write_lop; else lop = &loi->loi_read_lop; - list_add_tail(&oap->oap_pending_item, &lop->lop_pending_sync); - oap->oap_osic = osic; - osic_add_one(osic, &oap->oap_occ); + list_add_tail(&oap->oap_pending_item, &lop->lop_pending_group); + if 
(oap->oap_async_flags & ASYNC_GROUP_SYNC) { + oap->oap_oig = oig; + oig_add_one(oig, &oap->oap_occ); + } - LOI_DEBUG(loi, "oap %p page %p on sync pending\n", oap, oap->oap_page); + LOI_DEBUG(loi, "oap %p page %p on group pending\n", oap, oap->oap_page); spin_unlock(&cli->cl_loi_list_lock); RETURN(0); } -static void osc_sync_to_pending(struct client_obd *cli, struct lov_oinfo *loi, - struct loi_oap_pages *lop, int cmd) +static void osc_group_to_pending(struct client_obd *cli, struct lov_oinfo *loi, + struct loi_oap_pages *lop, int cmd) { struct list_head *pos, *tmp; struct osc_async_page *oap; - list_for_each_safe(pos, tmp, &lop->lop_pending_sync) { + list_for_each_safe(pos, tmp, &lop->lop_pending_group) { oap = list_entry(pos, struct osc_async_page, oap_pending_item); list_del(&oap->oap_pending_item); - oap->oap_async_flags |= ASYNC_READY | ASYNC_URGENT | - ASYNC_COUNT_STABLE; list_add_tail(&oap->oap_pending_item, &lop->lop_pending); list_add(&oap->oap_urgent_item, &lop->lop_urgent); lop_update_pending(cli, lop, cmd, 1); @@ -1985,10 +1987,10 @@ static void osc_sync_to_pending(struct client_obd *cli, struct lov_oinfo *loi, loi_list_maint(cli, loi); } -static int osc_trigger_sync_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_sync_io_container *osic) +static int osc_trigger_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig) { struct client_obd *cli = &exp->exp_obd->u.cli; ENTRY; @@ -2001,8 +2003,8 @@ static int osc_trigger_sync_io(struct obd_export *exp, spin_lock(&cli->cl_loi_list_lock); - osc_sync_to_pending(cli, loi, &loi->loi_write_lop, OBD_BRW_WRITE); - osc_sync_to_pending(cli, loi, &loi->loi_read_lop, OBD_BRW_READ); + osc_group_to_pending(cli, loi, &loi->loi_write_lop, OBD_BRW_WRITE); + osc_group_to_pending(cli, loi, &loi->loi_read_lop, OBD_BRW_READ); osc_check_rpcs(cli); spin_unlock(&cli->cl_loi_list_lock); @@ -2545,9 +2547,8 @@ static int 
osc_statfs(struct obd_device *obd, struct obd_statfs *osfs, */ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump) { - struct lov_user_md lum; - struct lov_mds_md *lmmk; - int rc, lmm_size; + struct lov_user_md lum, *lumk; + int rc, lum_size; ENTRY; if (!lsm) @@ -2560,22 +2561,26 @@ static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump) if (lum.lmm_magic != LOV_USER_MAGIC) RETURN(-EINVAL); - if (lum.lmm_stripe_count < 1) - RETURN(-EOVERFLOW); + if (lum.lmm_stripe_count > 0) { + lum_size = sizeof(lum) + sizeof(lum.lmm_objects[0]); + OBD_ALLOC(lumk, lum_size); + if (!lumk) + RETURN(-ENOMEM); - lmm_size = sizeof(lum) + sizeof(lum.lmm_objects[0]); - OBD_ALLOC(lmmk, lmm_size); - if (!lmmk) - RETURN(-ENOMEM); + lumk->lmm_objects[0].l_object_id = lsm->lsm_object_id; + } else { + lum_size = sizeof(lum); + lumk = &lum; + } - lmmk->lmm_stripe_count = 1; - lmmk->lmm_object_id = lsm->lsm_object_id; - lmmk->lmm_objects[0].l_object_id = lsm->lsm_object_id; + lumk->lmm_object_id = lsm->lsm_object_id; + lumk->lmm_stripe_count = 1; - if (copy_to_user(lump, lmmk, lmm_size)) + if (copy_to_user(lump, lumk, lum_size)) rc = -EFAULT; - OBD_FREE(lmmk, lmm_size); + if (lumk != &lum) + OBD_FREE(lumk, lum_size); RETURN(rc); } @@ -2919,8 +2924,8 @@ struct obd_ops osc_obd_ops = { .o_prep_async_page = osc_prep_async_page, .o_queue_async_io = osc_queue_async_io, .o_set_async_flags = osc_set_async_flags, - .o_queue_sync_io = osc_queue_sync_io, - .o_trigger_sync_io = osc_trigger_sync_io, + .o_queue_group_io = osc_queue_group_io, + .o_trigger_group_io = osc_trigger_group_io, .o_teardown_async_page = osc_teardown_async_page, o_punch: osc_punch, o_sync: osc_sync, diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 6596d33..75e83b4 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -184,8 +184,6 @@ do { \ PREPARE_TQUEUE((wq), (cb), (cbdata)); \ } while (0) -#define 
ll_invalidate_inode_pages(inode) invalidate_inode_pages(inode) -#define ll_truncate_complete_page(page) truncate_complete_page(page) #define PageUptodate Page_Uptodate #define our_recalc_sigpending(current) recalc_sigpending(current) #define num_online_cpus() smp_num_cpus @@ -202,10 +200,6 @@ static inline void our_cond_resched(void) do { \ INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ } while (0) -#define ll_invalidate_inode_pages(inode) \ - invalidate_inode_pages((inode)->i_mapping) -#define ll_truncate_complete_page(page) \ - truncate_complete_page((page)->mapping, page) #define wait_on_page wait_on_page_locked #define our_recalc_sigpending(current) recalc_sigpending() #define strtok(a,b) strpbrk(a, b) diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c index 4c2bd6a..478c25f 100644 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ b/lustre/portals/knals/qswnal/qswnal_cb.c @@ -1556,7 +1556,7 @@ kqswnal_recvmsg (nal_cb_t *nal, LASSERT (mlen <= rlen); if (krx->krx_nob < KQSW_HDR_SIZE + mlen) { CERROR("Bad message size: have %d, need %d + %d\n", - krx->krx_nob, KQSW_HDR_SIZE, mlen); + krx->krx_nob, (int)KQSW_HDR_SIZE, (int)mlen); return (PTL_FAIL); } @@ -1564,16 +1564,15 @@ kqswnal_recvmsg (nal_cb_t *nal, LASSERT (kiov == NULL || !in_interrupt ()); /* Either all pages or all vaddrs */ LASSERT (!(kiov != NULL && iov != NULL)); - - if (mlen != 0) - { + + if (mlen != 0) { page = 0; page_ptr = ((char *) page_address(krx->krx_pages[0])) + KQSW_HDR_SIZE; page_nob = PAGE_SIZE - KQSW_HDR_SIZE; LASSERT (niov > 0); - + if (kiov != NULL) { /* skip complete frags */ while (offset >= kiov->kiov_len) { @@ -1582,7 +1581,8 @@ kqswnal_recvmsg (nal_cb_t *nal, niov--; LASSERT (niov > 0); } - iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; + iov_ptr = ((char *)kmap (kiov->kiov_page)) + + kiov->kiov_offset + offset; iov_nob = kiov->kiov_len - offset; } else { /* skip complete frags */ diff --git 
a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 84c781d..eac0e99 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -238,6 +238,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, spin_lock_init(&request->rq_lock); INIT_LIST_HEAD(&request->rq_list); INIT_LIST_HEAD(&request->rq_replay_list); + INIT_LIST_HEAD(&request->rq_set_chain); init_waitqueue_head(&request->rq_reply_waitq); request->rq_xid = ptlrpc_next_xid(); atomic_set(&request->rq_refcount, 1); @@ -632,7 +633,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) } /* ptlrpc_queue_wait->l_wait_event guarantees that rq_intr - * will only be set after rq_timedout, but the osic waiting + * will only be set after rq_timedout, but the oig waiting * path sets rq_intr irrespective of whether ptlrpcd has * seen a timeout. our policy is to only interpret * interrupted rpcs after they have timed out */ @@ -665,10 +666,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) continue; } - list_del(&req->rq_list); - list_add_tail(&req->rq_list, - &imp->imp_sending_list); - + list_del_init(&req->rq_list); if (status != 0) { req->rq_status = status; req->rq_phase = RQ_PHASE_INTERPRET; @@ -676,6 +674,16 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) flags); GOTO(interpret, req->rq_status); } + if (req->rq_no_resend) { + req->rq_status = -ENOTCONN; + req->rq_phase = RQ_PHASE_INTERPRET; + spin_unlock_irqrestore(&imp->imp_lock, + flags); + GOTO(interpret, req->rq_status); + } + list_add_tail(&req->rq_list, + &imp->imp_sending_list); + spin_unlock_irqrestore(&imp->imp_lock, flags); req->rq_waiting = 0; @@ -992,8 +1000,10 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) return; } - LASSERT(!request->rq_receiving_reply); - LASSERT(request->rq_rqbd == NULL); /* client-side */ + LASSERTF(!request->rq_receiving_reply, "req %p\n", request); + LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */ + 
LASSERTF(list_empty(&request->rq_list), "req %p\n", request); + LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request); /* We must take it off the imp_replay_list first. Otherwise, we'll set * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ @@ -1006,6 +1016,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) spin_unlock_irqrestore(&request->rq_import->imp_lock, flags); } + LASSERTF(list_empty(&request->rq_replay_list), "req %p\n", request); if (atomic_read(&request->rq_refcount) != 0) { DEBUG_REQ(D_ERROR, request, @@ -1259,6 +1270,9 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, if (!list_empty(&req->rq_replay_list)) return; + lustre_msg_add_flags(req->rq_reqmsg, + MSG_REPLAY); + LASSERT(imp->imp_replayable); /* Balanced in ptlrpc_free_committed, usually. */ ptlrpc_request_addref(req); @@ -1348,6 +1362,10 @@ restart: else if (req->rq_intr) { rc = -EINTR; } + else if (req->rq_no_resend) { + spin_unlock_irqrestore(&imp->imp_lock, flags); + GOTO(out, rc = -ETIMEDOUT); + } else { GOTO(restart, rc); } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 5bc9e3f..b32eb2b 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -404,6 +404,7 @@ static int signal_completed_replay(struct obd_import *imp) int ptlrpc_import_recovery_state_machine(struct obd_import *imp) { int rc = 0; + int inflight; if (imp->imp_state == LUSTRE_IMP_EVICTED) { CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", @@ -416,8 +417,9 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) if (imp->imp_state == LUSTRE_IMP_REPLAY) { CDEBUG(D_HA, "replay requested by %s\n", imp->imp_target_uuid.uuid); - rc = ptlrpc_replay_next(imp); - if (rc == 0 && atomic_read(&imp->imp_replay_inflight) == 0) { + rc = ptlrpc_replay_next(imp, &inflight); + if (inflight == 0 && + atomic_read(&imp->imp_replay_inflight) == 0) { IMPORT_SET_STATE(imp, LUSTRE_IMP_REPLAY_LOCKS); rc = ldlm_replay_locks(imp); if 
(rc) diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index c735d01..150bbce 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -678,6 +678,7 @@ void lustre_assert_wire_constants(void) LASSERT(PTL_RPC_MSG_REPLY == 4713); LASSERT(MSG_LAST_REPLAY == 1); LASSERT(MSG_RESENT == 2); + LASSERT(MSG_REPLAY == 4); LASSERT(MSG_CONNECT_RECOVERING == 1); LASSERT(MSG_CONNECT_RECONNECT == 2); LASSERT(MSG_CONNECT_REPLAYABLE == 4); diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index d42eb65..a78cd0d 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -39,7 +39,7 @@ void lustre_assert_wire_constants(void); int ptlrpc_import_in_recovery(struct obd_import *imp); int ptlrpc_set_import_discon(struct obd_import *imp); void ptlrpc_handle_failed_import(struct obd_import *imp); -int ptlrpc_replay_next(struct obd_import *imp); +int ptlrpc_replay_next(struct obd_import *imp, int *inflight); #ifdef __KERNEL__ diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 76469cb..7eea191 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -121,16 +121,17 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp) #endif } -int ptlrpc_replay_next(struct obd_import *imp) +int ptlrpc_replay_next(struct obd_import *imp, int *inflight) { int rc = 0; struct list_head *tmp, *pos; struct ptlrpc_request *req; unsigned long flags; __u64 last_transno; - int sent_req = 0; ENTRY; + *inflight = 0; + /* It might have committed some after we last spoke, so make sure we * get rid of them now. 
*/ @@ -139,8 +140,10 @@ int ptlrpc_replay_next(struct obd_import *imp) last_transno = imp->imp_last_replay_transno; spin_unlock_irqrestore(&imp->imp_lock, flags); - CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n", - imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno); + CDEBUG(D_HA, "import %p from %s committed "LPU64" last "LPU64"\n", + imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno, + last_transno); + /* Do I need to hold a lock across this iteration? We shouldn't be * racing with any additions to the list, because we're in recovery * and are therefore not processing additional requests to add. Calls @@ -159,25 +162,17 @@ int ptlrpc_replay_next(struct obd_import *imp) list_for_each_safe(tmp, pos, &imp->imp_replay_list) { req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); if (req->rq_transno > last_transno) { - /* remove from list so ptlrpcd can send the - req, it should be reinserted after it is - sent and replied. Perhaps better solution - would be to add req->rq_replay_list so the - req can be saved for replay and still go - through the normal send thread. 
*/ rc = ptlrpc_replay_req(req); if (rc) { - CERROR("recovery replay error %d for req "LPD64"\n", - rc, req->rq_xid); + CERROR("recovery replay error %d for req " + LPD64"\n", rc, req->rq_xid); RETURN(rc); } - sent_req = 1; + *inflight = 1; break; } - } - - RETURN(sent_req); + RETURN(rc); } int ptlrpc_resend(struct obd_import *imp) @@ -369,13 +364,13 @@ static int ptlrpc_recover_import_no_retry(struct obd_import *imp, RETURN(rc); CDEBUG(D_ERROR, "%s: recovery started, waiting\n", - imp->imp_client->cli_name); + imp->imp_target_uuid.uuid); lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL); rc = l_wait_event(imp->imp_recovery_waitq, !ptlrpc_import_in_recovery(imp), &lwi); CDEBUG(D_ERROR, "%s: recovery finished\n", - imp->imp_client->cli_name); + imp->imp_target_uuid.uuid); RETURN(rc); diff --git a/lustre/scripts/cvs-modified-files.pl b/lustre/scripts/cvs-modified-files.pl deleted file mode 100755 index d13c4d3..0000000 --- a/lustre/scripts/cvs-modified-files.pl +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env perl - -my $mode = "NONE"; -my @modified, @added, @removed; - -while($line = <>) { - if ($line =~ /Modified Files:/) { - $mode = "MODIFIED"; - next; - } - - if ($line =~ /Added Files:/) { - $mode = "ADDED"; - next; - } - - if ($line =~ /Removed Files:/) { - $mode = "REMOVED"; - next; - } - - if ($mode eq "NONE") { next; } - if ($line =~ /-------/) { next; } - - chop($line); - $line =~ s/^CVS:\s+//; - $line =~ s/\s+$//; - # print "processing $line for $mode\n"; - @files = split(/ /, $line); - # print "new files for $mode: ", join(', ', @files), "\n"; - - if ($mode eq "MODIFIED") { - push(@modified, @files); - } elsif ($mode eq "ADDED") { - push(@added, @files); - } elsif ($mode eq "REMOVED") { - push(@removed, @files); - } else { - die "Unknown mode $mode!"; - } -} - -print join(' ', @modified); -if ($ENV{"SHOW_ALL_FILES"} ne "no") { - print ' ', join(' ', @added), ' ', join(' ', @removed); -} -print "\n"; diff --git a/lustre/scripts/cvsdiffclient 
b/lustre/scripts/cvsdiffclient deleted file mode 100755 index dab1e90..0000000 --- a/lustre/scripts/cvsdiffclient +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -[ -f .mergeinfo ] && . ./.mergeinfo - -FILES=`cvs-modified-files.pl $1` -TMP=`mktemp /tmp/cvslog-XXXXXXXX` -if [ -f $TMP ]; then - [ -f .mergeinfo ] && \ - echo "CVS: Update $child from $parent ($date)" >> $TMP - echo "CVS: did you update the ChangeLog for a bug fix?" >> $TMP - echo "CVS: b=" >> $TMP - echo "CVS: r=" >> $TMP - - cat $1 >> $TMP - cp $TMP $1 - rm $TMP -fi - -if [ "${FILES:+have_files}"x = have_filesx ]; then - echo Diffing $1 : $FILES - cvs diff -wbBup $FILES 2>/dev/null | sed "s/^/CVS: /" >>$1 -fi -#gnuclient $1 || vi $1 -[ "$EDITOR" ] || EDITOR=vi - -$EDITOR $1 diff --git a/lustre/scripts/land1.sh b/lustre/scripts/land1.sh deleted file mode 100755 index 0c07803..0000000 --- a/lustre/scripts/land1.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/sh -e - -CONFLICTS=cvs-merge-conflicts -CVS=cvs - -if [ -f .mergeinfo ] ; then - echo ".mergeinfo exists - clean up first" - exit -fi - -if [ -f $CONFLICTS ] ; then - echo "$CONFLICTS exists - clean up first" - exit -fi - -if [ $# -lt 2 -o $# -gt 3 ]; then - echo "This is phase 1 of merging branches. 
Usage: $0 parent child [dir]" - exit -fi - -parent=$1 -PARENT=`echo $parent | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -child=$2 -CHILD=`echo $child | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -date=`date +%Y%m%d_%H%M` -module=lustre - -if [ "$parent" != "HEAD" -a "`cat CVS/Tag`" != "T$parent" ]; then - echo "This script must be run within the $parent branch" - exit 1 -fi - -dir=$3 - -if [ $parent != "HEAD" ]; then - parent="b_$parent" -fi -if [ $child != "HEAD" ]; then - child="b_$child" -fi - -cat << EOF > .mergeinfo -parent=$parent -PARENT=$PARENT -child=$child -CHILD=$CHILD -date=$date -module=$module -dir=$dir -CONFLICTS=$CONFLICTS -EOF - -echo PARENT $PARENT parent $parent CHILD $CHILD child $child date $date - -# Update your tree to the PARENT branch; HEAD is not really a branch, so you -# need to update -A instead of update -r HEAD, or the commit will fail. -p -echo -n "Updating to $parent ...." -if [ $parent == "HEAD" ]; then - $CVS update -AdP $dir -else - $CVS update -r $parent -dP $dir -fi -echo "done" - -echo -n "Tagging as ${PARENT}_${CHILD}_LAND_PARENT_$date ..." -$CVS tag ${PARENT}_${CHILD}_LAND_PARENT_$date $dir -echo "done" - -echo -n "Create land point on ${child} ${PARENT}_${CHILD}_LAND_CHILD_$date ..." -$CVS rtag -r ${child} ${PARENT}_${CHILD}_LAND_CHILD_$date $module $dir -echo "done" - -echo -n "Preserve old base tag ${CHILD}_BASE as ${CHILD}_BASE_PREV ..." -$CVS tag -F -r ${CHILD}_BASE ${CHILD}_BASE_PREV $dir -echo "done" - -# Apply all of the changes to your local tree: -echo -n "Updating as -j ${CHILD}_BASE -j ${PARENT}_${CHILD}_LAND_CHILD_$date ..." -$CVS update -j ${CHILD}_BASE -j ${PARENT}_${CHILD}_LAND_CHILD_$date $dir -echo "done" - -echo -n "Recording conflicts in $CONFLICTS ..." -if $CVS update | grep '^C' > $CONFLICTS; then - echo "Conflicts found, fix before committing." 
- cat $CONFLICTS -else - echo "No conflicts found" - rm -f $CONFLICTS -fi -echo "done" - -echo "Test, commit and then run land2.sh (no arguments)" - diff --git a/lustre/scripts/land2.sh b/lustre/scripts/land2.sh deleted file mode 100755 index 2bb9fad..0000000 --- a/lustre/scripts/land2.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh -e - -CVS=cvs - -if [ ! -f .mergeinfo ] ; then - echo ".mergeinfo doesn't exist - exit" - exit -fi - -. .mergeinfo - -if [ -f "$CONFLICTS" ] ; then - echo "$CONFLICTS exists - clean up first" - cat $CONFLICTS - exit -fi - -#cvs update $dir 2>&1 | grep "^M" && echo "uncommitted changes" && exit 1 - -echo -n "Tagging as ${CHILD}_BASE_$date ..." -$CVS tag -F ${CHILD}_BASE_$date $dir -echo "done" -echo -n "Tagging as ${CHILD}_BASE ..." -$CVS tag -F ${CHILD}_BASE $dir - -echo "saving .mergeinfo as .mergeinfo-$date" -mv .mergeinfo .mergeinfo-$date -echo "done" diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 536e99e..31bb8ad 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -12,8 +12,8 @@ if [ -z "$THREADS" ]; then THREADS=`expr $KB / 16384` [ $THREADS -gt $MAX_THREADS ] && THREADS=$MAX_THREADS fi -[ "$SIZE" ] || SIZE=20480 -[ "$RSIZE" ] || RSIZE=64 +[ "$SIZE" ] || SIZE=40960 +[ "$RSIZE" ] || RSIZE=512 [ "$UID" ] || UID=1000 [ "$MOUNT" ] || MOUNT=/mnt/lustre [ "$MOUNT2" ] || MOUNT2=${MOUNT}2 @@ -27,13 +27,13 @@ fi for NAME in $CONFIGS; do export NAME MOUNT [ -e $NAME.sh ] && sh $NAME.sh - [ ! -e $NAME.xml ] && [ -z "$LDAPURL" ] && echo "no config '$NAME.xml'" 1>&2 && exit 1 + [ ! 
-e $NAME.xml ] && [ -z "$LDAPURL" ] && \ + echo "no config '$NAME.xml'" 1>&2 && exit 1 if [ "$RUNTESTS" != "no" ]; then sh runtests fi - #[ "$SANITY" != "no" ] && sh sanity.sh if [ "$SANITY" != "no" ]; then SANITYLOG=/tmp/sanity.log START=: CLEAN=: sh sanity.sh fi @@ -62,7 +62,7 @@ for NAME in $CONFIGS; do if [ "$BONNIE" != "no" ]; then mount | grep $MOUNT || sh llmount.sh $DEBUG_OFF - bonnie++ -s 0 -n 10 -u $UID -d $MOUNT + bonnie++ -f -r 0 -s $(($SIZE / 1024)) -n 10 -u $UID -d $MOUNT $DEBUG_ON sh llmountcleanup.sh sh llrmount.sh @@ -83,7 +83,7 @@ for NAME in $CONFIGS; do if [ "$IOZONE_DIR" != "no" ]; then mount | grep $MOUNT || sh llmount.sh SPACE=`df $MOUNT | tail -1 | awk '{ print $4 }'` - IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 1000 \)` + IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 512 \)` [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS $DEBUG_OFF diff --git a/lustre/tests/llmountcleanup.sh b/lustre/tests/llmountcleanup.sh index 3ce5dfa..cdcbaa8 100755 --- a/lustre/tests/llmountcleanup.sh +++ b/lustre/tests/llmountcleanup.sh @@ -39,7 +39,7 @@ if [ "$BUSY" ]; then mv $TMP/debug $TMP/debug-busy.`date +%s` exit 255 fi -LEAK_LUSTRE=`dmesg | grep "obd mem.*leaked" | tail -1 | grep -v "leaked: 0"` +LEAK_LUSTRE=`dmesg | tail -40 | grep "obd mem.*leaked"` LEAK_PORTALS=`dmesg | tail -20 | grep "Portals memory leaked"` if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then echo "$LEAK_LUSTRE" 1>&2 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 8ae8c13..1d4d976 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -190,4 +190,11 @@ test_14() { } run_test 14 "mdc_readpage resend test (bug 1138)" +test_15() { + do_facet mds "sysctl -w lustre.fail_loc=0x80000128" + touch $DIR/$tfile && return 1 + return 0 +} +run_test 15 "failed open (-ENOMEM)" + $CLEANUP diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index b516587..9030789 100755 --- a/lustre/tests/replay-single.sh +++ 
b/lustre/tests/replay-single.sh @@ -107,7 +107,7 @@ test_2b() { } run_test 2b "touch" -test_3() { +test_3a() { replay_barrier mds mcreate $DIR/$tfile o_directory $DIR/$tfile @@ -115,7 +115,32 @@ test_3() { $CHECKSTAT -t file $DIR/$tfile || return 2 rm $DIR/$tfile } -run_test 3 "replay failed open" +run_test 3a "replay failed open(O_DIRECTORY)" + +test_3b() { + replay_barrier mds +#define OBD_FAIL_MDS_OPEN_PACK | OBD_FAIL_ONCE + do_facet mds "sysctl -w lustre.fail_loc=0x80000114" + touch $DIR/$tfile + do_facet mds "sysctl -w lustre.fail_loc=0" + fail mds + $CHECKSTAT -t file $DIR/$tfile && return 2 + return 0 +} +run_test 3b "replay failed open -ENOMEM" + +test_3c() { + replay_barrier mds +#define OBD_FAIL_MDS_ALLOC_OBDO | OBD_FAIL_ONCE + do_facet mds "sysctl -w lustre.fail_loc=0x80000128" + touch $DIR/$tfile + do_facet mds "sysctl -w lustre.fail_loc=0" + fail mds + + $CHECKSTAT -t file $DIR/$tfile && return 2 + return 0 +} +run_test 3c "replay failed open -ENOMEM" test_4() { replay_barrier mds @@ -124,7 +149,7 @@ test_4() { done fail mds for i in `seq 10`; do - grep -q "tag-$i" $DIR/$tfile-$i || error "f1c-$i" + grep -q "tag-$i" $DIR/$tfile-$i || error "$tfile-$i" done } run_test 4 "|x| 10 open(O_CREAT)s" diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index f4a7f22..ea4810e 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -1713,7 +1713,7 @@ test_62() { } run_test 62 "verify obd_match failure doesn't LBUG (should -EIO)" -# bug 2319 - osic_wait() interrupted causes crash because of invalid waitq. +# bug 2319 - oig_wait() interrupted causes crash because of invalid waitq. 
test_63() { MAX_DIRTY_MB=`cat /proc/fs/lustre/osc/*/max_dirty_mb | head -1` for i in /proc/fs/lustre/osc/*/max_dirty_mb ; do @@ -1731,7 +1731,7 @@ test_63() { done true } -run_test 63 "Verify osic_wait interruption does not crash ======" +run_test 63 "Verify oig_wait interruption does not crash ======" test_64a () { df $DIR diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 17c4167..4b054d5 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -517,8 +517,8 @@ equals_msg() { run_one() { testnum=$1 message=$2 - tfile=f$base - tdir=d$base + tfile=f${testnum} + tdir=d${base} # Pretty tests run faster. equals_msg $testnum: $message diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index d33fc7a..8445ce9 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -749,6 +749,7 @@ main(int argc, char **argv) CHECK_VALUE(MSG_LAST_REPLAY); CHECK_VALUE(MSG_RESENT); + CHECK_VALUE(MSG_REPLAY); CHECK_VALUE(MSG_CONNECT_RECOVERING); CHECK_VALUE(MSG_CONNECT_RECONNECT); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index cdc17b8..e426715 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -35,6 +35,7 @@ void lustre_assert_wire_constants(void) LASSERT(PTL_RPC_MSG_REPLY == 4713); LASSERT(MSG_LAST_REPLAY == 1); LASSERT(MSG_RESENT == 2); + LASSERT(MSG_REPLAY == 4); LASSERT(MSG_CONNECT_RECOVERING == 1); LASSERT(MSG_CONNECT_RECONNECT == 2); LASSERT(MSG_CONNECT_REPLAYABLE == 4); -- 1.8.3.1