From d0b24f7930fd678e433752448c716afdd91083cf Mon Sep 17 00:00:00 2001 From: adilger Date: Thu, 16 Dec 2004 22:52:19 +0000 Subject: [PATCH] Land b1_4_smallfix onto b1_4 (20041216_1438) - add per-fs limit of Lustre pages in page cache, avoid OOM (4699) - no transno return for symlink open, don't save no-trasno open (3440) - don't try to complete elan receive that already failed (4012) - free RPC server reply state on error (5406) - fix ppc64/x86_64 spec to use %{_libdir} instead of /usr/lib (5389) - unlock page after async write error during send (3677) - return original error code on reconstructed replies (3761) - no transno return for symlink open, don't save no-trasno open (3440) - bump the watchdog timeouts -- we can't handle 30sec yet - extra debugging for orphan dentry/inode bug (5259) - clean up thread from ptlrpc_start_thread() on error (5160) - minor changes from b_cray to reduce size of diff - fix mount usage message, return errors per mount(8) --- lustre/ChangeLog | 21 ++- lustre/conf/Makefile.am | 2 +- lustre/configure.in | 11 +- lustre/doc/lmc.1 | 27 ++-- lustre/include/linux/lustre_compat25.h | 1 + lustre/include/linux/lustre_export.h | 2 +- lustre/include/linux/lustre_fsfilt.h | 4 - lustre/include/linux/lustre_import.h | 7 +- lustre/include/linux/lustre_log.h | 5 +- lustre/include/linux/obd.h | 2 +- lustre/include/linux/obd_support.h | 2 - .../patches/vfs_intent-2.4.20-rh.patch | 4 +- .../patches/vfs_intent-2.4.20-vanilla.patch | 4 +- .../patches/vfs_intent-2.4.21-suse2.patch | 4 +- lustre/kernel_patches/series/chaos-2.4.21 | 1 - lustre/kernel_patches/series/rhel-2.4.21 | 1 - lustre/kernel_patches/series/suse-2.4.21-jvn | 2 +- lustre/kernel_patches/series/vanilla-2.4.24 | 2 +- lustre/ldlm/ldlm_lib.c | 12 +- lustre/ldlm/ldlm_lockd.c | 5 +- lustre/ldlm/ldlm_request.c | 2 +- lustre/ldlm/ldlm_test.c | 2 +- lustre/llite/dcache.c | 13 ++ lustre/llite/file.c | 9 +- lustre/llite/llite_internal.h | 11 +- lustre/llite/llite_lib.c | 62 +++++++- lustre/llite/lproc_llite.c | 14 +- lustre/llite/rw.c | 10 +- lustre/llite/rw24.c | 1 + lustre/llite/rw26.c | 2 + lustre/llite/super.c | 13 ++ lustre/lov/lov_request.c | 12 +- lustre/lvfs/fsfilt_ext3.c | 177 ++++----------------- lustre/lvfs/lvfs_linux.c | 11 +- lustre/lvfs/lvfs_userfs.c | 2 +- lustre/mdc/mdc_locks.c | 44 ++--- lustre/mds/handler.c | 15 +- lustre/mds/lproc_mds.c | 1 - lustre/mds/mds_internal.h | 2 +- lustre/mds/mds_open.c | 6 +- lustre/mds/mds_reint.c | 7 +- lustre/obdfilter/filter.c | 1 - lustre/obdfilter/filter_io_26.c | 12 +- lustre/ost/ost_handler.c | 38 +++-- lustre/portals/libcfs/tracefile.c | 10 +- lustre/portals/utils/debug.c | 25 +-- lustre/ptlrpc/client.c | 2 +- lustre/ptlrpc/import.c | 21 +-- lustre/ptlrpc/lproc_ptlrpc.c | 1 + lustre/ptlrpc/pinger.c | 16 +- lustre/ptlrpc/ptlrpcd.c | 10 +- lustre/ptlrpc/recov_thread.c | 2 +- lustre/ptlrpc/recover.c | 15 +- lustre/ptlrpc/service.c | 12 +- lustre/scripts/branch.sh | 9 +- lustre/scripts/lustre.spec.in | 14 +- lustre/tests/acceptance-small.sh | 4 + lustre/tests/cfg/insanity-ltest.sh | 2 +- lustre/tests/cfg/local.sh | 2 +- lustre/tests/createmany.c | 2 +- lustre/tests/echo.sh | 4 +- lustre/tests/ll_dirstripe_verify.c | 99 ++++++------ lustre/tests/llmountcleanup.sh | 1 - lustre/tests/local.sh | 8 +- lustre/tests/lov.sh | 2 +- lustre/tests/multiop.c | 13 +- lustre/tests/recovery-cleanup.sh | 7 +- lustre/tests/recovery-small.sh | 4 +- lustre/tests/replay-dual.sh | 4 +- lustre/tests/replay-ost-single.sh | 2 +- lustre/tests/replay-single.sh | 24 ++- lustre/tests/runas.c | 2 +- lustre/tests/sanity.sh | 45 ++++-- lustre/tests/test-framework.sh | 3 +- lustre/tests/unlinkmany.c | 4 +- lustre/utils/lconf | 16 +- lustre/utils/lctl.c | 4 +- lustre/utils/llmount.c | 142 +++++++++++------ 78 files changed, 606 insertions(+), 511 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 27ad84f..daf8014 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,3 +1,13 @@ +tbd Cluster File Systems, Inc. + * version 1.4.1 + * bug fixes + - add per-fs limit of Lustre pages in page cache, avoid OOM (4699) + - no transno return for symlink open, don't save no-trasno open (3440) + - don't try to complete elan receive that already failed (4012) + - free RPC server reply state on error (5406) + * miscellania + - fix ppc64/x86_64 spec to use %{_libdir} instead of /usr/lib (5389) + 2004-11-23 Cluster File Systems, Inc. * version 1.4.0 * bug fixes @@ -6,10 +16,6 @@ - don't use EXT3 constants in llite code (5094) - memory shortage at startup could cause assertion (5176) - don't keep a lock reference when lock is not granted (4238) - - unsafe list practices (rarely) led to infinite eviction loop (4908) - - add per-fs limit of Lustre pages in page cache, avoid OOM (4699) - - drop import inflight refcount on signal_completed_replay error (5255) - - unlock page after async write error during send (3677) * miscellania - reorganization of lov code - single portals codebase @@ -28,8 +34,15 @@ tbd Cluster File Systems, Inc. - lock /proc/sys/portals/routes internal state, avoiding oops (4827) - the watchdog thread now runs as interruptible (5246) - handle missing objects in filter_preprw_read properly (5265) + - unsafe list practices (rarely) led to infinite eviction loop (4908) + - drop import inflight refcount on signal_completed_replay error (5255) + - unlock page after async write error during send (3677) + - return original error code on reconstructed replies (3761) + - no transno return for symlink open, don't save no-trasno open (3440) * miscellania - add pid to ldlm debugging output (4922) + - bump the watchdog timeouts -- we can't handle 30sec yet + - extra debugging for orphan dentry/inode bug (5259) 2004-11-16 Cluster File Systems, Inc. * version 1.2.8 diff --git a/lustre/conf/Makefile.am b/lustre/conf/Makefile.am index a7550c3..978cf29 100644 --- a/lustre/conf/Makefile.am +++ b/lustre/conf/Makefile.am @@ -10,5 +10,5 @@ ldapschemadir = $(sysconfdir)/openldap/schema if UTILS ldapconf_SCRIPTS = slapd-lustre.conf ldapschema_SCRIPTS = lustre.schema -pkglib_DATA = top.ldif lustre2ldif.xsl +pkgdata_DATA = top.ldif lustre2ldif.xsl endif diff --git a/lustre/configure.in b/lustre/configure.in index 96116b8..6776e74 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -5,7 +5,7 @@ AC_INIT AC_CANONICAL_SYSTEM -AM_INIT_AUTOMAKE(lustre, 1.4.0.1) +AM_INIT_AUTOMAKE(lustre, 1.4.0.3) # AM_MAINTAINER_MODE # Four main targets: lustre kernel modules, utilities, tests, and liblustre @@ -61,15 +61,6 @@ AC_SUBST(ENABLE_DOC) # default backing fs is ext3 BACKINGFS='ext3' -# LLNL patches their ext3 and calls it extN -AC_MSG_CHECKING([whether to use extN]) -AC_ARG_ENABLE([extN], - AC_HELP_STRING([--enable-extN], - [use extN instead of ext3 for lustre backend]), - [BACKINGFS='extN'],[enable_extN='no']) -AC_MSG_RESULT([$enable_extN]) -AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) - # SuSE gets ldiskfs AC_MSG_CHECKING([whether to enable ldiskfs]) AC_ARG_ENABLE([ldiskfs], diff --git a/lustre/doc/lmc.1 b/lustre/doc/lmc.1 index c3345e8..accc973 100644 --- a/lustre/doc/lmc.1 +++ b/lustre/doc/lmc.1 @@ -40,16 +40,16 @@ The arguements required are: This will create a new node with the given name if not already present. .TP --timeout -Timeout before going into recovery +Timeout before going into recovery. .TP --lustre_upcall -Set the location of the Lustre upcall scripts used by the client for recovery +Set the location of the Lustre upcall scripts used by the client for recovery. .TP --portals_upcall -Specify the location of the Portals upcall scripts used by the client for recovery +Specify the location of the Portals upcall scripts used by the client for recovery. .TP --upcall -Specify the location of both (Lustre and Portals) upcall scripts used by the client for recovery +Specify the location of both (Lustre and Portals) upcall scripts used by the client for recovery. .PP .B --add net Adds a network device descriptor for the given node, with parameters as indicated. @@ -59,7 +59,7 @@ The arguments required are: This will create a new node with the given name if not already present. This is also used to specify a specific node for other elements. .TP --nettype -This can be tcp, elan, gm, scimac. +This can be tcp, elan, or gm. .TP --nid nid The network id, e.g. ElanID or IP address as used by Portals. If nid is '*', then the local address of the interface with specified nettype is will be substituted when the node is configured with lconf. An nid of '*' should be used only for the generic client configuration. @@ -67,7 +67,7 @@ The network id, e.g. ElanID or IP address as used by Portals. If nid is '*', the --hostaddr addr .TP --router -Optional flag to mark this node as a router +Optional flag to mark this node as a router. .TP --port [port] Optional arguement to indicate the tcp port. The default is 988. @@ -82,13 +82,16 @@ Optional arguement. Default is 0. Optional arguement since some OSTs might not have the required support. This is turned off by default, value of 1 will turn it ON. .PP .B --add mds -Specify the MDS configuration +Specify the MDS configuration: .TP --node -Name of the node on which the MDS resides +Name of the node on which the MDS resides. .TP --mds -Common name of the MDS +Common name of the MDS. +.TP +--mdsuuid +Specify MDS uuid. .TP --dev Path of device on local system. If the is a file, then a loop device is created and used as the block device. @@ -132,6 +135,9 @@ Assign a name to the OST device. --node Node on which the OST service is run, can not be a profile node. .TP +--failover +Enable failover support on OST. +.TP --dev Path of device on local system. If this is a file, then a loop device is created and used as the block device. .TP @@ -174,6 +180,9 @@ Creates a static route through a gateway to a specific nid or a range of nid's. --node node Node to add the route to. .TP +--router +Optional flag to mark a node as router. +.TP --gw nid The nid of the gateway (must be a local interface or a peer). .TP diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 119eb59..5f76d28 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -114,6 +114,7 @@ static inline int cleanup_group_info(void) page->private = 0; \ } while(0) +#define kiobuf bio #define smp_num_cpus num_online_cpus() #include diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index 2ba6078..7539a18 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -70,7 +70,7 @@ struct obd_export { spinlock_t exp_lock; /* protects flags int below */ /* ^ protects exp_outstanding_replies too */ int exp_flags; - int exp_failed:1, + unsigned int exp_failed:1, exp_replay_needed:1, exp_libclient:1; /* liblustre client? */ union { diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 1557133..cb257e7 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -59,11 +59,7 @@ struct fsfilt_operations { int size); int (* fs_get_md)(struct inode *inode, void *md, int size); /* this method is needed to make IO operation fsfilt nature depend. */ -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - int (* fs_send_bio)(int rw, struct inode *inode, struct bio *bio); -#else int (* fs_send_bio)(int rw, struct inode *inode,struct kiobuf *bio); -#endif ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, loff_t *offset); int (* fs_add_journal_cb)(struct obd_device *obd, __u64 last_rcvd, diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index ecae76c..84a6e0e 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -28,10 +28,9 @@ enum lustre_imp_state { static inline char * ptlrpc_import_state_name(enum lustre_imp_state state) { - static char* import_state_names[] = { - "", "CLOSED", "NEW", "DISCONN", - "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", + "", "CLOSED", "NEW", "DISCONN", + "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", "RECOVER", "FULL", "EVICTED", }; @@ -80,7 +79,7 @@ struct obd_import { spinlock_t imp_lock; /* flags */ - int imp_invalid:1, imp_replayable:1, + unsigned int imp_invalid:1, imp_replayable:1, imp_dlm_fake:1, imp_server_timeout:1, imp_initial_recov:1, imp_force_verify:1, imp_pingable:1, imp_resend_replay:1, diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 8152647..9266f13 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -40,6 +40,7 @@ #define LOG_NAME_LIMIT(logname, name) \ snprintf(logname, sizeof(logname), "LOGS/%s", name) +#define LLOG_EEMPTY 4711 struct plain_handle_data { struct list_head phd_entry; @@ -67,8 +68,6 @@ struct llog_handle { } u; }; -#define LLOG_EEMPTY 4711 - /* llog.c - general API */ typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); int llog_init_handle(struct llog_handle *handle, int flags, @@ -89,7 +88,7 @@ struct llog_process_data { struct llog_process_cat_data { int first_idx; int last_idx; - /* to process catlog across zero record */ + /* to process catalog across zero record */ }; int llog_cat_put(struct llog_handle *cathandle); diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 8e4abbf..d915fd6 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -394,7 +394,7 @@ struct lov_obd { struct lov_desc desc; int bufsize; int refcount; - int lo_catalog_loaded:1; + unsigned int lo_catalog_loaded:1; struct lov_tgt_desc *tgts; }; diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 8ddc85b..20a25ce 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -84,9 +84,7 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 #define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 #define OBD_FAIL_MDS_ALLOC_OBDO 0x128 -#if 0 /* added on b_cray */ #define OBD_FAIL_MDS_PAUSE_OPEN 0x129 -#endif #define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a #define OBD_FAIL_MDS_OPEN_CREATE 0x12b diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch index 74d556d..3133c62 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch @@ -1289,7 +1289,7 @@ Index: linux-2.4.20/fs/open.c - error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | - LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); ++ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); if (error) goto out; @@ -1686,7 +1686,7 @@ Index: linux-2.4.20/include/linux/fs.h #define ATTR_ATTR_FLAG 1024 +#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 ++#define ATTR_CTIME_SET 0x2000 /* * This is the Inode Attributes structure, used for notify_change(). It diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch index 9d70dab..3205465 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch @@ -1203,7 +1203,7 @@ Index: linux-2.4.24/fs/open.c - error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | - LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); ++ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); if (error) goto out; @@ -1660,7 +1660,7 @@ Index: linux-2.4.24/include/linux/fs.h #define ATTR_ATTR_FLAG 1024 +#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 ++#define ATTR_CTIME_SET 0x2000 /* * This is the Inode Attributes structure, used for notify_change(). It diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch index d6b6e52..4006c70 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.21-suse2.patch @@ -1217,7 +1217,7 @@ Index: linux-2.4.21-x86_64/fs/open.c - error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | - LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); ++ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); if (error) goto out; @@ -1676,7 +1676,7 @@ Index: linux-2.4.21-x86_64/include/linux/fs.h #define ATTR_ATTR_FLAG 1024 +#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -+#define ATTR_CTIME_SET 0x2000 ++#define ATTR_CTIME_SET 0x2000 /* * This is the Inode Attributes structure, used for notify_change(). It diff --git a/lustre/kernel_patches/series/chaos-2.4.21 b/lustre/kernel_patches/series/chaos-2.4.21 index 3e69962..97190b5 100644 --- a/lustre/kernel_patches/series/chaos-2.4.21 +++ b/lustre/kernel_patches/series/chaos-2.4.21 @@ -1,6 +1,5 @@ revert-76chaos.patch configurable-x86-stack-2.4.21-chaos.patch -configurable-x86_64-2.4.21.patch dev_read_only_2.4.21-chaos.patch exports_2.4.19-suse.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 index 817319b..cf623d5 100644 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ b/lustre/kernel_patches/series/rhel-2.4.21 @@ -1,5 +1,4 @@ configurable-x86-stack-2.4.21-chaos.patch -configurable-x86_64-2.4.21.patch dev_read_only_2.4.21-chaos.patch exports_2.4.19-suse.patch lustre_version.patch diff --git a/lustre/kernel_patches/series/suse-2.4.21-jvn b/lustre/kernel_patches/series/suse-2.4.21-jvn index 6751795..70ad932 100644 --- a/lustre/kernel_patches/series/suse-2.4.21-jvn +++ b/lustre/kernel_patches/series/suse-2.4.21-jvn @@ -24,7 +24,7 @@ ext3-no-write-super-chaos.patch add_page_private.patch nfs_export_kernel-2.4.21-suse2.patch ext3-raw-lookup.patch -ext3-ea-in-inode-2.4.21-suse2.patch +ext3-ea-in-inode-2.4.21-chaos.patch listman-2.4.20.patch ext3-truncate-buffer-head.patch lookup-stack-symbols-2.4.21-suse-171.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.24 b/lustre/kernel_patches/series/vanilla-2.4.24 index 735db03..4358e37 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.24 +++ b/lustre/kernel_patches/series/vanilla-2.4.24 @@ -24,7 +24,7 @@ tcp-zero-copy-2.4.22-rh.patch jbd-dont-account-blocks-twice.patch jbd-commit-tricks.patch ext3-no-write-super-chaos.patch -add_page_private.patch +add_page_private-2.4.24-vanilla.patch nfs_export_kernel-2.4.22.patch ext3-raw-lookup.patch ext3-ea-in-inode-2.4.22-rh.patch diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index c5f39a3..dd4ae47 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -602,10 +602,12 @@ void target_destroy_export(struct obd_export *exp) */ -static void target_release_saved_req(struct ptlrpc_request *req) +static void target_release_saved_req(struct ptlrpc_request *req) { - if (req->rq_reply_state != NULL) + if (req->rq_reply_state != NULL) { ptlrpc_rs_decref(req->rq_reply_state); + /* req->rq_reply_state = NULL; */ + } class_export_put(req->rq_export); OBD_FREE(req->rq_reqmsg, req->rq_reqlen); @@ -636,7 +638,7 @@ static void target_finish_recovery(struct obd_device *obd) struct ptlrpc_request *req; req = list_entry(tmp, struct ptlrpc_request, rq_list); list_del(&req->rq_list); - DEBUG_REQ(D_ERROR, req, "delayed:"); + DEBUG_REQ(D_WARNING, req, "delayed:"); ptlrpc_reply(req); target_release_saved_req(req); } @@ -862,6 +864,10 @@ static void process_recovery_queue(struct obd_device *obd) /* bug 1580: decide how to properly sync() in recovery */ //mds_fsync_super(mds->mds_sb); class_export_put(req->rq_export); + if (req->rq_reply_state != NULL) { + ptlrpc_rs_decref(req->rq_reply_state); + /* req->rq_reply_state = NULL; */ + } OBD_FREE(req->rq_reqmsg, req->rq_reqlen); OBD_FREE(req, sizeof *req); spin_lock_bh(&obd->obd_processing_task_lock); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 67ab95a..efad2b5 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -146,7 +146,7 @@ static int expired_lock_main(void *arg) CERROR("lock with free export on elt list %p\n", export); lock->l_export = NULL; - LDLM_ERROR(lock, "free export\n"); + LDLM_ERROR(lock, "free export"); continue; } export = class_export_get(lock->l_export); @@ -174,7 +174,6 @@ static void waiting_locks_callback(unsigned long unused) spin_lock_bh(&waiting_locks_spinlock); while (!list_empty(&waiting_locks_list)) { - lock = list_entry(waiting_locks_list.next, struct ldlm_lock, l_pending_chain); @@ -623,7 +622,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_BLOCKED, obd_timeout * 2); l_lock(&lock->l_resource->lr_namespace->ns_lock); if (req->rq_export->exp_failed) { - LDLM_ERROR(lock,"lock on destroyed export %p\n",req->rq_export); + LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export); l_unlock(&lock->l_resource->lr_namespace->ns_lock); GOTO(out, rc = -ENOTCONN); } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index fae8d40..ed7e842 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -587,7 +587,7 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) ldlm_lock_cancel(lock); } else { if (lock->l_resource->lr_namespace->ns_client) { - LDLM_ERROR(lock, "Trying to cancel local lock\n"); + LDLM_ERROR(lock, "Trying to cancel local lock"); LBUG(); } LDLM_DEBUG(lock, "client-side local cancel"); diff --git a/lustre/ldlm/ldlm_test.c b/lustre/ldlm/ldlm_test.c index 7a5e066..1c3f0da 100644 --- a/lustre/ldlm/ldlm_test.c +++ b/lustre/ldlm/ldlm_test.c @@ -565,7 +565,7 @@ int ldlm_regression_start(struct obd_device *obddev, num_extents = num_extents_in; LDLM_DEBUG_NOLOCK("regression test started: threads: %d, max_locks: " - "%d, num_res: %d, num_ext: %d\n", + "%d, num_res: %d, num_ext: %d", threads, max_locks_in, num_resources_in, num_extents_in); diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index c384eed..03d9f23 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -157,6 +157,19 @@ restart: tmp = head; while ((tmp = tmp->next) != head) { struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); + + if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') { + CERROR("called on root (?) dentry=%p, inode=%p " + "ino=%lu\n", dentry, inode, inode->i_ino); + lustre_dump_dentry(dentry, 1); + portals_debug_dumpstack(NULL); + } else if (d_mountpoint(dentry)) { + CERROR("called on mountpoint (?) dentry=%p, inode=%p " + "ino=%lu\n", dentry, inode, inode->i_ino); + lustre_dump_dentry(dentry, 1); + portals_debug_dumpstack(NULL); + } + if (atomic_read(&dentry->d_count) == 0) { CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p " "inode %p\n", dentry->d_name.len, diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 62ffd4a..ee99808 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -131,7 +131,7 @@ static int ll_intent_file_open(struct file *file, void *lmm, rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_PLAIN, itp, LCK_PW, &data, &lockh, lmm, lmmsize, ldlm_completion_ast, - ll_mdc_blocking_ast, parent->d_inode); + ll_mdc_blocking_ast, NULL); if (rc < 0) CERROR("lock enqueue: err: %d\n", rc); RETURN(rc); @@ -611,7 +611,7 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) lvb->lvb_ctime = LTIME_S(inode->i_ctime); LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64 - "atime "LPU64", mtime "LPU64", ctime "LPU64, + " atime "LPU64", mtime "LPU64", ctime "LPU64, inode->i_size, stripe, lvb->lvb_size, lvb->lvb_mtime, lvb->lvb_atime, lvb->lvb_ctime); GOTO(iput, 0); @@ -1084,11 +1084,12 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, case EXT3_IOC_GETFLAGS: case EXT3_IOC_SETFLAGS: RETURN( ll_iocontrol(inode, file, cmd, arg) ); + case EXT3_IOC_GETVERSION_OLD: + case EXT3_IOC_GETVERSION: + return put_user(inode->i_generation, (int *) arg); /* We need to special case any other ioctls we want to handle, * to send them to the MDS/OST as appropriate and to properly * network encode the arg field. - case EXT2_IOC_GETVERSION_OLD: - case EXT2_IOC_GETVERSION_NEW: case EXT2_IOC_SETVERSION_OLD: case EXT2_IOC_SETVERSION_NEW: */ diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 4075d6b..8708ca8 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -12,9 +12,12 @@ #include +extern struct list_head ll_super_blocks; +extern spinlock_t ll_sb_lock; + /* default to about 40meg of readahead on a given system. That much tied * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */ -#define SBI_DEFAULT_READAHEAD_MAX ((40UL << 20) >> PAGE_CACHE_SHIFT) +#define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT)) enum ra_stat { RA_STAT_HIT = 0, RA_STAT_MISS, @@ -36,6 +39,7 @@ struct ll_ra_info { }; struct ll_sb_info { + struct list_head ll_list; /* this protects pglist and ra_info. It isn't safe to * grab from interrupt contexts */ spinlock_t ll_lock; @@ -137,7 +141,7 @@ struct ll_async_page { struct page *llap_page; struct list_head llap_pending_write; /* only trust these if the page lock is providing exclusion */ - unsigned llap_write_queued:1, + unsigned int llap_write_queued:1, llap_defer_uptodate:1, llap_origin:3, llap_ra_used:1; @@ -183,6 +187,7 @@ int ll_writepage(struct page *page); void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa); void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction); +void ll_shrink_cache(int priority, unsigned int gfp_mask); void ll_removepage(struct page *page); int ll_readpage(struct file *file, struct page *page); struct ll_async_page *llap_from_cookie(void *cookie); @@ -247,6 +252,8 @@ int ll_iocontrol(struct inode *inode, struct file *file, void ll_umount_begin(struct super_block *sb); int ll_prep_inode(struct obd_export *exp, struct inode **inode, struct ptlrpc_request *req, int offset, struct super_block *); +void lustre_dump_dentry(struct dentry *, int recur); +void lustre_dump_inode(struct inode *); struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, struct list_head *list); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index fe911da..af4bca4 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -69,6 +69,10 @@ struct ll_sb_info *lustre_init_sbi(struct super_block *sb) generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); + + spin_lock(&ll_sb_lock); + list_add_tail(&sbi->ll_list, &ll_super_blocks); + spin_unlock(&ll_sb_lock); RETURN(sbi); } @@ -77,8 +81,12 @@ void lustre_free_sbi(struct super_block *sb) struct ll_sb_info *sbi = ll_s2sbi(sb); ENTRY; - if (sbi != NULL) + if (sbi != NULL) { + spin_lock(&ll_sb_lock); + list_del(&sbi->ll_list); + spin_unlock(&ll_sb_lock); OBD_FREE(sbi, sizeof(*sbi)); + } ll_s2sbi(sb) = NULL; EXIT; } @@ -236,6 +244,49 @@ out: RETURN(err); } +void lustre_dump_inode(struct inode *inode) +{ + struct list_head *tmp; + int dentry_count = 0; + + LASSERT(inode != NULL); + + list_for_each(tmp, &inode->i_dentry) + dentry_count++; + + CERROR("inode %p dump: dev=%s:%lu, mode=%o, count=%u, %d dentries\n", + inode, kdevname(inode->i_sb->s_dev), inode->i_ino, + inode->i_mode, atomic_read(&inode->i_count), dentry_count); +} + +void lustre_dump_dentry(struct dentry *dentry, int recur) +{ + struct list_head *tmp; + int subdirs = 0; + + LASSERT(dentry != NULL); + + list_for_each(tmp, &dentry->d_subdirs) + subdirs++; + + CERROR("dentry %p dump: name=%.*s parent=%.*s (%p), inode=%p, count=%u," + " flags=0x%x, vfs_flags=0x%lx, fsdata=%p, %d subdirs\n", dentry, + dentry->d_name.len, dentry->d_name.name, + dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, + dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count), + dentry->d_flags, dentry->d_vfs_flags, dentry->d_fsdata, subdirs); + if (dentry->d_inode != NULL) + lustre_dump_inode(dentry->d_inode); + + if (recur == 0) + return; + + list_for_each(tmp, &dentry->d_subdirs) { + struct dentry *d = list_entry(tmp, struct dentry, d_child); + lustre_dump_dentry(d, recur - 1); + } +} + void lustre_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -258,9 +309,12 @@ void lustre_common_put_super(struct super_block *sb) // We do this to get rid of orphaned dentries. That is not really trw. hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); - CWARN("orphan dentry %.*s (%p->%p) at unmount\n", + CWARN("found orphan dentry %.*s (%p->%p) at unmount, dumping " + "before and after shrink_dcache_parent\n", dentry->d_name.len, dentry->d_name.name, dentry, next); + lustre_dump_dentry(dentry, 1); shrink_dcache_parent(dentry); + lustre_dump_dentry(dentry, 1); } EXIT; } @@ -742,7 +796,7 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) inode = lock->l_ast_data; __LDLM_DEBUG(inode->i_state & I_FREEING ? D_INFO : D_WARNING, lock, - "l_ast_data %p is bogus: magic %08x\n", + "l_ast_data %p is bogus: magic %08x", lock->l_ast_data, lli->lli_inode_magic); inode = NULL; } @@ -757,7 +811,7 @@ static int null_if_equal(struct ldlm_lock *lock, void *data) lock->l_ast_data = NULL; if (lock->l_req_mode != lock->l_granted_mode) - LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); + LDLM_ERROR(lock,"clearing inode with ungranted lock"); } return LDLM_ITER_CONTINUE; diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index d39803c..fed41c3 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -513,13 +513,13 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v) llap->llap_origin); seq_printf(seq, "%5lu | %p %p %s %s %s | %p %p %lu %u [", - sbi->ll_pglist_gen, - llap, llap->llap_cookie, - llap_origins[llap->llap_origin], - llap->llap_write_queued ? "wq" : "- ", - llap->llap_defer_uptodate ? "du" : "- ", - page, page->mapping->host, page->index, - page_count(page)); + sbi->ll_pglist_gen, + llap, llap->llap_cookie, + llap_origins[llap->llap_origin], + llap->llap_write_queued ? "wq" : "- ", + llap->llap_defer_uptodate ? "du" : "- ", + page, page->mapping->host, page->index, + page_count(page)); seq_page_flag(seq, page, locked, has_flags); seq_page_flag(seq, page, error, has_flags); seq_page_flag(seq, page, referenced, has_flags); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 369ff51..05303eb 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -429,7 +429,7 @@ int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction) "%s%s%s%s origin %s\n", llap->llap_write_queued ? "wq " : "", PageDirty(page) ? "pd " : "", - PageUptodate(page) ? "" : "!pu", + PageUptodate(page) ? "" : "!pu ", llap->llap_defer_uptodate ? "" : "!du", llap_origins[llap->llap_origin]); continue; @@ -459,6 +459,14 @@ int llap_shrink_cache(struct ll_sb_info *sbi, int shrink_fraction) return count; } +void ll_shrink_cache(int priority, unsigned int gfp_mask) +{ + struct ll_sb_info *sbi; + + list_for_each_entry(sbi, &ll_super_blocks, ll_list) + llap_shrink_cache(sbi, priority); +} + struct ll_async_page *llap_from_page(struct page *page, unsigned origin) { struct ll_async_page *llap; diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index ec30746..6ad6dcd 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -91,6 +91,7 @@ out: if (rc) { if (!lli->lli_async_rc) lli->lli_async_rc = rc; + /* re-dirty page on error so it retries write */ SetPageDirty(page); ClearPageLaunder(page); unlock_page(page); diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index bade134..409fbee 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -93,6 +93,8 @@ out: if (rc) { if (!lli->lli_async_rc) lli->lli_async_rc = rc; + /* re-dirty page on error so it retries write */ + SetPageDirty(page); unlock_page(page); } else { set_page_writeback(page); diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 2625872..3cccf42 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "llite_internal.h" #include @@ -39,6 +40,14 @@ extern struct address_space_operations ll_aops; extern struct address_space_operations ll_dir_aops; +LIST_HEAD(ll_super_blocks); +spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED; + +static struct cache_definition llap_cache_definition = { + "llap_cache", + ll_shrink_cache +}; + static struct super_block *ll_read_super(struct super_block *sb, void *data, int silent) { @@ -102,6 +111,8 @@ static int __init init_lustre_lite(void) proc_lustre_fs_root = proc_lustre_root ? proc_mkdir("llite", proc_lustre_root) : NULL; + register_cache(&llap_cache_definition); + rc = register_filesystem(&lustre_lite_fs_type); if (rc == 0) rc = register_filesystem(&lustre_fs_type); @@ -115,6 +126,8 @@ static void __exit exit_lustre_lite(void) unregister_filesystem(&lustre_lite_fs_type); unregister_filesystem(&lustre_fs_type); + unregister_cache(&llap_cache_definition); + LASSERTF(kmem_cache_destroy(ll_file_data_slab) == 0, "couldn't destroy ll_file_data slab\n"); if (ll_async_page_slab) diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 0f59335..92e4b39 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -136,15 +136,15 @@ int lov_update_enqueue_set(struct lov_request_set *set, if (tmp > lock->l_policy_data.l_extent.end) tmp = lock->l_policy_data.l_extent.end + 1; if (tmp >= loi->loi_kms) { - CDEBUG(D_INODE, "lock acquired, setting rss=" - LPU64", kms="LPU64"\n", loi->loi_rss, tmp); + LDLM_DEBUG(lock, "lock acquired, setting rss=" + LPU64", kms="LPU64, loi->loi_rss, tmp); loi->loi_kms = tmp; loi->loi_kms_valid = 1; } else { - CDEBUG(D_INODE, "lock acquired, setting rss=" - LPU64"; leaving kms="LPU64", end="LPU64 - "\n", loi->loi_rss, loi->loi_kms, - lock->l_policy_data.l_extent.end); + LDLM_DEBUG(lock, "lock acquired, setting rss=" + LPU64"; leaving kms="LPU64", end="LPU64, + loi->loi_rss, loi->loi_kms, + lock->l_policy_data.l_extent.end); } ldlm_lock_allow_match(lock); LDLM_LOCK_PUT(lock); diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 39aace8..13f7a16 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -35,6 +35,7 @@ #include #include #include +#include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #include #else @@ -52,7 +53,7 @@ #ifdef EXT3_MULTIBLOCK_ALLOCATOR #include #endif - + #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)) # define lock_24kernel() lock_kernel() # define unlock_24kernel() unlock_kernel() @@ -77,9 +78,6 @@ struct fsfilt_cb_data { #endif #define XATTR_LUSTRE_MDS_LOV_EA "lov" -#define EXT3_XATTR_INDEX_LUSTRE 5 /* old */ -#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" /* old */ - /* * We don't currently need any additional blocks for rmdir and * unlink transactions because we are storing the OST oa_id inside @@ -144,7 +142,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, EXT3_DELETE_TRANS_BLOCKS * logs; break; default: CERROR("unknown transaction start op %d\n", op); - LBUG(); + LBUG(); } LASSERT(current->journal_info == desc_private); @@ -241,7 +239,7 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso, /* last_rcvd update */ needed += EXT3_DATA_TRANS_BLOCKS; -#if defined(CONFIG_QUOTA) && !defined(__x86_64__) /* XXX */ +#if defined(CONFIG_QUOTA) /* We assume that there will be 1 bit set in s_dquot.flags for each * quota file that is active. This is at least true for now. */ @@ -372,7 +370,7 @@ static int fsfilt_ext3_commit_wait(struct inode *inode, void *h) tid_t tid = (tid_t)(long)h; CDEBUG(D_INODE, "commit wait: %lu\n", (unsigned long) tid); - if (is_journal_aborted(EXT3_JOURNAL(inode))) + if (is_journal_aborted(EXT3_JOURNAL(inode))) return -EIO; log_wait_commit(EXT3_JOURNAL(inode), tid); @@ -440,73 +438,21 @@ static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file, RETURN(rc); } -#undef INLINE_EA -#undef OLD_EA static int fsfilt_ext3_set_md(struct inode *inode, void *handle, void *lmm, int lmm_size) { - int rc, old_ea = 0; + int rc; LASSERT(down_trylock(&inode->i_sem) != 0); -#ifdef INLINE_EA /* can go away before 1.0 - just for testing bug 2097 now */ - /* Nasty hack city - store stripe MD data in the block pointers if - * it will fit, because putting it in an EA currently kills the MDS - * performance. We'll fix this with "fast EAs" in the future. - */ - if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) - - sizeof(EXT3_I(inode)->i_data[0])) { - unsigned old_size = EXT3_I(inode)->i_data[0]; - if (old_size != 0) { - LASSERT(old_size < sizeof(EXT3_I(inode)->i_data)); - CERROR("setting EA on %lu/%u again... interesting\n", - inode->i_ino, inode->i_generation); - } - - EXT3_I(inode)->i_data[0] = cpu_to_le32(lmm_size); - memcpy(&EXT3_I(inode)->i_data[1], lmm, lmm_size); - mark_inode_dirty(inode); - return 0; - } -#endif -#ifdef OLD_EA - /* keep this when we get rid of OLD_EA (too noisy during conversion) */ - if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */) { + if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */) CWARN("setting EA on %lu/%u again... interesting\n", inode->i_ino, inode->i_generation); - old_ea = 1; - } lock_kernel(); - /* this can go away before 1.0. For bug 2097 testing only. */ - rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0); -#else - lock_kernel(); rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size, 0); - /* This tries to delete the old-format LOV EA, but only as long as we - * have successfully saved the new-format LOV EA (we can always try - * the conversion again the next time the file is accessed). It is - * possible (although unlikely) that the new-format LOV EA couldn't be - * saved because it ran out of space but we would need a file striped - * over least 123 OSTs before the two EAs filled a 4kB block. - * - * This can be removed when all filesystems have converted to the - * new EA format, but otherwise adds little if any overhead. If we - * wanted backward compatibility for existing files, we could keep - * the old EA around for a while but we'd have to clean it up later. */ - if (rc >= 0 && old_ea) { - int err = ext3_xattr_set_handle(handle, inode, - EXT3_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, - NULL, 0, 0); - if (err) - CERROR("error deleting old LOV EA on %lu/%u: rc %d\n", - inode->i_ino, inode->i_generation, err); - } -#endif unlock_kernel(); if (rc) @@ -522,61 +468,9 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) LASSERT(down_trylock(&inode->i_sem) != 0); lock_kernel(); - /* Keep support for reading "inline EAs" until we convert - * users over to new format entirely. See bug 841/2097. */ - if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) { - unsigned size = le32_to_cpu(EXT3_I(inode)->i_data[0]); - void *handle; - - LASSERT(size < sizeof(EXT3_I(inode)->i_data)); - if (lmm) { - if (size > lmm_size) { - CERROR("inline EA on %lu/%u bad size %u > %u\n", - inode->i_ino, inode->i_generation, - size, lmm_size); - return -ERANGE; - } - memcpy(lmm, &EXT3_I(inode)->i_data[1], size); - } - -#ifndef INLINE_EA - /* migrate LOV EA data to external block - keep same format */ - CWARN("DEBUG: migrate inline EA for inode %lu/%u to block\n", - inode->i_ino, inode->i_generation); - - handle = journal_start(EXT3_JOURNAL(inode), - EXT3_XATTR_TRANS_BLOCKS); - if (!IS_ERR(handle)) { - int err; - rc = fsfilt_ext3_set_md(inode, handle, - &EXT3_I(inode)->i_data[1],size); - if (rc == 0) { - memset(EXT3_I(inode)->i_data, 0, - sizeof(EXT3_I(inode)->i_data)); - mark_inode_dirty(inode); - } - err = journal_stop(handle); - if (err && rc == 0) - rc = err; - } else { - rc = PTR_ERR(handle); - } -#endif - unlock_kernel(); - return size; - } rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA, lmm, lmm_size); - /* try old EA type if new one failed - MDS will convert it for us */ - if (rc == -ENODATA) { - CDEBUG(D_INFO,"failed new LOV EA %d/%s from inode %lu: rc %d\n", - EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA, - inode->i_ino, rc); - - rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size); - } unlock_kernel(); /* This gives us the MD size */ @@ -585,7 +479,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) if (rc < 0) { CDEBUG(D_INFO, "error getting EA %d/%s from inode %lu: rc %d\n", - EXT3_XATTR_INDEX_LUSTRE, XATTR_LUSTRE_MDS_OBJID, + EXT3_XATTR_INDEX_TRUSTED, XATTR_LUSTRE_MDS_LOV_EA, inode->i_ino, rc); memset(lmm, 0, lmm_size); return (rc == -ENODATA) ? 0 : rc; @@ -749,7 +643,7 @@ static int fsfilt_ext3_sync(struct super_block *sb) #define ext3_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem); #define ext3_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem); #endif - + #include #if EXT3_EXT_MAGIC == 0xf301 #define ee_start e_start @@ -771,17 +665,17 @@ struct bpointers { }; static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, - unsigned long block, int *aflags) + unsigned long block, int *aflags) { struct ext3_inode_info *ei = EXT3_I(inode); unsigned long bg_start; unsigned long colour; int depth; - + if (path) { struct ext3_extent *ex; depth = path->p_depth; - + /* try to predict block placement */ if ((ex = path[depth].p_ext)) { #if 0 @@ -795,18 +689,18 @@ static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, #endif return ex->ee_start + (block - ex->ee_block); } - + /* it looks index is empty * try to find starting from index itself */ if (path[depth].p_bh) return path[depth].p_bh->b_blocknr; } - + /* OK. use inode's group */ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); colour = (current->pid % 16) * - (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); + (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour + block; } @@ -822,16 +716,16 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, loff_t new_i_size; handle_t *handle; int i, aflags = 0; - + i = EXT_DEPTH(tree); EXT_ASSERT(i == path->p_depth); EXT_ASSERT(path[i].p_hdr); - + if (exist) { err = EXT_CONTINUE; goto map; } - + if (bp->create == 0) { i = 0; if (newex->ee_block < bp->start) @@ -841,21 +735,20 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, i, newex->ee_len); for (; i < newex->ee_len && bp->num; i++) { *(bp->created) = 0; - *(bp->created) = 0; bp->created++; *(bp->blocks) = 0; bp->blocks++; bp->num--; bp->start++; } - + return EXT_CONTINUE; } - + tgen = EXT_GENERATION(tree); count = ext3_ext_calc_credits_for_insert(tree, path); ext3_up_truncate_sem(inode); - + lock_24kernel(); handle = journal_start(EXT3_JOURNAL(inode), count + EXT3_ALLOC_NEEDED + 1); unlock_24kernel(); @@ -863,7 +756,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_down_truncate_sem(inode); return PTR_ERR(handle); } - + if (tgen != EXT_GENERATION(tree)) { /* the tree has changed. so path can be invalid at moment */ lock_24kernel(); @@ -872,7 +765,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_down_truncate_sem(inode); return EXT_REPEAT; } - + ext3_down_truncate_sem(inode); count = newex->ee_len; goal = ext3_ext_find_goal(inode, path, newex->ee_block, &aflags); @@ -881,7 +774,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, if (!pblock) goto out; EXT_ASSERT(count <= newex->ee_len); - + /* insert new extent */ newex->ee_start = pblock; newex->ee_len = count; @@ -898,7 +791,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, err = ext3_mark_inode_dirty(handle, inode); } } - + out: lock_24kernel(); journal_stop(handle); @@ -939,10 +832,10 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block, struct ext3_extents_tree tree; struct bpointers bp; int err; - + CDEBUG(D_OTHER, "blocks %lu-%lu requested for inode %u\n", block, block + num, (unsigned) inode->i_ino); - + ext3_init_tree_desc(&tree, inode); tree.private = &bp; bp.blocks = blocks; @@ -950,12 +843,12 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block, bp.start = block; bp.init_num = bp.num = num; bp.create = create; - + ext3_down_truncate_sem(inode); err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); ext3_ext_invalidate_cache(&tree); ext3_up_truncate_sem(inode); - + return err; } @@ -967,10 +860,10 @@ int fsfilt_ext3_map_ext_inode_pages(struct inode *inode, struct page **page, int rc = 0, i = 0; struct page *fp = NULL; int clen = 0; - + CDEBUG(D_OTHER, "inode %lu: map %d pages from %lu\n", inode->i_ino, pages, (*page)->index); - + /* pages are sorted already. so, we just have to find * contig. space and process them properly */ while (i < pages) { @@ -987,20 +880,20 @@ int fsfilt_ext3_map_ext_inode_pages(struct inode *inode, struct page **page, i++; continue; } - + /* process found extent */ rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page, clen * blocks_per_page, blocks, created, create); if (rc) GOTO(cleanup, rc); - + /* look for next extent */ fp = NULL; blocks += blocks_per_page * clen; created += blocks_per_page * clen; } - + if (fp) rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page, clen * blocks_per_page, blocks, @@ -1019,7 +912,7 @@ int fsfilt_ext3_map_bm_inode_pages(struct inode *inode, struct page **page, int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; unsigned long *b; int rc = 0, i, *cr; - + for (i = 0, cr = created, b = blocks; i < pages; i++, page++) { rc = ext3_map_inode_page(inode, *page, b, cr, create); if (rc) { diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index a7f4d7c..6742dd6 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/lib/fsfilt_ext3.c + * lustre/lib/lvfs_linux.c * Lustre filesystem abstraction routines * * Copyright (C) 2002, 2003 Cluster File Systems, Inc. @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -122,10 +121,10 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, if (uc->ouc_suppgid2 != -1) current_groups[current_ngroups++] = uc->ouc_suppgid2; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - if (uc->ouc_suppgid1 != -1 && uc->ouc_suppgid2 != -1 - && (uc->ouc_suppgid1 > uc->ouc_suppgid2)) { - current_groups[0] = uc->ouc_suppgid2; - current_groups[1] = uc->ouc_suppgid1; + if (uc->ouc_suppgid1 != -1 && uc->ouc_suppgid2 != -1 && + (uc->ouc_suppgid1 > uc->ouc_suppgid2)) { + current_groups[0] = uc->ouc_suppgid2; + current_groups[1] = uc->ouc_suppgid1; } #endif } diff --git a/lustre/lvfs/lvfs_userfs.c b/lustre/lvfs/lvfs_userfs.c index 021a0a3..cbdb254 100644 --- a/lustre/lvfs/lvfs_userfs.c +++ b/lustre/lvfs/lvfs_userfs.c @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * lustre/lib/fsfilt_ext3.c + * lustre/lib/lvfs_userfs.c * Lustre filesystem abstraction routines * * Copyright (C) 2002, 2003 Cluster File Systems, Inc. diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 4b5ecc8..df96d28 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -171,7 +171,20 @@ int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, return 0; } - +static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) +{ + /* Don't hold error requests for replay. */ + if (req->rq_replay) { + unsigned long irqflags; + spin_lock_irqsave(&req->rq_lock, irqflags); + req->rq_replay = 0; + spin_unlock_irqrestore(&req->rq_lock, irqflags); + } + if (rc && req->rq_transno != 0) { + DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc); + LBUG(); + } +} /* We always reserve enough space in the reply packet for a stripe MD, because * we don't know in advance the file type. */ @@ -306,7 +319,8 @@ int mdc_enqueue(struct obd_export *exp, rc = 0; } else if (rc != 0) { CERROR("ldlm_cli_enqueue: %d\n", rc); - LASSERT (rc < 0); + LASSERTF(rc < 0, "rc %d\n", rc); + mdc_clear_replay_flag(req, rc); ptlrpc_req_finished(req); RETURN(rc); } else { /* rc = 0 */ @@ -334,13 +348,8 @@ int mdc_enqueue(struct obd_export *exp, it->d.lustre.it_lock_mode = lock_mode; it->d.lustre.it_data = req; - if (it->d.lustre.it_status < 0 && req->rq_replay) { - LASSERT(req->rq_transno == 0); - /* Don't hold error requests for replay. */ - spin_lock(&req->rq_lock); - req->rq_replay = 0; - spin_unlock(&req->rq_lock); - } + if (it->d.lustre.it_status < 0 && req->rq_replay) + mdc_clear_replay_flag(req, it->d.lustre.it_status); DEBUG_REQ(D_RPCTRACE, req, "disposition: %x, status: %d", it->d.lustre.it_disposition, it->d.lustre.it_status); @@ -351,7 +360,7 @@ int mdc_enqueue(struct obd_export *exp, struct mds_body *body; body = lustre_swab_repbuf(req, 1, sizeof (*body), - lustre_swab_mds_body); + lustre_swab_mds_body); if (body == NULL) { CERROR ("Can't swab mds_body\n"); RETURN (-EPROTO); @@ -424,7 +433,7 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, ENTRY; LASSERT(it); - CDEBUG(D_DLMTRACE, "name: %.*s in inode "LPU64", intent: %s flags %#o\n", + CDEBUG(D_DLMTRACE,"name: %.*s in inode "LPU64", intent: %s flags %#o\n", len, name, pfid->id, ldlm_it2str(it->it_op), it->it_flags); if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) { @@ -482,16 +491,9 @@ int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt, * It's important that we do this first! Otherwise we might exit the * function without doing so, and try to replay a failed create * (bug 3440) */ - if (it->it_op & IT_OPEN) { - if (!it_disposition(it, DISP_OPEN_OPEN) || - it->d.lustre.it_status != 0) { - unsigned long irqflags; - - spin_lock_irqsave(&request->rq_lock, irqflags); - request->rq_replay = 0; - spin_unlock_irqrestore(&request->rq_lock, irqflags); - } - } + if (it->it_op & IT_OPEN && request->rq_replay && + (!it_disposition(it, DISP_OPEN_OPEN) ||it->d.lustre.it_status != 0)) + mdc_clear_replay_flag(request, it->d.lustre.it_status); if (!it_disposition(it, DISP_IT_EXECD)) { /* The server failed before it even started executing the diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index ee65ca4..6e84268 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -953,10 +953,13 @@ static int mds_readpage(struct ptlrpc_request *req) struct obd_ucred uc; ENTRY; + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK)) + RETURN(-ENOMEM); + rc = lustre_pack_reply(req, 1, &size, NULL); - if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK)) { - CERROR("mds: out of memory\n"); - GOTO(out, rc = -ENOMEM); + if (rc) { + CERROR("mds: out of memory while packing readpage reply\n"); + RETURN(-ENOMEM); } body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_mds_body); @@ -1319,8 +1322,6 @@ int mds_handle(struct ptlrpc_request *req) LASSERT(current->journal_info == NULL); - EXIT; - /* If we're DISCONNECTing, the mds_export_data is already freed */ if (!rc && req->rq_reqmsg->opc != MDS_DISCONNECT) { struct mds_export_data *med = &req->rq_export->exp_mds_data; @@ -1329,6 +1330,8 @@ int mds_handle(struct ptlrpc_request *req) target_committed_to_req(req); } + + EXIT; out: if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) { @@ -1408,7 +1411,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) * and the rest of options are passed by mount options. Probably this * should be moved to somewhere else like startup scripts or lconf. */ sprintf(options, "iopen_nopriv"); - + if (lcfg->lcfg_inllen4 > 0 && lcfg->lcfg_inlbuf4) sprintf(options + strlen(options), ",%s", lcfg->lcfg_inlbuf4); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 589297a..543d296 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -60,7 +60,6 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "fstype", lprocfs_rd_fstype, 0, 0 }, { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, - { "num_exports", lprocfs_rd_num_exports, 0, 0 }, { "evict_client", 0, lprocfs_wr_evict_client, 0 }, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, { 0 } diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index c251d2f..503bc42 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -7,7 +7,7 @@ #include -#define MDS_SERVICE_WATCHDOG_TIMEOUT 30000 +#define MDS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000) #define MAX_ATIME_DIFF 60 diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index dcd45d3..134680c 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -982,7 +982,7 @@ int mds_open(struct mds_update_record *rec, int offset, /* if we are following a symlink, don't open */ if (S_ISLNK(dchild->d_inode->i_mode)) - GOTO(cleanup, rc = 0); + GOTO(cleanup_no_trans, rc = 0); if ((rec->ur_flags & MDS_OPEN_DIRECTORY) && !S_ISDIR(dchild->d_inode->i_mode)) @@ -991,7 +991,7 @@ int mds_open(struct mds_update_record *rec, int offset, if (S_ISDIR(dchild->d_inode->i_mode)) { if (rec->ur_flags & MDS_OPEN_CREAT || rec->ur_flags & FMODE_WRITE) { - /*we are tryying to create or write a exist dir*/ + /* we are trying to create or write a exist dir */ GOTO(cleanup, rc = -EISDIR); } if (ll_permission(dchild->d_inode, acc_mode, NULL)) { @@ -1013,7 +1013,7 @@ int mds_open(struct mds_update_record *rec, int offset, cleanup: rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, req, rc, rep ? rep->lock_policy_res1 : 0); - + cleanup_no_trans: switch (cleanup_phase) { case 2: if (rc && created) { diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index b58c07a..7735318 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -106,7 +106,8 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, /* if the export has already been failed, we have no last_rcvd slot */ if (req->rq_export->exp_failed) { - CERROR("committing transaction for disconnected client\n"); + CWARN("committing transaction for disconnected client %s\n", + req->rq_export->exp_client_uuid.uuid); if (handle) GOTO(commit, rc); RETURN(rc); @@ -158,8 +159,8 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, } DEBUG_REQ(log_pri, req, - "wrote trans #"LPU64" client %s at idx %u: err = %d", - transno, mcd->mcd_uuid, med->med_idx, err); + "wrote trans #"LPU64" rc %d client %s at idx %u: err = %d", + transno, rc, mcd->mcd_uuid, med->med_idx, err); err = mds_lov_write_objids(obd); if (err) { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 419d4c03..a943183 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -68,7 +68,6 @@ static void filter_commit_cb(struct obd_device *obd, __u64 transno, obd_transno_commit_cb(obd, transno, error); } - /* Assumes caller has already pushed us into the kernel context. */ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, int rc) diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index bb8b1c1..ea2d3c0 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -315,8 +315,8 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, * checked elsewhere */ LASSERT(dreq->dr_npages <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES); if (dreq->dr_npages == 0) - GOTO(out, rc=0); - + RETURN(0); + rc = fsfilt_map_inode_pages(obd, inode, dreq->dr_pages, dreq->dr_npages, dreq->dr_blocks, @@ -326,7 +326,7 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, if (rw == OBD_BRW_WRITE) { if (rc == 0) { #if 0 - filter_tally_write(&obd->u.filter, + filter_tally_write(&obd->u.filter, dreq->dr_pages, dreq->dr_page_idx, dreq->dr_blocks, @@ -334,10 +334,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, #endif if (attr->ia_size > inode->i_size) attr->ia_valid |= ATTR_SIZE; - rc = fsfilt_setattr(obd, dchild, + rc = fsfilt_setattr(obd, dchild, oti->oti_handle, attr, 0); } - + up(&inode->i_sem); rc2 = filter_finish_transno(exp, oti, 0); @@ -370,8 +370,6 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, filter_clear_page_cache(inode, iobuf); RETURN(filter_do_bio(obd, inode, dreq, rw)); -out: - RETURN(rc); } /* See if there are unallocated parts in given file region */ diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index ebc5d64..924c03c 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -528,6 +528,11 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) req->rq_status = rc; ptlrpc_error(req); } else { + if (req->rq_reply_state != NULL) { + /* reply out callback would free */ + ptlrpc_rs_decref(req->rq_reply_state); + req->rq_reply_state = NULL; + } if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) { CERROR("bulk IO comms error: " "evicting %s@%s id %s\n", @@ -537,7 +542,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) ptlrpc_fail_export(req->rq_export); } else { CERROR("ignoring bulk IO comms error: " - "client reconnected %s@%s id %s\n", + "client reconnected %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, req->rq_peerstr); @@ -727,6 +732,11 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) req->rq_status = rc; ptlrpc_error(req); } else { + if (req->rq_reply_state != NULL) { + /* reply out callback would free */ + ptlrpc_rs_decref(req->rq_reply_state); + req->rq_reply_state = NULL; + } if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) { CERROR("%s: bulk IO comm error evicting %s@%s id %s\n", req->rq_export->exp_obd->obd_name, @@ -791,7 +801,7 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd) npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb); if (npages < 0) GOTO (out, rc = npages); - + size[1] = npages * sizeof(*pp_rnb); rc = lustre_pack_reply(req, 2, size, NULL); if (rc) @@ -903,8 +913,6 @@ static int ost_filter_recovery_request(struct ptlrpc_request *req, } } - - static int ost_handle(struct ptlrpc_request *req) { struct obd_trans_info trans_info = { 0, }; @@ -919,8 +927,8 @@ static int ost_handle(struct ptlrpc_request *req) int abort_recovery, recovering; if (req->rq_export == NULL) { - CDEBUG(D_HA, "operation %d on unconnected OST\n", - req->rq_reqmsg->opc); + CDEBUG(D_HA,"operation %d on unconnected OST from %s\n", + req->rq_reqmsg->opc, req->rq_peerstr); req->rq_status = -ENOTCONN; GOTO(out, rc = -ENOTCONN); } @@ -993,14 +1001,14 @@ static int ost_handle(struct ptlrpc_request *req) GOTO(out, rc = -EROFS); rc = ost_brw_write(req, oti); LASSERT(current->journal_info == NULL); - /* ost_brw sends its own replies */ + /* ost_brw_write sends its own replies */ RETURN(rc); case OST_READ: CDEBUG(D_INODE, "read\n"); OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); rc = ost_brw_read(req, oti); LASSERT(current->journal_info == NULL); - /* ost_brw sends its own replies */ + /* ost_brw_read sends its own replies */ RETURN(rc); case OST_SAN_READ: CDEBUG(D_INODE, "san read\n"); @@ -1135,10 +1143,9 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ost->ost_service = ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, 30000, - ost_handle, "ost", - obd->obd_proc_entry, - ost_print_req); + OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, + obd_timeout * 1000, ost_handle, "ost", + obd->obd_proc_entry, ost_print_req); if (ost->ost_service == NULL) { CERROR("failed to start service\n"); GOTO(out_lprocfs, rc = -ENOMEM); @@ -1151,10 +1158,9 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ost->ost_create_service = ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_CREATE_PORTAL, OSC_REPLY_PORTAL, 30000, - ost_handle, "ost_create", - obd->obd_proc_entry, - ost_print_req); + OST_CREATE_PORTAL, OSC_REPLY_PORTAL, + obd_timeout * 1000, ost_handle, "ost_create", + obd->obd_proc_entry, ost_print_req); if (ost->ost_create_service == NULL) { CERROR("failed to start OST create service\n"); GOTO(out_service, rc = -ENOMEM); diff --git a/lustre/portals/libcfs/tracefile.c b/lustre/portals/libcfs/tracefile.c index aca4c41..f0c06e5 100644 --- a/lustre/portals/libcfs/tracefile.c +++ b/lustre/portals/libcfs/tracefile.c @@ -782,16 +782,16 @@ int trace_write_debug_mb(struct file *file, const char *buffer, max = simple_strtoul(string, NULL, 0); if (max == 0) return -EINVAL; - max /= smp_num_cpus; - if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) { + if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) { printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%d MB, which is more than 80%% of physical RAM " - "(%lu).\n", max * smp_num_cpus, - (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5); + "%dMB, which is more than 80%% of available RAM (%lu)\n", + max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5); return -EINVAL; } + max /= smp_num_cpus; + for (i = 0; i < NR_CPUS; i++) { struct trace_cpu_data *tcd; tcd = &trace_data[i].tcd; diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index 16af0b5..2a23630 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -377,6 +377,7 @@ static int parse_buffer(FILE *in, FILE *out) int jt_dbg_debug_kernel(int argc, char **argv) { char filename[4096]; + struct stat st; int rc, raw = 0, fd; FILE *in, *out = stdout; @@ -390,18 +391,21 @@ int jt_dbg_debug_kernel(int argc, char **argv) } else if (argc > 1 && (argv[1][0] == '0' || argv[1][0] == '1')) { raw = atoi(argv[1]); argc--; - } else { - sprintf(filename, "%s.%lu.%u", argc > 1 ? argv[1] : - "/tmp/lustre-log", time(NULL), getpid()); } - unlink(filename); + /* If we are dumping raw (which means no conversion step to ASCII) + * then dump directly to any supplied filename, otherwise this is + * just a temp file and we dump to the real file at convert time. */ + if (argc > 1 && raw) + strcpy(filename, argv[1]); + else + sprintf(filename, "/tmp/lustre-log.%lu.%u",time(NULL),getpid()); + + if (stat(filename, &st) == 0 && S_ISREG(st.st_mode)) + unlink(filename); fd = open("/proc/sys/portals/dump_kernel", O_WRONLY); if (fd < 0) { - if (errno == ENOENT) /* no dump file created */ - return 0; - fprintf(stderr, "open(dump_kernel) failed: %s\n", strerror(errno)); return 1; @@ -421,6 +425,9 @@ int jt_dbg_debug_kernel(int argc, char **argv) in = fopen(filename, "r"); if (in == NULL) { + if (errno == ENOENT) /* no dump file created */ + return 0; + fprintf(stderr, "fopen(%s) failed: %s\n", filename, strerror(errno)); return 1; @@ -500,7 +507,7 @@ dbg_write_cmd(int fd, char *str) { int len = strlen(str); int rc = write(fd, str, len); - + return (rc == len ? 0 : 1); } @@ -522,7 +529,7 @@ int jt_dbg_debug_daemon(int argc, char **argv) strerror(errno)); return -1; } - + rc = -1; if (strcasecmp(argv[1], "start") == 0) { if (argc < 3 || argc > 4 || diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 2bd4f07..e5ed15f 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -502,7 +502,7 @@ static int after_reply(struct ptlrpc_request *req) if (req->rq_import->imp_replayable) { spin_lock_irqsave(&imp->imp_lock, flags); - if (req->rq_replay || req->rq_transno != 0) + if (req->rq_transno != 0) ptlrpc_retain_replayable_request(req, imp); else if (req->rq_commit_cb != NULL) req->rq_commit_cb(req); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index d10f12f..93dd97a 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -100,10 +100,10 @@ int ptlrpc_set_import_discon(struct obd_import *imp) spin_lock_irqsave(&imp->imp_lock, flags); if (imp->imp_state == LUSTRE_IMP_FULL) { - CERROR("%s: connection lost to %s@%s\n", - imp->imp_obd->obd_name, - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); + CWARN("%s: connection lost to %s@%s\n", + imp->imp_obd->obd_name, + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); spin_unlock_irqrestore(&imp->imp_lock, flags); obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON); @@ -142,11 +142,6 @@ void ptlrpc_deactivate_import(struct obd_import *imp) * for all the RPC completions, and finally notify the obd to * invalidate its state (ie cancel locks, clear pending requests, * etc). - * - * in_rpc: true if this is called while processing an rpc, like - * CONNECT. It will allow for one RPC to be inflight while - * waiting for requests to complete. Ugly, yes, but I don't see an - * cleaner way right now. */ void ptlrpc_invalidate_import(struct obd_import *imp) { @@ -577,10 +572,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) GOTO(out, rc); IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); ptlrpc_activate_import(imp); - CERROR("%s: connection restored to %s@%s\n", - imp->imp_obd->obd_name, - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); + CWARN("%s: connection restored to %s@%s\n", + imp->imp_obd->obd_name, + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); } if (imp->imp_state == LUSTRE_IMP_FULL) { diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 8c30c70..c1055ec 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -440,6 +440,7 @@ void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) svc->srv_stats = NULL; } } + void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) { if (obd->obd_svc_procroot) { diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 354c611..88f9694 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -115,22 +115,22 @@ static int ptlrpc_pinger_main(void *arg) spin_unlock_irqrestore(&imp->imp_lock, flags); if (imp->imp_next_ping <= this_ping || force) { - if (level == LUSTRE_IMP_DISCON && + if (level == LUSTRE_IMP_DISCON && !imp->imp_deactive) { - /* wait at least a timeout before + /* wait at least a timeout before trying recovery again. */ - imp->imp_next_ping = jiffies + + imp->imp_next_ping = jiffies + (obd_timeout * HZ); ptlrpc_initiate_recovery(imp); - } + } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) { - CDEBUG(D_HA, + CDEBUG(D_HA, "not pinging %s (in recovery " - " or recovery disabled: %s)\n", + "or recovery disabled: %s)\n", imp->imp_target_uuid.uuid, ptlrpc_import_state_name(level)); - } + } else if (imp->imp_pingable || force) { ptlrpc_ping(imp); } @@ -138,7 +138,7 @@ static int ptlrpc_pinger_main(void *arg) } else { if (imp->imp_pingable) CDEBUG(D_HA, "don't need to ping %s " - "(%lu > %lu)\n", + "(%lu > %lu)\n", imp->imp_target_uuid.uuid, imp->imp_next_ping, this_ping); } diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 71cfdfd..d40dfb4 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -35,20 +35,16 @@ # else # include # endif +# include +# include #else /* __KERNEL__ */ # include +# include #endif #include #include -#ifdef __KERNEL__ -# include -# include -#else -# include -#endif - #include #include /* for OBD_FAIL_CHECK */ #include diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 11d94e8..6239e5a 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -197,7 +197,7 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) llcd_put(ctxt->loc_llcd); ctxt->loc_llcd = NULL; } - CWARN("import will be destroyed, put " + CWARN("reverse import disconnected, put " "llcd %p:%p\n", ctxt->loc_llcd, ctxt); ctxt->loc_imp = NULL; up(&ctxt->loc_sem); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index b6bae88..6dc0e55 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -69,7 +69,7 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) argv[0], argv[1], argv[2], rc); } else { - CERROR("Invoked upcall %s %s %s\n", + CWARN("Invoked upcall %s %s %s\n", argv[0], argv[1], argv[2]); } } @@ -110,7 +110,7 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp) argv[0], argv[1], argv[2], argv[3], argv[4],rc); } else { - CERROR("Invoked upcall %s %s %s %s %s\n", + CWARN("Invoked upcall %s %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3], argv[4]); } #else @@ -279,7 +279,7 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) imp->imp_obd->obd_name, imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); - + if (ptlrpc_set_import_discon(imp)) { if (!imp->imp_replayable) { CDEBUG(D_HA, "import %s@%s for %s not replayable, " @@ -293,14 +293,13 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) rc = ptlrpc_connect_import(imp, NULL); } - /* Wait for recovery to complete and resend. If evicted, then this request will be errored out later.*/ spin_lock_irqsave(&failed_req->rq_lock, flags); if (!failed_req->rq_no_resend) failed_req->rq_resend = 1; spin_unlock_irqrestore(&failed_req->rq_lock, flags); - + EXIT; } @@ -308,7 +307,7 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) * This should only be called by the ioctl interface, currently * with the lctl deactivate and activate commands. */ -int ptlrpc_set_import_active(struct obd_import *imp, int active) +int ptlrpc_set_import_active(struct obd_import *imp, int active) { struct obd_device *obd = imp->imp_obd; int rc = 0; @@ -337,10 +336,10 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid) { int rc; ENTRY; - + /* force import to be disconnected. */ ptlrpc_set_import_discon(imp); - + rc = ptlrpc_recover_import_no_retry(imp, new_uuid); RETURN(rc); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index dbb7aa75..4104d7f 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -374,8 +374,8 @@ ptlrpc_server_free_request(struct ptlrpc_request *req) list_del(&rqbd->rqbd_list); list_add_tail(&rqbd->rqbd_list, &svc->srv_history_rqbds); svc->srv_n_history_rqbds++; - - /* cull some history? + + /* cull some history? * I expect only about 1 or 2 rqbds need to be recycled here */ while (svc->srv_n_history_rqbds > svc->srv_max_history_rqbds) { rqbd = list_entry(svc->srv_history_rqbds.next, @@ -574,7 +574,7 @@ put_conn: } ptlrpc_server_free_request(request); - + RETURN(1); } @@ -956,7 +956,11 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, */ rc = kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES); if (rc < 0) { - CERROR("cannot start thread: %d\n", rc); + CERROR("cannot start thread '%s': rc %d\n", name, rc); + + spin_lock_irqsave(&svc->srv_lock, flags); + list_del(&thread->t_link); + spin_unlock_irqrestore(&svc->srv_lock, flags); OBD_FREE(thread, sizeof(*thread)); RETURN(rc); } diff --git a/lustre/scripts/branch.sh b/lustre/scripts/branch.sh index 875c71d..5240fa9 100755 --- a/lustre/scripts/branch.sh +++ b/lustre/scripts/branch.sh @@ -9,6 +9,7 @@ fi parent=$1 child=$2 CHILD=`echo $child | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` +date=`date +%Y%m%d_%H%M` module=lustre case $parent in @@ -29,11 +30,15 @@ fi echo parent: $parent CHILD: $CHILD child: $child date: $date -echo -n "tagging $parent as '${CHILD}_BASE' ...." -$CVS rtag -r $parent ${CHILD}_BASE $module +echo -n "tagging $parent as '${CHILD}_BASE_$date' ..." +$CVS rtag -r $parent ${CHILD}_BASE_$date $module +echo "done" +echo -n "tagging ${CHILD}_BASE_$date as '${CHILD}_BASE' ...." +$CVS rtag -r ${CHILD}_BASE_$date ${CHILD}_BASE $module echo "done" echo -n "branching $child at ${CHILD}_BASE' ...." $CVS rtag -b -r ${CHILD}_BASE $child $module +echo "done" echo -n "updating to $child ...." $CVS update -r $child echo "done" diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index 6215e7a..3d036a7 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -87,7 +87,8 @@ cd $RPM_BUILD_DIR/lustre-%{version} --with-linux='%{linuxdir}' \ %{disable_doc} --disable-liblustre \ --sysconfdir=%{_sysconfdir} \ - --mandir=%{_mandir} + --mandir=%{_mandir} \ + --libdir=%{_libdir} make -j $RPM_BUILD_NCPUS -s %install @@ -144,13 +145,14 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre %attr(-, root, root) /usr/bin/lstripe %attr(-, root, root) /usr/bin/mcreate %attr(-, root, root) /usr/bin/munlink -%attr(-, root, root) /usr/lib/lustre/python +%attr(-, root, root) %{_libdir}/lustre/python %attr(-, root, root) /usr/share/lustre/examples %attr(-, root, root) /etc/init.d/lustre %attr(-, root, root) /etc/init.d/lustrefs -%attr(-, root, root) /usr/lib/libptlctl.a -%attr(-, root, root) /usr/lib/liblustreapi.a + +%attr(-, root, root) %{_libdir}/libptlctl.a +%attr(-, root, root) %{_libdir}/liblustreapi.a %attr(-, root, root) /usr/include/lustre %attr(-, root, root) /usr/include/portals %attr(-, root, root) /usr/include/linux/lustre_idl.h @@ -202,8 +204,8 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre %files -n lustre-ldap %attr(-, root, root) /etc/openldap/slapd-lustre.conf %attr(-, root, root) /etc/openldap/schema/lustre.schema -%attr(-, root, root) /usr/lib/lustre/lustre2ldif.xsl -%attr(-, root, root) /usr/lib/lustre/top.ldif +%attr(-, root, root) /usr/share/lustre/lustre2ldif.xsl +%attr(-, root, root) /usr/share/lustre/top.ldif #%dir /var/lib/ldap/lustre %attr(700, ldap, ldap) /var/lib/ldap/lustre diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 3527f2a..abba4802 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -169,5 +169,9 @@ if [ "$REPLAY_DUAL" != "no" ]; then sh replay-dual.sh fi +if [ "$INSANITY" != "no" ]; then + sh insanity.sh -r +fi + RC=$? echo "completed with rc $RC" && exit $RC diff --git a/lustre/tests/cfg/insanity-ltest.sh b/lustre/tests/cfg/insanity-ltest.sh index bc41e3c..fe63e81 100644 --- a/lustre/tests/cfg/insanity-ltest.sh +++ b/lustre/tests/cfg/insanity-ltest.sh @@ -31,7 +31,7 @@ LIVE_CLIENT=${LIVE_CLIENT:-${CLIENT1}} # This should always be a list, not a regexp FAIL_CLIENTS=${FAIL_CLIENTS:-"`all_but_one_clients`"} -NETTYPE=${NETTYPE:-${NETWORKTYPE}} +NETTYPE=${NETTYPE:-${NETTYPE}} TIMEOUT=${TIMEOUT:-30} PTLDEBUG=${PTLDEBUG:-0} diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh index 9af8621..71ff93b 100644 --- a/lustre/tests/cfg/local.sh +++ b/lustre/tests/cfg/local.sh @@ -27,7 +27,7 @@ FSTYPE=${FSTYPE:-ext3} TIMEOUT=${TIMEOUT:-20} UPCALL=${UPCALL:-DEFAULT} -STRIPE_BYTES=${STRIPE_BYTES:-65536} +STRIPE_BYTES=${STRIPE_BYTES:-1048576} STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0} FAILURE_MODE=${FAILURE_MODE:-SOFT} # or HARD diff --git a/lustre/tests/createmany.c b/lustre/tests/createmany.c index 1018ca5..466c156 100644 --- a/lustre/tests/createmany.c +++ b/lustre/tests/createmany.c @@ -83,7 +83,7 @@ int main(int argc, char ** argv) break; } close(fd); - } else if (do_link) { + } else if (do_link) { rc = link(tgt, filename); if (rc) { printf("link(%s, %s) error: %s\n", diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh index 67dd27e..b937c17 100755 --- a/lustre/tests/echo.sh +++ b/lustre/tests/echo.sh @@ -29,7 +29,7 @@ h2gm () { } h2elan () { - echo $1 | sed 's/[^0-9]*//g' + echo $1 | sed 's/[^0-9]*//g' } # FIXME: make LMC not require MDS for obdecho LOV @@ -37,7 +37,7 @@ MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=10000 FSTYPE=${FSTYPE:-ext3} -STRIPE_BYTES=65536 +STRIPE_BYTES=1048576 STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs rm -f $config diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index bfbe7bc..56eea6e 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,27 @@ #define MAX_LOV_UUID_COUNT 1000 +int read_proc_entry(char *proc_path, char *buf, int len) +{ + int rcnt = -1, fd; + + if ((fd = open(proc_path, O_RDONLY)) == -1) { + fprintf(stderr, "open('%s') failed: %s\n", + proc_path, strerror(errno)); + rcnt = -1; + } else if ((rcnt = read(fd, buf, len)) <= 0) { + fprintf(stderr, "read('%s') failed: %s\n", + proc_path, strerror(errno)); + } else { + buf[rcnt - 1] = '\0'; + } + + if (fd >= 0) + close(fd); + + return (rcnt); +} + int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, struct lov_user_md *lum_file2) { @@ -34,85 +56,62 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, int stripe_size; int stripe_offset; int ost_count; - int fd; - char buf[32]; + char buf[128]; + char lov_path[PATH_MAX]; + char tmp_path[PATH_MAX]; int i; + if (read_proc_entry("/proc/fs/lustre/llite/fs0/lov/common_name", + buf, sizeof(buf)) <= 0) + return -1; + + snprintf(lov_path, sizeof(lov_path) - 1, "/proc/fs/lustre/lov/%s", buf); + stripe_count = (int)lum_dir->lmm_stripe_count; if (stripe_count == 0) { - fd = open("/proc/fs/lustre/lov/lov1/stripecount", O_RDONLY); - if (fd == -1) { - fprintf(stderr, "open proc file error: %s\n", - strerror(errno)); - return -1; - } - if (read(fd, buf, sizeof(buf)) == -1) { - fprintf(stderr, "read proc file error: %s\n", - strerror(errno)); - close(fd); + snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripecount", lov_path); + if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0) return -1; - } - + stripe_count = atoi(buf); - stripe_count = stripe_count ? stripe_count : 1; - close(fd); } stripe_size = (int)lum_dir->lmm_stripe_size; if (stripe_size == 0) { - fd = open("/proc/fs/lustre/lov/lov1/stripesize", O_RDONLY); - if (fd == -1) { - fprintf(stderr, "open proc file error: %s\n", - strerror(errno)); - return -1; - } - if (read(fd, buf, sizeof(buf)) == -1) { - fprintf(stderr, "read proc file error: %s\n", - strerror(errno)); - close(fd); + snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripesize", lov_path); + if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0) return -1; - } stripe_size = atoi(buf); - close(fd); } - fd = open("/proc/fs/lustre/lov/lov1/numobd", O_RDONLY); - if(fd == -1) { - fprintf(stderr, "open proc file error: %s\n", - strerror(errno)); - return -1; - } - if (read(fd, buf, sizeof(buf)) == -1) { - fprintf(stderr, "read proc file error: %s\n", - strerror(errno)); - close(fd); + snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/numobd", lov_path); + if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0) return -1; - } ost_count = atoi(buf); - close(fd); + stripe_count = stripe_count ? stripe_count : ost_count; if ((lum_file1->lmm_stripe_count != stripe_count) || (lum_file1->lmm_stripe_size != stripe_size)) return -1; - + stripe_offset = (short int)lum_dir->lmm_stripe_offset; if (stripe_offset != -1) { for (i = 0; i < stripe_count; i++) - if (lum_file1->lmm_objects[i].l_ost_idx != - (stripe_offset + i) % ost_count) + if (lum_file1->lmm_objects[i].l_ost_idx != + (stripe_offset + i) % ost_count) return -1; } else if (lum_file2 != NULL) { int next, idx; next = (lum_file1->lmm_objects[stripe_count-1].l_ost_idx + 1) % ost_count; idx = lum_file2->lmm_objects[0].l_ost_idx; - if (idx != next) + if (idx != next) return -1; } - return 0; + return 0; } int main(int argc, char **argv) @@ -139,7 +138,7 @@ int main(int argc, char **argv) if ((lum_dir = (struct lov_user_md *)malloc(lum_size)) == NULL) { fprintf(stderr, "unable to allocate memory for ioctl's"); return errno; - } + } rc = ioctl(dirfd(dir), LL_IOC_LOV_GETSTRIPE, lum_dir); if (rc) { @@ -150,7 +149,7 @@ int main(int argc, char **argv) } else { rc = errno; goto cleanup; - } + } } if ((lum_file1 = (struct lov_user_md *)malloc(lum_size)) == NULL) { @@ -169,9 +168,9 @@ int main(int argc, char **argv) } if (argc == 4) { - if ((lum_file2 = (struct lov_user_md *)malloc(lum_size)) - == NULL) { - fprintf(stderr, + lum_file2 = (struct lov_user_md *)malloc(lum_size); + if (lum_file2 == NULL) { + fprintf(stderr, "unable to allocate memory for ioctl's"); rc = errno; goto cleanup; diff --git a/lustre/tests/llmountcleanup.sh b/lustre/tests/llmountcleanup.sh index c678df8..7c907b0 100755 --- a/lustre/tests/llmountcleanup.sh +++ b/lustre/tests/llmountcleanup.sh @@ -28,7 +28,6 @@ fi [ "$NODE" ] && node_opt="--node $NODE" -sync; sleep 2; sync [ "$MOUNT2" ] && umount $MOUNT2 ${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt --cleanup $@ \ diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 2d37a47..d231c41 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -12,7 +12,7 @@ MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} MOUNT=${MOUNT:-/mnt/lustre} MOUNT2=${MOUNT2:-${MOUNT}2} -NETWORKTYPE=${NETWORKTYPE:-tcp} +NETTYPE=${NETTYPE:-tcp} OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`} OSTSIZE=${OSTSIZE:-400000} @@ -23,15 +23,15 @@ JSIZE=${JSIZE:-0} MDSISIZE=${MDSISIZE:-0} [ "$MDSISIZE" -gt 0 ] && IARG="--inode_size $MDSISIZE" -STRIPE_BYTES=65536 +STRIPE_BYTES=${STRIPE_BYTES:-1048576} STRIPES_PER_OBJ=0 # 0 means stripe over all OSTs rm -f $config # create nodes ${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid `hostname` --nettype $NETWORKTYPE || exit 11 -${LMC} --add net --node client --nid '*' --nettype $NETWORKTYPE || exit 12 +${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11 +${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12 # configure mds server ${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE $JARG $IARG || exit 20 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index e370474..f9fceb0 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -23,7 +23,7 @@ OSTSIZE=${OSTSIZE:-150000} # 1 to config an echo client instead of llite ECHO_CLIENT=${ECHO_CLIENT:-} -STRIPE_BYTES=65536 +STRIPE_BYTES=${STRIPE_BYTES:-1048576} STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-$((OSTCOUNT -1))} # specific journal size for the ost, in MB diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 776eaea..713c20f 100755 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -45,7 +45,11 @@ char usage[] = " Y fdatasync\n" " z seek to zero\n"; -void null_handler(int unused) { } +static int usr1_received; +void usr1_handler(int unused) +{ + usr1_received = 1; +} static const char * pop_arg(int argc, char *argv[]) @@ -73,14 +77,17 @@ int main(int argc, char **argv) exit(1); } - signal(SIGUSR1, null_handler); + signal(SIGUSR1, usr1_handler); fname = argv[1]; for (commands = argv[2]; *commands; commands++) { switch (*commands) { case '_': - pause(); + if (usr1_received == 0) + pause(); + usr1_received = 0; + signal(SIGUSR1, usr1_handler); break; case 'c': if (close(fd) == -1) { diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index a70c5f3..c783fe0 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -27,6 +27,7 @@ MDSSIZE=${MDSSIZE:-100000} FSTYPE=${FSTYPE:-ext3} OSTDEV=${OSTDEV:-/tmp/ost1-`hostname`} OSTSIZE=${OSTSIZE:-100000} +STRIPE_BYTES=${STRIPE_BYTES:-1048576} do_mds() { $PDSH $MDSNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@" || exit $? @@ -54,7 +55,7 @@ make_config() { done lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \ --dev $MDSDEV --size $MDSSIZE || exit 5 - lmc -m $CONFIG --add lov --lov lov1 --mds mds1 --stripe_sz 65536 \ + lmc -m $CONFIG --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \ --stripe_cnt 0 --stripe_pattern 0 || exit 6 lmc -m $CONFIG --add ost --nspath /mnt/ost_ns --node $OSTNODE \ --lov lov1 --dev $OSTDEV --size $OSTSIZE --fstype $FSTYPE || exit 7 @@ -108,7 +109,7 @@ wait_for_timeout() { try_to_cleanup() { kill -INT $! - unmount_client --force --dump /tmp/client-cleanup-`date +%s`.log + unmount_client --force --dump $TMP/recovery-cleanup-`hostname`.log mount_client --timeout=${TIMEOUT:-5} --lustre_upcall=/bin/true } @@ -141,4 +142,4 @@ try_to_cleanup drop_request "munlink /mnt/lustre/link1" & wait_for_timeout try_to_cleanup -$CLEANUP '--dump /tmp/`hostname`-cleanup.log' +$CLEANUP '--dump $TMP/recovery-cleanup-`hostname`.log' diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 837f328..91672cd 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -45,8 +45,8 @@ setup() { cleanup() { zconf_umount `hostname` $MOUNT stop mds ${FORCE} $MDSLCONFARGS - stop ost2 ${FORCE} --dump cleanup.log - stop ost ${FORCE} --dump cleanup.log + stop ost2 ${FORCE} + stop ost ${FORCE} --dump $TMP/recovery-small-`hostname`.log } replay() { diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index c313a98..2b0682a 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -43,7 +43,7 @@ cleanup() { rmmod llite stop mds ${FORCE} stop ost2 ${FORCE} - stop ost ${FORCE} --dump cleanup-dual.log + stop ost ${FORCE} --dump $TMP/replay-dual-`hostname`.log } if [ "$ONLY" == "cleanup" ]; then @@ -362,7 +362,7 @@ run_test 17 "fail OST during recovery (3571)" export NOW=0 test_18() { # bug 3822 - evicting client with enqueued lock - set -vx + #set -vx mkdir -p $MOUNT1/$tdir touch $MOUNT1/$tdir/f0 #define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index b0f39ef..19559e1 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -36,7 +36,7 @@ cleanup() { fi zconf_umount `hostname` $MOUNT stop mds ${FORCE} $MDSLCONFARGS - stop ost ${FORCE} --dump cleanup.log + stop ost ${FORCE} --dump $TMP/replay-ost-single-`hostname`.log } if [ "$ONLY" == "cleanup" ]; then diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 9f25515..97e1b60 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -42,8 +42,8 @@ cleanup() { fi zconf_umount `hostname` $MOUNT stop mds ${FORCE} $MDSLCONFARGS - stop ost2 ${FORCE} --dump cleanup.log - stop ost ${FORCE} --dump cleanup.log + stop ost2 ${FORCE} + stop ost ${FORCE} --dump $TMP/replay-single-`hostname`.log } if [ "$ONLY" == "cleanup" ]; then @@ -857,8 +857,7 @@ test_43() { # OBD_FAIL_OST_CREATE_NET 0x204 do_facet ost "sysctl -w lustre.fail_loc=0x80000204" - facet_failover mds - df $MOUNT || return 1 + fail mds sleep 10 do_facet ost "sysctl -w lustre.fail_loc=0" @@ -978,8 +977,11 @@ test_52() { } run_test 52 "time out lock replay (3764)" +#b_cray 53 "|X| open request and close reply while two MDC requests in flight" +#b_cray 54 "|X| open request and close reply while two MDC requests in flight" + #b3761 ASSERTION(hash != 0) failed -test_53() { +test_55() { # OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE do_facet mds "sysctl -w lustre.fail_loc=0x8000012b" touch $DIR/$tfile & @@ -989,7 +991,17 @@ test_53() { rm $DIR/$tfile return 0 } -run_test 53 "let MDS_CHECK_RESENT return the original return code instead of 0" +run_test 55 "let MDS_CHECK_RESENT return the original return code instead of 0" + +#b3440 ASSERTION(rec->ur_fid2->id) failed +test_56() { + ln -s foo $DIR/$tfile + replay_barrier mds + #drop_reply "cat $DIR/$tfile" + fail mds + sleep 10 +} +run_test 56 "don't replay a symlink open request (3440)" equals_msg test complete, cleaning up $CLEANUP diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c index 3b76af6..aa0bedc 100644 --- a/lustre/tests/runas.c +++ b/lustre/tests/runas.c @@ -18,7 +18,7 @@ #endif static const char usage[] = -"Usage: %s -u user_id [-g grp_id ] [ -G[gid0,gid1,...] ] command\n" +"Usage: %s -u user_id [-g grp_id] [-G[gid0,gid1,...]] command\n" " -u user_id switch to UID user_id\n" " -g grp_id switch to GID grp_id\n" " -G[gid0,gid1,...] set supplementary groups\n"; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index e2cda44..f484a77 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -14,7 +14,7 @@ ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c 45"} [ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" SRCDIR=`dirname $0` -export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH +export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH:/sbin TMP=${TMP:-/tmp} FSTYPE=${FSTYPE:-ext3} @@ -113,11 +113,11 @@ build_test_filter() { } _basetest() { - echo $* + echo $* } basetest() { - IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 } run_test() { @@ -444,7 +444,7 @@ test_16() { run_test 16 "touch .../d16/f; rm -rf .../d16/f =================" test_17a() { - mkdir $DIR/d17 + mkdir -p $DIR/d17 touch $DIR/d17/f ln -s $DIR/d17/f $DIR/d17/l-exist ls -l $DIR/d17 @@ -456,9 +456,7 @@ test_17a() { run_test 17a "symlinks: create, remove (real) ==================" test_17b() { - if [ ! -d $DIR/d17 ]; then - mkdir $DIR/d17 - fi + mkdir -p $DIR/d17 ln -s no-such-file $DIR/d17/l-dangle ls -l $DIR/d17 $CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error @@ -468,6 +466,20 @@ test_17b() { } run_test 17b "symlinks: create, remove (dangling) ==============" +test_17c() { # bug 3440 - don't save failed open RPC for replay + mkdir -p $DIR/d17 + ln -s foo $DIR/d17/f17c + cat $DIR/d17/f17c && error "opened non-existent symlink" || true +} +run_test 17c "symlinks: open dangling (should return error) ====" + +test_17d() { + mkdir -p $DIR/d17 + ln -s foo $DIR/d17/f17d + touch $DIR/d17/f17d || error "creating to new symlink" +} +run_test 17d "symlinks: create dangling ========================" + test_18() { touch $DIR/f ls $DIR || error @@ -889,7 +901,7 @@ test_27m() { dd if=/dev/zero of=$DIR/d27/f27m_1 bs=1024 count=$MAXFREE && \ error "dd should fill OST0" i=2 - while $LSTRIPE $DIR/d27/f27m_$i 0 0 1 ; do + while $LSTRIPE $DIR/d27/f27m_$i 0 0 1 ; do i=`expr $i + 1` [ $i -gt 256 ] && break done @@ -1008,7 +1020,7 @@ test_31f() { # bug 4554 lfs getstripe $DIR/d31f/hosts multiop $DIR/d31f D_c & MULTIPID2=$! - + sleep 6 kill -USR1 $MULTIPID || error "first opendir $MULTIPID not running" @@ -1737,7 +1749,7 @@ test_48d() { # bug 2350 run_test 48d "Access removed parent subdir (should return errors)" test_48e() { # bug 4134 -# check_kernel_version 36 || return 0 + check_kernel_version 41 || return 0 #sysctl -w portals.debug=-1 #set -vx mkdir -p $DIR/d48e/dir @@ -1752,7 +1764,7 @@ test_48e() { # bug 4134 $TRACE wait $cdpid && error "'cd ..' worked after recreate parent" $TRACE rm $DIR/d48e || error "'$DIR/d48e' failed" } -run_test 48e "Access to removed and recreated parent subdir (should return errors)" +run_test 48e "Access to recreated parent subdir (should return errors)" test_50() { # bug 1485 @@ -2310,13 +2322,12 @@ test_99f() { run_test 99f "cvs commit =======================================" test_100() { - netstat -ta | while read PROT SND RCV LOCAL REMOTE STAT; do - LPORT=`echo $LOCAL | cut -d: -f2` + netstat -tna | while read PROT SND RCV LOCAL REMOTE STAT; do + [ "$PROT" != "tcp" ] && continue RPORT=`echo $REMOTE | cut -d: -f2` - if [ "$PROT" = "tcp" ] && [ "$LPORT" != "*" ] && [ "$RPORT" != "*" ] && [ $RPORT -eq 988 ] && [ $LPORT -gt 1024 ]; then - echo "local port: $LPORT > 1024" - error - fi + [ "$RPORT" != "988" ] && continue + LPORT=`echo $LOCAL | cut -d: -f2` + [ $LPORT -ge 1024 ] && error "local port: $LPORT > 1024" || true done } run_test 100 "check local port using privileged port ===========" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 72e8451..ddbed84 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -475,13 +475,14 @@ clear_failloc() { } cancel_lru_locks() { - $LCTL mark cancel_lru_locks + $LCTL mark "cancel_lru_locks start" for d in /proc/fs/lustre/ldlm/namespaces/$1*; do if [ -f $d/lru_size ]; then echo clear > $d/lru_size grep [0-9] $d/lock_unused_count fi done + $LCTL mark "cancel_lru_locks stop" } diff --git a/lustre/tests/unlinkmany.c b/lustre/tests/unlinkmany.c index 45f0d15..080b1c6 100644 --- a/lustre/tests/unlinkmany.c +++ b/lustre/tests/unlinkmany.c @@ -27,8 +27,8 @@ int main(int argc, char ** argv) return 1; } - if (strcmp(argv[1], "-d") == 0) { - do_rmdir = 1; + if (strcmp(argv[1], "-d") == 0) { + do_rmdir = 1; argv++; argc--; } diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 276a2b2..e45650d 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -249,13 +249,13 @@ class DaemonHandler: pid = int(fp.read()) fp.close() return pid - except IOError: - return 0 except ValueError: print "WARNING: invalid pid in %s, removed" % self.pidfile() - print "WARNING: You may need to stop acceptor by yourself and then unload the module libcfs" + print "WARNING: You may need to stop acceptor by yourself" os.unlink(self.pidfile()) return 0 + except IOError: + return 0 def clean_pidfile(self): """ Remove a stale pidfile """ @@ -858,7 +858,7 @@ def loop_base(): if not os.access(loop + str(0), os.R_OK): loop = loop + '/' if not os.access(loop + str(0), os.R_OK): - panic ("can't access loop devices") + panic("can't access loop devices") return loop # find loop device assigned to thefile @@ -989,7 +989,7 @@ def sys_get_local_address(net_type, wildcard, cluster_id): # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()' f = sys_get_elan_position_file() if not f: - panic ("unable to determine local Elan ID") + panic("unable to determine local Elan ID") try: fp = open(f, 'r') lines = fp.readlines() @@ -1895,7 +1895,7 @@ class Client(Module): """ Lookup a server's network information """ self._server_nets = get_ost_net(self.db, srv_uuid) if len(self._server_nets) == 0: - panic ("Unable to find a server for:", srv_uuid) + panic("Unable to find a server for:", srv_uuid) def get_servers(self): return self._server_nets @@ -1911,7 +1911,7 @@ class Client(Module): else: routes = find_route(self.get_servers()) if len(routes) == 0: - panic ("no route to", self.target_uuid) + panic("no route to", self.target_uuid) for (srv, r) in routes: lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3]) except CommandError, e: @@ -2378,7 +2378,7 @@ def newService(db): elif type == 'mgmt': n = Management(db) else: - panic ("unknown service type:", type) + panic("unknown service type:", type) return n # diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index a1035e5..197b95e 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -272,10 +272,10 @@ command_t cmdlist[] = { "usage: dk [file] [raw]"}, {"debug_file", jt_dbg_debug_file, 0, "convert a binary debug file dumped by the kernel to ASCII text\n" - "usage: debug_file [output] [raw]"}, + "usage: debug_file [output]"}, {"df", jt_dbg_debug_file, 0, "read debug buffer from input and dump to output, same as debug_file\n" - "usage: df [output] [raw]"}, + "usage: df [output]"}, {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer\n" "usage: clear"}, {"mark", jt_dbg_mark_debug_buf, 0,"insert marker text in kernel debug buffer\n" diff --git a/lustre/utils/llmount.c b/lustre/utils/llmount.c index 60e81a5..bea7f5c 100644 --- a/lustre/utils/llmount.c +++ b/lustre/utils/llmount.c @@ -35,9 +35,9 @@ #include "obdctl.h" #include -int debug; int verbose; int nomtab; +int fake; int force; static char *progname = NULL; @@ -52,6 +52,29 @@ int route_index; ptl_nid_t lmd_cluster_id = 0; llmount_route_t routes[MAX_ROUTES]; +void usage(FILE *out) +{ + fprintf(out, "usage: %s :// " + "[-fhnv] [-o mntopt]\n", progname); + fprintf(out, "\t: hostname or nid of MDS (config) node\n" + "\t: name of MDS service (e.g. mds1)\n" + "\t: name of client config (e.g. client)\n" + "\t: filesystem mountpoint (e.g. /mnt/lustre)\n" + "\t-f|--fake: fake mount (updates /etc/mtab)\n" + "\t--force: force mount even if already in /etc/mtab\n" + "\t-h|--help: print this usage message\n" + "\t-n|--nomtab: do not update /etc/mtab after mount\n" + "\t-v|--verbose: print verbose config settings\n" + "\t-o: filesystem mount options:\n" + "\t\tnettype={tcp,elan,iibnal,lonal}: network type\n" + "\t\tcluster_id=0xNNNN: cluster this node is part of\n" + "\t\tlocal_nid=0xNNNN: client ID (default ipaddr or nodenum)\n" + "\t\tserver_nid=0xNNNN: server node ID (default mdsnode)\n" + "\t\tport=NNN: server port (default 988 for tcp)\n" + "\t\troute=[-]:[-]: portal route to MDS\n"); + exit(out != stdout); +} + static int check_mtab_entry(char *spec, char *mtpt, char *type) { FILE *fp; @@ -77,12 +100,13 @@ static int check_mtab_entry(char *spec, char *mtpt, char *type) return(0); } -static void +static int update_mtab_entry(char *spec, char *mtpt, char *type, char *opts, int flags, int freq, int pass) { FILE *fp; struct mntent mnt; + int rc = 0; mnt.mnt_fsname = spec; mnt.mnt_dir = mtpt; @@ -96,14 +120,17 @@ update_mtab_entry(char *spec, char *mtpt, char *type, char *opts, if (fp == NULL) { fprintf(stderr, "%s: setmntent(%s): %s:", progname, MOUNTED, strerror (errno)); + rc = 16; } else { - if ((addmntent (fp, &mnt)) == 1) { + if ((addmntent(fp, &mnt)) == 1) { fprintf(stderr, "%s: addmntent: %s:", progname, strerror (errno)); + rc = 16; } endmntent(fp); } } + return rc; } int @@ -154,12 +181,12 @@ static int parse_route(char *opteq, char *opttgts) if (ptl_parse_nid(&gw_lo, gw_lo_ptr) != 0) { fprintf(stderr, "%s: can't parse NID %s\n", progname,gw_lo_ptr); - return(-1); + return(1); } if (ptl_parse_nid(&gw_hi, gw_hi_ptr) != 0) { fprintf(stderr, "%s: can't parse NID %s\n", progname,gw_hi_ptr); - return(-1); + return(1); } tgt_lo_ptr = opttgts + 1; @@ -172,12 +199,12 @@ static int parse_route(char *opteq, char *opttgts) if (ptl_parse_nid(&tgt_lo, tgt_lo_ptr) != 0) { fprintf(stderr, "%s: can't parse NID %s\n",progname,tgt_lo_ptr); - return(-1); + return(1); } if (ptl_parse_nid(&tgt_hi, tgt_hi_ptr) != 0) { fprintf(stderr, "%s: can't parse NID %s\n",progname,tgt_hi_ptr); - return(-1); + return(1); } while (gw_lo <= gw_hi) { @@ -197,7 +224,21 @@ static int parse_route(char *opteq, char *opttgts) return(0); } -int parse_options(char * options, struct lustre_mount_data *lmd) +static int ignored_option(const char *check) +{ + char *ignore[] = { "noatime", "async", "rw", "suid", "dev", + "exec", "nouser", NULL }; + char **which = ignore; + + while (*which != NULL) { + if (strcmp(check, *which) == 0) + return 1; + which++; + } + return 0; +} + +int parse_options(char *options, struct lustre_mount_data *lmd) { ptl_nid_t nid = 0, cluster_id = 0; int val; @@ -214,7 +255,7 @@ int parse_options(char * options, struct lustre_mount_data *lmd) if (ptl_parse_nid(&cluster_id, opteq+1) != 0) { fprintf(stderr, "%s: can't parse NID " "%s\n", progname, opteq+1); - return (-1); + return (1); } lmd_cluster_id = cluster_id; } else if(!strcmp(opt, "route")) { @@ -223,7 +264,7 @@ int parse_options(char * options, struct lustre_mount_data *lmd) "of the form: route=" "[-]:[-]\n", progname); - return(-1); + return(1); } parse_route(opteq, opttgts); } else if (!strcmp(opt, "local_nid")) { @@ -232,7 +273,7 @@ int parse_options(char * options, struct lustre_mount_data *lmd) "can't parse NID %s\n", progname, opteq+1); - return (-1); + return (1); } lmd->lmd_local_nid = nid; } else if (!strcmp(opt, "server_nid")) { @@ -240,7 +281,7 @@ int parse_options(char * options, struct lustre_mount_data *lmd) fprintf(stderr, "%s: " "can't parse NID %s\n", progname, opteq + 1); - return (-1); + return (1); } lmd->lmd_server_nid = nid; } else if (!strcmp(opt, "port")) { @@ -248,16 +289,23 @@ int parse_options(char * options, struct lustre_mount_data *lmd) } else { fprintf(stderr, "%s: unknown option '%s'\n", progname, opt); - return (-1); + usage(stderr); } } else { + if (ignored_option(opt)) + continue; + val = 1; if (!strncmp(opt, "no", 2)) { val = 0; opt += 2; } - if (!strcmp(opt, "debug")) { - debug = val; + if (!strcmp(opt, "debug")) { /* deprecated */ + fake = val; + } else { + fprintf(stderr, "%s: unknown option '%s'\n", + progname, opt); + usage(stderr); } } } @@ -271,7 +319,7 @@ get_local_elan_id(char *fname, char *buf) int rc; if (fp == NULL) - return -1; + return 1; rc = fscanf(fp, "NodeId %255s", buf); @@ -325,14 +373,14 @@ set_local(struct lustre_mount_data *lmd) fprintf(stderr, "%s: can't read Elan ID from /proc\n", progname); - return -1; + return 1; } } } if (ptl_parse_nid (&nid, ptr) != 0) { fprintf (stderr, "%s: can't parse NID %s\n", progname, buf); - return (-1); + return (1); } lmd->lmd_local_nid = nid + lmd_cluster_id; @@ -350,7 +398,7 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) if (ptl_parse_nid (&nid, hostname) != 0) { fprintf (stderr, "%s: can't parse NID %s\n", progname, hostname); - return (-1); + return (1); } lmd->lmd_server_nid = nid; } @@ -358,7 +406,7 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) if (ptl_parse_ipaddr(&lmd->lmd_server_ipaddr, hostname) != 0) { fprintf (stderr, "%s: can't parse host %s\n", progname, hostname); - return (-1); + return (1); } } else if (lmd->lmd_nal == QSWNAL &&lmd->lmd_server_nid == PTL_NID_ANY){ char buf[64]; @@ -366,12 +414,12 @@ set_peer(char *hostname, struct lustre_mount_data *lmd) if (rc != 1) { fprintf (stderr, "%s: can't get elan id from host %s\n", progname, hostname); - return -1; + return 1; } if (ptl_parse_nid (&nid, buf) != 0) { fprintf (stderr, "%s: can't parse NID %s\n", progname, hostname); - return (-1); + return (1); } lmd->lmd_server_nid = nid; } @@ -410,13 +458,13 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd) fprintf(stderr, "%s: directory to mount not in " "host:/mds/profile format\n", progname); - return(-1); + return(1); } } else { fprintf(stderr, "%s: " "directory to mount not in host:/mds/profile format\n", progname); - return(-1); + return(1); } rc = parse_options(options, lmd); @@ -432,13 +480,13 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd) return rc; if (strlen(mds) > sizeof(lmd->lmd_mds) + 1) { fprintf(stderr, "%s: mds name too long\n", progname); - return(-1); + return(1); } strcpy(lmd->lmd_mds, mds); if (strlen(profile) > sizeof(lmd->lmd_profile) + 1) { fprintf(stderr, "%s: profile name too long\n", progname); - return(-1); + return(1); } strcpy(lmd->lmd_profile, profile); @@ -503,7 +551,7 @@ static int set_routes(struct lustre_mount_data *lmd) { LPX64" : "LPX64" - "LPX64"\n[%d] %s\n", progname, routes[i].gw, routes[i].lo, routes[i].hi, errno, strerror(errno)); - err = -1; + err = 2; break; } } @@ -512,20 +560,14 @@ static int set_routes(struct lustre_mount_data *lmd) { return err; } -void usage(FILE *out) -{ - fprintf(out, "usage: %s [-f] [-v] [-n] [-o mntopt]\n", - progname); - exit(out != stdout); -} - int main(int argc, char *const argv[]) { char *source, *target, *options = ""; int i, nargs = 3, opt, rc; struct lustre_mount_data lmd; static struct option long_opt[] = { - {"force", 0, 0, 'f'}, + {"fake", 0, 0, 'f'}, + {"force", 0, 0, 1}, {"help", 0, 0, 'h'}, {"nomtab", 0, 0, 'n'}, {"options", 1, 0, 'o'}, @@ -538,11 +580,16 @@ int main(int argc, char *const argv[]) while ((opt = getopt_long(argc, argv, "fno:v", long_opt, NULL)) != EOF){ switch (opt) { - case 'f': + case 1: ++force; printf("force: %d\n", force); nargs++; break; + case 'f': + ++fake; + printf("fake: %d\n", fake); + nargs++; + break; case 'h': usage(stdout); break; @@ -591,14 +638,10 @@ int main(int argc, char *const argv[]) exit(1); } - rc = set_routes(&lmd); - if (rc) { - exit(1); - } - - if (debug) { - printf("%s: debug mode, not mounting\n", progname); - exit(0); + if (!fake) { + rc = set_routes(&lmd); + if (rc) + exit(2); } rc = access(target, F_OK); @@ -606,19 +649,20 @@ int main(int argc, char *const argv[]) rc = errno; fprintf(stderr, "%s: %s inaccessible: %s\n", progname, target, strerror(errno)); - return rc; + return 1; } - rc = mount(source, target, "lustre", 0, (void *)&lmd); + if (!fake) + rc = mount(source, target, "lustre", 0, (void *)&lmd); if (rc) { - rc = errno; fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", source, target, progname, strerror(errno)); - if (rc == ENODEV) + if (errno == ENODEV) fprintf(stderr, "Are the lustre modules loaded?\n" "Check /etc/modules.conf and /proc/filesystems\n"); + rc = 32; } else { - update_mtab_entry(source, target, "lustre", options, 0, 0, 0); + rc = update_mtab_entry(source, target, "lustre", options,0,0,0); } return rc; } -- 1.8.3.1