From 434e9dcafde053727baece32875c7e21e0a6c537 Mon Sep 17 00:00:00 2001 From: nathan Date: Tue, 18 Oct 2005 16:38:26 +0000 Subject: [PATCH] b=8080 Landing LNET (b1_4_newconfig) --- lustre/ChangeLog | 61 +- lustre/autoMakefile.am | 2 +- lustre/autoconf/lustre-version.ac | 2 +- lustre/include/liblustre.h | 14 +- lustre/include/linux/lustre_cfg.h | 8 +- lustre/include/linux/lustre_debug.h | 4 +- lustre/include/linux/lustre_idl.h | 5 +- lustre/include/linux/lustre_lib.h | 3 +- lustre/include/linux/lustre_net.h | 214 +++--- lustre/include/linux/obd.h | 2 +- lustre/include/linux/obd_class.h | 8 +- lustre/include/linux/obd_support.h | 8 +- lustre/include/lustre/lustre_user.h | 4 + lustre/ldlm/l_lock.c | 4 +- lustre/ldlm/ldlm_lib.c | 19 +- lustre/ldlm/ldlm_lock.c | 13 +- lustre/ldlm/ldlm_lockd.c | 47 +- lustre/ldlm/ldlm_request.c | 9 +- lustre/liblustre/Makefile.am | 24 +- lustre/liblustre/genlib.sh | 42 +- lustre/liblustre/llite_lib.c | 39 +- lustre/liblustre/llite_lib.h | 7 +- lustre/liblustre/lutil.c | 105 +-- lustre/liblustre/lutil.h | 1 - lustre/liblustre/tests/Makefile.am | 2 +- lustre/liblustre/tests/echo_test.c | 33 +- lustre/liblustre/tests/recovery_small.c | 4 +- lustre/liblustre/tests/replay_ost_single.c | 4 +- lustre/liblustre/tests/replay_single.c | 4 +- lustre/liblustre/tests/sanity.c | 2 +- lustre/llite/dcache.c | 2 +- lustre/llite/llite_close.c | 2 +- lustre/llite/llite_lib.c | 60 +- lustre/mdc/mdc_request.c | 5 +- lustre/mds/handler.c | 37 +- lustre/mds/mds_internal.h | 4 +- lustre/mds/mds_lib.c | 4 +- lustre/mds/mds_lov.c | 63 +- lustre/mds/mds_reint.c | 9 +- lustre/obdclass/class_obd.c | 21 +- lustre/obdclass/genops.c | 60 +- lustre/obdclass/llog_ioctl.c | 2 +- lustre/obdclass/llog_swab.c | 70 +- lustre/obdclass/lustre_peer.c | 48 +- lustre/obdclass/obd_config.c | 88 +-- lustre/obdecho/echo.c | 2 +- lustre/obdfilter/filter.c | 3 +- lustre/osc/osc_request.c | 25 +- lustre/ost/ost_handler.c | 32 +- lustre/ptlrpc/client.c | 58 +- lustre/ptlrpc/connection.c | 
111 ++-- lustre/ptlrpc/events.c | 434 +++++-------- lustre/ptlrpc/import.c | 18 +- lustre/ptlrpc/llog_client.c | 8 +- lustre/ptlrpc/lproc_ptlrpc.c | 6 +- lustre/ptlrpc/niobuf.c | 221 +++---- lustre/ptlrpc/pack_generic.c | 11 +- lustre/ptlrpc/pers.c | 58 +- lustre/ptlrpc/ptlrpc_internal.h | 2 +- lustre/ptlrpc/ptlrpcd.c | 2 +- lustre/ptlrpc/recov_thread.c | 2 +- lustre/ptlrpc/service.c | 251 +++----- lustre/scripts/bdev-io-survey.sh | 11 +- lustre/scripts/lustre | 2 +- lustre/tests/.cvsignore | 1 + lustre/tests/2ost.sh | 14 +- lustre/tests/Makefile.am | 2 +- lustre/tests/acceptance-metadata-double.sh | 4 +- lustre/tests/acceptance-metadata-parallel.sh | 2 +- lustre/tests/acceptance-metadata-single.sh | 4 +- lustre/tests/acceptance-small.sh | 6 +- lustre/tests/cobd.sh | 16 +- lustre/tests/conf-sanity.sh | 49 +- lustre/tests/echo.sh | 14 +- lustre/tests/insanity.sh | 39 +- lustre/tests/liblustre_sanity_uml.sh | 7 +- lustre/tests/ll_dirstripe_verify.c | 2 +- lustre/tests/llmountcleanup.sh | 2 +- lustre/tests/llrmount.sh | 2 +- lustre/tests/local.sh | 41 +- lustre/tests/lov.sh | 13 +- lustre/tests/mkdirdeep.c | 4 +- lustre/tests/oos.sh | 4 +- lustre/tests/recovery-small.sh | 2 +- lustre/tests/replay-dual.sh | 2 +- lustre/tests/replay-ost-single.sh | 2 +- lustre/tests/replay-single.sh | 8 +- lustre/tests/rundbench | 2 +- lustre/tests/runfailure-net | 2 +- lustre/tests/runiozone | 2 +- lustre/tests/runregression-mds.sh | 2 +- lustre/tests/runregression-net.sh | 2 +- lustre/tests/sanity.sh | 18 +- lustre/tests/test-framework.sh | 32 +- lustre/tests/test-lwizard.sh | 5 +- lustre/tests/uml.sh | 6 +- lustre/tests/writemany.c | 8 +- lustre/utils/.cvsignore | 1 + lustre/utils/Lustre/lustredb.py | 3 + lustre/utils/Makefile.am | 15 +- lustre/utils/lconf | 932 +++++---------------------- lustre/utils/lctl.c | 17 +- lustre/utils/lfs.c | 12 +- lustre/utils/liblustreapi.c | 2 +- lustre/utils/llmount.c | 576 +---------------- lustre/utils/llog_reader.c | 331 ++++++++++ lustre/utils/lmc 
| 42 +- lustre/utils/lustre_cfg.c | 35 +- lustre/utils/lwizard | 8 +- lustre/utils/module_setup.sh | 38 +- lustre/utils/obd.c | 109 +++- lustre/utils/obdctl.h | 2 +- lustre/utils/rmmod_all.sh | 5 +- 113 files changed, 1829 insertions(+), 3044 deletions(-) create mode 100644 lustre/utils/llog_reader.c diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 728f5ea..1fc0371 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,7 +1,29 @@ tbd Cluster File Systems, Inc. * version 1.4.6 + * WIRE PROTOCOL CHANGE. This version of Lustre networking WILL NOT + INTEROPERATE with older versions automatically. Please read the + user documentation before upgrading any part of a live system. * bug fixes +Severity : enhancement +Bugzilla : 7981/8208 +Description: Introduced Lustre Networking (LNET) +Details : LNET is new networking infrastructure for Lustre, it includes + a reorganized network configuration mode (see the user + documentation for full details) as well as support for routing + between different network fabrics. Lustre Networking Devices + (LNDs) for the supported network fabrics have also been + created for this new infrastructure. + +Severity : enhancement +Bugzilla : 7982 +Description: Configuration change for the XT3 + The PTLLND is now used to run Lustre over Portals on the XT3 + The configure option(s) --with-cray-portals are no longer used. + Rather --with-portals= is used to + enable building on the XT3. In addition to enable XT3 specific + features the option --enable-cray-xt3 must be used. + Severity : major Frequency : rare Bugzilla : 7407 @@ -276,64 +298,71 @@ Severity : minor Bugzilla : 7278 Description: O_CREAT|O_EXCL open flags in liblustre always return -EEXIST Details : Make libsysio to not enforce O_EXCL by clearing the flag, - for liblustre O_EXCL is enforced by MDS. + for liblustre O_EXCL is enforced by MDS. Severity : minor Bugzilla : 6455 Description: readdir never returns NULL in liblustre. 
Details : Corrected llu_iop_getdirentries logic, to return offset of next - dentry in struct dirent. + dentry in struct dirent. Severity : minor Bugzilla : 7137 Frequency : liblustre only, depends on application IO pattern Description: liblustre clients evicted if not contacting servers Details : Don't put liblustre clients into the ping_evictor list, so - they will not be evicted by the pinger ever. + they will not be evicted by the pinger ever. Severity : enhancement Bugzilla : 6902 Description: Add ability to evict clients by NID from MDS. Details : By echoing "nid:$NID" string into - /proc/fs/lustre/mds/.../evict_client client with nid that equals to - $NID would be instantly evicted from this MDS and from all active - OSTs connected to it. + /proc/fs/lustre/mds/.../evict_client client with nid that equals to + $NID would be instantly evicted from this MDS and from all active + OSTs connected to it. Severity : minor Bugzilla : 7198 Description: Do not query file size twice, somewhat slowing stat(2) calls. Details : lookup_it_finish() used to query file size from OSTs that was not - needed. + needed. Severity : minor Bugzilla : 6237 Description: service threads change working directory to that of init Details : Starting lustre service threads may pin the working directory - of the parent thread, making that filesystem busy. Threads - now change to the working directory of init to avoid this. + of the parent thread, making that filesystem busy. Threads + now change to the working directory of init to avoid this. Severity : minor Bugzilla : 6827 Frequency : during shutdown only Description: shutdown with a failed MDS or OST can cause unmount to hang Details : Don't resend DISCONNECT messages in ptlrpc_disconnect_import() - if server is down. + if server is down. 
Severity : minor Bugzilla : 7331 Frequency : 2.6 only Description: chmod/chown may include an extra supplementary group Details : ll{,u}_mdc_pack_op_data() does not properly initialize the - supplementary group and if none is specified this is used. + supplementary group and if none is specified this is used. Severity : minor Bugzilla : 5479 (6816) Frequency : rare Description: Racing open + rm can assert client in mdc_set_open_replay_data() Details : If lookup is in progress on a file that is unlinked we might try - to revalidate the inode and fail in revalidate after lookup is - complete and ll_file_open() enqueues the open again but - it_open_error() was not checking DISP_OPEN_OPEN errors correctly. + to revalidate the inode and fail in revalidate after lookup is + complete and ll_file_open() enqueues the open again but + it_open_error() was not checking DISP_OPEN_OPEN errors correctly. + +Severity : minor +Frequency : always, if lconf --abort_recovery used +Bugzilla : 7047 +Description: lconf --abort_recovery fails with 'Operation not supported' +Details : lconf was attempting to abort recovery on the MDT device and not + the MDS device ------------------------------------------------------------------------------ @@ -343,10 +372,10 @@ Details : If lookup is in progress on a file that is unlinked we might try Severity : major Frequency : rare (only unsupported configurations with a node running as an - OST and a client) + OST and a client) Bugzilla : 6514, 5137 Description: Mounting a Lustre file system on a node running as an OST could - lead to deadlocks + lead to deadlocks Details : OSTs now allocate memory needed to write out data at startup, instead of when needed, to avoid having to allocate memory in possibly low memory situations. diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index 6601c1c..92426b5 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -63,7 +63,7 @@ CSTKO=/tmp/checkstack.orig checkstack: [ -f ${CSTK} -a ! 
-s ${CSTKO} ] && mv ${CSTK} ${CSTKO} || true - for i in ${SUBDIRS} portals/knals/*; do \ + for i in ${SUBDIRS} lnet/klnds/*; do \ MOD=$$i/`basename $$i`.o; \ [ -f $$MOD ] && objdump -d $$MOD | perl tests/checkstack.pl; \ done | sort -nr > ${CSTK} diff --git a/lustre/autoconf/lustre-version.ac b/lustre/autoconf/lustre-version.ac index e478b80..d38c6f3 100644 --- a/lustre/autoconf/lustre-version.ac +++ b/lustre/autoconf/lustre-version.ac @@ -1 +1 @@ -m4_define([LUSTRE_VERSION],[1.4.5.8]) +m4_define([LUSTRE_VERSION],[1.4.5.91]) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index b02f5e4..f88243a 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -62,7 +62,7 @@ #include #include -#include +#include #include /* definitions for liblustre */ @@ -102,8 +102,6 @@ static inline void inter_module_put(void *a) return; } -extern ptl_handle_ni_t tcpnal_ni; - void *inter_module_get(char *arg); /* cheats for now */ @@ -728,17 +726,9 @@ int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *); /* log related */ static inline int llog_init_commit_master(void) { return 0; } static inline int llog_cleanup_commit_master(int force) { return 0; } -static inline void portals_run_lbug_upcall(char *file, const char *fn, +static inline void libcfs_run_lbug_upcall(char *file, const char *fn, const int l){} -#define LBUG() \ - do { \ - printf("!!!LBUG at %s:%d\n", __FILE__, __LINE__); \ - sleep(1000000); \ - } while (0) - - - /* completion */ struct completion { unsigned int done; diff --git a/lustre/include/linux/lustre_cfg.h b/lustre/include/linux/lustre_cfg.h index a5232af..6c2f8a6 100644 --- a/lustre/include/linux/lustre_cfg.h +++ b/lustre/include/linux/lustre_cfg.h @@ -63,7 +63,7 @@ struct lustre_cfg { uint32_t lcfg_num; uint32_t lcfg_flags; uint64_t lcfg_nid; - uint32_t lcfg_nal; + uint32_t lcfg_nal; /* not used any more */ uint32_t lcfg_bufcount; uint32_t lcfg_buflens[0]; @@ -236,11 +236,7 @@ static inline int 
lustre_cfg_sanity_check(void *buf, int len) struct lustre_mount_data { uint32_t lmd_magic; uint32_t lmd_flags; - uint64_t lmd_local_nid; - uint64_t lmd_server_nid; - uint32_t lmd_nal; - uint32_t lmd_server_ipaddr; - uint32_t lmd_port; + uint64_t lmd_nid; char lmd_mds[64]; char lmd_profile[64]; }; diff --git a/lustre/include/linux/lustre_debug.h b/lustre/include/linux/lustre_debug.h index e550272..a554739 100644 --- a/lustre/include/linux/lustre_debug.h +++ b/lustre/include/linux/lustre_debug.h @@ -30,7 +30,7 @@ do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)) { \ CERROR("bad page index %lu > %Lu\n", index, \ ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)); \ - portal_debug = ~0UL; \ + libcfs_debug = ~0UL; \ OP; \ }} while(0) @@ -38,7 +38,7 @@ do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)) { \ do { if (offset > ASSERT_MAX_SIZE_MB << 20) { \ CERROR("bad file offset %Lu > %Lu\n", offset, \ ASSERT_MAX_SIZE_MB << 20); \ - portal_debug = ~0UL; \ + libcfs_debug = ~0UL; \ OP; \ }} while(0) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 87cc637..beb7023 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -983,7 +983,7 @@ typedef enum { MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK, MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR, OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, - PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, + PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, /* obsolete */ LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, @@ -1150,9 +1150,6 @@ extern void lustre_swab_llogd_conn_body (struct llogd_conn_body *d); extern void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail); -struct portals_cfg; -extern void lustre_swab_portals_cfg(struct portals_cfg *pcfg); - struct lustre_cfg; extern void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg); 
diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 57017a5..547c182 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -464,7 +464,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define ECHO_IOC_CANCEL _IOWR('f', 203, long) /* XXX _IOWR('f', 250, long) has been defined in - * portals/include/libcfs/kp30.h for debug, don't use it + * lnet/include/libcfs/kp30.h for debug, don't use it */ /* Until such time as we get_info the per-stripe maximum from the OST, @@ -725,6 +725,5 @@ do { \ #define LIBLUSTRE_CLIENT (1) #endif - #endif /* _LUSTRE_LIB_H */ diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index eff0a4f..e537a20 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -34,60 +34,35 @@ #include // #include -#include +#include #include #include #include #include /* MD flags we _always_ use */ -#define PTLRPC_MD_OPTIONS (PTL_MD_EVENT_START_DISABLE | \ - PTL_MD_LUSTRE_COMPLETION_SEMANTICS) +#define PTLRPC_MD_OPTIONS 0 -/* Define some large-ish maxima for bulk I/O - * CAVEAT EMPTOR, with multinet (i.e. gateways forwarding between networks) +/* Define maxima for bulk I/O + * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) * these limits are system wide and not interface-local. */ -#define PTLRPC_MAX_BRW_SIZE (1 << 20) -#define PTLRPC_MAX_BRW_PAGES 512 - -/* ...reduce to fit... 
*/ - -#if CRAY_PORTALS -/* include a cray header here if relevant - * NB liblustre SIZE/PAGES is affected too, but it merges contiguous - * chunks, so FTTB, it always used contiguous MDs */ -#else -# include -#endif - -#if (defined(PTL_MTU) && (PTL_MTU < PTLRPC_MAX_BRW_SIZE)) -# undef PTLRPC_MAX_BRW_SIZE -# define PTLRPC_MAX_BRW_SIZE PTL_MTU -#endif -#if (defined(PTL_MD_MAX_IOV) && (PTL_MD_MAX_IOV < PTLRPC_MAX_BRW_PAGES )) -# undef PTLRPC_MAX_BRW_PAGES -# define PTLRPC_MAX_BRW_PAGES PTL_MD_MAX_IOV -#endif - -/* ...and make consistent... */ +#define PTLRPC_MAX_BRW_SIZE LNET_MTU +#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE/PAGE_SIZE) +/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */ #ifdef __KERNEL__ -#if (PTLRPC_MAX_BRW_SIZE > PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) -# undef PTLRPC_MAX_BRW_SIZE -# define PTLRPC_MAX_BRW_SIZE (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE) -#else -# undef PTLRPC_MAX_BRW_PAGES -# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE / PAGE_SIZE) -#endif - -#if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) -#error "PTLRPC_MAX_BRW_PAGES isn't a power of two" -#endif -#else /* !__KERNEL__ */ -/* PAGE_SIZE isn't a constant, can't use CPP on it. We assume that the - * limit is on the number of pages for large pages, which is currently true. 
*/ -# undef PTLRPC_MAX_BRW_PAGES -# define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE / PAGE_SIZE) +# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) +# error "PTLRPC_MAX_BRW_PAGES isn't a power of two" +# endif +# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE)) +# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE" +# endif +# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU) +# error "PTLRPC_MAX_BRW_SIZE too big" +# endif +# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV) +# error "PTLRPC_MAX_BRW_PAGES too big" +# endif #endif /* __KERNEL__ */ /* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request @@ -155,14 +130,10 @@ #define PTLBD_BUFSIZE (32 * 1024) #define PTLBD_MAXREQSIZE 1024 -struct ptlrpc_peer { - ptl_process_id_t peer_id; - struct ptlrpc_ni *peer_ni; -}; - struct ptlrpc_connection { struct list_head c_link; - struct ptlrpc_peer c_peer; + lnet_nid_t c_self; + lnet_process_id_t c_peer; struct obd_uuid c_remote_uuid; atomic_t c_refcount; }; @@ -244,7 +215,7 @@ struct ptlrpc_bulk_desc; * ptlrpc callback & work item stuff */ struct ptlrpc_cb_id { - void (*cbid_fn)(ptl_event_t *ev); /* specific callback fn */ + void (*cbid_fn)(lnet_event_t *ev); /* specific callback fn */ void *cbid_arg; /* additional arg */ }; @@ -252,35 +223,35 @@ struct ptlrpc_cb_id { #define RS_DEBUG 1 struct ptlrpc_reply_state { - struct ptlrpc_cb_id rs_cb_id; - struct list_head rs_list; - struct list_head rs_exp_list; - struct list_head rs_obd_list; + struct ptlrpc_cb_id rs_cb_id; + struct list_head rs_list; + struct list_head rs_exp_list; + struct list_head rs_obd_list; #if RS_DEBUG - struct list_head rs_debug_list; + struct list_head rs_debug_list; #endif /* updates to following flag serialised by srv_request_lock */ - unsigned int rs_difficult:1; /* ACK/commit stuff */ - unsigned int rs_scheduled:1; /* being handled? */ - unsigned int rs_scheduled_ever:1; /* any schedule attempts? */ - unsigned int rs_handled:1; /* been handled yet? 
*/ - unsigned int rs_on_net:1; /* reply_out_callback pending? */ - unsigned int rs_prealloc:1; /* rs from prealloc list */ - - int rs_size; - __u64 rs_transno; - __u64 rs_xid; - struct obd_export *rs_export; - struct ptlrpc_srv_ni *rs_srv_ni; - ptl_handle_md_t rs_md_h; - atomic_t rs_refcount; + unsigned int rs_difficult:1; /* ACK/commit stuff */ + unsigned int rs_scheduled:1; /* being handled? */ + unsigned int rs_scheduled_ever:1;/* any schedule attempts? */ + unsigned int rs_handled:1; /* been handled yet? */ + unsigned int rs_on_net:1; /* reply_out_callback pending? */ + unsigned int rs_prealloc:1; /* rs from prealloc list */ + + int rs_size; + __u64 rs_transno; + __u64 rs_xid; + struct obd_export *rs_export; + struct ptlrpc_service *rs_service; + lnet_handle_md_t rs_md_h; + atomic_t rs_refcount; /* locks awaiting client reply ACK */ - int rs_nlocks; - struct lustre_handle rs_locks[RS_MAX_LOCKS]; - ldlm_mode_t rs_modes[RS_MAX_LOCKS]; + int rs_nlocks; + struct lustre_handle rs_locks[RS_MAX_LOCKS]; + ldlm_mode_t rs_modes[RS_MAX_LOCKS]; /* last member: variable sized reply message */ - struct lustre_msg rs_msg; + struct lustre_msg rs_msg; }; struct ptlrpc_thread; @@ -299,7 +270,7 @@ struct ptlrpc_request_pool { int prp_rq_size; void (*prp_populate)(struct ptlrpc_request_pool *, int); }; - + struct ptlrpc_request { int rq_type; /* one of PTL_RPC_MSG_* */ struct list_head rq_list; @@ -341,26 +312,27 @@ struct ptlrpc_request { enum lustre_imp_state rq_send_state; /* client+server request */ - ptl_handle_md_t rq_req_md_h; + lnet_handle_md_t rq_req_md_h; struct ptlrpc_cb_id rq_req_cbid; /* server-side... */ struct timeval rq_arrival_time; /* request arrival time */ struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/ -#if CRAY_PORTALS - ptl_uid_t rq_uid; /* peer uid, used in MDS only */ +#if CRAY_XT3 +# error "Need to get the uid from the event?" 
+ __u32 rq_uid; /* peer uid, used in MDS only */ #endif - + /* client-only incoming reply */ - ptl_handle_md_t rq_reply_md_h; + lnet_handle_md_t rq_reply_md_h; wait_queue_head_t rq_reply_waitq; struct ptlrpc_cb_id rq_reply_cbid; - struct ptlrpc_peer rq_peer; /* XXX see service.c can this be removed? */ - char rq_peerstr[PTL_NALFMT_SIZE]; - struct obd_export *rq_export; - struct obd_import *rq_import; + lnet_nid_t rq_self; + lnet_process_id_t rq_peer; + struct obd_export *rq_export; + struct obd_import *rq_import; void (*rq_replay_cb)(struct ptlrpc_request *); void (*rq_commit_cb)(struct ptlrpc_request *); @@ -471,12 +443,12 @@ struct ptlrpc_bulk_desc { __u64 bd_last_xid; struct ptlrpc_cb_id bd_cbid; /* network callback info */ - ptl_handle_md_t bd_md_h; /* associated MD */ + lnet_handle_md_t bd_md_h; /* associated MD */ -#if (!CRAY_PORTALS && defined(__KERNEL__)) - ptl_kiov_t bd_iov[0]; +#if defined(__KERNEL__) + lnet_kiov_t bd_iov[0]; #else - ptl_md_iovec_t bd_iov[0]; + lnet_md_iovec_t bd_iov[0]; #endif }; @@ -494,34 +466,14 @@ struct ptlrpc_thread { struct ptlrpc_request_buffer_desc { struct list_head rqbd_list; struct list_head rqbd_reqs; - struct ptlrpc_srv_ni *rqbd_srv_ni; - ptl_handle_md_t rqbd_md_h; + struct ptlrpc_service *rqbd_service; + lnet_handle_md_t rqbd_md_h; int rqbd_refcount; char *rqbd_buffer; struct ptlrpc_cb_id rqbd_cbid; struct ptlrpc_request rqbd_req; }; -/* event queues are per-ni, because one day we may get a hardware - * supported NAL that delivers events asynchonously wrt kernel portals - * into the eq. 
- */ -struct ptlrpc_ni { /* Generic interface state */ - char *pni_name; - int pni_number; - ptl_handle_ni_t pni_ni_h; - ptl_handle_eq_t pni_eq_h; -}; - -struct ptlrpc_srv_ni { - /* Interface-specific service state */ - struct ptlrpc_service *sni_service; /* owning service */ - struct ptlrpc_ni *sni_ni; /* network interface */ - struct list_head sni_active_rqbds; /* req buffers receiving */ - struct list_head sni_active_replies; /* all the active replies */ - int sni_nrqbd_receiving; /* # posted request buffers */ -}; - typedef int (*svc_handler_t)(struct ptlrpc_request *req); typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *); @@ -552,11 +504,14 @@ struct ptlrpc_service { svcreq_printfn_t srv_request_history_print_fn; /* service-specific print fn */ struct list_head srv_idle_rqbds; /* request buffers to be reposted */ + struct list_head srv_active_rqbds; /* req buffers receiving */ struct list_head srv_history_rqbds; /* request buffer history */ + int srv_nrqbd_receiving; /* # posted request buffers */ int srv_n_history_rqbds; /* # request buffers in history */ int srv_max_history_rqbds; /* max # request buffers in history */ atomic_t srv_outstanding_replies; + struct list_head srv_active_replies; /* all the active replies */ struct list_head srv_reply_queue; /* replies waiting for service */ wait_queue_head_t srv_waitq; /* all threads sleep on this */ @@ -588,43 +543,30 @@ struct ptlrpc_service { */ void (*srv_done)(struct ptlrpc_thread *thread); - struct ptlrpc_srv_ni srv_interfaces[0]; + //struct ptlrpc_srv_ni srv_interfaces[0]; }; -static inline char *ptlrpc_peernid2str(struct ptlrpc_peer *p, char *str) -{ - LASSERT(p->peer_ni != NULL); - return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str)); -} - -static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str) -{ - LASSERT(p->peer_ni != NULL); - return (portals_id2str(p->peer_ni->pni_number, p->peer_id, str)); -} - /* ptlrpc/events.c */ -extern struct ptlrpc_ni 
ptlrpc_interfaces[]; -extern int ptlrpc_ninterfaces; -extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, struct ptlrpc_peer *peer); -extern void request_out_callback (ptl_event_t *ev); -extern void reply_in_callback(ptl_event_t *ev); -extern void client_bulk_callback (ptl_event_t *ev); -extern void request_in_callback(ptl_event_t *ev); -extern void reply_out_callback(ptl_event_t *ev); -extern void server_bulk_callback (ptl_event_t *ev); -extern int ptlrpc_default_nal(void); +extern lnet_handle_eq_t ptlrpc_eq_h; +extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, + lnet_process_id_t *peer, lnet_nid_t *self); +extern void request_out_callback (lnet_event_t *ev); +extern void reply_in_callback(lnet_event_t *ev); +extern void client_bulk_callback (lnet_event_t *ev); +extern void request_in_callback(lnet_event_t *ev); +extern void reply_out_callback(lnet_event_t *ev); +extern void server_bulk_callback (lnet_event_t *ev); /* ptlrpc/connection.c */ void ptlrpc_dump_connections(void); void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *); -struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, - struct obd_uuid *uuid); +struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, + lnet_nid_t self, struct obd_uuid *uuid); int ptlrpc_put_connection(struct ptlrpc_connection *c); struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); void ptlrpc_init_connection(void); void ptlrpc_cleanup_connection(void); -extern ptl_pid_t ptl_get_pid(void); +extern lnet_pid_t ptl_get_pid(void); /* ptlrpc/niobuf.c */ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc); diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index f0697c3..a681ad9 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -533,7 +533,7 @@ struct obd_device { unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1, obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1, 
obd_no_recov:1, obd_stopping:1, obd_starting:1, - obd_force:1, obd_fail:1; + obd_force:1, obd_fail:1, obd_async_recov:1; atomic_t obd_refcount; wait_queue_head_t obd_refcount_waitq; struct proc_dir_entry *obd_proc_entry; diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 3fe5746..b89063c 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -89,8 +89,7 @@ void ping_evictor_stop(void); #endif -/* buf should be len PTL_NALFMT_SIZE */ -char *obd_export_nid2str(struct obd_export *exp, char *buf); +char *obd_export_nid2str(struct obd_export *exp); int obd_export_evict_by_nid(struct obd_device *obd, char *nid); int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid); @@ -107,7 +106,6 @@ void class_decref(struct obd_device *obd); struct config_llog_instance { char * cfg_instance; struct obd_uuid cfg_uuid; - ptl_nid_t cfg_local_nid; }; int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, struct config_llog_instance *cfg); @@ -1136,8 +1134,8 @@ typedef __u8 class_uuid_t[16]; void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); /* lustre_peer.c */ -int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid); -int class_add_uuid(char *uuid, __u64 nid, __u32 nal); +int lustre_uuid_to_peer(char *uuid, lnet_nid_t *peer_nid, int index); +int class_add_uuid(char *uuid, __u64 nid); int class_del_uuid (char *uuid); void class_init_uuidlist(void); void class_exit_uuidlist(void); diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 0bb7e55..bcbdabc 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -291,7 +291,7 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) # endif #endif /* __KERNEL__ */ -extern atomic_t portal_kmemory; +extern atomic_t libcfs_kmemory; #if defined(LUSTRE_UTILS) /* this version is for utils only */ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ @@ -314,7 
+314,7 @@ do { \ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ + atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ } else { \ memset(ptr, 0, size); \ atomic_add(size, &obd_memory); \ @@ -345,7 +345,7 @@ do { \ CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ + atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ } else { \ memset(ptr, 0, size); \ atomic_add(size, &obd_memory); \ @@ -411,7 +411,7 @@ do { \ CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ + atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ } else { \ memset(ptr, 0, size); \ atomic_add(size, &obd_memory); \ diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 30e55e6..6446634 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -255,4 +255,8 @@ struct if_quotactl { #endif #endif /* !LPU64 */ +#ifndef offsetof +# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) +#endif + #endif /* _LUSTRE_USER_H */ diff --git a/lustre/ldlm/l_lock.c b/lustre/ldlm/l_lock.c index 03ffc36..09fda38 100644 --- a/lustre/ldlm/l_lock.c +++ b/lustre/ldlm/l_lock.c @@ -127,7 +127,7 @@ void l_check_ns_lock(struct ldlm_namespace *ns) if (!l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) { CERROR("namespace %s lock not held when it should be; tell " "phil\n", ns->ns_name); - portals_debug_dumpstack(NULL); + libcfs_debug_dumpstack(NULL); next_msg = jiffies + 60 * HZ; } } @@ -139,7 
+139,7 @@ void l_check_no_ns_lock(struct ldlm_namespace *ns) if (l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) { CERROR("namespace %s lock held illegally; tell phil\n", ns->ns_name); - portals_debug_dumpstack(NULL); + libcfs_debug_dumpstack(NULL); next_msg = jiffies + 60 * HZ; } } diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 873dbff..5475efb 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -550,15 +550,15 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) obd_str2uuid (&cluuid, str); /* XXX extract a nettype and format accordingly */ - switch (sizeof(ptl_nid_t)) { + switch (sizeof(lnet_nid_t)) { /* NB the casts only avoid compiler warnings */ case 8: snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_"LPX64"_UUID", (__u64)req->rq_peer.peer_id.nid); + "NET_"LPX64"_UUID", (__u64)req->rq_peer.nid); break; case 4: snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_%x_UUID", (__u32)req->rq_peer.peer_id.nid); + "NET_%x_UUID", (__u32)req->rq_peer.nid); break; default: LBUG(); @@ -682,9 +682,9 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) if (export->exp_connection != NULL) ptlrpc_put_connection(export->exp_connection); - export->exp_connection = ptlrpc_get_connection(&req->rq_peer, + export->exp_connection = ptlrpc_get_connection(req->rq_peer, + req->rq_self, &remote_uuid); - if (rc == EALREADY) { /* We indicate the reconnection in a flag, not an error code. 
*/ lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); @@ -1242,12 +1242,9 @@ target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) struct ptlrpc_reply_state *rs; struct obd_device *obd; struct obd_export *exp; - struct ptlrpc_srv_ni *sni; struct ptlrpc_service *svc; - sni = req->rq_rqbd->rqbd_srv_ni; - svc = sni->sni_service; - + svc = req->rq_rqbd->rqbd_service; rs = req->rq_reply_state; if (rs == NULL || !rs->rs_difficult) { /* no notifiers */ @@ -1258,7 +1255,7 @@ target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) /* must be an export if locks saved */ LASSERT (req->rq_export != NULL); /* req/reply consistent */ - LASSERT (rs->rs_srv_ni == sni); + LASSERT (rs->rs_service == svc); /* "fresh" reply */ LASSERT (!rs->rs_scheduled); @@ -1317,7 +1314,7 @@ target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) list_add_tail (&rs->rs_list, &svc->srv_reply_queue); wake_up (&svc->srv_waitq); } else { - list_add (&rs->rs_list, &sni->sni_active_replies); + list_add (&rs->rs_list, &svc->srv_active_replies); rs->rs_scheduled = 0; /* allow notifier to schedule */ } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 8701bc0..a15a5ea 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1172,10 +1172,9 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos) { - char str[PTL_NALFMT_SIZE]; struct obd_device *obd = NULL; - if (!((portal_debug | D_ERROR) & level)) + if (!((libcfs_debug | D_ERROR) & level)) return; if (!lock) { @@ -1189,17 +1188,15 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos) if (lock->l_conn_export != NULL) obd = lock->l_conn_export->exp_obd; if (lock->l_export && lock->l_export->exp_connection) { - CDEBUG(level, " Node: NID %s on %s (rhandle: "LPX64")\n", - ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer, str), - 
lock->l_export->exp_connection->c_peer.peer_ni->pni_name, + CDEBUG(level, " Node: NID %s (rhandle: "LPX64")\n", + libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid), lock->l_remote_handle.cookie); } else if (obd == NULL) { CDEBUG(level, " Node: local\n"); } else { struct obd_import *imp = obd->u.cli.cl_import; - CDEBUG(level, " Node: NID %s on %s (rhandle: "LPX64")\n", - ptlrpc_peernid2str(&imp->imp_connection->c_peer, str), - imp->imp_connection->c_peer.peer_ni->pni_name, + CDEBUG(level, " Node: NID %s (rhandle: "LPX64")\n", + libcfs_nid2str(imp->imp_connection->c_peer.nid), lock->l_remote_handle.cookie); } CDEBUG(level, " Resource: %p ("LPU64"/"LPU64")\n", lock->l_resource, diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 7f1221f..c00fa37 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -114,7 +114,7 @@ static int expired_lock_main(void *arg) ENTRY; lock_kernel(); - kportal_daemonize("ldlm_elt"); + libcfs_daemonize("ldlm_elt"); SIGNAL_MASK_LOCK(current, flags); sigfillset(¤t->blocked); @@ -137,8 +137,8 @@ static int expired_lock_main(void *arg) spin_unlock_bh(&waiting_locks_spinlock); /* from waiting_locks_callback, but not in timer */ - portals_debug_dumplog(); - portals_run_lbug_upcall(__FILE__, + libcfs_debug_dumplog(); + libcfs_run_lbug_upcall(__FILE__, "waiting_locks_callback", expired_lock_thread.elt_dump); @@ -189,10 +189,9 @@ static int expired_lock_main(void *arg) static void waiting_locks_callback(unsigned long unused) { struct ldlm_lock *lock, *last = NULL; - char str[PTL_NALFMT_SIZE]; if (obd_dump_on_timeout) - portals_debug_dumplog(); + libcfs_debug_dumplog(); spin_lock_bh(&waiting_locks_spinlock); while (!list_empty(&waiting_locks_list)) { @@ -206,7 +205,7 @@ static void waiting_locks_callback(unsigned long unused) LDLM_ERROR(lock, "lock callback timer expired: evicting client " "%s@%s nid %s ",lock->l_export->exp_client_uuid.uuid, lock->l_export->exp_connection->c_remote_uuid.uuid, - 
ptlrpc_peernid2str(&lock->l_export->exp_connection->c_peer,str)); + libcfs_nid2str(lock->l_export->exp_connection->c_peer.nid)); if (lock == last) { LDLM_ERROR(lock, "waiting on lock multiple times"); @@ -223,9 +222,9 @@ static void waiting_locks_callback(unsigned long unused) /* LBUG(); */ CEMERG("would be an LBUG, but isn't (bug 5653)\n"); - portals_debug_dumpstack(NULL); - /*blocks* portals_debug_dumplog(); */ - /*blocks* portals_run_lbug_upcall(file, func, line); */ + libcfs_debug_dumpstack(NULL); + /*blocks* libcfs_debug_dumplog(); */ + /*blocks* libcfs_run_lbug_upcall(file, func, line); */ break; } last = lock; @@ -272,7 +271,7 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)"); if (time_after(jiffies, next)) { next = jiffies + 14400 * HZ; - portals_debug_dumpstack(NULL); + libcfs_debug_dumpstack(NULL); } return 0; } @@ -363,19 +362,18 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, const char *ast_type) { struct ptlrpc_connection *conn = lock->l_export->exp_connection; - char str[PTL_NALFMT_SIZE]; - - ptlrpc_peernid2str(&conn->c_peer, str); + char *str = libcfs_nid2str(conn->c_peer.nid); LCONSOLE_ERROR("A client on nid %s was evicted from service %s.\n", str, lock->l_export->exp_obd->obd_name); - LDLM_ERROR(lock, "%s AST failed (%d): evicting client %s@%s NID "LPX64 + LDLM_ERROR(lock, "%s AST failed (%d): evicting client %s@%s NID %s" " (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid, - conn->c_remote_uuid.uuid, conn->c_peer.peer_id.nid, str); + conn->c_remote_uuid.uuid, libcfs_nid2str(conn->c_peer.nid), + str); if (obd_dump_on_timeout) - portals_debug_dumplog(); + libcfs_debug_dumplog(); class_fail_export(lock->l_export); } @@ -383,15 +381,14 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock, struct ptlrpc_request *req, int rc, const char *ast_type) { - struct ptlrpc_peer *peer = &req->rq_import->imp_connection->c_peer; - char str[PTL_NALFMT_SIZE]; + 
lnet_process_id_t peer = req->rq_import->imp_connection->c_peer; if (rc == -ETIMEDOUT || rc == -EINTR || rc == -ENOTCONN) { LASSERT(lock->l_export); if (lock->l_export->exp_libclient) { LDLM_DEBUG(lock, "%s AST to liblustre client (nid %s)" " timeout, just cancelling lock", ast_type, - ptlrpc_peernid2str(peer, str)); + libcfs_nid2str(peer.nid)); ldlm_lock_cancel(lock); rc = -ERESTART; } else { @@ -405,11 +402,11 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock, if (rc == -EINVAL) LDLM_DEBUG(lock, "client (nid %s) returned %d" " from %s AST - normal race", - ptlrpc_peernid2str(peer, str), + libcfs_nid2str(peer.nid), req->rq_repmsg->status, ast_type); else LDLM_ERROR(lock, "client (nid %s) returned %d " - "from %s AST", ptlrpc_peernid2str(peer, str), + "from %s AST", libcfs_nid2str(peer.nid), (req->rq_repmsg != NULL) ? req->rq_repmsg->status : 0, ast_type); ldlm_lock_cancel(lock); @@ -915,7 +912,7 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) " from client %s id %s\n", dlm_req->lock_handle1.cookie, req->rq_export->exp_client_uuid.uuid, - req->rq_peerstr); + libcfs_id2str(req->rq_peer)); LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock " "(cookie "LPU64")", dlm_req->lock_handle1.cookie); @@ -1153,7 +1150,7 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) "export cookie "LPX64"; this is " "normal if this node rebooted with a lock held\n", req->rq_reqmsg->opc, - req->rq_peerstr, + libcfs_id2str(req->rq_peer), req->rq_reqmsg->handle.cookie); dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req), @@ -1289,7 +1286,7 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) struct ldlm_request *dlm_req; CERROR("operation %d from %s with bad export cookie "LPU64"\n", - req->rq_reqmsg->opc, req->rq_peerstr, + req->rq_reqmsg->opc, libcfs_id2str(req->rq_peer), req->rq_reqmsg->handle.cookie); dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req), @@ -1357,7 +1354,7 @@ static int ldlm_bl_thread_main(void *arg) char 
name[sizeof(current->comm)]; snprintf(name, sizeof(name) - 1, "ldlm_bl_%02d", bltd->bltd_num); - kportal_daemonize(name); + libcfs_daemonize(name); } SIGNAL_MASK_LOCK(current, flags); sigfillset(¤t->blocked); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 49038e2..90eed82 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -62,7 +62,7 @@ int ldlm_expired_completion_wait(void *data) ldlm_namespace_dump(D_DLMTRACE, lock->l_resource->lr_namespace); if (last_dump == 0) - portals_debug_dumplog(); + libcfs_debug_dumplog(); } RETURN(0); } @@ -693,12 +693,11 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) rc = ptlrpc_queue_wait(req); if (rc == ESTALE) { - char str[PTL_NALFMT_SIZE]; CERROR("client/server (nid %s) out of sync" " -- not fatal, flags %d\n", - ptlrpc_peernid2str(&req->rq_import-> - imp_connection->c_peer, str), -lock->l_flags); + libcfs_nid2str(req->rq_import-> + imp_connection->c_peer.nid), + lock->l_flags); } else if (rc == -ETIMEDOUT) { ptlrpc_req_finished(req); GOTO(restart, rc); diff --git a/lustre/liblustre/Makefile.am b/lustre/liblustre/Makefile.am index 6fac0b2..9b4829a 100644 --- a/lustre/liblustre/Makefile.am +++ b/lustre/liblustre/Makefile.am @@ -2,11 +2,13 @@ SUBDIRS = . 
tests AM_CPPFLAGS = $(HAVE_EFENCE) -I$(SYSIO)/include -D_LARGEFILE64_SOURCE=1 \ - $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals + $(LLCPPFLAGS) -I$(top_srcdir)/lnet/ulnds AM_CFLAGS = $(LLCFLAGS) AM_LIBS = $(LIBEFENCE) + + LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/lov/liblov.a \ $(top_builddir)/lustre/obdecho/libobdecho.a \ @@ -16,14 +18,16 @@ LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/obdclass/liblustreclass.a \ $(top_builddir)/lustre/lvfs/liblvfs.a -if !CRAY_PORTALS -PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \ - $(top_builddir)/portals/unals/libtcpnal.a \ - $(top_builddir)/portals/portals/libportals.a -else -PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \ - $(CRAY_PORTALS_LIBS)/libportals.a +LND_LIBS = +if BUILD_USOCKLND +LND_LIBS += $(top_builddir)/lnet/ulnds/socklnd/libsocklnd.a endif +if BUILD_PTLLND +LND_LIBS += $(top_builddir)/lnet/ulnds/ptllnd/libptllnd.a +endif + +LNET_LIBS = $(top_builddir)/lnet/utils/libuptlctl.a \ + $(top_builddir)/lnet/lnet/liblnet.a SYSIO_LIBS = $(SYSIO)/lib/libsysio.a @@ -52,8 +56,8 @@ libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ liblustre_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ llite_lib.h -liblustre.a : $(LUSTRE_LIBS) $(PTL_LIBS) $(SYSIO_LIBS) - sh $(srcdir)/genlib.sh "$(SYSIO)" "$(CRAY_PORTALS_LIBS)" "$(LIBS)" +liblustre.a : $(LUSTRE_LIBS) $(LND_LIBS) $(LNET_LIBS) $(SYSIO_LIBS) + sh $(srcdir)/genlib.sh "$(SYSIO)" "$(LIBS)" "$(LND_LIBS)" "$(PTHREAD_LIBS)" EXTRA_DIST = genlib.sh diff --git a/lustre/liblustre/genlib.sh b/lustre/liblustre/genlib.sh index d4c2bb3..b615afd 100755 --- a/lustre/liblustre/genlib.sh +++ b/lustre/liblustre/genlib.sh @@ -18,8 +18,9 @@ RANLIB=/usr/bin/ranlib CWD=`pwd` SYSIO=$1 -CRAY_PORTALS_LIBS=$2 -LIBS=$3 +LIBS=$2 +LND_LIBS=$3 +PTHREAD_LIBS=$4 if [ ! 
-f $SYSIO/lib/libsysio.a ]; then echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist" @@ -54,22 +55,6 @@ build_sysio_obj_list() { done } -# -# special treatment for libportals.a -# -cray_tmp=$CWD/cray_tmp_`date +%s` -rm -rf $cray_tmp -build_cray_portals_obj_list() { - _objs=`$AR -t $1` - mkdir -p $cray_tmp - cd $cray_tmp - $AR -x $1 - cd .. - for _lib in $_objs; do - ALL_OBJS=$ALL_OBJS"$cray_tmp/$_lib "; - done -} - # lustre components libs build_obj_list . libllite.a build_obj_list ../lov liblov.a @@ -80,16 +65,16 @@ build_obj_list ../ptlrpc libptlrpc.a build_obj_list ../obdclass liblustreclass.a build_obj_list ../lvfs liblvfs.a -# portals components libs -build_obj_list ../../portals/utils libuptlctl.a - -if [ "x$CRAY_PORTALS_LIBS" = "x" ]; then - build_obj_list ../../portals/unals libtcpnal.a - build_obj_list ../../portals/portals libportals.a -# if libportals is already in our LIBS we don't need to link against it here -elif $(echo "$LIBS" | grep -v -- "-lportals" >/dev/null) ; then - build_cray_portals_obj_list $CRAY_PORTALS_LIBS/libportals.a +# lnet components libs +build_obj_list ../../lnet/utils libuptlctl.a +build_obj_list ../../lnet/libcfs libcfs.a +if $(echo "$LND_LIBS" | grep "socklnd" >/dev/null) ; then + build_obj_list ../../lnet/ulnds/socklnd libsocklnd.a +fi +if $(echo "$LND_LIBS" | grep "ptllnd" >/dev/null) ; then + build_obj_list ../../lnet/ulnds/ptllnd libptllnd.a fi +build_obj_list ../../lnet/lnet liblnet.a # create static lib lsupport rm -f $CWD/liblsupport.a @@ -113,8 +98,7 @@ if test x$OS = xAIX; then gcc -shared -o $CWD/liblustre.so $ALL_OBJS -lpthread -Xlinker -bnoipath ../../libsyscall.so else $LD -shared -o $CWD/liblustre.so -init __liblustre_setup_ -fini __liblustre_cleanup_ \ - $ALL_OBJS -lcap -lpthread + $ALL_OBJS -lcap $PTHREAD_LIBS fi rm -rf $sysio_tmp -rm -rf $cray_tmp diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index 9c2e22b..10e3472 100644 --- a/lustre/liblustre/llite_lib.c +++ 
b/lustre/liblustre/llite_lib.c @@ -47,21 +47,18 @@ #define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE" #define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK" #define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS" -#define ENV_LUSTRE_NAL_NAME "LIBLUSTRE_NAL_NAME" /* both sys/queue.h (libsysio require it) and portals/lists.h have definition * of 'LIST_HEAD'. undef it to suppress warnings */ #undef LIST_HEAD -#include /* needed for parse_dump */ +#include /* needed for parse_dump */ #include "lutil.h" #include "llite_lib.h" static int lllib_init(void) { - liblustre_set_nal_nid(); - if (liblustre_init_current("liblustre") || init_obdclass() || init_lib_portals() || @@ -88,37 +85,23 @@ int liblustre_process_log(struct config_llog_instance *cfg, class_uuid_t uuid; struct obd_uuid mdc_uuid; struct llog_ctxt *ctxt; - ptl_nid_t nid = 0; - int nal, err, rc = 0; - char *nal_name; + lnet_nid_t nid = 0; + int err, rc = 0; ENTRY; generate_random_uuid(uuid); class_uuid_unparse(uuid, &mdc_uuid); - if (ptl_parse_nid(&nid, mdsnid)) { + nid = libcfs_str2nid(mdsnid); + if (nid == LNET_NID_ANY) { CERROR("Can't parse NID %s\n", mdsnid); RETURN(-EINVAL); } - nal_name = getenv(ENV_LUSTRE_NAL_NAME); - if (!nal_name) { -#if CRAY_PORTALS - nal_name = "cray_qk_nal"; -#else - nal_name = "tcp"; -#endif - } - nal = ptl_name2nal(nal_name); - if (nal <= 0) { - CERROR("Can't parse NAL %s\n", nal_name); - RETURN(-EINVAL); - } lustre_cfg_bufs_reset(&bufs, NULL); lustre_cfg_bufs_set_string(&bufs, 1, peer); lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); lcfg->lcfg_nid = nid; - lcfg->lcfg_nal = nal; rc = class_process_config(lcfg); lustre_cfg_free(lcfg); if (rc < 0) @@ -263,8 +246,8 @@ int _sysio_lustre_init(void) #endif #if 0 - portal_debug = -1; - portal_subsystem_debug = -1; + libcfs_debug = -1; + libcfs_subsystem_debug = -1; #endif liblustre_init_random(); @@ -284,11 +267,11 @@ int _sysio_lustre_init(void) /* debug masks */ debug_mask = getenv(ENV_LUSTRE_DEBUG_MASK); if (debug_mask) - portal_debug = 
(unsigned int) strtol(debug_mask, NULL, 0); + libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0); debug_subsys = getenv(ENV_LUSTRE_DEBUG_SUBSYS); if (debug_subsys) - portal_subsystem_debug = + libcfs_subsystem_debug = (unsigned int) strtol(debug_subsys, NULL, 0); #ifndef INIT_SYSIO @@ -377,9 +360,9 @@ void __liblustre_cleanup_(void) * liblutre. this dilema lead to another hack in * libsysio/src/file_hack.c FIXME */ -#ifdef INIT_SYSIO _sysio_shutdown(); +#ifdef INIT_SYSIO cleanup_lib_portals(); - PtlFini(); + LNetFini(); #endif } diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 349548b..a6371924 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -171,11 +171,8 @@ struct mount_option_s /* llite_lib.c */ void generate_random_uuid(unsigned char uuid_out[16]); -int liblustre_process_log(struct config_llog_instance *cfg, - char *mdsnid, - char *mdsname, - char *profile, - int allow_recov); +int liblustre_process_log(struct config_llog_instance *cfg, char *mdsnid, + char *mdsname, char *profile, int allow_recov); int ll_parse_mount_target(const char *target, char **mdsnid, char **mdsname, char **profile); diff --git a/lustre/liblustre/lutil.c b/lustre/liblustre/lutil.c index d432f50..a59ae84 100644 --- a/lustre/liblustre/lutil.c +++ b/lustre/liblustre/lutil.c @@ -48,22 +48,15 @@ #include "lutil.h" -#if CRAY_PORTALS -void portals_debug_dumplog(void){}; -#endif -unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); -unsigned int portal_debug = 0; +unsigned int libcfs_subsystem_debug = ~0 - (S_LNET | S_LND); +unsigned int libcfs_debug = 0; struct task_struct *current; -ptl_handle_ni_t tcpnal_ni; -ptl_nid_t tcpnal_mynid; void *inter_module_get(char *arg) { - if (!strcmp(arg, "tcpnal_ni")) - return &tcpnal_ni; - else if (!strcmp(arg, "ldlm_cli_cancel_unused")) + if (!strcmp(arg, "ldlm_cli_cancel_unused")) return ldlm_cli_cancel_unused; else if (!strcmp(arg, "ldlm_namespace_cleanup")) return 
ldlm_namespace_cleanup; @@ -73,52 +66,6 @@ void *inter_module_get(char *arg) return NULL; } -char *portals_nid2str(int nal, ptl_nid_t nid, char *str) -{ - if (nid == PTL_NID_ANY) { - snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY"); - return str; - } - - switch(NALID_FROM_IFACE(nal)){ -#if !CRAY_PORTALS - case TCPNAL: - /* userspace NAL */ - case IIBNAL: - case OPENIBNAL: - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", - (__u32)(nid >> 32), HIPQUAD(nid)); - break; - case QSWNAL: - case GMNAL: - snprintf(str, PTL_NALFMT_SIZE, "%u:%u", - (__u32)(nid >> 32), (__u32)nid); - break; -#else - case PTL_IFACE_SS: - case PTL_IFACE_SS_ACCEL: - snprintf(str, PTL_NALFMT_SIZE, "%u", (__u32)nid); - break; -#endif - default: - snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx", - nal, (long long)nid); - break; - } - return str; -} - -char *portals_id2str(int nal, ptl_process_id_t id, char *str) -{ - int len; - - portals_nid2str(nal, id.nid, str); - len = strlen(str); - snprintf(str + len, PTL_NALFMT_SIZE - len, ",%u", id.pid); - return str; -} - /* * random number generator stuff */ @@ -232,38 +179,6 @@ static void init_capability(int *res) #endif } -void liblustre_set_nal_nid() -{ -#ifdef HAVE_GETHOSTBYNAME - pid_t pid; - uint32_t ip; - struct in_addr in; - - /* need to setup mynid before tcpnal initialization */ - /* a meaningful nid could help debugging */ - ip = get_ipv4_addr(); - if (ip == 0) - get_random_bytes(&ip, sizeof(ip)); - pid = getpid() & 0xffffffff; - tcpnal_mynid = ((uint64_t)pid << 32) | ip; - - in.s_addr = htonl(ip); - CDEBUG(D_RPCTRACE | D_VFSTRACE, "TCPNAL NID: %016Lx (%u:%s)\n", - (long long)tcpnal_mynid, pid, inet_ntoa(in)); -#else - pid_t pid; - uint32_t ip; - - ip = _my_pnid; - if (ip & 0xFF) - ip <<= 8; - pid = getpid() & 0xFF; - tcpnal_mynid = ip | pid; - CDEBUG(D_RPCTRACE | D_VFSTRACE, "NAL NID: %08x (%u)\n", - tcpnal_mynid, pid); -#endif -} - int in_group_p(gid_t gid) { int i; @@ -317,13 +232,12 @@ void generate_random_uuid(unsigned char 
uuid_out[16]) int init_lib_portals() { - int max_interfaces; int rc; ENTRY; - rc = PtlInit(&max_interfaces); - if (rc != PTL_OK) { - CERROR("PtlInit failed: %d\n", rc); + rc = LNetInit(); + if (rc != 0) { + CERROR("LNetInit failed: %d\n", rc); RETURN (-ENXIO); } RETURN(0); @@ -334,10 +248,3 @@ void cleanup_lib_portals() { ptlrpc_exit_portals(); } - -int -libcfs_nal_cmd(struct portals_cfg *pcfg) -{ - /* handle portals command if we want */ - return 0; -} diff --git a/lustre/liblustre/lutil.h b/lustre/liblustre/lutil.h index dc67a23..dc5e6e2 100644 --- a/lustre/liblustre/lutil.h +++ b/lustre/liblustre/lutil.h @@ -28,7 +28,6 @@ void liblustre_init_random(void); int liblustre_init_current(char *comm); -void liblustre_set_nal_nid(void); int init_lib_portals(void); void cleanup_lib_portals(void); diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am index 616fea4..1362aaa 100644 --- a/lustre/liblustre/tests/Makefile.am +++ b/lustre/liblustre/tests/Makefile.am @@ -1,6 +1,6 @@ ## Liblustre excecutables & libraries Makefile -AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals +AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir)/lnet/ulnds AM_CFLAGS = $(LLCFLAGS) AM_LIBS = $(LIBEFENCE) $(LIBREADLINE) diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 12816f1..2f4e093 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -33,14 +33,6 @@ extern int class_handle_ioctl(unsigned int cmd, unsigned long arg); -struct pingcli_args { - ptl_nid_t mynid; - ptl_nid_t nid; - ptl_pid_t port; - int count; - int size; -}; - static int liblustre_ioctl(int dev_id, unsigned int opc, void *ptr) { int rc = -EINVAL; @@ -68,11 +60,11 @@ static int connect_echo_client(void) { struct lustre_cfg *lcfg; struct lustre_cfg_bufs bufs; - ptl_nid_t nid; + lnet_nid_t nid; char *peer = "ECHO_PEER_NID"; class_uuid_t osc_uuid, 
echo_uuid; struct obd_uuid osc_uuid_str, echo_uuid_str; - int nal, err; + int err; ENTRY; generate_random_uuid(osc_uuid); @@ -80,22 +72,17 @@ static int connect_echo_client(void) generate_random_uuid(echo_uuid); class_uuid_unparse(echo_uuid, &echo_uuid_str); - if (ptl_parse_nid(&nid, echo_server_nid)) { + nid = libcfs_str2nid(echo_server_nid); + if (nid == LNET_NID_ANY) { CERROR("Can't parse NID %s\n", echo_server_nid); RETURN(-EINVAL); } - nal = ptl_name2nal("tcp"); - if (nal <= 0) { - CERROR("Can't parse NAL tcp\n"); - RETURN(-EINVAL); - } - /* add uuid */ + /* add uuid */ lustre_cfg_bufs_reset(&bufs, NULL); lustre_cfg_bufs_set_string(&bufs, 1, peer); lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); lcfg->lcfg_nid = nid; - lcfg->lcfg_nal = nal; err = class_process_config(lcfg); lustre_cfg_free(lcfg); if (err < 0) { @@ -103,8 +90,9 @@ static int connect_echo_client(void) RETURN(-EINVAL); } - /* attach osc */ - lustre_cfg_bufs_reset(&bufs, LUSTRE_OSC_NAME); + /* attach osc */ + lustre_cfg_bufs_reset(&bufs, osc_dev_name); + lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_OSC_NAME); lustre_cfg_bufs_set_string(&bufs, 2, osc_uuid_str.uuid); lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs); err = class_process_config(lcfg); @@ -238,11 +226,10 @@ int main(int argc, char **argv) return 1; } - portal_debug = 0; - portal_subsystem_debug = 0; + libcfs_debug = 0; + libcfs_subsystem_debug = 0; liblustre_init_random(); - liblustre_set_nal_nid(); if (liblustre_init_current(argv[0]) || init_obdclass() || init_lib_portals() || diff --git a/lustre/liblustre/tests/recovery_small.c b/lustre/liblustre/tests/recovery_small.c index fcd6a7e..6af93f1 100644 --- a/lustre/liblustre/tests/recovery_small.c +++ b/lustre/liblustre/tests/recovery_small.c @@ -296,8 +296,8 @@ void t7() LEAVE(); } -extern int portal_debug; -extern int portal_subsystem_debug; +extern int libcfs_debug; +extern int libcfs_subsystem_debug; extern void __liblustre_setup_(void); extern void __liblustre_cleanup_(void); diff --git 
a/lustre/liblustre/tests/replay_ost_single.c b/lustre/liblustre/tests/replay_ost_single.c index 2897807..418ba94 100644 --- a/lustre/liblustre/tests/replay_ost_single.c +++ b/lustre/liblustre/tests/replay_ost_single.c @@ -242,8 +242,8 @@ void t4() } } -extern int portal_debug; -extern int portal_subsystem_debug; +extern int libcfs_debug; +extern int libcfs_subsystem_debug; extern void __liblustre_setup_(void); extern void __liblustre_cleanup_(void); diff --git a/lustre/liblustre/tests/replay_single.c b/lustre/liblustre/tests/replay_single.c index a10d684..17155f8 100644 --- a/lustre/liblustre/tests/replay_single.c +++ b/lustre/liblustre/tests/replay_single.c @@ -309,8 +309,8 @@ void t10() t_unlink(path2); } -extern int portal_debug; -extern int portal_subsystem_debug; +extern int libcfs_debug; +extern int libcfs_subsystem_debug; extern void __liblustre_setup_(void); extern void __liblustre_cleanup_(void); diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index d52c9f0..5ee1d98 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -1138,7 +1138,7 @@ int main(int argc, char * const argv[]) free(buf_alloc); - printf("liblustre is about shutdown\n"); + printf("liblustre is about to shutdown\n"); __liblustre_cleanup_(); printf("complete successfully\n"); diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 7ee9a58..e41c118 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -165,7 +165,7 @@ restart: CERROR("called on root (?) 
dentry=%p, inode=%p " "ino=%lu\n", dentry, inode, inode->i_ino); lustre_dump_dentry(dentry, 1); - portals_debug_dumpstack(NULL); + libcfs_debug_dumpstack(NULL); } else if (d_mountpoint(dentry)) { /* For mountpoints we skip removal of the dentry which happens solely because we have a lock on it diff --git a/lustre/llite/llite_close.c b/lustre/llite/llite_close.c index c218fa4..91b77f8 100644 --- a/lustre/llite/llite_close.c +++ b/lustre/llite/llite_close.c @@ -200,7 +200,7 @@ static int ll_close_thread(void *arg) char name[sizeof(current->comm)]; unsigned long flags; snprintf(name, sizeof(name) - 1, "ll_close"); - kportal_daemonize(name); + libcfs_daemonize(name); SIGNAL_MASK_LOCK(current, flags); sigfillset(¤t->blocked); RECALC_SIGPENDING; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index f668c62..554e229 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -457,6 +457,11 @@ void ll_options(char *options, char **ost, char **mdc, int *flags) *flags |= tmp; continue; } + tmp = ll_set_opt("nouser_xattr", this_char, LL_SBI_USER_XATTR); + if (tmp) { + *flags &= ~tmp; + continue; + } } EXIT; } @@ -516,7 +521,6 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, { struct lustre_cfg *lcfg = NULL; struct lustre_cfg_bufs bufs; - struct portals_cfg pcfg; char * peer = "MDS_PEER_UUID"; struct obd_device *obd; struct lustre_handle mdc_conn = {0, }; @@ -536,42 +540,16 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, class_uuid_unparse(uuid, &mdc_uuid); CDEBUG(D_HA, "generated uuid: %s\n", mdc_uuid.uuid); - if (lmd->lmd_local_nid) { - PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID); - pcfg.pcfg_nal = lmd->lmd_nal; - pcfg.pcfg_nid = lmd->lmd_local_nid; - rc = libcfs_nal_cmd(&pcfg); - if (rc < 0) - GOTO(out, rc); - } - - if (lmd->lmd_nal == SOCKNAL || - lmd->lmd_nal == OPENIBNAL || - lmd->lmd_nal == IIBNAL || - lmd->lmd_nal == VIBNAL || - lmd->lmd_nal == RANAL) { - PCFG_INIT(pcfg, NAL_CMD_ADD_PEER); - 
pcfg.pcfg_nal = lmd->lmd_nal; - pcfg.pcfg_nid = lmd->lmd_server_nid; - LASSERT(pcfg.pcfg_nid); - pcfg.pcfg_id = lmd->lmd_server_ipaddr; - pcfg.pcfg_misc = lmd->lmd_port; - rc = libcfs_nal_cmd(&pcfg); - if (rc < 0) - GOTO(out, rc); - } - lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, peer); lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); - lcfg->lcfg_nal = lmd->lmd_nal; - lcfg->lcfg_nid = lmd->lmd_server_nid; - LASSERT(lcfg->lcfg_nal); + lcfg->lcfg_nid = lmd->lmd_nid; + LASSERT(lcfg->lcfg_nid != LNET_NID_ANY); rc = class_process_config(lcfg); lustre_cfg_free(lcfg); if (rc < 0) - GOTO(out_del_conn, rc); + GOTO(out, err); lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_MDC_NAME); @@ -590,8 +568,12 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, lcfg = lustre_cfg_new(LCFG_SETUP, &bufs); rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (rc < 0) + if (rc < 0) { + LCONSOLE_ERROR("I couldn't establish a connection with the MDS." 
+ " Check that the MDS host NID is correct and the" + " networks are up.\n"); GOTO(out_detach, rc); + } obd = class_name2obd(name); if (obd == NULL) @@ -667,20 +649,6 @@ out_del_uuid: if (err) CERROR("del MDC UUID failed: rc = %d\n", err); -out_del_conn: - if (lmd->lmd_nal == SOCKNAL || - lmd->lmd_nal == OPENIBNAL || - lmd->lmd_nal == IIBNAL || - lmd->lmd_nal == VIBNAL || - lmd->lmd_nal == RANAL) { - PCFG_INIT(pcfg, NAL_CMD_DEL_PEER); - pcfg.pcfg_nal = lmd->lmd_nal; - pcfg.pcfg_nid = lmd->lmd_server_nid; - pcfg.pcfg_flags = 1; /* single_share */ - err = libcfs_nal_cmd(&pcfg); - if (err) - CERROR("del MDS peer failed: rc = %d\n", err); - } out: RETURN(rc); @@ -741,11 +709,9 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) cfg.cfg_instance = ll_instance; cfg.cfg_uuid = sbi->ll_sb_uuid; - cfg.cfg_local_nid = lmd->lmd_local_nid; err = lustre_process_log(lmd, lmd->lmd_profile, &cfg, 0); if (err < 0) { CERROR("Unable to process log: %s\n", lmd->lmd_profile); - GOTO(out_free, err); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index dbd78ae..f52d543 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -578,6 +578,7 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, /* Ensure that this close's handle is fixed up during replay. */ LASSERT(och != NULL); + LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); mod = och->och_mod; if (likely(mod != NULL)) { mod->mod_close_req = req; @@ -585,8 +586,8 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, /* FIXME This should be an ASSERT, but until we figure out why it can be poisoned here, give a reasonable return. 
bug 6155 */ - CERROR("LBUG POISONED req %p!\n", mod->mod_open_req); - ptlrpc_free_req(req); + CERROR("LBUG POISONED open %p!\n", mod->mod_open_req); + ptlrpc_req_finished(req); GOTO(out, rc = -EIO); } DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index cc3cf0e..b809ca7 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -400,7 +400,7 @@ static int mds_disconnect(struct obd_export *exp) struct ptlrpc_reply_state *rs = list_entry(exp->exp_outstanding_replies.next, struct ptlrpc_reply_state, rs_exp_list); - struct ptlrpc_service *svc = rs->rs_srv_ni->sni_service; + struct ptlrpc_service *svc = rs->rs_service; spin_lock(&svc->srv_lock); list_del_init(&rs->rs_exp_list); @@ -1114,7 +1114,7 @@ int mds_handle(struct ptlrpc_request *req) if (req->rq_export == NULL) { CERROR("operation %d on unconnected MDS from %s\n", req->rq_reqmsg->opc, - req->rq_peerstr); + libcfs_id2str(req->rq_peer)); req->rq_status = -ENOTCONN; GOTO(out, rc = -ENOTCONN); } @@ -1541,9 +1541,12 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) mds_quota_setup(mds); + /* Wait for mds_postrecov trying to clear orphans until 9439 is fixed */ + obd->obd_async_recov = 0; rc = mds_postsetup(obd); if (rc) GOTO(err_fs, rc); + obd->obd_async_recov = 0; lprocfs_init_vars(mds, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); @@ -1653,7 +1656,6 @@ err_llog: int mds_postrecov(struct obd_device *obd) { - struct mds_obd *mds = &obd->u.mds; int rc, item = 0; ENTRY; @@ -1679,36 +1681,11 @@ int mds_postrecov(struct obd_device *obd) item = rc; } - rc = obd_set_info(mds->mds_osc_exp, strlen("mds_conn"), "mds_conn", - 0, NULL); - if (rc) - GOTO(out, rc); - - rc = llog_connect(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT), - obd->u.mds.mds_lov_desc.ld_tgt_count, - NULL, NULL, NULL); - if (rc) { - CERROR("%s: failed at llog_origin_connect: %d\n", - obd->obd_name, rc); - GOTO(out, rc); - } - - /* remove the orphaned precreated 
objects */ - rc = mds_lov_clearorphans(mds, NULL /* all OSTs */); - if (rc) { - GOTO(err_llog, rc); - } + /* Does anyone need this to be synchronous ever? */ + mds_lov_start_synchronize(obd, NULL, obd->obd_async_recov); out: RETURN(rc < 0 ? rc : item); - -err_llog: - /* cleanup all llogging subsystems */ - rc = obd_llog_finish(obd, mds->mds_lov_desc.ld_tgt_count); - if (rc) - CERROR("%s: failed to cleanup llogging subsystems\n", - obd->obd_name); - goto out; } int mds_lov_clean(struct obd_device *obd) diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 1aa5967..78ce7cb 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -154,6 +154,8 @@ int mds_lov_write_objids(struct obd_device *obd); void mds_lov_update_objids(struct obd_device *obd, obd_id *ids); int mds_lov_set_nextid(struct obd_device *obd); int mds_lov_clearorphans(struct mds_obd *mds, struct obd_uuid *ost_uuid); +int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid, + int nonblock); int mds_post_mds_lovconf(struct obd_device *obd); int mds_notify(struct obd_device *obd, struct obd_device *watched, int active); int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode, @@ -207,7 +209,7 @@ int lustre_dquot_init(void); void lustre_dquot_exit(void); int dqacq_handler(struct obd_device *obd, struct qunit_data *qdata, int opc); void mds_adjust_qunit(struct obd_device *obd, uid_t cuid, gid_t cgid, - uid_t puid, gid_t pgid, int rc); + uid_t puid, gid_t pgid, int rc); int init_admin_quotafiles(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl); int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl); diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index c780cb1..2047fdb 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -364,7 +364,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, LASSERT(body != NULL); /* 
previously verified & swabbed by caller */ -#if CRAY_PORTALS +#if CRAY_XT3 ucred->luc_fsuid = req->rq_uid; #else ucred->luc_fsuid = body->fsuid; @@ -382,7 +382,7 @@ int mds_init_ucred(struct lvfs_ucred *ucred, struct ptlrpc_request *req, return rc; } -#if CRAY_PORTALS +#if CRAY_XT3 if (ucred->luc_uce) ucred->luc_fsgid = ucred->luc_uce->ue_primary; #endif diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index b9f28fd..94a0da7 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -364,8 +364,6 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (data->ioc_type == LUSTRE_CFG_TYPE) { rec.lrh_type = OBD_CFG_REC; - } else if (data->ioc_type == PORTALS_CFG_TYPE) { - rec.lrh_type = PTL_CFG_REC; } else { CERROR("unknown cfg record type:%d \n", data->ioc_type); RETURN(-EINVAL); @@ -491,22 +489,13 @@ struct mds_lov_sync_info { struct obd_uuid *mlsi_uuid; /* target to sync */ }; -int mds_lov_synchronize(void *data) +static int __mds_lov_syncronize(void *data) { struct mds_lov_sync_info *mlsi = data; struct obd_device *obd; struct obd_uuid *uuid; - unsigned long flags; int rc = 0; - - lock_kernel(); - ptlrpc_daemonize(); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - unlock_kernel(); + ENTRY; obd = mlsi->mlsi_obd; uuid = mlsi->mlsi_uuid; @@ -514,7 +503,6 @@ int mds_lov_synchronize(void *data) OBD_FREE(mlsi, sizeof(*mlsi)); LASSERT(obd != NULL); - LASSERT(uuid != NULL); rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"), "mds_conn", 0, uuid); @@ -531,7 +519,8 @@ int mds_lov_synchronize(void *data) } CWARN("MDS %s: %s now active, resetting orphans\n", - obd->obd_name, uuid->uuid); + obd->obd_name, uuid ? 
(char *)uuid->uuid : "All OSC's"); + rc = mds_lov_clearorphans(&obd->u.mds, uuid); if (rc != 0) { CERROR("%s: failed at mds_lov_clearorphans: %d\n", @@ -544,7 +533,24 @@ out: RETURN(rc); } -int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid) +int mds_lov_synchronize(void *data) +{ + unsigned long flags; + + lock_kernel(); + ptlrpc_daemonize(); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + unlock_kernel(); + + return (__mds_lov_syncronize(data)); +} + +int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid, + int nonblock) { struct mds_lov_sync_info *mlsi; int rc; @@ -561,14 +567,20 @@ int mds_lov_start_synchronize(struct obd_device *obd, struct obd_uuid *uuid) /* We need to lock the mds in place for our new thread context. */ class_export_get(obd->obd_self_export); - rc = kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES); - if (rc < 0) - CERROR("%s: error starting mds_lov_synchronize: %d\n", - obd->obd_name, rc); - else { - CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n", - obd->obd_name, rc); - rc = 0; + if (nonblock) { + /* Syncronize in the background */ + rc = kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES); + if (rc < 0) { + CERROR("%s: error starting mds_lov_synchronize: %d\n", + obd->obd_name, rc); + class_export_put(obd->obd_self_export); + } else { + CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n", + obd->obd_name, rc); + rc = 0; + } + } else { + rc = __mds_lov_syncronize((void *)mlsi); } RETURN(rc); @@ -595,8 +607,7 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, int active) obd->obd_name, uuid->uuid); } else { LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); - - rc = mds_lov_start_synchronize(obd, uuid); + rc = mds_lov_start_synchronize(obd, uuid, 1); } RETURN(rc); } diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 5cce652..49cd470 
100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -293,7 +293,6 @@ void mds_steal_ack_locks(struct ptlrpc_request *req) struct ptlrpc_reply_state *oldrep; struct ptlrpc_service *svc; unsigned long flags; - char str[PTL_NALFMT_SIZE]; int i; /* CAVEAT EMPTOR: spinlock order */ @@ -309,7 +308,7 @@ void mds_steal_ack_locks(struct ptlrpc_request *req) "new %d old %d\n", req->rq_xid, req->rq_reqmsg->opc, oldrep->rs_msg.opc); - svc = oldrep->rs_srv_ni->sni_service; + svc = oldrep->rs_service; spin_lock (&svc->srv_lock); list_del_init (&oldrep->rs_exp_list); @@ -318,7 +317,7 @@ void mds_steal_ack_locks(struct ptlrpc_request *req) " o%d NID %s\n", oldrep->rs_nlocks, oldrep, oldrep->rs_xid, oldrep->rs_transno, oldrep->rs_msg.opc, - ptlrpc_peernid2str(&exp->exp_connection->c_peer, str)); + libcfs_nid2str(exp->exp_connection->c_peer.nid)); for (i = 0; i < oldrep->rs_nlocks; i++) ptlrpc_save_lock(req, @@ -2103,7 +2102,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, int rc; ENTRY; -#if CRAY_PORTALS +#if CRAY_XT3 rec->ur_uc.luc_fsuid = req->rq_uid; #endif @@ -2122,7 +2121,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, /* checked by unpacker */ LASSERT(rec->ur_opcode < REINT_MAX && reinters[rec->ur_opcode] != NULL); -#if CRAY_PORTALS +#if CRAY_XT3 if (rec->ur_uc.luc_uce) rec->ur_uc.luc_fsgid = rec->ur_uc.luc_uce->ue_primary; #endif diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index d4916b1..baeba10 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -75,7 +75,7 @@ #ifndef __KERNEL__ /* liblustre workaround */ -atomic_t portal_kmemory = {0}; +atomic_t libcfs_kmemory = {0}; #endif struct obd_device obd_dev[MAX_OBD_DEVICES]; @@ -175,22 +175,24 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) { char *buf = NULL; struct obd_ioctl_data *data; - struct portals_debug_ioctl_data *debug_data; + struct libcfs_debug_ioctl_data *debug_data; struct obd_device *obd = NULL; int 
err = 0, len = 0; ENTRY; +#ifdef __KERNEL__ if (current->fsuid != 0) RETURN(err = -EACCES); +#endif if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ RETURN(err = -ENOTTY); /* only for debugging */ - if (cmd == PTL_IOC_DEBUG_MASK) { - debug_data = (struct portals_debug_ioctl_data*)arg; - portal_subsystem_debug = debug_data->subs; - portal_debug = debug_data->debug; + if (cmd == LIBCFS_IOC_DEBUG_MASK) { + debug_data = (struct libcfs_debug_ioctl_data*)arg; + libcfs_subsystem_debug = debug_data->subs; + libcfs_debug = debug_data->debug; return 0; } @@ -296,11 +298,8 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) case OBD_IOC_CLOSE_UUID: { - ptl_nid_t peer_nid; - __u32 peer_nal; - CDEBUG(D_IOCTL, "closing all connections to uuid %s\n", + CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n", data->ioc_inlbuf1); - lustre_uuid_to_peer(data->ioc_inlbuf1, &peer_nal, &peer_nid); GOTO(out, err = 0); } @@ -464,7 +463,7 @@ static int obd_proc_read_health(char *page, char **start, off_t off, int rc = 0, i; *eof = 1; - if (portals_catastrophe) + if (libcfs_catastrophe) rc += snprintf(page + rc, count - rc, "LBUG\n"); spin_lock(&obd_dev_lock); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 9e51686..79c730a 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -29,20 +29,12 @@ #ifdef __KERNEL__ #include /* for request_module() */ #include -#include -#include -#include -#include -#include -#include -#include #else #include -#include +#endif #include #include -#include -#endif +#include #include #include @@ -1014,7 +1006,7 @@ void class_fail_export(struct obd_export *exp) exp, exp->exp_client_uuid.uuid); if (obd_dump_on_timeout) - portals_debug_dumplog(); + libcfs_debug_dumplog(); /* Most callers into obd_disconnect are removing their own reference * (request, for example) in addition to the one from the hash table. 
@@ -1029,6 +1021,15 @@ void class_fail_export(struct obd_export *exp) } EXPORT_SYMBOL(class_fail_export); +char *obd_export_nid2str(struct obd_export *exp) +{ + if (exp->exp_connection != NULL) + return libcfs_nid2str(exp->exp_connection->c_peer.nid); + + return "(no nid)"; +} +EXPORT_SYMBOL(obd_export_nid2str); + /* Ping evictor thread */ #ifdef __KERNEL__ #define PET_READY 1 @@ -1070,7 +1071,7 @@ static int ping_evictor_main(void *arg) ENTRY; lock_kernel(); - kportal_daemonize("ping_evictor"); + libcfs_daemonize("ping_evictor"); SIGNAL_MASK_LOCK(current, flags); sigfillset(&current->blocked); RECALC_SIGPENDING; @@ -1108,16 +1109,13 @@ static int ping_evictor_main(void *arg) struct obd_export,exp_obd_chain_timed); if (expire_time > exp->exp_last_request_time) { - char ipbuf[PTL_NALFMT_SIZE]; - class_export_get(exp); spin_unlock(&obd->obd_dev_lock); - LCONSOLE_WARN("%s: haven't heard from %s in %ld" " seconds. I think it's dead, " "and I am evicting it.\n", obd->obd_name, - obd_export_nid2str(exp, ipbuf), + obd_export_nid2str(exp), (long)(CURRENT_SECONDS - exp->exp_last_request_time)); @@ -1181,7 +1179,6 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) { struct obd_export *oldest_exp; time_t oldest_time; - char str[PTL_NALFMT_SIZE]; ENTRY; @@ -1237,9 +1234,8 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) exp->exp_obd->obd_eviction_timer = CURRENT_SECONDS + 3 * PING_INTERVAL; CDEBUG(D_HA, "%s: Think about evicting %s from %ld\n", - exp->exp_obd->obd_name, - ptlrpc_peernid2str(&exp->exp_connection->c_peer, - str), oldest_time); + exp->exp_obd->obd_name, obd_export_nid2str(exp), + oldest_time); } } else { if (CURRENT_SECONDS > (exp->exp_obd->obd_eviction_timer + @@ -1256,23 +1252,6 @@ void class_update_export_timer(struct obd_export *exp, time_t extra_delay) } EXPORT_SYMBOL(class_update_export_timer); -char *obd_export_nid2str(struct obd_export *exp, char *ipbuf) -{ - struct ptlrpc_peer *peer; - - peer = 
exp->exp_connection ? &exp->exp_connection->c_peer : NULL; - - if (peer && peer->peer_ni) { - portals_nid2str(peer->peer_ni->pni_number, - peer->peer_id.nid, - ipbuf); - } else { - snprintf(ipbuf, PTL_NALFMT_SIZE, "(no nid)"); - } - - return ipbuf; -} - #define EVICT_BATCH 32 int obd_export_evict_by_nid(struct obd_device *obd, char *nid) { @@ -1283,13 +1262,10 @@ int obd_export_evict_by_nid(struct obd_device *obd, char *nid) search_again: spin_lock(&obd->obd_dev_lock); list_for_each(p, &obd->obd_exports) { - char ipbuf[PTL_NALFMT_SIZE]; - doomed_exp[num_to_evict] = list_entry(p, struct obd_export, exp_obd_chain); - obd_export_nid2str(doomed_exp[num_to_evict], ipbuf); - - if (strcmp(ipbuf, nid) == 0) { + if (strcmp(obd_export_nid2str(doomed_exp[num_to_evict]), nid) + == 0) { class_export_get(doomed_exp[num_to_evict]); if (++num_to_evict == EVICT_BATCH) break; diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index 830eb68..ce3f0a1 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -128,7 +128,7 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, case MDS_UNLINK_REC: case MDS_SETATTR_REC: case OBD_CFG_REC: - case PTL_CFG_REC: + case PTL_CFG_REC: /* obsolete */ case LLOG_HDR_MAGIC: { l = snprintf(out, remains, "[index]: %05d [type]: " "%02x [len]: %04d ok\n", diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index 813209f..ff8688f 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -126,7 +126,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) } case OBD_CFG_REC: - case PTL_CFG_REC: + case PTL_CFG_REC: /* obsolete */ /* these are swabbed as they are consumed */ break; @@ -201,74 +201,12 @@ void lustre_swab_llog_hdr (struct llog_log_hdr *h) } EXPORT_SYMBOL(lustre_swab_llog_hdr); -#define PRINT_PCFG32(x) CDEBUG(D_OTHER, "\tpcfg->pcfg_"#x": %#x\n", pcfg->pcfg_##x) -#define PRINT_PCFG64(x) CDEBUG(D_OTHER, 
"\tpcfg->pcfg_"#x": "LPX64"\n", pcfg->pcfg_##x) - -static void print_portals_cfg(struct portals_cfg *pcfg) -{ - ENTRY; - - if (!(portal_debug & D_OTHER)) /* don't loop on nothing */ - return; - CDEBUG(D_OTHER, "portals_cfg: %p\n", pcfg); - PRINT_PCFG32(version); - PRINT_PCFG32(command); - - PRINT_PCFG32(nal); - PRINT_PCFG32(flags); - - PRINT_PCFG32(gw_nal); - PRINT_PCFG64(nid); - PRINT_PCFG64(nid2); - PRINT_PCFG64(nid3); - PRINT_PCFG32(id); - PRINT_PCFG32(misc); - PRINT_PCFG32(fd); - PRINT_PCFG32(count); - PRINT_PCFG32(size); - PRINT_PCFG32(wait); - - PRINT_PCFG32(plen1); - PRINT_PCFG32(plen2); - - EXIT; -} - -void lustre_swab_portals_cfg(struct portals_cfg *pcfg) -{ - ENTRY; - - __swab32s(&pcfg->pcfg_version); - __swab32s(&pcfg->pcfg_command); - - __swab32s(&pcfg->pcfg_nal); - __swab32s(&pcfg->pcfg_flags); - - __swab32s(&pcfg->pcfg_gw_nal); - __swab64s(&pcfg->pcfg_nid); - __swab64s(&pcfg->pcfg_nid2); - __swab64s(&pcfg->pcfg_nid3); - __swab32s(&pcfg->pcfg_id); - __swab32s(&pcfg->pcfg_misc); - __swab32s(&pcfg->pcfg_fd); - __swab32s(&pcfg->pcfg_count); - __swab32s(&pcfg->pcfg_size); - __swab32s(&pcfg->pcfg_wait); - - __swab32s(&pcfg->pcfg_plen1); - __swab32s(&pcfg->pcfg_plen2); - - print_portals_cfg(pcfg); - EXIT; -} -EXPORT_SYMBOL(lustre_swab_portals_cfg); - static void print_lustre_cfg(struct lustre_cfg *lcfg) { int i; ENTRY; - if (!(portal_debug & D_OTHER)) /* don't loop on nothing */ + if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */ return; CDEBUG(D_OTHER, "lustre_cfg: %p\n", lcfg); CDEBUG(D_OTHER, "\tlcfg->lcfg_version: %#x\n", lcfg->lcfg_version); @@ -276,8 +214,7 @@ static void print_lustre_cfg(struct lustre_cfg *lcfg) CDEBUG(D_OTHER, "\tlcfg->lcfg_command: %#x\n", lcfg->lcfg_command); CDEBUG(D_OTHER, "\tlcfg->lcfg_num: %#x\n", lcfg->lcfg_num); CDEBUG(D_OTHER, "\tlcfg->lcfg_flags: %#x\n", lcfg->lcfg_flags); - CDEBUG(D_OTHER, "\tlcfg->lcfg_nid: "LPX64"\n", lcfg->lcfg_nid); - CDEBUG(D_OTHER, "\tlcfg->lcfg_nal: %#x\n", lcfg->lcfg_nal); + 
CDEBUG(D_OTHER, "\tlcfg->lcfg_nid: %s\n", libcfs_nid2str(lcfg->lcfg_nid)); CDEBUG(D_OTHER, "\tlcfg->lcfg_bufcount: %d\n", lcfg->lcfg_bufcount); if (lcfg->lcfg_bufcount < LUSTRE_CFG_MAX_BUFCOUNT) @@ -306,7 +243,6 @@ void lustre_swab_lustre_cfg(struct lustre_cfg *lcfg) __swab32s(&lcfg->lcfg_num); __swab32s(&lcfg->lcfg_flags); __swab64s(&lcfg->lcfg_nid); - __swab32s(&lcfg->lcfg_nal); __swab32s(&lcfg->lcfg_bufcount); for (i = 0; i < lcfg->lcfg_bufcount && i < LUSTRE_CFG_MAX_BUFCOUNT; i++) diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index c4f87c0..a1d89e1 100644 --- a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -41,10 +41,9 @@ #include struct uuid_nid_data { - struct list_head head; - ptl_nid_t nid; - char *uuid; - __u32 nal; + struct list_head un_list; + lnet_nid_t un_nid; + char *un_uuid; }; /* FIXME: This should probably become more elegant than a global linked list */ @@ -63,7 +62,7 @@ void class_exit_uuidlist(void) class_del_uuid(NULL); } -int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid) +int lustre_uuid_to_peer(char *uuid, lnet_nid_t *peer_nid, int index) { struct list_head *tmp; @@ -71,11 +70,11 @@ int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid) list_for_each(tmp, &g_uuid_list) { struct uuid_nid_data *data = - list_entry(tmp, struct uuid_nid_data, head); + list_entry(tmp, struct uuid_nid_data, un_list); - if (strcmp(data->uuid, uuid) == 0) { - *peer_nid = data->nid; - *peer_nal = data->nal; + if (!strcmp(data->un_uuid, uuid) && + index-- == 0) { + *peer_nid = data->un_nid; spin_unlock (&g_uuid_lock); return 0; @@ -83,16 +82,16 @@ int lustre_uuid_to_peer(char *uuid, __u32 *peer_nal, ptl_nid_t *peer_nid) } spin_unlock (&g_uuid_lock); - return -1; + return -ENOENT; } -int class_add_uuid(char *uuid, __u64 nid, __u32 nal) +int class_add_uuid(char *uuid, __u64 nid) { struct uuid_nid_data *data; int rc; int nob = strnlen (uuid, PAGE_SIZE) + 1; - LASSERT(nal != 0); 
+ LASSERT(nid != 0); /* valid newconfig NID is never zero */ if (nob > PAGE_SIZE) return -EINVAL; @@ -102,20 +101,19 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal) if (data == NULL) return -ENOMEM; - OBD_ALLOC(data->uuid, nob); + OBD_ALLOC(data->un_uuid, nob); if (data == NULL) { OBD_FREE(data, sizeof(*data)); return -ENOMEM; } - CDEBUG(D_INFO, "add uuid %s "LPX64" %x\n", uuid, nid, nal); - memcpy(data->uuid, uuid, nob); - data->nid = nid; - data->nal = nal; + CDEBUG(D_INFO, "add uuid %s %s\n", uuid, libcfs_nid2str(nid)); + memcpy(data->un_uuid, uuid, nob); + data->un_nid = nid; spin_lock (&g_uuid_lock); - list_add(&data->head, &g_uuid_list); + list_add(&data->un_list, &g_uuid_list); spin_unlock (&g_uuid_lock); @@ -135,11 +133,11 @@ int class_del_uuid (char *uuid) spin_lock (&g_uuid_lock); list_for_each_safe(tmp, n, &g_uuid_list) { - data = list_entry(tmp, struct uuid_nid_data, head); + data = list_entry(tmp, struct uuid_nid_data, un_list); - if (uuid == NULL || strcmp(data->uuid, uuid) == 0) { - list_del (&data->head); - list_add (&data->head, &deathrow); + if (uuid == NULL || strcmp(data->un_uuid, uuid) == 0) { + list_del (&data->un_list); + list_add (&data->un_list, &deathrow); if (uuid) break; } @@ -154,11 +152,11 @@ int class_del_uuid (char *uuid) } do { - data = list_entry(deathrow.next, struct uuid_nid_data, head); + data = list_entry(deathrow.next, struct uuid_nid_data, un_list); - list_del (&data->head); + list_del (&data->un_list); - OBD_FREE(data->uuid, strlen(data->uuid) + 1); + OBD_FREE(data->un_uuid, strlen(data->un_uuid) + 1); OBD_FREE(data, sizeof(*data)); } while (!list_empty (&deathrow)); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index e4ef852..e8cda72 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -269,7 +269,6 @@ int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg) static void dump_exports(struct obd_device *obd) { struct obd_export *exp, *n; - char 
ipbuf[PTL_NALFMT_SIZE]; list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) { struct ptlrpc_reply_state *rs; @@ -285,7 +284,7 @@ static void dump_exports(struct obd_device *obd) CDEBUG(D_IOCTL, "%s: %p %s %s %d %d %d: %p %s\n", obd->obd_name, exp, exp->exp_client_uuid.uuid, - obd_export_nid2str(exp, ipbuf), + obd_export_nid2str(exp), atomic_read(&exp->exp_refcount), exp->exp_failed, nreplies, first_reply, nreplies > 3 ? "..." : ""); @@ -553,7 +552,6 @@ void class_del_profile(char *prof) int class_process_config(struct lustre_cfg *lcfg) { struct obd_device *obd; - char nidstr[PTL_NALFMT_SIZE]; int err; LASSERT(lcfg && !IS_ERR(lcfg)); @@ -567,13 +565,10 @@ int class_process_config(struct lustre_cfg *lcfg) } case LCFG_ADD_UUID: { CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64 - " (%s), nal %x\n", lustre_cfg_string(lcfg, 1), - lcfg->lcfg_nid, - portals_nid2str(lcfg->lcfg_nal, lcfg->lcfg_nid, nidstr), - lcfg->lcfg_nal); + " (%s)\n", lustre_cfg_string(lcfg, 1), + lcfg->lcfg_nid, libcfs_nid2str(lcfg->lcfg_nid)); - err = class_add_uuid(lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid, - lcfg->lcfg_nal); + err = class_add_uuid(lustre_cfg_string(lcfg, 1), lcfg->lcfg_nid); GOTO(out, err); } case LCFG_DEL_UUID: { @@ -716,8 +711,23 @@ static int class_config_llog_handler(struct llog_handle * handle, lcfg_new->lcfg_num = lcfg->lcfg_num; lcfg_new->lcfg_flags = lcfg->lcfg_flags; - lcfg_new->lcfg_nid = lcfg->lcfg_nid; - lcfg_new->lcfg_nal = lcfg->lcfg_nal; + + /* XXX Hack to try to remain binary compatible with + * pre-newconfig logs */ + if (lcfg->lcfg_nal != 0 && /* pre-newconfig log? 
*/ + (lcfg->lcfg_nid >> 32) == 0) { + __u32 addr = (__u32)(lcfg->lcfg_nid & 0xffffffff); + + lcfg_new->lcfg_nid = + LNET_MKNID(LNET_MKNET(lcfg->lcfg_nal, 0), addr); + CWARN("Converted pre-newconfig NAL %d NID %x to %s\n", + lcfg->lcfg_nal, addr, + libcfs_nid2str(lcfg_new->lcfg_nid)); + } else { + lcfg_new->lcfg_nid = lcfg->lcfg_nid; + } + + lcfg_new->lcfg_nal = 0; /* illegal value for obsolete field */ rc = class_process_config(lcfg_new); lustre_cfg_free(lcfg_new); @@ -727,26 +737,7 @@ static int class_config_llog_handler(struct llog_handle * handle, break; } case PTL_CFG_REC: { - struct portals_cfg *pcfg = (struct portals_cfg *)cfg_buf; - if (pcfg->pcfg_version != PORTALS_CFG_VERSION) { - if (pcfg->pcfg_version == __swab32(PORTALS_CFG_VERSION)) { - CDEBUG(D_OTHER, "swabbing portals_cfg %p\n", - pcfg); - lustre_swab_portals_cfg(pcfg); - } else { - CERROR("Unknown portals_cfg version: %#x " - "(expecting %#x)\n", - pcfg->pcfg_version, - PORTALS_CFG_VERSION); - RETURN(-EINVAL); - } - } - if (pcfg->pcfg_command ==NAL_CMD_REGISTER_MYNID && - cfg->cfg_local_nid != PTL_NID_ANY) { - pcfg->pcfg_nid = cfg->cfg_local_nid; - } - - rc = libcfs_nal_cmd(pcfg); + CWARN("Ignoring obsolete portals config\n"); break; } default: @@ -807,43 +798,18 @@ int class_config_dump_handler(struct llog_handle * handle, if (lcfg->lcfg_flags) CDEBUG(D_INFO, " flags: %x\n", lcfg->lcfg_flags); if (lcfg->lcfg_nid) - CDEBUG(D_INFO, " nid: "LPX64"\n", - lcfg->lcfg_nid); + CDEBUG(D_INFO, " nid: %s\n", + libcfs_nid2str(lcfg->lcfg_nid)); if (lcfg->lcfg_nal) - CDEBUG(D_INFO, " nal: %x\n", lcfg->lcfg_nal); + CDEBUG(D_INFO, " nal: %x (obsolete)\n", lcfg->lcfg_nal); if (lcfg->lcfg_num) - CDEBUG(D_INFO, " nal: %x\n", lcfg->lcfg_num); + CDEBUG(D_INFO, " num: %x\n", lcfg->lcfg_num); for (i = 1; i < lcfg->lcfg_bufcount; i++) if (LUSTRE_CFG_BUFLEN(lcfg, i) > 0) CDEBUG(D_INFO, " inlbuf%d: %s\n", i, lustre_cfg_string(lcfg, i)); } else if (rec->lrh_type == PTL_CFG_REC) { - struct portals_cfg *pcfg = (struct 
portals_cfg *)cfg_buf; - CDEBUG(D_INFO, "pcfg command: %d\n", pcfg->pcfg_command); - if (pcfg->pcfg_nal) - CDEBUG(D_INFO, " nal: %x\n", - pcfg->pcfg_nal); - if (pcfg->pcfg_gw_nal) - CDEBUG(D_INFO, " gw_nal: %x\n", - pcfg->pcfg_gw_nal); - if (pcfg->pcfg_nid) - CDEBUG(D_INFO, " nid: "LPX64"\n", - pcfg->pcfg_nid); - if (pcfg->pcfg_nid2) - CDEBUG(D_INFO, " nid: "LPX64"\n", - pcfg->pcfg_nid2); - if (pcfg->pcfg_nid3) - CDEBUG(D_INFO, " nid: "LPX64"\n", - pcfg->pcfg_nid3); - if (pcfg->pcfg_misc) - CDEBUG(D_INFO, " nid: %d\n", - pcfg->pcfg_misc); - if (pcfg->pcfg_id) - CDEBUG(D_INFO, " id: %x\n", - pcfg->pcfg_id); - if (pcfg->pcfg_flags) - CDEBUG(D_INFO, " flags: %x\n", - pcfg->pcfg_flags); + CDEBUG(D_INFO, "Obsolete pcfg command\n"); } else { CERROR("unhandled lrh_type: %#x\n", rec->lrh_type); rc = -EINVAL; diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index cf563db..0b56fb7 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -82,7 +82,7 @@ static int echo_disconnect(struct obd_export *exp) struct ptlrpc_reply_state *rs = list_entry(exp->exp_outstanding_replies.next, struct ptlrpc_reply_state, rs_exp_list); - struct ptlrpc_service *svc = rs->rs_srv_ni->sni_service; + struct ptlrpc_service *svc = rs->rs_service; spin_lock(&svc->srv_lock); list_del_init(&rs->rs_exp_list); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index dc8adf1..0cc99dc 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2633,7 +2633,6 @@ static int filter_set_info(struct obd_export *exp, __u32 keylen, { struct obd_device *obd; struct llog_ctxt *ctxt; - char str[PTL_NALFMT_SIZE]; int rc = 0; ENTRY; @@ -2648,7 +2647,7 @@ static int filter_set_info(struct obd_export *exp, __u32 keylen, RETURN(-EINVAL); CWARN("%s: received MDS connection from %s\n", obd->obd_name, - ptlrpc_peernid2str(&exp->exp_connection->c_peer, str)); + obd_export_nid2str(exp)); obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie; /* setup llog imports */ diff 
--git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index e9ea9dc..7f7d6d6 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -908,6 +908,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, obd_count page_count, struct brw_page *pga, int rc) { + const lnet_process_id_t *peer = + &req->rq_import->imp_connection->c_peer; struct client_obd *cli = &req->rq_import->imp_obd->u.cli; struct ost_body *body; __u32 client_cksum = 0; @@ -970,32 +972,28 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, handle_short_read(rc, page_count, pga); if (unlikely(oa->o_valid & OBD_MD_FLCKSUM)) { - struct ptlrpc_peer *peer = - &req->rq_import->imp_connection->c_peer; static int cksum_counter; __u32 cksum = osc_checksum_bulk(rc, page_count, pga); __u32 server_cksum = oa->o_cksum; - char str[PTL_NALFMT_SIZE]; - - ptlrpc_peernid2str(peer, str); if (server_cksum == ~0 && rc > 0) { CERROR("Protocol error: server %s set the 'checksum' " "bit, but didn't send a checksum. 
Not fatal, " - "but please tell CFS.\n", str); + "but please tell CFS.\n", + libcfs_nid2str(peer->nid)); RETURN(0); } cksum_counter++; + if (server_cksum != cksum) { - CERROR("Bad checksum: server %x != client %x, server " - "NID "LPX64" (%s)\n", server_cksum, cksum, - peer->peer_id.nid, str); + CERROR("Bad checksum from %s: server %x != client %x\n", + libcfs_nid2str(peer->nid), server_cksum, cksum); cksum_counter = 0; oa->o_cksum = cksum; } else if ((cksum_counter & (-cksum_counter)) == cksum_counter){ - CWARN("Checksum %u from "LPX64" (%s) OK: %x\n", - cksum_counter, peer->peer_id.nid, str, cksum); + CWARN("Checksum %u from %s OK: %x\n", + cksum_counter, libcfs_nid2str(peer->nid), cksum); } CDEBUG(D_PAGE, "checksum %x confirmed\n", cksum); } else if (unlikely(client_cksum)) { @@ -1003,9 +1001,8 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa, cksum_missed++; if ((cksum_missed & (-cksum_missed)) == cksum_missed) - CERROR("Request checksum %u from "LPX64", no reply\n", - cksum_missed, - req->rq_import->imp_connection->c_peer.peer_id.nid); + CERROR("Checksum %u requested from %s but not sent\n", + cksum_missed, libcfs_nid2str(peer->nid)); } RETURN(0); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index f96dec0..cc54ac0 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -345,21 +345,12 @@ static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc) int i; for (i = 0; i < desc->bd_iov_count; i++) { -#ifdef CRAY_PORTALS - char *ptr = desc->bd_iov[i].iov_base; - int len = desc->bd_iov[i].iov_len; -#else struct page *page = desc->bd_iov[i].kiov_page; int off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK; char *ptr = kmap(page) + off; int len = desc->bd_iov[i].kiov_len; -#endif cksum = crc32_le(cksum, ptr, len); -#ifndef CRAY_PORTALS - kunmap(page); - LL_CDEBUG_PAGE(D_PAGE, page, "off %d checksum %x\n", off,cksum); -#endif } return cksum; @@ -693,14 +684,14 @@ static int ost_brw_read(struct 
ptlrpc_request *req, struct obd_trans_info *oti) "evicting %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - req->rq_peerstr); + libcfs_id2str(req->rq_peer)); class_fail_export(req->rq_export); } else { CERROR("ignoring bulk IO comms error: " "client reconnected %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - req->rq_peerstr); + libcfs_id2str(req->rq_peer)); } } @@ -860,16 +851,21 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) cksum_counter++; if (client_cksum != cksum) { CERROR("Bad checksum: client %x, server %x id %s\n", - client_cksum, cksum, req->rq_peerstr); + client_cksum, cksum, + libcfs_id2str(req->rq_peer)); cksum_counter = 0; repbody->oa.o_cksum = cksum; repbody->oa.o_valid |= OBD_MD_FLCKSUM; } else if ((cksum_counter & (-cksum_counter)) == cksum_counter) { - CWARN("Checksum %u from %s: %x OK\n", - cksum_counter, req->rq_peerstr, cksum); + CWARN("Checksum %u from %s: %x OK\n", cksum_counter, + libcfs_id2str(req->rq_peer), cksum); } else { - CDEBUG(D_PAGE, "checksum %x confirmed\n", cksum); + cksum_counter++; + if ((cksum_counter & (-cksum_counter)) == cksum_counter) + CWARN("Checksum %u from %s: %x OK\n", + cksum_counter, + libcfs_id2str(req->rq_peer), cksum); } } @@ -922,14 +918,14 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) req->rq_export->exp_obd->obd_name, req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - req->rq_peerstr); + libcfs_id2str(req->rq_peer)); class_fail_export(req->rq_export); } else { CERROR("ignoring bulk IO comms error: " "client reconnected %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - req->rq_peerstr); + libcfs_id2str(req->rq_peer)); } } RETURN(rc); @@ -1124,7 +1120,7 @@ static int ost_handle(struct ptlrpc_request *req) if (req->rq_export 
== NULL) { CDEBUG(D_HA,"operation %d on unconnected OST from %s\n", - req->rq_reqmsg->opc, req->rq_peerstr); + req->rq_reqmsg->opc, libcfs_id2str(req->rq_peer)); req->rq_status = -ENOTCONN; GOTO(out, rc = -ENOTCONN); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 53ec1a5..a99cb48 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -49,16 +49,17 @@ void ptlrpc_init_client(int req_portal, int rep_portal, char *name, struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) { struct ptlrpc_connection *c; - struct ptlrpc_peer peer; - int err; + lnet_nid_t self; + lnet_process_id_t peer; + int err; - err = ptlrpc_uuid_to_peer(uuid, &peer); + err = ptlrpc_uuid_to_peer(uuid, &peer, &self); if (err != 0) { CERROR("cannot find peer %s!\n", uuid->uuid); return NULL; } - c = ptlrpc_get_connection(&peer, uuid); + c = ptlrpc_get_connection(peer, self, uuid); if (c) { memcpy(c->c_remote_uuid.uuid, uuid->uuid, sizeof(c->c_remote_uuid.uuid)); @@ -72,16 +73,18 @@ struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, struct obd_uuid *uuid) { - struct ptlrpc_peer peer; - int err; + lnet_nid_t self; + lnet_process_id_t peer; + int err; - err = ptlrpc_uuid_to_peer(uuid, &peer); + err = ptlrpc_uuid_to_peer(uuid, &peer, &self); if (err != 0) { CERROR("cannot find peer %s!\n", uuid->uuid); return; } - memcpy(&conn->c_peer, &peer, sizeof (peer)); + conn->c_peer = peer; + conn->c_self = self; return; } @@ -97,7 +100,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal init_waitqueue_head(&desc->bd_waitq); desc->bd_max_iov = npages; desc->bd_iov_count = 0; - desc->bd_md_h = PTL_INVALID_HANDLE; + desc->bd_md_h = LNET_INVALID_HANDLE; desc->bd_portal = portal; desc->bd_type = type; @@ -636,7 +639,7 @@ static int after_reply(struct ptlrpc_request *req) spin_unlock_irqrestore(&imp->imp_lock, flags); req->rq_commit_cb(req); 
spin_lock_irqsave(&imp->imp_lock, flags); - } + } if (req->rq_transno > imp->imp_max_transno) imp->imp_max_transno = req->rq_transno; @@ -654,7 +657,6 @@ static int after_reply(struct ptlrpc_request *req) static int ptlrpc_send_new_req(struct ptlrpc_request *req) { - char str[PTL_NALFMT_SIZE]; struct obd_import *imp; unsigned long flags; int rc; @@ -698,12 +700,11 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) spin_unlock_irqrestore(&imp->imp_lock, flags); req->rq_reqmsg->status = current->pid; - CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc" - " %s:%s:%d:"LPU64":%s:%s:%d\n", current->comm, + CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc" + " %s:%s:%d:"LPU64":%s:%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, - imp->imp_connection->c_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&imp->imp_connection->c_peer, str), + libcfs_nid2str(imp->imp_connection->c_peer.nid), req->rq_reqmsg->opc); rc = ptl_send_rpc(req); @@ -717,7 +718,6 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) int ptlrpc_check_set(struct ptlrpc_request_set *set) { - char str[PTL_NALFMT_SIZE]; unsigned long flags; struct list_head *tmp; int force_timer_recalc = 0; @@ -917,12 +917,11 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) req->rq_status); } - CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:" - "opc %s:%s:%d:"LPU64":%s:%s:%d\n", current->comm, + CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:" + "opc %s:%s:%d:"LPU64":%s:%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, - imp->imp_connection->c_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&imp->imp_connection->c_peer, str), + libcfs_nid2str(imp->imp_connection->c_peer.nid), req->rq_reqmsg->opc); set->set_remaining--; @@ -951,7 +950,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) ptlrpc_unregister_reply (req); if (obd_dump_on_timeout) - portals_debug_dumplog(); + 
libcfs_debug_dumplog(); if (req->rq_bulk != NULL) ptlrpc_unregister_bulk (req); @@ -1263,7 +1262,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) if (!ptlrpc_client_receiving_reply(request)) return; - PtlMDUnlink (request->rq_reply_md_h); + LNetMDUnlink (request->rq_reply_md_h); /* We have to l_wait_event() whatever the result, to give liblustre * a chance to run reply_in_callback() */ @@ -1457,7 +1456,6 @@ void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, int ptlrpc_queue_wait(struct ptlrpc_request *req) { - char str[PTL_NALFMT_SIZE]; int rc = 0; int brc; struct l_wait_info lwi; @@ -1473,12 +1471,11 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) /* for distributed debugging */ req->rq_reqmsg->status = current->pid; LASSERT(imp->imp_obd != NULL); - CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc " - "%s:%s:%d:"LPU64":%s:%s:%d\n", current->comm, + CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc " + "%s:%s:%d:"LPU64":%s:%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, - imp->imp_connection->c_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&imp->imp_connection->c_peer, str), + libcfs_nid2str(imp->imp_connection->c_peer.nid), req->rq_reqmsg->opc); /* Mark phase here for a little debug help */ @@ -1570,12 +1567,11 @@ restart: l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req), &lwi); DEBUG_REQ(D_NET, req, "-- done sleeping"); - CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc " - "%s:%s:%d:"LPU64":%s:%s:%d\n", current->comm, + CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc " + "%s:%s:%d:"LPU64":%s:%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, - imp->imp_connection->c_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&imp->imp_connection->c_peer, str), + libcfs_nid2str(imp->imp_connection->c_peer.nid), req->rq_reqmsg->opc); spin_lock_irqsave(&imp->imp_lock, flags); diff --git 
a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index da301a4..fc55e25 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -40,79 +40,95 @@ static struct list_head conn_unused_list; void ptlrpc_dump_connections(void) { - char str[PTL_NALFMT_SIZE]; struct list_head *tmp; struct ptlrpc_connection *c; ENTRY; list_for_each(tmp, &conn_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - CERROR("Connection %p/%s has refcount %d (nid=%s on %s)\n", + CERROR("Connection %p/%s has refcount %d (nid=%s->%s)\n", c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount), - ptlrpc_peernid2str(&c->c_peer, str), - c->c_peer.peer_ni->pni_name); + libcfs_nid2str(c->c_self), + libcfs_nid2str(c->c_peer.nid)); } EXIT; } -struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, - struct obd_uuid *uuid) +struct ptlrpc_connection* +ptlrpc_lookup_conn_locked (lnet_process_id_t peer) { - char str[PTL_NALFMT_SIZE]; - struct list_head *tmp, *pos; struct ptlrpc_connection *c; - ENTRY; - - - CDEBUG(D_INFO, "peer is %s on %s\n", - ptlrpc_id2str(peer, str), peer->peer_ni->pni_name); + struct list_head *tmp; - spin_lock(&conn_lock); list_for_each(tmp, &conn_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 && - peer->peer_ni == c->c_peer.peer_ni) { - ptlrpc_connection_addref(c); - GOTO(out, c); - } + + if (peer.nid == c->c_peer.nid && + peer.pid == c->c_peer.pid) + return ptlrpc_connection_addref(c); } - list_for_each_safe(tmp, pos, &conn_unused_list) { + list_for_each(tmp, &conn_unused_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 && - peer->peer_ni == c->c_peer.peer_ni) { - ptlrpc_connection_addref(c); + + if (peer.nid == c->c_peer.nid && + peer.pid == c->c_peer.pid) { list_del(&c->c_link); list_add(&c->c_link, &conn_list); - GOTO(out, c); + return ptlrpc_connection_addref(c); } } - /* FIXME: this should be a 
slab once we can validate slab addresses - * without OOPSing */ - OBD_ALLOC_GFP(c, sizeof(*c), GFP_ATOMIC); - + return NULL; +} + + +struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, + lnet_nid_t self, struct obd_uuid *uuid) +{ + struct ptlrpc_connection *c; + struct ptlrpc_connection *c2; + ENTRY; + + CDEBUG(D_INFO, "self %s peer %s\n", + libcfs_nid2str(self), libcfs_id2str(peer)); + + spin_lock(&conn_lock); + + c = ptlrpc_lookup_conn_locked(peer); + + spin_unlock(&conn_lock); + + if (c != NULL) + RETURN (c); + + OBD_ALLOC(c, sizeof(*c)); if (c == NULL) - GOTO(out, c); + RETURN (NULL); - if (uuid && uuid->uuid) /* XXX ???? */ + atomic_set(&c->c_refcount, 1); + c->c_peer = peer; + c->c_self = self; + if (uuid != NULL) obd_str2uuid(&c->c_remote_uuid, uuid->uuid); - atomic_set(&c->c_refcount, 0); - memcpy(&c->c_peer, peer, sizeof(c->c_peer)); - - ptlrpc_connection_addref(c); - list_add(&c->c_link, &conn_list); + spin_lock(&conn_lock); - EXIT; - out: + c2 = ptlrpc_lookup_conn_locked(peer); + if (c2 == NULL) + list_add(&c->c_link, &conn_list); + spin_unlock(&conn_lock); - return c; + + if (c2 == NULL) + RETURN (c); + + OBD_FREE(c, sizeof(*c)); + RETURN (c2); } int ptlrpc_put_connection(struct ptlrpc_connection *c) { - char str[PTL_NALFMT_SIZE]; int rc = 0; ENTRY; @@ -121,10 +137,9 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c) RETURN(0); } - CDEBUG (D_INFO, "connection=%p refcount %d to %s on %s\n", + CDEBUG (D_INFO, "connection=%p refcount %d to %s\n", c, atomic_read(&c->c_refcount) - 1, - ptlrpc_peernid2str(&c->c_peer, str), - c->c_peer.peer_ni->pni_name); + libcfs_nid2str(c->c_peer.nid)); if (atomic_dec_and_test(&c->c_refcount)) { spin_lock(&conn_lock); @@ -142,13 +157,11 @@ int ptlrpc_put_connection(struct ptlrpc_connection *c) struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c) { - char str[PTL_NALFMT_SIZE]; ENTRY; atomic_inc(&c->c_refcount); - CDEBUG (D_INFO, "connection=%p refcount %d to %s on %s\n", + 
CDEBUG (D_INFO, "connection=%p refcount %d to %s\n", c, atomic_read(&c->c_refcount), - ptlrpc_peernid2str(&c->c_peer, str), - c->c_peer.peer_ni->pni_name); + libcfs_nid2str(c->c_peer.nid)); RETURN(c); } @@ -161,7 +174,6 @@ void ptlrpc_init_connection(void) void ptlrpc_cleanup_connection(void) { - char str[PTL_NALFMT_SIZE]; struct list_head *tmp, *pos; struct ptlrpc_connection *c; @@ -173,10 +185,9 @@ void ptlrpc_cleanup_connection(void) } list_for_each_safe(tmp, pos, &conn_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - CERROR("Connection %p/%s has refcount %d (nid=%s on %s)\n", + CERROR("Connection %p/%s has refcount %d (nid=%s)\n", c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount), - ptlrpc_peernid2str(&c->c_peer, str), - c->c_peer.peer_ni->pni_name); + libcfs_nid2str(c->c_peer.nid)); list_del(&c->c_link); OBD_FREE(c, sizeof(*c)); } diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 21f451b..da6f8ad 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -34,34 +34,26 @@ #include #include "ptlrpc_internal.h" -#if !defined(__KERNEL__) && CRAY_PORTALS -/* forward ref in events.c */ -static void cray_portals_callback(ptl_event_t *ev); -#endif - +lnet_handle_eq_t ptlrpc_eq_h; -struct ptlrpc_ni ptlrpc_interfaces[8]; -int ptlrpc_ninterfaces; - -/* +/* * Client's outgoing request callback */ -void request_out_callback(ptl_event_t *ev) +void request_out_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_request *req = cbid->cbid_arg; unsigned long flags; ENTRY; - LASSERT (ev->type == PTL_EVENT_SEND_END || - ev->type == PTL_EVENT_UNLINK); + LASSERT (ev->type == LNET_EVENT_SEND || + ev->type == LNET_EVENT_UNLINK); LASSERT (ev->unlinked); - DEBUG_REQ((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, req, - "type %d, status %d", ev->type, ev->ni_fail_type); + DEBUG_REQ((ev->status == 0) ? 
D_NET : D_ERROR, req, + "type %d, status %d", ev->type, ev->status); - if (ev->type == PTL_EVENT_UNLINK || - ev->ni_fail_type != PTL_NI_OK) { + if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { /* Failed send: make it seem like the reply timed out, just * like failing sends in client.c does currently... */ @@ -69,7 +61,7 @@ void request_out_callback(ptl_event_t *ev) spin_lock_irqsave(&req->rq_lock, flags); req->rq_net_err = 1; spin_unlock_irqrestore(&req->rq_lock, flags); - + ptlrpc_wake_client_req(req); } @@ -81,30 +73,29 @@ void request_out_callback(ptl_event_t *ev) /* * Client's incoming reply callback */ -void reply_in_callback(ptl_event_t *ev) +void reply_in_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_request *req = cbid->cbid_arg; unsigned long flags; ENTRY; - LASSERT (ev->type == PTL_EVENT_PUT_END || - ev->type == PTL_EVENT_UNLINK); + LASSERT (ev->type == LNET_EVENT_PUT || + ev->type == LNET_EVENT_UNLINK); LASSERT (ev->unlinked); LASSERT (ev->md.start == req->rq_repmsg); LASSERT (ev->offset == 0); LASSERT (ev->mlength <= req->rq_replen); - - DEBUG_REQ((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, req, - "type %d, status %d", ev->type, ev->ni_fail_type); + + DEBUG_REQ((ev->status == 0) ? 
D_NET : D_ERROR, req, + "type %d, status %d", ev->type, ev->status); spin_lock_irqsave (&req->rq_lock, flags); LASSERT (req->rq_receiving_reply); req->rq_receiving_reply = 0; - if (ev->type == PTL_EVENT_PUT_END && - ev->ni_fail_type == PTL_NI_OK) { + if (ev->type == LNET_EVENT_PUT && ev->status == 0) { req->rq_replied = 1; req->rq_nob_received = ev->mlength; } @@ -117,34 +108,33 @@ void reply_in_callback(ptl_event_t *ev) EXIT; } -/* +/* * Client's bulk has been written/read */ -void client_bulk_callback (ptl_event_t *ev) +void client_bulk_callback (lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; unsigned long flags; ENTRY; - LASSERT ((desc->bd_type == BULK_PUT_SINK && - ev->type == PTL_EVENT_PUT_END) || + LASSERT ((desc->bd_type == BULK_PUT_SINK && + ev->type == LNET_EVENT_PUT) || (desc->bd_type == BULK_GET_SOURCE && - ev->type == PTL_EVENT_GET_END) || - ev->type == PTL_EVENT_UNLINK); + ev->type == LNET_EVENT_GET) || + ev->type == LNET_EVENT_UNLINK); LASSERT (ev->unlinked); - CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, - "event type %d, status %d, desc %p\n", - ev->type, ev->ni_fail_type, desc); + CDEBUG((ev->status == 0) ? 
D_NET : D_ERROR, + "event type %d, status %d, desc %p\n", + ev->type, ev->status, desc); spin_lock_irqsave (&desc->bd_lock, flags); LASSERT(desc->bd_network_rw); desc->bd_network_rw = 0; - if (ev->type != PTL_EVENT_UNLINK && - ev->ni_fail_type == PTL_NI_OK) { + if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) { desc->bd_success = 1; desc->bd_nob_transferred = ev->mlength; } @@ -157,29 +147,27 @@ void client_bulk_callback (ptl_event_t *ev) EXIT; } -/* +/* * Server's incoming request callback */ -void request_in_callback(ptl_event_t *ev) +void request_in_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg; - struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni; - struct ptlrpc_service *service = srv_ni->sni_service; + struct ptlrpc_service *service = rqbd->rqbd_service; struct ptlrpc_request *req; - char str[PTL_NALFMT_SIZE]; unsigned long flags; ENTRY; - LASSERT (ev->type == PTL_EVENT_PUT_END || - ev->type == PTL_EVENT_UNLINK); + LASSERT (ev->type == LNET_EVENT_PUT || + ev->type == LNET_EVENT_UNLINK); LASSERT ((char *)ev->md.start >= rqbd->rqbd_buffer); LASSERT ((char *)ev->md.start + ev->offset + ev->mlength <= rqbd->rqbd_buffer + service->srv_buf_size); - CDEBUG((ev->ni_fail_type == PTL_OK) ? D_NET : D_ERROR, - "event type %d, status %d, service %s\n", - ev->type, ev->ni_fail_type, service->srv_name); + CDEBUG((ev->status == 0) ? D_NET : D_ERROR, + "event type %d, status %d, service %s\n", + ev->type, ev->status, service->srv_name); if (ev->unlinked) { /* If this is the last request message to fit in the @@ -190,8 +178,8 @@ void request_in_callback(ptl_event_t *ev) req = &rqbd->rqbd_req; memset(req, 0, sizeof (*req)); } else { - LASSERT (ev->type == PTL_EVENT_PUT_END); - if (ev->ni_fail_type != PTL_NI_OK) { + LASSERT (ev->type == LNET_EVENT_PUT); + if (ev->status != 0) { /* We moaned above already... 
*/ return; } @@ -199,9 +187,8 @@ void request_in_callback(ptl_event_t *ev) if (req == NULL) { CERROR("Can't allocate incoming request descriptor: " "Dropping %s RPC from %s\n", - service->srv_name, - portals_id2str(srv_ni->sni_ni->pni_number, - ev->initiator, str)); + service->srv_name, + libcfs_id2str(ev->initiator)); return; } } @@ -211,16 +198,14 @@ void request_in_callback(ptl_event_t *ev) * size to non-zero if this was a successful receive. */ req->rq_xid = ev->match_bits; req->rq_reqmsg = ev->md.start + ev->offset; - if (ev->type == PTL_EVENT_PUT_END && - ev->ni_fail_type == PTL_NI_OK) + if (ev->type == LNET_EVENT_PUT && ev->status == 0) req->rq_reqlen = ev->mlength; do_gettimeofday(&req->rq_arrival_time); - req->rq_peer.peer_id = ev->initiator; - req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni; - ptlrpc_id2str(&req->rq_peer, req->rq_peerstr); + req->rq_peer = ev->initiator; + req->rq_self = ev->target.nid; req->rq_rqbd = rqbd; req->rq_phase = RQ_PHASE_NEW; -#if CRAY_PORTALS +#if CRAY_XT3 req->rq_uid = ev->uid; #endif @@ -230,16 +215,16 @@ void request_in_callback(ptl_event_t *ev) list_add_tail(&req->rq_history_list, &service->srv_request_history); if (ev->unlinked) { - srv_ni->sni_nrqbd_receiving--; - if (ev->type != PTL_EVENT_UNLINK && - srv_ni->sni_nrqbd_receiving == 0) { - /* This service is off-air on this interface because - * all its request buffers are busy. Portals will - * start dropping incoming requests until more buffers - * get posted. NB don't moan if it's because we're - * tearing down the service. */ - CERROR("All %s %s request buffers busy\n", - service->srv_name, srv_ni->sni_ni->pni_name); + service->srv_nrqbd_receiving--; + if (ev->type != LNET_EVENT_UNLINK && + service->srv_nrqbd_receiving == 0) { + /* This service is off-air because all its request + * buffers are busy. Portals will start dropping + * incoming requests until more buffers get posted. + * NB don't moan if it's because we're tearing down the + * service. 
*/ + CERROR("All %s request buffers busy\n", + service->srv_name); } /* req takes over the network's ref on rqbd */ } else { @@ -261,18 +246,17 @@ void request_in_callback(ptl_event_t *ev) /* * Server's outgoing reply callback */ -void reply_out_callback(ptl_event_t *ev) +void reply_out_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_reply_state *rs = cbid->cbid_arg; - struct ptlrpc_srv_ni *sni = rs->rs_srv_ni; - struct ptlrpc_service *svc = sni->sni_service; + struct ptlrpc_service *svc = rs->rs_service; unsigned long flags; ENTRY; - LASSERT (ev->type == PTL_EVENT_SEND_END || - ev->type == PTL_EVENT_ACK || - ev->type == PTL_EVENT_UNLINK); + LASSERT (ev->type == LNET_EVENT_SEND || + ev->type == LNET_EVENT_ACK || + ev->type == LNET_EVENT_UNLINK); if (!rs->rs_difficult) { /* 'Easy' replies have no further processing so I drop the @@ -301,29 +285,29 @@ void reply_out_callback(ptl_event_t *ev) /* * Server's bulk completion callback */ -void server_bulk_callback (ptl_event_t *ev) +void server_bulk_callback (lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; unsigned long flags; ENTRY; - LASSERT (ev->type == PTL_EVENT_SEND_END || - ev->type == PTL_EVENT_UNLINK || + LASSERT (ev->type == LNET_EVENT_SEND || + ev->type == LNET_EVENT_UNLINK || (desc->bd_type == BULK_PUT_SOURCE && - ev->type == PTL_EVENT_ACK) || + ev->type == LNET_EVENT_ACK) || (desc->bd_type == BULK_GET_SINK && - ev->type == PTL_EVENT_REPLY_END)); + ev->type == LNET_EVENT_REPLY)); - CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR, - "event type %d, status %d, desc %p\n", - ev->type, ev->ni_fail_type, desc); + CDEBUG((ev->status == 0) ? 
D_NET : D_ERROR, + "event type %d, status %d, desc %p\n", + ev->type, ev->status, desc); spin_lock_irqsave (&desc->bd_lock, flags); - - if ((ev->type == PTL_EVENT_ACK || - ev->type == PTL_EVENT_REPLY_END) && - ev->ni_fail_type == PTL_NI_OK) { + + if ((ev->type == LNET_EVENT_ACK || + ev->type == LNET_EVENT_REPLY) && + ev->status == 0) { /* We heard back from the peer, so even if we get this * before the SENT event (oh yes we can), we know we * read/wrote the peer buffer and how much... */ @@ -341,10 +325,10 @@ void server_bulk_callback (ptl_event_t *ev) EXIT; } -static void ptlrpc_master_callback(ptl_event_t *ev) +static void ptlrpc_master_callback(lnet_event_t *ev) { struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - void (*callback)(ptl_event_t *ev) = cbid->cbid_fn; + void (*callback)(lnet_event_t *ev) = cbid->cbid_fn; /* Honestly, it's best to find out early. */ LASSERT (cbid->cbid_arg != LP_POISON); @@ -354,75 +338,92 @@ static void ptlrpc_master_callback(ptl_event_t *ev) callback == request_in_callback || callback == reply_out_callback || callback == server_bulk_callback); - + callback (ev); } -int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) +int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, + lnet_process_id_t *peer, lnet_nid_t *self) { - struct ptlrpc_ni *pni; - __u32 peer_nal; - ptl_nid_t peer_nid; - int i; - char str[PTL_NALFMT_SIZE]; - int rc; - - ENTRY; - - rc = lustre_uuid_to_peer (uuid->uuid, &peer_nal, &peer_nid); - - if (rc != 0) - RETURN (rc); - - for (i = 0; i < ptlrpc_ninterfaces; i++) { - pni = &ptlrpc_interfaces[i]; - -#if !CRAY_PORTALS - if (pni->pni_number == peer_nal) { -#else - /* compatible nals but may be from different bridges */ - if (NALID_FROM_IFACE(pni->pni_number) == - NALID_FROM_IFACE(peer_nal)) { -#endif - peer->peer_id.nid = peer_nid; - peer->peer_id.pid = LUSTRE_SRV_PTL_PID; - peer->peer_ni = pni; - RETURN(0); + int best_dist = 0; + int best_order = 0; + int count = 0; + int rc = -ENOENT; + int 
portals_compatibility; + int dist; + int order; + lnet_nid_t dst_nid; + lnet_nid_t src_nid; + + portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL); + + peer->pid = LUSTRE_SRV_LNET_PID; + + /* Choose the matching UUID that's closest */ + while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) { + dist = LNetDist(dst_nid, &src_nid, &order); + if (dist < 0) + continue; + + if (dist == 0) { /* local! use loopback LND */ + peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0); + rc = 0; + break; + } + + LASSERT (order >= 0); + if (rc < 0 || + dist < best_dist || + (dist == best_dist && order < best_order)) { + best_dist = dist; + best_order = order; + + if (portals_compatibility > 1) { + /* Strong portals compatibility: Zero the nid's + * NET, so if I'm reading new config logs, or + * getting configured by (new) lconf I can + * still talk to old servers. */ + dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid)); + src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid)); + } + peer->nid = dst_nid; + *self = src_nid; + rc = 0; } } - CERROR("Can't find ptlrpc interface for NAL %x, NID %s\n", - peer_nal, portals_nid2str(peer_nal, peer_nid, str)); - return (-ENOENT); + CDEBUG(D_WARNING,"%s->%s\n", uuid->uuid, libcfs_id2str(*peer)); + if (rc != 0) + CERROR("No NID found for %s\n", uuid->uuid); + return rc; } -void ptlrpc_ni_fini(struct ptlrpc_ni *pni) +void ptlrpc_ni_fini(void) { wait_queue_head_t waitq; struct l_wait_info lwi; int rc; int retries; - + /* Wait for the event queue to become idle since there may still be * messages in flight with pending events (i.e. 
the fire-and-forget * messages == client requests and "non-difficult" server * replies */ for (retries = 0;; retries++) { - rc = PtlEQFree(pni->pni_eq_h); + rc = LNetEQFree(ptlrpc_eq_h); switch (rc) { default: LBUG(); - case PTL_OK: - PtlNIFini(pni->pni_ni_h); + case 0: + LNetNIFini(); return; - - case PTL_EQ_IN_USE: + + case -EBUSY: if (retries != 0) - CWARN("Event queue for %s still busy\n", - pni->pni_name); - + CWARN("Event queue still busy\n"); + /* Wait for a bit */ init_waitqueue_head(&waitq); lwi = LWI_TIMEOUT(2*HZ, NULL, NULL); @@ -433,89 +434,53 @@ void ptlrpc_ni_fini(struct ptlrpc_ni *pni) /* notreached */ } -ptl_pid_t ptl_get_pid(void) +lnet_pid_t ptl_get_pid(void) { - ptl_pid_t pid; + lnet_pid_t pid; #ifndef __KERNEL__ pid = getpid(); -# if CRAY_PORTALS - /* hack to keep pid in range accepted by ernal */ - pid &= 0xFF; - if (pid == LUSTRE_SRV_PTL_PID) - pid++; -# endif #else - pid = LUSTRE_SRV_PTL_PID; + pid = LUSTRE_SRV_LNET_PID; #endif return pid; } - -int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) + +int ptlrpc_ni_init(void) { int rc; - char str[20]; - ptl_handle_ni_t nih; - ptl_pid_t pid; + lnet_pid_t pid; pid = ptl_get_pid(); + CDEBUG(D_NET, "My pid is: %x\n", pid); /* We're not passing any limits yet... */ - rc = PtlNIInit(number, pid, NULL, NULL, &nih); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CDEBUG (D_NET, "Can't init network interface %s: %d\n", - name, rc); + rc = LNetNIInit(pid); + if (rc < 0) { + CDEBUG (D_NET, "Can't init network interface: %d\n", rc); return (-ENOENT); } - CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid()); - - PtlSnprintHandle(str, sizeof(str), nih); - CDEBUG (D_NET, "init %x %s: %s\n", number, name, str); - - pni->pni_name = name; - pni->pni_number = number; - pni->pni_ni_h = nih; - - pni->pni_eq_h = PTL_INVALID_HANDLE; - /* CAVEAT EMPTOR: how we process portals events is _radically_ * different depending on... 
*/ #ifdef __KERNEL__ /* kernel portals calls our master callback when events are added to * the event queue. In fact lustre never pulls events off this queue, * so it's only sized for some debug history. */ -# if CRAY_PORTALS - PtlNIDebug(pni->pni_ni_h, 0xffffffff); -# endif - rc = PtlEQAlloc(pni->pni_ni_h, 1024, ptlrpc_master_callback, - &pni->pni_eq_h); + rc = LNetEQAlloc(1024, ptlrpc_master_callback, &ptlrpc_eq_h); #else /* liblustre calls the master callback when it removes events from the * event queue. The event queue has to be big enough not to drop * anything */ -# if CRAY_PORTALS - /* cray portals implements a non-standard callback to notify us there - * are buffered events even when the app is not doing a filesystem - * call. */ - rc = PtlEQAlloc(pni->pni_ni_h, 10240, cray_portals_callback, - &pni->pni_eq_h); -# else - rc = PtlEQAlloc(pni->pni_ni_h, 10240, PTL_EQ_HANDLER_NONE, - &pni->pni_eq_h); -# endif + rc = LNetEQAlloc(10240, LNET_EQ_HANDLER_NONE, &ptlrpc_eq_h); #endif - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - return (0); - fail: - CERROR ("Failed to initialise network interface %s: %d\n", - name, rc); + if (rc == 0) + return 0; + + CERROR ("Failed to allocate event queue: %d\n", rc); + LNetNIFini(); - /* OK to do complete teardown since we invalidated the handles above */ - ptlrpc_ni_fini (pni); - return (rc); + return (-ENOMEM); } #ifndef __KERNEL__ @@ -526,14 +491,14 @@ void * liblustre_register_wait_callback (int (*fn)(void *arg), void *arg) { struct liblustre_wait_callback *llwc; - + OBD_ALLOC(llwc, sizeof(*llwc)); LASSERT (llwc != NULL); - + llwc->llwc_fn = fn; llwc->llwc_arg = arg; list_add_tail(&llwc->llwc_list, &liblustre_wait_callbacks); - + return (llwc); } @@ -541,7 +506,7 @@ void liblustre_deregister_wait_callback (void *opaque) { struct liblustre_wait_callback *llwc = opaque; - + list_del(&llwc->llwc_list); OBD_FREE(llwc, sizeof(*llwc)); } @@ -549,25 +514,24 @@ liblustre_deregister_wait_callback (void *opaque) int 
liblustre_check_events (int timeout) { - ptl_event_t ev; + lnet_event_t ev; int rc; int i; ENTRY; - rc = PtlEQPoll(&ptlrpc_interfaces[0].pni_eq_h, 1, timeout * 1000, - &ev, &i); - if (rc == PTL_EQ_EMPTY) + rc = LNetEQPoll(&ptlrpc_eq_h, 1, timeout * 1000, &ev, &i); + if (rc == 0) RETURN(0); - - LASSERT (rc == PTL_EQ_DROPPED || rc == PTL_OK); - + + LASSERT (rc == -EOVERFLOW || rc == 1); + /* liblustre: no asynch callback so we can't affort to miss any * events... */ - if (rc == PTL_EQ_DROPPED) { + if (rc == -EOVERFLOW) { CERROR ("Dropped an event!!!\n"); abort(); } - + ptlrpc_master_callback (&ev); RETURN(1); } @@ -591,9 +555,9 @@ liblustre_wait_event (int timeout) /* Give all registered callbacks a bite at the cherry */ list_for_each(tmp, &liblustre_wait_callbacks) { - llwc = list_entry(tmp, struct liblustre_wait_callback, + llwc = list_entry(tmp, struct liblustre_wait_callback, llwc_list); - + if (llwc->llwc_fn(llwc->llwc_arg)) found_something = 1; } @@ -612,79 +576,18 @@ liblustre_wait_event (int timeout) return found_something; } -#if CRAY_PORTALS -static void cray_portals_callback(ptl_event_t *ev) -{ - /* We get a callback from the client Cray portals implementation - * whenever anyone calls PtlEQPoll(), and an event queue with a - * callback handler has outstanding events. - * - * If it's not liblustre calling PtlEQPoll(), this lets us know we - * have outstanding events which we handle with - * liblustre_wait_event(). - * - * Otherwise, we're already eagerly consuming events and we'd - * handle events out of order if we recursed. */ - if (!liblustre_waiting) - liblustre_wait_event(0); -} -#endif #endif /* __KERNEL__ */ -int ptlrpc_default_nal(void) -{ - if (ptlrpc_ninterfaces == 0) - return (-ENOENT); - - return (ptlrpc_interfaces[0].pni_number); -} - int ptlrpc_init_portals(void) { - /* Add new portals network interfaces here. - * Order is irrelevent! 
*/ - static struct { - int number; - char *name; - } ptl_nis[] = { -#if !CRAY_PORTALS - {QSWNAL, "qswnal"}, - {SOCKNAL, "socknal"}, - {GMNAL, "gmnal"}, - {OPENIBNAL, "openibnal"}, - {IIBNAL, "iibnal"}, - {VIBNAL, "vibnal"}, - {TCPNAL, "tcpnal"}, - {LONAL, "lonal"}, - {RANAL, "ranal"}, -#else - {CRAY_KERN_NAL, "cray_kern_nal"}, - {CRAY_QK_NAL, "cray_qk_nal"}, - {CRAY_USER_NAL, "cray_user_nal"}, -#endif - }; - int rc; - int i; - - LASSERT(ptlrpc_ninterfaces == 0); - - for (i = 0; i < sizeof (ptl_nis) / sizeof (ptl_nis[0]); i++) { - LASSERT(ptlrpc_ninterfaces < (sizeof(ptlrpc_interfaces) / - sizeof(ptlrpc_interfaces[0]))); - - rc = ptlrpc_ni_init(ptl_nis[i].number, ptl_nis[i].name, - &ptlrpc_interfaces[ptlrpc_ninterfaces]); - if (rc == 0) - ptlrpc_ninterfaces++; - } + int rc = ptlrpc_ni_init(); - if (ptlrpc_ninterfaces == 0) { - CERROR("network initialisation failed: is a NAL module " - "loaded?\n"); + if (rc != 0) { + CERROR("network initialisation failed\n"); return -EIO; } #ifndef __KERNEL__ - liblustre_services_callback = + liblustre_services_callback = liblustre_register_wait_callback(&liblustre_check_services, NULL); #endif return 0; @@ -695,6 +598,5 @@ void ptlrpc_exit_portals(void) #ifndef __KERNEL__ liblustre_deregister_wait_callback(liblustre_services_callback); #endif - while (ptlrpc_ninterfaces > 0) - ptlrpc_ni_fini (&ptlrpc_interfaces[--ptlrpc_ninterfaces]); + ptlrpc_ni_fini(); } diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index ba7ac09..83c9d68 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -119,7 +119,6 @@ int ptlrpc_set_import_discon(struct obd_import *imp) spin_lock_irqsave(&imp->imp_lock, flags); if (imp->imp_state == LUSTRE_IMP_FULL) { - char nidbuf[PTL_NALFMT_SIZE]; char *target_start; int target_len; @@ -130,14 +129,13 @@ int ptlrpc_set_import_discon(struct obd_import *imp) "lost; in progress operations using this " "service will %s.\n", target_len, target_start, - 
ptlrpc_peernid2str(&imp->imp_connection->c_peer, - nidbuf), - imp->imp_replayable + libcfs_nid2str(imp->imp_connection->c_peer.nid), + imp->imp_replayable ? "wait for recovery to complete" : "fail"); if (obd_dump_on_timeout) - portals_debug_dumplog(); + libcfs_debug_dumplog(); CWARN("%s: connection lost to %s@%s\n", imp->imp_obd->obd_name, @@ -569,7 +567,7 @@ finish: spin_unlock_irqrestore(&imp->imp_lock, flags); CERROR("this is bug 7269 - please attach log there\n"); if (bug7269_dump == 0) - portals_debug_dumplog(); + libcfs_debug_dumplog(); bug7269_dump = 1; } } @@ -726,7 +724,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) } if (imp->imp_state == LUSTRE_IMP_RECOVER) { - char nidbuf[PTL_NALFMT_SIZE]; + char *nidstr; CDEBUG(D_HA, "reconnected to %s@%s\n", imp->imp_target_uuid.uuid, @@ -740,12 +738,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) deuuidify(imp->imp_target_uuid.uuid, NULL, &target_start, &target_len); - ptlrpc_peernid2str(&imp->imp_connection->c_peer, - nidbuf); + nidstr = libcfs_nid2str(imp->imp_connection->c_peer.nid); LCONSOLE_INFO("Connection restored to service %.*s using nid " - "%s.\n", - target_len, target_start, nidbuf); + "%s.\n", target_len, target_start, nidstr); CWARN("%s: connection restored to %s@%s\n", imp->imp_obd->obd_name, diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index 6f718cc..83ce394 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -46,7 +46,7 @@ /* This is a callback from the llog_* functions. * Assumes caller has already pushed us into the kernel context. 
*/ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res, - struct llog_logid *logid, char *name) + struct llog_logid *logid, char *name) { struct obd_import *imp; struct llogd_body req_body; @@ -85,7 +85,8 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res, bufcount++; } - req = ptlrpc_prep_req(imp, LLOG_ORIGIN_HANDLE_CREATE,bufcount,size,tmp); + req = ptlrpc_prep_req(imp, LLOG_ORIGIN_HANDLE_CREATE, + bufcount, size, tmp); if (!req) GOTO(err_free, rc = -ENOMEM); @@ -185,7 +186,8 @@ static int llog_client_read_header(struct llog_handle *handle) int rc; ENTRY; - req = ptlrpc_prep_req(imp, LLOG_ORIGIN_HANDLE_READ_HEADER,1,&size,NULL); + req = ptlrpc_prep_req(imp, LLOG_ORIGIN_HANDLE_READ_HEADER, + 1, &size, NULL); if (!req) GOTO(out, rc = -ENOMEM); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 3bc0ad7..d2a410e 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -337,9 +337,9 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) * parser. Currently I only print stuff here I know is OK * to look at coz it was set up in request_in_callback()!!! 
*/ seq_printf(s, LPD64":%s:%s:"LPD64":%d:%s ", - req->rq_history_seq, - req->rq_peer.peer_ni->pni_name, req->rq_peerstr, - req->rq_xid, req->rq_reqlen,ptlrpc_rqphase2str(req)); + req->rq_history_seq, libcfs_nid2str(req->rq_self), + libcfs_id2str(req->rq_peer), req->rq_xid, + req->rq_reqlen,ptlrpc_rqphase2str(req)); if (svc->srv_request_history_print_fn == NULL) seq_printf(s, "\n"); diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 44a6193..e1b3219 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -33,57 +33,52 @@ #include #include "ptlrpc_internal.h" -static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, - ptl_ack_req_t ack, struct ptlrpc_cb_id *cbid, +static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len, + lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid, struct ptlrpc_connection *conn, int portal, __u64 xid) { int rc; - ptl_md_t md; - char str[PTL_NALFMT_SIZE]; + lnet_md_t md; ENTRY; LASSERT (portal != 0); LASSERT (conn != NULL); - CDEBUG (D_INFO, "conn=%p ni %s id %s on %s\n", - conn, conn->c_peer.peer_ni->pni_name, - ptlrpc_id2str(&conn->c_peer, str), - conn->c_peer.peer_ni->pni_name); + CDEBUG (D_INFO, "conn=%p id %s\n", conn, libcfs_id2str(conn->c_peer)); md.start = base; md.length = len; - md.threshold = (ack == PTL_ACK_REQ) ? 2 : 1; + md.threshold = (ack == LNET_ACK_REQ) ? 
2 : 1; md.options = PTLRPC_MD_OPTIONS; md.user_ptr = cbid; - md.eq_handle = conn->c_peer.peer_ni->pni_eq_h; + md.eq_handle = ptlrpc_eq_h; - if (ack == PTL_ACK_REQ && + if (ack == LNET_ACK_REQ && OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_ACK | OBD_FAIL_ONCE)) { /* don't ask for the ack to simulate failing client */ - ack = PTL_NOACK_REQ; + ack = LNET_NOACK_REQ; obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED; } - rc = PtlMDBind (conn->c_peer.peer_ni->pni_ni_h, md, - PTL_UNLINK, mdh); - if (rc != PTL_OK) { - CERROR ("PtlMDBind failed: %d\n", rc); - LASSERT (rc == PTL_NO_SPACE); + rc = LNetMDBind (md, LNET_UNLINK, mdh); + if (rc != 0) { + CERROR ("LNetMDBind failed: %d\n", rc); + LASSERT (rc == -ENOMEM); RETURN (-ENOMEM); } CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n", len, portal, xid); - rc = PtlPut (*mdh, ack, conn->c_peer.peer_id, portal, 0, xid, 0, 0); - if (rc != PTL_OK) { + rc = LNetPut (conn->c_self, *mdh, ack, + conn->c_peer, portal, xid, 0, 0); + if (rc != 0) { int rc2; /* We're going to get an UNLINK event when I unlink below, * which will complete just like any other failed send, so * I fall through and return success here! 
*/ - CERROR("PtlPut(%s, %d, "LPD64") failed: %d\n", - ptlrpc_id2str(&conn->c_peer, str), - portal, xid, rc); - rc2 = PtlMDUnlink(*mdh); - LASSERTF(rc2 == PTL_OK, "rc2 = %d\n", rc2); + CERROR("LNetPut(%s, %d, "LPD64") failed: %d\n", + libcfs_id2str(conn->c_peer), portal, xid, rc); + rc2 = LNetMDUnlink(*mdh); + LASSERTF(rc2 == 0, "rc2 = %d\n", rc2); } RETURN (0); @@ -91,12 +86,11 @@ static int ptl_send_buf (ptl_handle_md_t *mdh, void *base, int len, int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) { - int rc; - int rc2; - struct ptlrpc_peer *peer; - ptl_md_t md; - __u64 xid; - char str[PTL_NALFMT_SIZE]; + struct ptlrpc_connection *conn = desc->bd_export->exp_connection; + int rc; + int rc2; + lnet_md_t md; + __u64 xid; ENTRY; if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_PUT_NET)) @@ -107,10 +101,9 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) LASSERT (desc->bd_type == BULK_PUT_SOURCE || desc->bd_type == BULK_GET_SINK); desc->bd_success = 0; - peer = &desc->bd_export->exp_connection->c_peer; md.user_ptr = &desc->bd_cbid; - md.eq_handle = peer->peer_ni->pni_eq_h; + md.eq_handle = ptlrpc_eq_h; md.threshold = 2; /* SENT and ACK/REPLY */ md.options = PTLRPC_MD_OPTIONS; ptlrpc_fill_bulk_md(&md, desc); @@ -121,40 +114,38 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) /* NB total length may be 0 for a read past EOF, so we send a 0 * length bulk, since the client expects a bulk event. 
*/ - rc = PtlMDBind(peer->peer_ni->pni_ni_h, md, - PTL_UNLINK, &desc->bd_md_h); - if (rc != PTL_OK) { - CERROR("PtlMDBind failed: %d\n", rc); - LASSERT (rc == PTL_NO_SPACE); + rc = LNetMDBind(md, LNET_UNLINK, &desc->bd_md_h); + if (rc != 0) { + CERROR("LNetMDBind failed: %d\n", rc); + LASSERT (rc == -ENOMEM); RETURN(-ENOMEM); } /* Client's bulk and reply matchbits are the same */ xid = desc->bd_req->rq_xid; - CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d on %s " - "nid %s pid %d xid "LPX64"\n", desc->bd_iov_count, - desc->bd_nob, desc->bd_portal, peer->peer_ni->pni_name, - ptlrpc_id2str(peer, str), peer->peer_id.pid, xid); + CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d " + "id %s xid "LPX64"\n", desc->bd_iov_count, + desc->bd_nob, desc->bd_portal, + libcfs_id2str(conn->c_peer), xid); /* Network is about to get at the memory */ desc->bd_network_rw = 1; if (desc->bd_type == BULK_PUT_SOURCE) - rc = PtlPut (desc->bd_md_h, PTL_ACK_REQ, peer->peer_id, - desc->bd_portal, 0, xid, 0, 0); + rc = LNetPut (conn->c_self, desc->bd_md_h, LNET_ACK_REQ, + conn->c_peer, desc->bd_portal, xid, 0, 0); else - rc = PtlGet (desc->bd_md_h, peer->peer_id, - desc->bd_portal, 0, xid, 0); + rc = LNetGet (conn->c_self, desc->bd_md_h, + conn->c_peer, desc->bd_portal, xid, 0); - if (rc != PTL_OK) { + if (rc != 0) { /* Can't send, so we unlink the MD bound above. The UNLINK * event this creates will signal completion with failure, * so we return SUCCESS here! 
*/ CERROR("Transfer(%s, %d, "LPX64") failed: %d\n", - ptlrpc_id2str(peer, str), - desc->bd_portal, xid, rc); - rc2 = PtlMDUnlink(desc->bd_md_h); - LASSERT (rc2 == PTL_OK); + libcfs_id2str(conn->c_peer), desc->bd_portal, xid, rc); + rc2 = LNetMDUnlink(desc->bd_md_h); + LASSERT (rc2 == 0); } RETURN(0); @@ -177,7 +168,7 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc) * but we must still l_wait_event() in this case, to give liblustre * a chance to run server_bulk_callback()*/ - PtlMDUnlink (desc->bd_md_h); + LNetMDUnlink (desc->bd_md_h); for (;;) { /* Network access will complete in finite time but the HUGE @@ -196,11 +187,11 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc) int ptlrpc_register_bulk (struct ptlrpc_request *req) { struct ptlrpc_bulk_desc *desc = req->rq_bulk; - struct ptlrpc_peer *peer; + lnet_process_id_t peer; int rc; int rc2; - ptl_handle_me_t me_h; - ptl_md_t md; + lnet_handle_me_t me_h; + lnet_md_t md; ENTRY; if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_GET_NET)) @@ -216,14 +207,14 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) desc->bd_success = 0; - peer = &desc->bd_import->imp_connection->c_peer; + peer = desc->bd_import->imp_connection->c_peer; md.user_ptr = &desc->bd_cbid; - md.eq_handle = peer->peer_ni->pni_eq_h; + md.eq_handle = ptlrpc_eq_h; md.threshold = 1; /* PUT or GET */ md.options = PTLRPC_MD_OPTIONS | ((desc->bd_type == BULK_GET_SOURCE) ? 
- PTL_MD_OP_GET : PTL_MD_OP_PUT); + LNET_MD_OP_GET : LNET_MD_OP_PUT); ptlrpc_fill_bulk_md(&md, desc); LASSERT (desc->bd_cbid.cbid_fn == client_bulk_callback); @@ -238,32 +229,31 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) desc->bd_registered = 1; desc->bd_last_xid = req->rq_xid; - rc = PtlMEAttach(peer->peer_ni->pni_ni_h, desc->bd_portal, - desc->bd_import->imp_connection->c_peer.peer_id, - req->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &me_h); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); - LASSERT (rc == PTL_NO_SPACE); + rc = LNetMEAttach(desc->bd_portal, peer, + req->rq_xid, 0, LNET_UNLINK, LNET_INS_AFTER, &me_h); + if (rc != 0) { + CERROR("LNetMEAttach failed: %d\n", rc); + LASSERT (rc == -ENOMEM); RETURN (-ENOMEM); } /* About to let the network at it... */ desc->bd_network_rw = 1; - rc = PtlMDAttach(me_h, md, PTL_UNLINK, &desc->bd_md_h); - if (rc != PTL_OK) { - CERROR("PtlMDAttach failed: %d\n", rc); - LASSERT (rc == PTL_NO_SPACE); + rc = LNetMDAttach(me_h, md, LNET_UNLINK, &desc->bd_md_h); + if (rc != 0) { + CERROR("LNetMDAttach failed: %d\n", rc); + LASSERT (rc == -ENOMEM); desc->bd_network_rw = 0; - rc2 = PtlMEUnlink (me_h); - LASSERT (rc2 == PTL_OK); + rc2 = LNetMEUnlink (me_h); + LASSERT (rc2 == 0); RETURN (-ENOMEM); } CDEBUG(D_NET, "Setup bulk %s buffers: %u pages %u bytes, xid "LPX64", " - "portal %u on %s\n", + "portal %u\n", desc->bd_type == BULK_GET_SOURCE ? 
"get-source" : "put-sink", desc->bd_iov_count, desc->bd_nob, - req->rq_xid, desc->bd_portal, peer->peer_ni->pni_name); + req->rq_xid, desc->bd_portal); RETURN(0); } @@ -288,7 +278,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) * but we must still l_wait_event() in this case to give liblustre * a chance to run client_bulk_callback() */ - PtlMDUnlink (desc->bd_md_h); + LNetMDUnlink (desc->bd_md_h); if (req->rq_set != NULL) wq = &req->rq_set->set_waitq; @@ -311,7 +301,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) { - struct ptlrpc_service *svc = req->rq_rqbd->rqbd_srv_ni->sni_service; + struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service; struct ptlrpc_reply_state *rs = req->rq_reply_state; struct ptlrpc_connection *conn; int rc; @@ -346,7 +336,7 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) req->rq_repmsg->opc = req->rq_reqmsg->opc; if (req->rq_export == NULL) - conn = ptlrpc_get_connection(&req->rq_peer, NULL); + conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL); else conn = ptlrpc_connection_addref(req->rq_export->exp_connection); @@ -354,7 +344,7 @@ int ptlrpc_send_reply (struct ptlrpc_request *req, int may_be_difficult) ptlrpc_rs_addref(rs); /* +1 ref for the network */ rc = ptl_send_buf (&rs->rs_md_h, req->rq_repmsg, req->rq_replen, - rs->rs_difficult ? PTL_ACK_REQ : PTL_NOACK_REQ, + rs->rs_difficult ? 
LNET_ACK_REQ : LNET_NOACK_REQ, &rs->rs_cb_id, conn, svc->srv_rep_portal, req->rq_xid); if (rc != 0) { @@ -429,7 +419,7 @@ int ptl_send_rpc_nowait(struct ptlrpc_request *request) ptlrpc_pinger_sending_on_import(request->rq_import); rc = ptl_send_buf(&request->rq_req_md_h, request->rq_reqmsg, request->rq_reqlen, - PTL_NOACK_REQ, &request->rq_req_cbid, + LNET_NOACK_REQ, &request->rq_req_cbid, connection, request->rq_request_portal, request->rq_xid); @@ -449,8 +439,8 @@ int ptl_send_rpc(struct ptlrpc_request *request) int rc2; struct ptlrpc_connection *connection; unsigned long flags; - ptl_handle_me_t reply_me_h; - ptl_md_t reply_md; + lnet_handle_me_t reply_me_h; + lnet_md_t reply_md; ENTRY; OBD_FAIL_RETURN(OBD_FAIL_PTLRPC_DROP_RPC, 0); @@ -488,13 +478,12 @@ int ptl_send_rpc(struct ptlrpc_request *request) if (request->rq_repmsg == NULL) GOTO(cleanup_bulk, rc = -ENOMEM); - rc = PtlMEAttach(connection->c_peer.peer_ni->pni_ni_h, - request->rq_reply_portal, /* XXX FIXME bug 249 */ - connection->c_peer.peer_id, request->rq_xid, 0, - PTL_UNLINK, PTL_INS_AFTER, &reply_me_h); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); - LASSERT (rc == PTL_NO_SPACE); + rc = LNetMEAttach(request->rq_reply_portal, /* XXX FIXME bug 249 */ + connection->c_peer, request->rq_xid, 0, + LNET_UNLINK, LNET_INS_AFTER, &reply_me_h); + if (rc != 0) { + CERROR("LNetMEAttach failed: %d\n", rc); + LASSERT (rc == -ENOMEM); GOTO(cleanup_repmsg, rc = -ENOMEM); } @@ -513,15 +502,15 @@ int ptl_send_rpc(struct ptlrpc_request *request) reply_md.start = request->rq_repmsg; reply_md.length = request->rq_replen; reply_md.threshold = 1; - reply_md.options = PTLRPC_MD_OPTIONS | PTL_MD_OP_PUT; + reply_md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT; reply_md.user_ptr = &request->rq_reply_cbid; - reply_md.eq_handle = connection->c_peer.peer_ni->pni_eq_h; + reply_md.eq_handle = ptlrpc_eq_h; - rc = PtlMDAttach(reply_me_h, reply_md, PTL_UNLINK, + rc = LNetMDAttach(reply_me_h, reply_md, LNET_UNLINK, 
&request->rq_reply_md_h); - if (rc != PTL_OK) { - CERROR("PtlMDAttach failed: %d\n", rc); - LASSERT (rc == PTL_NO_SPACE); + if (rc != 0) { + CERROR("LNetMDAttach failed: %d\n", rc); + LASSERT (rc == -ENOMEM); spin_lock_irqsave (&request->rq_lock, flags); /* ...but the MD attach didn't succeed... */ request->rq_receiving_reply = 0; @@ -530,10 +519,9 @@ int ptl_send_rpc(struct ptlrpc_request *request) } CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 - ", portal %u on %s\n", + ", portal %u\n", request->rq_replen, request->rq_xid, - request->rq_reply_portal, - connection->c_peer.peer_ni->pni_name); + request->rq_reply_portal); ptlrpc_request_addref(request); /* +1 ref for the SENT callback */ @@ -541,7 +529,7 @@ int ptl_send_rpc(struct ptlrpc_request *request) ptlrpc_pinger_sending_on_import(request->rq_import); rc = ptl_send_buf(&request->rq_req_md_h, request->rq_reqmsg, request->rq_reqlen, - PTL_NOACK_REQ, &request->rq_req_cbid, + LNET_NOACK_REQ, &request->rq_req_cbid, connection, request->rq_request_portal, request->rq_xid); @@ -556,8 +544,8 @@ int ptl_send_rpc(struct ptlrpc_request *request) /* MEUnlink is safe; the PUT didn't even get off the ground, and * nobody apart from the PUT's target has the right nid+XID to * access the reply buffer. 
*/ - rc2 = PtlMEUnlink(reply_me_h); - LASSERT (rc2 == PTL_OK); + rc2 = LNetMEUnlink(reply_me_h); + LASSERT (rc2 == 0); /* UNLINKED callback called synchronously */ LASSERT (!request->rq_receiving_reply); @@ -574,23 +562,22 @@ int ptl_send_rpc(struct ptlrpc_request *request) int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd) { - struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni; - struct ptlrpc_service *service = srv_ni->sni_service; - static ptl_process_id_t match_id = {PTL_NID_ANY, PTL_PID_ANY}; + struct ptlrpc_service *service = rqbd->rqbd_service; + static lnet_process_id_t match_id = {LNET_NID_ANY, LNET_PID_ANY}; int rc; - ptl_md_t md; - ptl_handle_me_t me_h; + lnet_md_t md; + lnet_handle_me_t me_h; - CDEBUG(D_NET, "PtlMEAttach: portal %d on %s\n", - service->srv_req_portal, srv_ni->sni_ni->pni_name); + CDEBUG(D_NET, "LNetMEAttach: portal %d\n", + service->srv_req_portal); if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_RQBD)) return (-ENOMEM); - rc = PtlMEAttach(srv_ni->sni_ni->pni_ni_h, service->srv_req_portal, - match_id, 0, ~0, PTL_UNLINK, PTL_INS_AFTER, &me_h); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); + rc = LNetMEAttach(service->srv_req_portal, + match_id, 0, ~0, LNET_UNLINK, LNET_INS_AFTER, &me_h); + if (rc != 0) { + CERROR("LNetMEAttach failed: %d\n", rc); return (-ENOMEM); } @@ -600,19 +587,19 @@ int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd) md.start = rqbd->rqbd_buffer; md.length = service->srv_buf_size; md.max_size = service->srv_max_req_size; - md.threshold = PTL_MD_THRESH_INF; - md.options = PTLRPC_MD_OPTIONS | PTL_MD_OP_PUT | PTL_MD_MAX_SIZE; + md.threshold = LNET_MD_THRESH_INF; + md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE; md.user_ptr = &rqbd->rqbd_cbid; - md.eq_handle = srv_ni->sni_ni->pni_eq_h; + md.eq_handle = ptlrpc_eq_h; - rc = PtlMDAttach(me_h, md, PTL_UNLINK, &rqbd->rqbd_md_h); - if (rc == PTL_OK) + rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h); + if 
(rc == 0) return (0); - CERROR("PtlMDAttach failed: %d; \n", rc); - LASSERT (rc == PTL_NO_SPACE); - rc = PtlMEUnlink (me_h); - LASSERT (rc == PTL_OK); + CERROR("LNetMDAttach failed: %d; \n", rc); + LASSERT (rc == -ENOMEM); + rc = LNetMEUnlink (me_h); + LASSERT (rc == 0); rqbd->rqbd_refcount = 0; return (-ENOMEM); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 1d66084..1abbdab 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -173,15 +173,14 @@ int lustre_pack_reply (struct ptlrpc_request *req, size = offsetof (struct ptlrpc_reply_state, rs_msg) + msg_len; OBD_ALLOC (rs, size); if (unlikely(rs == NULL)) { - rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_srv_ni->sni_service, - size); + rs = lustre_get_emerg_rs(req->rq_rqbd->rqbd_service, size); if (!rs) RETURN (-ENOMEM); } atomic_set(&rs->rs_refcount, 1); /* 1 ref for rq_reply_state */ rs->rs_cb_id.cbid_fn = reply_out_callback; rs->rs_cb_id.cbid_arg = rs; - rs->rs_srv_ni = req->rq_rqbd->rqbd_srv_ni; + rs->rs_service = req->rq_rqbd->rqbd_service; rs->rs_size = size; INIT_LIST_HEAD(&rs->rs_exp_list); INIT_LIST_HEAD(&rs->rs_obd_list); @@ -211,7 +210,7 @@ void lustre_free_reply_state (struct ptlrpc_reply_state *rs) if (unlikely(rs->rs_prealloc)) { unsigned long flags; - struct ptlrpc_service *svc = rs->rs_srv_ni->sni_service; + struct ptlrpc_service *svc = rs->rs_service; spin_lock_irqsave(&svc->srv_lock, flags); list_add(&rs->rs_list, @@ -723,7 +722,7 @@ static void print_lum_objs(struct lov_user_md *lum) struct lov_user_ost_data *lod; int i; ENTRY; - if (!(portal_debug & D_OTHER)) /* don't loop on nothing */ + if (!(libcfs_debug & D_OTHER)) /* don't loop on nothing */ return; CDEBUG(D_OTHER, "lov_user_md_objects: %p\n", lum); for (i = 0; i < lum->lmm_stripe_count; i++) { @@ -2158,8 +2157,6 @@ void lustre_assert_wire_constants(void) (long long)MDS_SETATTR_REC); LASSERTF(OBD_CFG_REC == 274857984, " found %lld\n", (long long)OBD_CFG_REC); - LASSERTF(PTL_CFG_REC == 
274923520, " found %lld\n", - (long long)PTL_CFG_REC); LASSERTF(LLOG_GEN_REC == 274989056, " found %lld\n", (long long)LLOG_GEN_REC); LASSERTF(LLOG_HDR_MAGIC == 275010873, " found %lld\n", diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index 56a8565..b09fb8c 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -38,14 +38,13 @@ #include "ptlrpc_internal.h" #ifdef __KERNEL__ -#if !CRAY_PORTALS -void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) +void ptlrpc_fill_bulk_md (lnet_md_t *md, struct ptlrpc_bulk_desc *desc) { LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); - LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); + LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS))); - md->options |= PTL_MD_KIOV; + md->options |= LNET_MD_KIOV; md->start = &desc->bd_iov[0]; md->length = desc->bd_iov_count; } @@ -53,7 +52,7 @@ void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, int pageoffset, int len) { - ptl_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count]; + lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count]; kiov->kiov_page = page; kiov->kiov_offset = pageoffset; @@ -62,62 +61,23 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, desc->bd_iov_count++; } -#else /* CRAY_PORTALS */ -#ifdef PTL_MD_KIOV -#error "Conflicting compilation directives" -#endif - -void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) -{ - LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); - LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS))); - - md->options |= (PTL_MD_IOVEC | PTL_MD_PHYS); - md->start = &desc->bd_iov[0]; - md->length = desc->bd_iov_count; -} - -void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, - int pageoffset, int len) -{ - ptl_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count]; - - /* Should get a compiler warning if 
sizeof(physaddr) > sizeof(void *) */ - iov->iov_base = (void *)(page_to_phys(page) + pageoffset); - iov->iov_len = len; - - desc->bd_iov_count++; -} - -#endif /* CRAY_PORTALS */ #else /* !__KERNEL__ */ -void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc) +void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc) { -#if CRAY_PORTALS - LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS))); -#if defined(REDSTORM) && (NALID_FROM_IFACE(CRAY_QK_NAL) == PTL_IFACE_SS_ACCEL) - /* Enforce iov_count == 1 constraint only for SeaStar accel mode on - * compute nodes (ie, REDSTORM) - * - * iov_count of > 1 is supported via PTL_MD_IOVEC in other contexts */ - LASSERT (desc->bd_iov_count == 1); -#endif -#else - LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); -#endif + LASSERT (!(md->options & (LNET_MD_IOVEC | LNET_MD_KIOV | LNET_MD_PHYS))); if (desc->bd_iov_count == 1) { md->start = desc->bd_iov[0].iov_base; md->length = desc->bd_iov[0].iov_len; return; } - md->options |= PTL_MD_IOVEC; + md->options |= LNET_MD_IOVEC; md->start = &desc->bd_iov[0]; md->length = desc->bd_iov_count; } -static int can_merge_iovs(ptl_md_iovec_t *existing, ptl_md_iovec_t *candidate) +static int can_merge_iovs(lnet_md_iovec_t *existing, lnet_md_iovec_t *candidate) { if (existing->iov_base + existing->iov_len == candidate->iov_base) return 1; @@ -133,7 +93,7 @@ static int can_merge_iovs(ptl_md_iovec_t *existing, ptl_md_iovec_t *candidate) void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, int pageoffset, int len) { - ptl_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count]; + lnet_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count]; iov->iov_base = page->addr + pageoffset; iov->iov_len = len; diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index b7ff4d2..364fbca 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -111,7 +111,7 @@ enum { int 
ptlrpc_expire_one_request(struct ptlrpc_request *req); /* pers.c */ -void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc); +void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc); void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, int pageoffset, int len); diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 0f37e49..38c2258 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -155,7 +155,7 @@ static int ptlrpcd(void *arg) unsigned long flags; ENTRY; - kportal_daemonize(pc->pc_name); + libcfs_daemonize(pc->pc_name); SIGNAL_MASK_LOCK(current, flags); sigfillset(&current->blocked); diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 4e6690a..1c6ada7 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include "ptlrpc_internal.h" diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 10c6555..af61984 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "ptlrpc_internal.h" /* forward ref */ @@ -63,9 +63,8 @@ ptlrpc_free_request_buffer (char *ptr, int size) } struct ptlrpc_request_buffer_desc * -ptlrpc_alloc_rqbd (struct ptlrpc_srv_ni *srv_ni) +ptlrpc_alloc_rqbd (struct ptlrpc_service *svc) { - struct ptlrpc_service *svc = srv_ni->sni_service; unsigned long flags; struct ptlrpc_request_buffer_desc *rqbd; @@ -73,7 +72,7 @@ ptlrpc_alloc_rqbd (struct ptlrpc_srv_ni *srv_ni) if (rqbd == NULL) return (NULL); - rqbd->rqbd_srv_ni = srv_ni; + rqbd->rqbd_service = svc; rqbd->rqbd_refcount = 0; rqbd->rqbd_cbid.cbid_fn = request_in_callback; rqbd->rqbd_cbid.cbid_arg = rqbd; @@ -96,8 +95,7 @@ ptlrpc_alloc_rqbd (struct ptlrpc_srv_ni *srv_ni) void ptlrpc_free_rqbd (struct ptlrpc_request_buffer_desc *rqbd) { - struct ptlrpc_srv_ni *sni = rqbd->rqbd_srv_ni; - struct 
ptlrpc_service *svc = sni->sni_service; + struct ptlrpc_service *svc = rqbd->rqbd_service; unsigned long flags; LASSERT (rqbd->rqbd_refcount == 0); @@ -113,21 +111,20 @@ ptlrpc_free_rqbd (struct ptlrpc_request_buffer_desc *rqbd) } int -ptlrpc_grow_req_bufs(struct ptlrpc_srv_ni *srv_ni) +ptlrpc_grow_req_bufs(struct ptlrpc_service *svc) { - struct ptlrpc_service *svc = srv_ni->sni_service; struct ptlrpc_request_buffer_desc *rqbd; int i; CDEBUG(D_RPCTRACE, "%s: allocate %d new %d-byte reqbufs (%d/%d left)\n", svc->srv_name, svc->srv_nbuf_per_group, svc->srv_buf_size, - srv_ni->sni_nrqbd_receiving, svc->srv_nbufs); + svc->srv_nrqbd_receiving, svc->srv_nbufs); for (i = 0; i < svc->srv_nbuf_per_group; i++) { - rqbd = ptlrpc_alloc_rqbd(srv_ni); + rqbd = ptlrpc_alloc_rqbd(svc); if (rqbd == NULL) { - CERROR ("%s/%s: Can't allocate request buffer\n", - svc->srv_name, srv_ni->sni_ni->pni_name); + CERROR ("%s: Can't allocate request buffer\n", + svc->srv_name); return (-ENOMEM); } @@ -157,7 +154,7 @@ ptlrpc_save_lock (struct ptlrpc_request *req, void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs) { - struct ptlrpc_service *svc = rs->rs_srv_ni->sni_service; + struct ptlrpc_service *svc = rs->rs_service; #ifdef CONFIG_SMP LASSERT (spin_is_locked (&svc->srv_lock)); @@ -194,7 +191,7 @@ ptlrpc_commit_replies (struct obd_device *obd) LASSERT (rs->rs_difficult); if (rs->rs_transno <= obd->obd_last_committed) { - struct ptlrpc_service *svc = rs->rs_srv_ni->sni_service; + struct ptlrpc_service *svc = rs->rs_service; spin_lock (&svc->srv_lock); list_del_init (&rs->rs_obd_list); @@ -216,7 +213,6 @@ timeval_sub(struct timeval *large, struct timeval *small) static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) { - struct ptlrpc_srv_ni *srv_ni; struct ptlrpc_request_buffer_desc *rqbd; unsigned long flags; int rc; @@ -236,9 +232,8 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) list_del (&rqbd->rqbd_list); /* assume we will post successfully */ 
- srv_ni = rqbd->rqbd_srv_ni; - srv_ni->sni_nrqbd_receiving++; - list_add (&rqbd->rqbd_list, &srv_ni->sni_active_rqbds); + svc->srv_nrqbd_receiving++; + list_add (&rqbd->rqbd_list, &svc->srv_active_rqbds); spin_unlock_irqrestore(&svc->srv_lock, flags); @@ -251,17 +246,16 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) spin_lock_irqsave(&svc->srv_lock, flags); - srv_ni->sni_nrqbd_receiving--; + svc->srv_nrqbd_receiving--; list_del(&rqbd->rqbd_list); list_add_tail(&rqbd->rqbd_list, &svc->srv_idle_rqbds); - if (srv_ni->sni_nrqbd_receiving == 0) { + if (svc->srv_nrqbd_receiving == 0) { /* This service is off-air on this interface because all * its request buffers are busy. Portals will have started * dropping incoming requests until more buffers get * posted */ - CERROR("All %s %s request buffers busy\n", - svc->srv_name, srv_ni->sni_ni->pni_name); + CERROR("All %s request buffers busy\n", svc->srv_name); } spin_unlock_irqrestore (&svc->srv_lock, flags); @@ -276,23 +270,19 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, struct proc_dir_entry *proc_entry, svcreq_printfn_t svcreq_printfn, int num_threads) { - int i; - int rc; - int ssize; - struct ptlrpc_service *service; - struct ptlrpc_srv_ni *srv_ni; + int rc; + struct ptlrpc_service *service; ENTRY; - LASSERT (ptlrpc_ninterfaces > 0); LASSERT (nbufs > 0); LASSERT (bufsize >= max_req_size); - - ssize = offsetof (struct ptlrpc_service, - srv_interfaces[ptlrpc_ninterfaces]); - OBD_ALLOC(service, ssize); + + OBD_ALLOC(service, sizeof(*service)); if (service == NULL) RETURN(NULL); + /* First initialise enough for early teardown */ + service->srv_name = name; spin_lock_init(&service->srv_lock); INIT_LIST_HEAD(&service->srv_threads); @@ -312,39 +302,24 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, INIT_LIST_HEAD(&service->srv_request_queue); INIT_LIST_HEAD(&service->srv_idle_rqbds); + INIT_LIST_HEAD(&service->srv_active_rqbds); 
INIT_LIST_HEAD(&service->srv_history_rqbds); INIT_LIST_HEAD(&service->srv_request_history); + INIT_LIST_HEAD(&service->srv_active_replies); INIT_LIST_HEAD(&service->srv_reply_queue); INIT_LIST_HEAD(&service->srv_free_rs_list); init_waitqueue_head(&service->srv_free_rs_waitq); - /* First initialise enough for early teardown */ - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - - srv_ni->sni_service = service; - srv_ni->sni_ni = &ptlrpc_interfaces[i]; - INIT_LIST_HEAD(&srv_ni->sni_active_rqbds); - INIT_LIST_HEAD(&srv_ni->sni_active_replies); - } - spin_lock (&ptlrpc_all_services_lock); list_add (&service->srv_list, &ptlrpc_all_services); spin_unlock (&ptlrpc_all_services_lock); - - /* Now allocate the request buffers, assuming all interfaces require - * the same number. */ - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - CDEBUG (D_NET, "%s: initialising interface %s\n", name, - srv_ni->sni_ni->pni_name); - - rc = ptlrpc_grow_req_bufs(srv_ni); - /* We shouldn't be under memory pressure at startup, so - * fail if we can't post all our buffers at this time. */ - if (rc != 0) - GOTO(failed, NULL); - } + + /* Now allocate the request buffers */ + rc = ptlrpc_grow_req_bufs(service); + /* We shouldn't be under memory pressure at startup, so + * fail if we can't post all our buffers at this time. 
*/ + if (rc != 0) + GOTO(failed, NULL); /* Now allocate pool of reply buffers */ /* Increase max reply size to next power of two */ @@ -355,8 +330,8 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, if (proc_entry != NULL) ptlrpc_lprocfs_register_service(proc_entry, service); - CDEBUG(D_NET, "%s: Started on %d interfaces, listening on portal %d\n", - service->srv_name, ptlrpc_ninterfaces, service->srv_req_portal); + CDEBUG(D_NET, "%s: Started, listening on portal %d\n", + service->srv_name, service->srv_req_portal); RETURN(service); failed: @@ -387,8 +362,7 @@ static void ptlrpc_server_free_request(struct ptlrpc_request *req) { struct ptlrpc_request_buffer_desc *rqbd = req->rq_rqbd; - struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni; - struct ptlrpc_service *svc = srv_ni->sni_service; + struct ptlrpc_service *svc = rqbd->rqbd_service; unsigned long flags; int refcount; struct list_head *tmp; @@ -508,14 +482,15 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, if (rc != 0) { CERROR ("error unpacking request: ptl %d from %s" " xid "LPU64"\n", svc->srv_req_portal, - request->rq_peerstr, request->rq_xid); + libcfs_id2str(request->rq_peer), request->rq_xid); goto out; } rc = -EINVAL; if (request->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) { CERROR("wrong packet type received (type=%u) from %s\n", - request->rq_reqmsg->type, request->rq_peerstr); + request->rq_reqmsg->type, + libcfs_id2str(request->rq_peer)); goto out; } @@ -554,36 +529,35 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, if (timediff / 1000000 > (long)obd_timeout) { CERROR("Dropping timed-out opc %d request from %s" ": %ld seconds old\n", request->rq_reqmsg->opc, - request->rq_peerstr, timediff / 1000000); + libcfs_id2str(request->rq_peer), + timediff / 1000000); goto put_conn; } request->rq_phase = RQ_PHASE_INTERPRET; - CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:ni:nid:opc " - "%s:%s+%d:%d:"LPU64":%s:%s:%d\n", current->comm, + 
CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc " + "%s:%s+%d:%d:"LPU64":%s:%d\n", current->comm, (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? atomic_read(&request->rq_export->exp_refcount) : -99), request->rq_reqmsg->status, request->rq_xid, - request->rq_peer.peer_ni->pni_name, - request->rq_peerstr, + libcfs_id2str(request->rq_peer), request->rq_reqmsg->opc); rc = svc->srv_handler(request); request->rq_phase = RQ_PHASE_COMPLETE; - CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:ni:nid:opc " - "%s:%s+%d:%d:"LPU64":%s:%s:%d\n", current->comm, + CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc " + "%s:%s+%d:%d:"LPU64":%s:%d\n", current->comm, (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? atomic_read(&request->rq_export->exp_refcount) : -99), request->rq_reqmsg->status, request->rq_xid, - request->rq_peer.peer_ni->pni_name, - request->rq_peerstr, + libcfs_id2str(request->rq_peer), request->rq_reqmsg->opc); put_conn: @@ -598,14 +572,14 @@ put_conn: if (timediff / 1000000 > (long)obd_timeout) CERROR("request "LPU64" opc %u from %s processed in %lds\n", request->rq_xid, request->rq_reqmsg->opc, - request->rq_peerstr, + libcfs_id2str(request->rq_peer), timeval_sub(&work_end, &request->rq_arrival_time) / 1000000); else - CDEBUG(D_HA,"request "LPU64" opc %u from %s processed in %ldus " - "(%ldus total)\n", request->rq_xid, - request->rq_reqmsg->opc, request->rq_peerstr, - timediff, + CDEBUG(D_HA,"request "LPU64" opc %u from %s processed in %ldus" + " (%ldus total)\n", request->rq_xid, + request->rq_reqmsg->opc, + libcfs_id2str(request->rq_peer), timediff, timeval_sub(&work_end, &request->rq_arrival_time)); if (svc->srv_stats != NULL) { @@ -632,7 +606,6 @@ ptlrpc_server_handle_reply (struct ptlrpc_service *svc) struct obd_device *obd; int nlocks; int been_handled; - char str[PTL_NALFMT_SIZE]; ENTRY; spin_lock_irqsave 
(&svc->srv_lock, flags); @@ -681,7 +654,7 @@ ptlrpc_server_handle_reply (struct ptlrpc_service *svc) rs, rs->rs_xid, rs->rs_transno, rs->rs_msg.opc, - ptlrpc_peernid2str(&exp->exp_connection->c_peer, str)); + libcfs_nid2str(exp->exp_connection->c_peer.nid)); } if ((!been_handled && rs->rs_on_net) || @@ -689,7 +662,7 @@ ptlrpc_server_handle_reply (struct ptlrpc_service *svc) spin_unlock_irqrestore(&svc->srv_lock, flags); if (!been_handled && rs->rs_on_net) { - PtlMDUnlink(rs->rs_md_h); + LNetMDUnlink(rs->rs_md_h); /* Ignore return code; we're racing with * completion... */ } @@ -773,27 +746,20 @@ void ptlrpc_daemonize(void) } static void -ptlrpc_check_rqbd_pools(struct ptlrpc_service *svc) +ptlrpc_check_rqbd_pool(struct ptlrpc_service *svc) { - struct ptlrpc_srv_ni *sni; - int i; - int avail = 0; - int low_water = svc->srv_nbuf_per_group/2; - - for (i = 0; i < ptlrpc_ninterfaces; i++) { - sni = &svc->srv_interfaces[i]; + int avail = svc->srv_nrqbd_receiving; + int low_water = svc->srv_nbuf_per_group/2; - avail += sni->sni_nrqbd_receiving; - /* NB I'm not locking; just looking. */ + /* NB I'm not locking; just looking. */ - /* CAVEAT EMPTOR: We might be allocating buffers here - * because we've allowed the request history to grow out of - * control. We could put a sanity check on that here and - * cull some history if we need the space. */ + /* CAVEAT EMPTOR: We might be allocating buffers here because we've + * allowed the request history to grow out of control. We could put a + * sanity check on that here and cull some history if we need the + * space. 
*/ - if (sni->sni_nrqbd_receiving <= low_water) - ptlrpc_grow_req_bufs(sni); - } + if (avail <= low_water) + ptlrpc_grow_req_bufs(svc); lprocfs_counter_add(svc->srv_stats, PTLRPC_REQBUF_AVAIL_CNTR, avail); } @@ -919,7 +885,7 @@ static int ptlrpc_main(void *arg) lc_watchdog_touch(watchdog); - ptlrpc_check_rqbd_pools(svc); + ptlrpc_check_rqbd_pool(svc); if (!list_empty (&svc->srv_reply_queue)) ptlrpc_server_handle_reply (svc); @@ -1073,10 +1039,8 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, int ptlrpc_unregister_service(struct ptlrpc_service *service) { - int i; int rc; unsigned long flags; - struct ptlrpc_srv_ni *srv_ni; struct l_wait_info lwi; struct list_head *tmp; struct ptlrpc_reply_state *rs, *t; @@ -1094,56 +1058,49 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) * freed */ service->srv_max_history_rqbds = 0; - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - CDEBUG(D_NET, "%s: tearing down interface %s\n", - service->srv_name, srv_ni->sni_ni->pni_name); + CDEBUG(D_NET, "%s: tearing down\n", service->srv_name); - /* Unlink all the request buffers. This forces a 'final' - * event with its 'unlink' flag set for each posted rqbd */ - list_for_each(tmp, &srv_ni->sni_active_rqbds) { - struct ptlrpc_request_buffer_desc *rqbd = - list_entry(tmp, struct ptlrpc_request_buffer_desc, - rqbd_list); + /* Unlink all the request buffers. 
This forces a 'final' event with + * its 'unlink' flag set for each posted rqbd */ + list_for_each(tmp, &service->srv_active_rqbds) { + struct ptlrpc_request_buffer_desc *rqbd = + list_entry(tmp, struct ptlrpc_request_buffer_desc, + rqbd_list); - rc = PtlMDUnlink(rqbd->rqbd_md_h); - LASSERT (rc == PTL_OK || rc == PTL_MD_INVALID); - } + rc = LNetMDUnlink(rqbd->rqbd_md_h); + LASSERT (rc == 0 || rc == -ENOENT); + } - /* Wait for the network to release any buffers it's - * currently filling */ - for (;;) { - spin_lock_irqsave(&service->srv_lock, flags); - rc = srv_ni->sni_nrqbd_receiving; - spin_unlock_irqrestore(&service->srv_lock, flags); + /* Wait for the network to release any buffers it's currently + * filling */ + for (;;) { + spin_lock_irqsave(&service->srv_lock, flags); + rc = service->srv_nrqbd_receiving; + spin_unlock_irqrestore(&service->srv_lock, flags); - if (rc == 0) - break; + if (rc == 0) + break; - /* Network access will complete in finite time but - * the HUGE timeout lets us CWARN for visibility of - * sluggish NALs */ - lwi = LWI_TIMEOUT(300 * HZ, NULL, NULL); - rc = l_wait_event(service->srv_waitq, - srv_ni->sni_nrqbd_receiving == 0, - &lwi); - if (rc == -ETIMEDOUT) - CWARN("Waiting for request buffers on " - "service %s on interface %s ", - service->srv_name, srv_ni->sni_ni->pni_name); - } + /* Network access will complete in finite time but the HUGE + * timeout lets us CWARN for visibility of sluggish NALs */ + lwi = LWI_TIMEOUT(300 * HZ, NULL, NULL); + rc = l_wait_event(service->srv_waitq, + service->srv_nrqbd_receiving == 0, + &lwi); + if (rc == -ETIMEDOUT) + CWARN("Service %s waiting for request buffers\n", + service->srv_name); + } - /* schedule all outstanding replies to terminate them */ - spin_lock_irqsave(&service->srv_lock, flags); - while (!list_empty(&srv_ni->sni_active_replies)) { - struct ptlrpc_reply_state *rs = - list_entry(srv_ni->sni_active_replies.next, - struct ptlrpc_reply_state, - rs_list); - 
ptlrpc_schedule_difficult_reply(rs); - } - spin_unlock_irqrestore(&service->srv_lock, flags); + /* schedule all outstanding replies to terminate them */ + spin_lock_irqsave(&service->srv_lock, flags); + while (!list_empty(&service->srv_active_replies)) { + struct ptlrpc_reply_state *rs = + list_entry(service->srv_active_replies.next, + struct ptlrpc_reply_state, rs_list); + ptlrpc_schedule_difficult_reply(rs); } + spin_unlock_irqrestore(&service->srv_lock, flags); /* purge the request queue. NB No new replies (rqbds all unlinked) * and no service threads, so I'm the only thread noodling the @@ -1163,11 +1120,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) LASSERT(service->srv_n_queued_reqs == 0); LASSERT(service->srv_n_active_reqs == 0); LASSERT(service->srv_n_history_rqbds == 0); - - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - LASSERT(list_empty(&srv_ni->sni_active_rqbds)); - } + LASSERT(list_empty(&service->srv_active_rqbds)); /* Now free all the request buffers since nothing references them * any more... */ @@ -1201,9 +1154,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) OBD_FREE(rs, service->srv_max_reply_size); } - OBD_FREE(service, - offsetof(struct ptlrpc_service, - srv_interfaces[ptlrpc_ninterfaces])); + OBD_FREE(service, sizeof(*service)); return 0; } diff --git a/lustre/scripts/bdev-io-survey.sh b/lustre/scripts/bdev-io-survey.sh index 016bd65..084ca5a 100755 --- a/lustre/scripts/bdev-io-survey.sh +++ b/lustre/scripts/bdev-io-survey.sh @@ -12,6 +12,7 @@ # discard first vmstat line # +HOSTNAME=`hostname` # a temp dir that is setup and torn down for each script run tmpdir="" # so we can kill background processes as the test cleans up @@ -422,13 +423,13 @@ echo_filter_config() { if [ $index = 0 ]; then if ! 
lmc -m $config --add net \ - --node localhost --nid localhost --nettype tcp; then - echo "error adding localhost net node" + --node $HOSTNAME --nid $HOSTNAME --nettype tcp; then + echo "error adding $HOSTNAME net node" return 1 fi fi - if ! lmc -m $config --add ost --ost ost_$index --node localhost \ + if ! lmc -m $config --add ost --ost ost_$index --node $HOSTNAME \ --fstype ext3 --dev $bdev --journal_size 400; then echo "error adding $bdev to config with lmc" return 1 @@ -451,8 +452,8 @@ echo_filter_prepare() { fi running_config="$config" - echo 0 > /proc/sys/portals/debug - echo 0 > /proc/sys/portals/subsystem_debug + echo 0 > /proc/sys/lnet/debug + echo 0 > /proc/sys/lnet/subsystem_debug if ! grep -q '^obdecho\>' /proc/modules; then local m diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre index 63e9660..2444c65 100755 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -138,7 +138,7 @@ status() { egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded" # check for any routes - on a portals router this is the only thing - [ "`cat /proc/sys/portals/routes 2> /dev/null`" ] && STATE="running" + [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" # check for any configured devices (may indicate partial startup) [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index 0b2290a..20157d8 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -70,3 +70,4 @@ mmap_sanity rmdirmany flock_test writemany +random-reads diff --git a/lustre/tests/2ost.sh b/lustre/tests/2ost.sh index 1f890fb..57ddc08 100644 --- a/lustre/tests/2ost.sh +++ b/lustre/tests/2ost.sh @@ -7,6 +7,8 @@ config=${1:-`basename $0 .sh`.xml} LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} +HOSTNAME=`hostname` + MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} @@ -29,24 +31,24 @@ STRIPES_PER_OBJ=0 # 0 means stripe over all OSTs rm -f $config # 
create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11 +${LMC} --add node --node $HOSTNAME || exit 10 +${LMC} --add net --node $HOSTNAME --nid `hostname` --nettype $NETTYPE || exit 11 ${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12 # configure mds server -${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE \ +${LMC} --add mds --node $HOSTNAME --mds mds1 --fstype $FSTYPE \ --dev $MDSDEV --size $MDSSIZE $JARG $IARG $MDSOPT || exit 20 # configure ost ${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 $LOVOPT || exit 20 -${LMC} --add ost --ost ost1 --node localhost --lov lov1 \ +${LMC} --add ost --ost ost1 --node $HOSTNAME --lov lov1 \ --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE $JARG $OSTOPT || exit 30 -${LMC} --add ost --ost ost2 --node localhost --lov lov1 \ +${LMC} --add ost --ost ost2 --node $HOSTNAME --lov lov1 \ --fstype $FSTYPE --dev ${OSTDEV}2 --size $OSTSIZE $JARG $OSTOPT || exit 30 # create client config -${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 \ +${LMC} --add mtpt --node $HOSTNAME --path $MOUNT --mds mds1 --lov lov1 \ $CLIENTOPT || exit 40 ${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 \ $CLIENTOPT || exit 41 diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 13ae5dc..9c33b51 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -33,7 +33,7 @@ bin_PROGRAMS = mcreate munlink endif # TESTS stat_SOURCES = stat.c stat_fs.h -# mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE) +# mkdirdeep_LDADD=-L$(top_builddir)/lnet/utils -lptlctl $(LIBREADLINE) mmap_sanity_SOURCES= mmap_sanity.c if MPITESTS diff --git a/lustre/tests/acceptance-metadata-double.sh b/lustre/tests/acceptance-metadata-double.sh index 707198a..9c9df63 100644 --- 
a/lustre/tests/acceptance-metadata-double.sh +++ b/lustre/tests/acceptance-metadata-double.sh @@ -26,12 +26,12 @@ display_elapsed_time() { debug_client_on() { - echo -1 > /proc/sys/portals/debug + echo -1 > /proc/sys/lnet/debug } debug_client_off() { - echo 0x3f0400 > /proc/sys/portals/debug + echo 0x3f0400 > /proc/sys/lnet/debug } MNT=${MNT:-/mnt/lustre} diff --git a/lustre/tests/acceptance-metadata-parallel.sh b/lustre/tests/acceptance-metadata-parallel.sh index c4f0dd4..e302ecf 100644 --- a/lustre/tests/acceptance-metadata-parallel.sh +++ b/lustre/tests/acceptance-metadata-parallel.sh @@ -39,7 +39,7 @@ display_elapsed_time() { set_debug_level() { - $PDSH $CLIENTS "echo $1 > /proc/sys/portals/debug" + $PDSH $CLIENTS "echo $1 > /proc/sys/lnet/debug" } debug_client_on() diff --git a/lustre/tests/acceptance-metadata-single.sh b/lustre/tests/acceptance-metadata-single.sh index 306ce37..ad927fa 100644 --- a/lustre/tests/acceptance-metadata-single.sh +++ b/lustre/tests/acceptance-metadata-single.sh @@ -26,12 +26,12 @@ display_elapsed_time() { debug_client_on() { - echo -1 > /proc/sys/portals/debug + echo -1 > /proc/sys/lnet/debug } debug_client_off() { - echo 0x3f0400 > /proc/sys/portals/debug + echo 0x3f0400 > /proc/sys/lnet/debug } MNT=${MNT:-/mnt/lustre} diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 81c8197..c769134 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -21,8 +21,8 @@ fi [ "$COUNT" ] || COUNT=1000 #[ "$DEBUG_LVL" ] || DEBUG_LVL=0x370200 [ "$DEBUG_LVL" ] || DEBUG_LVL=0 -[ "$DEBUG_OFF" ] || DEBUG_OFF="sysctl -w portals.debug=$DEBUG_LVL" -[ "$DEBUG_ON" ] || DEBUG_ON="sysctl -w portals.debug=0x33f0480" +[ "$DEBUG_OFF" ] || DEBUG_OFF="sysctl -w lnet.debug=$DEBUG_LVL" +[ "$DEBUG_ON" ] || DEBUG_ON="sysctl -w lnet.debug=0x33f0480" LIBLUSTRE=${LIBLUSTRE:-../liblustre} LIBLUSTRETESTS=${LIBLUSTRETESTS:-$LIBLUSTRE/tests} @@ -154,7 +154,7 @@ for NAME in $CONFIGS; do export 
LIBLUSTRE_MOUNT_POINT=$MOUNT2 export LIBLUSTRE_MOUNT_TARGET=$MDSNODE:/$MDSNAME/$CLIENT export LIBLUSTRE_TIMEOUT=`cat /proc/sys/lustre/timeout` - #export LIBLUSTRE_DEBUG_MASK=`cat /proc/sys/portals/debug` + #export LIBLUSTRE_DEBUG_MASK=`cat /proc/sys/lnet/debug` if [ -x $LIBLUSTRETESTS/sanity ]; then $LIBLUSTRETESTS/sanity --target=$LIBLUSTRE_MOUNT_TARGET fi diff --git a/lustre/tests/cobd.sh b/lustre/tests/cobd.sh index 983df93..3b7e8ac 100755 --- a/lustre/tests/cobd.sh +++ b/lustre/tests/cobd.sh @@ -6,6 +6,8 @@ config=${1:-$(basename $0 .sh)}.xml LMC=${LMC:-../utils/lmc -m $config} TMP=${TMP:-/tmp} +HOSTNAME=`hostname` + MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=50000 FSTYPE=${FSTYPE:-ext3} @@ -15,18 +17,18 @@ OSTSIZE=200000 rm -f $config # create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11 +${LMC} --add node --node $HOSTNAME || exit 10 +${LMC} --add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp || exit 11 # configure mds server -${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 +${LMC} --add mds --node $HOSTNAME --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 # configure ost -${LMC} --add ost --node localhost --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30 +${LMC} --add ost --node $HOSTNAME --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30 # configure ost -${LMC} --add ost --node localhost --obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30 +${LMC} --add ost --node $HOSTNAME --obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30 -${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2 +${LMC} --add cobd --node $HOSTNAME --real_obd obd1 --cache_obd obd2 # create client config -# ${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --obd obd1 || exit 40 +# ${LMC} -m $config --add mtpt --node $HOSTNAME --path /mnt/lustre --mds mds1 --obd obd1 
|| exit 40 diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 039911b..c91a4ed 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -22,6 +22,7 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} +HOSTNAME=`hostname` . $LUSTRE/tests/test-framework.sh @@ -96,7 +97,7 @@ cleanup() { stop_mds $FORCE || return 201 stop_ost $FORCE || return 202 # catch case where these return just fine, but modules are still not unloaded - /sbin/lsmod | egrep -q "portals|libcfs" + /sbin/lsmod | egrep -q "lnet|libcfs" if [ 1 -ne $? ]; then echo "modules still loaded..." /sbin/lsmod @@ -215,7 +216,7 @@ test_5() { stop_mds || return 4 stop_ost || return 5 - lsmod | grep -q portals && return 6 + lsmod | grep -q lnet && return 6 return 0 } run_test 5 "force cleanup mds, then cleanup" @@ -234,7 +235,7 @@ test_5b() { stop_mds || return 2 stop_ost || return 3 - lsmod | grep -q portals && return 4 + lsmod | grep -q lnet && return 4 return 0 } @@ -254,7 +255,7 @@ test_5c() { stop_mds || return 2 stop_ost || return 3 - lsmod | grep -q portals && return 4 + lsmod | grep -q lnet && return 4 return 0 } @@ -275,7 +276,7 @@ test_5d() { stop_mds || return 3 - lsmod | grep -q portals && return 4 + lsmod | grep -q lnet && return 4 return 0 } @@ -327,14 +328,14 @@ test_9() { start_ost start_mds mount_client $MOUNT - CHECK_PTLDEBUG="`cat /proc/sys/portals/debug`" + CHECK_PTLDEBUG="`cat /proc/sys/lnet/debug`" if [ $CHECK_PTLDEBUG = "1" ]; then echo "lmc --debug success" else echo "lmc --debug: want 1, have $CHECK_PTLDEBUG" return 1 fi - CHECK_SUBSYSTEM="`cat /proc/sys/portals/subsystem_debug`" + CHECK_SUBSYSTEM="`cat /proc/sys/lnet/subsystem_debug`" if [ $CHECK_SUBSYSTEM = "2" ]; then echo "lmc --subsystem success" else @@ -351,14 +352,14 @@ test_9() { # check lconf --ptldebug/subsystem overriding lmc --ptldebug/subsystem start_ost start_mds - 
CHECK_PTLDEBUG="`do_facet mds sysctl portals.debug | cut -d= -f2`" + CHECK_PTLDEBUG="`do_facet mds sysctl lnet.debug | cut -d= -f2`" if [ $CHECK_PTLDEBUG = "3" ]; then echo "lconf --debug success" else echo "lconf --debug: want 3, have $CHECK_PTLDEBUG" return 1 fi - CHECK_SUBSYS="`do_facet mds sysctl portals.subsystem_debug|cut -d= -f2`" + CHECK_SUBSYS="`do_facet mds sysctl lnet.subsystem_debug|cut -d= -f2`" if [ $CHECK_SUBSYS = "20" ]; then echo "lconf --subsystem success" else @@ -439,8 +440,8 @@ test_12() { # test double quote [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG [ -f "$BATCHFILE" ] && rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \"-I 128\"" >> $BATCHFILE # --mkfsoptions "-I 128" do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \"-I 128" >> $BATCHFILE # --mkfsoptions "-I 128 do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "unmatched double quote should return error" # test single quote rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions '-I 128'" >> $BATCHFILE # --mkfsoptions '-I 128' do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? 
if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions '-I 128" >> $BATCHFILE # --mkfsoptions '-I 128 do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "unmatched single quote should return error" # test backslash rm -f $BATCHFILE - echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions \-\I\ \128" >> $BATCHFILE # --mkfsoptions \-\I\ \128 do_lmc -m $XMLCONFIG --batch $BATCHFILE || return $? if [ `sed -n '/>-I 128 $BATCHFILE - echo "--add mds --node localhost --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE + echo "--add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp" > $BATCHFILE + echo "--add mds --node $HOSTNAME --mds mds1 --mkfsoptions -I\ 128\\" >> $BATCHFILE # --mkfsoptions -I\ 128\ do_lmc -m $XMLCONFIG --batch $BATCHFILE && return $? echo "backslash followed by nothing should return error" @@ -510,9 +511,9 @@ test_13() { # check long uuid will be truncated properly and uniquely echo "To generate XML configuration file(with long ost name): $XMLCONFIG" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - do_lmc --add net --node localhost --nid localhost.localdomain --nettype tcp - do_lmc --add mds --node localhost --mds mds1_name_longer_than_31characters - do_lmc --add mds --node localhost --mds mds2_name_longer_than_31characters + do_lmc --add net --node $HOSTNAME --nid $HOSTNAME --nettype tcp + do_lmc --add mds --node $HOSTNAME --mds mds1_name_longer_than_31characters + do_lmc --add mds --node $HOSTNAME --mds mds2_name_longer_than_31characters if [ ! 
-f "$XMLCONFIG" ]; then echo "Error:no file $XMLCONFIG created!" return 1 diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh index 1d90308..0c2ab30 100755 --- a/lustre/tests/echo.sh +++ b/lustre/tests/echo.sh @@ -13,8 +13,9 @@ config=${config:-$(basename $0 .sh).xml} LMC=${LMC:-../utils/lmc -m $config} TMP=${TMP:-/tmp} -SERVER=${SERVER:-localhost} -CLIENT=${CLIENT:-localhost} +HOSTNAME=`hostname` +SERVER=${SERVER:-$HOSTNAME} +CLIENT=${CLIENT:-$HOSTNAME} NET=${NET:-tcp} h2tcp () { @@ -38,6 +39,15 @@ h2iib () { *) echo $1 | sed "s/[^0-9]*//" ;; esac } + +# +# PJK: I believe this is correct +# PTL NID's are of the form +# num@ptl +# +h2ptl () { + echo $1 | sed 's/[^0-9]*//g' +} # FIXME: make LMC not require MDS for obdecho LOV MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index 7007f8e..c6d0a1b 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -336,7 +336,9 @@ test_4() { #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDA=$! + sleep 5 #MDS Portion echo "Failing MDS" @@ -348,7 +350,7 @@ test_4() { reboot_facet mds client_df & - DFPID=$! + DFPIDB=$! sleep 5 #Reintegration @@ -362,7 +364,8 @@ test_4() { start mds #Check FS - wait $DFPID + wait $DFPIDA + wait $DFPIDB clients_recover_osts ost1 echo "Test Lustre stability after MDS failover" client_df || return 1 @@ -385,7 +388,9 @@ test_5() { #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDA=$! + sleep 5 #OST Portion echo "Failing OST" @@ -394,7 +399,9 @@ test_5() { #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDB=$! 
+ sleep 5 #Reintegration echo "Reintegrating OSTs" @@ -407,6 +414,8 @@ test_5() { clients_recover_osts ost2 sleep $TIMEOUT + wait $DFPIDA + wait $DFPIDB client_df || return 2 } run_test 5 "Fifth Failure Mode: OST/OST `date`" @@ -428,7 +437,9 @@ test_6() { #Check FS echo "Test Lustre stability after OST failure" - client_df + client_df & + DFPIDA=$! + sleep 5 #CLIENT Portion echo "Failing CLIENTs" @@ -436,7 +447,9 @@ test_6() { #Check FS echo "Test Lustre stability after CLIENTs failure" - client_df + client_df & + DFPIDB=$! + sleep 5 #Reintegration echo "Reintegrating OST/CLIENTs" @@ -445,6 +458,8 @@ test_6() { reintegrate_clients sleep 5 + wait $DFPIDA + wait $DFPIDB echo "Verifying mount" client_df || return 3 } @@ -539,15 +554,19 @@ test_8() { #Check FS echo "Test Lustre stability after OST failure" - client_df - $PDSH $LIVE_CLIENT "ls -l $MOUNT" - $PDSH $LIVE_CLIENT "rm -f $MOUNT/*_testfile" + client_df & + DFPID=$! + sleep 5 + #non-failout hangs forever here + #$PDSH $LIVE_CLIENT "ls -l $MOUNT" + #$PDSH $LIVE_CLIENT "rm -f $MOUNT/*_testfile" #Reintegration echo "Reintegrating CLIENTs/OST" reintegrate_clients wait_for ost1 start ost1 + wait $DFPID client_df || return 1 client_touch testfile2 || return 2 diff --git a/lustre/tests/liblustre_sanity_uml.sh b/lustre/tests/liblustre_sanity_uml.sh index 9ba1798..ee035d0 100644 --- a/lustre/tests/liblustre_sanity_uml.sh +++ b/lustre/tests/liblustre_sanity_uml.sh @@ -8,6 +8,7 @@ LLIP=127.0.0.1 LTREE_KERNEL=${LTREE_KERNEL:-../../lustre} LTREE_USER=${LTREE_USER:-../../lustre-lib} +HOSTNAME=`hostname` # checking if [ ! 
-e $LTREE_KERNEL ]; then @@ -31,7 +32,7 @@ cleanup() { curdir=`pwd` cd $LTREE_KERNEL/tests - $LCONF --node localhost --cleanup --force $LTREE_USER/tests/$configfile 2>&1 > /dev/null + $LCONF --node $HOSTNAME --cleanup --force $LTREE_USER/tests/$configfile 2>&1 > /dev/null cd $curdir } @@ -39,7 +40,7 @@ configfile=liblustre_sanity_uml.xml # generate config file rm -f $configfile -MDSNODE=localhost OSTNODES=localhost CLIENTS=$LLIP sh uml.sh $configfile +MDSNODE=$HOSTNAME OSTNODES=$HOSTNAME CLIENTS=$LLIP sh uml.sh $configfile if [ ! -e $configfile ]; then echo "fail to generate config file $configfile" exit 1 @@ -55,7 +56,7 @@ fi #setup lustre server cd $LTREE_KERNEL/tests -$LCONF --node localhost --reformat $LTREE_USER/tests/$configfile +$LCONF --node $HOSTNAME --reformat $LTREE_USER/tests/$configfile rc=$? if [ $rc -ne 0 ]; then echo "setup lustre server: error $rc" diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index 4d36c18..7cb8212 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -23,7 +23,7 @@ #include #include -#include +#include #define MAX_LOV_UUID_COUNT 1000 diff --git a/lustre/tests/llmountcleanup.sh b/lustre/tests/llmountcleanup.sh index 7c907b0..b0d703b 100755 --- a/lustre/tests/llmountcleanup.sh +++ b/lustre/tests/llmountcleanup.sh @@ -48,6 +48,6 @@ if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then mv $TMP/debug $TMP/debug-leak.`date +%s` exit 254 fi -lsmod | grep portals && echo "modules still loaded" && exit 1 +lsmod | grep lnet && echo "modules still loaded" && exit 1 exit $rc diff --git a/lustre/tests/llrmount.sh b/lustre/tests/llrmount.sh index d34d8ca..d198c82 100755 --- a/lustre/tests/llrmount.sh +++ b/lustre/tests/llrmount.sh @@ -30,7 +30,7 @@ fi ${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt $@ $conf_opt || exit 2 -[ $DEBUG ] && sysctl -w portals.debug=$DEBUG +[ $DEBUG ] && sysctl -w lnet.debug=$DEBUG if [ "$MOUNT2" ]; then $LLMOUNT -v `hostname`:/mds1/client 
$MOUNT2 || exit 3 diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 0b8bd3a..3d28052 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -7,6 +7,7 @@ config=${1:-`basename $0 .sh`.xml} LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} +HOSTNAME=`hostname` MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} @@ -26,20 +27,45 @@ MDSISIZE=${MDSISIZE:-0} [ "$MDSISIZE" -gt 0 ] && IARG="--inode_size $MDSISIZE" STRIPE_BYTES=${STRIPE_BYTES:-1048576} -STRIPES_PER_OBJ=0 # 0 means stripe over all OSTs +STRIPES_PER_OBJ=1 # 0 means stripe over all OSTs rm -f $config +h2tcp () { + case $1 in + client) echo '\*' ;; + *) echo $1 ;; + esac +} + +h2elan () { + case $1 in + client) echo '\*' ;; + *) echo $1 | sed "s/[^0-9]*//" ;; + esac +} + +h2gm () { + echo `gmlndnid -n$1` +} + +h2iib () { + case $1 in + client) echo '\*' ;; + *) echo $1 | sed "s/[^0-9]*//" ;; + esac +} + # create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11 +${LMC} --add node --node $HOSTNAME || exit 10 +${LMC} --add net --node $HOSTNAME --nid `h2$NETTYPE $HOSTNAME` --nettype $NETTYPE || exit 11 ${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12 [ "x$MDS_MOUNT_OPTS" != "x" ] && MDS_MOUNT_OPTS="--mountfsoptions $MDS_MOUNT_OPTS" # configure mds server -${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE \ +${LMC} --add mds --node $HOSTNAME --mds mds1 --fstype $FSTYPE \ --dev $MDSDEV \ $MDS_MOUNT_OPTS --size $MDSSIZE $JARG $IARG $MDSOPT || exit 20 @@ -47,17 +73,16 @@ ${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE \ OST_MOUNT_OPTS="--mountfsoptions $OST_MOUNT_OPTS" # configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \ +${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 $LOVOPT || exit 20 -${LMC} --add ost --node localhost 
--lov lov1 --fstype $FSTYPE \ +${LMC} --add ost --node $HOSTNAME --lov lov1 --fstype $FSTYPE \ --dev $OSTDEV \ $OST_MOUNT_OPTS --size $OSTSIZE $JARG $OSTOPT || exit 30 # create client config [ "x$CLIENTOPT" != "x" ] && CLIENTOPT="--clientoptions $CLIENTOPT" - -${LMC} --add mtpt --node localhost --path $MOUNT \ +${LMC} --add mtpt --node $HOSTNAME --path $MOUNT \ --mds mds1 --lov lov1 $CLIENTOPT || exit 40 ${LMC} --add mtpt --node client --path $MOUNT2 \ --mds mds1 --lov lov1 $CLIENTOPT || exit 41 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index 1f7be8b..2fa6b35 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -9,6 +9,7 @@ config=${1:-`basename $0 .sh`.xml} LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} +HOSTNAME=`hostname` MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} @@ -36,12 +37,12 @@ JARG="" rm -f $config # create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid `hostname` --nettype $NETTYPE || exit 11 +${LMC} --add node --node $HOSTNAME || exit 10 +${LMC} --add net --node $HOSTNAME --nid $HOSTNAME --nettype $NETTYPE || exit 11 ${LMC} --add net --node client --nid '*' --nettype $NETTYPE || exit 12 # configure mds server -${LMC} --format --add mds --node localhost --mds mds1 --fstype $FSTYPE \ +${LMC} --format --add mds --node $HOSTNAME --mds mds1 --fstype $FSTYPE \ --dev $MDSDEV --size $MDSSIZE $MDSOPT || exit 20 # configure ost @@ -52,7 +53,7 @@ for num in `seq $OSTCOUNT`; do OST=ost$num DEVPTR=OSTDEV$num eval $DEVPTR=${!DEVPTR:=$TMP/$OST-`hostname`} - ${LMC} --add ost --node localhost --lov lov1 --ost $OST --fstype $FSTYPE \ + ${LMC} --add ost --node $HOSTNAME --lov lov1 --ost $OST --fstype $FSTYPE \ --dev ${!DEVPTR} --size $OSTSIZE $JARG $OSTOPT || exit 30 done @@ -60,10 +61,10 @@ done if [ -z "$ECHO_CLIENT" ]; then # create client config [ "x$CLIENTOPT" != "x" ] && CLIENTOPT="--clientoptions $CLIENTOPT" - ${LMC} --add mtpt --node localhost --path 
$MOUNT \ + ${LMC} --add mtpt --node $HOSTNAME --path $MOUNT \ --mds mds1 --lov lov1 $CLIENTOPT || exit 40 ${LMC} --add mtpt --node client --path $MOUNT2 \ --mds mds1 --lov lov1 $CLIENTOPT || exit 41 else - ${LMC} --add echo_client --node localhost --ost lov1 || exit 42 + ${LMC} --add echo_client --node $HOSTNAME --ost lov1 || exit 42 fi diff --git a/lustre/tests/mkdirdeep.c b/lustre/tests/mkdirdeep.c index 8641d1d..d5f1b27 100644 --- a/lustre/tests/mkdirdeep.c +++ b/lustre/tests/mkdirdeep.c @@ -2,8 +2,8 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Compile with: - * cc -I../../portals/include -o mkdirdeep mkdirdeep.c - * -L../../portals/linux/utils -lptlctl + * cc -I../../lnet/include -o mkdirdeep mkdirdeep.c + * -L../../lnet/linux/utils -lptlctl */ #include diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh index 3b2f7e7..28d1b2b 100755 --- a/lustre/tests/oos.sh +++ b/lustre/tests/oos.sh @@ -17,8 +17,8 @@ rm -f $OOS sleep 1 # to ensure we get up-to-date statfs info -#echo -1 > /proc/sys/portals/debug -#echo 0x40a8 > /proc/sys/portals/subsystem_debug +#echo -1 > /proc/sys/lnet/debug +#echo 0x40a8 > /proc/sys/lnet/subsystem_debug #lctl clear #lctl debug_daemon start /r/tmp/debug 1024 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 7607646..c784c50 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -65,7 +65,7 @@ if [ ! 
-z "$EVAL" ]; then fi if [ "$ONLY" == "cleanup" ]; then - sysctl -w portals.debug=0 || true + sysctl -w lnet.debug=0 || true FORCE=--force cleanup exit fi diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index b4662c5..7f3ccb2 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -51,7 +51,7 @@ cleanup() { } if [ "$ONLY" == "cleanup" ]; then - sysctl -w portals.debug=0 + sysctl -w lnet.debug=0 FORCE=--force cleanup exit fi diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index 753b858..cbe7da1 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -40,7 +40,7 @@ cleanup() { } if [ "$ONLY" == "cleanup" ]; then - sysctl -w portals.debug=0 + sysctl -w lnet.debug=0 FORCE=--force cleanup exit fi diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index 14d3fea..23bd3c3 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -47,7 +47,7 @@ cleanup() { } if [ "$ONLY" == "cleanup" ]; then - sysctl -w portals.debug=0 || true + sysctl -w lnet.debug=0 || true FORCE=--force cleanup exit fi @@ -837,8 +837,8 @@ test_42() { createmany -o $DIR/$tfile-%d 800 replay_barrier ost unlinkmany $DIR/$tfile-%d 0 400 - DEBUG42=`sysctl -n portals.debug` - sysctl -w portals.debug=-1 + DEBUG42=`sysctl -n lnet.debug` + sysctl -w lnet.debug=-1 facet_failover ost # osc is evicted, fs is smaller (but only with failout OSTs (bug 7287) @@ -846,7 +846,7 @@ test_42() { #[ $blocks_after -lt $blocks ] || return 1 echo wait for MDS to timeout and recover sleep $((TIMEOUT * 2)) - sysctl -w portals.debug=$DEBUG42 + sysctl -w lnet.debug=$DEBUG42 unlinkmany $DIR/$tfile-%d 400 400 $CHECKSTAT -t file $DIR/$tfile-* && return 2 || true } diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench index 821ac46..b23ea12 100755 --- a/lustre/tests/rundbench +++ b/lustre/tests/rundbench @@ -1,7 +1,7 @@ #!/bin/sh MNT=${MNT:-/mnt/lustre} 
DIR=${DIR:-$MNT/`hostname`} -#[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug +#[ -e /proc/sys/lnet/debug ] && echo 0 > /proc/sys/lnet/debug mkdir -p $DIR TGT=$DIR/client.txt SRC=${SRC:-/usr/lib/dbench/client.txt} diff --git a/lustre/tests/runfailure-net b/lustre/tests/runfailure-net index ce5634b..4e9bdd7 100755 --- a/lustre/tests/runfailure-net +++ b/lustre/tests/runfailure-net @@ -26,7 +26,7 @@ test_fail() { set -vx LCTL=../utils/lctl -OSC=OSC_localhost_UUID +OSC=OSC_`hostname`_UUID MDC=MDC_client1_UUID TIMEOUT=5 # complete in finite time diff --git a/lustre/tests/runiozone b/lustre/tests/runiozone index db74c2e..5eacb9c 100755 --- a/lustre/tests/runiozone +++ b/lustre/tests/runiozone @@ -8,7 +8,7 @@ [ $1 ] && SIZE=$1 LOOP=0 rm -f endiozone -echo 0 > /proc/sys/portals/debug +echo 0 > /proc/sys/lnet/debug while date; do LOOP=`expr $LOOP + 1` echo "Test #$LOOP" diff --git a/lustre/tests/runregression-mds.sh b/lustre/tests/runregression-mds.sh index 7167d2d..d403bb4 100755 --- a/lustre/tests/runregression-mds.sh +++ b/lustre/tests/runregression-mds.sh @@ -48,7 +48,7 @@ USED=`expr $USED + 16` # Some space for the status file THREADS=1 while [ $THREADS -lt 196 ]; do echo "starting $THREADS threads at `date`" - [ $V -gt 0 ] || echo 0 > /proc/sys/portals/debug + [ $V -gt 0 ] || echo 0 > /proc/sys/lnet/debug $SRCDIR/createdestroy /mnt/lustre/file-$$ $COUNT $V $THREADS $SRCDIR/openclose /mnt/lustre/file-$$ $COUNT $THREADS THREADS=`expr $THREADS + 5` diff --git a/lustre/tests/runregression-net.sh b/lustre/tests/runregression-net.sh index 77d6768..58f33ef 100644 --- a/lustre/tests/runregression-net.sh +++ b/lustre/tests/runregression-net.sh @@ -76,7 +76,7 @@ for CMD in test_getattr test_brw_write test_brw_read; do runthreads 1 $CMD 1 1 $PG runthreads 1 $CMD 100 1 $PG - echo 0 > /proc/sys/portals/debug + echo 0 > /proc/sys/lnet/debug runthreads 1 $CMD $COUNT_100 -10 $PG [ "$PGV" ] && runthreads 1 $CMD $COUNT_1000 -10 $PGV diff --git 
a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 4b276a0..4570e08 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11,7 +11,7 @@ ONLY=${ONLY:-"$*"} ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c 45 68"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! -[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 51b 51c 64b 71" +[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 51b 51c 64b 71 101" case `uname -r` in 2.4*) FSTYPE=${FSTYPE:-ext3} ;; @@ -1886,7 +1886,7 @@ run_test 48b "Access removed working dir (should return errors)=" test_48c() { # bug 2350 check_kernel_version 36 || return 0 - #sysctl -w portals.debug=-1 + #sysctl -w lnet.debug=-1 #set -vx mkdir -p $DIR/d48c/dir cd $DIR/d48c/dir @@ -1909,7 +1909,7 @@ run_test 48c "Access removed working subdir (should return errors)" test_48d() { # bug 2350 check_kernel_version 36 || return 0 - #sysctl -w portals.debug=-1 + #sysctl -w lnet.debug=-1 #set -vx mkdir -p $DIR/d48d/dir cd $DIR/d48d/dir @@ -1933,7 +1933,7 @@ run_test 48d "Access removed parent subdir (should return errors)" test_48e() { # bug 4134 check_kernel_version 41 || return 0 - #sysctl -w portals.debug=-1 + #sysctl -w lnet.debug=-1 #set -vx mkdir -p $DIR/d48e/dir cd $DIR/d48e/dir @@ -2317,8 +2317,8 @@ run_test 63 "Verify oig_wait interruption does not crash =======" # bug 2248 - async write errors didn't return to application on sync # bug 3677 - async write errors left page locked test_63b() { - DBG_SAVE=`sysctl -n portals.debug` - sysctl -w portals.debug=-1 + DBG_SAVE=`sysctl -n lnet.debug` + sysctl -w lnet.debug=-1 # ensure we have a grant to do async writes dd if=/dev/zero of=/mnt/lustre/f63b bs=4k count=1 @@ -2328,13 +2328,13 @@ test_63b() { sysctl -w lustre.fail_loc=0x80000406 multiop /mnt/lustre/f63b Owy && \ $LCTL dk /tmp/test63b.debug && \ - sysctl -w portals.debug=$DBG_SAVE && \ + sysctl -w lnet.debug=$DBG_SAVE && \ error "sync didn't return ENOMEM" grep -q locked /proc/fs/lustre/llite/fs*/dump_page_cache && \ $LCTL dk 
/tmp/test63b.debug && \ - sysctl -w portls.debug=$DBG_SAVE && \ + sysctl -w lnet.debug=$DBG_SAVE && \ error "locked page left in cache after async error" || true - sysctl -w portals.debug=$DBG_SAVE + sysctl -w lnet.debug=$DBG_SAVE } run_test 63b "async write errors should be returned to fsync ===" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 7631827..c04e5ce 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -4,6 +4,7 @@ set -e export REFORMAT="" export VERBOSE=false +export GMNALNID=${GMNALNID:-/usr/sbin/gmlndnid} # eg, assert_env LUSTRE MDSNODES OSTNODES CLIENTS assert_env() { @@ -89,15 +90,20 @@ zconf_mount() { do_node $client mkdir $mnt 2> /dev/null || : + # Only supply -o to mount if we have options + if [ -n "$MOUNTOPT" ]; then + MOUNTOPT="-o $MOUNTOPT" + fi + if [ -x /sbin/mount.lustre ] ; then - do_node $client mount -t lustre -o nettype=$NETTYPE,$MOUNTOPT \ - `facet_active_host mds`:/mds_svc/client_facet $mnt || return 1 + do_node $client mount -t lustre $MOUNTOPT \ + `facet_nid mds`:/mds_svc/client_facet $mnt || return 1 else # this is so cheating do_node $client $LCONF --nosetup --node client_facet $XMLCONFIG > \ /dev/null || return 2 - do_node $client $LLMOUNT -o nettype=$NETTYPE,$MOUNTOPT \ - `facet_active_host mds`:/mds_svc/client_facet $mnt || return 4 + do_node $client $LLMOUNT $MOUNTOPT \ + `facet_nid mds`:/mds_svc/client_facet $mnt || return 4 fi [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname` @@ -223,27 +229,30 @@ do_lmc() { h2gm () { if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else - $PDSH $1 $GMNALNID -l | cut -d\ -f2 + ID=`$PDSH $1 $GMNALNID -l | cut -d\ -f2` + echo $ID"@gm" fi } h2tcp() { if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else - echo $1 + echo $1"@tcp" fi } declare -fx h2tcp h2elan() { if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else - echo $1 | sed 's/[^0-9]*//g' + ID=`echo $1 | sed 's/[^0-9]*//g'` + echo $ID"@elan" fi 
} declare -fx h2elan h2openib() { if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else - echo $1 | sed 's/[^0-9]*//g' + ID=`echo $1 | sed 's/[^0-9]*//g'` + echo $ID"@openib" fi } declare -fx h2openib @@ -261,6 +270,10 @@ facet_nid() { echo "The env variable ${facet}_HOST must be set." exit 1 fi + if [ -z "$NETTYPE" ]; then + echo "The env variable NETTYPE must be set." + exit 1 + fi echo `h2$NETTYPE $HOST` } @@ -329,8 +342,7 @@ add_facet() { echo "add facet $facet: `facet_host $facet`" do_lmc --add node --node ${facet}_facet $@ --timeout $TIMEOUT \ --lustre_upcall $UPCALL --ptldebug $PTLDEBUG --subsystem $SUBSYSTEM - do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` \ - --nettype $NETTYPE + do_lmc --add net --node ${facet}_facet --nid `facet_nid $facet` --nettype lnet } add_mds() { diff --git a/lustre/tests/test-lwizard.sh b/lustre/tests/test-lwizard.sh index 5445b0a..4f86411 100755 --- a/lustre/tests/test-lwizard.sh +++ b/lustre/tests/test-lwizard.sh @@ -1,6 +1,7 @@ #!/usr/bin/expect spawn lwizard $argv +HOSTNAME=`hostname` set timeout 3 expect { "overwrite existing" { @@ -8,7 +9,7 @@ expect { } } expect "HOSTNAME for mds" -send -- "localhost\n" +send -- "$HOSTNAME\n" expect "network INTERFACE" send -- "192.168.1.29/24 10.0.0.29/24\n" expect "enter the device or loop file name for mds" @@ -18,7 +19,7 @@ send -- "10000\n" expect "configure FAILOVER" send -- "n\n" expect "HOSTNAME for ost" -send -- "localhost\n" +send -- "$HOSTNAME\n" expect "network INTERFACE" send -- "192.168.1.29/24 10.0.0.29/24\n" expect "device or loop file name for ost" diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index 0157297..7edf0ba 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -57,10 +57,6 @@ CLIENTS=${CLIENTS:-"uml3"} rm -f $config -h2localhost () { - echo localhost -} - h2tcp () { case $1 in client) echo '\*' ;; @@ -76,7 +72,7 @@ h2elan () { } h2gm () { - echo `gmnalnid -n$1` + echo `gmlndnid -n$1` } h2iib () { diff --git 
a/lustre/tests/writemany.c b/lustre/tests/writemany.c index ff9a21c..23be7e4 100644 --- a/lustre/tests/writemany.c +++ b/lustre/tests/writemany.c @@ -182,8 +182,12 @@ int run_one_child(char *file, int thread, int seconds) } if (unlink(filename) < 0) { print_err("unlink", filename, &cur, errno); - rc = errno; - break; + if (errno == ENOENT) { + printf("Ignoring known bug 6082\n"); + } else { + rc = errno; + break; + } } } diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore index 6fbc4de..dc22291 100644 --- a/lustre/utils/.cvsignore +++ b/lustre/utils/.cvsignore @@ -19,5 +19,6 @@ llmount l_getgroups mount.lustre wiretest +llog_reader .*.cmd .*.d diff --git a/lustre/utils/Lustre/lustredb.py b/lustre/utils/Lustre/lustredb.py index 2c9fe6e..3283153 100644 --- a/lustre/utils/Lustre/lustredb.py +++ b/lustre/utils/Lustre/lustredb.py @@ -213,6 +213,9 @@ class LustreDB: def get_version(self): return self.get_val('version') + def get_mtime(self): + return self.get_val('mtime') + class LustreDB_XML(LustreDB): def __init__(self, dom, root_node): LustreDB.__init__(self) diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 3e8498f..6283f20 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -4,19 +4,19 @@ SUBDIRS = Lustre AM_CFLAGS=$(LLCFLAGS) AM_CPPFLAGS=$(LLCPPFLAGS) -DLUSTRE_UTILS=1 -AM_LDFLAGS := -L$(top_builddir)/portals/utils +AM_LDFLAGS := -L$(top_builddir)/lnet/utils -LIBPTLCTL := $(top_builddir)/portals/utils/libptlctl.a +LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a sbin_scripts = lconf lmc llanalyze llstat.pl llobdstat.pl lactive \ load_ldap.sh lrun lwizard -bin_scripts = lfind lstripe +bin_scripts = lfind lstripe llog_reader if UTILS rootsbin_SCRIPTS = mount.lustre sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest llmount \ - l_getgroups -bin_PROGRAMS = lfs + l_getgroups +bin_PROGRAMS = lfs llog_reader lib_LIBRARIES = liblustreapi.a sbin_SCRIPTS = $(sbin_scripts) bin_SCRIPTS = $(bin_scripts) @@ -42,6 
+42,11 @@ lload_SOURCES = lload.c obdio_SOURCES = obdio.c obdiolib.c obdiolib.h obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h lfs_SOURCES = lfs.c parser.c obd.c + +llog_reader_LDADD := $(LIBREADLINE) $(LIBPTLCTL) +llog_reader_DEPENDENCIES := $(LIBPTLCTL) +llog_reader_SOURCES = llog_reader.c + llmount_SOURCES = llmount.c llmount_CFLAGS = $(LLMOUNT_GM_CFLAGS) llmount_LDADD = $(LIBREADLINE) $(LIBPTLCTL) $(LLMOUNT_GM_LDADD) diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 0da540e..645c30e 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -58,7 +58,7 @@ MAXTCPBUF = 16777216 # Maximum number of devices to search for. # (the /dev/loop* nodes need to be created beforehand) MAX_LOOP_DEVICES = 256 -PORTALS_DIR = '../portals' +PORTALS_DIR = '../lnet' # Needed to call lconf --record CONFIG_FILE = "" @@ -201,133 +201,6 @@ class CommandError (exceptions.Exception): else: print self.cmd_err - -# ============================================================ -# handle daemons, like the acceptor -class DaemonHandler: - """ Manage starting and stopping a daemon. Assumes daemon manages - it's own pid file. 
""" - - def __init__(self, cmd): - self.command = cmd - self.path ="" - - def start(self): - if self.running(): - log(self.command, "already running.") - if not self.path: - self.path = find_prog(self.command) - if not self.path: - panic(self.command, "not found.") - ret, out = runcmd(self.path +' '+ self.command_line()) - if ret: - # wait for up to 15 seconds checking to see if a competing daemon - # starts successfully - loop_count = 15 - while (not self.running()) and (loop_count > 0): - loop_count = loop_count - 1 - time.sleep(1) - - if not self.running(): - raise CommandError(self.path, out, ret) - - def stop(self): - if self.running(): - pid = self.read_pidfile() - if not pid: - return - try: - log ("killing process", pid) - os.kill(pid, 15) - #time.sleep(1) # let daemon die - except OSError, e: - log("unable to kill", self.command, e) - - # wait for the dameon to die for up to 15 seconds - # before complaining about it - loop_count = 15 - while self.running() and (self.read_pidfile == pid) and (loop_count > 0): - loop_count = loop_count - 1 - time.sleep(1) - if self.running() and (self.read_pidfile == pid): - log("unable to kill", self.command, "process", pid) - - def running(self): - pid = self.read_pidfile() - if pid: - try: - os.kill(pid, 0) - except OSError: - self.clean_pidfile() - else: - return 1 - return 0 - - def read_pidfile(self): - try: - fp = open(self.pidfile(), 'r') - pid = int(fp.read()) - fp.close() - return pid - except ValueError: - print "WARNING: invalid pid in %s, removed" % self.pidfile() - print "WARNING: You may need to stop acceptor by yourself" - os.unlink(self.pidfile()) - return 0 - except IOError: - return 0 - - def clean_pidfile(self): - """ Remove a stale pidfile """ - log("removing stale pidfile:", self.pidfile()) - try: - os.unlink(self.pidfile()) - except OSError, e: - log(self.pidfile(), e) - -class AcceptorHandler(DaemonHandler): - def __init__(self, port, net_type): - DaemonHandler.__init__(self, "acceptor") - self.port 
= port - self.net_type = net_type - self.flags = '' - if config.allow_unprivileged_port: - self.flags = '-p' - - def pidfile(self): - return "/var/run/%s-%d.pid" % (self.command, self.port) - - def command_line(self): - return string.join(map(str,(self.flags, self.port))) - -acceptors = {} - -# start the acceptors -def run_acceptors(): - if config.lctl_dump or config.record: - return - for port in acceptors.keys(): - daemon = acceptors[port] - if daemon.net_type == 'tcp' and not daemon.running(): - daemon.start() - -def run_one_acceptor(port): - if config.lctl_dump or config.record: - return - if acceptors.has_key(port): - daemon = acceptors[port] - if daemon.net_type == 'tcp' and not daemon.running(): - daemon.start() - else: - panic("run_one_acceptor: No acceptor defined for port:", port) - -def stop_acceptor(port): - if acceptors.has_key(port): - daemon = acceptors[port] - if daemon.net_type == 'tcp' and daemon.running(): - daemon.stop() - - # ============================================================ # handle lctl interface class LCTLInterface: @@ -434,6 +307,12 @@ class LCTLInterface: raise CommandError(self.lctl, out, rc) return rc, out + def unconfigure_network(self): + """get lnet to unreference itself""" + cmds = """ + network unconfigure""" + self.run(cmds) + def clear_log(self, dev, log): """ clear an existing log """ cmds = """ @@ -443,106 +322,25 @@ class LCTLInterface: quit """ % (dev, log) self.run(cmds) - def network(self, net, nid): - """ set mynid """ - cmds = """ - network %s - mynid %s - quit """ % (net, nid) - self.run(cmds) - - # add an interface - def add_interface(self, net, ip, netmask = ""): - """ add an interface """ - cmds = """ - network %s - add_interface %s %s - quit """ % (net, ip, netmask) - self.run(cmds) - - # delete an interface - def del_interface(self, net, ip): - """ delete an interface """ - cmds = """ - network %s - del_interface %s - quit """ % (net, ip) - self.run(cmds) - # create a new connection def add_uuid(self, 
net_type, uuid, nid): - cmds = "\n add_uuid %s %s %s" %(uuid, nid, net_type) + if net_type != 'lnet' and string.find(nid,'@') < 0: + nidstr = nid + "@" + net_type + else: + nidstr = nid + cmds = "\n add_uuid %s %s" %(uuid, nidstr) self.run(cmds) - def add_peer(self, net_type, nid, hostaddr, port): - if net_type in ('tcp','openib','ra','cray_kern_nal') and not config.lctl_dump: - cmds = """ - network %s - add_peer %s %s %d - quit""" % (net_type, - nid, hostaddr, port ) - self.run(cmds) - elif net_type in ('iib',) and not config.lctl_dump: - cmds = """ - network %s - add_peer %s - quit""" % (net_type, - nid ) - self.run(cmds) - elif net_type in ('vib',) and not config.lctl_dump: - cmds = """ - network %s - add_peer %s %s - quit""" % (net_type, - nid, hostaddr ) - self.run(cmds) - def connect(self, srv): - self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) - - if config.lctl_dump: - return - - if srv.net_type in ('tcp',): - host = socket.gethostname() - node_list = [] - if config.node: - node_list.append(config.node) - else: - if len(host) > 0: - node_list.append(host) - node_list.append('localhost') - - node_db = None - for h in node_list: # we are quite sure we can find the node_db - node_db = toplustreDB.lookup_name(h, 'node') - if node_db: - break - - hostaddr = None - for netuuid in node_db.get_networks(): - localnet = toplustreDB.lookup(netuuid) - localnet = Network(localnet) - if localnet.net_type != 'tcp': - continue # only tcp understands multiple hostaddrs - - # always true for tcp network - if localnet.hostaddr[0] and srv.hostaddr[0]: - for lnet in localnet.hostaddr: - for pnet in srv.hostaddr: - if srv.netmatch(lnet, pnet) != 0: - hostaddr = string.split(pnet, '/')[0] - #find one is enough, should break the top-most loop - break - if hostaddr: break - else: # can't find a match - hostaddr = string.split(srv.hostaddr[0], '/')[0] - break - - self.add_peer(srv.net_type, srv.nid, hostaddr, srv.port) - - if srv.net_type in ('openib','iib','vib','ra'): - 
self.add_peer(srv.net_type, srv.nid, srv.hostaddr[0], srv.port) + if not srv.nid_uuid: + panic('nid_uuid not set for ', srv.net_type, srv.nid) + hostaddr = srv.db.get_hostaddr() + if len(hostaddr) > 1: + panic('multiple --hostaddr for ', srv.nid_uuid, ' not supported') + elif len(hostaddr) == 1 and hostaddr[0] != srv.nid: + panic('different --hostaddr and --nid for ', srv.nid_uuid, ' not supported') + else: + self.add_uuid(srv.net_type, srv.nid_uuid, srv.nid) # Recover a device def recover(self, dev_name, new_conn): @@ -551,77 +349,11 @@ class LCTLInterface: recover %s""" %(dev_name, new_conn) self.run(cmds) - # add a route to a range - def add_route(self, net, gw, lo, hi): - cmds = """ - network %s - add_route %s %s %s - quit """ % (net, - gw, lo, hi) - try: - self.run(cmds) - except CommandError, e: - log ("ignore: ") - e.dump() - - def del_route(self, net, gw, lo, hi): - cmds = """ - ignore_errors - network %s - del_route %s %s %s - quit """ % (net, gw, lo, hi) - self.run(cmds) - - # add a route to a host - def add_route_host(self, net, uuid, gw, tgt): - self.add_uuid(net, uuid, tgt) - cmds = """ - network %s - add_route %s %s - quit """ % (net, - gw, tgt) - try: - self.run(cmds) - except CommandError, e: - log ("ignore: ") - e.dump() - - # add a route to a range - def del_route_host(self, net, uuid, gw, tgt): - self.del_uuid(uuid) - cmds = """ - ignore_errors - network %s - del_route %s %s - quit """ % (net, gw, tgt) - self.run(cmds) - - - def del_peer(self, net_type, nid, hostaddr): - if net_type in ('tcp',) and not config.lctl_dump: - cmds = """ - ignore_errors - network %s - del_peer %s %s single_share - quit""" % (net_type, - nid, hostaddr) - self.run(cmds) - elif net_type in ('openib','iib','vib','ra') and not config.lctl_dump: - cmds = """ - ignore_errors - network %s - del_peer %s single_share - quit""" % (net_type, - nid) - self.run(cmds) - # disconnect one connection def disconnect(self, srv): + if not srv.nid_uuid: + panic('nid_uuid not set for ', 
srv.net_type, srv.nid) self.del_uuid(srv.nid_uuid) - if srv.net_type in ('tcp','openib','iib','vib','ra') and not config.lctl_dump: - if srv.hostaddr[0]: - hostaddr = string.split(srv.hostaddr[0], '/')[0] - self.del_peer(srv.net_type, srv.nid, hostaddr) def del_uuid(self, uuid): cmds = """ @@ -630,15 +362,6 @@ class LCTLInterface: quit""" % (uuid,) self.run(cmds) - # disconnect all - def disconnectAll(self, net): - cmds = """ - ignore_errors - network %s - disconnect - quit""" % (net) - self.run(cmds) - def attach(self, type, name, uuid): cmds = """ attach %s %s %s @@ -1059,77 +782,6 @@ def def_mount_options(fstype, target): return mountfsoptions return "" -def sys_get_elan_position_file(): - procfiles = ["/proc/elan/device0/position", - "/proc/qsnet/elan4/device0/position", - "/proc/qsnet/elan3/device0/position"] - for p in procfiles: - if os.access(p, os.R_OK): - return p - return "" - -def sys_get_local_nid(net_type, wildcard, cluster_id): - """Return the local address for the network type.""" - local = "" - - # don't need a real nid for config log - client will replace (bug5619) - if config.record: - local = "54321" - elif net_type in ('tcp','openib','iib','vib','ra','cray_kern_nal'): - if ':' in wildcard: - iface, star = string.split(wildcard, ':') - local = if2addr(iface) - elif net_type == 'vib': - local = if2addr('ipoib0') - else: - host = socket.gethostname() - local = socket.gethostbyname(host) - if not local: - panic("unable to determine ip for:", wildcard) - elif net_type == 'elan': - # awk '/NodeId/ { print $2 }' 'sys_get_elan_position_file()' - f = sys_get_elan_position_file() - if not f: - panic("unable to determine local Elan ID") - try: - fp = open(f, 'r') - lines = fp.readlines() - fp.close() - for l in lines: - a = string.split(l) - if a[0] == 'NodeId': - elan_id = a[1] - break - try: - nid = my_int(cluster_id) + my_int(elan_id) - local = "%d" % (nid) - except ValueError, e: - local = elan_id - except IOError, e: - log(e) - elif net_type == 'lo': 
- fixme("automatic local address for loopback") - elif net_type == 'gm': - found=0 - paths = ['/usr/sbin'] - if config.portals: - paths = paths + [config.portals + "/utils"] - for path in paths: - gmnalnid = path + '/gmnalnid' - if (os.path.exists(gmnalnid) and os.access(gmnalnid, os.X_OK)): - found=1 - break - if not found: - panic (gmnalnid, " not found or not executable on node with GM networking") - (rc, local) = run(gmnalnid, "-l") - if rc: - panic (gmnalnid, " failed") - local=string.rstrip(local[0]) - else: - fixme("automatic local address for net type %s" % net_type) - - return local - def sys_get_branch(): """Returns kernel release""" return os.uname()[2][:3] @@ -1206,16 +858,24 @@ class kmod: if mod_loaded(mod) and not config.noexec: continue log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) + if mod == 'lnet': + #For LNET we really need modprobe to load defined LNDs + run('/sbin/modprobe lnet') + #But if that fails, try insmod anyhow if src_dir: module = find_module(src_dir, dev_dir, mod) if not module: panic('module not found:', mod) (rc, out) = run('/sbin/insmod', module) if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? Check dmesg.") raise CommandError('insmod', out, rc) else: (rc, out) = run('/sbin/modprobe', mod) if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? 
Check dmesg.") raise CommandError('modprobe', out, rc) def cleanup_module(self): @@ -1226,27 +886,40 @@ class kmod: for src_dir, dev_dir, mod in rev: if not mod_loaded(mod) and not config.noexec: continue - if mod == 'portals': - if not config.noexec: - # handle modules loaded automatically, but not in config - if mod_loaded("ksocknal"): - log('try unloading module: ksocknal') - (rc, out) = run('/sbin/rmmod ksocknal') - if mod_loaded("kqswnal"): - log('try unloading module: kqswnal') - (rc, out) = run('/sbin/rmmod kqswnal') - if mod_loaded("kptlrouter"): - log('try unloading module: kptlrouter') - (rc, out) = run('/sbin/rmmod kptlrouter') + if mod == 'ksocklnd' and not config.noexec: + # Ignore ksocklnd in module list (lnet will remove) + continue + log('unloading module:', mod) + if mod == 'lnet' and not config.noexec: + # remove any self-ref portals created + lctl.unconfigure_network() if config.dump: # debug hack lctl.dump(config.dump) - log('unloading module:', mod) + log('unloading the network') + lctl.unconfigure_network() + if mod_loaded("ksocklnd"): + run('/sbin/rmmod ksocklnd') + if mod_loaded("kqswlnd"): + run('/sbin/rmmod kqswlnd') + if mod_loaded("kgmlnd"): + run('/sbin/rmmod kgmlnd') + if mod_loaded("kopeniblnd"): + run('/sbin/rmmod kopeniblnd') + if mod_loaded("kiiblnd"): + run('/sbin/rmmod kiiblnd') + if mod_loaded("kviblnd"): + run('/sbin/rmmod kviblnd') + if mod_loaded("kralnd"): + run('/sbin/rmmod kralnd') + if mod_loaded("kptllnd"): + run('/sbin/rmmod kptllnd') (rc, out) = run('/sbin/rmmod', mod) if rc: log('! 
unable to unload module:', mod) logall(out) + # ============================================================ # Classes to prepare and cleanup the various objects # @@ -1301,252 +974,32 @@ class Module: return self.safe_to_clean() class Network(Module): - def __init__(self,db): + def __init__(self,db,nid_uuid=0): Module.__init__(self, 'NETWORK', db) self.net_type = self.db.get_val('nettype') self.nid = self.db.get_val('nid', '*') self.cluster_id = self.db.get_val('clusterid', "0") self.port = self.db.get_val_int('port', 0) - - if '*' in self.nid: - self.nid = sys_get_local_nid(self.net_type, self.nid, self.cluster_id) - if not self.nid: - panic("unable to set nid for", self.net_type, self.nid, self.cluster_id) - self.generic_nid = 1 - debug("nid:", self.nid) - else: - self.generic_nid = 0 - - self.nid_uuid = self.nid_to_uuid(self.nid) - - self.hostaddr = self.db.get_hostaddr() - if len(self.hostaddr) == 0: - self.hostaddr.append(self.nid) - if '*' in self.hostaddr[0]: - self.hostaddr[0] = sys_get_local_nid(self.net_type, self.hostaddr[0], self.cluster_id) - if not self.hostaddr[0]: - panic("unable to set hostaddr for", self.net_type, self.hostaddr[0], self.cluster_id) - debug("hostaddr:", self.hostaddr[0]) - - self.add_portals_module("libcfs", 'libcfs') - self.add_portals_module("portals", 'portals') - if node_needs_router(): - self.add_portals_module("router", 'kptlrouter') - if self.net_type == 'tcp': - self.add_portals_module("knals/socknal", 'ksocknal') - if self.net_type == 'elan': - self.add_portals_module("knals/qswnal", 'kqswnal') - if self.net_type == 'gm': - self.add_portals_module("knals/gmnal", 'kgmnal') - if self.net_type == 'openib': - self.add_portals_module("knals/openibnal", 'kopenibnal') - if self.net_type == 'iib': - self.add_portals_module("knals/iibnal", 'kiibnal') - if self.net_type == 'vib': - self.add_portals_module("knals/vibnal", 'kvibnal') - if self.net_type == 'lo': - self.add_portals_module("knals/lonal", 'klonal') - if self.net_type == 
'ra': - self.add_portals_module("knals/ranal", 'kranal') - - def nid_to_uuid(self, nid): - return "NID_%s_UUID" %(nid,) + self.nid_uuid = nid_uuid + self.add_portals_module('libcfs', 'libcfs') + self.add_portals_module('lnet', 'lnet') + # Add the socklnd for developers without modprobe.conf (umls) + self.add_portals_module('klnds/socklnd', 'ksocklnd') def prepare(self): if is_network_prepared(): return - self.info(self.net_type, self.nid, self.port) - if not (config.record and self.generic_nid): - lctl.network(self.net_type, self.nid) + self.info(self.net_type, self.nid) if self.net_type == 'tcp': sys_tweak_socknal() - for hostaddr in self.db.get_hostaddr(): - ip = string.split(hostaddr, '/')[0] - if len(string.split(hostaddr, '/')) == 2: - netmask = string.split(hostaddr, '/')[1] - else: - netmask = "" - lctl.add_interface(self.net_type, ip, netmask) if self.net_type == 'elan': sys_optimize_elan() - if self.net_type == 'openib': - if self.port == 0: - panic("no port set for", self.net_type, self.hostaddr[0]) - sysctl('/proc/sys/openibnal/port', self.port) - if self.net_type == 'ra': - if self.port == 0: - panic("no port set for", self.net_type, self.hostaddr[0]) - sysctl('/proc/sys/ranal/port', self.port) - if self.port and node_is_router(): - run_one_acceptor(self.port) - self.connect_peer_gateways() - - def connect_peer_gateways(self): - for router in self.db.lookup_class('node'): - if router.get_val_int('router', 0): - for netuuid in router.get_networks(): - net = self.db.lookup(netuuid) - gw = Network(net) - if (gw.cluster_id == self.cluster_id and - gw.net_type == self.net_type): - if gw.nid != self.nid: - lctl.connect(gw) - - def disconnect_peer_gateways(self): - for router in self.db.lookup_class('node'): - if router.get_val_int('router', 0): - for netuuid in router.get_networks(): - net = self.db.lookup(netuuid) - gw = Network(net) - if (gw.cluster_id == self.cluster_id and - gw.net_type == self.net_type): - if gw.nid != self.nid: - try: - 
lctl.disconnect(gw) - except CommandError, e: - print "disconnect failed: ", self.name - e.dump() - cleanup_error(e.rc) - - def safe_to_clean(self): - return not is_network_prepared() - - def cleanup(self): - self.info(self.net_type, self.nid, self.port) - if self.port: - stop_acceptor(self.port) - if node_is_router(): - self.disconnect_peer_gateways() - if self.net_type == 'tcp': - for hostaddr in self.db.get_hostaddr(): - ip = string.split(hostaddr, '/')[0] - lctl.del_interface(self.net_type, ip) - - def my_inet_aton(self, net): - split = net.split('.') - if len(split) != 4: - raise ValueError, "Invalid IPv4 address %s" % net - - naddr = 0 - i = 0 - for n in split: - try: - naddr = naddr + int(n) * (256 ** (3-i)) - except: - raise ValueError, "Invalid IPv4 address %s" % net - i = i + 1 - return naddr - - def tointAddr(self, net): - """convert a net address/mask into (numeric-address, bitmap-mask)""" - try: - addr, mask = string.split(net, '/') - except: - addr = net - mask = 24 #eeb told me that kernel uses this value by default - - try: - mask = int(mask) - assert(mask >= 1 and mask <= 32) - mask = bitmap_32(mask) - except: - try: - mask = self.my_inet_aton(mask) - except: - raise ValueError("Invalid netmask %s" % str(mask)) - - try: - addr = socket.gethostbyname(addr) - naddr = self.my_inet_aton(addr) - except: - raise ValueError("Invalid host %s" % addr) - - return (naddr, mask) - - def netmatch(self, net1, net2): - # XXX this is only valid for IPv4 address - try: - addr1, mask1 = self.tointAddr(net1) - addr2, mask2 = self.tointAddr(net2) - except: - return 0 - - # If the masks are not identical we need to know if the 2 interfaces - # can talk to each other, so the most restrictive will tell us that - mask = max(mask1, mask2) - if addr1 & mask == addr2 & mask: - debug("Net1: %s Net2: %s Match: %s to %s" % (net1, net2, addr1 & mask, addr2 & mask)) - return 1 - return 0 - -def bitmap_32(n): - """n should be in [1, 32]""" - if n < 0 or n > 32: - raise 
ValueError("A number between 1 and 32 is expected. (not %d)" % n) - return (-1) << (32-n) - -class RouteTable(Module): - def __init__(self,db): - Module.__init__(self, 'ROUTES', db) - - def server_for_route(self, net_type, gw, gw_cluster_id, tgt_cluster_id, - lo, hi): - # only setup connections for tcp, ib, and ra NALs - srvdb = None - if not net_type in ('tcp','openib','iib','vib','ra'): - return None - - # connect to target if route is to single node and this node is the gw - if lo == hi and local_interface(net_type, gw_cluster_id, gw): - if not local_cluster(net_type, tgt_cluster_id): - panic("target", lo, " not on the local cluster") - srvdb = self.db.nid2server(lo, net_type, gw_cluster_id) - # connect to gateway if this node is not the gw - elif (local_cluster(net_type, gw_cluster_id) - and not local_interface(net_type, gw_cluster_id, gw)): - srvdb = self.db.nid2server(gw, net_type, gw_cluster_id) - else: - return None - - if not srvdb: - panic("no server for nid", lo) - return None - - return Network(srvdb) - - def prepare(self): - if is_network_prepared(): - return - self.info() - for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): - lctl.add_route(net_type, gw, lo, hi) - srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi) - if srv: - lctl.connect(srv) def safe_to_clean(self): return not is_network_prepared() def cleanup(self): - if is_network_prepared(): - # the network is still being used, don't clean it up - return - for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): - srv = self.server_for_route(net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi) - if srv: - try: - lctl.disconnect(srv) - except CommandError, e: - print "disconnect failed: ", self.name - e.dump() - cleanup_error(e.rc) - - try: - lctl.del_route(net_type, gw, lo, hi) - except CommandError, e: - print "del_route failed: ", self.name - e.dump() - cleanup_error(e.rc) + self.info(self.net_type, 
self.nid) # This is only needed to load the modules; the LDLM device # is now created automatically. @@ -1696,8 +1149,6 @@ class MDSDEV(Module): self.active = 1 else: self.active = 0 - if self.active and config.group and config.group != mds.get_val('group', mds.get_val('name')): - self.active = 0 self.inode_size = self.db.get_val_int('inodesize', 0) if self.inode_size == 0: @@ -1756,7 +1207,6 @@ class MDSDEV(Module): # run write_conf automatically, if --reformat used self.write_conf() self.info(self.devpath, self.fstype, self.size, self.format) - run_acceptors() # never reformat here blkdev = block_dev(self.devpath, self.size, self.fstype, 0, self.format, self.journal_size, self.inode_size, @@ -1789,8 +1239,8 @@ class MDSDEV(Module): except CommandError, e: if e.rc == 2: - panic("MDS is missing the config log. Need to run " + - "lconf --write_conf.") + panic("MDS failed to start. Check the syslog for details." + + " (May need to run lconf --write-conf)") else: raise e @@ -1844,7 +1294,7 @@ class MDSDEV(Module): noexec_opt = ('', '-n') ret, out = run (sys.argv[0], noexec_opt[old_noexec == 1], - " --record --nomod", + " -v --record --nomod", "--record_log", client_name, "--record_device", self.name, "--node", client_name, @@ -1867,6 +1317,21 @@ class MDSDEV(Module): Module.cleanup(self) clean_loop(self.devpath) + #change the mtime of LLOG to match the XML creation time + if toplustreDB.get_mtime(): + mtime = string.atof(toplustreDB.get_mtime()) + runcmd("mkdir /tmp/lustre-XXXX/") + if is_block(self.devpath): + ret, out = runcmd("mount %s /tmp/lustre-XXXX/" %self.devpath) + else: + ret, out = runcmd("mount -o loop %s /tmp/lustre-XXXX/" %self.devpath) + if ret: + print out[0] + os.utime("/tmp/lustre-XXXX/LOGS", (mtime, mtime)) + runcmd("umount -f /tmp/lustre-XXXX/") + else: + print "XML file does not contain mtime, skip mtime checking." 
+ def mds_remaining(self): out = lctl.device_list() for s in out: @@ -1943,8 +1408,6 @@ class OSD(Module): self.active = 1 else: self.active = 0 - if self.active and config.group and config.group != ost.get_val('group', ost.get_val('name')): - self.active = 0 self.target_dev_uuid = self.uuid self.uuid = target_uuid @@ -1972,7 +1435,6 @@ class OSD(Module): return self.info(self.osdtype, self.devpath, self.size, self.fstype, self.format, self.journal_size, self.inode_size) - run_acceptors() if self.osdtype == 'obdecho': blkdev = '' else: @@ -2101,17 +1563,12 @@ class Client(Module): if is_prepared(self.name): self.cleanup() try: - srv_list = find_local_servers(self.get_servers()) + srv_list = self.get_servers() + debug('dbg CLIENT __prepare__:', self.target_uuid, srv_list) for srv in srv_list: lctl.connect(srv) - - routes = find_route(self.get_servers()) - for (srv, r) in routes: - lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3]) - srv_list.append(srv) - if len(srv_list) == 0: - panic("no local servers and no route to", self.target_uuid) + panic("no servers for ", self.target_uuid) except CommandError, e: if not ignore_connect_failure: raise e @@ -2134,17 +1591,10 @@ class Client(Module): this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) if len(this_nets) == 0: panic ("Unable to find a backup server for:", tgt_dev_uuid) - srv_list = find_local_servers(this_nets) - if srv_list: - for srv in srv_list: + else: + for srv in this_nets: lctl.connect(srv) break - else: - routes = find_route(this_nets); - if len(routes) == 0: - panic("no route to", tgt_dev_uuid) - for (srv, r) in routes: - lctl.add_route_host(r[0]. 
srv.nid_uuid, r[1], r[3]) if srv: lctl.add_conn(self.name, srv.nid_uuid); @@ -2152,25 +1602,16 @@ class Client(Module): def cleanup(self): if is_prepared(self.name): Module.cleanup(self) - try: - routes = find_route(self.get_servers()) - for (srv, r) in routes: - lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3]) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - + srv_list = self.get_servers() + for srv in srv_list: + lctl.disconnect(srv) for tgt_dev_uuid in self.backup_targets: - this_net = get_ost_net(toplustreDB, tgt_dev_uuid) - srv_list = find_local_servers(self.get_servers()) - if srv_list: - for srv in srv_list: - lctl.disconnect(srv) - break + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a backup server for:", tgt_dev_uuid) else: - for (srv, r) in find_route(this_net): - lctl.del_route_host(r[0]. srv.nid_uuid, r[1], r[3]) + for srv in this_nets: + lctl.disconnect(srv) class MDC(Client): def __init__(self, db, uuid, fs_name): @@ -2238,7 +1679,6 @@ class ECHO_CLIENT(Module): def prepare(self): if is_prepared(self.name): return - run_acceptors() self.osc.prepare() # XXX This is so cheating. 
-p self.info(self.obd_uuid) @@ -2307,7 +1747,6 @@ class Mountpoint(Module): if fs_is_mounted(self.path): log(self.path, "already mounted.") return - run_acceptors() self.vosc.prepare() self.mdc.prepare() mdc_name = self.mdc.name @@ -2381,7 +1820,8 @@ def get_ost_net(self, osd_uuid): " node_ref:", node_uuid) for net_uuid in node.get_networks(): db = node.lookup(net_uuid) - srv_list.append(Network(db)) + net = Network(db, node_uuid) + srv_list.append(net) return srv_list @@ -2391,8 +1831,6 @@ def getServiceLevel(self): ret=0; if type in ('network',): ret = 5 - elif type in ('routetbl',): - ret = 6 elif type in ('ldlm',): ret = 20 elif type in ('osd', 'cobd'): @@ -2443,101 +1881,6 @@ def get_mdc(db, uuid, fs_name, mds_uuid): mdc = MDC(mds_db, uuid, fs_name) return mdc -############################################################ -# routing ("rooting") - -# list of (nettype, cluster_id, nid) -local_clusters = [] - -def find_local_clusters(node_db): - global local_clusters - for netuuid in node_db.get_networks(): - net = node_db.lookup(netuuid) - srv = Network(net) - debug("add_local", netuuid) - local_clusters.append((srv.net_type, srv.cluster_id, srv.nid)) - if srv.port > 0: - if not acceptors.has_key(srv.port): - acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type) - -# This node is a gateway. -is_router = 0 -def node_is_router(): - return is_router - -# If there are any routers found in the config, then this will be true -# and all nodes will load kptlrouter. -needs_router = 0 -def node_needs_router(): - return needs_router or is_router - -# list of (nettype, gw, tgt_cluster_id, lo, hi) -# Currently, these local routes are only added to kptlrouter route -# table if they are needed to connect to a specific server. This -# should be changed so all available routes are loaded, and the -# ptlrouter can make all the decisions. -local_routes = [] - -def find_local_routes(lustre): - """ Scan the lustre config looking for routers . Build list of - routes. 
""" - global local_routes, needs_router - local_routes = [] - list = lustre.lookup_class('node') - for router in list: - if router.get_val_int('router', 0): - needs_router = 1 - for (local_type, local_cluster_id, local_nid) in local_clusters: - gw = None - for netuuid in router.get_networks(): - db = router.lookup(netuuid) - if (local_type == db.get_val('nettype') and - local_cluster_id == db.get_val('clusterid')): - gw = db.get_val('nid') - break - if gw: - debug("find_local_routes: gw is", gw) - for route in router.get_local_routes(local_type, gw): - local_routes.append(route) - debug("find_local_routes:", local_routes) - - -def find_local_servers(srv_list): - result = [] - - for srv in srv_list: - if local_cluster(srv.net_type, srv.cluster_id): - result.append(srv) - return result - -def local_cluster(net_type, cluster_id): - for cluster in local_clusters: - if net_type == cluster[0] and cluster_id == cluster[1]: - return 1 - return 0 - -def local_interface(net_type, cluster_id, nid): - for cluster in local_clusters: - if (net_type == cluster[0] and cluster_id == cluster[1] - and nid == cluster[2]): - return 1 - return 0 - -def find_route(srv_list): - result = [] - frm_type = local_clusters[0][0] - for srv in srv_list: - debug("find_route: srv:", srv.nid, "type: ", srv.net_type) - to_type = srv.net_type - to = srv.nid - cluster_id = srv.cluster_id - debug ('looking for route to', to_type, to) - for r in local_routes: - debug("find_route: ", r) - if (r[3] <= to and to <= r[4]) and cluster_id == r[2]: - result.append((srv, r)) - return result - def get_active_target(db): target_uuid = db.getUUID() target_name = db.getName() @@ -2568,8 +1911,6 @@ def newService(db): n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID") elif type == 'network': n = Network(db) - elif type == 'routetbl': - n = RouteTable(db) elif type == 'osd': n = OSD(db) elif type == 'cobd': @@ -2603,10 +1944,15 @@ def for_each_profile(db, prof_list, operation): def doWriteconf(services): if config.nosetup: 
return + have_mds = 0 for s in services: if s[1].get_class() == 'mdsdev': n = newService(s[1]) n.write_conf() + have_mds = 1 + if have_mds == 0: + panic("Cannot find mds device, please run --write_conf on the mds node.") + def doSetup(services): if config.nosetup: @@ -2684,10 +2030,51 @@ def doMakeServiceScript(services): extra_error = "" panic("Error creating " + target_symlink + ": " + e[1] + extra_error) +# Check mtime of LLOG +def doCheckMtime(lustreDB, hosts): + for h in hosts: + node_db = lustreDB.lookup_name(h, 'node') + if node_db: + break + if not node_db: + return + + mdsdb = 0 + prof_list = node_db.get_refs('profile') + for prof_uuid in prof_list: + prof_db = node_db.lookup(prof_uuid) + if prof_db: + services = getServices(prof_db) + for s in services: + if s[1].get_class() == 'mdsdev': + mdsdb = s[1] + break + if mdsdb: + if lustreDB.get_mtime(): + print "Checking mtime of the LLOG" + devpath = mdsdb.get_val('devpath','') + xmtime = string.atof(lustreDB.get_mtime()) + runcmd("mkdir /tmp/lustre-XXXX/") + if is_block(devpath): + ret, out = runcmd("mount %s /tmp/lustre-XXXX/" %devpath) + else: + ret, out = runcmd("mount -o loop %s /tmp/lustre-XXXX/" %devpath) + if ret: + print out[0] + else: + out = os.stat("/tmp/lustre-XXXX/LOGS") + kmtime = string.atof(out[8]) + runcmd("umount -f /tmp/lustre-XXXX/") + if xmtime > kmtime : + panic("Warning: the startup logs are older than the XML file." + " Please run --write_conf to update.") + else: + print "XML file does not contain mtime, skip mtime checking." + # # Load profile for def doHost(lustreDB, hosts): - global is_router, local_node_name, tgt_select + global local_node_name, tgt_select node_db = None for h in hosts: node_db = lustreDB.lookup_name(h, 'node') @@ -2695,22 +2082,18 @@ def doHost(lustreDB, hosts): if config.service: tgt_select[config.service] = h config.group = config.service + print "Startup the", config.group, "service on current node." 
break if not node_db: panic('No host entry found.') local_node_name = node_db.get_val('name', 0) - is_router = node_db.get_val_int('router', 0) lustre_upcall = node_db.get_val('lustreUpcall', '') portals_upcall = node_db.get_val('portalsUpcall', '') timeout = node_db.get_val_int('timeout', 0) ptldebug = node_db.get_val('ptldebug', '') subsystem = node_db.get_val('subsystem', '') - find_local_clusters(node_db) - if not is_router: - find_local_routes(lustreDB) - # Two step process: (1) load modules, (2) setup lustre # if not cleaning, load modules first. prof_list = node_db.get_refs('profile') @@ -2733,7 +2116,7 @@ def doHost(lustreDB, hosts): doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid, config.conn_uuid) elif config.cleanup: - if not mod_loaded('portals'): + if not mod_loaded('lnet'): return if config.force: @@ -2985,7 +2368,7 @@ def sysctl(path, val): def sys_set_debug_path(): - sysctl('portals/debug_path', config.debug_path) + sysctl('lnet/debug_path', config.debug_path) def validate_upcall(upcall): import os @@ -3015,7 +2398,7 @@ def sys_set_portals_upcall(upcall): upcall = config.upcall if upcall: validate_upcall(upcall) - sysctl('portals/upcall', upcall) + sysctl('lnet/upcall', upcall) def sys_set_group_upcall(mds, upcall): if config.noexec: @@ -3057,7 +2440,7 @@ def sys_set_ptldebug(ptldebug): try: val = eval(ptldebug, ptldebug_names) val = "0x%x" % (val) - sysctl('portals/debug', val) + sysctl('lnet/debug', val) except NameError, e: panic(str(e)) @@ -3068,7 +2451,7 @@ def sys_set_subsystem(subsystem): try: val = eval(subsystem, subsystem_names) val = "0x%x" % (val) - sysctl('portals/subsystem_debug', val) + sysctl('lnet/subsystem_debug', val) except NameError, e: panic(str(e)) @@ -3086,8 +2469,8 @@ def sys_set_netmem_max(path, max): fp.close() def sys_make_devices(): - if not os.access('/dev/portals', os.R_OK): - run('mknod /dev/portals c 10 240') + if not os.access('/dev/lnet', os.R_OK): + run('mknod /dev/lnet c 10 240') if not 
os.access('/dev/obd', os.R_OK): run('mknod /dev/obd c 10 241') @@ -3297,6 +2680,10 @@ def main(): print 'see lconf --help for command summary' sys.exit(1) + if config.reformat and config.cleanup: + panic("Options \"reformat\" and \"cleanup\" are incompatible. "+ + "Please specify only one.") + toplustreDB = lustreDB ver = lustreDB.get_version() @@ -3312,7 +2699,7 @@ def main(): else: if len(host) > 0: node_list.append(host) - node_list.append('localhost') +# node_list.append('localhost') debug("configuring for host: ", node_list) @@ -3325,6 +2712,9 @@ def main(): if config.lctl_dump: lctl.use_save_file(config.lctl_dump) + if not (config.reformat or config.write_conf or config.cleanup): + doCheckMtime(lustreDB, node_list) + if config.record: if not (config.record_device and config.record_log): panic("When recording, both --record_log and --record_device must be specified.") diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 7ab020e..7c76a64a 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include "obdctl.h" #include "parser.h" @@ -65,8 +65,12 @@ command_t cmdlist[] = { {"==== network config ====", jt_noop, 0, "network config"}, {"--net", jt_opt_net, 0, "run after setting network to \n" "usage: --net "}, - {"network", jt_ptl_network, 0, "commands that follow apply to net\n" - "usage: network "}, + {"network", jt_ptl_network, 0, "set current NID" + "usage: network net"}, + {"list_nids", jt_ptl_list_nids, 0, "list local NIDs" + "usage: list_nids [all]"}, + {"which_nid", jt_ptl_which_nid, 0, "choose a NID" + "usage: which_nid NID [NID...]"}, {"interface_list", jt_ptl_print_interfaces,0,"print interface entries\n" "usage: interface_list"}, {"add_interface", jt_ptl_add_interface, 0, "add interface entry\n" @@ -81,8 +85,6 @@ command_t cmdlist[] = { "usage: del_autoconn [] [] [ks]"}, {"conn_list", jt_ptl_print_connections, 0, "print all the connected remote nid\n" "usage: conn_list"}, - 
{"connect", jt_ptl_connect, 0, "connect to a remote nid\n" - "usage: connect [iIOC]"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid\n" "usage: disconnect []"}, {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits\n" @@ -91,8 +93,6 @@ command_t cmdlist[] = { "The nid defaults to hostname for tcp networks and is automatically " "setup for elan/myrinet networks.\n" "usage: mynid []"}, - {"shownid", jt_ptl_shownid, 0, "print the local NID\n" - "usage: shownid"}, {"add_uuid", jt_lcfg_add_uuid, 0, "associate a UUID with a nid\n" "usage: add_uuid "}, {"close_uuid", jt_obd_close_uuid, 0, "disconnect a UUID\n" @@ -118,9 +118,6 @@ command_t cmdlist[] = { "Omitting the count means indefinitely, 0 means restore, " "otherwise fail 'count' messages.\n" "usage: fail nid|_all_ [count]"}, - {"loopback", jt_ptl_loopback, 0, "print loopback state\n" - "With arg enable/disable\n" - "usage: loopback [on|off]"}, /* Device selection commands */ {"=== device selection ===", jt_noop, 0, "device selection"}, diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 7260275..8c88b1a 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -33,8 +33,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -53,7 +53,13 @@ #define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */ #define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */ -unsigned int portal_subsystem_debug = 0; +/* Where is this stupid thing supposed to be defined? 
*/ +#ifndef USRQUOTA +# define USRQUOTA 0 +# define GRPQUOTA 1 +#endif + +unsigned int libcfs_subsystem_debug = 0; /* all functions */ static int lfs_setstripe(int argc, char **argv); diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 2aead1d..9825387 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -49,7 +49,7 @@ #include #endif -#include +#include #include #include diff --git a/lustre/utils/llmount.c b/lustre/utils/llmount.c index f9909ac..af5882c 100644 --- a/lustre/utils/llmount.c +++ b/lustre/utils/llmount.c @@ -35,30 +35,19 @@ #include #include "obdctl.h" -#include +#include -int verbose; -int nomtab; -int fake; -int force; +int verbose; +int nomtab; +int fake; +int force; static char *progname = NULL; -typedef struct { - ptl_nid_t gw; - ptl_nid_t lo; - ptl_nid_t hi; -} llmount_route_t; - -#define MAX_ROUTES 1024 -int route_index; -ptl_nid_t lmd_cluster_id = 0; -llmount_route_t routes[MAX_ROUTES]; - void usage(FILE *out) { fprintf(out, "usage: %s :// " "[-fhnv] [-o mntopt]\n", progname); - fprintf(out, "\t: hostname or nid of MDS (config) node\n" + fprintf(out, "\t: nid of MDS (config) node\n" "\t: name of MDS service (e.g. mds1)\n" "\t: name of client config (e.g. client)\n" "\t: filesystem mountpoint (e.g. 
/mnt/lustre)\n" @@ -68,14 +57,9 @@ void usage(FILE *out) "\t-n|--nomtab: do not update /etc/mtab after mount\n" "\t-v|--verbose: print verbose config settings\n" "\t-o: filesystem mount options:\n" - "\t\tcluster_id=0xNNNN: cluster this node is part of\n" - "\t\t{no}flock: enable/disable flock support\n" - "\t\tlocal_nid=0xNNNN: client ID (default ipaddr or nodenum)\n" - "\t\tnettype={tcp,elan,openib,vib,iib,lo,gm}: network type\n" - "\t\tport=NNN: server port (default 988 for tcp)\n" - "\t\troute=[-]:[-]: portal route to MDS\n" - "\t\tserver_nid=0xNNNN: server node ID (default mdsnode)\n" - "\t\t{no}user_xattr: enable/disable manipulating user xattr\n"); + "\t\tflock/noflock: enable/disable flock support\n" + "\t\tuser_xattr/nouser_xattr: enable/disable user extended attributes\n" + ); exit(out != stdout); } @@ -141,103 +125,20 @@ init_options(struct lustre_mount_data *lmd) { memset(lmd, 0, sizeof(*lmd)); lmd->lmd_magic = LMD_MAGIC; - lmd->lmd_server_nid = PTL_NID_ANY; - lmd->lmd_local_nid = PTL_NID_ANY; - lmd->lmd_port = 988; /* XXX define LUSTRE_DEFAULT_PORT */ - lmd->lmd_nal = SOCKNAL; + lmd->lmd_nid = LNET_NID_ANY; return 0; } int print_options(struct lustre_mount_data *lmd) { -#if CRAY_PORTALS - const int cond_print = (lmd->lmd_nal != CRAY_KB_SSNAL); -#else - const int cond_print = 1; -#endif - int i; - + printf("nid: %s\n", libcfs_nid2str(lmd->lmd_nid)); printf("mds: %s\n", lmd->lmd_mds); printf("profile: %s\n", lmd->lmd_profile); - printf("server_nid: "LPX64"\n", lmd->lmd_server_nid); - - if (cond_print) - printf("local_nid: "LPX64"\n", lmd->lmd_local_nid); - - printf("nal: %x\n", lmd->lmd_nal); - - if (cond_print) { - printf("server_ipaddr: 0x%x\n", lmd->lmd_server_ipaddr); - printf("port: %d\n", lmd->lmd_port); - } - - for (i = 0; i < route_index; i++) - printf("route: "LPX64" : "LPX64" - "LPX64"\n", - routes[i].gw, routes[i].lo, routes[i].hi); return 0; } -static int parse_route(char *opteq, char *opttgts) -{ - char *gw_lo_ptr, *gw_hi_ptr, *tgt_lo_ptr, 
*tgt_hi_ptr; - ptl_nid_t gw_lo, gw_hi, tgt_lo, tgt_hi; - - opttgts[0] = '\0'; - gw_lo_ptr = opteq + 1; - if (!(gw_hi_ptr = strchr(gw_lo_ptr, '-'))) { - gw_hi_ptr = gw_lo_ptr; - } else { - gw_hi_ptr[0] = '\0'; - gw_hi_ptr++; - } - - if (ptl_parse_nid(&gw_lo, gw_lo_ptr) != 0) { - fprintf(stderr, "%s: can't parse NID %s\n", progname,gw_lo_ptr); - return(1); - } - - if (ptl_parse_nid(&gw_hi, gw_hi_ptr) != 0) { - fprintf(stderr, "%s: can't parse NID %s\n", progname,gw_hi_ptr); - return(1); - } - - tgt_lo_ptr = opttgts + 1; - if (!(tgt_hi_ptr = strchr(tgt_lo_ptr, '-'))) { - tgt_hi_ptr = tgt_lo_ptr; - } else { - tgt_hi_ptr[0] = '\0'; - tgt_hi_ptr++; - } - - if (ptl_parse_nid(&tgt_lo, tgt_lo_ptr) != 0) { - fprintf(stderr, "%s: can't parse NID %s\n",progname,tgt_lo_ptr); - return(1); - } - - if (ptl_parse_nid(&tgt_hi, tgt_hi_ptr) != 0) { - fprintf(stderr, "%s: can't parse NID %s\n",progname,tgt_hi_ptr); - return(1); - } - - while (gw_lo <= gw_hi) { - if (route_index >= MAX_ROUTES) { - fprintf(stderr, "%s: to many routes %d\n", - progname, MAX_ROUTES); - return(-1); - } - - routes[route_index].gw = gw_lo; - routes[route_index].lo = tgt_lo; - routes[route_index].hi = tgt_hi; - route_index++; - gw_lo++; - } - - return(0); -} - /***************************************************************************** * * This part was cribbed from util-linux/mount/mount.c. 
There was no clear @@ -300,9 +201,8 @@ static int parse_one_option(const char *check, int *ms_flags, int *lmd_flags) int parse_options(char *options, struct lustre_mount_data *lmd, int *flagp) { - ptl_nid_t nid = 0, cluster_id = 0; int val; - char *opt, *opteq, *opttgts; + char *opt, *opteq; *flagp = 0; /* parsing ideas here taken from util-linux/mount/nfsmount.c */ @@ -310,54 +210,14 @@ int parse_options(char *options, struct lustre_mount_data *lmd, int *flagp) if ((opteq = strchr(opt, '='))) { val = atoi(opteq + 1); *opteq = '\0'; - if (!strcmp(opt, "nettype")) { - lmd->lmd_nal = ptl_name2nal(opteq + 1); - if (lmd->lmd_nal < 0) { - fprintf(stderr, "%s: can't parse NET " - "%s\n", progname, opteq + 1); - return (1); - } - } else if(!strcmp(opt, "cluster_id")) { - if (ptl_parse_nid(&cluster_id, opteq+1) != 0) { - fprintf(stderr, "%s: can't parse NID " - "%s\n", progname, opteq+1); - return (1); - } - lmd_cluster_id = cluster_id; - } else if(!strcmp(opt, "route")) { - if (!(opttgts = strchr(opteq + 1, ':'))) { - fprintf(stderr, "%s: Route must be " - "of the form: route=" - "[-]:[-]\n", - progname); - return(1); - } - parse_route(opteq, opttgts); - } else if (!strcmp(opt, "local_nid")) { - if (ptl_parse_nid(&nid, opteq + 1) != 0) { - fprintf(stderr, "%s: " - "can't parse NID %s\n", - progname, - opteq+1); - return (1); - } - lmd->lmd_local_nid = nid; - } else if (!strcmp(opt, "server_nid")) { - if (ptl_parse_nid(&nid, opteq + 1) != 0) { - fprintf(stderr, "%s: " - "can't parse NID %s\n", - progname, opteq + 1); - return (1); - } - lmd->lmd_server_nid = nid; - } else if (!strcmp(opt, "port")) { - lmd->lmd_port = val; - } else if (!strcmp(opt, "sec")) { - /* do nothing */ + if (0) { + /* All the network options have gone :)) */ } else { - fprintf(stderr, "%s: unknown option '%s'\n", - progname, opt); + fprintf(stderr, "%s: unknown option '%s'. 
" + "Ignoring.\n", progname, opt); + /* Ignore old nettype= for now usage(stderr); + */ } } else { if (parse_one_option(opt, flagp, &lmd->lmd_flags)) @@ -372,327 +232,27 @@ int parse_options(char *options, struct lustre_mount_data *lmd, int *flagp) } int -get_local_elan_id(char *fname, char *buf) -{ - FILE *fp = fopen(fname, "r"); - int rc; - - if (fp == NULL) - return 1; - - rc = fscanf(fp, "NodeId %255s", buf); - - fclose(fp); - - return (rc == 1) ? 0 : -1; -} - -#if !CRAY_PORTALS -#if WITH_GM -#include -#define GM_UNIT 0 - -int getgmnid(char *name, ptl_nid_t *nid) -{ - struct gm_port *gm_port; - int gm_port_id = 2; - gm_status_t gm_status = GM_SUCCESS; - unsigned global_nid = 0, local_nid = 0; /* gm ids never 0 */ - - gm_status = gm_init(); - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_init: %s\n", gm_strerror(gm_status)); - return(0); - } - - gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, "gmnalnid", - GM_API_VERSION); - if (gm_status != GM_SUCCESS) { - int num_ports = gm_num_ports(gm_port); - - /* Couldn't open port 2, try 4 ... 
num_ports */ - for (gm_port_id = 4; gm_port_id < num_ports; gm_port_id++) { - gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, - "gmnalnid", GM_API_VERSION); - if (gm_status == GM_SUCCESS) - break; - } - - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_open: %s\n",gm_strerror(gm_status)); - gm_finalize(); - return(0); - } - } - - if (name == NULL) { - local_nid = 1; - } else { - gm_status = gm_host_name_to_node_id_ex(gm_port, 1000000, name, - &local_nid); - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_host_name_to_node_id_ex: %s\n", - gm_strerror(gm_status)); - gm_close(gm_port); - gm_finalize(); - return(0); - } - } - - gm_status = gm_node_id_to_global_id(gm_port, local_nid, &global_nid) ; - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_node_id_to_global_id: %s\n", - gm_strerror(gm_status)); - gm_close(gm_port); - gm_finalize(); - return(0); - } - gm_close(gm_port); - gm_finalize(); - - *nid = (__u64)global_nid; - return 1; -} -#else -int getgmnid(char *name, ptl_nid_t *nid) -{ - return 0; -} -#endif -#endif - -int -set_local(struct lustre_mount_data *lmd) -{ - /* XXX ClusterID? - * XXX PtlGetId() will be safer if portals is loaded and - * initialised correctly at this time... 
*/ - char buf[256], *ptr = buf; - ptl_nid_t nid; - int rc; - - if (lmd->lmd_local_nid != PTL_NID_ANY) - return 0; - - memset(buf, 0, sizeof(buf)); - - switch (lmd->lmd_nal) { - default: - fprintf(stderr, "%s: Unknown network type: %d\n", - progname, lmd->lmd_nal); - return 1; - -#if CRAY_PORTALS - case CRAY_KB_SSNAL: - return 0; - - case CRAY_KB_ERNAL: -#else - case SOCKNAL: - /* We need to do this before the mount is started if routing */ - system("/sbin/modprobe -q ksocknal"); - case TCPNAL: - case OPENIBNAL: - case IIBNAL: - case VIBNAL: - case RANAL: -#endif - { - struct utsname uts; - - rc = gethostname(buf, sizeof(buf) - 1); - if (rc) { - fprintf(stderr, "%s: can't get hostname: %s\n", - progname, strerror(rc)); - return rc; - } - - rc = uname(&uts); - /* for 2.6 kernels, reserve at least 8MB free, or we will - * go OOM during heavy read load */ - if (rc == 0 && strncmp(uts.release, "2.6", 3) == 0) { - int f, minfree = 32768; - char name[40], val[40]; - FILE *meminfo; - - meminfo = fopen("/proc/meminfo", "r"); - if (meminfo != NULL) { - while (fscanf(meminfo, "%s %s %*s\n", name, val) != EOF) { - if (strcmp(name, "MemTotal:") == 0) { - f = strtol(val, NULL, 0); - if (f > 0 && f < 8 * minfree) - minfree = f / 16; - break; - } - } - fclose(meminfo); - } - f = open("/proc/sys/vm/min_free_kbytes", O_WRONLY); - if (f >= 0) { - sprintf(val, "%d", minfree); - write(f, val, strlen(val)); - close(f); - } - } - break; - } -#if !CRAY_PORTALS - case GMNAL: - if (!getgmnid(NULL, &lmd->lmd_local_nid)) { - fprintf(stderr, "Can't get local GM NID\n"); - return 1; - } - return 0; - - case QSWNAL: - { - char *pfiles[] = {"/proc/qsnet/elan3/device0/position", - "/proc/qsnet/elan4/device0/position", - "/proc/elan/device0/position", - NULL}; - int i = 0; - - /* We need to do this before the mount is started if routing */ - system("/sbin/modprobe -q kqswnal"); - do { - rc = get_local_elan_id(pfiles[i], buf); - } while (rc != 0 && pfiles[++i] != NULL); - - if (rc != 0) { - rc = 
gethostname(buf, sizeof(buf) - 1); - if (rc == 0) { - char *tmp = ptr; - while ((*tmp >= 'a' && *tmp <= 'z') || - (*tmp >= 'A' && *tmp <= 'Z')) - tmp++; - ptr = strsep(&tmp, "."); - } else { - fprintf(stderr, - "%s: can't read Elan ID from /proc\n", - progname); - return 1; - } - } - break; - } -#endif - } - - if (ptl_parse_nid (&nid, ptr) != 0) { - fprintf (stderr, "%s: can't parse NID %s\n", progname, buf); - return (1); - } - - lmd->lmd_local_nid = nid + lmd_cluster_id; - return 0; -} - -int -set_peer(char *hostname, struct lustre_mount_data *lmd) -{ - ptl_nid_t nid = 0; - int rc; - - switch (lmd->lmd_nal) { - default: - fprintf(stderr, "%s: Unknown network type: %d\n", - progname, lmd->lmd_nal); - return 1; - -#if CRAY_PORTALS - case CRAY_KB_SSNAL: - lmd->lmd_server_nid = strtoll(hostname,0,0); - return 0; - - case CRAY_KB_ERNAL: -#else - case IIBNAL: - if (lmd->lmd_server_nid != PTL_NID_ANY) - break; - if (ptl_parse_nid (&nid, hostname) != 0) { - fprintf (stderr, "%s: can't parse NID %s\n", - progname, hostname); - return (1); - } - lmd->lmd_server_nid = nid; - break; - - case SOCKNAL: - case TCPNAL: - case OPENIBNAL: - case VIBNAL: - case RANAL: -#endif - if (lmd->lmd_server_nid == PTL_NID_ANY) { - if (ptl_parse_nid (&nid, hostname) != 0) { - fprintf (stderr, "%s: can't parse NID %s\n", - progname, hostname); - return (1); - } - lmd->lmd_server_nid = nid; - } - - if (ptl_parse_ipaddr(&lmd->lmd_server_ipaddr, hostname) != 0) { - fprintf (stderr, "%s: can't parse host %s\n", - progname, hostname); - return (1); - } - break; -#if !CRAY_PORTALS - case GMNAL: - if (lmd->lmd_server_nid != PTL_NID_ANY) - break; - if (!getgmnid(hostname, &lmd->lmd_server_nid)) { - fprintf(stderr, "Can't get GM NID for %s\n", hostname); - return 1; - } - break; - - case QSWNAL: { - char buf[64]; - - if (lmd->lmd_server_nid != PTL_NID_ANY) - break; - - rc = sscanf(hostname, "%*[^0-9]%63[0-9]", buf); - if (rc != 1) { - fprintf (stderr, "%s: can't get elan id from host %s\n", - progname, 
hostname); - return 1; - } - if (ptl_parse_nid (&nid, buf) != 0) { - fprintf (stderr, "%s: can't parse NID %s\n", - progname, hostname); - return (1); - } - lmd->lmd_server_nid = nid; - - break; - } -#endif - } - return 0; -} - -int build_data(char *source, char *options, struct lustre_mount_data *lmd, int *flagp) { - char buf[1024]; - char *hostname = NULL, *mds = NULL, *profile = NULL, *s; - int rc; + char buf[1024]; + char *nid = NULL; + char *mds = NULL; + char *profile = NULL; + char *s; + int rc; if (lmd_bad_magic(lmd)) return 4; if (strlen(source) >= sizeof(buf)) { - fprintf(stderr, "%s: host:/mds/profile argument too long\n", + fprintf(stderr, "%s: nid:/mds/profile argument too long\n", progname); return 1; } strcpy(buf, source); if ((s = strchr(buf, ':'))) { - hostname = buf; + nid = buf; *s = '\0'; while (*++s == '/') @@ -709,7 +269,7 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd, } } else { fprintf(stderr, "%s: " - "directory to mount not in host:/mds/profile format\n", + "directory to mount not in nid:/mds/profile format\n", progname); return(1); } @@ -718,13 +278,12 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd, if (rc) return rc; - rc = set_local(lmd); - if (rc) - return rc; + lmd->lmd_nid = libcfs_str2nid(nid); + if (lmd->lmd_nid == LNET_NID_ANY) { + fprintf(stderr, "%s: can't parse nid '%s'\n", progname, nid); + return 1; + } - rc = set_peer(hostname, lmd); - if (rc) - return rc; if (strlen(mds) > sizeof(lmd->lmd_mds) + 1) { fprintf(stderr, "%s: mds name too long\n", progname); return(1); @@ -742,71 +301,6 @@ build_data(char *source, char *options, struct lustre_mount_data *lmd, return 0; } -static int set_routes(struct lustre_mount_data *lmd) { - struct portals_cfg pcfg; - struct portal_ioctl_data data; - int i, j, route_exists, rc, err = 0; - - register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); - - for (i = 0; i < route_index; i++) { - - /* Check for existing routes so as not to add duplicates 
*/ - for (j = 0; ; j++) { - PCFG_INIT(pcfg, NAL_CMD_GET_ROUTE); - pcfg.pcfg_nal = ROUTER; - pcfg.pcfg_count = j; - - PORTAL_IOC_INIT(data); - data.ioc_pbuf1 = (char*)&pcfg; - data.ioc_plen1 = sizeof(pcfg); - data.ioc_nid = pcfg.pcfg_nid; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc != 0) { - route_exists = 0; - break; - } - - if ((pcfg.pcfg_gw_nal == lmd->lmd_nal) && - (pcfg.pcfg_nid == routes[i].gw) && - (pcfg.pcfg_nid2 == routes[i].lo) && - (pcfg.pcfg_nid3 == routes[i].hi)) { - route_exists = 1; - break; - } - } - - if (route_exists) - continue; - - PCFG_INIT(pcfg, NAL_CMD_ADD_ROUTE); - pcfg.pcfg_nid = routes[i].gw; - pcfg.pcfg_nal = ROUTER; - pcfg.pcfg_gw_nal = lmd->lmd_nal; - pcfg.pcfg_nid2 = MIN(routes[i].lo, routes[i].hi); - pcfg.pcfg_nid3 = MAX(routes[i].lo, routes[i].hi); - - PORTAL_IOC_INIT(data); - data.ioc_pbuf1 = (char*)&pcfg; - data.ioc_plen1 = sizeof(pcfg); - data.ioc_nid = pcfg.pcfg_nid; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc != 0) { - fprintf(stderr, "%s: Unable to add route " - LPX64" : "LPX64" - "LPX64"\n[%d] %s\n", - progname, routes[i].gw, routes[i].lo, - routes[i].hi, errno, strerror(errno)); - err = 2; - break; - } - } - - unregister_ioc_dev(PORTALS_DEV_ID); - return err; -} - int main(int argc, char *const argv[]) { char *source, *target, *options = ""; @@ -885,12 +379,6 @@ int main(int argc, char *const argv[]) exit(1); } - if (!fake) { - rc = set_routes(&lmd); - if (rc) - exit(2); - } - rc = access(target, F_OK); if (rc) { rc = errno; diff --git a/lustre/utils/llog_reader.c b/lustre/utils/llog_reader.c new file mode 100644 index 0000000..385c1b8 --- /dev/null +++ b/lustre/utils/llog_reader.c @@ -0,0 +1,331 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + + +#include +#include +#include +#include + +#include +#include +#include + +int llog_pack_buffer(int fd, struct llog_log_hdr** llog_buf, struct llog_rec_hdr*** recs, int* recs_number); + +void print_llog_header(struct llog_log_hdr* llog_buf); +void print_records(struct llog_rec_hdr** recs_buf,int rec_number); +void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr** recs_buf); + +#define PTL_CMD_BASE 100 +char* portals_command[17]= +{ + "REGISTER_PEER_FD", + "CLOSE_CONNECTION", + "REGISTER_MYNID", + "PUSH_CONNECTION", + "GET_CONN", + "DEL_PEER", + "ADD_PEER", + "GET_PEER", + "GET_TXDESC", + "ADD_ROUTE", + "DEL_ROUTE", + "GET_ROUTE", + "NOTIFY_ROUTER", + "ADD_INTERFACE", + "DEL_INTERFACE", + "GET_INTERFACE", + "" +}; + +int main(int argc, char **argv) +{ + int rc=0; + int fd,rec_number; + + struct llog_log_hdr* llog_buf=NULL; + struct llog_rec_hdr** recs_buf=NULL; + + + setlinebuf(stdout); + + if(argc != 2 ){ + printf("Usage: llog_reader filename \n"); + return -1; + } + + fd = open(argv[1],O_RDONLY); + if (fd < 0){ + printf("Could not open the file %s \n",argv[1]); + goto out; + } + rc = llog_pack_buffer(fd, &llog_buf, &recs_buf, &rec_number); + + if(llog_buf == NULL ) + printf("error"); + print_llog_header(llog_buf); + + print_records(recs_buf,rec_number); + + llog_unpack_buffer(fd,llog_buf,recs_buf); + 
close(fd); +out: + return rc; +} + + + +int llog_pack_buffer(int fd, struct llog_log_hdr** llog, + struct llog_rec_hdr*** recs, + int* recs_number) +{ + int rc=0,recs_num,rd; + off_t file_size; + struct stat st; + char *file_buf=NULL, *recs_buf=NULL; + struct llog_rec_hdr** recs_pr=NULL; + char* ptr=NULL; + int cur_idx,i; + + rc = fstat(fd,&st); + if (rc < 0){ + printf("Get file stat error.\n"); + goto out; + } + file_size = st.st_size; + + file_buf = malloc(file_size); + if (file_buf == NULL){ + printf("Memory Alloc for file_buf error.\n"); + rc = -ENOMEM; + goto out; + } + *llog = (struct llog_log_hdr*)file_buf; + + rd = read(fd,file_buf,file_size); + if (rd < file_size){ + printf("Read file error.\n"); + rc = -EIO; /*FIXME*/ + goto clear_file_buf; + } + + /* the llog header not countable here.*/ + recs_num = le32_to_cpu((*llog)->llh_count)-1; + + recs_buf = malloc(recs_num*sizeof(struct llog_rec_hdr*)); + if (recs_buf == NULL){ + printf("Memory Alloc for recs_buf error.\n"); + rc = -ENOMEM; + goto clear_file_buf; + } + recs_pr = (struct llog_rec_hdr **)recs_buf; + + ptr = file_buf + le32_to_cpu((*llog)->llh_hdr.lrh_len); + cur_idx = 1; + i = 0; + while (i < recs_num){ + struct llog_rec_hdr* cur_rec=(struct llog_rec_hdr*)ptr; + + while(!ext2_test_bit(cur_idx,(*llog)->llh_bitmap)){ + cur_idx++; + ptr += cur_rec->lrh_len; + if ((ptr-file_buf) > file_size){ + printf("The log is corrupted. 
\n"); + rc = -EINVAL; + goto clear_recs_buf; + } + } + recs_pr[i] = cur_rec; + ptr+=cur_rec->lrh_len; + i++; + cur_idx++; + } + + *recs = recs_pr; + *recs_number=recs_num; + +out: + return rc; + +clear_recs_buf: + free(recs_buf); + +clear_file_buf: + free(file_buf); + + *llog=NULL; + goto out; + +} + + +void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr **recs_buf) +{ + free(llog_buf); + free(recs_buf); + return; +} + + +void print_llog_header(struct llog_log_hdr* llog_buf) +{ + time_t t; + + printf("Header size : %d \n", + // le32_to_cpu(llog_buf->llh_hdr.lrh_len)); + llog_buf->llh_hdr.lrh_len); + + t = le64_to_cpu(llog_buf->llh_timestamp); + printf("Time : %s", ctime(&t)); + + printf("Number of records: %d\n", + le32_to_cpu(llog_buf->llh_count)-1); + + printf("Target uuid : %s \n", + (char *)(&llog_buf->llh_tgtuuid)); + + /* Add the other infor you want to view here*/ + + printf("-----------------------\n"); + return; +} + +static void print_1_cfg(struct lustre_cfg *lcfg) +{ + int i; + for (i = 0; i < lcfg->lcfg_bufcount; i++) + printf("%d:%s ", i, lustre_cfg_string(lcfg, i)); + return; +} + +void print_lustre_cfg(struct lustre_cfg *lcfg) +{ + enum lcfg_command_type cmd = le32_to_cpu(lcfg->lcfg_command); + + switch(cmd){ + case(LCFG_ATTACH):{ + printf("attach "); + print_1_cfg(lcfg); + break; + } + case(LCFG_SETUP):{ + printf("setup "); + print_1_cfg(lcfg); + break; + } + case(LCFG_DETACH):{ + printf("detach "); + print_1_cfg(lcfg); + break; + } + case(LCFG_CLEANUP):{ + printf("cleanup "); + print_1_cfg(lcfg); + break; + } + case(LCFG_ADD_UUID):{ + printf("add_uuid "); + printf("nid="LPX64"=%s ", lcfg->lcfg_nid, + libcfs_nid2str(lcfg->lcfg_nid)); + /* obsolete */ + if (lcfg->lcfg_nal) + printf("nal=%d ", lcfg->lcfg_nal); + print_1_cfg(lcfg); + break; + } + case(LCFG_DEL_UUID):{ + printf("del_uuid "); + print_1_cfg(lcfg); + break; + } + case(LCFG_ADD_CONN):{ + printf("add_conn "); + print_1_cfg(lcfg); + break; + } + 
case(LCFG_DEL_CONN):{ + printf("del_conn "); + print_1_cfg(lcfg); + break; + } + case(LCFG_LOV_ADD_OBD):{ + printf("lov_modify_tgts add "); + print_1_cfg(lcfg); + break; + } + case(LCFG_LOV_DEL_OBD):{ + printf("lov_modify_tgts del "); + print_1_cfg(lcfg); + break; + } + case(LCFG_MOUNTOPT):{ + printf("mount_option "); + print_1_cfg(lcfg); + break; + } + case(LCFG_DEL_MOUNTOPT):{ + printf("del_mount_option "); + print_1_cfg(lcfg); + break; + } + case(LCFG_SET_TIMEOUT):{ + printf("set_timeout=%d ", lcfg->lcfg_num); + print_1_cfg(lcfg); + break; + } + case(LCFG_SET_UPCALL):{ + printf("set_lustre_upcall "); + print_1_cfg(lcfg); + break; + } + default: + printf("unsupported cmd_code = %x\n",cmd); + } + printf("\n"); + return; +} + +void print_records(struct llog_rec_hdr** recs,int rec_number) +{ + __u32 lopt; + int i; + + for(i=0;ilrh_index)); + + lopt = le32_to_cpu(recs[i]->lrh_type); + + if (lopt == OBD_CFG_REC){ + struct lustre_cfg *lcfg; + printf("L "); + lcfg = (struct lustre_cfg *) + ((char*)(recs[i]) + sizeof(struct llog_rec_hdr)); + print_lustre_cfg(lcfg); + } + + if (lopt == PTL_CFG_REC){ + printf("Portals - unknown type\n"); + } + } +} diff --git a/lustre/utils/lmc b/lustre/utils/lmc index cc9ef6c..3979e6e 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -75,7 +75,7 @@ Object creation command summary: --node node_name --nid nid --cluster_id - --nettype tcp|elan|gm|openib|iib|vib|ra + --nettype tcp|elan|gm|openib|iib|vib|ra|ptl|lnet --hostaddr ip[/netmask] --port port --tcpbuf size @@ -164,11 +164,12 @@ lmc_options = [ ('subsystem', "Specify which Lustre subsystems have debug output recorded in the log", PARAM), # network - ('nettype', "Specify the network type. This can be tcp/elan/gm/openib/iib/vib/ra.", PARAM), + ('nettype', "Specify the network type. 
This can be tcp/elan/gm/openib/iib/vib/ra/ptl/lnet.", PARAM), ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM), ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT), ('hostaddr', "Optional argument to specify the host address.", PARAMLIST), ('cluster_id', "Specify the cluster ID", PARAM, "0"), + ('nonet', "Skip the remote host networking check"), # routes ('route', "Add a new route for the cluster.", PARAM), @@ -273,6 +274,7 @@ def new_lustre(dom): """ % (Lustre.CONFIG_VERSION, ldlm_name, ldlm_uuid) return dom.parseString(str) + names = {} uuids = {} @@ -328,6 +330,10 @@ class GenConfig: node.appendChild(new) return new + def recordtime(self, timestr): + lustre = self.doc.getElementsByTagName("lustre") + lustre[0].setAttribute("mtime", timestr) + def network(self, name, uuid, nid, cluster_id, net, hostaddr="", port=0): """create node""" @@ -584,6 +590,15 @@ def get_attr(dom_node, attr, default=""): ############################################################ # Top level commands # +def runcmd(cmd): + f = os.popen(cmd) + ret = f.close() + if ret: + ret = ret >> 8 + else: + ret = 0 + return ret + def set_node_options(gen, node, options): if options.router: node.setAttribute('router', '1') @@ -645,14 +660,29 @@ def add_net(gen, lustre, options): hostaddr = get_option(options, 'hostaddr') net_type = get_option(options, 'nettype') - if net_type in ('tcp','openib','ra'): + if net_type in ('lnet','tcp','openib','ra'): port = get_option_int(options, 'port') - elif net_type in ('elan','gm','iib','vib','lo','cray_kern_nal'): + elif net_type in ('elan','gm','iib','vib','lo','ptl'): port = 0 else: print "Unknown net_type: ", net_type sys.exit(2) + real_net_type = net_type + if net_type == 'lnet': + real_net_type = string.split(nid,'@')[1] + + # testing network + if options.nonet: + if options.verbose: + print "Skip the remote host networking test." 
+ elif node_name != 'client' and real_net_type in ('tcp'): + print "Testing network on", node_name + target = string.split(nid,'@')[0] + out = runcmd("ping -c 1 -w 10 %s" %target) + if out != 0: + print "Could not connect to", node_name,", Please check network." + ret = findByName(lustre, node_name, "node") if not ret: node = do_add_node(gen, lustre, options, node_name) @@ -1171,6 +1201,10 @@ def main(): except Lustre.OptionError, e: panic("lmc", e) + #record timestamp + timestr = string.split(str(time.time()), '.') + gen.recordtime(timestr[0]) + if outFile == '-': printDoc(doc) else: diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index fbd95f3..755973c 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -49,7 +49,7 @@ #include "obdctl.h" -#include +#include #include "parser.h" #include @@ -253,9 +253,8 @@ int jt_obd_cleanup(int argc, char **argv) } static -int do_add_uuid(char * func, char *uuid, ptl_nid_t nid, int nal) +int do_add_uuid(char * func, char *uuid, lnet_nid_t nid) { - char tmp[64]; int rc; struct lustre_cfg_bufs bufs; struct lustre_cfg *lcfg; @@ -266,11 +265,10 @@ int do_add_uuid(char * func, char *uuid, ptl_nid_t nid, int nal) lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); lcfg->lcfg_nid = nid; - lcfg->lcfg_nal = nal; #if 0 - fprintf(stderr, "adding\tnal: %d\tnid: %d\tuuid: %s\n", - lcfg->lcfg_nid, lcfg->lcfg_nal, uuid); + fprintf(stderr, "adding\tnid: %d\tuuid: %s\n", + lcfg->lcfg_nid, uuid); #endif rc = lcfg_ioctl(func, OBD_DEV_ID, lcfg); lustre_cfg_free(lcfg); @@ -280,37 +278,30 @@ int do_add_uuid(char * func, char *uuid, ptl_nid_t nid, int nal) return -1; } - printf ("Added uuid %s: %s\n", uuid, ptl_nid2str (tmp, nid)); + printf ("Added uuid %s: %s\n", uuid, libcfs_nid2str(nid)); return 0; } int jt_lcfg_add_uuid(int argc, char **argv) { - ptl_nid_t nid = 0; - int nal; + lnet_nid_t nid; - if (argc != 4) { + if (argc != 3) { return CMD_HELP; } - if (ptl_parse_nid (&nid, argv[2]) != 0) { + nid = 
libcfs_str2nid(argv[2]); + if (nid == LNET_NID_ANY) { fprintf (stderr, "Can't parse NID %s\n", argv[2]); - return (-1); + return (-1); } - nal = ptl_name2nal(argv[3]); - - if (nal <= 0) { - fprintf (stderr, "Can't parse NAL %s\n", argv[3]); - return -1; - } - - return do_add_uuid(argv[0], argv[1], nid, nal); + return do_add_uuid(argv[0], argv[1], nid); } -int obd_add_uuid(char *uuid, ptl_nid_t nid, int nal) +int obd_add_uuid(char *uuid, lnet_nid_t nid) { - return do_add_uuid("obd_add_uuid", uuid, nid, nal); + return do_add_uuid("obd_add_uuid", uuid, nid); } int jt_lcfg_del_uuid(int argc, char **argv) diff --git a/lustre/utils/lwizard b/lustre/utils/lwizard index 649b285..fa85240 100755 --- a/lustre/utils/lwizard +++ b/lustre/utils/lwizard @@ -378,9 +378,9 @@ create_config() run_lmc --add mds \ --node "$HOST_NAME" \ --mds "$DEVICE_NAME" \ - --fstype "$DEFAULT_FSTYPE" \ --dev "$DEVICE" \ --size "$DEVICE_SIZE" \ + --fstype "$DEFAULT_FSTYPE" \ $extraopt if [ "$FAILOVER_HOST" != "" ] ; then add_node "$FAILOVER_HOST" @@ -388,6 +388,7 @@ create_config() --node "$FAILOVER_HOST" \ --mds "$DEVICE_NAME" \ --dev "$FAILOVER_DEVICE" \ + --size "$DEVICE_SIZE" \ --fstype "$DEFAULT_FSTYPE" \ --failover \ --group "$HOST_NAME" @@ -411,17 +412,18 @@ create_config() --node "$HOST_NAME" \ --ost "$DEVICE_NAME" \ --lov "$DEVICE_LOV" \ - --fstype "$DEFAULT_FSTYPE" \ --dev "$DEVICE" \ --size "$DEVICE_SIZE" \ + --fstype "$DEFAULT_FSTYPE" \ $extraopt if [ "$FAILOVER_HOST" != "" ] ; then add_node "$FAILOVER_HOST" run_lmc --add ost \ --node "$FAILOVER_HOST" \ --ost "$DEVICE_NAME" \ - --dev "$FAILOVER_DEVICE" \ --lov "$DEVICE_LOV" \ + --dev "$FAILOVER_DEVICE" \ + --size "$DEVICE_SIZE" \ --fstype "$DEFAULT_FSTYPE" \ --failover \ --group "$HOST_NAME" diff --git a/lustre/utils/module_setup.sh b/lustre/utils/module_setup.sh index ff3b7bf..c422184 100755 --- a/lustre/utils/module_setup.sh +++ b/lustre/utils/module_setup.sh @@ -17,13 +17,12 @@ echo "Copying modules from local build dir to "$MDIR mkdir 
-p $MDIR -cp ../../portals/libcfs/libcfs.$EXT $MDIR -cp ../../portals/portals/portals.$EXT $MDIR -cp ../../portals/knals/socknal/ksocknal.$EXT $MDIR +cp ../../lnet/libcfs/libcfs.$EXT $MDIR +cp ../../lnet/lnet/lnet.$EXT $MDIR +cp ../../lnet/klnds/socklnd/ksocklnd.$EXT $MDIR cp ../lvfs/lvfs.$EXT $MDIR cp ../obdclass/obdclass.$EXT $MDIR cp ../ptlrpc/ptlrpc.$EXT $MDIR -#cp ../obdclass/confobd.$EXT $MDIR cp ../mdc/mdc.$EXT $MDIR cp ../osc/osc.$EXT $MDIR cp ../lov/lov.$EXT $MDIR @@ -34,36 +33,21 @@ cp ../ost/ost.$EXT $MDIR cp ../obdfilter/obdfilter.$EXT $MDIR cp ../llite/llite.$EXT $MDIR -# prevent warnings on my uml -rm -f /lib/modules/`uname -r`/modules.* echo "Depmod" depmod -a -e -echo "Copying mount and acceptor from local build dir to "$MDIR -cp ../../portals/utils/acceptor /sbin/. +echo "Copying mount from local build dir to "$MDIR cp ../utils/mount.lustre /sbin/. +MP="/sbin/modprobe" +MPI="$MP --ignore-install" + [ -e $MODFILE ] || touch $MODFILE -if [ `grep -c lustre $MODFILE` -eq 0 ]; then +if [ `egrep -c "lustre|lnet" $MODFILE` -eq 0 ]; then echo Modifying $MODFILE echo "# Lustre modules added by $0" >> $MODFILE - if [ $KVER -eq 24 ]; then - echo alias lustre null >> $MODFILE - echo above lustre llite osc mdc >> $MODFILE - echo above mds llite confobd osc >> $MODFILE - echo alias oss ost >> $MODFILE - echo above ost llite confobd obdfilter >> $MODFILE - echo above confobd $FSFLT >> $MODFILE - echo below ptlrpc ksocknal >> $MODFILE - else - MP="/sbin/modprobe" - MPI="$MP --ignore-install" - echo "install ptlrpc $MP ksocknal && $MPI ptlrpc" >> $MODFILE - echo "install confobd $MP $FSFLT && $MPI confobd" >> $MODFILE - echo "install ost $MP llite confobd obdfilter && $MPI ost" >> $MODFILE - echo "install oss $MP ost && $MPI oss" >> $MODFILE - echo "install mds $MP llite confobd osc && $MPI mds" >> $MODFILE - echo "install lustre $MP llite osc mdc" >> $MODFILE - fi + echo "# Networking options, see /sys/module/lnet/parameters" >> $MODFILE + echo "options lnet 
networks=tcp" >> $MODFILE + echo "alias lustre llite" >> $MODFILE echo "# end Lustre modules" >> $MODFILE fi diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 4dff394..bb40872 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -53,17 +53,20 @@ #endif #include -#include +#include #include "parser.h" #include -#include -#include -#include #define MAX_STRING_SIZE 128 #define DEVICES_LIST "/proc/fs/lustre/devices" +#if HAVE_LIBPTHREAD +#include +#include +#include + #define MAX_THREADS 1024 + struct shared_data { __u64 counters[MAX_THREADS]; __u64 offsets[MAX_THREADS]; @@ -72,19 +75,23 @@ struct shared_data { pthread_mutex_t mutex; pthread_cond_t cond; }; + static struct shared_data *shared_data; static __u64 counter_snapshot[2][MAX_THREADS]; static int prev_valid; -struct timeval prev_time; +static struct timeval prev_time; +static int thread; +static int nthreads; +#else +const int thread = 0; +const int nthreads = 1; +#endif static int jt_recording; static char rawbuf[8192]; static char *buf = rawbuf; static int max = sizeof(rawbuf); -static int thread; -static int nthreads; - static int cur_device = MAX_OBD_DEVICES; union lsm_buffer { @@ -420,6 +427,7 @@ int do_disconnect(char *func, int verbose) return 0; } +#ifdef MAX_THREADS static void shmem_setup(void) { /* Create new segment */ @@ -450,6 +458,16 @@ static void shmem_setup(void) } } +static inline void shmem_lock(void) +{ + pthread_mutex_lock(&shared_data->mutex); +} + +static inline void shmem_unlock(void) +{ + pthread_mutex_unlock(&shared_data->mutex); +} + static inline void shmem_reset(int total_threads) { if (shared_data == NULL) @@ -470,11 +488,11 @@ static inline void shmem_bump(void) if (shared_data == NULL || thread <= 0 || thread > MAX_THREADS) return; - pthread_mutex_lock(&shared_data->mutex); + shmem_lock(); shared_data->counters[thread - 1]++; if (!bumped_running) shared_data->running++; - pthread_mutex_unlock(&shared_data->mutex); + shmem_unlock(); bumped_running = 1; } @@ 
-490,11 +508,11 @@ static void shmem_snap(int total_threads, int live_threads) if (shared_data == NULL || total_threads > MAX_THREADS) return; - pthread_mutex_lock(&shared_data->mutex); + shmem_lock(); memcpy(counter_snapshot[0], shared_data->counters, total_threads * sizeof(counter_snapshot[0][0])); running = shared_data->running; - pthread_mutex_unlock(&shared_data->mutex); + shmem_unlock(); gettimeofday(&this_time, NULL); @@ -523,6 +541,27 @@ static void shmem_snap(int total_threads, int live_threads) running == total_threads) prev_valid = 1; } +#else +static void shmem_setup(void) +{ +} + +static inline void shmem_reset(int total_threads) +{ +} + +static inline void shmem_bump(void) +{ +} + +static void shmem_lock() +{ +} + +static void shmem_unlock() +{ +} +#endif extern command_t cmdlist[]; @@ -586,6 +625,7 @@ int jt_opt_device(int argc, char **argv) return rc; } +#ifdef MAX_THREADS static void parent_sighandler (int sig) { return; @@ -712,6 +752,14 @@ int jt_opt_threads(int argc, char **argv) sigprocmask(SIG_SETMASK, &saveset, NULL); return rc; } +#else +int jt_opt_threads(int argc, char **argv) +{ + fprintf(stderr, "%s not-supported in a single-threaded runtime\n", + jt_cmdname(argv[0])); + return CMD_HELP; +} +#endif int jt_opt_net(int argc, char **argv) { @@ -1493,8 +1541,9 @@ int jt_obd_test_brw(int argc, char **argv) thr_offset = offset_pages * getpagesize(); stride = len; +#ifdef MAX_THREADS if (thread) { - pthread_mutex_lock (&shared_data->mutex); + shmem_lock (); if (nthr_per_obj != 0) { /* threads interleave */ obj_idx = (thread - 1)/nthr_per_obj; @@ -1515,8 +1564,9 @@ int jt_obd_test_brw(int argc, char **argv) pthread_cond_wait(&shared_data->cond, &shared_data->mutex); - pthread_mutex_unlock (&shared_data->mutex); + shmem_unlock (); } +#endif data.ioc_obdo1.o_id = objid; data.ioc_obdo1.o_mode = S_IFREG; @@ -1546,23 +1596,28 @@ int jt_obd_test_brw(int argc, char **argv) write ? 
"write" : "read"); break; } else if (be_verbose(verbose, &next_time,i, &next_count,count)) { - pthread_mutex_lock (&shared_data->mutex); + shmem_lock (); printf("%s: %s number %d @ "LPD64":"LPU64" for %d\n", jt_cmdname(argv[0]), write ? "write" : "read", i, data.ioc_obdo1.o_id, data.ioc_offset, (int)(pages * getpagesize())); - pthread_mutex_unlock (&shared_data->mutex); + shmem_unlock (); } if (!repeat_offset) { +#ifdef MAX_THREADS if (stride == len) { data.ioc_offset += stride; } else if (i < count) { - pthread_mutex_lock (&shared_data->mutex); + shmem_lock (); data.ioc_offset = shared_data->offsets[obj_idx]; shared_data->offsets[obj_idx] += len; - pthread_mutex_unlock (&shared_data->mutex); + shmem_unlock (); } +#else + data.ioc_offset += len; + obj_idx = 0; /* avoids an unused var warning */ +#endif } } @@ -1857,25 +1912,17 @@ int jt_obd_mdc_lookup(int argc, char **argv) int jt_obd_close_uuid(int argc, char **argv) { - int rc, nal; + int rc; struct obd_ioctl_data data; - if (argc != 3) { - fprintf(stderr, "usage: %s \n", argv[0]); + if (argc != 2) { + fprintf(stderr, "usage: %s \n", argv[0]); return 0; } - nal = ptl_name2nal(argv[2]); - - if (nal <= 0) { - fprintf (stderr, "Can't parse NAL %s\n", argv[2]); - return -1; - } - IOC_INIT(data); data.ioc_inllen1 = strlen(argv[1]) + 1; data.ioc_inlbuf1 = argv[1]; - data.ioc_nal = nal; IOC_PACK(argv[0], data); rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_CLOSE_UUID, buf); @@ -1905,7 +1952,7 @@ int jt_cfg_record(int argc, char **argv) rc = l_ioctl(OBD_DEV_ID, OBD_IOC_RECORD, buf); if (rc == 0) { jt_recording = 1; - ptl_set_cfg_record_cb(obd_record); + // ptl_set_cfg_record_cb(obd_record); } else { fprintf(stderr, "OBD_IOC_RECORD failed: %s\n", strerror(errno)); @@ -2002,7 +2049,7 @@ int jt_cfg_endrecord(int argc, char **argv) rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ENDRECORD, buf); if (rc == 0) { jt_recording = 0; - ptl_set_cfg_record_cb(NULL); + // ptl_set_cfg_record_cb(NULL); } else { fprintf(stderr, "OBD_IOC_ENDRECORD failed: %s\n", 
strerror(errno)); diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index fed08df..7883dbd 100644 --- a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -89,6 +89,6 @@ int jt_lcfg_set_lustre_upcall(int argc, char **argv); int jt_lcfg_add_conn(int argc, char **argv); int jt_lcfg_del_conn(int argc, char **argv); -int obd_add_uuid(char *uuid, ptl_nid_t nid, int nal); +int obd_add_uuid(char *uuid, lnet_nid_t nid); #endif diff --git a/lustre/utils/rmmod_all.sh b/lustre/utils/rmmod_all.sh index 4e9376a..e948e31 100755 --- a/lustre/utils/rmmod_all.sh +++ b/lustre/utils/rmmod_all.sh @@ -4,7 +4,6 @@ rmmod llite rmmod mdc rmmod lov rmmod osc -rmmod confobd rmmod obdfilter rmmod fsfilt_ext3 rmmod fsfilt_ldiskfs @@ -14,6 +13,6 @@ rmmod mds rmmod ptlrpc rmmod obdclass rmmod lvfs -rmmod ksocknal -rmmod portals +rmmod ksocklnd +rmmod lnet rmmod libcfs -- 1.8.3.1