From 661d4364ce8dbcf3ce83a4c1e06899fd9694f0f8 Mon Sep 17 00:00:00 2001 From: green Date: Mon, 26 Apr 2004 17:05:46 +0000 Subject: [PATCH] Update to HEAD. --- lnet/klnds/socklnd/socklnd.c | 329 +++++++++++++++++++++------------ lustre/include/linux/lustre_export.h | 8 +- lustre/include/linux/lustre_fsfilt.h | 73 +++++--- lustre/lov/lproc_lov.c | 7 +- lustre/obdfilter/lproc_obdfilter.c | 74 ++++++-- lustre/osc/lproc_osc.c | 80 +++----- lustre/portals/knals/socknal/socknal.c | 329 +++++++++++++++++++++------------ lustre/tests/conf-sanity.sh | 236 +++++++++++++++++++---- 8 files changed, 750 insertions(+), 386 deletions(-) diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index c47dcb4..32bbbec 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -25,13 +25,10 @@ #include "socknal.h" +nal_t ksocknal_api; +ksock_nal_data_t ksocknal_data; ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif +ksock_tunables_t ksocknal_tunables; kpr_nal_interface_t ksocknal_router_interface = { kprni_nalid: SOCKNAL, @@ -40,6 +37,7 @@ kpr_nal_interface_t ksocknal_router_interface = { kprni_notify: ksocknal_notify, }; +#ifdef CONFIG_SYSCTL #define SOCKNAL_SYSCTL 200 #define SOCKNAL_SYSCTL_TIMEOUT 1 @@ -50,21 +48,21 @@ kpr_nal_interface_t ksocknal_router_interface = { static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_data.ksnd_io_timeout, sizeof (int), + &ksocknal_tunables.ksnd_io_timeout, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_data.ksnd_eager_ack, sizeof (int), + &ksocknal_tunables.ksnd_eager_ack, sizeof (int), 0644, NULL, &proc_dointvec}, #if SOCKNAL_ZC {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_data.ksnd_zc_min_frag, sizeof (int), + &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif {SOCKNAL_SYSCTL_TYPED, "typed", - &ksocknal_data.ksnd_typed_conns, sizeof (int), + &ksocknal_tunables.ksnd_typed_conns, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_data.ksnd_min_bulk, sizeof (int), + &ksocknal_tunables.ksnd_min_bulk, sizeof (int), 0644, NULL, &proc_dointvec}, { 0 } }; @@ -73,6 +71,7 @@ static ctl_table ksocknal_top_ctl_table[] = { {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, { 0 } }; +#endif int ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, @@ -88,19 +87,6 @@ ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, return PTL_OK; } -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - return PTL_OK; -} - -void -ksocknal_api_yield(nal_t *nal) -{ - our_cond_resched(); - return; -} - void ksocknal_api_lock(nal_t *nal, unsigned long *flags) { @@ -123,19 +109,44 @@ ksocknal_api_unlock(nal_t *nal, unsigned long *flags) nal_cb->cb_sti(nal_cb,flags); } -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) +int +ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds) { - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); + /* NB called holding statelock */ + wait_queue_t wait; + unsigned long now = jiffies; + + CDEBUG (D_NET, "yield\n"); + + if (milliseconds == 0) { + our_cond_resched(); + return 0; + } + + init_waitqueue_entry(&wait, current); + set_current_state (TASK_INTERRUPTIBLE); + add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait); + + ksocknal_api_unlock(nal, flags); + + if (milliseconds < 0) + schedule (); + else + schedule_timeout((milliseconds * HZ) / 1000); + + ksocknal_api_lock(nal, flags); + + remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait); + + if (milliseconds > 0) { + milliseconds -= ((jiffies - now) * 1000) / HZ; + if (milliseconds < 0) + milliseconds = 0; + } + + return (milliseconds); } -/* - * EXTRA functions follow - */ - int ksocknal_set_mynid(ptl_nid_t nid) { @@ -196,7 +207,7 @@ ksocknal_bind_irq (unsigned int irq) /* FIXME: Find a better method of setting IRQ affinity... */ - call_usermodehelper (argv[0], argv, envp); + USERMODEHELPER(argv[0], argv, envp); #endif } @@ -745,6 +756,9 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, ksocknal_get_peer_addr (conn); + CWARN("New conn nid:"LPX64" ip:%08x/%d incarnation:"LPX64"\n", + nid, conn->ksnc_ipaddr, conn->ksnc_port, incarnation); + irq = ksocknal_conn_irq (conn); write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); @@ -798,7 +812,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; list_add (&conn->ksnc_list, &peer->ksnp_conns); atomic_inc (&conn->ksnc_refcount); @@ -1071,6 +1085,11 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) if (conn->ksnc_incarnation == incarnation) continue; + + CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d " + "incarnation:"LPX64"("LPX64")\n", + peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port, + conn->ksnc_incarnation, incarnation); count++; ksocknal_close_conn_locked (conn, -ESTALE); @@ -1388,6 +1407,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) void ksocknal_free_fmbs (ksock_fmb_pool_t *p) { + int npages = p->fmp_buff_pages; ksock_fmb_t *fmb; int i; @@ -1399,12 +1419,12 @@ ksocknal_free_fmbs (ksock_fmb_pool_t *p) fmb = list_entry(p->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); - for (i = 0; i < fmb->fmb_npages; i++) - if (fmb->fmb_pages[i] != NULL) - __free_page(fmb->fmb_pages[i]); - + for (i = 0; i < npages; i++) + if (fmb->fmb_kiov[i].kiov_page != NULL) + __free_page(fmb->fmb_kiov[i].kiov_page); + list_del(&fmb->fmb_list); - PORTAL_FREE(fmb, sizeof(*fmb)); + PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages])); } } @@ -1426,30 +1446,34 @@ ksocknal_free_buffers (void) } void -ksocknal_module_fini (void) +ksocknal_api_shutdown (nal_t *nal) { int i; + if (nal->nal_refct != 0) { + /* This module got the first ref */ + PORTAL_MODULE_UNUSE; + return; + } + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); + LASSERT(nal == &ksocknal_api); + switch (ksocknal_data.ksnd_init) { default: LASSERT (0); case SOCKNAL_INIT_ALL: -#if CONFIG_SYSCTL - if (ksocknal_data.ksnd_sysctl != NULL) - unregister_sysctl_table (ksocknal_data.ksnd_sysctl); -#endif - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); + libcfs_nal_cmd_unregister(SOCKNAL); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; /* fall through */ - case SOCKNAL_INIT_PTL: + case SOCKNAL_INIT_LIB: /* No more calls to ksocknal_cmd() to create new * autoroutes/connections since we're being unloaded. */ - PtlNIFini(ksocknal_ni); /* Delete all autoroute entries */ ksocknal_del_route(PTL_NID_ANY, 0, 0, 0); @@ -1470,6 +1494,8 @@ ksocknal_module_fini (void) /* Tell lib we've stopped calling into her. */ lib_fini(&ksocknal_lib); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; /* fall through */ case SOCKNAL_INIT_DATA: @@ -1517,6 +1543,8 @@ ksocknal_module_fini (void) kpr_deregister (&ksocknal_data.ksnd_router); ksocknal_free_buffers(); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; /* fall through */ case SOCKNAL_INIT_NOTHING: @@ -1531,7 +1559,7 @@ ksocknal_module_fini (void) } -void __init +void ksocknal_init_incarnation (void) { struct timeval tv; @@ -1547,43 +1575,31 @@ ksocknal_init_incarnation (void) (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } -int __init -ksocknal_module_init (void) +int +ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; + ptl_process_id_t process_id; + int pkmem = atomic_read(&portal_kmemory); + int rc; + int i; + int j; - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT (nal == &ksocknal_api); - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.yield = ksocknal_api_yield; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = ksocknal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } - ksocknal_lib.nal_data = &ksocknal_data; + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; -#if SOCKNAL_ZC - ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; -#endif ksocknal_init_incarnation(); ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; @@ -1599,14 +1615,17 @@ ksocknal_module_init (void) ksocknal_data.ksnd_nal_cb = &ksocknal_lib; spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock); - + init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq); + spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); + ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES; spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); + ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES; spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); @@ -1627,7 +1646,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1643,15 +1662,19 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); + /* NB we have to wait to be told our true NID... */ + process_id.pid = 0; + process_id.nid = 0; + + rc = lib_init(&ksocknal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { + CERROR("lib_init failed: error %d\n", rc); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PtlNIDebug(ksocknal_ni, ~0); - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called for (i = 0; i < SOCKNAL_N_SCHED; i++) { rc = ksocknal_thread_start (ksocknal_scheduler, @@ -1659,7 +1682,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1668,7 +1691,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1676,7 +1699,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_reaper, NULL); if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } @@ -1686,62 +1709,127 @@ ksocknal_module_init (void) CDEBUG(D_NET, "Can't initialise routing interface " "(rc = %d): not routing\n", rc); } else { - /* Only allocate forwarding buffers if I'm on a gateway */ + /* Only allocate forwarding buffers if there's a router */ for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb; + ksock_fmb_t *fmb; + ksock_fmb_pool_t *pool; - PORTAL_ALLOC(fmb, sizeof(*fmb)); + + if (i < SOCKNAL_SMALL_FWD_NMSGS) + pool = &ksocknal_data.ksnd_small_fmp; + else + pool = &ksocknal_data.ksnd_large_fmp; + + PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, + fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { - ksocknal_module_fini(); + ksocknal_api_shutdown(&ksocknal_api); return (-ENOMEM); } - if (i < SOCKNAL_SMALL_FWD_NMSGS) { - fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp; - } else { - fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; - } - - for (j = 0; j < fmb->fmb_npages; j++) { - fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); + fmb->fmb_pool = pool; + + for (j = 0; j < pool->fmp_buff_pages; j++) { + fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); - if (fmb->fmb_pages[j] == NULL) { - ksocknal_module_fini (); + if (fmb->fmb_kiov[j].kiov_page == NULL) { + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } - LASSERT(page_address(fmb->fmb_pages[j]) != NULL); + LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL); } - list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); + list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs); } } - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); + rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - -#ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); -#endif /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; printk(KERN_INFO "Lustre: Routing socket NAL loaded " - "(Routing %s, initial mem %d)\n", + "(Routing %s, initial mem %d, incarnation "LPX64")\n", kpr_routing (&ksocknal_data.ksnd_router) ? - "enabled" : "disabled", pkmem); + "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation); + + return (0); +} + +void __exit +ksocknal_module_fini (void) +{ +#ifdef CONFIG_SYSCTL + if (ksocknal_tunables.ksnd_sysctl != NULL) + unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); +#endif + PtlNIFini(ksocknal_ni); + + ptl_unregister_nal(SOCKNAL); +} + +int __init +ksocknal_module_init (void) +{ + int rc; + /* packet descriptor must fit in a router descriptor's scratchpad */ + LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); + /* the following must be sizeof(int) for proc_dointvec() */ + LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); +#if SOCKNAL_ZC + LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); +#endif + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + + ksocknal_api.startup = ksocknal_api_startup; + ksocknal_api.forward = ksocknal_api_forward; + ksocknal_api.shutdown = ksocknal_api_shutdown; + ksocknal_api.lock = ksocknal_api_lock; + ksocknal_api.unlock = ksocknal_api_unlock; + ksocknal_api.nal_data = &ksocknal_data; + + ksocknal_lib.nal_data = &ksocknal_data; + + /* Initialise dynamic tunables to defaults once only */ + ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; + ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK; +#if SOCKNAL_ZC + ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; +#endif + + rc = ptl_register_nal(SOCKNAL, &ksocknal_api); + if (rc != PTL_OK) { + CERROR("Can't register SOCKNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways want the NAL started up at module load time... */ + rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(SOCKNAL); + return (-ENODEV); + } + +#ifdef CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + ksocknal_tunables.ksnd_sysctl = + register_sysctl_table (ksocknal_top_ctl_table, 0); +#endif return (0); } @@ -1752,4 +1840,3 @@ MODULE_LICENSE("GPL"); module_init(ksocknal_module_init); module_exit(ksocknal_module_fini); -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index 218807c..9be781f 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -26,7 +26,7 @@ struct mds_export_data { struct osc_creator { spinlock_t oscc_lock; struct list_head oscc_list; - struct obd_export *oscc_exp; + struct obd_device *oscc_obd; obd_id oscc_last_id;//last available pre-created object obd_id oscc_next_id;// what object id to give out next int oscc_initial_create_count; @@ -38,10 +38,6 @@ struct osc_creator { wait_queue_head_t oscc_waitq; /* creating procs wait on this */ }; -struct osc_export_data { - struct osc_creator oed_oscc; -}; - struct ldlm_export_data { struct list_head led_held_locks; /* protected by namespace lock */ }; @@ -83,14 +79,12 @@ struct obd_export { struct mds_export_data eu_mds_data; struct filter_export_data eu_filter_data; struct ec_export_data eu_ec_data; - struct osc_export_data eu_osc_data; } u; }; #define exp_mds_data u.eu_mds_data #define exp_lov_data u.eu_lov_data #define exp_filter_data u.eu_filter_data -#define exp_osc_data u.eu_osc_data #define exp_ec_data u.eu_ec_data extern struct obd_export *class_conn2export(struct lustre_handle *conn); diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 3f3421a..40e9914 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -28,6 +28,7 @@ #ifdef __KERNEL__ #include +#include typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, void *data, int error); @@ -41,10 +42,11 @@ struct fsfilt_operations { struct list_head fs_list; struct module *fs_owner; char *fs_type; - void *(* fs_start)(struct inode *inode, int op, void *desc_private); + void *(* fs_start)(struct inode *inode, int op, void *desc_private, + int logs); void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso, int niocount, struct niobuf_local *nb, - void *desc_private); + void *desc_private, int logs); int (* fs_commit)(struct inode *inode, void *handle,int force_sync); int (* fs_commit_async)(struct inode *inode, void *handle, void **wait_handle); @@ -72,6 +74,7 @@ struct fsfilt_operations { int force_sync); int (* fs_read_record)(struct file *, void *, int size, loff_t *); int (* fs_setup)(struct super_block *sb); + int (* fs_get_op_len)(int, struct fsfilt_objinfo *, int); }; extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops); @@ -88,24 +91,23 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define FSFILT_OP_MKNOD 7 #define FSFILT_OP_SETATTR 8 #define FSFILT_OP_LINK 9 -#define FSFILT_OP_CREATE_LOG 10 -#define FSFILT_OP_UNLINK_LOG 11 -#define FSFILT_OP_CANCEL_UNLINK_LOG 12 +#define FSFILT_OP_CANCEL_UNLINK 10 -static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, - int op, struct obd_trans_info *oti) +static inline void *fsfilt_start_log(struct obd_device *obd, + struct inode *inode, int op, + struct obd_trans_info *oti, int logs) { unsigned long now = jiffies; void *parent_handle = oti ? oti->oti_handle : NULL; - void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle); - CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle); + void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs); + CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle); if (oti != NULL) { if (parent_handle == NULL) { oti->oti_handle = handle; } else if (handle != parent_handle) { CERROR("mismatch: parent %p, handle %p, oti %p\n", - parent_handle, handle, oti->oti_handle); + parent_handle, handle, oti); LBUG(); } } @@ -114,17 +116,22 @@ static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, return handle; } -static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, - struct fsfilt_objinfo *fso, int niocount, - struct niobuf_local *nb, - struct obd_trans_info *oti) +static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, + int op, struct obd_trans_info *oti) +{ + return fsfilt_start_log(obd, inode, op, oti, 0); +} + +static inline void *fsfilt_brw_start_log(struct obd_device *obd, + int objcount, + struct fsfilt_objinfo *fso, + int niocount, struct niobuf_local *nb, + struct obd_trans_info *oti, int logs) { unsigned long now = jiffies; void *parent_handle = oti ? oti->oti_handle : NULL; - void *handle; - - handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount, nb, - parent_handle); + void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount, nb, + parent_handle, logs); CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle); if (oti != NULL) { @@ -132,41 +139,53 @@ static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, oti->oti_handle = handle; } else if (handle != parent_handle) { CERROR("mismatch: parent %p, handle %p, oti %p\n", - parent_handle, handle, oti->oti_handle); + parent_handle, handle, oti); LBUG(); } } if (time_after(jiffies, now + 15 * HZ)) CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + return handle; } +static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_local *nb, + struct obd_trans_info *oti) +{ + return fsfilt_brw_start_log(obd, objcount, fso, niocount, nb, oti, 0); +} + static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, void *handle, int force_sync) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); - CDEBUG(D_INFO, "committing handle %p\n", handle); + CDEBUG(D_HA, "committing handle %p\n", handle); + if (time_after(jiffies, now + 15 * HZ)) CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + return rc; } static inline int fsfilt_commit_async(struct obd_device *obd, - struct inode *inode, - void *handle, - void **wait_handle) + struct inode *inode, void *handle, + void **wait_handle) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle); + CDEBUG(D_HA, "committing handle %p (async)\n", *wait_handle); if (time_after(jiffies, now + 15 * HZ)) CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + return rc; } -static inline int fsfilt_commit_wait(struct obd_device *obd, struct inode *inode, - void *handle) +static inline int fsfilt_commit_wait(struct obd_device *obd, + struct inode *inode, void *handle) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_wait(inode, handle); @@ -217,8 +236,8 @@ static inline int fsfilt_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func, void *cb_data) { - return obd->obd_fsops->fs_add_journal_cb(obd, last_rcvd, handle, - cb_func, cb_data); + return obd->obd_fsops->fs_add_journal_cb(obd, last_rcvd, + handle, cb_func, cb_data); } /* very similar to obd_statfs(), but caller already holds obd_osfs_lock */ diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index c29644c..ee4883d 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -198,9 +198,10 @@ static struct lprocfs_vars lprocfs_module_vars[] = { }; struct file_operations lov_proc_target_fops = { - .open = lov_target_seq_open, - .read = seq_read, - .llseek = seq_lseek, + .owner = THIS_MODULE, + .open = lov_target_seq_open, + .read = seq_read, + .llseek = seq_lseek, .release = seq_release, }; diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 6fae59d..8f64926 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -34,10 +34,47 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else +static int lprocfs_filter_rd_groups(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + *eof = 1; + return snprintf(page, count, "%u\n", FILTER_GROUPS); +} + +static int lprocfs_filter_rd_tot_dirty(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + + LASSERT(obd != NULL); + *eof = 1; + return snprintf(page, count, LPU64"\n", obd->u.filter.fo_tot_dirty); +} + +static int lprocfs_filter_rd_tot_granted(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + + LASSERT(obd != NULL); + *eof = 1; + return snprintf(page, count, LPU64"\n", obd->u.filter.fo_tot_granted); +} + +static int lprocfs_filter_rd_tot_pending(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + + LASSERT(obd != NULL); + *eof = 1; + return snprintf(page, count, LPU64"\n", obd->u.filter.fo_tot_pending); +} + static int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct obd_device* obd = (struct obd_device *)data; + struct obd_device *obd = (struct obd_device *)data; LASSERT(obd != NULL); LASSERT(obd->u.filter.fo_vfsmnt->mnt_devname); @@ -92,10 +129,14 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, - //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, + { "filegroups", lprocfs_filter_rd_groups, 0, 0 }, { "fstype", lprocfs_rd_fstype, 0, 0 }, { "mntdev", lprocfs_filter_rd_mntdev, 0, 0 }, { "last_id", lprocfs_filter_rd_last_id,0, 0 }, + { "tot_dirty", lprocfs_filter_rd_tot_dirty, 0, 0 }, + { "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 }, + { "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 }, + { "num_exports", lprocfs_rd_num_exports, 0, 0 }, { "readcache_max_filesize", lprocfs_filter_rd_readcache, lprocfs_filter_wr_readcache, 0 }, @@ -137,7 +178,7 @@ void filter_tally_write(struct filter_obd *filter, struct page **pages, lprocfs_oh_tally(&filter->fo_w_discont_blocks, discont_blocks); } -void filter_tally_read(struct filter_obd *filter, struct niobuf_local *lnb, +void filter_tally_read(struct filter_obd *filter, struct niobuf_local *lnb, int niocount) { struct niobuf_local *end; @@ -156,7 +197,7 @@ void filter_tally_read(struct filter_obd *filter, struct niobuf_local *lnb, /* XXX not so smart for now */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) if ((page->buffers && last_page->buffers) && - (page->buffers->b_blocknr != + (page->buffers->b_blocknr != (last_page->buffers->b_blocknr + 1))) discont_blocks++; #else @@ -202,9 +243,9 @@ static int filter_brw_stats_seq_show(struct seq_file *seq, void *v) unsigned long w = filter->fo_w_pages.oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - 1 << i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + 1 << i, r, pct(r, read_tot), + pct(read_cum, read_tot), w, pct(w, write_tot), pct(write_cum, write_tot)); if (read_cum == read_tot && write_cum == write_tot) @@ -226,9 +267,9 @@ static int filter_brw_stats_seq_show(struct seq_file *seq, void *v) unsigned long w = filter->fo_w_discont_pages.oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + i, r, pct(r, read_tot), + pct(read_cum, read_tot), w, pct(w, write_tot), pct(write_cum, write_tot)); if (read_cum == read_tot && write_cum == write_tot) @@ -249,9 +290,9 @@ static int filter_brw_stats_seq_show(struct seq_file *seq, void *v) unsigned long w = filter->fo_w_discont_blocks.oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + i, r, pct(r, read_tot), + pct(read_cum, read_tot), w, pct(w, write_tot), pct(write_cum, write_tot)); if (read_cum == read_tot && write_cum == write_tot) @@ -288,7 +329,7 @@ static int filter_brw_stats_seq_open(struct inode *inode, struct file *file) struct proc_dir_entry *dp = PDE(inode); struct seq_file *seq; int rc; - + rc = seq_open(file, &filter_brw_stats_seq_sops); if (rc) return rc; @@ -315,6 +356,7 @@ static ssize_t filter_brw_stats_seq_write(struct file *file, const char *buf, } struct file_operations filter_brw_stats_fops = { + .owner = THIS_MODULE, .open = filter_brw_stats_seq_open, .read = seq_read, .write = filter_brw_stats_seq_write, @@ -324,11 +366,11 @@ struct file_operations filter_brw_stats_fops = { int lproc_filter_attach_seqstat(struct obd_device *dev) { - return lprocfs_obd_seq_create(dev, "brw_stats", 0444, + return lprocfs_obd_seq_create(dev, "brw_stats", 0444, &filter_brw_stats_fops, dev); } #endif /* LPROCFS */ -LPROCFS_INIT_VARS(filter,lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(filter, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 9216ec0b..88b4d2a 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -59,7 +59,7 @@ int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer, if (rc) return rc; - if (val < 1 || val > PTL_MD_MAX_PAGES) + if (val < 1 || val > PTLRPC_MAX_BRW_PAGES) return -ERANGE; spin_lock(&cli->cl_loi_list_lock); @@ -170,28 +170,21 @@ int osc_rd_create_low_wm(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = data; - struct obd_export *exp; - if (obd == NULL || list_empty(&obd->obd_exports)) + if (obd == NULL) return 0; - spin_lock(&obd->obd_dev_lock); - exp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - spin_unlock(&obd->obd_dev_lock); - return snprintf(page, count, "%d\n", - exp->exp_osc_data.oed_oscc.oscc_kick_barrier); + obd->u.cli.cl_oscc.oscc_kick_barrier); } int osc_wr_create_low_wm(struct file *file, const char *buffer, unsigned long count, void *data) { struct obd_device *obd = data; - struct obd_export *exp; int val, rc; - if (obd == NULL || list_empty(&obd->obd_exports)) + if (obd == NULL) return 0; rc = lprocfs_write_helper(buffer, count, &val); @@ -202,9 +195,7 @@ int osc_wr_create_low_wm(struct file *file, const char *buffer, return -ERANGE; spin_lock(&obd->obd_dev_lock); - exp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - exp->exp_osc_data.oed_oscc.oscc_kick_barrier = val; + obd->u.cli.cl_oscc.oscc_kick_barrier = val; spin_unlock(&obd->obd_dev_lock); return count; @@ -214,28 +205,21 @@ int osc_rd_create_count(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = data; - struct obd_export *exp; - if (obd == NULL || list_empty(&obd->obd_exports)) + if (obd == NULL) return 0; - spin_lock(&obd->obd_dev_lock); - exp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - spin_unlock(&obd->obd_dev_lock); - return snprintf(page, count, "%d\n", - exp->exp_osc_data.oed_oscc.oscc_grow_count); + obd->u.cli.cl_oscc.oscc_grow_count); } int osc_wr_create_count(struct file *file, const char *buffer, unsigned long count, void *data) { struct obd_device *obd = data; - struct obd_export *exp; int val, rc; - if (obd == NULL || list_empty(&obd->obd_exports)) + if (obd == NULL) return 0; rc = lprocfs_write_helper(buffer, count, &val); @@ -245,11 +229,7 @@ int osc_wr_create_count(struct file *file, const char *buffer, if (val < 0) return -ERANGE; - spin_lock(&obd->obd_dev_lock); - exp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - exp->exp_osc_data.oed_oscc.oscc_grow_count = val; - spin_unlock(&obd->obd_dev_lock); + obd->u.cli.cl_oscc.oscc_grow_count = val; return count; } @@ -258,36 +238,24 @@ int osc_rd_prealloc_next_id(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = data; - struct obd_export *exp; - if (obd == NULL || list_empty(&obd->obd_exports)) + if (obd == NULL) return 0; - spin_lock(&obd->obd_dev_lock); - exp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - spin_unlock(&obd->obd_dev_lock); - return snprintf(page, count, LPU64"\n", - exp->exp_osc_data.oed_oscc.oscc_next_id); + obd->u.cli.cl_oscc.oscc_next_id); } int osc_rd_prealloc_last_id(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = data; - struct obd_export *exp; - if (obd == NULL || list_empty(&obd->obd_exports)) + if (obd == NULL) return 0; - spin_lock(&obd->obd_dev_lock); - exp = list_entry(obd->obd_exports.next, struct obd_export, - exp_obd_chain); - spin_unlock(&obd->obd_dev_lock); - return snprintf(page, count, LPU64"\n", - exp->exp_osc_data.oed_oscc.oscc_last_id); + obd->u.cli.cl_oscc.oscc_last_id); } static struct lprocfs_vars lprocfs_obd_vars[] = { @@ -338,7 +306,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) rpcs = cli->cl_brw_in_flight; r = cli->cl_pending_r_pages; w = cli->cl_pending_w_pages; - + seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "RPCs in flight: %d\n", rpcs); @@ -359,9 +327,9 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) unsigned long w = cli->cl_write_page_hist.oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - 1 << i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + 1 << i, r, pct(r, read_tot), + pct(read_cum, read_tot), w, pct(w, write_tot), pct(write_cum, write_tot)); if (read_cum == read_tot && write_cum == write_tot) @@ -382,9 +350,9 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) unsigned long w = cli->cl_write_rpc_hist.oh_buckets[i]; read_cum += r; write_cum += w; - seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", - i, r, pct(r, read_tot), - pct(read_cum, read_tot), w, + seq_printf(seq, "%d:\t\t%10lu %3lu %3lu | %10lu %3lu %3lu\n", + i, r, pct(r, read_tot), + pct(read_cum, read_tot), w, pct(w, write_tot), pct(write_cum, write_tot)); if (read_cum == read_tot && write_cum == write_tot) @@ -423,7 +391,7 @@ static int osc_rpc_stats_seq_open(struct inode *inode, struct file *file) struct proc_dir_entry *dp = PDE(inode); struct seq_file *seq; int rc; - + rc = seq_open(file, &osc_rpc_stats_seq_sops); if (rc) return rc; @@ -448,6 +416,7 @@ static ssize_t osc_rpc_stats_seq_write(struct file *file, const char *buf, } struct file_operations osc_rpc_stats_fops = { + .owner = THIS_MODULE, .open = osc_rpc_stats_seq_open, .read = seq_read, .write = osc_rpc_stats_seq_write, @@ -457,10 +426,9 @@ struct file_operations osc_rpc_stats_fops = { int lproc_osc_attach_seqstat(struct obd_device *dev) { - return lprocfs_obd_seq_create(dev, "rpc_stats", 0444, + return lprocfs_obd_seq_create(dev, "rpc_stats", 0444, &osc_rpc_stats_fops, dev); } - #endif /* LPROCFS */ -LPROCFS_INIT_VARS(osc,lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(osc, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index c47dcb4..32bbbec 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -25,13 +25,10 @@ #include "socknal.h" +nal_t ksocknal_api; +ksock_nal_data_t ksocknal_data; ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif +ksock_tunables_t ksocknal_tunables; kpr_nal_interface_t ksocknal_router_interface = { kprni_nalid: SOCKNAL, @@ -40,6 +37,7 @@ kpr_nal_interface_t ksocknal_router_interface = { kprni_notify: ksocknal_notify, }; +#ifdef CONFIG_SYSCTL #define SOCKNAL_SYSCTL 200 #define SOCKNAL_SYSCTL_TIMEOUT 1 @@ -50,21 +48,21 @@ kpr_nal_interface_t ksocknal_router_interface = { static ctl_table ksocknal_ctl_table[] = { {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_data.ksnd_io_timeout, sizeof (int), + &ksocknal_tunables.ksnd_io_timeout, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_data.ksnd_eager_ack, sizeof (int), + &ksocknal_tunables.ksnd_eager_ack, sizeof (int), 0644, NULL, &proc_dointvec}, #if SOCKNAL_ZC {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_data.ksnd_zc_min_frag, sizeof (int), + &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), 0644, NULL, &proc_dointvec}, #endif {SOCKNAL_SYSCTL_TYPED, "typed", - &ksocknal_data.ksnd_typed_conns, sizeof (int), + &ksocknal_tunables.ksnd_typed_conns, sizeof (int), 0644, NULL, &proc_dointvec}, {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_data.ksnd_min_bulk, sizeof (int), + &ksocknal_tunables.ksnd_min_bulk, sizeof (int), 0644, NULL, &proc_dointvec}, { 0 } }; @@ -73,6 +71,7 @@ static ctl_table ksocknal_top_ctl_table[] = { {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, { 0 } }; +#endif int ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, @@ -88,19 +87,6 @@ ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, return PTL_OK; } -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - return PTL_OK; -} - -void -ksocknal_api_yield(nal_t *nal) -{ - our_cond_resched(); - return; -} - void ksocknal_api_lock(nal_t *nal, unsigned long *flags) { @@ -123,19 +109,44 @@ ksocknal_api_unlock(nal_t *nal, unsigned long *flags) nal_cb->cb_sti(nal_cb,flags); } -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) +int +ksocknal_api_yield(nal_t *nal, unsigned long *flags, int milliseconds) { - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); + /* NB called holding statelock */ + wait_queue_t wait; + unsigned long now = jiffies; + + CDEBUG (D_NET, "yield\n"); + + if (milliseconds == 0) { + our_cond_resched(); + return 0; + } + + init_waitqueue_entry(&wait, current); + set_current_state (TASK_INTERRUPTIBLE); + add_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait); + + ksocknal_api_unlock(nal, flags); + + if (milliseconds < 0) + schedule (); + else + schedule_timeout((milliseconds * HZ) / 1000); + + ksocknal_api_lock(nal, flags); + + remove_wait_queue (&ksocknal_data.ksnd_yield_waitq, &wait); + + if (milliseconds > 0) { + milliseconds -= ((jiffies - now) * 1000) / HZ; + if (milliseconds < 0) + milliseconds = 0; + } + + return (milliseconds); } -/* - * EXTRA functions follow - */ - int ksocknal_set_mynid(ptl_nid_t nid) { @@ -196,7 +207,7 @@ ksocknal_bind_irq (unsigned int irq) /* FIXME: Find a better method of setting IRQ affinity... */ - call_usermodehelper (argv[0], argv, envp); + USERMODEHELPER(argv[0], argv, envp); #endif } @@ -745,6 +756,9 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, ksocknal_get_peer_addr (conn); + CWARN("New conn nid:"LPX64" ip:%08x/%d incarnation:"LPX64"\n", + nid, conn->ksnc_ipaddr, conn->ksnc_port, incarnation); + irq = ksocknal_conn_irq (conn); write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); @@ -798,7 +812,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, /* Set the deadline for the outgoing HELLO to drain */ conn->ksnc_tx_deadline = jiffies + - ksocknal_data.ksnd_io_timeout * HZ; + ksocknal_tunables.ksnd_io_timeout * HZ; list_add (&conn->ksnc_list, &peer->ksnp_conns); atomic_inc (&conn->ksnc_refcount); @@ -1071,6 +1085,11 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) if (conn->ksnc_incarnation == incarnation) continue; + + CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d " + "incarnation:"LPX64"("LPX64")\n", + peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port, + conn->ksnc_incarnation, incarnation); count++; ksocknal_close_conn_locked (conn, -ESTALE); @@ -1388,6 +1407,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) void ksocknal_free_fmbs (ksock_fmb_pool_t *p) { + int npages = p->fmp_buff_pages; ksock_fmb_t *fmb; int i; @@ -1399,12 +1419,12 @@ ksocknal_free_fmbs (ksock_fmb_pool_t *p) fmb = list_entry(p->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); - for (i = 0; i < fmb->fmb_npages; i++) - if (fmb->fmb_pages[i] != NULL) - __free_page(fmb->fmb_pages[i]); - + for (i = 0; i < npages; i++) + if (fmb->fmb_kiov[i].kiov_page != NULL) + __free_page(fmb->fmb_kiov[i].kiov_page); + list_del(&fmb->fmb_list); - PORTAL_FREE(fmb, sizeof(*fmb)); + PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages])); } } @@ -1426,30 +1446,34 @@ ksocknal_free_buffers (void) } void -ksocknal_module_fini (void) +ksocknal_api_shutdown (nal_t *nal) { int i; + if (nal->nal_refct != 0) { + /* This module got the first ref */ + PORTAL_MODULE_UNUSE; + return; + } + CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", atomic_read (&portal_kmemory)); + LASSERT(nal == &ksocknal_api); + switch (ksocknal_data.ksnd_init) { default: LASSERT (0); case SOCKNAL_INIT_ALL: -#if CONFIG_SYSCTL - if (ksocknal_data.ksnd_sysctl != NULL) - unregister_sysctl_table (ksocknal_data.ksnd_sysctl); -#endif - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); + libcfs_nal_cmd_unregister(SOCKNAL); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; /* fall through */ - case SOCKNAL_INIT_PTL: + case SOCKNAL_INIT_LIB: /* No more calls to ksocknal_cmd() to create new * autoroutes/connections since we're being unloaded. */ - PtlNIFini(ksocknal_ni); /* Delete all autoroute entries */ ksocknal_del_route(PTL_NID_ANY, 0, 0, 0); @@ -1470,6 +1494,8 @@ ksocknal_module_fini (void) /* Tell lib we've stopped calling into her. */ lib_fini(&ksocknal_lib); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; /* fall through */ case SOCKNAL_INIT_DATA: @@ -1517,6 +1543,8 @@ ksocknal_module_fini (void) kpr_deregister (&ksocknal_data.ksnd_router); ksocknal_free_buffers(); + + ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; /* fall through */ case SOCKNAL_INIT_NOTHING: @@ -1531,7 +1559,7 @@ ksocknal_module_fini (void) } -void __init +void ksocknal_init_incarnation (void) { struct timeval tv; @@ -1547,43 +1575,31 @@ ksocknal_init_incarnation (void) (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } -int __init -ksocknal_module_init (void) +int +ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, + ptl_ni_limits_t *requested_limits, + ptl_ni_limits_t *actual_limits) { - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; + ptl_process_id_t process_id; + int pkmem = atomic_read(&portal_kmemory); + int rc; + int i; + int j; - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - /* the following must be sizeof(int) for proc_dointvec() */ - LASSERT(sizeof (ksocknal_data.ksnd_io_timeout) == sizeof (int)); - LASSERT(sizeof (ksocknal_data.ksnd_eager_ack) == sizeof (int)); - /* check ksnr_connected/connecting field large enough */ - LASSERT(SOCKNAL_CONN_NTYPES <= 4); - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); + LASSERT (nal == &ksocknal_api); - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.yield = ksocknal_api_yield; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; + if (nal->nal_refct != 0) { + if (actual_limits != NULL) + *actual_limits = ksocknal_lib.ni.actual_limits; + /* This module got the first ref */ + PORTAL_MODULE_USE; + return (PTL_OK); + } - ksocknal_lib.nal_data = &ksocknal_data; + LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - ksocknal_data.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; - ksocknal_data.ksnd_eager_ack = SOCKNAL_EAGER_ACK; - ksocknal_data.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; - ksocknal_data.ksnd_min_bulk = SOCKNAL_MIN_BULK; -#if SOCKNAL_ZC - ksocknal_data.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; -#endif ksocknal_init_incarnation(); ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; @@ -1599,14 +1615,17 @@ ksocknal_module_init (void) ksocknal_data.ksnd_nal_cb = &ksocknal_lib; spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock); - + init_waitqueue_head(&ksocknal_data.ksnd_yield_waitq); + spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); + ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES; spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); + ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES; spin_lock_init (&ksocknal_data.ksnd_reaper_lock); INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); @@ -1627,7 +1646,7 @@ ksocknal_module_init (void) PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, sizeof(ksock_sched_t) * SOCKNAL_N_SCHED); if (ksocknal_data.ksnd_schedulers == NULL) { - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } @@ -1643,15 +1662,19 @@ ksocknal_module_init (void) init_waitqueue_head (&kss->kss_waitq); } - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); + /* NB we have to wait to be told our true NID... */ + process_id.pid = 0; + process_id.nid = 0; + + rc = lib_init(&ksocknal_lib, process_id, + requested_limits, actual_limits); + if (rc != PTL_OK) { + CERROR("lib_init failed: error %d\n", rc); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PtlNIDebug(ksocknal_ni, ~0); - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called + ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called for (i = 0; i < SOCKNAL_N_SCHED; i++) { rc = ksocknal_thread_start (ksocknal_scheduler, @@ -1659,7 +1682,7 @@ ksocknal_module_init (void) if (rc != 0) { CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1668,7 +1691,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i)); if (rc != 0) { CERROR("Can't spawn socknal autoconnectd: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } } @@ -1676,7 +1699,7 @@ ksocknal_module_init (void) rc = ksocknal_thread_start (ksocknal_reaper, NULL); if (rc != 0) { CERROR ("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } @@ -1686,62 +1709,127 @@ ksocknal_module_init (void) CDEBUG(D_NET, "Can't initialise routing interface " "(rc = %d): not routing\n", rc); } else { - /* Only allocate forwarding buffers if I'm on a gateway */ + /* Only allocate forwarding buffers if there's a router */ for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb; + ksock_fmb_t *fmb; + ksock_fmb_pool_t *pool; - PORTAL_ALLOC(fmb, sizeof(*fmb)); + + if (i < SOCKNAL_SMALL_FWD_NMSGS) + pool = &ksocknal_data.ksnd_small_fmp; + else + pool = &ksocknal_data.ksnd_large_fmp; + + PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, + fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { - ksocknal_module_fini(); + ksocknal_api_shutdown(&ksocknal_api); return (-ENOMEM); } - if (i < SOCKNAL_SMALL_FWD_NMSGS) { - fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp; - } else { - fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; - } - - for (j = 0; j < fmb->fmb_npages; j++) { - fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); + fmb->fmb_pool = pool; + + for (j = 0; j < pool->fmp_buff_pages; j++) { + fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); - if (fmb->fmb_pages[j] == NULL) { - ksocknal_module_fini (); + if (fmb->fmb_kiov[j].kiov_page == NULL) { + ksocknal_api_shutdown (&ksocknal_api); return (-ENOMEM); } - LASSERT(page_address(fmb->fmb_pages[j]) != NULL); + LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL); } - list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); + list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs); } } - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); + rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL); if (rc != 0) { CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); + ksocknal_api_shutdown (&ksocknal_api); return (rc); } - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - -#ifdef CONFIG_SYSCTL - /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_data.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); -#endif /* flag everything initialised */ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; printk(KERN_INFO "Lustre: Routing socket NAL loaded " - "(Routing %s, initial mem %d)\n", + "(Routing %s, initial mem %d, incarnation "LPX64")\n", kpr_routing (&ksocknal_data.ksnd_router) ? - "enabled" : "disabled", pkmem); + "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation); + + return (0); +} + +void __exit +ksocknal_module_fini (void) +{ +#ifdef CONFIG_SYSCTL + if (ksocknal_tunables.ksnd_sysctl != NULL) + unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); +#endif + PtlNIFini(ksocknal_ni); + + ptl_unregister_nal(SOCKNAL); +} + +int __init +ksocknal_module_init (void) +{ + int rc; + /* packet descriptor must fit in a router descriptor's scratchpad */ + LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); + /* the following must be sizeof(int) for proc_dointvec() */ + LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int)); + LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int)); +#if SOCKNAL_ZC + LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int)); +#endif + /* check ksnr_connected/connecting field large enough */ + LASSERT(SOCKNAL_CONN_NTYPES <= 4); + + ksocknal_api.startup = ksocknal_api_startup; + ksocknal_api.forward = ksocknal_api_forward; + ksocknal_api.shutdown = ksocknal_api_shutdown; + ksocknal_api.lock = ksocknal_api_lock; + ksocknal_api.unlock = ksocknal_api_unlock; + ksocknal_api.nal_data = &ksocknal_data; + + ksocknal_lib.nal_data = &ksocknal_data; + + /* Initialise dynamic tunables to defaults once only */ + ksocknal_tunables.ksnd_io_timeout = SOCKNAL_IO_TIMEOUT; + ksocknal_tunables.ksnd_eager_ack = SOCKNAL_EAGER_ACK; + ksocknal_tunables.ksnd_typed_conns = SOCKNAL_TYPED_CONNS; + ksocknal_tunables.ksnd_min_bulk = SOCKNAL_MIN_BULK; +#if SOCKNAL_ZC + ksocknal_tunables.ksnd_zc_min_frag = SOCKNAL_ZC_MIN_FRAG; +#endif + + rc = ptl_register_nal(SOCKNAL, &ksocknal_api); + if (rc != PTL_OK) { + CERROR("Can't register SOCKNAL: %d\n", rc); + return (-ENOMEM); /* or something... */ + } + + /* Pure gateways want the NAL started up at module load time... */ + rc = PtlNIInit(SOCKNAL, 0, NULL, NULL, &ksocknal_ni); + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { + ptl_unregister_nal(SOCKNAL); + return (-ENODEV); + } + +#ifdef CONFIG_SYSCTL + /* Press on regardless even if registering sysctl doesn't work */ + ksocknal_tunables.ksnd_sysctl = + register_sysctl_table (ksocknal_top_ctl_table, 0); +#endif return (0); } @@ -1752,4 +1840,3 @@ MODULE_LICENSE("GPL"); module_init(ksocknal_module_init); module_exit(ksocknal_module_fini); -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 9380a2d..4212cab 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -14,6 +14,7 @@ PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH LUSTRE=${LUSTRE:-`dirname $0`/..} RLUSTRE=${RLUSTRE:-$LUSTRE} +MOUNTLUSTRE=${MOUNTLUSTRE:-/sbin/mount.lustre} . $LUSTRE/tests/test-framework.sh @@ -47,7 +48,7 @@ start_mds() { } stop_mds() { echo "stop mds service on `facet_active_host mds`" - stop mds $@ || return 97 + stop mds $@ || return 97 } start_ost() { @@ -57,7 +58,7 @@ start_ost() { stop_ost() { echo "stop ost service on `facet_active_host ost`" - stop ost $@ || return 98 + stop ost $@ || return 98 } mount_client() { @@ -80,7 +81,7 @@ manual_umount_client(){ setup() { start_ost start_mds - mount_client $MOUNT + mount_client $MOUNT } cleanup() { @@ -88,7 +89,7 @@ cleanup() { stop_mds || return 201 stop_ost || return 202 # catch case where these return just fine, but modules are still not unloaded - /sbin/lsmod | grep -q portals + /sbin/lsmod | grep -q portals if [ 1 -ne $? ]; then echo "modules still loaded..." return 203 @@ -119,7 +120,7 @@ gen_config test_0() { start_ost start_mds - mount_client $MOUNT + mount_client $MOUNT check_mount || return 41 cleanup || return $? } @@ -128,7 +129,7 @@ run_test 0 "single mount setup" test_1() { start_ost echo "start ost second time..." - start ost --reformat $OSTLCONFARGS + start ost --reformat $OSTLCONFARGS start_mds mount_client $MOUNT check_mount || return 42 @@ -140,16 +141,16 @@ test_2() { start_ost start_mds echo "start mds second time.." - start mds --reformat $MDSLCONFARGS + start mds --reformat $MDSLCONFARGS - mount_client $MOUNT + mount_client $MOUNT check_mount || return 43 cleanup || return $? } run_test 2 "start up mds twice" test_3() { - setup + setup mount_client $MOUNT check_mount || return 44 @@ -163,7 +164,7 @@ test_4() { setup touch $DIR/$tfile || return 85 stop_ost --force - cleanup + cleanup eno=$? # ok for ost to fail shutdown if [ 202 -ne $eno ]; then @@ -178,8 +179,8 @@ test_5() { touch $DIR/$tfile || return 1 stop_mds --force || return 2 - # cleanup may return an error from the failed - # disconnects; for now I'll consider this successful + # cleanup may return an error from the failed + # disconnects; for now I'll consider this successful # if all the modules have unloaded. umount $MOUNT & UMOUNT_PID=$! @@ -187,10 +188,10 @@ test_5() { echo "killing umount" kill -TERM $UMOUNT_PID echo "waiting for umount to finish" - wait $UMOUNT_PID + wait $UMOUNT_PID # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null + $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null # stop_mds is a no-op here, and should not fail stop_mds || return 4 @@ -207,11 +208,11 @@ test_5b() { stop_mds [ -d $MOUNT ] || mkdir -p $MOUNT - $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null + $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null llmount $mds_HOST://mds_svc/client_facet $MOUNT && exit 1 # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null + $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null # stop_mds is a no-op here, and should not fail stop_mds || return 2 @@ -228,11 +229,11 @@ test_5c() { start_mds [ -d $MOUNT ] || mkdir -p $MOUNT - $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null + $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null llmount $mds_HOST://wrong_mds_svc/client_facet $MOUNT && exit 1 # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null + $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null stop_mds || return 2 stop_ost || return 3 @@ -263,12 +264,12 @@ test_8() { start_ost start_mds - mount_client $MOUNT - mount_client $MOUNT2 + mount_client $MOUNT + mount_client $MOUNT2 check_mount2 || return 45 umount $MOUNT - umount_client $MOUNT2 + umount_client $MOUNT2 stop_mds stop_ost @@ -279,7 +280,7 @@ test_9() { # backup the old values of PTLDEBUG and SUBSYSTEM OLDPTLDEBUG=$PTLDEBUG OLDSUBSYSTEM=$SUBSYSTEM - + # generate new configuration file with lmc --ptldebug and --subsystem PTLDEBUG="trace" SUBSYSTEM="mdc" @@ -340,20 +341,36 @@ test_9() { run_test 9 "test --ptldebug and --subsystem for lmc and lconf" test_10() { + echo "generate configuration with the same name for node and mds" OLDXMLCONFIG=$XMLCONFIG XMLCONFIG="broken.xml" [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG - SAMENAME="mds1" - do_lmc --add node --node $SAMENAME - do_lmc --add net --node $SAMENAME --nid $SAMENAME --nettype tcp - do_lmc --add mds --node $SAMENAME --mds $SAMENAME --nid $SAMENAME \ - --fstype ext3 --dev /dev/mds1 || return $? - do_lmc --add lov --lov lov1 --mds $SAMENAME --stripe_sz 65536 \ - --stripe_cnt 1 --stripe_pattern 0 || return $? + facet="mds" + rm -f ${facet}active + add_facet $facet + echo "the name for node and mds is the same" + do_lmc --add mds --node ${facet}_facet --mds ${facet}_facet \ + --dev $MDSDEV --size $MDSSIZE || return $? + do_lmc --add lov --mds ${facet}_facet --lov lov1 --stripe_sz \ + $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ \ + --stripe_pattern 0 || return $? + add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE + facet="client" + add_facet $facet --lustre_upcall $UPCALL + do_lmc --add mtpt --node ${facet}_facet --mds mds_facet \ + --lov lov1 --path $MOUNT + + echo "mount lustre" + start_ost + start_mds + mount_client $MOUNT + check_mount || return 41 + cleanup || return $? + echo "Success!" XMLCONFIG=$OLDXMLCONFIG } -run_test 10 "use lmc with the same name for node and mds" +run_test 10 "mount lustre with the same name for node and mds" test_11() { OLDXMLCONFIG=$XMLCONFIG @@ -364,7 +381,7 @@ test_11() { add_ost ost --dev $OSTDEV --size $OSTSIZE add_client client mds --path $MOUNT --ost ost_svc || return $? echo "Default lov config success!" - + [ -f "$XMLCONFIG" ] && rm -f $XMLCONFIG add_mds mds --dev $MDSDEV --size $MDSSIZE add_ost ost --dev $OSTDEV --size $OSTSIZE @@ -394,7 +411,7 @@ test_12() { else echo "matched double quote fail" return 1 - fi + fi rm -f $XMLCONFIG rm -f $BATCHFILE echo "--add net --node localhost --nid localhost.localdomain --nettype tcp" > $BATCHFILE @@ -507,11 +524,11 @@ test_14() { add_lov lov1 mds --stripe_sz $STRIPE_BYTES\ --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 add_ost ost --lov lov1 --dev $OSTDEV --size $OSTSIZE \ - --mkfsoptions "-Llabel_conf_15" + --mkfsoptions "-Llabel_conf_14" add_client client mds --lov lov1 --path $MOUNT FOUNDSTRING=`awk -F"<" '//{print $2}' $XMLCONFIG` - EXPECTEDSTRING="mkfsoptions>-Llabel_conf_15" + EXPECTEDSTRING="mkfsoptions>-Llabel_conf_14" if [ $EXPECTEDSTRING != $FOUNDSTRING ]; then echo "Error: expected: $EXPECTEDSTRING; found: $FOUNDSTRING" return 1 @@ -523,7 +540,7 @@ test_14() { start_ost start_mds mount_client $MOUNT || return $? - if [ -z "`dumpe2fs -h $OSTDEV | grep label_conf_15`" ]; then + if [ -z "`dumpe2fs -h $OSTDEV | grep label_conf_14`" ]; then echo "Error: the mkoptions not applied to mke2fs of ost." return 1 fi @@ -534,4 +551,153 @@ test_14() { } run_test 14 "test mkfsoptions of ost for lmc and lconf" +cleanup_15() { + trap 0 + [ -f $MOUNTLUSTRE ] && echo "remove $MOUNTLUSTRE" && rm -f $MOUNTLUSTRE + if [ -f $MOUNTLUSTRE.sav ]; then + echo "return original $MOUNTLUSTRE.sav to $MOUNTLUSTRE" + mv $MOUNTLUSTRE.sav $MOUNTLUSTRE + fi +} + +test_15() { + start_ost + start_mds + echo "mount lustre on ${MOUNT} with $MOUNTLUSTRE....." + if [ -f "$MOUNTLUSTRE" ]; then + echo "save $MOUNTLUSTRE to $MOUNTLUSTRE.sav" + mv $MOUNTLUSTRE $MOUNTLUSTRE.sav + fi + [ -f "$MOUNTLUSTRE" ] && echo "can't move $MOUNTLUSTRE" && return 40 + trap cleanup_15 EXIT INT + [ ! `cp $LUSTRE/utils/llmount $MOUNTLUSTRE` ] || return $? + do_node `hostname` mkdir -p $MOUNT 2> /dev/null + # load llite module on the client if it isn't in /lib/modules + do_node `hostname` lconf --nosetup --node client_facet $XMLCONFIG + do_node `hostname` mount -t lustre -o nettype=$NETTYPE \ + `facet_active_host mds`:/mds_svc/client_facet $MOUNT ||return $? + echo "mount lustre on $MOUNT with $MOUNTLUSTRE: success" + [ -d /r ] && $LCTL modules > /r/tmp/ogdb-`hostname` + check_mount || return 41 + do_node `hostname` umount $MOUNT + + [ -f "$MOUNTLUSTRE" ] && rm -f $MOUNTLUSTRE + echo "mount lustre on ${MOUNT} without $MOUNTLUSTRE....." + do_node `hostname` mount -t lustre -o nettype=$NETTYPE \ + `facet_active_host mds`:/mds_svc/client_facet $MOUNT &&return $? + echo "mount lustre on $MOUNT without $MOUNTLUSTRE failed as expected" + cleanup || return $? + cleanup_15 +} +run_test 15 "zconf-mount without /sbin/mount.lustre (should return error)" + +test_16() { + TMPMTPT="/mnt/conf16" + + if [ ! -f "$MDSDEV" ]; then + echo "no $MDSDEV existing, so mount Lustre to create one" + start_ost + start_mds + mount_client $MOUNT + check_mount || return 41 + cleanup || return $? + fi + + echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555" + [ -d $TMPMTPT ] || mkdir -p $TMPMTPT + mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $? + chmod 555 $TMPMTPT/OBJECTS || return $? + chmod 555 $TMPMTPT/LOGS || return $? + chmod 555 $TMPMTPT/PENDING || return $? + umount $TMPMTPT || return $? + + echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre" + start_ost + start_mds + mount_client $MOUNT + check_mount || return 41 + cleanup || return $? + + echo "read the mode of OBJECTS/LOGS/PENDING and check if they has been changed properly" + EXPECTEDOBJECTSMODE=`debugfs -R "stat OBJECTS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` + EXPECTEDLOGSMODE=`debugfs -R "stat LOGS" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` + EXPECTEDPENDINGMODE=`debugfs -R "stat PENDING" $MDSDEV 2> /dev/null | awk '/Mode: /{print $6}'` + + if [ $EXPECTEDOBJECTSMODE = "0777" ]; then + echo "Success:Lustre change the mode of OBJECTS correctly" + else + echo "Error: Lustre does not change the mode of OBJECTS properly" + return 1 + fi + + if [ $EXPECTEDLOGSMODE = "0777" ]; then + echo "Success:Lustre change the mode of LOGS correctly" + else + echo "Error: Lustre does not change the mode of LOGS properly" + return 1 + fi + + if [ $EXPECTEDPENDINGMODE = "0777" ]; then + echo "Success:Lustre change the mode of PENDING correctly" + else + echo "Error: Lustre does not change the mode of PENDING properly" + return 1 + fi +} +run_test 16 "verify that lustre will correct the mode of OBJECTS/LOGS/PENDING" + +test_17() { + TMPMTPT="/mnt/conf17" + + if [ ! -f "$MDSDEV" ]; then + echo "no $MDSDEV existing, so mount Lustre to create one" + start_ost + start_mds + mount_client $MOUNT + check_mount || return 41 + cleanup || return $? + fi + + echo "Remove mds config log" + [ -d $TMPMTPT ] || mkdir -p $TMPMTPT + mount -o loop -t ext3 $MDSDEV $TMPMTPT || return $? + rm -f $TMPMTPT/LOGS/mds_svc || return $? + umount $TMPMTPT || return $? + + start_ost + start mds $MDSLCONFARGS && return 42 + cleanup || return $? +} +run_test 17 "Verify failed mds_postsetup won't fail assertion (2936)" + +test_18() { + [ -f $MDSDEV ] && echo "remove $MDSDEV" && rm -f $MDSDEV + echo "mount mds with large journal..." + OLDMDSSIZE=$MDSSIZE + MDSSIZE=2000000 + gen_config + + echo "mount lustre system..." + start_ost + start_mds + mount_client $MOUNT + check_mount || return 41 + + echo "check journal size..." + FOUNDJOURNALSIZE=`debugfs -R "stat <8>" $MDSDEV | awk '/Size: / { print $6; exit;}'` + if [ $FOUNDJOURNALSIZE = "79691776" ]; then + echo "Success:lconf creates large journals" + else + echo "Error:lconf not create large journals correctly" + echo "expected journal size: 79691776(76M), found journal size: $FOUNDJOURNALSIZE" + return 1 + fi + + cleanup || return $? + + MDSSIZE=$OLDMDSSIZE + gen_config +} +run_test 18 "check lconf creates large journals" + equals_msg "Done" -- 1.8.3.1