X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fptllnd%2Fptllnd.h;h=49b90d3d8bf1b0ef4dfb81ab9a8d8f958c703845;hb=5e1e6a6756d3b4ca19a0d7e0defcf974dbfed13c;hp=7243a6b6ba277378618a6c7896fdcbfc5cb98c48;hpb=ed88907a96ba81d3558e71ade9def98bdc785169;p=fs%2Flustre-release.git diff --git a/lnet/klnds/ptllnd/ptllnd.h b/lnet/klnds/ptllnd/ptllnd.h index 7243a6b..49b90d3 100755 --- a/lnet/klnds/ptllnd/ptllnd.h +++ b/lnet/klnds/ptllnd/ptllnd.h @@ -1,26 +1,49 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner + * GPL HEADER START * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/klnds/ptllnd/ptllnd.h + * + * Author: PJ Kirner */ #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif - +#ifndef AUTOCONF_INCLUDED #include +#endif #include #include #include @@ -50,9 +73,10 @@ #define DEBUG_SUBSYSTEM S_LND -#include +#include #include #include +#include #include #ifdef CRAY_XT3 #include @@ -65,13 +89,13 @@ */ //#define PJK_DEBUGGING -#if CONFIG_SMP +#ifdef CONFIG_SMP # define PTLLND_N_SCHED num_online_cpus() /* # schedulers */ #else # define PTLLND_N_SCHED 1 /* # schedulers */ #endif -#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1) +#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peertxcredits)-1) /* when eagerly to return credits */ typedef struct @@ -86,20 +110,23 @@ typedef struct int *kptl_rxb_npages; /* number of pages for rx buffer */ int *kptl_rxb_nspare; /* number of spare rx buffers */ int *kptl_credits; /* number of credits */ - int *kptl_peercredits; /* number of credits */ + int *kptl_peertxcredits; /* number of peer tx credits */ + int *kptl_peerrtrcredits; /* number of peer router credits */ int *kptl_max_msg_size; /* max immd message size*/ int *kptl_peer_hash_table_size; /* # slots in peer hash table */ int *kptl_reschedule_loops; /* scheduler yield loops */ + int *kptl_ack_puts; /* make portals ack PUTs */ #ifdef CRAY_XT3 int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */ + int *kptl_ptltrace_on_fail; /* dump pltrace on PTL_NAL_FAILED? */ char **kptl_ptltrace_basename; /* ptltrace dump file basename */ #endif #ifdef PJK_DEBUGGING int *kptl_simulation_bitmap;/* simulation bitmap */ #endif -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kptl_sysctl; /* sysctl interface */ +#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM + cfs_sysctl_table_header_t *kptl_sysctl; /* sysctl interface */ #endif } kptl_tunables_t; @@ -108,6 +135,7 @@ typedef struct /***********************************************************************/ typedef struct kptl_data kptl_data_t; +typedef struct kptl_net kptl_net_t; typedef struct kptl_rx_buffer kptl_rx_buffer_t; typedef struct kptl_peer kptl_peer_t; @@ -125,6 +153,7 @@ typedef struct kptl_rx /* receive message */ kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */ kptl_msg_t *rx_msg; /* received message */ int rx_nob; /* received message size */ + unsigned long rx_treceived; /* time received */ ptl_process_id_t rx_initiator; /* sender's address */ #ifdef CRAY_XT3 ptl_uid_t rx_uid; /* sender's uid */ @@ -133,6 +162,10 @@ typedef struct kptl_rx /* receive message */ char rx_space[0]; /* copy of incoming request */ } kptl_rx_t; +#define PTLLND_POSTRX_DONT_POST 0 /* don't post */ +#define PTLLND_POSTRX_NO_CREDIT 1 /* post: no credits */ +#define PTLLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ + typedef struct kptl_rx_buffer_pool { spinlock_t rxbp_lock; @@ -182,6 +215,7 @@ typedef struct kptl_tx /* transmit message */ enum kptl_tx_type tx_type; /* small msg/{put,get}{req,resp} */ int tx_active:1; /* queued on the peer */ int tx_idle:1; /* on the free list */ + int tx_acked:1; /* portals ACK wanted (for debug only) */ kptl_eventarg_t tx_msg_eventarg; /* event->md.user_ptr */ kptl_eventarg_t tx_rdma_eventarg; /* event->md.user_ptr */ int tx_status; /* the status of this tx descriptor */ @@ -192,8 +226,9 @@ typedef struct kptl_tx /* transmit message */ kptl_msg_t *tx_msg; /* the message data */ kptl_peer_t *tx_peer; /* the peer this is waiting on */ unsigned long tx_deadline; /* deadline */ - ptl_md_t tx_rdma_md; /* rdma buffer */ - kptl_fragvec_t *tx_rdma_frags; /* buffer fragments */ + unsigned long tx_tposted; /* time posted */ + ptl_md_t tx_rdma_md; /* rdma descriptor */ + kptl_fragvec_t *tx_frags; /* buffer fragments */ } kptl_tx_t; enum kptllnd_peer_state @@ -209,18 +244,24 @@ enum kptllnd_peer_state struct kptl_peer { struct list_head peer_list; - atomic_t peer_refcount; /* The current refrences */ + atomic_t peer_refcount; /* The current references */ enum kptllnd_peer_state peer_state; spinlock_t peer_lock; /* serialize */ + struct list_head peer_noops; /* PTLLND_MSG_TYPE_NOOP txs */ struct list_head peer_sendq; /* txs waiting for mh handles */ struct list_head peer_activeq; /* txs awaiting completion */ lnet_process_id_t peer_id; /* Peer's LNET id */ ptl_process_id_t peer_ptlid; /* Peer's portals id */ __u64 peer_incarnation; /* peer's incarnation */ + __u64 peer_myincarnation; /* my incarnation at HELLO */ int peer_sent_hello; /* have I sent HELLO? */ int peer_credits; /* number of send credits */ - int peer_outstanding_credits;/* number of peer credits */ + int peer_outstanding_credits;/* number of peer credits to return */ + int peer_sent_credits; /* #msg buffers posted for peer */ + int peer_max_msg_size; /* peer's rx buffer size */ int peer_error; /* errno on closing this peer */ + int peer_retry_noop; /* need to retry returning credits */ + int peer_check_stamp; /* watchdog check stamp */ cfs_time_t peer_last_alive; /* when (in jiffies) I was last alive */ __u64 peer_next_matchbits; /* Next value to register RDMA from peer */ __u64 peer_last_matchbits_seen; /* last matchbits used to RDMA to peer */ @@ -231,12 +272,14 @@ struct kptl_data int kptl_init; /* initialisation state */ volatile int kptl_shutdown; /* shut down? */ atomic_t kptl_nthreads; /* # live threads */ - lnet_ni_t *kptl_ni; /* _the_ LND instance */ ptl_handle_ni_t kptl_nih; /* network inteface handle */ ptl_process_id_t kptl_portals_id; /* Portals ID of interface */ __u64 kptl_incarnation; /* which one am I */ ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */ + rwlock_t kptl_net_rw_lock; /* serialise... */ + struct list_head kptl_nets; /* kptl_net instances */ + spinlock_t kptl_sched_lock; /* serialise... */ wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */ struct list_head kptl_sched_txq; /* tx requiring attention */ @@ -244,6 +287,7 @@ struct kptl_data struct list_head kptl_sched_rxbq; /* rxb requiring reposting */ wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */ + atomic_t kptl_needs_ptltrace; /* watchdog thread to dump ptltrace */ kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */ cfs_mem_cache_t* kptl_rx_cache; /* rx descripter cache */ @@ -265,6 +309,14 @@ struct kptl_data spinlock_t kptl_ptlid2str_lock; /* serialise str ops */ }; +struct kptl_net +{ + struct list_head net_list; /* chain on kptl_data:: kptl_nets */ + lnet_ni_t *net_ni; + atomic_t net_refcount; /* # current references */ + int net_shutdown; /* lnd_shutdown called */ +}; + enum { PTLLND_INIT_NOTHING = 0, @@ -276,14 +328,12 @@ extern kptl_tunables_t kptllnd_tunables; extern kptl_data_t kptllnd_data; static inline lnet_nid_t -kptllnd_ptl2lnetnid(ptl_nid_t ptl_nid) +kptllnd_ptl2lnetnid(lnet_nid_t ni_nid, ptl_nid_t ptl_nid) { #ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid), - LNET_NIDADDR(ptl_nid)); + return LNET_MKNID(LNET_NIDNET(ni_nid), LNET_NIDADDR(ptl_nid)); #else - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid), - ptl_nid); + return LNET_MKNID(LNET_NIDNET(ni_nid), ptl_nid); #endif } @@ -298,9 +348,21 @@ kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) #endif } +static inline void +kptllnd_schedule_ptltrace_dump (void) +{ +#ifdef CRAY_XT3 + if (*kptllnd_tunables.kptl_ptltrace_on_fail) { + atomic_inc(&kptllnd_data.kptl_needs_ptltrace); + wake_up(&kptllnd_data.kptl_watchdog_waitq); + } +#endif +} + int kptllnd_startup(lnet_ni_t *ni); void kptllnd_shutdown(lnet_ni_t *ni); int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); +void kptllnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when); int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, unsigned int niov, @@ -317,6 +379,7 @@ void kptllnd_tunables_fini(void); const char *kptllnd_evtype2str(int evtype); const char *kptllnd_msgtype2str(int msgtype); +const char *kptllnd_errtype2str(int errtype); static inline void * kptllnd_eventarg2obj (kptl_eventarg_t *eva) @@ -390,8 +453,8 @@ kptllnd_rx_buffer_decref(kptl_rx_buffer_t *rxb) /* * RX SUPPORT FUNCTIONS */ -void kptllnd_rx_done(kptl_rx_t *rx); void kptllnd_rx_parse(kptl_rx_t *rx); +void kptllnd_rx_done(kptl_rx_t *rx, int post_credit); /* * PEER SUPPORT FUNCTIONS @@ -410,9 +473,12 @@ void kptllnd_peer_close(kptl_peer_t *peer, int why); void kptllnd_handle_closing_peers(void); int kptllnd_peer_connect(kptl_tx_t *tx, lnet_nid_t nid); void kptllnd_peer_check_sends(kptl_peer_t *peer); -void kptllnd_peer_check_bucket(int idx); -void kptllnd_tx_launch(kptl_tx_t *tx, lnet_process_id_t target); -kptl_peer_t *kptllnd_peer_handle_hello(ptl_process_id_t initiator, +void kptllnd_peer_check_bucket(int idx, int stamp); +void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag); +int kptllnd_find_target(kptl_net_t *net, lnet_process_id_t target, + kptl_peer_t **peerp); +kptl_peer_t *kptllnd_peer_handle_hello(kptl_net_t *net, + ptl_process_id_t initiator, kptl_msg_t *msg); kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id); void kptllnd_peer_alive(kptl_peer_t *peer); @@ -431,6 +497,20 @@ kptllnd_peer_decref (kptl_peer_t *peer) } static inline void +kptllnd_net_addref (kptl_net_t *net) +{ + LASSERT (atomic_read(&net->net_refcount) > 0); + atomic_inc(&net->net_refcount); +} + +static inline void +kptllnd_net_decref (kptl_net_t *net) +{ + LASSERT (atomic_read(&net->net_refcount) > 0); + atomic_dec(&net->net_refcount); +} + +static inline void kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer) { LASSERT (tx->tx_peer == NULL); @@ -442,7 +522,9 @@ kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer) static inline struct list_head * kptllnd_nid2peerlist(lnet_nid_t nid) { - unsigned int hash = ((unsigned int)nid) % + /* Only one copy of peer state for all logical peers, so the net part + * of NIDs is ignored; e.g. A@ptl0 and A@ptl2 share peer state */ + unsigned int hash = ((unsigned int)LNET_NIDADDR(nid)) % kptllnd_data.kptl_peer_hash_size; return &kptllnd_data.kptl_peers[hash]; @@ -471,14 +553,14 @@ kptllnd_reserve_buffers(int n) static inline int kptllnd_peer_reserve_buffers(void) { - return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits); + return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peertxcredits); } static inline void kptllnd_peer_unreserve_buffers(void) { kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool, - *kptllnd_tunables.kptl_peercredits); + *kptllnd_tunables.kptl_peertxcredits); } /* @@ -487,6 +569,8 @@ kptllnd_peer_unreserve_buffers(void) int kptllnd_setup_tx_descs(void); void kptllnd_cleanup_tx_descs(void); void kptllnd_tx_fini(kptl_tx_t *tx); +void kptllnd_cancel_txlist(struct list_head *peerq, struct list_head *txs); +void kptllnd_restart_txs(kptl_net_t *net, lnet_process_id_t id, struct list_head *restarts); kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose); void kptllnd_tx_callback(ptl_event_t *ev); const char *kptllnd_tx_typestr(int type); @@ -509,7 +593,8 @@ kptllnd_tx_decref(kptl_tx_t *tx) /* * MESSAGE SUPPORT FUNCTIONS */ -void kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob); +void kptllnd_init_msg(kptl_msg_t *msg, int type, + lnet_process_id_t target, int body_nob); void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer); int kptllnd_msg_unpack(kptl_msg_t *msg, int nob); @@ -535,4 +620,3 @@ void kptllnd_dump_ptltrace(void); #else #define IS_SIMULATION_ENABLED(x) 0 #endif -