-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved.
- * Author: PJ Kirner <pjkirner@clusterfs.com>
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
*
- * This file is confidential source code owned by Cluster File Systems.
- * No viewing, modification, compilation, redistribution, or any other
- * form of use is permitted except through a signed license agreement.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, Intel Corporation.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
*
- * If you have not signed such an agreement, then you have no rights to
- * this file. Please destroy it immediately and contact CFS.
+ * lnet/klnds/ptllnd/ptllnd.h
*
+ * Author: PJ Kirner <pjkirner@clusterfs.com>
*/
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/errno.h>
-#include <linux/smp_lock.h>
#include <linux/unistd.h>
#include <linux/uio.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#define DEBUG_SUBSYSTEM S_LND
-#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
#include <lnet/lnet.h>
#include <lnet/lib-lnet.h>
+#include <lnet/lnet-sysctl.h>
#include <portals/p30.h>
-#ifdef CRAY_XT3
-#include <portals/ptltrace.h>
-#endif
#include <lnet/ptllnd.h> /* Depends on portals/p30.h */
/*
//#define PJK_DEBUGGING
#ifdef CONFIG_SMP
-# define PTLLND_N_SCHED num_online_cpus() /* # schedulers */
+# define PTLLND_N_SCHED cfs_num_online_cpus() /* # schedulers */
#else
# define PTLLND_N_SCHED 1 /* # schedulers */
#endif
-#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1)
+#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peertxcredits)-1)
/* when to eagerly return credits */
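/*
 * A rough sketch of how the high-water mark is meant to be used; the
 * peer_outstanding_credits field is assumed here (it is not shown in this
 * excerpt).  Once a peer holds that many returnable credits, a
 * PTLLND_MSG_TYPE_NOOP would be queued purely to hand them back:
 *
 *      if (peer->peer_outstanding_credits >= PTLLND_CREDIT_HIGHWATER)
 *              queue a NOOP tx to return credits eagerly
 */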
typedef struct
int *kptl_rxb_npages; /* number of pages for rx buffer */
int *kptl_rxb_nspare; /* number of spare rx buffers */
int *kptl_credits; /* number of credits */
- int *kptl_peercredits; /* number of credits */
+ int *kptl_peertxcredits; /* number of peer tx credits */
+ int *kptl_peerrtrcredits; /* number of peer router credits */
int *kptl_max_msg_size; /* max immediate message size */
int *kptl_peer_hash_table_size; /* # slots in peer hash table */
int *kptl_reschedule_loops; /* scheduler yield loops */
int *kptl_ack_puts; /* make portals ack PUTs */
-#ifdef CRAY_XT3
- int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */
- char **kptl_ptltrace_basename; /* ptltrace dump file basename */
-#endif
#ifdef PJK_DEBUGGING
int *kptl_simulation_bitmap;/* simulation bitmap */
#endif
/***********************************************************************/
typedef struct kptl_data kptl_data_t;
+typedef struct kptl_net kptl_net_t;
typedef struct kptl_rx_buffer kptl_rx_buffer_t;
typedef struct kptl_peer kptl_peer_t;
typedef struct kptl_rx /* receive message */
{
- struct list_head rx_list; /* queue for attention */
+ cfs_list_t rx_list; /* queue for attention */
kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */
kptl_msg_t *rx_msg; /* received message */
int rx_nob; /* received message size */
unsigned long rx_treceived; /* time received */
ptl_process_id_t rx_initiator; /* sender's address */
-#ifdef CRAY_XT3
- ptl_uid_t rx_uid; /* sender's uid */
-#endif
kptl_peer_t *rx_peer; /* pointer to peer */
char rx_space[0]; /* copy of incoming request */
} kptl_rx_t;
+#define PTLLND_POSTRX_DONT_POST 0 /* don't post */
+#define PTLLND_POSTRX_NO_CREDIT 1 /* post: no credits */
+#define PTLLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
+
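/*
 * Intended calling convention for kptllnd_rx_done() (declared below with the
 * new post_credit argument); which value each path uses is an assumption
 * based on the constant names:
 *
 *      kptllnd_rx_done(rx, PTLLND_POSTRX_PEER_CREDIT);  message consumed OK:
 *                                                       repost buffer and give
 *                                                       one credit back to peer
 *      kptllnd_rx_done(rx, PTLLND_POSTRX_NO_CREDIT);    repost, keep the credit
 *      kptllnd_rx_done(rx, PTLLND_POSTRX_DONT_POST);    buffer pool shutting down
 */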
typedef struct kptl_rx_buffer_pool
{
- spinlock_t rxbp_lock;
- struct list_head rxbp_list; /* all allocated buffers */
+ spinlock_t rxbp_lock;
+ cfs_list_t rxbp_list; /* all allocated buffers */
int rxbp_count; /* # allocated buffers */
int rxbp_reserved; /* # requests to buffer */
int rxbp_shutdown; /* shutdown flag */
struct kptl_rx_buffer
{
- kptl_rx_buffer_pool_t *rxb_pool;
- struct list_head rxb_list; /* for the rxb_pool list */
- struct list_head rxb_repost_list;/* for the kptl_sched_rxbq list */
- int rxb_posted:1; /* on the net */
- int rxb_idle:1; /* all done */
- kptl_eventarg_t rxb_eventarg; /* event->md.user_ptr */
- int rxb_refcount; /* reference count */
- ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */
- char *rxb_buffer; /* the buffer */
+ kptl_rx_buffer_pool_t *rxb_pool;
+ cfs_list_t rxb_list; /* for the rxb_pool list */
+ cfs_list_t rxb_repost_list;/* for the kptl_sched_rxbq list */
+ int rxb_posted:1; /* on the net */
+ int rxb_idle:1; /* all done */
+ kptl_eventarg_t rxb_eventarg; /* event->md.user_ptr */
+ int rxb_refcount; /* reference count */
+ ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */
+ char *rxb_buffer; /* the buffer */
};
typedef struct kptl_tx /* transmit message */
{
- struct list_head tx_list; /* queue on idle_txs etc */
- atomic_t tx_refcount; /* reference count*/
+ cfs_list_t tx_list; /* queue on idle_txs etc */
+ cfs_atomic_t tx_refcount; /* reference count*/
enum kptl_tx_type tx_type; /* small msg/{put,get}{req,resp} */
int tx_active:1; /* queued on the peer */
int tx_idle:1; /* on the free list */
struct kptl_peer
{
- struct list_head peer_list;
- atomic_t peer_refcount; /* The current refrences */
+ cfs_list_t peer_list;
+ cfs_atomic_t peer_refcount; /* The current references */
enum kptllnd_peer_state peer_state;
- spinlock_t peer_lock; /* serialize */
- struct list_head peer_sendq; /* txs waiting for mh handles */
- struct list_head peer_activeq; /* txs awaiting completion */
+ spinlock_t peer_lock; /* serialize */
+ cfs_list_t peer_noops; /* PTLLND_MSG_TYPE_NOOP txs */
+ cfs_list_t peer_sendq; /* txs waiting for mh handles */
+ cfs_list_t peer_activeq; /* txs awaiting completion */
lnet_process_id_t peer_id; /* Peer's LNET id */
ptl_process_id_t peer_ptlid; /* Peer's portals id */
__u64 peer_incarnation; /* peer's incarnation */
{
int kptl_init; /* initialisation state */
volatile int kptl_shutdown; /* shut down? */
- atomic_t kptl_nthreads; /* # live threads */
- lnet_ni_t *kptl_ni; /* _the_ LND instance */
+ cfs_atomic_t kptl_nthreads; /* # live threads */
ptl_handle_ni_t kptl_nih; /* network interface handle */
ptl_process_id_t kptl_portals_id; /* Portals ID of interface */
__u64 kptl_incarnation; /* which one am I */
ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */
- spinlock_t kptl_sched_lock; /* serialise... */
- wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */
- struct list_head kptl_sched_txq; /* tx requiring attention */
- struct list_head kptl_sched_rxq; /* rx requiring attention */
- struct list_head kptl_sched_rxbq; /* rxb requiring reposting */
+ rwlock_t kptl_net_rw_lock; /* serialise... */
+ cfs_list_t kptl_nets; /* list of kptl_net instances */
+
+ spinlock_t kptl_sched_lock; /* serialise... */
+ cfs_waitq_t kptl_sched_waitq; /* schedulers sleep here */
+ cfs_list_t kptl_sched_txq; /* tx requiring attention */
+ cfs_list_t kptl_sched_rxq; /* rx requiring attention */
+ cfs_list_t kptl_sched_rxbq; /* rxb requiring reposting */
- wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */
+ cfs_waitq_t kptl_watchdog_waitq; /* watchdog sleeps here */
kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */
cfs_mem_cache_t* kptl_rx_cache; /* rx descriptor cache */
- atomic_t kptl_ntx; /* # tx descs allocated */
- spinlock_t kptl_tx_lock; /* serialise idle tx list*/
- struct list_head kptl_idle_txs; /* idle tx descriptors */
+ cfs_atomic_t kptl_ntx; /* # tx descs allocated */
+ spinlock_t kptl_tx_lock; /* serialise idle tx list*/
+ cfs_list_t kptl_idle_txs; /* idle tx descriptors */
- rwlock_t kptl_peer_rw_lock; /* lock for peer table */
- struct list_head *kptl_peers; /* hash table of all my known peers */
- struct list_head kptl_closing_peers; /* peers being closed */
- struct list_head kptl_zombie_peers; /* peers waiting for refs to drain */
+ rwlock_t kptl_peer_rw_lock; /* lock for peer table */
+ cfs_list_t *kptl_peers; /* hash table of all my known peers */
+ cfs_list_t kptl_closing_peers; /* peers being closed */
+ cfs_list_t kptl_zombie_peers; /* peers waiting for refs to drain */
int kptl_peer_hash_size; /* size of kptl_peers */
int kptl_npeers; /* # peers extant */
int kptl_n_active_peers; /* # active peers */
int kptl_expected_peers; /* # peers I can buffer HELLOs from */
kptl_msg_t *kptl_nak_msg; /* common NAK message */
- spinlock_t kptl_ptlid2str_lock; /* serialise str ops */
+ spinlock_t kptl_ptlid2str_lock; /* serialise str ops */
+};
+
+struct kptl_net
+{
+ cfs_list_t net_list; /* chain on kptl_data::kptl_nets */
+ lnet_ni_t *net_ni;
+ cfs_atomic_t net_refcount; /* # current references */
+ int net_shutdown; /* lnd_shutdown called */
};
enum
extern kptl_data_t kptllnd_data;
static inline lnet_nid_t
-kptllnd_ptl2lnetnid(ptl_nid_t ptl_nid)
+kptllnd_ptl2lnetnid(lnet_nid_t ni_nid, ptl_nid_t ptl_nid)
{
#ifdef _USING_LUSTRE_PORTALS_
- return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid),
- LNET_NIDADDR(ptl_nid));
+ return LNET_MKNID(LNET_NIDNET(ni_nid), LNET_NIDADDR(ptl_nid));
#else
- return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid),
- ptl_nid);
+ return LNET_MKNID(LNET_NIDNET(ni_nid), ptl_nid);
#endif
}
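/*
 * The NI's NID is now passed in explicitly: with several LNET networks
 * (kptl_net instances) sharing the one portals interface there is no longer
 * a single kptl_ni to supply the network part.  Illustrative use only:
 *
 *      lnet_nid_t nid = kptllnd_ptl2lnetnid(net->net_ni->ni_nid,
 *                                           rx->rx_initiator.nid);
 */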
int kptllnd_startup(lnet_ni_t *ni);
void kptllnd_shutdown(lnet_ni_t *ni);
int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
+void kptllnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
int delayed, unsigned int niov,
default:
LBUG();
case PTLLND_EVENTARG_TYPE_BUF:
- return list_entry(eva, kptl_rx_buffer_t, rxb_eventarg);
+ return cfs_list_entry(eva, kptl_rx_buffer_t, rxb_eventarg);
case PTLLND_EVENTARG_TYPE_RDMA:
- return list_entry(eva, kptl_tx_t, tx_rdma_eventarg);
+ return cfs_list_entry(eva, kptl_tx_t, tx_rdma_eventarg);
case PTLLND_EVENTARG_TYPE_MSG:
- return list_entry(eva, kptl_tx_t, tx_msg_eventarg);
+ return cfs_list_entry(eva, kptl_tx_t, tx_msg_eventarg);
}
}
static inline void
kptllnd_rx_buffer_addref(kptl_rx_buffer_t *rxb)
{
- unsigned long flags;
-
- spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags);
- rxb->rxb_refcount++;
- spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags);
+ rxb->rxb_refcount++;
+ spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags);
}
static inline void
kptllnd_rx_buffer_decref_locked(kptl_rx_buffer_t *rxb)
{
- if (--(rxb->rxb_refcount) == 0) {
- spin_lock(&kptllnd_data.kptl_sched_lock);
-
- list_add_tail(&rxb->rxb_repost_list,
- &kptllnd_data.kptl_sched_rxbq);
- wake_up(&kptllnd_data.kptl_sched_waitq);
-
- spin_unlock(&kptllnd_data.kptl_sched_lock);
- }
+ if (--(rxb->rxb_refcount) == 0) {
+ spin_lock(&kptllnd_data.kptl_sched_lock);
+
+ cfs_list_add_tail(&rxb->rxb_repost_list,
+ &kptllnd_data.kptl_sched_rxbq);
+ cfs_waitq_signal(&kptllnd_data.kptl_sched_waitq);
+
+ spin_unlock(&kptllnd_data.kptl_sched_lock);
+ }
}
static inline void
{
unsigned long flags;
int count;
-
- spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags);
- count = --(rxb->rxb_refcount);
- spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags);
+
+ spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags);
+ count = --(rxb->rxb_refcount);
+ spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags);
if (count == 0)
kptllnd_rx_buffer_post(rxb);
/*
* RX SUPPORT FUNCTIONS
*/
-void kptllnd_rx_done(kptl_rx_t *rx);
void kptllnd_rx_parse(kptl_rx_t *rx);
+void kptllnd_rx_done(kptl_rx_t *rx, int post_credit);
/*
* PEER SUPPORT FUNCTIONS
void kptllnd_peer_check_sends(kptl_peer_t *peer);
void kptllnd_peer_check_bucket(int idx, int stamp);
void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag);
-int kptllnd_find_target(kptl_peer_t **peerp, lnet_process_id_t target);
-kptl_peer_t *kptllnd_peer_handle_hello(ptl_process_id_t initiator,
+int kptllnd_find_target(kptl_net_t *net, lnet_process_id_t target,
+ kptl_peer_t **peerp);
+kptl_peer_t *kptllnd_peer_handle_hello(kptl_net_t *net,
+ ptl_process_id_t initiator,
kptl_msg_t *msg);
kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id);
void kptllnd_peer_alive(kptl_peer_t *peer);
static inline void
kptllnd_peer_addref (kptl_peer_t *peer)
{
- atomic_inc(&peer->peer_refcount);
+ cfs_atomic_inc(&peer->peer_refcount);
}
static inline void
kptllnd_peer_decref (kptl_peer_t *peer)
{
- if (atomic_dec_and_test(&peer->peer_refcount))
+ if (cfs_atomic_dec_and_test(&peer->peer_refcount))
kptllnd_peer_destroy(peer);
}
static inline void
-kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer)
+kptllnd_net_addref (kptl_net_t *net)
+{
+ LASSERT (cfs_atomic_read(&net->net_refcount) > 0);
+ cfs_atomic_inc(&net->net_refcount);
+}
+
+static inline void
+kptllnd_net_decref (kptl_net_t *net)
+{
+ LASSERT (cfs_atomic_read(&net->net_refcount) > 0);
+ cfs_atomic_dec(&net->net_refcount);
+}
+
+static inline void
+kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer)
{
LASSERT (tx->tx_peer == NULL);
-
+
kptllnd_peer_addref(peer);
tx->tx_peer = peer;
}
-static inline struct list_head *
+static inline cfs_list_t *
kptllnd_nid2peerlist(lnet_nid_t nid)
{
- unsigned int hash = ((unsigned int)nid) %
+ /* Only one copy of peer state for all logical peers, so the net part
+ * of NIDs is ignored; e.g. A@ptl0 and A@ptl2 share peer state */
+ unsigned int hash = ((unsigned int)LNET_NIDADDR(nid)) %
kptllnd_data.kptl_peer_hash_size;
return &kptllnd_data.kptl_peers[hash];
kptl_peer_t *peer;
unsigned long flags;
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- peer = kptllnd_id2peer_locked(id);
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
+ read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
+ peer = kptllnd_id2peer_locked(id);
+ read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
return peer;
}
static inline int
kptllnd_peer_reserve_buffers(void)
{
- return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits);
+ return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peertxcredits);
}
static inline void
kptllnd_peer_unreserve_buffers(void)
{
kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool,
- *kptllnd_tunables.kptl_peercredits);
+ *kptllnd_tunables.kptl_peertxcredits);
}
/*
int kptllnd_setup_tx_descs(void);
void kptllnd_cleanup_tx_descs(void);
void kptllnd_tx_fini(kptl_tx_t *tx);
+void kptllnd_cancel_txlist(cfs_list_t *peerq, cfs_list_t *txs);
+void kptllnd_restart_txs(kptl_net_t *net, lnet_process_id_t id,
+ cfs_list_t *restarts);
kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose);
void kptllnd_tx_callback(ptl_event_t *ev);
const char *kptllnd_tx_typestr(int type);
static inline void
kptllnd_tx_addref(kptl_tx_t *tx)
{
- atomic_inc(&tx->tx_refcount);
+ cfs_atomic_inc(&tx->tx_refcount);
}
-static inline void
+static inline void
kptllnd_tx_decref(kptl_tx_t *tx)
{
- LASSERT (!in_interrupt()); /* Thread context only */
+ LASSERT (!cfs_in_interrupt()); /* Thread context only */
- if (atomic_dec_and_test(&tx->tx_refcount))
+ if (cfs_atomic_dec_and_test(&tx->tx_refcount))
kptllnd_tx_fini(tx);
}
/*
* MESSAGE SUPPORT FUNCTIONS
*/
-void kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob);
+void kptllnd_init_msg(kptl_msg_t *msg, int type,
+ lnet_process_id_t target, int body_nob);
void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer);
int kptllnd_msg_unpack(kptl_msg_t *msg, int nob);
#else
#define IS_SIMULATION_ENABLED(x) 0
#endif
-