X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lnet%2Fklnds%2Fralnd%2Fralnd.h;h=9d6fcab5432e636c03685fcf8db27c89bec703a6;hb=f5fb215365791507ec98796ea624abeafa31901c;hp=7e437056100aa5120f39c833e6efcce048d4b7dd;hpb=4ab1d51e7bbd98006a21a1655f7e5bffec3cf0d4;p=fs%2Flustre-release.git diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h index 7e43705..9d6fcab 100644 --- a/lnet/klnds/ralnd/ralnd.h +++ b/lnet/klnds/ralnd/ralnd.h @@ -1,38 +1,50 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * This file is part of Lustre, http://www.lustre.org. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * + * GPL HEADER END + */ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lnet/klnds/ralnd/ralnd.h + * + * Author: Eric Barton */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - +#ifndef AUTOCONF_INCLUDED #include +#endif #include #include #include #include #include #include -#include #include #include @@ -51,124 +63,111 @@ #include #include -#define DEBUG_SUBSYSTEM S_NAL +#define DEBUG_SUBSYSTEM S_LND -#include -#include -#include -#include -#include +#include +#include +#include +#include #include -#define RANAL_MAXDEVS 2 /* max # devices RapidArray supports */ +/* tunables determined at compile time */ +#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define RANAL_N_CONND 4 /* # connection daemons */ +#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */ +#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */ -#define RANAL_MIN_RECONNECT_INTERVAL 1 /* first failed connection retry (seconds)... */ -#define RANAL_MAX_RECONNECT_INTERVAL 60 /* ...exponentially increasing to this */ +#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */ +#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2) /* timeout -> keepalive interval */ -#define RANAL_FMA_MAX_PREFIX 232 /* max size of FMA "Prefix" */ +/* fixed constants */ +#define RANAL_MAXDEVS 2 /* max # devices RapidArray supports */ +#define RANAL_FMA_MAX_PREFIX 232 /* max bytes in FMA "Prefix" we can use */ #define RANAL_FMA_MAX_DATA ((7<<10)-256) /* Max FMA MSG is 7K including prefix */ -#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */ -#define RANAL_NTX 64 /* # tx descs */ -#define RANAL_NTX_NBLK 256 /* # reserved tx descs */ - -#define RANAL_FMA_CQ_SIZE 8192 /* # entries in receive CQ - * (overflow is a performance hit) */ - -#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */ -#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2) /* timeout -> keepalive interval */ - -/* default vals for runtime tunables */ -#define RANAL_TIMEOUT 30 /* comms timeout (seconds) */ -#define RANAL_LISTENER_TIMEOUT 5 /* listener timeout (seconds) */ -#define RANAL_BACKLOG 127 /* listener's backlog */ -#define RANAL_PORT 988 /* listener's port */ -#define RANAL_MAX_IMMEDIATE (2<<10) /* immediate payload breakpoint */ - -typedef struct +typedef struct { - int kra_timeout; /* comms timeout (seconds) */ - int kra_listener_timeout; /* max time the listener can block */ - int kra_backlog; /* listener's backlog */ - int kra_port; /* listener's TCP/IP port */ - int kra_max_immediate; /* immediate payload breakpoint */ - - struct ctl_table_header *kra_sysctl; /* sysctl interface */ + int *kra_n_connd; /* # connection daemons */ + int *kra_min_reconnect_interval; /* first failed connection retry... */ + int *kra_max_reconnect_interval; /* ...exponentially increasing to this */ + int *kra_ntx; /* # tx descs */ + int *kra_credits; /* # concurrent sends */ + int *kra_peercredits; /* # concurrent sends to 1 peer */ + int *kra_fma_cq_size; /* # entries in receive CQ */ + int *kra_timeout; /* comms timeout (seconds) */ + int *kra_max_immediate; /* immediate payload breakpoint */ + +#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM + cfs_sysctl_table_header_t *kra_sysctl; /* sysctl interface */ +#endif } kra_tunables_t; typedef struct { - RAP_PVOID rad_handle; /* device handle */ - RAP_PVOID rad_fma_cqh; /* FMA completion queue handle */ - RAP_PVOID rad_rdma_cqh; /* rdma completion queue handle */ - int rad_id; /* device id */ - int rad_idx; /* index in kra_devices */ - int rad_ready; /* set by device callback */ - struct list_head rad_connq; /* connections requiring attention */ - struct list_head rad_zombies; /* connections to free */ - wait_queue_head_t rad_waitq; /* scheduler waits here */ - spinlock_t rad_lock; /* serialise */ - void *rad_scheduler; /* scheduling thread */ + RAP_PVOID rad_handle; /* device handle */ + RAP_PVOID rad_fma_cqh; /* FMA completion queue handle */ + RAP_PVOID rad_rdma_cqh; /* rdma completion queue handle */ + int rad_id; /* device id */ + int rad_idx; /* index in kra_devices */ + int rad_ready; /* set by device callback */ + cfs_list_t rad_ready_conns;/* connections ready to tx/rx */ + cfs_list_t rad_new_conns; /* new connections to complete */ + cfs_waitq_t rad_waitq; /* scheduler waits here */ + cfs_spinlock_t rad_lock; /* serialise */ + void *rad_scheduler; /* scheduling thread */ + unsigned int rad_nphysmap; /* # phys mappings */ + unsigned int rad_nppphysmap;/* # phys pages mapped */ + unsigned int rad_nvirtmap; /* # virt mappings */ + unsigned long rad_nobvirtmap;/* # virt bytes mapped */ } kra_device_t; - -typedef struct + +typedef struct { - int kra_init; /* initialisation state */ - int kra_shutdown; /* shut down? */ - atomic_t kra_nthreads; /* # live threads */ - - struct semaphore kra_nid_mutex; /* serialise NID/listener ops */ - struct semaphore kra_listener_signal; /* block for listener startup/shutdown */ - struct socket *kra_listener_sock; /* listener's socket */ - int kra_listener_shutdown; /* ask listener to close */ - - kra_device_t kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq etc */ - int kra_ndevs; /* # devices */ - - rwlock_t kra_global_lock; /* stabilize peer/conn ops */ - - struct list_head *kra_peers; /* hash table of all my known peers */ - int kra_peer_hash_size; /* size of kra_peers */ - atomic_t kra_npeers; /* # peers extant */ - - struct list_head *kra_conns; /* conns hashed by cqid */ - int kra_conn_hash_size; /* size of kra_conns */ - __u64 kra_peerstamp; /* when I started up */ - __u64 kra_connstamp; /* conn stamp generator */ - int kra_next_cqid; /* cqid generator */ - atomic_t kra_nconns; /* # connections extant */ - - long kra_new_min_timeout; /* minimum timeout on any new conn */ - wait_queue_head_t kra_reaper_waitq; /* reaper sleeps here */ - spinlock_t kra_reaper_lock; /* serialise */ - - struct list_head kra_connd_peers; /* peers waiting for a connection */ - struct list_head kra_connd_acceptq; /* accepted sockets to handshake */ - wait_queue_head_t kra_connd_waitq; /* connection daemons sleep here */ - spinlock_t kra_connd_lock; /* serialise */ - - struct list_head kra_idle_txs; /* idle tx descriptors */ - struct list_head kra_idle_nblk_txs; /* idle reserved tx descriptors */ - __u64 kra_next_tx_cookie; /* RDMA completion cookie */ - wait_queue_head_t kra_idle_tx_waitq; /* block here for tx descriptor */ - spinlock_t kra_tx_lock; /* serialise */ + int kra_init; /* initialisation state */ + int kra_shutdown; /* shut down? */ + cfs_atomic_t kra_nthreads; /* # live threads */ + lnet_ni_t *kra_ni; /* _the_ nal instance */ + + kra_device_t kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq */ + int kra_ndevs; /* # devices */ + + cfs_rwlock_t kra_global_lock; /* stabilize peer/conn ops */ + + cfs_list_t *kra_peers; /* hash table of all my known peers */ + int kra_peer_hash_size; /* size of kra_peers */ + cfs_atomic_t kra_npeers; /* # peers extant */ + int kra_nonewpeers; /* prevent new peers */ + + cfs_list_t *kra_conns; /* conns hashed by cqid */ + int kra_conn_hash_size; /* size of kra_conns */ + __u64 kra_peerstamp; /* when I started up */ + __u64 kra_connstamp; /* conn stamp generator */ + int kra_next_cqid; /* cqid generator */ + cfs_atomic_t kra_nconns; /* # connections extant */ + + long kra_new_min_timeout; /* minimum timeout on any new conn */ + cfs_waitq_t kra_reaper_waitq; /* reaper sleeps here */ + cfs_spinlock_t kra_reaper_lock; /* serialise */ + + cfs_list_t kra_connd_peers; /* peers waiting for a connection */ + cfs_list_t kra_connd_acceptq; /* accepted sockets to handshake */ + cfs_waitq_t kra_connd_waitq; /* connection daemons sleep here */ + cfs_spinlock_t kra_connd_lock; /* serialise */ + + cfs_list_t kra_idle_txs; /* idle tx descriptors */ + __u64 kra_next_tx_cookie; /* RDMA completion cookie */ + cfs_spinlock_t kra_tx_lock; /* serialise */ } kra_data_t; #define RANAL_INIT_NOTHING 0 #define RANAL_INIT_DATA 1 -#define RANAL_INIT_LIB 2 -#define RANAL_INIT_ALL 3 +#define RANAL_INIT_ALL 2 -typedef struct kra_acceptsock /* accepted socket queued for connd */ +typedef struct kra_acceptsock /* accepted socket queued for connd */ { - struct list_head ras_list; /* queue for attention */ + cfs_list_t ras_list; /* queue for attention */ struct socket *ras_sock; /* the accepted socket */ } kra_acceptsock_t; @@ -199,13 +198,13 @@ typedef struct typedef struct { - ptl_hdr_t raim_hdr; /* portals header */ + lnet_hdr_t raim_hdr; /* portals header */ /* Portals payload is in FMA "Message Data" */ } kra_immediate_msg_t; typedef struct { - ptl_hdr_t raprm_hdr; /* portals header */ + lnet_hdr_t raprm_hdr; /* portals header */ __u64 raprm_cookie; /* opaque completion cookie */ } kra_putreq_msg_t; @@ -218,7 +217,7 @@ typedef struct typedef struct { - ptl_hdr_t ragm_hdr; /* portals header */ + lnet_hdr_t ragm_hdr; /* portals header */ __u64 ragm_cookie; /* opaque completion cookie */ kra_rdma_desc_t ragm_desc; /* sender's sink buffer */ } kra_get_msg_t; @@ -245,7 +244,7 @@ typedef struct /* NB must fit in FMA "Prefix" * __u32 ram_seq; /* incrementing sequence number */ } kra_msg_t; -#define RANAL_MSG_MAGIC 0x0be91b92 /* unique magic */ +#define RANAL_MSG_MAGIC LNET_PROTO_RA_MAGIC /* unique magic */ #define RANAL_MSG_VERSION 1 /* current protocol version */ #define RANAL_MSG_FENCE 0x80 /* fence RDMA */ @@ -266,21 +265,20 @@ typedef struct /* NB must fit in FMA "Prefix" * typedef struct kra_tx /* message descriptor */ { - struct list_head tx_list; /* queue on idle_txs/rac_sendq/rac_waitq */ - struct kra_conn *tx_conn; /* owning conn */ - lib_msg_t *tx_libmsg[2]; /* lib msgs to finalize on completion */ - unsigned long tx_qtime; /* when tx started to wait for something */ - int tx_isnblk; /* I'm reserved for non-blocking sends */ - int tx_nob; /* # bytes of payload */ - int tx_buftype; /* payload buffer type */ - void *tx_buffer; /* source/sink buffer */ - int tx_phys_offset; /* first page offset (if phys) */ - int tx_phys_npages; /* # physical pages */ - RAP_PHYS_REGION *tx_phys; /* page descriptors */ - RAP_MEM_KEY tx_map_key; /* mapping key */ - RAP_RDMA_DESCRIPTOR tx_rdma_desc; /* rdma descriptor */ - __u64 tx_cookie; /* identify this tx to peer */ - kra_msg_t tx_msg; /* FMA message buffer */ + cfs_list_t tx_list; /* queue on idle_txs/rac_sendq/rac_waitq */ + struct kra_conn *tx_conn; /* owning conn */ + lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ + unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */ + int tx_nob; /* # bytes of payload */ + int tx_buftype; /* payload buffer type */ + void *tx_buffer; /* source/sink buffer */ + int tx_phys_offset; /* first page offset (if phys) */ + int tx_phys_npages; /* # physical pages */ + RAP_PHYS_REGION *tx_phys; /* page descriptors */ + RAP_MEM_KEY tx_map_key; /* mapping key */ + RAP_RDMA_DESCRIPTOR tx_rdma_desc; /* rdma descriptor */ + __u64 tx_cookie; /* identify this tx to peer */ + kra_msg_t tx_msg; /* FMA message buffer */ } kra_tx_t; #define RANAL_BUF_NONE 0 /* buffer type not set */ @@ -290,50 +288,35 @@ typedef struct kra_tx /* message descriptor */ #define RANAL_BUF_VIRT_UNMAPPED 4 /* virtual: not mapped yet */ #define RANAL_BUF_VIRT_MAPPED 5 /* virtual: mapped already */ -#define RANAL_TX_IDLE 0x00 /* on freelist */ -#define RANAL_TX_SIMPLE 0x10 /* about to send a simple message */ -#define RANAL_TX_PUTI_REQ 0x20 /* PUT initiator about to send PUT_REQ */ -#define RANAL_TX_PUTI_WAIT_ACK 0x21 /* PUT initiator waiting for PUT_ACK */ -#define RANAL_TX_PUTI_RDMA 0x22 /* PUT initiator waiting for RDMA to complete */ -#define RANAL_TX_PUTI_DONE 0x23 /* PUT initiator about to send PUT_DONE */ -#define RANAL_TX_PUTT_NAK 0x30 /* PUT target about to send PUT_NAK */ -#define RANAL_TX_PUTT_ACK 0x30 /* PUT target about to send PUT_ACK */ -#define RANAL_TX_PUTT_WAIT_DONE 0x31 /* PUT target waiting for PUT_DONE */ -#define RANAL_TX_GETI_REQ 0x40 /* GET initiator about to send GET_REQ */ -#define RANAL_TX_GETI_WAIT_DONE 0x41 /* GET initiator waiting for GET_DONE */ -#define RANAL_TX_GETT_NAK 0x50 /* GET target about to send PUT_NAK */ -#define RANAL_TX_GETT_RDMA 0x51 /* GET target waiting for RDMA to complete */ -#define RANAL_TX_GETT_DONE 0x52 /* GET target about to send GET_DONE */ - typedef struct kra_conn -{ +{ struct kra_peer *rac_peer; /* owning peer */ - struct list_head rac_list; /* stash on peer's conn list */ - struct list_head rac_hashlist; /* stash in connection hash table */ - struct list_head rac_schedlist; /* schedule (on rad_connq) for attention */ - struct list_head rac_fmaq; /* txs queued for FMA */ - struct list_head rac_rdmaq; /* txs awaiting RDMA completion */ - struct list_head rac_replyq; /* txs awaiting replies */ - __u64 rac_peerstamp; /* peer's unique stamp */ - __u64 rac_peer_connstamp; /* peer's unique connection stamp */ - __u64 rac_my_connstamp; /* my unique connection stamp */ - unsigned long rac_last_tx; /* when I last sent an FMA message */ - unsigned long rac_last_rx; /* when I last received an FMA messages */ - long rac_keepalive; /* keepalive interval */ - long rac_timeout; /* infer peer death on (last_rx + timout > now) */ - __u32 rac_cqid; /* my completion callback id (non-unique) */ - __u32 rac_tx_seq; /* tx msg sequence number */ - __u32 rac_rx_seq; /* rx msg sequence number */ - atomic_t rac_refcount; /* # users */ - unsigned int rac_close_sent; /* I've sent CLOSE */ - unsigned int rac_close_recvd; /* I've received CLOSE */ - unsigned int rac_state; /* connection state */ - unsigned int rac_scheduled; /* being attented to */ - spinlock_t rac_lock; /* serialise */ - kra_device_t *rac_device; /* which device */ - RAP_PVOID rac_rihandle; /* RA endpoint */ - kra_msg_t *rac_rxmsg; /* incoming message (FMA prefix) */ - kra_msg_t rac_msg; /* keepalive/CLOSE message buffer */ + cfs_list_t rac_list; /* stash on peer's conn list */ + cfs_list_t rac_hashlist; /* stash in connection hash table */ + cfs_list_t rac_schedlist; /* schedule (on rad_???_conns) for attention */ + cfs_list_t rac_fmaq; /* txs queued for FMA */ + cfs_list_t rac_rdmaq; /* txs awaiting RDMA completion */ + cfs_list_t rac_replyq; /* txs awaiting replies */ + __u64 rac_peerstamp; /* peer's unique stamp */ + __u64 rac_peer_connstamp;/* peer's unique connection stamp */ + __u64 rac_my_connstamp; /* my unique connection stamp */ + unsigned long rac_last_tx; /* when I last sent an FMA message (jiffies) */ + unsigned long rac_last_rx; /* when I last received an FMA messages (jiffies) */ + long rac_keepalive; /* keepalive interval (seconds) */ + long rac_timeout; /* infer peer death if no rx for this many seconds */ + __u32 rac_cqid; /* my completion callback id (non-unique) */ + __u32 rac_tx_seq; /* tx msg sequence number */ + __u32 rac_rx_seq; /* rx msg sequence number */ + cfs_atomic_t rac_refcount; /* # users */ + unsigned int rac_close_sent; /* I've sent CLOSE */ + unsigned int rac_close_recvd; /* I've received CLOSE */ + unsigned int rac_state; /* connection state */ + unsigned int rac_scheduled; /* being attented to */ + cfs_spinlock_t rac_lock; /* serialise */ + kra_device_t *rac_device; /* which device */ + RAP_PVOID rac_rihandle; /* RA endpoint */ + kra_msg_t *rac_rxmsg; /* incoming message (FMA prefix) */ + kra_msg_t rac_msg; /* keepalive/CLOSE message buffer */ } kra_conn_t; #define RANAL_CONN_ESTABLISHED 0 @@ -342,34 +325,20 @@ typedef struct kra_conn typedef struct kra_peer { - struct list_head rap_list; /* stash on global peer list */ - struct list_head rap_connd_list; /* schedule on kra_connd_peers */ - struct list_head rap_conns; /* all active connections */ - struct list_head rap_tx_queue; /* msgs waiting for a conn */ - ptl_nid_t rap_nid; /* who's on the other end(s) */ - __u32 rap_ip; /* IP address of peer */ - int rap_port; /* port on which peer listens */ - atomic_t rap_refcount; /* # users */ - int rap_persistence; /* "known" peer refs */ - int rap_connecting; /* connection forming */ + cfs_list_t rap_list; /* stash on global peer list */ + cfs_list_t rap_connd_list; /* schedule on kra_connd_peers */ + cfs_list_t rap_conns; /* all active connections */ + cfs_list_t rap_tx_queue; /* msgs waiting for a conn */ + lnet_nid_t rap_nid; /* who's on the other end(s) */ + __u32 rap_ip; /* IP address of peer */ + int rap_port; /* port on which peer listens */ + cfs_atomic_t rap_refcount; /* # users */ + int rap_persistence; /* "known" peer refs */ + int rap_connecting; /* connection forming */ unsigned long rap_reconnect_time; /* CURRENT_SECONDS when reconnect OK */ unsigned long rap_reconnect_interval; /* exponential backoff */ } kra_peer_t; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -# define sk_allocation allocation -# define sk_data_ready data_ready -# define sk_write_space write_space -# define sk_user_data user_data -# define sk_prot prot -# define sk_sndbuf sndbuf -# define sk_socket socket -# define sk_wmem_queued wmem_queued -# define sk_err err -# define sk_sleep sleep -#endif - -extern lib_nal_t kranal_lib; extern kra_data_t kranal_data; extern kra_tunables_t kranal_tunables; @@ -379,25 +348,25 @@ extern void kranal_destroy_conn(kra_conn_t *conn); static inline void kranal_peer_addref(kra_peer_t *peer) { - CDEBUG(D_NET, "%p->"LPX64"\n", peer, peer->rap_nid); - LASSERT(atomic_read(&peer->rap_refcount) > 0); - atomic_inc(&peer->rap_refcount); + CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid)); + LASSERT(cfs_atomic_read(&peer->rap_refcount) > 0); + cfs_atomic_inc(&peer->rap_refcount); } static inline void kranal_peer_decref(kra_peer_t *peer) { - CDEBUG(D_NET, "%p->"LPX64"\n", peer, peer->rap_nid); - LASSERT(atomic_read(&peer->rap_refcount) > 0); - if (atomic_dec_and_test(&peer->rap_refcount)) + CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid)); + LASSERT(cfs_atomic_read(&peer->rap_refcount) > 0); + if (cfs_atomic_dec_and_test(&peer->rap_refcount)) kranal_destroy_peer(peer); } -static inline struct list_head * -kranal_nid2peerlist (ptl_nid_t nid) +static inline cfs_list_t * +kranal_nid2peerlist (lnet_nid_t nid) { unsigned int hash = ((unsigned int)nid) % kranal_data.kra_peer_hash_size; - + return (&kranal_data.kra_peers[hash]); } @@ -405,48 +374,50 @@ static inline int kranal_peer_active(kra_peer_t *peer) { /* Am I in the peer hash table? */ - return (!list_empty(&peer->rap_list)); + return (!cfs_list_empty(&peer->rap_list)); } static inline void kranal_conn_addref(kra_conn_t *conn) { - CDEBUG(D_NET, "%p->"LPX64"\n", conn, conn->rac_peer->rap_nid); - LASSERT(atomic_read(&conn->rac_refcount) > 0); - atomic_inc(&conn->rac_refcount); + CDEBUG(D_NET, "%p->%s\n", conn, + libcfs_nid2str(conn->rac_peer->rap_nid)); + LASSERT(cfs_atomic_read(&conn->rac_refcount) > 0); + cfs_atomic_inc(&conn->rac_refcount); } static inline void kranal_conn_decref(kra_conn_t *conn) { - CDEBUG(D_NET, "%p->"LPX64"\n", conn, conn->rac_peer->rap_nid); - LASSERT(atomic_read(&conn->rac_refcount) > 0); - if (atomic_dec_and_test(&conn->rac_refcount)) + CDEBUG(D_NET, "%p->%s\n", conn, + libcfs_nid2str(conn->rac_peer->rap_nid)); + LASSERT(cfs_atomic_read(&conn->rac_refcount) > 0); + if (cfs_atomic_dec_and_test(&conn->rac_refcount)) kranal_destroy_conn(conn); } -static inline struct list_head * -kranal_cqid2connlist (__u32 cqid) +static inline cfs_list_t * +kranal_cqid2connlist (__u32 cqid) { unsigned int hash = cqid % kranal_data.kra_conn_hash_size; - + return (&kranal_data.kra_conns [hash]); } static inline kra_conn_t * -kranal_cqid2conn_locked (__u32 cqid) +kranal_cqid2conn_locked (__u32 cqid) { - struct list_head *conns = kranal_cqid2connlist(cqid); - struct list_head *tmp; + cfs_list_t *conns = kranal_cqid2connlist(cqid); + cfs_list_t *tmp; kra_conn_t *conn; - - list_for_each(tmp, conns) { - conn = list_entry(tmp, kra_conn_t, rac_hashlist); - + + cfs_list_for_each(tmp, conns) { + conn = cfs_list_entry(tmp, kra_conn_t, rac_hashlist); + if (conn->rac_cqid == cqid) return conn; } - + return NULL; } @@ -457,30 +428,41 @@ kranal_tx_mapped (kra_tx_t *tx) tx->tx_buftype == RANAL_BUF_PHYS_MAPPED); } -static inline __u64 -kranal_page2phys (struct page *p) -{ - return page_to_phys(p); -} +int kranal_startup (lnet_ni_t *ni); +void kranal_shutdown (lnet_ni_t *ni); +int kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); +int kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); +int kranal_eager_recv(lnet_ni_t *ni, void *private, + lnet_msg_t *lntmsg, void **new_private); +int kranal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, + int delayed, unsigned int niov, + struct iovec *iov, lnet_kiov_t *kiov, + unsigned int offset, unsigned int mlen, unsigned int rlen); +int kranal_accept(lnet_ni_t *ni, struct socket *sock); extern void kranal_free_acceptsock (kra_acceptsock_t *ras); -extern int kranal_listener_procint (ctl_table *table, - int write, struct file *filp, +extern int kranal_listener_procint (cfs_sysctl_table_t *table, + int write, struct file *filp, void *buffer, size_t *lenp); extern void kranal_update_reaper_timeout (long timeout); extern void kranal_tx_done (kra_tx_t *tx, int completion); extern void kranal_unlink_peer_locked (kra_peer_t *peer); extern void kranal_schedule_conn (kra_conn_t *conn); -extern kra_peer_t *kranal_create_peer (ptl_nid_t nid); -extern kra_peer_t *kranal_find_peer_locked (ptl_nid_t nid); +extern int kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid); +extern int kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port); +extern kra_peer_t *kranal_find_peer_locked (lnet_nid_t nid); extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx); -extern int kranal_del_peer (ptl_nid_t nid, int single_share); +extern int kranal_del_peer (lnet_nid_t nid); extern void kranal_device_callback (RAP_INT32 devid, RAP_PVOID arg); extern int kranal_thread_start (int(*fn)(void *arg), void *arg); extern int kranal_connd (void *arg); extern int kranal_reaper (void *arg); extern int kranal_scheduler (void *arg); extern void kranal_close_conn_locked (kra_conn_t *conn, int error); +extern void kranal_close_conn (kra_conn_t *conn, int error); extern void kranal_terminate_conn_locked (kra_conn_t *conn); extern void kranal_connect (kra_peer_t *peer); extern int kranal_conn_handshake (struct socket *sock, kra_peer_t *peer); +extern int kranal_tunables_init(void); +extern void kranal_tunables_fini(void); +extern void kranal_init_msg(kra_msg_t *msg, int type);