* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/** \defgroup obd_export PortalRPC export definitions
+ *
+ * @{
+ */
#ifndef __EXPORT_H
#define __EXPORT_H
struct mds_idmap_table;
struct mdt_idmap_table;
+/**
+ * Target-specific export data
+ */
struct tg_export_data {
/** Protects ted_lcd below */
cfs_semaphore_t ted_lcd_lock;
int ted_lr_idx;
};
+/**
+ * MDT-specific export data
+ */
struct mdt_export_data {
struct tg_export_data med_ted;
+ /** List of all files opened by client on this MDT */
cfs_list_t med_open_head;
cfs_spinlock_t med_open_lock; /* lock med_open_head, mfd_list*/
+ /** Bitmask of all ibit locks this MDT understands */
__u64 med_ibits_known;
cfs_semaphore_t med_idmap_sem;
struct lustre_idmap_table *med_idmap;
};
/* In-memory access to client data from OST struct */
+/** Filter (oss-side) specific export data */
struct filter_export_data {
struct tg_export_data fed_ted;
cfs_spinlock_t fed_lock; /**< protects fed_mod_list */
__u32 fed_group;
};
+/**
+ * per-NID statistics structure.
+ * It tracks access patterns to this export on a per-client-NID basis
+ */
typedef struct nid_stat {
lnet_nid_t nid;
cfs_hlist_node_t nid_hash;
OBD_OPT_ABORT_RECOV = 0x0004,
};
+/**
+ * Export structure. Represents the target-side of a connection in portals.
+ * Also used in Lustre to connect between layers on the same node when
+ * there is no network connection in between.
+ * For every connected client there is an export structure on the server
+ * attached to the same obd device.
+ */
struct obd_export {
+ /**
+ * Export handle; its id is provided to the client on connect.
+ * Subsequent client RPCs contain this handle id to identify
+ * what export they are talking to.
+ */
struct portals_handle exp_handle;
cfs_atomic_t exp_refcount;
/**
cfs_list_t exp_locks_list;
cfs_spinlock_t exp_locks_list_guard;
#endif
+ /** Number of queued replay requests to be processed */
cfs_atomic_t exp_replay_count;
+ /** UUID of client connected to this export */
struct obd_uuid exp_client_uuid;
+ /** To link all exports on an obd device */
cfs_list_t exp_obd_chain;
- cfs_hlist_node_t exp_uuid_hash; /* uuid-export hash*/
- cfs_hlist_node_t exp_nid_hash; /* nid-export hash */
- /* exp_obd_chain_timed fo ping evictor, protected by obd_dev_lock */
+ cfs_hlist_node_t exp_uuid_hash; /**< uuid-export hash */
+ cfs_hlist_node_t exp_nid_hash; /**< nid-export hash */
+ /**
+ * All exports eligible for the ping evictor are linked into a list
+ * through this field in "most time since last request on this export"
+ * order.
+ * Protected by obd_dev_lock.
+ */
cfs_list_t exp_obd_chain_timed;
+ /** Obd device of this export */
struct obd_device *exp_obd;
- struct obd_import *exp_imp_reverse; /* to make RPCs backwards */
+ /** "reverse" import to send requests (e.g. from ldlm) back to client */
+ struct obd_import *exp_imp_reverse;
struct nid_stat *exp_nid_stats;
struct lprocfs_stats *exp_md_stats;
+ /** Active connection */
struct ptlrpc_connection *exp_connection;
+ /** Connection count value from last successful reconnect RPC */
__u32 exp_conn_cnt;
- cfs_hash_t *exp_lock_hash; /* existing lock hash */
+ /** Hash list of all ldlm locks granted on this export */
+ cfs_hash_t *exp_lock_hash;
+ /** lock to protect exp_lock_hash accesses */
cfs_spinlock_t exp_lock_hash_lock;
cfs_list_t exp_outstanding_replies;
cfs_list_t exp_uncommitted_replies;
cfs_spinlock_t exp_uncommitted_replies_lock;
+ /** Last committed transno for this export */
__u64 exp_last_committed;
+ /** When was last request received */
cfs_time_t exp_last_request_time;
+ /** On replay all requests waiting for replay are linked here */
cfs_list_t exp_req_replay_queue;
- cfs_spinlock_t exp_lock; /* protects flags int below */
- /* ^ protects exp_outstanding_replies too */
+ /** protects exp_flags and exp_outstanding_replies */
+ cfs_spinlock_t exp_lock;
+ /** Compatibility flags for this export */
__u64 exp_connect_flags;
enum obd_option exp_flags;
unsigned long exp_failed:1,
struct sptlrpc_flavor exp_flvr_old[2]; /* about-to-expire */
cfs_time_t exp_flvr_expire[2]; /* seconds */
+ /** Target specific data */
union {
struct tg_export_data eu_target_data;
struct mdt_export_data eu_mdt_data;
/** @} export */
#endif /* __EXPORT_H */
+/** @} obd_export */
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/** \defgroup obd_import PtlRPC import definitions
+ * Imports are the client-side representation of a remote obd target.
+ *
+ * @{
+ */
#ifndef __IMPORT_H
#define __IMPORT_H
#include <lustre/lustre_idl.h>
-/* Adaptive Timeout stuff */
+/**
+ * Adaptive Timeout stuff
+ *
+ * @{
+ */
#define D_ADAPTTO D_OTHER
#define AT_BINS 4 /* "bin" means "N seconds of history" */
#define AT_FLG_NOHIST 0x1 /* use last reported value only */
cfs_spinlock_t at_lock;
};
+struct ptlrpc_at_array {
+ cfs_list_t *paa_reqs_array; /**< array to hold requests */
+ __u32 paa_size; /**< the size of array */
+ __u32 paa_count; /**< the total count of reqs */
+ time_t paa_deadline; /**< the earliest deadline of reqs */
+ __u32 *paa_reqs_count; /**< the count of reqs in each entry */
+};
+
+#define IMP_AT_MAX_PORTALS 8
+struct imp_at {
+ int iat_portal[IMP_AT_MAX_PORTALS];
+ struct adaptive_timeout iat_net_latency;
+ struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
+};
+
+
+/** @} */
+
+/** Possible import states */
enum lustre_imp_state {
LUSTRE_IMP_CLOSED = 1,
LUSTRE_IMP_NEW = 2,
LUSTRE_IMP_EVICTED = 10,
};
-struct ptlrpc_at_array {
- cfs_list_t *paa_reqs_array; /* array to hold requests */
- __u32 paa_size; /* the size of array */
- __u32 paa_count; /* the total count of reqs */
- time_t paa_deadline; /* earliest deadline of reqs */
- __u32 *paa_reqs_count; /* count of reqs in each entry */
-};
-
+/** Returns text string representation of numeric import state \a state */
static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
{
static char* import_state_names[] = {
return import_state_names[state];
}
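+/**
+ * A minimal logging sketch (hypothetical call site; \c imp is assumed
+ * to be a valid import):
+ * \code
+ * CDEBUG(D_HA, "import %p is in state %s\n", imp,
+ *        ptlrpc_import_state_name(imp->imp_state));
+ * \endcode
+ */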
+/**
+ * List of import event types
+ */
enum obd_import_event {
IMP_EVENT_DISCON = 0x808001,
IMP_EVENT_INACTIVE = 0x808002,
IMP_EVENT_OCD = 0x808005,
};
+/**
+ * Definition of import connection structure
+ */
struct obd_import_conn {
+ /** Item for linking connections together */
cfs_list_t oic_item;
+ /** Pointer to actual PortalRPC connection */
struct ptlrpc_connection *oic_conn;
+ /** uuid of remote side */
struct obd_uuid oic_uuid;
- __u64 oic_last_attempt; /* jiffies, 64-bit */
-};
-
-#define IMP_AT_MAX_PORTALS 8
-struct imp_at {
- int iat_portal[IMP_AT_MAX_PORTALS];
- struct adaptive_timeout iat_net_latency;
- struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS];
+ /**
+ * Time (64 bit jiffies) of last connection attempt on this connection
+ */
+ __u64 oic_last_attempt;
};
/* state history */
time_t ish_time;
};
+/**
+ * Definition of PortalRPC import structure.
+ * Imports represent the client-side view of a remote target.
+ */
struct obd_import {
+ /** Local handle (== id) for this import. */
struct portals_handle imp_handle;
+ /** Reference counter */
cfs_atomic_t imp_refcount;
struct lustre_handle imp_dlm_handle; /* client's ldlm export */
+ /** Currently active connection */
struct ptlrpc_connection *imp_connection;
+ /** PortalRPC client structure for this import */
struct ptlrpc_client *imp_client;
+ /** List element for linking into pinger chain */
cfs_list_t imp_pinger_chain;
- cfs_list_t imp_zombie_chain; /* queue for destruction */
+ /** List element for linking into chain for destruction */
+ cfs_list_t imp_zombie_chain;
- /* Lists of requests that are retained for replay, waiting for a reply,
+ /**
+ * Lists of requests that are retained for replay, waiting for a reply,
* or waiting for recovery to complete, respectively.
+ * @{
*/
cfs_list_t imp_replay_list;
cfs_list_t imp_sending_list;
cfs_list_t imp_delayed_list;
+ /** @} */
+ /** obd device for this import */
struct obd_device *imp_obd;
+
+ /**
+ * some security-related fields
+ * @{
+ */
struct ptlrpc_sec *imp_sec;
cfs_semaphore_t imp_sec_mutex;
cfs_time_t imp_sec_expire;
+ /** @} */
+
+ /** Wait queue for those who need to wait for recovery completion */
cfs_waitq_t imp_recovery_waitq;
+ /** Number of requests currently in-flight */
cfs_atomic_t imp_inflight;
+ /** Number of requests currently unregistering */
cfs_atomic_t imp_unregistering;
+ /** Number of replay requests inflight */
cfs_atomic_t imp_replay_inflight;
- cfs_atomic_t imp_inval_count; /* in-progress invalidations */
+ /** Number of currently happening import invalidations */
+ cfs_atomic_t imp_inval_count;
+ /** Number of request timeouts */
cfs_atomic_t imp_timeouts;
+ /** Current import state */
enum lustre_imp_state imp_state;
+ /** History of import states */
struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN];
int imp_state_hist_idx;
+ /** Current import generation. Incremented on every reconnect */
int imp_generation;
+ /** Incremented every time we send reconnection request */
__u32 imp_conn_cnt;
+ /**
+ * \see ptlrpc_free_committed remembers imp_generation value here
+ * after a check to save on unnecessary replay list iterations
+ */
int imp_last_generation_checked;
+ /** Last transno we replayed */
__u64 imp_last_replay_transno;
+ /** Last transno committed on remote side */
__u64 imp_peer_committed_transno;
+ /**
+ * \see ptlrpc_free_committed remembers last_transno since its last
+ * check here and if last_transno did not change since last run of
+ * ptlrpc_free_committed and import generation is the same, we can
+ * skip looking for requests to remove from replay list as optimisation
+ */
__u64 imp_last_transno_checked;
+ /**
+ * Remote export handle. This is how remote side knows what export
+ * we are talking to. Filled from response to connect request
+ */
struct lustre_handle imp_remote_handle;
- cfs_time_t imp_next_ping; /* jiffies */
- __u64 imp_last_success_conn; /* jiffies, 64-bit */
+ /** When to perform the next ping, in jiffies */
+ cfs_time_t imp_next_ping;
+ /** When we last successfully connected, in 64-bit jiffies */
+ __u64 imp_last_success_conn;
- /* all available obd_import_conn linked here */
+ /** List of all possible connections for this import. */
cfs_list_t imp_conn_list;
+ /**
+ * Current connection. \a imp_connection is imp_conn_current->oic_conn
+ */
struct obd_import_conn *imp_conn_current;
- /* Protects flags, level, generation, conn_cnt, *_list */
+ /** Protects flags, level, generation, conn_cnt, *_list */
cfs_spinlock_t imp_lock;
/* flags */
typedef void (*obd_import_callback)(struct obd_import *imp, void *closure,
int event, void *event_arg, void *cb_data);
+/**
+ * Structure for import observer.
+ * It is possible to register an "observer" on an import, and every time
+ * something happens to the import (like connect/evict/disconnect)
+ * the observer will get its callback called with the event type
+ */
struct obd_import_observer {
cfs_list_t oio_chain;
obd_import_callback oio_cb;
/** @} import */
#endif /* __IMPORT_H */
+
+/** @} obd_import */
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/** \defgroup PtlRPC Portal RPC and networking module.
+ *
+ * PortalRPC is the layer used by the rest of the Lustre code to achieve
+ * network communications: establish connections with corresponding export
+ * and import states, listen for a service, send and receive RPCs.
+ * PortalRPC also includes the base recovery framework: packet resending and
+ * replaying, reconnections, pinger.
+ *
+ * PortalRPC utilizes LNet as its transport layer.
+ *
+ * @{
+ */
+
#ifndef _LUSTRE_NET_H
#define _LUSTRE_NET_H
/* MD flags we _always_ use */
#define PTLRPC_MD_OPTIONS 0
-/* Define maxima for bulk I/O
+/**
+ * Define maxima for bulk I/O
* CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
* these limits are system wide and not interface-local. */
#define PTLRPC_MAX_BRW_BITS LNET_MTU_BITS
* buffers */
#define SVC_BUF_VMALLOC_THRESHOLD (2 * CFS_PAGE_SIZE)
-/* The following constants determine how memory is used to buffer incoming
+/**
+ * The following constants determine how memory is used to buffer incoming
* service requests.
*
* ?_NBUFS # buffers to allocate when growing the pool
* Messages larger than ?_MAXREQSIZE are dropped. Request buffers are
* considered full when less than ?_MAXREQSIZE is left in them.
*/
-
#define LDLM_THREADS_AUTO_MIN (2)
#define LDLM_THREADS_AUTO_MAX min_t(unsigned, cfs_num_online_cpus() * \
cfs_num_online_cpus() * 32, 128)
cfs_num_physpages >> (25 - CFS_PAGE_SHIFT)), \
2UL)
-/* Absolute limits */
+/** Absolute limits */
#define MDS_THREADS_MIN 2
#define MDS_THREADS_MAX 512
#define MDS_THREADS_MIN_READPAGE 2
#define MDS_NBUFS (64 * cfs_num_online_cpus())
#define MDS_BUFSIZE (8 * 1024)
-/* Assume file name length = FNAME_MAX = 256 (true for ext3).
+/**
+ * Assume file name length = FNAME_MAX = 256 (true for ext3).
* path name length = PATH_MAX = 4096
* LOV MD size max = EA_MAX = 4000
* symlink: FNAME_MAX + PATH_MAX <- largest
#define MDS_MAXREQSIZE (5 * 1024)
#define MDS_MAXREPSIZE max(9 * 1024, 362 + LOV_MAX_STRIPE_COUNT * 56)
-/* FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + md_fld */
+/** FLD_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + md_fld */
#define FLD_MAXREQSIZE (160)
-/* FLD_MAXREPSIZE == lustre_msg + ptlrpc_body + md_fld */
+/** FLD_MAXREPSIZE == lustre_msg + ptlrpc_body + md_fld */
#define FLD_MAXREPSIZE (152)
-/* SEQ_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + lu_range +
+/**
+ * SEQ_MAXREQSIZE == lustre_msg + __u32 padding + ptlrpc_body + opc + lu_range +
* __u32 padding */
#define SEQ_MAXREQSIZE (160)
-/* SEQ_MAXREPSIZE == lustre_msg + ptlrpc_body + lu_range */
+/** SEQ_MAXREPSIZE == lustre_msg + ptlrpc_body + lu_range */
#define SEQ_MAXREPSIZE (152)
-/* MGS threads must be >= 3, see bug 22458 comment #28 */
+/** MGS threads must be >= 3, see bug 22458 comment #28 */
#define MGS_THREADS_AUTO_MIN 3
#define MGS_THREADS_AUTO_MAX 32
#define MGS_NBUFS (64 * cfs_num_online_cpus())
#define MGS_MAXREQSIZE (7 * 1024)
#define MGS_MAXREPSIZE (9 * 1024)
-/* Absolute limits */
+/** Absolute OSS limits */
#define OSS_THREADS_MIN 3 /* difficult replies, HPQ, others */
#define OSS_THREADS_MAX 512
#define OST_NBUFS (64 * cfs_num_online_cpus())
#define OST_BUFSIZE (8 * 1024)
-/* OST_MAXREQSIZE ~= 4768 bytes =
+
+/**
+ * OST_MAXREQSIZE ~= 4768 bytes =
* lustre_msg + obdo + 16 * obd_ioobj + 256 * niobuf_remote
*
* - single object with 16 pages is 512 bytes
/* Macro to hide a typecast. */
#define ptlrpc_req_async_args(req) ((void *)&req->rq_async_args)
+/**
+ * Structure to define a single portal connection.
+ */
struct ptlrpc_connection {
+ /** linkage for connections hash table */
cfs_hlist_node_t c_hash;
+ /** Our own lnet nid for this connection */
lnet_nid_t c_self;
+ /** Remote side nid for this connection */
lnet_process_id_t c_peer;
+ /** UUID of the other side */
struct obd_uuid c_remote_uuid;
+ /** reference counter for this connection */
cfs_atomic_t c_refcount;
};
+/** Client definition for PortalRPC */
struct ptlrpc_client {
+ /** What LNet portal this client sends messages to by default */
__u32 cli_request_portal;
+ /** What portal we expect replies on */
__u32 cli_reply_portal;
+ /** Name of the client */
char *cli_name;
};
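+/**
+ * A client setup sketch; the portal constants and client name below are
+ * illustrative only:
+ * \code
+ * static struct ptlrpc_client my_client;  // hypothetical client
+ * ptlrpc_init_client(OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
+ *                    "my-client", &my_client);
+ * \endcode
+ */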
-/* state flags of requests */
+/** state flags of requests */
/* XXX only ones left are those used by the bulk descs as well! */
#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */
#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */
#define REQ_MAX_ACK_LOCKS 8
union ptlrpc_async_args {
- /* Scratchpad for passing args to completion interpreter. Users
+ /**
+ * Scratchpad for passing args to completion interpreter. Users
* cast to the struct of their choosing, and LASSERT that this is
* big enough. For _tons_ of context, OBD_ALLOC a struct and store
* a pointer to it here. The pointer_arg ensures this struct is at
- * least big enough for that. */
+ * least big enough for that.
+ */
void *pointer_arg[11];
__u64 space[6];
};
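+/**
+ * A sketch of the intended usage (the struct and variable names are
+ * hypothetical):
+ * \code
+ * struct my_async_args {
+ *         struct obd_export *aa_exp;  // example per-request context
+ *         int                aa_flags;
+ * };
+ * // compile-time check that the scratchpad is big enough
+ * CLASSERT(sizeof(struct my_async_args) <=
+ *          sizeof(union ptlrpc_async_args));
+ * struct my_async_args *aa = ptlrpc_req_async_args(req);
+ * aa->aa_exp = exp;
+ * \endcode
+ */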
struct ptlrpc_request_set;
typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+/**
+ * Definition of request set structure.
+ * A request set is a list of requests (not necessarily to the same target)
+ * that, once populated with RPCs, could be sent in parallel.
+ * There are two kinds of request sets: general purpose, and with a dedicated
+ * serving thread. An example of the latter is the ptlrpcd set.
+ * For general purpose sets, once a set has started sending it is impossible
+ * to add new requests to it.
+ * Provides a way to call "completion callbacks" when all requests in the set
+ * have returned.
+ */
struct ptlrpc_request_set {
- cfs_atomic_t set_remaining; /* # uncompleted requests */
+ /** number of uncompleted requests */
+ cfs_atomic_t set_remaining;
+ /** wait queue to wait on for request events */
cfs_waitq_t set_waitq;
cfs_waitq_t *set_wakeup_ptr;
+ /** List of requests in the set */
cfs_list_t set_requests;
- cfs_list_t set_cblist; /* list of completion callbacks */
- set_interpreter_func set_interpret; /* completion callback */
- void *set_arg; /* completion context */
- /* locked so that any old caller can communicate requests to
- * the set holder who can then fold them into the lock-free set */
+ /**
+ * List of completion callbacks to be called when the set is completed
+ * This is only used if \a set_interpret is NULL.
+ * Links struct ptlrpc_set_cbdata.
+ */
+ cfs_list_t set_cblist;
+ /** Completion callback, if only one. */
+ set_interpreter_func set_interpret;
+ /** Opaque argument passed to the completion callback \a set_interpret. */
+ void *set_arg;
+ /**
+ * Lock for \a set_new_requests manipulations
+ * locked so that any old caller can communicate requests to
+ * the set holder who can then fold them into the lock-free set
+ */
cfs_spinlock_t set_new_req_lock;
+ /** List of new yet unsent requests. Only used with ptlrpcd now. */
cfs_list_t set_new_requests;
};
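+/**
+ * A minimal general-purpose set sketch, assuming \c req1 and \c req2 are
+ * already prepared requests (error handling elided):
+ * \code
+ * struct ptlrpc_request_set *set = ptlrpc_prep_set();
+ * ptlrpc_set_add_req(set, req1);
+ * ptlrpc_set_add_req(set, req2);
+ * rc = ptlrpc_set_wait(set);   // sends all requests, waits for completion
+ * ptlrpc_set_destroy(set);
+ * \endcode
+ */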
+/**
+ * Description of a single ptlrpc_set callback
+ */
struct ptlrpc_set_cbdata {
+ /** List linkage item */
cfs_list_t psc_item;
+ /** Pointer to interpreting function */
set_interpreter_func psc_interpret;
+ /** Opaque argument to pass to the callback */
void *psc_data;
};
struct ptlrpc_bulk_desc;
-/*
+/**
* ptlrpc callback & work item stuff
*/
struct ptlrpc_cb_id {
void *cbid_arg; /* additional arg */
};
+/** Maximum number of locks to fit into reply state */
#define RS_MAX_LOCKS 8
#define RS_DEBUG 1
+/**
+ * Structure to define reply state on the server
+ * Reply state holds various reply message information. Also for "difficult"
+ * replies (rep-ack case) we store the state after sending reply and wait
+ * for the client to acknowledge the reception. In these cases locks could be
+ * added to the state for replay/failover consistency guarantees.
+ */
struct ptlrpc_reply_state {
+ /** Callback description */
struct ptlrpc_cb_id rs_cb_id;
+ /** Linkage for list of all reply states in a system */
cfs_list_t rs_list;
+ /** Linkage for list of all reply states on same export */
cfs_list_t rs_exp_list;
+ /** Linkage for list of all reply states for same obd */
cfs_list_t rs_obd_list;
#if RS_DEBUG
cfs_list_t rs_debug_list;
#endif
- /* A spinlock to protect the reply state flags */
+ /** A spinlock to protect the reply state flags */
cfs_spinlock_t rs_lock;
- /* Reply state flags */
+ /** Reply state flags */
unsigned long rs_difficult:1; /* ACK/commit stuff */
unsigned long rs_no_ack:1; /* no ACK, even for
difficult requests */
unsigned long rs_committed:1;/* the transaction was committed
and the rs was dispatched
by ptlrpc_commit_replies */
+ /** Size of the state */
int rs_size;
+ /** opcode */
__u32 rs_opc;
+ /** Transaction number */
__u64 rs_transno;
+ /** xid */
__u64 rs_xid;
struct obd_export *rs_export;
struct ptlrpc_service *rs_service;
+ /** Lnet metadata handle for the reply */
lnet_handle_md_t rs_md_h;
cfs_atomic_t rs_refcount;
+ /** Context for the service thread */
struct ptlrpc_svc_ctx *rs_svc_ctx;
+ /** Reply buffer (actually sent to the client), encoded if needed */
struct lustre_msg *rs_repbuf; /* wrapper */
+ /** Size of the reply buffer */
int rs_repbuf_len; /* wrapper buf length */
+ /** Size of the reply message */
int rs_repdata_len; /* wrapper msg length */
+ /**
+ * Actual reply message. Its content is encrypted (if needed) to
+ * produce the reply buffer for actual sending. In the simple case
+ * of no network encryption we just set \a rs_repbuf to \a rs_msg
+ */
struct lustre_msg *rs_msg; /* reply message */
- /* locks awaiting client reply ACK */
+ /** Number of locks awaiting client ACK */
int rs_nlocks;
+ /** Handles of locks awaiting client reply ACK */
struct lustre_handle rs_locks[RS_MAX_LOCKS];
+ /** Lock modes of locks in \a rs_locks */
ldlm_mode_t rs_modes[RS_MAX_LOCKS];
};
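+/**
+ * Server code attaches a granted lock to the pending reply state with
+ * ptlrpc_save_lock(), declared later in this header; a sketch (\c lockh
+ * and the lock mode are illustrative):
+ * \code
+ * ptlrpc_save_lock(req, &lockh, LCK_PW, 0);  // no_ack = 0: normal ACK path
+ * \endcode
+ */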
struct ptlrpc_thread;
+/** RPC stages */
enum rq_phase {
RQ_PHASE_NEW = 0xebc0de00,
RQ_PHASE_RPC = 0xebc0de01,
struct ptlrpc_request *req,
void *arg, int rc);
+/**
+ * Definition of request pool structure.
+ * The pool is used to store empty preallocated requests for the case
+ * when we would actually need to send something without performing
+ * any allocations (to avoid e.g. OOM).
+ */
struct ptlrpc_request_pool {
+ /** Locks the list */
cfs_spinlock_t prp_lock;
- cfs_list_t prp_req_list; /* list of ptlrpc_request structs */
+ /** list of ptlrpc_request structs */
+ cfs_list_t prp_req_list;
+ /** Maximum message size that would fit into a request from this pool */
int prp_rq_size;
+ /** Function to allocate more requests for this pool */
void (*prp_populate)(struct ptlrpc_request_pool *, int);
};
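+/**
+ * Pool setup and teardown, sketched; the populate callback name is
+ * hypothetical and sizes are illustrative:
+ * \code
+ * static void my_populate(struct ptlrpc_request_pool *pool, int num)
+ * {
+ *         ptlrpc_add_rqs_to_pool(pool, num);  // grow pool by num requests
+ * }
+ * pool = ptlrpc_init_rq_pool(4, OST_MAXREQSIZE, my_populate);
+ * ...
+ * ptlrpc_free_rq_pool(pool);  // also frees all pooled requests
+ * \endcode
+ */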
struct ldlm_lock;
+/**
+ * Basic request prioritization operations structure.
+ * The whole idea is centered around locks and RPCs that might affect locks.
+ * When a lock is contended we try to give priority to RPCs that might lead
+ * to fastest release of that lock.
+ * Currently implemented only for OSTs, in a way that gives all
+ * IO and truncate RPCs that are coming from a locked region where a lock is
+ * contended priority over other requests.
+ */
struct ptlrpc_hpreq_ops {
/**
* Check if the lock handle of the given lock is the same as
/**
* Represents remote procedure call.
+ *
+ * This is a staple structure used by everybody wanting to send a request
+ * in Lustre.
*/
struct ptlrpc_request {
- int rq_type; /* one of PTL_RPC_MSG_* */
+ /** Request type: one of PTL_RPC_MSG_* */
+ int rq_type;
+ /**
+ * Linkage item through which this request is included into
+ * sending/delayed lists on client and into rqbd list on server
+ */
cfs_list_t rq_list;
- cfs_list_t rq_timed_list; /* server-side early replies */
- cfs_list_t rq_history_list; /* server-side history */
- cfs_list_t rq_exp_list; /* server-side per-export list */
- struct ptlrpc_hpreq_ops *rq_ops; /* server-side hp handlers */
- __u64 rq_history_seq; /* history sequence # */
- /* the index of service's srv_at_array into which request is linked */
+ /**
+ * Server side list of incoming unserved requests sorted by arrival
+ * time. Traversed from time to time to notice requests about to
+ * expire and send back "early replies" to clients to let them
+ * know the server is alive and well, just too busy to service their
+ * requests in time
+ */
+ cfs_list_t rq_timed_list;
+ /** server-side history, used for debugging purposes. */
+ cfs_list_t rq_history_list;
+ /** server-side per-export list */
+ cfs_list_t rq_exp_list;
+ /** server-side hp handlers */
+ struct ptlrpc_hpreq_ops *rq_ops;
+ /** history sequence # */
+ __u64 rq_history_seq;
+ /** the index of service's srv_at_array into which request is linked */
time_t rq_at_index;
+ /** Result of request processing */
int rq_status;
+ /** Lock to protect request flags and some other important bits, like
+ * rq_list
+ */
cfs_spinlock_t rq_lock;
- /* client-side flags are serialized by rq_lock */
+ /** client-side flags are serialized by rq_lock */
unsigned long rq_intr:1, rq_replied:1, rq_err:1,
rq_timedout:1, rq_resend:1, rq_restart:1,
- /*
+ /**
* when ->rq_replay is set, request is kept by the client even
* after server commits corresponding transaction. This is
* used for operations that require sequence of multiple
cfs_atomic_t rq_refcount;/* client-side refcount for SENT race,
server-side refcounf for multiple replies */
- struct ptlrpc_thread *rq_svc_thread; /* initial thread servicing req */
+ /** initial thread servicing this request */
+ struct ptlrpc_thread *rq_svc_thread;
+ /** Portal to which this request would be sent */
int rq_request_portal; /* XXX FIXME bug 249 */
+ /** Portal where to wait for reply and where reply would be sent */
int rq_reply_portal; /* XXX FIXME bug 249 */
- int rq_nob_received; /* client-side:
- * !rq_truncate : # reply bytes actually received,
- * rq_truncate : required repbuf_len for resend */
+ /**
+ * client-side:
+ * !rq_truncate : # reply bytes actually received,
+ * rq_truncate : required repbuf_len for resend
+ */
+ int rq_nob_received;
+ /** Request length */
int rq_reqlen;
+ /** Request message - what client sent */
struct lustre_msg *rq_reqmsg;
+ /** Reply length */
int rq_replen;
+ /** Reply message - server response */
struct lustre_msg *rq_repmsg;
+ /** Transaction number */
__u64 rq_transno;
+ /** xid */
__u64 rq_xid;
+ /**
+ * List item for the replay list. Not yet committed requests get linked
+ * there.
+ * Also see \a rq_replay comment above.
+ */
cfs_list_t rq_replay_list;
+ /**
+ * security and encryption data
+ * @{ */
struct ptlrpc_cli_ctx *rq_cli_ctx; /* client's half ctx */
struct ptlrpc_svc_ctx *rq_svc_ctx; /* server's half ctx */
cfs_list_t rq_ctx_chain; /* link to waited ctx */
/* (server side), pointed directly into req buffer */
struct ptlrpc_user_desc *rq_user_desc;
- /* early replies go to offset 0, regular replies go after that */
+ /** @} */
+
+ /** early replies go to offset 0, regular replies go after that */
unsigned int rq_reply_off;
/* various buffer pointers */
int rq_clrbuf_len; /* only in priv mode */
int rq_clrdata_len; /* only in priv mode */
+ /** Fields that help to see if request and reply were swabbed or not */
__u32 rq_req_swab_mask;
__u32 rq_rep_swab_mask;
+ /** What was import generation when this request was sent */
int rq_import_generation;
enum lustre_imp_state rq_send_state;
- int rq_early_count; /* how many early replies (for stats) */
+ /** how many early replies (for stats) */
+ int rq_early_count;
- /* client+server request */
+ /** client+server request */
lnet_handle_md_t rq_req_md_h;
struct ptlrpc_cb_id rq_req_cbid;
- cfs_duration_t rq_delay_limit; /* optional time limit for send attempts */
- cfs_time_t rq_queued_time; /* time request was first queued */
+ /** optional time limit for send attempts */
+ cfs_duration_t rq_delay_limit;
+ /** time request was first queued */
+ cfs_time_t rq_queued_time;
/* server-side... */
- struct timeval rq_arrival_time; /* request arrival time */
- struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */
- struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/
+ /** request arrival time */
+ struct timeval rq_arrival_time;
+ /** separated reply state */
+ struct ptlrpc_reply_state *rq_reply_state;
+ /** incoming request buffer */
+ struct ptlrpc_request_buffer_desc *rq_rqbd;
#ifdef CRAY_XT3
__u32 rq_uid; /* peer uid, used in MDS only */
#endif
- /* client-only incoming reply */
+ /** client-only incoming reply */
lnet_handle_md_t rq_reply_md_h;
cfs_waitq_t rq_reply_waitq;
struct ptlrpc_cb_id rq_reply_cbid;
+ /** our LNet NID */
lnet_nid_t rq_self;
+ /** Peer description (the other side) */
lnet_process_id_t rq_peer;
+ /** Server-side, export on which request was received */
struct obd_export *rq_export;
+ /** Client side, import where request is being sent */
struct obd_import *rq_import;
+ /** Replay callback, called after request is replayed at recovery */
void (*rq_replay_cb)(struct ptlrpc_request *);
+ /**
+ * Commit callback, called when request is committed and about to be
+ * freed.
+ */
void (*rq_commit_cb)(struct ptlrpc_request *);
+ /** Opaq data for replay and commit callbacks. */
void *rq_cb_data;
- struct ptlrpc_bulk_desc *rq_bulk;/* client side bulk */
-
- /* client outgoing req */
- time_t rq_sent; /* when request/reply sent (secs), or
- * time when request should be sent */
+ /** For bulk requests on client only: bulk descriptor */
+ struct ptlrpc_bulk_desc *rq_bulk;
- volatile time_t rq_deadline; /* when request must finish. volatile
- so that servers' early reply updates to the deadline aren't
- kept in per-cpu cache */
- time_t rq_reply_deadline; /* when req reply unlink must finish. */
- time_t rq_bulk_deadline; /* when req bulk unlink must finish. */
- int rq_timeout; /* service time estimate (secs) */
+ /** client outgoing req */
+ /**
+ * when request/reply sent (secs), or time when request should be sent
+ */
+ time_t rq_sent;
+
+ /** when request must finish. volatile
+ * so that servers' early reply updates to the deadline aren't
+ * kept in per-cpu cache */
+ volatile time_t rq_deadline;
+ /** when req reply unlink must finish. */
+ time_t rq_reply_deadline;
+ /** when req bulk unlink must finish. */
+ time_t rq_bulk_deadline;
+ /**
+ * service time estimate (secs)
+ * If the request is not served by this time, it is marked as timed out.
+ */
+ int rq_timeout;
- /* Multi-rpc bits */
+ /** Multi-rpc bits */
+ /** Link item for request set lists */
cfs_list_t rq_set_chain;
+ /** Per-request waitq introduced by bug 21938 for recovery waiting */
cfs_waitq_t rq_set_waitq;
+ /** Link back to the request set */
struct ptlrpc_request_set *rq_set;
- /** Async completion handler */
+ /** Async completion handler, called when reply is received */
ptlrpc_interpterer_t rq_interpret_reply;
- union ptlrpc_async_args rq_async_args; /* Async completion context */
- struct ptlrpc_request_pool *rq_pool; /* Pool if request from
- preallocated list */
+ /** Async completion context */
+ union ptlrpc_async_args rq_async_args;
+
+ /** Pool if request is from preallocated list */
+ struct ptlrpc_request_pool *rq_pool;
+
struct lu_context rq_session;
struct lu_context rq_recov_session;
- /* request format */
+ /** request format description */
struct req_capsule rq_pill;
};
+/**
+ * Call the completion handler for the rpc if any; return its status or the
+ * original rc if there was no handler defined for this request.
+ */
static inline int ptlrpc_req_interpret(const struct lu_env *env,
struct ptlrpc_request *req, int rc)
{
return rc;
}
+/**
+ * Returns 1 if request buffer at offset \a index was already swabbed
+ */
static inline int lustre_req_swabbed(struct ptlrpc_request *req, int index)
{
LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
return req->rq_req_swab_mask & (1 << index);
}
+/**
+ * Returns 1 if request reply buffer at offset \a index was already swabbed
+ */
static inline int lustre_rep_swabbed(struct ptlrpc_request *req, int index)
{
LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
return req->rq_rep_swab_mask & (1 << index);
}
+/**
+ * Returns 1 if request needs to be swabbed into local cpu byteorder
+ */
static inline int ptlrpc_req_need_swab(struct ptlrpc_request *req)
{
return lustre_req_swabbed(req, MSG_PTLRPC_HEADER_OFF);
}
+/**
+ * Returns 1 if request reply needs to be swabbed into local cpu byteorder
+ */
static inline int ptlrpc_rep_need_swab(struct ptlrpc_request *req)
{
return lustre_rep_swabbed(req, MSG_PTLRPC_HEADER_OFF);
}
+/**
+ * Mark request buffer at offset \a index that it was already swabbed
+ */
static inline void lustre_set_req_swabbed(struct ptlrpc_request *req, int index)
{
LASSERT(index < sizeof(req->rq_req_swab_mask) * 8);
req->rq_req_swab_mask |= 1 << index;
}
+/**
+ * Mark request reply buffer at offset \a index that it was already swabbed
+ */
static inline void lustre_set_rep_swabbed(struct ptlrpc_request *req, int index)
{
LASSERT(index < sizeof(req->rq_rep_swab_mask) * 8);
req->rq_rep_swab_mask |= 1 << index;
}
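+/**
+ * The masks above are plain per-buffer bitmaps, so, assuming the usual
+ * lustre_msg buffer offsets, after
+ * \code
+ * lustre_set_req_swabbed(req, MSG_PTLRPC_BODY_OFF);
+ * \endcode
+ * a later lustre_req_swabbed(req, MSG_PTLRPC_BODY_OFF) returns non-zero
+ * and that buffer is not byte-swapped a second time.
+ */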
+/**
+ * Convert numerical request phase value \a phase into text string description
+ */
static inline const char *
ptlrpc_phase2str(enum rq_phase phase)
{
}
}
+/**
+ * Convert numerical request phase of the request \a req into text string
+ * description
+ */
static inline const char *
ptlrpc_rqphase2str(struct ptlrpc_request *req)
{
return ptlrpc_phase2str(req->rq_phase);
}
+/**
+ * Debugging functions and helpers to print request structure into debug log
+ * @{
+ */
/* Spare the preprocessor, spoil the bugs. */
#define FLAG(field, str) (field ? str : "")
+/** Convert bit flags into a string */
#define DEBUG_REQ_FLAGS(req) \
ptlrpc_rqphase2str(req), \
FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
struct libcfs_debug_msg_data *data, const char *fmt, ...)
__attribute__ ((format (printf, 4, 5)));
+/**
+ * Helper that decides if we need to print request according to current debug
+ * level settings
+ */
#define debug_req(cdls, level, req, file, func, line, fmt, a...) \
do { \
CFS_CHECK_STACK(); \
} \
} while(0)
-/* for most callers (level is a constant) this is resolved at compile time */
+/**
+ * This is the debug print function you need to use to print request
+ * structure content into the lustre debug log.
+ * For most callers (level is a constant) this is resolved at compile time */
#define DEBUG_REQ(level, req, fmt, args...) \
do { \
if ((level) & (D_ERROR | D_WARNING)) { \
debug_req(NULL, level, req, __FILE__, __func__, __LINE__, \
"@@@ "fmt" ", ## args); \
} while (0)
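+/** E.g. (hypothetical message):
+ * \code
+ * DEBUG_REQ(D_WARNING, req, "timed out after %ds", req->rq_timeout);
+ * \endcode
+ */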
+/** @} */
+/**
+ * Structure that defines a single page of a bulk transfer
+ */
struct ptlrpc_bulk_page {
+ /** Linkage to list of pages in a bulk */
cfs_list_t bp_link;
+ /**
+ * Number of bytes in a page to transfer starting from \a bp_pageoffset
+ */
int bp_buflen;
- int bp_pageoffset; /* offset within a page */
+ /** offset within a page */
+ int bp_pageoffset;
+ /** The page itself */
struct page *bp_page;
};
#define BULK_GET_SINK 2
#define BULK_PUT_SOURCE 3
+/**
+ * Definition of bulk descriptor.
+ * Bulks are special "two phase" RPCs where the initial request message
+ * is sent first and is followed by a transfer (or receiving) of a large
+ * amount of data to be settled into pages referenced from the bulk descriptor.
+ * Bulk transfers (the actual data following the small requests) are done
+ * on separate LNet portals.
+ * In lustre we use bulk transfers for READ and WRITE transfers from/to OSTs.
+ * Another user is readpage for MDT.
+ */
struct ptlrpc_bulk_desc {
- unsigned long bd_success:1; /* completed successfully */
- unsigned long bd_network_rw:1; /* accessible to the network */
- unsigned long bd_type:2; /* {put,get}{source,sink} */
- unsigned long bd_registered:1; /* client side */
- cfs_spinlock_t bd_lock; /* serialise with callback */
+ /** completed successfully */
+ unsigned long bd_success:1;
+ /** accessible to the network (network io potentially in progress) */
+ unsigned long bd_network_rw:1;
+ /** {put,get}{source,sink} */
+ unsigned long bd_type:2;
+ /** client side */
+ unsigned long bd_registered:1;
+ /** For serialization with callback */
+ cfs_spinlock_t bd_lock;
+ /** Import generation when request for this bulk was sent */
int bd_import_generation;
+ /** Server side - export this bulk created for */
struct obd_export *bd_export;
+ /** Client side - import this bulk was sent on */
struct obd_import *bd_import;
+ /** LNet portal for this bulk */
__u32 bd_portal;
- struct ptlrpc_request *bd_req; /* associated request */
+ /** Back pointer to the request */
+ struct ptlrpc_request *bd_req;
cfs_waitq_t bd_waitq; /* server side only WQ */
int bd_iov_count; /* # entries in bd_iov */
int bd_max_iov; /* allocated size of bd_iov */
#endif
};
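+/**
+ * Client-side bulk read setup, sketched (the page array, count and portal
+ * are illustrative; error handling elided):
+ * \code
+ * desc = ptlrpc_prep_bulk_imp(req, npages, BULK_PUT_SINK,
+ *                             OST_BULK_PORTAL);
+ * for (i = 0; i < npages; i++)
+ *         ptlrpc_prep_bulk_page(desc, pages[i], 0, CFS_PAGE_SIZE);
+ * \endcode
+ */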
+/**
+ * Definition of server service thread structure
+ */
struct ptlrpc_thread {
/**
- * active threads in svc->srv_threads
+ * List of active threads in svc->srv_threads
*/
cfs_list_t t_link;
/**
struct lu_env *t_env;
};
+/**
+ * Request buffer descriptor structure.
+ * This is a structure that contains one posted request buffer for service.
+ * Once data lands in a buffer, an event callback creates the actual request
+ * and wakes one of the service threads to process the new incoming request.
+ * More than one request can fit into the buffer.
+ */
struct ptlrpc_request_buffer_desc {
+ /** Link item for rqbds on a service */
cfs_list_t rqbd_list;
+ /** History of requests for this buffer */
cfs_list_t rqbd_reqs;
+ /** Back pointer to service for which this buffer is registered */
struct ptlrpc_service *rqbd_service;
+ /** LNet descriptor */
lnet_handle_md_t rqbd_md_h;
int rqbd_refcount;
+ /** The buffer itself */
char *rqbd_buffer;
struct ptlrpc_cb_id rqbd_cbid;
+ /**
+ * This "embedded" request structure is only used for the
+ * last request to fit into the buffer
+ */
struct ptlrpc_request rqbd_req;
};
typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *);
typedef int (*svc_hpreq_handler_t)(struct ptlrpc_request *);
+/**
+ * How many high priority requests to serve before serving one normal
+ * priority request
+ */
#define PTLRPC_SVC_HP_RATIO 10
+/**
+ * Definition of PortalRPC service.
+ * The service is listening on a particular portal (like tcp port)
+ * and performs actions for a specific server like IO service for OST
+ * or general metadata service for MDS.
+ */
struct ptlrpc_service {
cfs_list_t srv_list; /* chain thru all services */
int srv_max_req_size; /* biggest request to receive */
unsigned srv_is_stopping:1; /* under unregister_service */
cfs_time_t srv_at_checktime; /* debug */
+ /** Local portal on which to receive requests */
__u32 srv_req_portal;
+ /** Portal on the client to send replies to */
__u32 srv_rep_portal;
- /* AT stuff */
+ /** AT stuff */
+ /** @{ */
struct adaptive_timeout srv_at_estimate;/* estimated rpc service time */
cfs_spinlock_t srv_at_lock;
struct ptlrpc_at_array srv_at_array; /* reqs waiting for replies */
cfs_timer_t srv_at_timer; /* early reply timer */
+ /** @} */
int srv_n_queued_reqs; /* # reqs in either of the queues below */
int srv_hpreq_count; /* # hp requests handled */
* difficult reply has to be handled. */
cfs_list_t srv_threads; /* service thread list */
+ /** Handler function for incoming requests for this service */
svc_handler_t srv_handler;
svc_hpreq_handler_t srv_hpreq_handler; /* hp request handler */
cfs_spinlock_t srv_lock;
+ /** Root of /proc dir tree for this service */
cfs_proc_dir_entry_t *srv_procroot;
+ /** Pointer to statistic data for this service */
struct lprocfs_stats *srv_stats;
- /* List of free reply_states */
+ /** List of free reply_states */
cfs_list_t srv_free_rs_list;
- /* waitq to run, when adding stuff to srv_free_rs_list */
+ /** waitq to run, when adding stuff to srv_free_rs_list */
cfs_waitq_t srv_free_rs_waitq;
- /*
+ /**
* Tags for lu_context associated with this thread, see struct
* lu_context.
*/
__u32 srv_ctx_tags;
- /*
+ /**
* if non-NULL called during thread creation (ptlrpc_start_thread())
* to initialize service specific per-thread state.
*/
int (*srv_init)(struct ptlrpc_thread *thread);
- /*
+ /**
* if non-NULL called during thread shutdown (ptlrpc_main()) to
* destruct state created by ->srv_init().
*/
//struct ptlrpc_srv_ni srv_interfaces[0];
};
+/**
+ * Declaration of ptlrpcd control structure
+ */
struct ptlrpcd_ctl {
/**
* Ptlrpc thread control flags (LIOD_START, LIOD_STOP, LIOD_FORCE)
extern lnet_handle_eq_t ptlrpc_eq_h;
extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
lnet_process_id_t *peer, lnet_nid_t *self);
+/**
+ * These callbacks are invoked by LNet when something happens to the
+ * underlying buffer
+ * @{
+ */
extern void request_out_callback (lnet_event_t *ev);
extern void reply_in_callback(lnet_event_t *ev);
extern void client_bulk_callback (lnet_event_t *ev);
extern void request_in_callback(lnet_event_t *ev);
extern void reply_out_callback(lnet_event_t *ev);
extern void server_bulk_callback (lnet_event_t *ev);
+/** @} */
/* ptlrpc/connection.c */
struct ptlrpc_connection *ptlrpc_connection_get(lnet_process_id_t peer,
extern lnet_pid_t ptl_get_pid(void);
/* ptlrpc/niobuf.c */
+/**
+ * Actual interfacing with LNet to put/get/register/unregister stuff
+ * @{
+ */
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc);
void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc);
int ptlrpc_register_bulk(struct ptlrpc_request *req);
int ptlrpc_at_get_net_latency(struct ptlrpc_request *req);
int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd);
+/** @} */
/* ptlrpc/client.c */
+/**
+ * Client-side portals API. Everything to send requests, receive replies,
+ * request queues, request management, etc.
+ * @{
+ */
void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
struct ptlrpc_client *);
void ptlrpc_cleanup_client(struct obd_import *imp);
__u64 ptlrpc_sample_next_xid(void);
__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
+/** @} */
+
struct ptlrpc_service_conf {
int psc_nbufs;
int psc_bufsize;
};
/* ptlrpc/service.c */
+/**
+ * Server-side services API. Register/unregister service, request state
+ * management, service thread management
+ *
+ * @{
+ */
void ptlrpc_save_lock (struct ptlrpc_request *req,
struct lustre_handle *lock, int mode, int no_ack);
void ptlrpc_commit_replies(struct obd_export *exp);
struct ptlrpc_thread *thread;
struct obd_device *dev;
};
+/** @} */
/* ptlrpc/import.c */
+/**
+ * Import API
+ * @{
+ */
int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid);
int ptlrpc_init_import(struct obd_import *imp);
int ptlrpc_disconnect_import(struct obd_import *imp, int noclose);
/* ptlrpc/pack_generic.c */
int ptlrpc_reconnect_import(struct obd_import *imp);
+/** @} */
-/** ptlrpc mgs buffer swab interface */
+/**
+ * ptlrpc msg buffer and swab interface
+ *
+ * @{
+ */
int ptlrpc_buf_need_swab(struct ptlrpc_request *req, const int inout,
int index);
void ptlrpc_buf_set_swabbed(struct ptlrpc_request *req, const int inout,
req->rq_replen = lustre_shrink_msg(req->rq_repmsg, segment,
newlen, move_data);
}
+/** @} */
+/** Change request phase of \a req to \a new_phase */
static inline void
ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
{
req->rq_phase = new_phase;
}
+/**
+ * Returns true if request \a req got early reply and hard deadline is not met
+ */
static inline int
ptlrpc_client_early(struct ptlrpc_request *req)
{
return req->rq_early;
}
+/**
+ * Returns true if we got real reply from server for this request
+ */
static inline int
ptlrpc_client_replied(struct ptlrpc_request *req)
{
return req->rq_replied;
}
+/** Returns true if request \a req is in process of receiving server reply */
static inline int
ptlrpc_client_recv(struct ptlrpc_request *req)
{
}
/* ldlm/ldlm_lib.c */
+/**
+ * Target client logic
+ * @{
+ */
int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
int client_obd_cleanup(struct obd_device *obddev);
int client_connect_import(const struct lu_env *env,
int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
void client_destroy_import(struct obd_import *imp);
+/** @} */
int server_disconnect_export(struct obd_export *exp);
/* ptlrpc/pinger.c */
+/**
+ * Pinger API (client side only)
+ * @{
+ */
enum timeout_event {
TIMEOUT_GRANT = 1
};
#define ping_evictor_stop() do {} while (0)
#endif
int ptlrpc_check_and_wait_suspend(struct ptlrpc_request *req);
+/** @} */
/* ptlrpc/ptlrpcd.c */
void ptlrpcd_decref(void);
/* ptlrpc/lproc_ptlrpc.c */
+/**
+ * procfs output related functions
+ * @{
+ */
const char* ll_opcode2str(__u32 opcode);
#ifdef LPROCFS
void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {}
static inline void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) {}
#endif
+/** @} */
/* ptlrpc/llog_server.c */
int llog_origin_handle_create(struct ptlrpc_request *req);
/** @} net */
#endif
+/** @} PtlRPC */
* Lustre is a trademark of Sun Microsystems, Inc.
*/
+/** Implementation of client-side PortalRPC interfaces */
+
#define DEBUG_SUBSYSTEM S_RPC
#ifndef __KERNEL__
#include <errno.h>
#include "ptlrpc_internal.h"
+/**
+ * Initialize passed in client structure \a cl.
+ */
void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
struct ptlrpc_client *cl)
{
cl->cli_name = name;
}
+/**
+ * Return PortalRPC connection for remote uuid \a uuid
+ */
struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid)
{
struct ptlrpc_connection *c;
return c;
}
+/**
+ * Allocate and initialize new bulk descriptor
+ * Returns pointer to the descriptor or NULL on error.
+ */
static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal)
{
struct ptlrpc_bulk_desc *desc;
return desc;
}
+/**
+ * Prepare bulk descriptor for specified outgoing request \a req that
+ * can fit \a npages pages. \a type is bulk type. \a portal is where
+ * the bulk is to be sent. Used on the client side.
+ * Returns pointer to newly allocated initialized bulk descriptor or NULL on
+ * error.
+ */
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
int npages, int type, int portal)
{
return desc;
}
+/**
+ * Prepare bulk descriptor for specified incoming request \a req that
+ * can fit \a npages pages. \a type is bulk type. \a portal is where
+ * the bulk is to be sent. Used on the server side after the request was
+ * already received.
+ * Returns pointer to newly allocated initialized bulk descriptor or NULL on
+ * error.
+ */
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp(struct ptlrpc_request *req,
int npages, int type, int portal)
{
return desc;
}
+/**
+ * Add a page \a page to the bulk descriptor \a desc.
+ * Data to transfer in the page starts at offset \a pageoffset and
+ * amount of data to transfer from the page is \a len
+ */
void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
cfs_page_t *page, int pageoffset, int len)
{
ptlrpc_add_bulk_page(desc, page, pageoffset, len);
}
+/**
+ * Uninitialize and free bulk descriptor \a desc.
+ * Works on bulk descriptors both from server and client side.
+ */
void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
{
ENTRY;
EXIT;
}
-/* Set server timelimit for this req */
+/**
+ * Set server timelimit for this req, i.e. how long are we willing to wait
+ * for reply before timing out this request.
+ */
void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
{
__u32 serv_est;
if (AT_OFF) {
/* non-AT settings */
+ /**
+ * \a imp_server_timeout means this is a reverse import and
+ * we send (currently only) ASTs to the client and cannot afford
+ * to wait too long for the reply, otherwise the other client
+ * (because of which we are sending this request) would
+ * time out waiting for us
+ */
req->rq_timeout = req->rq_import->imp_server_timeout ?
obd_timeout / 2 : obd_timeout;
} else {
return 0;
}
-/*
+/**
* Handle an early reply message, called with the rq_lock held.
* If anything goes wrong just ignore it - same as if it never happened
*/
RETURN(rc);
}
+/**
+ * Wind down request pool \a pool.
+ * Frees all requests from the pool too
+ */
void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool)
{
cfs_list_t *l, *tmp;
OBD_FREE(pool, sizeof(*pool));
}
+/**
+ * Allocates, initializes and adds \a num_rq requests to the pool \a pool
+ */
void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
{
int i;
return;
}
+/**
+ * Create and initialize new request pool with given attributes:
+ * \a num_rq - initial number of requests to create for the pool
+ * \a msgsize - maximum message size possible for requests in this pool
+ * \a populate_pool - function to be called when more requests need to be added
+ * to the pool
+ * Returns pointer to newly created pool or NULL on error.
+ */
struct ptlrpc_request_pool *
ptlrpc_init_rq_pool(int num_rq, int msgsize,
void (*populate_pool)(struct ptlrpc_request_pool *, int))
return pool;
}
+/**
+ * Fetches one request from pool \a pool
+ */
static struct ptlrpc_request *
ptlrpc_prep_req_from_pool(struct ptlrpc_request_pool *pool)
{
return request;
}
+/**
+ * Returns freed \a request to pool.
+ */
static void __ptlrpc_free_req_to_pool(struct ptlrpc_request *request)
{
struct ptlrpc_request_pool *pool = request->rq_pool;
}
EXPORT_SYMBOL(ptlrpc_request_bufs_pack);
+/**
+ * Pack request buffers for network transfer, performing any necessary
+ * encryption steps.
+ */
int ptlrpc_request_pack(struct ptlrpc_request *request,
__u32 version, int opcode)
{
return ptlrpc_request_bufs_pack(request, version, opcode, NULL, NULL);
}
+/**
+ * Helper function to allocate a new request on import \a imp,
+ * possibly reusing an existing request from pool \a pool if provided.
+ * Returns allocated request structure with import field filled or
+ * NULL on error.
+ */
static inline
struct ptlrpc_request *__ptlrpc_request_alloc(struct obd_import *imp,
struct ptlrpc_request_pool *pool)
return request;
}
+/**
+ * Helper function for creating a request.
+ * Calls __ptlrpc_request_alloc to allocate new request structure and inits
+ * buffer structures according to capsule template \a format.
+ * Returns allocated request structure pointer or NULL on error.
+ */
static struct ptlrpc_request *
ptlrpc_request_alloc_internal(struct obd_import *imp,
struct ptlrpc_request_pool * pool,
return request;
}
+/**
+ * Allocate new request structure for import \a imp and initialize its
+ * buffer structure according to capsule template \a format.
+ */
struct ptlrpc_request *ptlrpc_request_alloc(struct obd_import *imp,
const struct req_format *format)
{
return ptlrpc_request_alloc_internal(imp, NULL, format);
}
+/**
+ * Allocate new request structure for import \a imp from pool \a pool and
+ * initialize its buffer structure according to capsule template \a format.
+ */
struct ptlrpc_request *ptlrpc_request_alloc_pool(struct obd_import *imp,
struct ptlrpc_request_pool * pool,
const struct req_format *format)
return ptlrpc_request_alloc_internal(imp, pool, format);
}
+/**
+ * For requests not from pool, free memory of the request structure.
+ * For requests obtained from a pool earlier, return request back to pool.
+ */
void ptlrpc_request_free(struct ptlrpc_request *request)
{
if (request->rq_pool)
OBD_FREE_PTR(request);
}
+/**
+ * Allocate new request for operation \a opcode and immediately pack it for
+ * network transfer.
+ * Only used for simple requests like OBD_PING where the only important
+ * part of the request is the operation itself.
+ * Returns allocated request or NULL on error.
+ */
struct ptlrpc_request *ptlrpc_request_alloc_pack(struct obd_import *imp,
const struct req_format *format,
__u32 version, int opcode)
return req;
}
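+/**
+ * A sketch of the simplest case, a ping (error handling elided):
+ * \code
+ * req = ptlrpc_request_alloc_pack(imp, &RQF_OBD_PING,
+ *                                 LUSTRE_OBD_VERSION, OBD_PING);
+ * rc = ptlrpc_queue_wait(req);  // send synchronously, wait for the reply
+ * ptlrpc_req_finished(req);
+ * \endcode
+ */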
+/**
+ * Prepare request (fetched from pool \a pool if not NULL) on import \a imp
+ * for operation \a opcode. Request would contain \a count buffers.
+ * Sizes of buffers are described in array \a lengths and buffers themselves
+ * are provided by a pointer \a bufs.
+ * Returns prepared request structure pointer or NULL on error.
+ */
struct ptlrpc_request *
ptlrpc_prep_req_pool(struct obd_import *imp,
__u32 version, int opcode,
return request;
}
+/**
+ * Same as ptlrpc_prep_req_pool, but without pool
+ */
struct ptlrpc_request *
ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, int count,
__u32 *lengths, char **bufs)
NULL);
}
+/**
+ * Allocate "fake" request that would not be sent anywhere in the end.
+ * Only used as a hack because we have no other way of performing
+ * async actions in lustre between layers.
+ * Used on MDS to request object preallocations from more than one OST at a
+ * time.
+ */
struct ptlrpc_request *ptlrpc_prep_fakereq(struct obd_import *imp,
unsigned int timeout,
ptlrpc_interpterer_t interpreter)
RETURN(request);
}
+/**
+ * Indicate that processing of "fake" request is finished.
+ */
void ptlrpc_fakereq_finished(struct ptlrpc_request *req)
{
/* if we kill request before timeout - need adjust counter */
cfs_list_del_init(&req->rq_list);
}
-
+/**
+ * Allocate and initialize new request set structure.
+ * Returns a pointer to the newly allocated set structure or NULL on error.
+ */
struct ptlrpc_request_set *ptlrpc_prep_set(void)
{
struct ptlrpc_request_set *set;
RETURN(set);
}
-/* Finish with this set; opposite of prep_set. */
+/**
+ * Wind down and free request set structure previously allocated with
+ * ptlrpc_prep_set.
+ * Ensures that all requests on the set have completed and removes
+ * all requests from the request list in a set.
+ * If any unsent requests happen to be on the list, pretends that they got
+ * an error in flight and calls their completion handler.
+ */
void ptlrpc_set_destroy(struct ptlrpc_request_set *set)
{
cfs_list_t *tmp;
EXIT;
}
+/**
+ * Add a callback function \a fn to the set.
+ * This function would be called when all requests on this set are completed.
+ * The function will be passed \a data argument.
+ */
int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
set_interpreter_func fn, void *data)
{
RETURN(0);
}
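+/**
+ * A hypothetical completion callback matching set_interpreter_func:
+ * \code
+ * static int my_set_done(struct ptlrpc_request_set *set, void *data, int rc)
+ * {
+ *         CDEBUG(D_INFO, "set %p done: rc = %d\n", set, rc);
+ *         return rc;
+ * }
+ * ...
+ * ptlrpc_set_add_cb(set, my_set_done, my_data);
+ * \endcode
+ */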
+/**
+ * Add a new request to the general purpose request set.
+ * Assumes request reference from the caller.
+ */
void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
struct ptlrpc_request *req)
{
}
/**
- * Lock so many callers can add things, the context that owns the set
- * is supposed to notice these and move them into the set proper.
+ * Add a request to a request set with a dedicated server thread
+ * and wake the thread for any necessary processing.
+ * Currently only used for ptlrpcd.
+ * Returns 0 if successful or a non-zero error code on error.
+ * (the only possible error for now is if the dedicated server thread
+ * is shutting down)
*/
int ptlrpc_set_add_new_req(struct ptlrpcd_ctl *pc,
struct ptlrpc_request *req)
return 0;
}
-/*
+/**
* Based on the current state of the import, determine if the request
* can be sent, is an error, or should be delayed.
*
RETURN(delay);
}
-/* Conditionally suppress specific console messages */
+/**
+ * Decide if the error message regarding provided request \a req
+ * should be printed to the console or not.
+ * Makes its decision based on request status and other properties.
+ * Returns 1 to print the error on the system console or 0 if not.
+ */
static int ptlrpc_console_allow(struct ptlrpc_request *req)
{
__u32 opc = lustre_msg_get_opc(req->rq_reqmsg);
return 1;
}
+/**
+ * Check request processing status.
+ * Returns the status.
+ */
static int ptlrpc_check_status(struct ptlrpc_request *req)
{
int err;
}
/**
- * save pre-versions for replay
+ * Save pre-versions of objects into request for replay.
+ * Versions are obtained from the server reply.
+ * Used for VBR.
*/
static void ptlrpc_save_versions(struct ptlrpc_request *req)
{
/**
* Callback function called when client receives RPC reply for \a req.
+ * Returns 0 on success or error code.
+ * The return value would be assigned to req->rq_status by the caller
+ * as request processing status.
+ * This function also decides if the request needs to be saved for later replay.
*/
static int after_reply(struct ptlrpc_request *req)
{
RETURN(rc);
}
+/**
+ * Helper function to send request \a req over the network for the first time
+ * Also adjusts request phase.
+ * Returns 0 on success or error code.
+ */
static int ptlrpc_send_new_req(struct ptlrpc_request *req)
{
struct obd_import *imp;
RETURN(0);
}
-/* this sends any unsent RPCs in @set and returns TRUE if all are sent */
+/**
+ * This sends any unsent RPCs in \a set and returns 1 if all are sent
+ * and no more replies are expected.
+ * (it is possible to get fewer replies than requests sent, e.g. due to timed
+ * out requests or requests that we had trouble sending out)
+ */
int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
{
cfs_list_t *tmp;
RETURN(cfs_atomic_read(&set->set_remaining) == 0 || force_timer_recalc);
}
-/* Return 1 if we should give up, else 0 */
+/**
+ * Time out request \a req. If \a async_unlink is set, do not wait
+ * until LNet actually confirms network buffer unlinking.
+ * Return 1 if we should give up further retrying attempts or 0 otherwise.
+ */
int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink)
{
struct obd_import *imp = req->rq_import;
RETURN(rc);
}
+/**
+ * Time out all uncompleted requests in the request set pointed to by \a data
+ * Callback used when waiting on sets with l_wait_event.
+ * Always returns 1.
+ */
int ptlrpc_expired_set(void *data)
{
struct ptlrpc_request_set *set = data;
RETURN(1);
}
+/**
+ * Sets rq_intr flag in \a req under spinlock.
+ */
void ptlrpc_mark_interrupted(struct ptlrpc_request *req)
{
cfs_spin_lock(&req->rq_lock);
cfs_spin_unlock(&req->rq_lock);
}
+/**
+ * Interrupts (sets the interrupted flag in) all uncompleted requests in
+ * the set \a data. Callback for l_wait_event for interruptible waits.
+ */
void ptlrpc_interrupted_set(void *data)
{
struct ptlrpc_request_set *set = data;
RETURN(timeout);
}
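+/*
+ * Editor's sketch of how the two callbacks above are wired into a wait;
+ * a hedged reconstruction of what ptlrpc_set_wait() below does, the exact
+ * arguments may differ:
+ *
+ *	struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout),
+ *						  ptlrpc_expired_set,
+ *						  ptlrpc_interrupted_set, set);
+ *	rc = l_wait_event(set->set_waitq, ptlrpc_check_set(NULL, set), &lwi);
+ */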
+/**
+ * Send all unsent requests from the set and then wait until all
+ * requests in the set complete (either get a reply, time out, get an
+ * error or otherwise be interrupted).
+ * Returns 0 on success or error code otherwise.
+ */
int ptlrpc_set_wait(struct ptlrpc_request_set *set)
{
cfs_list_t *tmp;
RETURN(rc);
}
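+/*
+ * Editor's sketch of typical request set usage; assumes ptlrpc_set_create(),
+ * ptlrpc_set_add_req() and ptlrpc_set_destroy() as defined elsewhere in this
+ * file:
+ *
+ *	set = ptlrpc_set_create();
+ *	if (set == NULL)
+ *		return -ENOMEM;
+ *	ptlrpc_set_add_req(set, req);
+ *	rc = ptlrpc_set_wait(set);
+ *	ptlrpc_set_destroy(set);
+ */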
+/**
+ * Helper function for request freeing.
+ * Called when the request reference count reaches zero and the request needs
+ * to be freed.
+ * Removes the request from all sorts of sending/replay lists it might be on,
+ * frees network buffers if any are present.
+ * If \a locked is set, the caller is already holding import imp_lock
+ * and so we no longer need to reobtain it (for certain list manipulations).
+ */
static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
{
ENTRY;
}
static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked);
+/**
+ * Drop one request reference. Must be called with import imp_lock held.
+ * When reference count drops to zero, request is freed.
+ */
void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request)
{
LASSERT_SPIN_LOCKED(&request->rq_import->imp_lock);
(void)__ptlrpc_req_finished(request, 1);
}
+/**
+ * Helper function
+ * Drops one reference count for request \a request.
+ * \a locked set indicates that caller holds import imp_lock.
+ * Frees the request when the reference count reaches zero.
+ */
static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked)
{
ENTRY;
RETURN(0);
}
+/**
+ * Drops one reference count for a request.
+ */
void ptlrpc_req_finished(struct ptlrpc_request *request)
{
__ptlrpc_req_finished(request, 0);
}
+/**
+ * Returns xid of a \a request
+ */
__u64 ptlrpc_req_xid(struct ptlrpc_request *request)
{
return request->rq_xid;
}
EXPORT_SYMBOL(ptlrpc_req_xid);
-/* Disengage the client's reply buffer from the network
+/**
+ * Disengage the client's reply buffer from the network
* NB does _NOT_ unregister any client-side bulk.
* IDEMPOTENT, but _not_ safe against concurrent callers.
* The request owner (i.e. the thread doing the I/O) must call...
+ * Returns 0 on success or 1 if unregistering cannot be made.
*/
int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
{
RETURN(0);
}
-/* caller must hold imp->imp_lock */
+/**
+ * Iterates through replay_list on import and prunes
+ * all requests that have transno smaller than last_committed for the
+ * import and don't have rq_replay set.
+ * Since requests are sorted in transno order, stops when meeting the first
+ * transno bigger than last_committed.
+ * Caller must hold imp->imp_lock.
+ */
void ptlrpc_free_committed(struct obd_import *imp)
{
cfs_list_t *tmp, *saved;
return;
}
+/**
+ * Schedule previously sent request for resend.
+ * For bulk requests we assign a new xid (to avoid problems with
+ * lost replies and therefore several transfers landing into the same buffer
+ * from different sending attempts).
+ */
void ptlrpc_resend_req(struct ptlrpc_request *req)
{
DEBUG_REQ(D_HA, req, "going to resend");
cfs_spin_unlock(&req->rq_lock);
}
+/**
+ * Grab an additional reference on request \a req.
+ */
struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req)
{
ENTRY;
RETURN(req);
}
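+/*
+ * Editor's note: ptlrpc_request_addref() pairs with ptlrpc_req_finished()
+ * above. A caller that hands a request to another context keeps its own
+ * reference (hand_off_to_other_context() is hypothetical):
+ *
+ *	ptlrpc_request_addref(req);
+ *	hand_off_to_other_context(req);
+ *	...
+ *	ptlrpc_req_finished(req);
+ */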
+/**
+ * Add a request to import replay_list.
+ * Must be called under imp_lock
+ */
void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
struct obd_import *imp)
{
cfs_list_add(&req->rq_replay_list, &imp->imp_replay_list);
}
+/**
+ * Send request and wait until it completes.
+ * Returns request processing status.
+ */
int ptlrpc_queue_wait(struct ptlrpc_request *req)
{
struct ptlrpc_request_set *set;
int praa_old_status;
};
+/**
+ * Callback used for replayed requests reply processing.
+ * In case of a successful reply, calls the registered request replay callback.
+ * In case of error, restarts the replay process.
+ */
static int ptlrpc_replay_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
void * data, int rc)
RETURN(rc);
}
+/**
+ * Prepares and queues request for replay.
+ * Adds it to ptlrpcd queue for actual sending.
+ * Returns 0 on success.
+ */
int ptlrpc_replay_req(struct ptlrpc_request *req)
{
struct ptlrpc_replay_async_args *aa;
RETURN(0);
}
+/**
+ * Aborts all in-flight requests on import \a imp sending and delayed lists.
+ */
void ptlrpc_abort_inflight(struct obd_import *imp)
{
cfs_list_t *tmp, *n;
EXIT;
}
+/**
+ * Abort all uncompleted requests in request set \a set
+ */
void ptlrpc_abort_set(struct ptlrpc_request_set *set)
{
cfs_list_t *tmp, *pos;
static __u64 ptlrpc_last_xid;
static cfs_spinlock_t ptlrpc_last_xid_lock;
-/* Initialize the XID for the node. This is common among all requests on
+/**
+ * Initialize the XID for the node. This is common among all requests on
* this node, and only requires the property that it is monotonically
* increasing. It does not need to be sequential. Since this is also used
* as the RDMA match bits, it is important that a single client NOT have
}
}
+/**
+ * Increases xid and returns the resulting new value to the caller.
+ */
__u64 ptlrpc_next_xid(void)
{
__u64 tmp;
return tmp;
}
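+/*
+ * Editor's sketch of the bulk resend case described at ptlrpc_resend_req()
+ * above (hedged; the real code may differ):
+ *
+ *	if (req->rq_bulk != NULL)
+ *		req->rq_xid = ptlrpc_next_xid();
+ */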
+/**
+ * Get a glimpse at what next xid value might have been.
+ * Returns possible next xid.
+ */
__u64 ptlrpc_sample_next_xid(void)
{
#if BITS_PER_LONG == 32
int pcaa_initial_connect;
};
+/**
+ * Updates import \a imp current state to provided \a state value
+ * Helper function. Must be called under imp_lock.
+ */
static void __import_set_state(struct obd_import *imp,
enum lustre_imp_state state)
{
*uuid_len -= strlen(UUID_STR);
}
-/* Returns true if import was FULL, false if import was already not
+/**
+ * Returns true if import was FULL, false if import was already not
* connected.
* @imp - import to be disconnected
* @conn_cnt - connection count (epoch) of the request that timed out
return timeout;
}
-/*
+/**
* This function will invalidate the import, if necessary, then block
* for all the RPC completions, and finally notify the obd to
* invalidate its state (ie cancel locks, clear pending requests,
EXPORT_SYMBOL(ptlrpc_reconnect_import);
+/**
+ * Connection on import \a imp is changed to another one (if more than one is
+ * present). We typically choose the connection that we have not tried to
+ * connect to for the longest time.
+ */
static int import_select_connection(struct obd_import *imp)
{
struct obd_import_conn *imp_conn = NULL, *conn;
return 1;
}
+/**
+ * Attempt to (re)connect import \a imp. This includes all preparations,
+ * initializing CONNECT RPC request and passing it to ptlrpcd for
+ * actual sending.
+ * Returns 0 on success or error code.
+ */
int ptlrpc_connect_import(struct obd_import *imp, char *new_uuid)
{
struct obd_device *obd = imp->imp_obd;
return (rc == -EBUSY) || (rc == -EAGAIN);
}
-
+/**
+ * interpret_reply callback for connect RPCs.
+ * Looks into returned status of connect operation and decides
+ * what to do with the import - i.e. enter recovery, promote it to
+ * full state for normal operations or disconnect it due to an error.
+ */
static int ptlrpc_connect_interpret(const struct lu_env *env,
struct ptlrpc_request *request,
void *data, int rc)
RETURN(rc);
}
+/**
+ * interpret callback for "completed replay" RPCs.
+ * \see signal_completed_replay
+ */
static int completed_replay_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
void * data, int rc)
RETURN(0);
}
+/**
+ * Let server know that we have no requests to replay anymore.
+ * Achieved by just sending a PING request
+ */
static int signal_completed_replay(struct obd_import *imp)
{
struct ptlrpc_request *req;
}
#ifdef __KERNEL__
+/**
+ * In kernel code, all import invalidation happens in its own
+ * separate thread, so that whatever application happened to encounter
+ * a problem can still be killed or otherwise continue.
+ */
static int ptlrpc_invalidate_import_thread(void *data)
{
struct obd_import *imp = data;
}
#endif
+/**
+ * This is the state machine for client-side recovery on import.
+ *
+ * Typically we have two possible paths. If we come to a server that is not
+ * in recovery, we just enter the IMP_EVICTED state, invalidate our import
+ * state and reconnect from scratch.
+ * If we come to a server that is in recovery, we enter the IMP_REPLAY import
+ * state. We go through our list of requests to replay and send them to the
+ * server one by one.
+ * After sending all requests from the list we change import state to
+ * IMP_REPLAY_LOCKS and re-request from the server all the locks we believe
+ * we have, and also all the locks we don't yet have, and wait for the server
+ * to grant them.
+ * After that we send a special "replay completed" request and change import
+ * state to IMP_REPLAY_WAIT.
+ * Upon receiving the reply to that "replay completed" RPC we enter the
+ * IMP_RECOVER state and resend all requests from the sending list.
+ * After that we promote the import to the FULL state and send all delayed
+ * requests, and the import is fully operational after that.
+ */
int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
{
int rc = 0;
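+/*
+ * Editor's summary of the flow described above, for the case where the
+ * server is in recovery (state names from the comment):
+ *
+ *	IMP_REPLAY -> IMP_REPLAY_LOCKS -> IMP_REPLAY_WAIT
+ *		   -> IMP_RECOVER -> IMP_FULL
+ */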
#include <obd.h>
#include "ptlrpc_internal.h"
+/**
+ * Helper function. Sends \a len bytes from \a base at offset \a offset
+ * over \a conn connection to portal \a portal.
+ * Returns 0 on success or error code.
+ */
static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len,
lnet_ack_req_t ack, struct ptlrpc_cb_id *cbid,
struct ptlrpc_connection *conn, int portal, __u64 xid,
RETURN (0);
}
+/**
+ * Starts bulk transfer for descriptor \a desc
+ * Returns 0 on success or error code.
+ */
int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
{
struct ptlrpc_connection *conn = desc->bd_export->exp_connection;
RETURN(0);
}
-/* Server side bulk abort. Idempotent. Not thread-safe (i.e. only
- * serialises with completion callback) */
+/**
+ * Server side bulk abort. Idempotent. Not thread-safe (i.e. only
+ * serialises with completion callback)
+ */
void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
{
struct l_wait_info lwi;
}
}
+/**
+ * Register bulk for later transfer
+ * Returns 0 on success or error code.
+ */
int ptlrpc_register_bulk(struct ptlrpc_request *req)
{
struct ptlrpc_bulk_desc *desc = req->rq_bulk;
RETURN(0);
}
-/* Disconnect a bulk desc from the network. Idempotent. Not
- * thread-safe (i.e. only interlocks with completion callback). */
+/**
+ * Disconnect a bulk desc from the network. Idempotent. Not
+ * thread-safe (i.e. only interlocks with completion callback).
+ * Returns 1 on success or 0 if network unregistration failed for whatever
+ * reason.
+ */
int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
{
struct ptlrpc_bulk_desc *desc = req->rq_bulk;
}
}
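+/*
+ * Editor's note on the implied client-side lifecycle: a bulk descriptor is
+ * registered with ptlrpc_register_bulk() before the request goes out and
+ * must be disengaged with ptlrpc_unregister_bulk() once the transfer
+ * completes or the request times out, e.g. (hedged sketch):
+ *
+ *	rc = ptlrpc_register_bulk(req);
+ *	...
+ *	rc = ptlrpc_unregister_bulk(req, 1);
+ */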
+/**
+ * Send request reply from request \a req reply buffer.
+ * \a flags defines reply types.
+ * Returns 0 on success or error code.
+ */
int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
{
struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
return (ptlrpc_send_reply(req, 0));
}
+/**
+ * For request \a req send an error reply back. Create empty
+ * reply buffers if necessary.
+ */
int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult)
{
int rc;
return ptlrpc_send_error(req, 0);
}
+/**
+ * Send request \a request.
+ * If \a noreply is set, don't expect any reply back and don't set up
+ * reply buffers.
+ * Returns 0 on success or error code.
+ */
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
int rc;
return rc;
}
+/**
+ * Register request buffer descriptor for request receiving.
+ */
int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
{
struct ptlrpc_service *service = rqbd->rqbd_service;
* lustre/ptlrpc/ptlrpcd.c
*/
+/** \defgroup ptlrpcd PortalRPC daemon
+ *
+ * ptlrpcd is a special thread with its own set where other users might add
+ * requests when they don't want to wait for their completion.
+ * PtlRPCD will take care of sending such requests and then processing their
+ * replies and calling completion callbacks as necessary.
+ * The callbacks are called directly from ptlrpcd context.
+ * It is important to never significantly block (esp. on RPCs!) within such
+ * completion handlers, or a deadlock might occur where ptlrpcd enters some
+ * callback that attempts to send another RPC and waits for it to return,
+ * during which time ptlrpcd is completely blocked, so e.g. if the import
+ * fails, recovery cannot progress because connection requests are also
+ * sent by ptlrpcd.
+ *
+ * @{
+ */
+
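+/*
+ * Editor's illustration of the rule above (my_interpret() and
+ * wake_up_my_waiter() are hypothetical): completion callbacks run in
+ * ptlrpcd context and must not block.
+ *
+ *	static int my_interpret(const struct lu_env *env,
+ *				struct ptlrpc_request *req,
+ *				void *data, int rc)
+ *	{
+ *		wake_up_my_waiter(data);
+ *		return rc;
+ *	}
+ *
+ * By contrast, calling something like ptlrpc_queue_wait() from such a
+ * callback would block ptlrpcd until the new RPC completes - and ptlrpcd
+ * itself is needed to complete it.
+ */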
#define DEBUG_SUBSYSTEM S_RPC
#ifdef __KERNEL__
cfs_waitq_signal(&rq_set->set_waitq);
}
-/*
+/**
* Move all request from an existing request set to the ptlrpcd queue.
* All requests from the set must be in phase RQ_PHASE_NEW.
*/
}
EXPORT_SYMBOL(ptlrpcd_add_rqset);
-/*
+/**
* Requests that are added to the ptlrpcd queue are sent via
* ptlrpcd_check->ptlrpc_check_set().
*/
return rc;
}
+/**
+ * Check if there is more work to do on ptlrpcd set.
+ * Returns 1 if yes.
+ */
static int ptlrpcd_check(const struct lu_env *env, struct ptlrpcd_ctl *pc)
{
cfs_list_t *tmp, *pos;
}
#ifdef __KERNEL__
-/*
+/**
+ * Main ptlrpcd thread.
* ptlrpc's code paths like to execute in process context, so we have this
- * thread which spins on a set which contains the io rpcs. llite specifies
- * ptlrpcd's set when it pushes pages down into the oscs.
+ * thread which spins on a set which contains the RPCs and sends them.
*/
static int ptlrpcd(void *arg)
{
#else /* !__KERNEL__ */
+/**
+ * In liblustre we do not have separate threads, so this function
+ * is called from time to time all across common code to see
+ * if something needs to be processed on ptlrpcd set.
+ */
int ptlrpcd_check_async_rpcs(void *arg)
{
struct ptlrpcd_ctl *pc = arg;
ptlrpcd_fini();
cfs_mutex_up(&ptlrpcd_sem);
}
+/** @} ptlrpcd */
static int ptlrpc_recover_import_no_retry(struct obd_import *, char *);
+/**
+ * Start recovery on disconnected import.
+ * This is done by just attempting a connect
+ */
void ptlrpc_initiate_recovery(struct obd_import *imp)
{
ENTRY;
EXIT;
}
+/**
+ * Identify what request from replay list needs to be replayed next
+ * (based on what we have already replayed) and send it to server.
+ */
int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
{
int rc = 0;
RETURN(rc);
}
+/**
+ * Schedule resending of requests on the sending_list. This is done after
+ * we have completed replaying of requests and locks.
+ */
int ptlrpc_resend(struct obd_import *imp)
{
struct ptlrpc_request *req, *next;
RETURN(0);
}
+/**
+ * Go through all requests in the delayed list and wake their threads
+ * for resending.
+ */
void ptlrpc_wake_delayed(struct obd_import *imp)
{
cfs_list_t *tmp, *pos;
EXIT;
}
-/*
+/**
* Administratively active/deactive a client.
* This should only be called by the ioctl interface, currently
* - the lctl deactivate and activate commands
return (0);
}
+/**
+ * Part of Rep-Ack logic.
+ * Puts a lock and its mode into the reply state associated with the request
+ * reply.
+ */
void
ptlrpc_save_lock(struct ptlrpc_request *req,
struct lustre_handle *lock, int mode, int no_ack)
#endif /* __KERNEL__ */
+/**
+ * Put reply state into a queue for processing because we received
+ * an ACK from the client.
+ */
void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs)
{
#ifdef __KERNEL__
return (-1);
}
+/**
+ * Start a service with parameters from struct ptlrpc_service_conf \a c
+ * as opposed to directly calling ptlrpc_init_svc with tons of arguments.
+ */
struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
svc_handler_t h, char *name,
struct proc_dir_entry *proc_entry,
cfs_waitq_signal(&svc->srv_waitq);
}
-/* @threadname should be 11 characters or less - 3 will be added on */
+/**
+ * Initialize service on a given portal.
+ * This includes starting serving threads, allocating and posting rqbds and
+ * so on.
+ * \a nbufs - how many buffers to post
+ * \a bufsize - buffer size to post
+ * \a max_req_size - maximum request size to be accepted for this service
+ * \a max_reply_size - maximum reply size this service can ever send
+ * \a req_portal - portal to listen for requests on
+ * \a rep_portal - portal where to send replies to
+ * \a watchdog_factor - soft watchdog timeout multiplier to print stuck
+ * service traces
+ * \a handler - function to process every new request
+ * \a name - service name
+ * \a proc_entry - entry in the /proc tree for statistics reporting
+ * \a min_threads \a max_threads - min/max number of service threads to start
+ * \a threadname - should be 11 characters or less; 3 will be added on
+ * \a hp_handler - function to determine priority of the request, also called
+ * on every new request.
+ */
struct ptlrpc_service *
ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
int req_portal, int rep_portal, int watchdog_factor,
ptlrpc_server_drop_request(req);
}
-/* This function makes sure dead exports are evicted in a timely manner.
- This function is only called when some export receives a message (i.e.,
- the network is up.) */
+/**
+ * This function makes sure dead exports are evicted in a timely manner.
+ * This function is only called when some export receives a message (i.e.,
+ * the network is up.)
+ */
static void ptlrpc_update_export_timer(struct obd_export *exp, long extra_delay)
{
struct obd_export *oldest_exp;
EXIT;
}
+/**
+ * Sanity check request \a req.
+ * Return 0 if all is ok, error code otherwise.
+ */
static int ptlrpc_check_req(struct ptlrpc_request *req)
{
if (unlikely(lustre_msg_get_conn_cnt(req->rq_reqmsg) <
EXIT;
}
+/**
+ * \see ptlrpc_hpreq_reorder_nolock
+ */
void ptlrpc_hpreq_reorder(struct ptlrpc_request *req)
{
struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
RETURN(0);
}
-/* Only allow normal priority requests on a service that has a high-priority
+/**
+ * Only allow normal priority requests on a service that has a high-priority
* queue if forced (i.e. cleanup), if there are other high priority requests
* already being processed (i.e. those threads can service more high-priority
* requests), or if there are enough idle threads that a later thread can do
- * a high priority request. */
+ * a high priority request.
+ */
static int ptlrpc_server_allow_normal(struct ptlrpc_service *svc, int force)
{
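+/*
+ * Editor's annotation of the checks below: allow a normal request if forced
+ * (cleanup), if this service has no high-priority handler at all, if
+ * high-priority requests are already being processed, or if at least two
+ * started threads are still idle and remain available for later
+ * high-priority requests.
+ */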
return force || !svc->srv_hpreq_handler || svc->srv_n_hpreq > 0 ||
svc->srv_threads_running <= svc->srv_threads_started - 2;
}
+/**
+ * Fetch a request for processing from the queue of unprocessed requests.
+ * Favors high-priority requests.
+ * Returns a pointer to the fetched request.
+ */
static struct ptlrpc_request *
ptlrpc_server_request_get(struct ptlrpc_service *svc, int force)
{
RETURN(req);
}
+/**
+ * Returns true if there are requests available in the incoming
+ * request queue for processing and it is allowed to fetch them
+ * \see ptlrpc_server_allow_normal
+ */
static int ptlrpc_server_request_pending(struct ptlrpc_service *svc, int force)
{
return ((ptlrpc_server_allow_normal(svc, force) &&
!cfs_list_empty(&svc->srv_request_hpq));
}
-/* Handle freshly incoming reqs, add to timed early reply list,
- pass on to regular request queue */
+/**
+ * Handle freshly incoming reqs, add to timed early reply list,
+ * pass on to regular request queue.
+ * All incoming requests pass through here before getting into
+ * ptlrpc_server_handle_request later on.
+ */
static int
ptlrpc_server_handle_req_in(struct ptlrpc_service *svc)
{
RETURN(1);
}
+/**
+ * Main incoming request handling logic.
+ * Calls handler function from service to do actual processing.
+ */
static int
ptlrpc_server_handle_request(struct ptlrpc_service *svc,
struct ptlrpc_thread *thread)
}
/**
- * Main prlrpc service thread routine.
+ * Main thread body for service threads.
+ * Waits in a loop for new requests to process.
+ * Every time an incoming request is added to its queue, a waitq
+ * is woken up and one of the threads will handle it.
*/
static int ptlrpc_main(void *arg)
{
return result;
}
+/**
+ * Main body of "handle reply" function.
+ * It processes acked reply states
+ */
static int ptlrpc_hr_main(void *arg)
{
struct ptlrpc_hr_args * hr_args = arg;
EXIT;
}
+/**
+ * Stops all threads of a particular service \a svc
+ */
void ptlrpc_stop_all_threads(struct ptlrpc_service *svc)
{
struct ptlrpc_thread *thread;
RETURN(0);
}
-/* Returns 0 if the service is healthy.
+/**
+ * Returns 0 if the service is healthy.
*
* Right now, it just checks to make sure that requests aren't languishing
* in the queue. We'll use this health check to govern whether a node needs