#pragma pack(push, 4)
#endif
typedef struct {
- lnet_event_kind_t type;
- lnet_process_id_t target;
+ lnet_process_id_t target;
lnet_process_id_t initiator;
-#ifdef CRAY_XT3
- lnet_uid_t uid;
-#endif
+ lnet_nid_t sender; /* immediate sender's NID: the last router if routed, else the peer */
+ lnet_event_kind_t type;
unsigned int pt_index;
__u64 match_bits;
unsigned int rlength;
unsigned int mlength;
- unsigned int offset;
lnet_handle_md_t md_handle;
lnet_md_t md;
__u64 hdr_data;
int status;
int unlinked;
+ unsigned int offset;
+#ifdef CRAY_XT3
+ lnet_uid_t uid;
+#endif
volatile lnet_seq_t sequence;
} lnet_event_t;
msg->msg_hdr.dest_nid = dest_nid;
msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid);
msg->msg_hdr.payload_length = payload_length;
+
+ msg->msg_ev.sender = from_nid;
switch (type) {
case LNET_MSG_ACK:
msg->msg_ev.initiator.nid = LNET_NID_ANY;
msg->msg_ev.initiator.pid = the_lnet.ln_pid;
msg->msg_ev.target = target;
+ msg->msg_ev.sender = LNET_NID_ANY;
msg->msg_ev.pt_index = portal;
msg->msg_ev.match_bits = match_bits;
msg->msg_ev.rlength = md->md_length;
msg->msg_ev.type = LNET_EVENT_REPLY;
msg->msg_ev.initiator = peer_id;
+ msg->msg_ev.sender = peer_id.nid; /* optimized GETs can't be routed */
msg->msg_ev.rlength = msg->msg_ev.mlength = getmd->md_length;
msg->msg_ev.offset = 0;
msg->msg_ev.initiator.nid = LNET_NID_ANY;
msg->msg_ev.initiator.pid = the_lnet.ln_pid;
msg->msg_ev.target = target;
+ msg->msg_ev.sender = LNET_NID_ANY;
msg->msg_ev.pt_index = portal;
msg->msg_ev.match_bits = match_bits;
msg->msg_ev.rlength = md->md_length;
* Note that reiserfs quotas are temporarily disabled on SLES 10 in this
kernel.
+Severity : enhancement
+Bugzilla : 11548
+Description: Add LNET router traceability for debug purposes
+Details : If a checksum failure occurs with a router as part of the
+ IO path, the NID of the last router that forwarded the bulk data
+ is printed so that router can be identified.
+
Severity : normal
Frequency : rare
Bugzilla : 11315
__u64 bd_last_xid;
struct ptlrpc_cb_id bd_cbid; /* network callback info */
- lnet_handle_md_t bd_md_h; /* associated MD */
+ lnet_handle_md_t bd_md_h; /* associated MD */
+ lnet_nid_t bd_sender; /* stash event::sender */
#if defined(__KERNEL__)
lnet_kiov_t bd_iov[0];
if (unlikely(body->oa.o_valid & OBD_MD_FLCKSUM)) {
static int cksum_counter;
- __u32 server_cksum = body->oa.o_cksum;
+ __u32 server_cksum = body->oa.o_cksum;
+ char *via;
+ char *router;
+
client_cksum = osc_checksum_bulk(rc, aa->aa_page_count,
aa->aa_ppga);
+ if (peer->nid == req->rq_bulk->bd_sender) {
+ via = router = "";
+ } else {
+ via = " via ";
+ router = libcfs_nid2str(req->rq_bulk->bd_sender);
+ }
+
if (server_cksum == ~0 && rc > 0) {
CERROR("Protocol error: server %s set the 'checksum' "
"bit, but didn't send a checksum. Not fatal, "
"but please tell CFS.\n",
libcfs_nid2str(peer->nid));
} else if (server_cksum != client_cksum) {
- LCONSOLE_ERROR("%s: BAD READ CHECKSUM: from %s inum "
+ LCONSOLE_ERROR("%s: BAD READ CHECKSUM: from %s%s%s inum "
LPU64"/"LPU64" object "LPU64"/"LPU64
" extent ["LPU64"-"LPU64"]\n",
req->rq_import->imp_obd->obd_name,
libcfs_nid2str(peer->nid),
+ via, router,
body->oa.o_valid & OBD_MD_FLFID ?
body->oa.o_fid : (__u64)0,
body->oa.o_valid & OBD_MD_FLFID ?
objcount, ioo, npages, local_nb, oti, rc);
if (unlikely(client_cksum != server_cksum && rc == 0)) {
- int new_cksum = ost_checksum_bulk(desc);
+ int new_cksum = ost_checksum_bulk(desc);
char *msg;
+ char *via;
+ char *router;
if (new_cksum == server_cksum)
msg = "changed in transit before arrival at OST";
else
msg = "changed in transit AND after initial checksum";
- LCONSOLE_ERROR("%s: BAD WRITE CHECKSUM: %s from %s inum "
+ if (req->rq_peer.nid == desc->bd_sender) {
+ via = router = "";
+ } else {
+ via = " via ";
+ router = libcfs_nid2str(desc->bd_sender);
+ }
+
+ LCONSOLE_ERROR("%s: BAD WRITE CHECKSUM: %s from %s%s%s inum "
LPU64"/"LPU64" object "LPU64"/"LPU64
" extent ["LPU64"-"LPU64"]\n",
req->rq_export->exp_obd->obd_name, msg,
libcfs_id2str(req->rq_peer),
+ via, router,
body->oa.o_valid & OBD_MD_FLFID ?
body->oa.o_fid : (__u64)0,
body->oa.o_valid & OBD_MD_FLFID ?
if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) {
desc->bd_success = 1;
desc->bd_nob_transferred = ev->mlength;
+ desc->bd_sender = ev->sender;
}
/* NB don't unlock till after wakeup; desc can disappear under us
* read/wrote the peer buffer and how much... */
desc->bd_success = 1;
desc->bd_nob_transferred = ev->mlength;
+ desc->bd_sender = ev->sender;
}
if (ev->unlinked) {
LASSERT (desc->bd_type == BULK_PUT_SOURCE ||
desc->bd_type == BULK_GET_SINK);
desc->bd_success = 0;
+ desc->bd_sender = LNET_NID_ANY;
md.user_ptr = &desc->bd_cbid;
md.eq_handle = ptlrpc_eq_h;
desc->bd_type == BULK_GET_SOURCE);
desc->bd_success = 0;
+ desc->bd_sender = LNET_NID_ANY;
peer = desc->bd_import->imp_connection->c_peer;