From: bobijam
Date: Wed, 16 Sep 2009 02:03:48 +0000 (+0000)
Subject: Branch b1_8
X-Git-Tag: v1_8_2_01~1^2~110
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=9d1fa4a6976f55133f488e0973737e1bb7c589ae

Branch b1_8
b=20560
o=johann
i=alex.ahuravlev
i=zhenyu.xu (bobijam)

don't poison pages for evicted clients
---

diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c
index e48cf86..f9599c0 100644
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -80,7 +80,7 @@ static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len,
         CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64", offset %u\n",
                len, portal, xid, offset);
 
-        rc = LNetPut (conn->c_self, *mdh, ack,
+        rc = LNetPut (conn->c_self, *mdh, ack,
                       conn->c_peer, portal, xid, offset, 0);
         if (rc != 0) {
                 int rc2;
@@ -105,7 +105,7 @@ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
         __u64 xid;
         ENTRY;
 
-        if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_PUT_NET))
+        if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_PUT_NET))
                 RETURN(0);
 
         /* NB no locking required until desc is on the network */
@@ -138,17 +138,17 @@ int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc)
         xid = desc->bd_req->rq_xid;
         CDEBUG(D_NET, "Transferring %u pages %u bytes via portal %d "
                "id %s xid "LPX64"\n", desc->bd_iov_count,
-               desc->bd_nob, desc->bd_portal,
+               desc->bd_nob, desc->bd_portal,
                libcfs_id2str(conn->c_peer), xid);
 
         /* Network is about to get at the memory */
         desc->bd_network_rw = 1;
 
         if (desc->bd_type == BULK_PUT_SOURCE)
-                rc = LNetPut (conn->c_self, desc->bd_md_h, LNET_ACK_REQ,
+                rc = LNetPut (conn->c_self, desc->bd_md_h, LNET_ACK_REQ,
                               conn->c_peer, desc->bd_portal, xid, 0, 0);
         else
-                rc = LNetGet (conn->c_self, desc->bd_md_h,
+                rc = LNetGet (conn->c_self, desc->bd_md_h,
                               conn->c_peer, desc->bd_portal, xid, 0);
 
         if (rc != 0) {
@@ -175,10 +175,11 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
         if (!ptlrpc_server_bulk_active(desc))           /* completed or */
                 return;                                 /* never started */
-
-        /* Do not send any meaningful data over the wire for evicted clients */
-        if (desc->bd_export && desc->bd_export->exp_failed)
-                ptl_rpc_wipe_bulk_pages(desc);
+
+        /* We used to poison the pages with 0xab here because we did not want to
+         * send any meaningful data over the wire for evicted clients (bug 9297).
+         * However, this is no longer safe now that we use the page cache on the
+         * OSS (bug 20560). */
 
         /* The unlink ensures the callback happens ASAP and is the last
          * one.  If it fails, it must be because completion just happened,
@@ -192,7 +193,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
          * timeout lets us CWARN for visibility of sluggish NALs */
         lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(LONG_UNLINK),
                                    cfs_time_seconds(1), NULL, NULL);
-        rc = l_wait_event(desc->bd_waitq,
+        rc = l_wait_event(desc->bd_waitq,
                           !ptlrpc_server_bulk_active(desc), &lwi);
         if (rc == 0)
                 return;
@@ -212,7 +213,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
         lnet_md_t md;
         ENTRY;
 
-        if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_GET_NET))
+        if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_PTLRPC_BULK_GET_NET))
                 RETURN(0);
 
         /* NB no locking required until desc is on the network */
@@ -231,8 +232,8 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
         md.user_ptr = &desc->bd_cbid;
         md.eq_handle = ptlrpc_eq_h;
         md.threshold = 1;                       /* PUT or GET */
-        md.options = PTLRPC_MD_OPTIONS |
-                     ((desc->bd_type == BULK_GET_SOURCE) ?
+        md.options = PTLRPC_MD_OPTIONS |
+                     ((desc->bd_type == BULK_GET_SOURCE) ?
                       LNET_MD_OP_GET : LNET_MD_OP_PUT);
 
         ptlrpc_fill_bulk_md(&md, desc);
@@ -293,7 +294,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
         LASSERT(!in_interrupt());               /* might sleep */
 
         /* Let's setup deadline for reply unlink. */
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
+        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
             async && req->rq_bulk_deadline == 0)
                 req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK;
 
@@ -380,10 +381,10 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
         lustre_msg_set_type(req->rq_repmsg, req->rq_type);
         lustre_msg_set_status(req->rq_repmsg, req->rq_status);
         lustre_msg_set_opc(req->rq_repmsg, lustre_msg_get_opc(req->rq_reqmsg));
-
+
         service_time = max_t(int, cfs_time_current_sec() -
                              req->rq_arrival_time.tv_sec, 1);
-        if (!(flags & PTLRPC_REPLY_EARLY) &&
+        if (!(flags & PTLRPC_REPLY_EARLY) &&
             (req->rq_type != PTL_RPC_MSG_ERR) &&
             !(lustre_msg_get_flags(req->rq_reqmsg) &
               (MSG_RESENT | MSG_REPLAY | MSG_LAST_REPLAY))) {
@@ -393,7 +394,7 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
                 if (oldse != 0)
                         DEBUG_REQ(D_ADAPTTO, req,
                                   "svc %s changed estimate from %d to %d",
-                                  svc->srv_name, oldse,
+                                  svc->srv_name, oldse,
                                   at_get(&svc->srv_at_estimate));
         }
         /* Report actual service time for client latency calc */
@@ -416,14 +417,14 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
                         offset = 0;
                         /* Include a checksum on early replies - must be done
                            after all other lustre_msg_set_* */
-                        lustre_msg_set_cksum(req->rq_repmsg,
+                        lustre_msg_set_cksum(req->rq_repmsg,
                                lustre_msg_calc_cksum(req->rq_repmsg));
                 } else {
                         offset = lustre_msg_early_size(req);
                 }
         } else {
                 CDEBUG(D_ADAPTTO, "No early reply support: flags=%#x "
-                       "req_flags=%#x magic=%d:%x/%x len=%d\n",
+                       "req_flags=%#x magic=%d:%x/%x len=%d\n",
                        flags, lustre_msg_get_flags(req->rq_reqmsg),
                        lustre_msg_is_v1(req->rq_reqmsg),
                        lustre_msg_get_magic(req->rq_reqmsg),
@@ -439,11 +440,11 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
                 CERROR("not replying on NULL connection\n"); /* bug 9635 */
                 return -ENOTCONN;
         }
-
+
         atomic_inc (&svc->srv_outstanding_replies);
         ptlrpc_rs_addref(rs);                   /* +1 ref for the network */
         req->rq_sent = cfs_time_current_sec();
-
+
         rc = ptl_send_buf (&rs->rs_md_h, req->rq_repmsg, req->rq_replen,
                            rs->rs_difficult ? LNET_ACK_REQ : LNET_NOACK_REQ,
                            &rs->rs_cb_id, conn, svc->srv_rep_portal,
@@ -493,7 +494,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         struct obd_device *obd = request->rq_import->imp_obd;
         ENTRY;
 
-        OBD_FAIL_RETURN(OBD_FAIL_PTLRPC_DROP_RPC, 0);
+        OBD_FAIL_RETURN(OBD_FAIL_PTLRPC_DROP_RPC, 0);
 
         LASSERT (request->rq_type == PTL_RPC_MSG_REQUEST);
 
@@ -580,7 +581,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 /* We must see the unlink callback to unset rq_must_unlink,
                    so we can't auto-unlink */
-                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
+                rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                   &request->rq_reply_md_h);
                 if (rc != 0) {
                         CERROR("LNetMDAttach failed: %d\n", rc);
@@ -608,13 +609,13 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         request->rq_sent = cfs_time_current_sec();
         do_gettimeofday(&request->rq_arrival_time);
 
-        /* We give the server rq_timeout secs to process the req, and
+        /* We give the server rq_timeout secs to process the req, and
            add the network latency for our local timeout. */
-        request->rq_deadline = request->rq_sent + request->rq_timeout +
+        request->rq_deadline = request->rq_sent + request->rq_timeout +
                 ptlrpc_at_get_net_latency(request);
 
         ptlrpc_pinger_sending_on_import(request->rq_import);
-
+
         DEBUG_REQ(D_INFO, request, "send flg=%x",
                   lustre_msg_get_flags(request->rq_reqmsg));
         rc = ptl_send_buf(&request->rq_req_md_h,
@@ -682,7 +683,7 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
         md.options = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT | LNET_MD_MAX_SIZE;
         md.user_ptr = &rqbd->rqbd_cbid;
         md.eq_handle = ptlrpc_eq_h;
-
+
         rc = LNetMDAttach(me_h, md, LNET_UNLINK, &rqbd->rqbd_md_h);
         if (rc == 0)
                 return (0);
@@ -692,6 +693,6 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
         rc = LNetMEUnlink (me_h);
         LASSERT (rc == 0);
         rqbd->rqbd_refcount = 0;
-
+
         return (-ENOMEM);
 }
diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c
index a297995..38432c3 100644
--- a/lustre/ptlrpc/pers.c
+++ b/lustre/ptlrpc/pers.c
@@ -73,19 +73,6 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
         desc->bd_iov_count++;
 }
 
-void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc)
-{
-        int i;
-
-        for (i = 0; i < desc->bd_iov_count ; i++) {
-                lnet_kiov_t *kiov = &desc->bd_iov[i];
-                ClearPageUptodate(kiov->kiov_page);
-                memset(cfs_kmap(kiov->kiov_page)+kiov->kiov_offset, 0xab,
-                       kiov->kiov_len);
-                cfs_kunmap(kiov->kiov_page);
-        }
-}
-
 #else /* !__KERNEL__ */
 
 void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc)
@@ -130,14 +117,4 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
         }
 }
 
-void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc)
-{
-        int i;
-
-        for(i = 0; i < desc->bd_iov_count; i++) {
-                lnet_md_iovec_t *iov = &desc->bd_iov[i];
-
-                memset(iov->iov_base, 0xab, iov->iov_len);
-        }
-}
 #endif /* !__KERNEL__ */
diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h
index 98ba674..84dccf9 100644
--- a/lustre/ptlrpc/ptlrpc_internal.h
+++ b/lustre/ptlrpc/ptlrpc_internal.h
@@ -89,7 +89,6 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink);
 void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc);
 void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
                           int pageoffset, int len);
-void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc);
 
 /* pinger.c */
 int ptlrpc_start_pinger(void);
@@ -109,7 +108,7 @@ int llog_recov_init(void);
 void llog_recov_fini(void);
 
 static inline int ll_rpc_recoverable_error(int rc)
-{
+{
         return (rc == -ENOTCONN || rc == -ENODEV);
 }
 #endif /* PTLRPC_INTERNAL_H */
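
The hunk in ptlrpc_abort_bulk() is the functional change; every other hunk is
whitespace cleanup. As a rough illustration of why the poisoning had to go,
here is a minimal user-space sketch (not Lustre code; the names
toy_cache_read() and toy_poison_on_evict() are invented for the example):
once the server serves reads from a shared page cache, wiping a cached page
on behalf of one evicted client corrupts what a healthy client reads next.

#include <stdio.h>
#include <string.h>

#define TOY_PAGE_SIZE 16

/* One shared "cached page", standing in for the OSS page cache. */
static unsigned char cached_page[TOY_PAGE_SIZE] = "important data!";

/* A read served straight from the shared cache. */
static void toy_cache_read(const char *client, unsigned char *buf)
{
        memcpy(buf, cached_page, TOY_PAGE_SIZE);
        printf("%s read: first byte 0x%02x\n", client, buf[0]);
}

/* The old abort path: poison the page with 0xab, as the removed
 * ptl_rpc_wipe_bulk_pages() did.  With a shared cache this corrupts
 * data that other, healthy clients may legitimately read later. */
static void toy_poison_on_evict(void)
{
        memset(cached_page, 0xab, TOY_PAGE_SIZE);
}

int main(void)
{
        unsigned char buf[TOY_PAGE_SIZE];

        toy_cache_read("client A", buf);  /* A sees valid data        */
        toy_poison_on_evict();            /* A is evicted; page wiped */
        toy_cache_read("client B", buf);  /* B now reads 0xab bytes   */
        return 0;
}

With the wipe removed, ptlrpc_abort_bulk() leaves page contents untouched and
relies on the MD unlink alone to stop the transfer to the evicted client.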