From: Alex Zhuravlev
Date: Fri, 5 Oct 2012 10:25:44 +0000 (+0400)
Subject: LU-2089 ofd: do not pin pages provided by osd
X-Git-Tag: 2.3.53~4
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=a643e3860f5397f904cc3cc937134c3ac841d7db

LU-2089 ofd: do not pin pages provided by osd

Depending on the OSD implementation, some pages can be allocated with
order > 0, and the kernel raises the refcount on the first page of
such an allocation only. In this case ptlrpc_free_bulk(), calling
cfs_page_unpin(), will try to release the other pages as well, leading
to a refcount warning and other bad things in the kernel. Instead, let
ofd/ost rely on dbo_bufs_get/dbo_bufs_put, which know the details of
the pages they provide.
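
As an illustration of the failure mode (not part of the patch; the
scenario and refcount_demo() are hypothetical, but alloc_pages(),
put_page() and __free_pages() are standard kernel calls, and on Linux
cfs_page_unpin() is essentially a put_page()):

    #include <linux/gfp.h>
    #include <linux/mm.h>

    static void refcount_demo(void)
    {
            /* one non-compound allocation covering four contiguous
             * pages; the allocator raises the refcount of the head
             * page only, the tail pages stay at zero */
            struct page *head = alloc_pages(GFP_KERNEL, 2);

            if (head == NULL)
                    return;

            /* what the old per-page unpin in ptlrpc_free_bulk()
             * amounted to for a tail page handed out as a bulk page:
             *
             *      put_page(head + 1);
             *
             * this underflows the tail page's refcount and triggers
             * the warning described above */

            /* only the owner of the allocation can release it, and
             * only as a whole: */
            __free_pages(head, 2);
    }
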
Signed-off-by: Alex Zhuravlev
Change-Id: I3591e21eef9557d6004d29e63986c7bd5987802b
Reviewed-on: http://review.whamcloud.com/4198
Tested-by: Hudson
Reviewed-by: Andreas Dilger
Tested-by: Maloo
Reviewed-by: Oleg Drokin
---

diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 92b41ac..8f7575e 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -1772,9 +1772,31 @@ void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
 struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
 struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp(struct ptlrpc_request *req,
                                               int npages, int type, int portal);
-void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
-void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
-                           cfs_page_t *page, int pageoffset, int len);
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk, int pin);
+static inline void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk)
+{
+        __ptlrpc_free_bulk(bulk, 1);
+}
+static inline void ptlrpc_free_bulk_nopin(struct ptlrpc_bulk_desc *bulk)
+{
+        __ptlrpc_free_bulk(bulk, 0);
+}
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+                             cfs_page_t *page, int pageoffset, int len, int);
+static inline void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+                                         cfs_page_t *page, int pageoffset,
+                                         int len)
+{
+        __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 1);
+}
+
+static inline void ptlrpc_prep_bulk_page_nopin(struct ptlrpc_bulk_desc *desc,
+                                               cfs_page_t *page, int pageoffset,
+                                               int len)
+{
+        __ptlrpc_prep_bulk_page(desc, page, pageoffset, len, 0);
+}
+
 void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
                                       struct obd_import *imp);
 __u64 ptlrpc_next_xid(void);
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c
index 61e0ba3..5fdc789 100644
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -74,6 +74,15 @@ static char *oss_io_cpts;
 CFS_MODULE_PARM(oss_io_cpts, "s", charp, 0444,
                 "CPU partitions OSS IO threads should run on");
 
+/*
+ * this page is allocated statically when the module initializes.
+ * it is used to simulate data corruption; see ost_checksum_bulk()
+ * for details. as the original pages provided by the layers below
+ * can remain in the internal cache, we do not want to modify
+ * them.
+ */
+static struct page *ost_page_to_corrupt = NULL;
+
 /**
  * Do not return server-side uid/gid to remote client
  */
@@ -555,7 +564,7 @@ static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc,
                     OBD_FAIL_CHECK(OBD_FAIL_OST_CHECKSUM_RECEIVE)) {
                         int off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
                         int len = desc->bd_iov[i].kiov_len;
-                        struct page *np = cfs_alloc_page(CFS_ALLOC_STD);
+                        struct page *np = ost_page_to_corrupt;
                         char *ptr = kmap(desc->bd_iov[i].kiov_page) + off;
 
                         if (np) {
@@ -564,7 +573,6 @@ static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc,
                                 memcpy(ptr2, ptr, len);
                                 memcpy(ptr2, "bad3", min(4, len));
                                 kunmap(np);
-                                cfs_page_unpin(desc->bd_iov[i].kiov_page);
                                 desc->bd_iov[i].kiov_page = np;
                         } else {
                                 CERROR("can't alloc page for corruption\n");
@@ -580,7 +588,7 @@ static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc,
                     OBD_FAIL_CHECK(OBD_FAIL_OST_CHECKSUM_SEND)) {
                         int off = desc->bd_iov[i].kiov_offset & ~CFS_PAGE_MASK;
                         int len = desc->bd_iov[i].kiov_len;
-                        struct page *np = cfs_alloc_page(CFS_ALLOC_STD);
+                        struct page *np = ost_page_to_corrupt;
                         char *ptr = kmap(desc->bd_iov[i].kiov_page) + off;
 
                         if (np) {
@@ -589,7 +597,6 @@ static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc,
                                 memcpy(ptr2, ptr, len);
                                 memcpy(ptr2, "bad4", min(4, len));
                                 kunmap(np);
-                                cfs_page_unpin(desc->bd_iov[i].kiov_page);
                                 desc->bd_iov[i].kiov_page = np;
                         } else {
                                 CERROR("can't alloc page for corruption\n");
@@ -809,9 +816,9 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 nob += page_rc;
                 if (page_rc != 0) {             /* some data! */
                         LASSERT (local_nb[i].page != NULL);
-                        ptlrpc_prep_bulk_page(desc, local_nb[i].page,
-                                              local_nb[i].lnb_page_offset,
-                                              page_rc);
+                        ptlrpc_prep_bulk_page_nopin(desc, local_nb[i].page,
+                                                    local_nb[i].lnb_page_offset,
+                                                    page_rc);
                 }
 
                 if (page_rc != local_nb[i].len) { /* short read */
@@ -859,7 +866,7 @@ out_tls:
         ost_tls_put(req);
 out_bulk:
         if (desc && !CFS_FAIL_PRECHECK(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2))
-                ptlrpc_free_bulk(desc);
+                ptlrpc_free_bulk_nopin(desc);
 out:
         LASSERT(rc <= 0);
         if (rc == 0) {
@@ -1043,9 +1050,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         /* NB Having prepped, we must commit...
          */
         for (i = 0; i < npages; i++)
-                ptlrpc_prep_bulk_page(desc, local_nb[i].page,
-                                      local_nb[i].lnb_page_offset,
-                                      local_nb[i].len);
+                ptlrpc_prep_bulk_page_nopin(desc, local_nb[i].page,
+                                            local_nb[i].lnb_page_offset,
+                                            local_nb[i].len);
 
         rc = sptlrpc_svc_prep_bulk(req, desc);
         if (rc != 0)
@@ -1168,7 +1175,7 @@ out_tls:
         ost_tls_put(req);
 out_bulk:
         if (desc)
-                ptlrpc_free_bulk(desc);
+                ptlrpc_free_bulk_nopin(desc);
 out:
         if (rc == 0) {
                 oti_to_request(oti, req);
@@ -2677,6 +2684,8 @@ static int __init ost_init(void)
         int rc;
         ENTRY;
 
+        ost_page_to_corrupt = cfs_alloc_page(CFS_ALLOC_STD);
+
         lprocfs_ost_init_vars(&lvars);
         rc = class_register_type(&ost_obd_ops, NULL, lvars.module_vars,
                                  LUSTRE_OSS_NAME, NULL);
@@ -2693,6 +2702,9 @@ static int __init ost_init(void)
 
 static void /*__exit*/ ost_exit(void)
 {
+        if (ost_page_to_corrupt)
+                page_cache_release(ost_page_to_corrupt);
+
         class_unregister_type(LUSTRE_OSS_NAME);
 }
 
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 727425a..a41cd32 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -155,8 +155,8 @@ EXPORT_SYMBOL(ptlrpc_prep_bulk_imp);
  * Data to transfer in the page starts at offset \a pageoffset and
  * amount of data to transfer from the page is \a len
  */
-void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
-                           cfs_page_t *page, int pageoffset, int len)
+void __ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+                             cfs_page_t *page, int pageoffset, int len, int pin)
 {
         LASSERT(desc->bd_iov_count < desc->bd_max_iov);
         LASSERT(page != NULL);
@@ -166,16 +166,18 @@ void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
 
         desc->bd_nob += len;
 
-        cfs_page_pin(page);
+        if (pin)
+                cfs_page_pin(page);
+
         ptlrpc_add_bulk_page(desc, page, pageoffset, len);
 }
-EXPORT_SYMBOL(ptlrpc_prep_bulk_page);
+EXPORT_SYMBOL(__ptlrpc_prep_bulk_page);
 
 /**
  * Uninitialize and free bulk descriptor \a desc.
  * Works on bulk descriptors both from server and client side.
  */
-void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
+void __ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc, int unpin)
 {
         int i;
         ENTRY;
@@ -192,14 +194,16 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
         else
                 class_import_put(desc->bd_import);
 
-        for (i = 0; i < desc->bd_iov_count ; i++)
-                cfs_page_unpin(desc->bd_iov[i].kiov_page);
+        if (unpin) {
+                for (i = 0; i < desc->bd_iov_count ; i++)
+                        cfs_page_unpin(desc->bd_iov[i].kiov_page);
+        }
 
         OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
                                 bd_iov[desc->bd_max_iov]));
         EXIT;
 }
-EXPORT_SYMBOL(ptlrpc_free_bulk);
+EXPORT_SYMBOL(__ptlrpc_free_bulk);
 
 /**
  * Set server timelimit for this req, i.e. how long are we willing to wait
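
For reference, the server-side pattern that results from this patch (a
condensed sketch of the ost_brw_read()/ost_brw_write() paths above,
with error handling and the actual transfer elided; desc, npages and
local_nb stand in for the real per-request state):

    /* the pages come from the OSD, which already holds them via
     * dbo_bufs_get(), so add them to the bulk without pinning */
    for (i = 0; i < npages; i++)
            ptlrpc_prep_bulk_page_nopin(desc, local_nb[i].page,
                                        local_nb[i].lnb_page_offset,
                                        local_nb[i].len);

    /* ... bulk transfer ... */

    /* free the descriptor without dropping page references;
     * the OSD releases its own pages via dbo_bufs_put() */
    ptlrpc_free_bulk_nopin(desc);

Client-side callers are unchanged: the new inline ptlrpc_prep_bulk_page()
and ptlrpc_free_bulk() wrappers keep pinning and unpinning, since client
pages are not owned by an OSD.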
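
A side effect worth noting: the OBD_FAIL_OST_CHECKSUM_* injection sites
can no longer allocate a throwaway page per corruption, because the
_nopin free path would never unpin (and thus free) it. Hence the single
module-lifetime ost_page_to_corrupt scratch page. Condensed (with off
and len as computed in the surrounding loop of ost_checksum_bulk()):

    if (ost_page_to_corrupt) {
            char *ptr  = kmap(desc->bd_iov[i].kiov_page) + off;
            char *ptr2 = kmap(ost_page_to_corrupt) + off;

            memcpy(ptr2, ptr, len);            /* copy the real data   */
            memcpy(ptr2, "bad4", min(4, len)); /* ...corrupt the copy  */
            kunmap(ost_page_to_corrupt);
            kunmap(desc->bd_iov[i].kiov_page);
            /* send the corrupted copy instead of the cached page */
            desc->bd_iov[i].kiov_page = ost_page_to_corrupt;
    }

The original page stays untouched in the OSD cache, and the scratch page
is allocated once in ost_init() and released once in ost_exit().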