From: Alexander Boyko Date: Wed, 5 Oct 2022 07:06:59 +0000 (-0700) Subject: LU-14599 osp: limit allocation at osp_sync_process_committed X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=989e202f8e9ee6676486450baec81ee4e92a7f9c;p=fs%2Flustre-release.git LU-14599 osp: limit allocation at osp_sync_process_committed Sometimes osp cancels a very large cookie list with 64K elements. In this case osp_sync_process_committed() tries to allocate 64 pages and uses vmalloc. The fix limits the memory allocation size to 4 pages with kmalloc, and reuses the buffer in a loop. Lustre-change: https://review.whamcloud.com/43250 Lustre-commit: 9b692e2e7d105f4926649ea46007ac65b24c4b6d HPE-bug-id: LUS-9815 Fixes: 6d7332102 ("LU-11924 osp: combine llog cancel operations") Signed-off-by: Alexander Boyko Change-Id: Ic875335a28f78494fdb3cbc4b0145e5a43831ee8 Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/48773 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index d49190a..bed2cf8 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -991,7 +991,7 @@ static void osp_sync_process_committed(const struct lu_env *env, struct ptlrpc_request *req; struct llog_ctxt *ctxt; struct llog_handle *llh; - int *arr; + int *arr, arr_size; LIST_HEAD(list); struct list_head *le; struct llog_logid lgid; @@ -1031,10 +1031,16 @@ static void osp_sync_process_committed(const struct lu_env *env, list_for_each(le, &list) count++; - if (count > 2) - OBD_ALLOC_PTR_ARRAY_LARGE(arr, count); - else + if (count > 2) { + arr_size = sizeof(int) * count; + /* limit cookie array to order 2 */ + arr_size = arr_size < (PAGE_SIZE * 4) ? 
arr_size : + (PAGE_SIZE * 4); + OBD_ALLOC_LARGE(arr, arr_size); + } else { arr = NULL; + arr_size = 0; + } i = 0; while (!list_empty(&list)) { struct osp_job_req_args *jra; @@ -1063,7 +1069,7 @@ static void osp_sync_process_committed(const struct lu_env *env, rc = llog_cat_cancel_records(env, llh, 1, &jra->jra_lcookie); if (rc) - CERROR("%s: can't cancel record: %d\n", + CERROR("%s: can't cancel record: rc = %d\n", obd->obd_name, rc); } } else { @@ -1072,20 +1078,25 @@ static void osp_sync_process_committed(const struct lu_env *env, } ptlrpc_req_finished(req); done++; + if (arr && + ((i * sizeof(int)) == arr_size || + (list_empty(&list) && i > 0))) { + rc = llog_cat_cancel_arr_rec(env, llh, &lgid, i, arr); + + if (rc) + CERROR("%s: can't cancel %d records: rc = %d\n", + obd->obd_name, i, rc); + else + CDEBUG(D_OTHER, "%s: massive records cancel id "DFID" num %d\n", + obd->obd_name, PFID(&lgid.lgl_oi.oi_fid), + i); + i = 0; + } + } - if (arr && i > 0) { - rc = llog_cat_cancel_arr_rec(env, llh, &lgid, i, arr); - - if (rc) - CERROR("%s: can't cancel %d records rc: %d\n", - obd->obd_name, i, rc); - else - CDEBUG(D_OTHER, "%s: massive records cancel id "DFID\ - " num %d\n", obd->obd_name, - PFID(&lgid.lgl_oi.oi_fid), i); - } + if (arr) - OBD_FREE_PTR_ARRAY_LARGE(arr, count); + OBD_FREE_LARGE(arr, arr_size); llog_ctxt_put(ctxt);