Whamcloud - gitweb
LU-14599 osp: limit allocation at osp_sync_process_committed
author Alexander Boyko <alexander.boyko@hpe.com>
Wed, 5 Oct 2022 07:06:59 +0000 (00:06 -0700)
committer Andreas Dilger <adilger@whamcloud.com>
Tue, 11 Oct 2022 07:54:45 +0000 (07:54 +0000)
Sometimes osp cancels a very large cookie list with 64K elements.
In this case osp_sync_process_committed() tries to allocate 64 pages
and uses vmalloc.
The fix limits the memory allocation size to 4 pages with kmalloc, and
reuses the buffer in a loop.

Lustre-change: https://review.whamcloud.com/43250
Lustre-commit: 9b692e2e7d105f4926649ea46007ac65b24c4b6d

HPE-bug-id: LUS-9815
Fixes: 6d7332102 ("LU-11924 osp: combine llog cancel operations")
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Ic875335a28f78494fdb3cbc4b0145e5a43831ee8
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/48773
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/osp/osp_sync.c

index d49190a..bed2cf8 100644 (file)
@@ -991,7 +991,7 @@ static void osp_sync_process_committed(const struct lu_env *env,
        struct ptlrpc_request   *req;
        struct llog_ctxt        *ctxt;
        struct llog_handle      *llh;
-       int                     *arr;
+       int                     *arr, arr_size;
        LIST_HEAD(list);
        struct list_head         *le;
        struct llog_logid        lgid;
@@ -1031,10 +1031,16 @@ static void osp_sync_process_committed(const struct lu_env *env,
 
        list_for_each(le, &list)
                count++;
-       if (count > 2)
-               OBD_ALLOC_PTR_ARRAY_LARGE(arr, count);
-       else
+       if (count > 2) {
+               arr_size = sizeof(int) * count;
+               /* limit cookie array to order 2 */
+               arr_size = arr_size < (PAGE_SIZE * 4) ? arr_size :
+                       (PAGE_SIZE * 4);
+               OBD_ALLOC_LARGE(arr, arr_size);
+       } else {
                arr = NULL;
+               arr_size = 0;
+       }
        i = 0;
        while (!list_empty(&list)) {
                struct osp_job_req_args *jra;
@@ -1063,7 +1069,7 @@ static void osp_sync_process_committed(const struct lu_env *env,
                                rc = llog_cat_cancel_records(env, llh, 1,
                                                             &jra->jra_lcookie);
                                if (rc)
-                                       CERROR("%s: can't cancel record: %d\n",
+                                       CERROR("%s: can't cancel record: rc = %d\n",
                                               obd->obd_name, rc);
                        }
                } else {
@@ -1072,20 +1078,25 @@ static void osp_sync_process_committed(const struct lu_env *env,
                }
                ptlrpc_req_finished(req);
                done++;
+               if (arr &&
+                   ((i * sizeof(int)) == arr_size ||
+                    (list_empty(&list) && i > 0))) {
+                       rc = llog_cat_cancel_arr_rec(env, llh, &lgid, i, arr);
+
+                       if (rc)
+                               CERROR("%s: can't cancel %d records: rc = %d\n",
+                                      obd->obd_name, i, rc);
+                       else
+                               CDEBUG(D_OTHER, "%s: massive records cancel id "DFID" num %d\n",
+                                      obd->obd_name, PFID(&lgid.lgl_oi.oi_fid),
+                                      i);
+                       i = 0;
+               }
+
        }
-       if (arr && i > 0) {
-               rc = llog_cat_cancel_arr_rec(env, llh, &lgid, i, arr);
-
-               if (rc)
-                       CERROR("%s: can't cancel %d records rc: %d\n",
-                              obd->obd_name, i, rc);
-               else
-                       CDEBUG(D_OTHER, "%s: massive records cancel id "DFID\
-                              " num %d\n", obd->obd_name,
-                              PFID(&lgid.lgl_oi.oi_fid), i);
-       }
+
        if (arr)
-               OBD_FREE_PTR_ARRAY_LARGE(arr, count);
+               OBD_FREE_LARGE(arr, arr_size);
 
        llog_ctxt_put(ctxt);