Whamcloud - gitweb
LU-14599 osp: limit allocation at osp_sync_process_committed 50/43250/5
author: Alexander Boyko <alexander.boyko@hpe.com>
Fri, 9 Apr 2021 12:16:55 +0000 (08:16 -0400)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 5 May 2021 02:50:20 +0000 (02:50 +0000)
Sometimes osp cancels a very large cookie list with 64K elements.
In this case osp_sync_process_committed() tries to allocate 64 pages
and uses vmalloc.
The fix limits the memory allocation size to 4 pages with kmalloc, and
reuses it in a loop.

HPE-bug-id: LUS-9815
Fixes: 6d7332102 ("LU-11924 osp: combine llog cancel operations")
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Ic875335a28f78494fdb3cbc4b0145e5a43831ee8
Reviewed-on: https://review.whamcloud.com/43250
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osp/osp_sync.c

index ca95f7e..2ea884c 100644 (file)
@@ -996,7 +996,7 @@ static void osp_sync_process_committed(const struct lu_env *env,
        struct ptlrpc_request   *req;
        struct llog_ctxt        *ctxt;
        struct llog_handle      *llh;
-       int                     *arr;
+       int                     *arr, arr_size;
        LIST_HEAD(list);
        struct list_head         *le;
        struct llog_logid        lgid;
@@ -1036,10 +1036,16 @@ static void osp_sync_process_committed(const struct lu_env *env,
 
        list_for_each(le, &list)
                count++;
-       if (count > 2)
-               OBD_ALLOC_PTR_ARRAY_LARGE(arr, count);
-       else
+       if (count > 2) {
+               arr_size = sizeof(int) * count;
+               /* limit cookie array to order 2 */
+               arr_size = arr_size < (PAGE_SIZE * 4) ? arr_size :
+                       (PAGE_SIZE * 4);
+               OBD_ALLOC_LARGE(arr, arr_size);
+       } else {
                arr = NULL;
+               arr_size = 0;
+       }
        i = 0;
        while (!list_empty(&list)) {
                struct osp_job_req_args *jra;
@@ -1068,7 +1074,7 @@ static void osp_sync_process_committed(const struct lu_env *env,
                                rc = llog_cat_cancel_records(env, llh, 1,
                                                             &jra->jra_lcookie);
                                if (rc)
-                                       CERROR("%s: can't cancel record: %d\n",
+                                       CERROR("%s: can't cancel record: rc = %d\n",
                                               obd->obd_name, rc);
                        }
                } else {
@@ -1077,20 +1083,25 @@ static void osp_sync_process_committed(const struct lu_env *env,
                }
                ptlrpc_req_finished(req);
                done++;
+               if (arr &&
+                   ((i * sizeof(int)) == arr_size ||
+                    (list_empty(&list) && i > 0))) {
+                       rc = llog_cat_cancel_arr_rec(env, llh, &lgid, i, arr);
+
+                       if (rc)
+                               CERROR("%s: can't cancel %d records: rc = %d\n",
+                                      obd->obd_name, i, rc);
+                       else
+                               CDEBUG(D_OTHER, "%s: massive records cancel id "DFID" num %d\n",
+                                      obd->obd_name, PFID(&lgid.lgl_oi.oi_fid),
+                                      i);
+                       i = 0;
+               }
+
        }
-       if (arr && i > 0) {
-               rc = llog_cat_cancel_arr_rec(env, llh, &lgid, i, arr);
-
-               if (rc)
-                       CERROR("%s: can't cancel %d records rc: %d\n",
-                              obd->obd_name, i, rc);
-               else
-                       CDEBUG(D_OTHER, "%s: massive records cancel id "DFID\
-                              " num %d\n", obd->obd_name,
-                              PFID(&lgid.lgl_oi.oi_fid), i);
-       }
+
        if (arr)
-               OBD_FREE_PTR_ARRAY_LARGE(arr, count);
+               OBD_FREE_LARGE(arr, arr_size);
 
        llog_ctxt_put(ctxt);