LU-16713 llite: writeback/commit pages under memory pressure

author Qian Yingjin <qian@ddn.com>

Tue, 6 Jun 2023 08:11:30 +0000 (15:11 +0700)

committer Oleg Drokin <green@whamcloud.com>

Tue, 26 Sep 2023 14:33:35 +0000 (14:33 +0000)
author Qian Yingjin <qian@ddn.com>
Tue, 6 Jun 2023 08:11:30 +0000 (15:11 +0700)
committer Oleg Drokin <green@whamcloud.com>
Tue, 26 Sep 2023 14:33:35 +0000 (14:33 +0000)
diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4

index 9a57e4d..558c503 100644 (file)
--- a/lustre/autoconf/lustre-core.m4
+++ b/lustre/autoconf/lustre-core.m4
@@ -3301,6 +3301,28 @@ LB_CHECK_EXPORT([delete_from_page_cache], [mm/filemap.c],
                         [delete_from_page_cache is exported])])
  ]) # LC_EXPORTS_DELETE_FROM_PAGE_CACHE
  
+
+#
+# LC_HAVE_WB_STAT_MOD
+#
+# Kernel 5.16-rc1 bd3488e7b4d61780eb3dfaca1cc6f4026bcffd48
+# mm/writeback: Rename __add_wb_stat() to wb_stat_mod()
+#
+AC_DEFUN([LC_HAVE_WB_STAT_MOD], [
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_CHECK_COMPILE([if wb_stat_mod() exists],
+wb_stat_mod, [
+       #include <linux/backing-dev.h>
+],[
+       wb_stat_mod(NULL, WB_WRITEBACK, 1);
+],[
+       AC_DEFINE(HAVE_WB_STAT_MOD, 1,
+               [wb_stat_mod() exists])
+])
+EXTRA_KCFLAGS="$tmp_flags"
+]) # LC_HAVE_WB_STAT_MOD
+
  #
  # LC_HAVE_INVALIDATE_FOLIO
  #
@@ -4385,6 +4407,7 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [
         LC_HAVE_SECURITY_DENTRY_INIT_WITH_XATTR_NAME_ARG
         LC_HAVE_KIOCB_COMPLETE_2ARGS
         LC_EXPORTS_DELETE_FROM_PAGE_CACHE
+       LC_HAVE_WB_STAT_MOD
  
         # 5.17
         LC_HAVE_INVALIDATE_FOLIO
diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h

index 5f92110..fbfd642 100644 (file)
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -380,6 +380,14 @@ struct cl_object_operations {
           */
         int (*coo_attr_update)(const struct lu_env *env, struct cl_object *obj,
                                const struct cl_attr *attr, unsigned valid);
+       /**
+        * Mark the inode dirty. This adds the inode to the writeback list
+        * of the corresponding @bdi_writeback, so that writing the dirty
+        * pages out to OSTs is deferred to the kernel writeback
+        * mechanism.
+        */
+       void (*coo_dirty_for_sync)(const struct lu_env *env,
+                                  struct cl_object *obj);
          /**
           * Update object configuration. Called top-to-bottom to modify object
           * configuration.
@@ -1707,14 +1715,16 @@ enum cl_io_lock_dmd {
  
  enum cl_fsync_mode {
         /** start writeback, do not wait for them to finish */
-       CL_FSYNC_NONE  = 0,
+       CL_FSYNC_NONE           = 0,
         /** start writeback and wait for them to finish */
-       CL_FSYNC_LOCAL = 1,
+       CL_FSYNC_LOCAL          = 1,
         /** discard all of dirty pages in a specific file range */
-       CL_FSYNC_DISCARD = 2,
+       CL_FSYNC_DISCARD        = 2,
         /** start writeback and make sure they have reached storage before
          * return. OST_SYNC RPC must be issued and finished */
-       CL_FSYNC_ALL   = 3
+       CL_FSYNC_ALL            = 3,
+       /** start writeback, thus the kernel can reclaim some memory */
+       CL_FSYNC_RECLAIM        = 4,
  };
  
  struct cl_io_rw_common {
@@ -2138,6 +2148,7 @@ int  cl_object_attr_get(const struct lu_env *env, struct cl_object *obj,
                         struct cl_attr *attr);
  int  cl_object_attr_update(const struct lu_env *env, struct cl_object *obj,
                             const struct cl_attr *attr, unsigned valid);
+void cl_object_dirty_for_sync(const struct lu_env *env, struct cl_object *obj);
  int  cl_object_glimpse    (const struct lu_env *env, struct cl_object *obj,
                             struct ost_lvb *lvb);
  int  cl_conf_set          (const struct lu_env *env, struct cl_object *obj,
diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h

index cc19072..1d05e87 100644 (file)
--- a/lustre/include/lustre_compat.h
+++ b/lustre/include/lustre_compat.h
@@ -663,6 +663,10 @@ static inline bool is_root_inode(struct inode *inode)
  #define ll_access_ok(ptr, len) access_ok(ptr, len)
  #endif
  
+#ifdef HAVE_WB_STAT_MOD
+#define __add_wb_stat(wb, item, amount)                wb_stat_mod(wb, item, amount)
+#endif
+
  #ifdef HAVE_SEC_RELEASE_SECCTX_1ARG
  #ifndef HAVE_LSMCONTEXT_INIT
  /* Ubuntu 5.19 */
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index c4c7b56..1eccf51 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -4809,7 +4809,8 @@ int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
         ENTRY;
  
         if (mode != CL_FSYNC_NONE && mode != CL_FSYNC_LOCAL &&
-           mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL)
+           mode != CL_FSYNC_DISCARD && mode != CL_FSYNC_ALL &&
+           mode != CL_FSYNC_RECLAIM)
                 RETURN(-EINVAL);
  
         env = cl_env_get(&refcheck);
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c

index eba1cf4..918e867 100644 (file)
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -1486,8 +1486,8 @@ void ll_put_super(struct super_block *sb)
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         char *profilenm = get_profile_name(sb);
         unsigned long cfg_instance = ll_get_cfg_instance(sb);
-       long ccc_count;
-       int next, force = 1, rc = 0;
+       int next, force = 1;
+
         ENTRY;
  
         if (IS_ERR(sbi))
@@ -1509,17 +1509,6 @@ void ll_put_super(struct super_block *sb)
                         force = obd->obd_force;
         }
  
-       /* Wait for unstable pages to be committed to stable storage */
-       if (force == 0) {
-               rc = l_wait_event_abortable(
-                       sbi->ll_cache->ccc_unstable_waitq,
-                       atomic_long_read(&sbi->ll_cache->ccc_unstable_nr) == 0);
-       }
-
-       ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
-       if (force == 0 && rc != -ERESTARTSYS)
-               LASSERTF(ccc_count == 0, "count: %li\n", ccc_count);
-
         /* We need to set force before the lov_disconnect in
          * lustre_common_put_super, since l_d cleans up osc's as well.
          */
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c

index 7a6d0c3..d03a729 100644 (file)
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -1555,6 +1555,7 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
         enum cl_fsync_mode mode;
         int range_whole = 0;
         int result;
+
         ENTRY;
  
         if (wbc->range_cyclic) {
@@ -1573,6 +1574,37 @@ int ll_writepages(struct address_space *mapping, struct writeback_control *wbc)
         if (wbc->sync_mode == WB_SYNC_ALL)
                 mode = CL_FSYNC_LOCAL;
  
+       if (wbc->sync_mode == WB_SYNC_NONE) {
+#ifdef SB_I_CGROUPWB
+               struct bdi_writeback *wb;
+
+               /*
+                * As it may break full stripe writes on the inode,
+                * disable periodic kupdate writeback (@wbc->for_kupdate)?
+                */
+
+               /*
+                * The system is under memory pressure and it is now reclaiming
+                * cache pages.
+                */
+               wb = inode_to_wb(inode);
+               if (wbc->for_background ||
+                   (wb->start_all_reason == WB_REASON_VMSCAN &&
+                    test_bit(WB_start_all, &wb->state)))
+                       mode = CL_FSYNC_RECLAIM;
+#else
+               /*
+                * Old kernels such as RHEL7 give us no way to tell whether
+                * writeback was started for memory reclaim
+                * (WB_REASON_TRY_TO_FREE_PAGES there, WB_REASON_VMSCAN in
+                * newer kernels), so force the mode to CL_FSYNC_RECLAIM
+                * unless this is periodic kupdate writeback.
+                */
+               if (!wbc->for_kupdate)
+                       mode = CL_FSYNC_RECLAIM;
+#endif
+       }
+
         if (ll_i2info(inode)->lli_clob == NULL)
                 RETURN(0);
  
diff --git a/lustre/llite/vvp_object.c b/lustre/llite/vvp_object.c

index be8ad1b..42a5148 100644 (file)
--- a/lustre/llite/vvp_object.c
+++ b/lustre/llite/vvp_object.c
@@ -127,6 +127,13 @@ static int vvp_attr_update(const struct lu_env *env, struct cl_object *obj,
         return 0;
  }
  
+static void vvp_dirty_for_sync(const struct lu_env *env, struct cl_object *obj)
+{
+       struct inode *inode = vvp_object_inode(obj);
+
+       __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+}
+
  static int vvp_conf_set(const struct lu_env *env, struct cl_object *obj,
                         const struct cl_object_conf *conf)
  {
@@ -291,6 +298,7 @@ static const struct cl_object_operations vvp_ops = {
         .coo_io_init      = vvp_io_init,
         .coo_attr_get     = vvp_attr_get,
         .coo_attr_update  = vvp_attr_update,
+       .coo_dirty_for_sync = vvp_dirty_for_sync,
         .coo_conf_set     = vvp_conf_set,
         .coo_prune        = vvp_prune,
         .coo_glimpse      = vvp_object_glimpse,
diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c

index b7d8ade..b8e094c 100644 (file)
--- a/lustre/mdc/mdc_dev.c
+++ b/lustre/mdc/mdc_dev.c
@@ -1193,6 +1193,16 @@ static int mdc_io_fsync_start(const struct lu_env *env,
  
         ENTRY;
  
+       if (fio->fi_mode == CL_FSYNC_RECLAIM) {
+               struct client_obd *cli = osc_cli(osc);
+
+               if (!atomic_long_read(&cli->cl_unstable_count)) {
+                       /* Stop flush when there are no unstable pages? */
+                       CDEBUG(D_CACHE, "unstable count is zero\n");
+                       RETURN(0);
+               }
+       }
+
         /* a MDC lock always covers whole object, do sync for whole
          * possible range despite of supplied start/end values.
          */
@@ -1202,19 +1212,25 @@ static int mdc_io_fsync_start(const struct lu_env *env,
                 fio->fi_nr_written += result;
                 result = 0;
         }
-       if (fio->fi_mode == CL_FSYNC_ALL) {
+       if (fio->fi_mode == CL_FSYNC_ALL || fio->fi_mode == CL_FSYNC_RECLAIM) {
+               struct osc_io *oio = cl2osc_io(env, slice);
+               struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
                 int rc;
  
-               rc = osc_cache_wait_range(env, osc, 0, CL_PAGE_EOF);
-               if (result == 0)
-                       result = rc;
+               if (fio->fi_mode == CL_FSYNC_ALL) {
+                       rc = osc_cache_wait_range(env, osc, 0, CL_PAGE_EOF);
+                       if (result == 0)
+                               result = rc;
+               }
                 /* Use OSC sync code because it is asynchronous.
                  * It is to be added into MDC and avoid the using of
                  * OST_SYNC at both MDC and MDT.
                  */
                 rc = osc_fsync_ost(env, osc, fio);
-               if (result == 0)
+               if (result == 0) {
+                       cbargs->opc_rpc_sent = 1;
                         result = rc;
+               }
         }
  
         RETURN(result);
diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c

index 718f182..e3bc29f 100644 (file)
--- a/lustre/obdclass/cl_object.c
+++ b/lustre/obdclass/cl_object.c
@@ -253,6 +253,25 @@ int cl_object_attr_update(const struct lu_env *env, struct cl_object *top,
  EXPORT_SYMBOL(cl_object_attr_update);
  
  /**
+ * Mark the inode dirty when it has uncommitted (unstable) pages, by calling
+ * coo_dirty_for_sync on every layer that implements it, so that writeback
+ * under memory pressure can commit and unpin the pages.
+ */
+void cl_object_dirty_for_sync(const struct lu_env *env, struct cl_object *top)
+{
+       struct cl_object *obj;
+
+       ENTRY;
+
+       cl_object_for_each(obj, top) {
+               if (obj->co_ops->coo_dirty_for_sync != NULL)
+                       obj->co_ops->coo_dirty_for_sync(env, obj);
+       }
+       EXIT;
+}
+EXPORT_SYMBOL(cl_object_dirty_for_sync);
+
+/**
   * Notifies layers (bottom-to-top) that glimpse AST was received.
   *
   * Layers have to fill \a lvb fields with information that will be shipped
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c

index 39a7f72..fe57fd8 100644 (file)
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -1187,8 +1187,7 @@ struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
         spin_lock_init(&cache->ccc_lru_lock);
         INIT_LIST_HEAD(&cache->ccc_lru);
  
-       /* turn unstable check off by default as it impacts performance */
-       cache->ccc_unstable_check = 0;
+       cache->ccc_unstable_check = 1;
         atomic_long_set(&cache->ccc_unstable_nr, 0);
         init_waitqueue_head(&cache->ccc_unstable_waitq);
         mutex_init(&cache->ccc_max_cache_mb_lock);
diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c

index 516037c..5030427 100644 (file)
--- a/lustre/osc/osc_io.c
+++ b/lustre/osc/osc_io.c
@@ -957,15 +957,26 @@ EXPORT_SYMBOL(osc_fsync_ost);
  static int osc_io_fsync_start(const struct lu_env *env,
                               const struct cl_io_slice *slice)
  {
-       struct cl_io       *io  = slice->cis_io;
+       struct cl_io *io = slice->cis_io;
         struct cl_fsync_io *fio = &io->u.ci_fsync;
-       struct cl_object   *obj = slice->cis_obj;
-       struct osc_object  *osc = cl2osc(obj);
-       pgoff_t start  = fio->fi_start >> PAGE_SHIFT;
-       pgoff_t end    = fio->fi_end >> PAGE_SHIFT;
-       int     result = 0;
+       struct cl_object *obj = slice->cis_obj;
+       struct osc_object *osc = cl2osc(obj);
+       pgoff_t start = fio->fi_start >> PAGE_SHIFT;
+       pgoff_t end = fio->fi_end >> PAGE_SHIFT;
+       int result = 0;
+
         ENTRY;
  
+       if (fio->fi_mode == CL_FSYNC_RECLAIM) {
+               struct client_obd *cli = osc_cli(osc);
+
+               if (!atomic_long_read(&cli->cl_unstable_count)) {
+                       /* Stop flush when there are no unstable pages? */
+                       CDEBUG(D_CACHE, "unstable count is zero\n");
+                       RETURN(0);
+               }
+       }
+
         if (fio->fi_end == OBD_OBJECT_EOF)
                 end = CL_PAGE_EOF;
  
@@ -982,20 +993,30 @@ static int osc_io_fsync_start(const struct lu_env *env,
                 fio->fi_nr_written += result;
                 result = 0;
         }
-       if (fio->fi_mode == CL_FSYNC_ALL) {
+       if (fio->fi_mode == CL_FSYNC_ALL || fio->fi_mode == CL_FSYNC_RECLAIM) {
+               struct osc_io *oio = cl2osc_io(env, slice);
+               struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
                 int rc;
  
                 /* we have to wait for writeback to finish before we can
                  * send OST_SYNC RPC. This is bad because it causes extents
                  * to be written osc by osc. However, we usually start
                  * writeback before CL_FSYNC_ALL so this won't have any real
-                * problem. */
-               rc = osc_cache_wait_range(env, osc, start, end);
-               if (result == 0)
-                       result = rc;
+                * problem.
+                * We do not have to wait for writeback to finish in the memory
+                * reclaim environment.
+                */
+               if (fio->fi_mode == CL_FSYNC_ALL) {
+                       rc = osc_cache_wait_range(env, osc, start, end);
+                       if (result == 0)
+                               result = rc;
+               }
+
                 rc = osc_fsync_ost(env, osc, fio);
-               if (result == 0)
+               if (result == 0) {
+                       cbargs->opc_rpc_sent = 1;
                         result = rc;
+               }
         }
  
         RETURN(result);
@@ -1005,16 +1026,17 @@ void osc_io_fsync_end(const struct lu_env *env,
                       const struct cl_io_slice *slice)
  {
         struct cl_fsync_io *fio = &slice->cis_io->u.ci_fsync;
-       struct cl_object   *obj = slice->cis_obj;
+       struct cl_object *obj = slice->cis_obj;
+       struct osc_io *oio = cl2osc_io(env, slice);
+       struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
         pgoff_t start = fio->fi_start >> PAGE_SHIFT;
         pgoff_t end   = fio->fi_end >> PAGE_SHIFT;
         int result = 0;
  
         if (fio->fi_mode == CL_FSYNC_LOCAL) {
                 result = osc_cache_wait_range(env, cl2osc(obj), start, end);
-       } else if (fio->fi_mode == CL_FSYNC_ALL) {
-               struct osc_io           *oio    = cl2osc_io(env, slice);
-               struct osc_async_cbargs *cbargs = &oio->oi_cbarg;
+       } else if (cbargs->opc_rpc_sent && (fio->fi_mode == CL_FSYNC_ALL ||
+                                           fio->fi_mode == CL_FSYNC_RECLAIM)) {
  
                 wait_for_completion(&cbargs->opc_sync);
                 if (result == 0)
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index 477b2bd..2d5afc8 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -2512,6 +2512,7 @@ static int brw_interpret(const struct lu_env *env,
         struct osc_extent *tmp;
         struct client_obd *cli = aa->aa_cli;
         unsigned long transferred = 0;
+       struct cl_object *obj = NULL;
  
         ENTRY;
  
@@ -2552,7 +2553,6 @@ static int brw_interpret(const struct lu_env *env,
                 struct obdo *oa = aa->aa_oa;
                 struct cl_attr *attr = &osc_env_info(env)->oti_attr;
                 unsigned long valid = 0;
-               struct cl_object *obj;
                 struct osc_async_page *last;
  
                 last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]);
@@ -2602,8 +2602,16 @@ static int brw_interpret(const struct lu_env *env,
         OBD_SLAB_FREE_PTR(aa->aa_oa, osc_obdo_kmem);
         aa->aa_oa = NULL;
  
-       if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0)
+       if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0) {
                 osc_inc_unstable_pages(req);
+               /*
+                * If req->rq_committed is set, it means that the dirty pages
+                * have already committed into the stable storage on OSTs
+                * (i.e. Direct I/O).
+                */
+               if (!req->rq_committed)
+                       cl_object_dirty_for_sync(env, cl_object_top(obj));
+       }
  
         list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) {
                 list_del_init(&ext->oe_link);
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index f3dc745..74b0a06 100755 (executable)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -58,7 +58,12 @@ fi
  # skip cgroup tests on RHEL8.1 kernels until they are fixed
  if (( $LINUX_VERSION_CODE >= $(version_code 4.18.0) &&
        $LINUX_VERSION_CODE <  $(version_code 5.4.0) )); then
-       always_except LU-13063 411
+       always_except LU-13063 411a
+fi
+
+# skip cgroup tests for kernels < v4.18.0
+if (( $LINUX_VERSION_CODE < $(version_code 4.18.0) )); then
+       always_except LU-13063 411b
  fi
  
  #                                  5              12     8   12  15   (min)"
@@ -27472,10 +27477,11 @@ run_test 410 "Test inode number returned from kernel thread"
  
  cleanup_test411_cgroup() {
         trap 0
+       cat $1/memory.stat
         rmdir "$1"
  }
  
-test_411() {
+test_411a() {
         local cg_basedir=/sys/fs/cgroup/memory
         # LU-9966
         test -f "$cg_basedir/memory.kmem.limit_in_bytes" ||
@@ -27500,7 +27506,90 @@ test_411() {
  
         return 0
  }
-run_test 411 "Slab allocation error with cgroup does not LBUG"
+run_test 411a "Slab allocation error with cgroup does not LBUG"
+
+test_411b() {
+       local cg_basedir=/sys/fs/cgroup/memory
+       # LU-9966
+       [ -e "$cg_basedir/memory.kmem.limit_in_bytes" ] ||
+               skip "no setup for cgroup"
+       $LFS setstripe -c 2 $DIR/$tfile || error "unable to setstripe"
+       # testing suggests we can't reliably avoid OOM with a 64M limit, but it
+       # seems reasonable to ask that we have at least 128M in the cgroup
+       local memlimit_mb=256
+
+       # Create a cgroup and set memory limit
+       # (tfile is used as an easy way to get a recognizable cgroup name)
+       local cgdir=$cg_basedir/$tfile
+       mkdir $cgdir || error "cgroup mkdir '$cgdir' failed"
+       stack_trap "cleanup_test411_cgroup $cgdir" EXIT
+       echo $((memlimit_mb * 1024 * 1024)) > $cgdir/memory.limit_in_bytes
+
+       echo "writing first file"
+       # Write a file 4x the memory limit in size
+       bash -c "echo \$$ > $cgdir/tasks && dd if=/dev/zero of=$DIR/$tfile bs=1M count=$((memlimit_mb * 4))" ||
+               error "(1) failed to write successfully"
+
+       sync
+       cancel_lru_locks osc
+
+       rm -f $DIR/$tfile
+       $LFS setstripe -c 2 $DIR/$tfile || error "unable to setstripe"
+
+       # Try writing at a larger block size
+       # NB: if block size is >= 1/2 cgroup size, we sometimes get OOM killed
+       # so test with 1/4 cgroup size (this seems reasonable to me - we do
+       # need *some* memory to do IO in)
+       echo "writing at larger block size"
+       bash -c "echo \$$ > $cgdir/tasks && dd if=/dev/zero of=$DIR/$tfile bs=64M count=$((memlimit_mb * 4 / 128))" ||
+               error "(3) failed to write successfully"
+
+       sync
+       cancel_lru_locks osc
+       rm -f $DIR/$tfile
+       $LFS setstripe -c 2 $DIR/$tfile.{1..4} || error "unable to setstripe"
+
+       # Try writing multiple files at once
+       echo "writing multiple files"
+       bash -c "echo \$$ > $cgdir/tasks && dd if=/dev/zero of=$DIR/$tfile.1 bs=32M count=$((memlimit_mb * 4 / 64))" &
+       local pid1=$!
+       bash -c "echo \$$ > $cgdir/tasks && dd if=/dev/zero of=$DIR/$tfile.2 bs=32M count=$((memlimit_mb * 4 / 64))" &
+       local pid2=$!
+       bash -c "echo \$$ > $cgdir/tasks && dd if=/dev/zero of=$DIR/$tfile.3 bs=32M count=$((memlimit_mb * 4 / 64))" &
+       local pid3=$!
+       bash -c "echo \$$ > $cgdir/tasks && dd if=/dev/zero of=$DIR/$tfile.4 bs=32M count=$((memlimit_mb * 4 / 64))" &
+       local pid4=$!
+
+       wait $pid1
+       local rc1=$?
+       wait $pid2
+       local rc2=$?
+       wait $pid3
+       local rc3=$?
+       wait $pid4
+       local rc4=$?
+       if (( rc1 != 0)); then
+               error "error writing to file from $pid1"
+       fi
+       if (( rc2 != 0)); then
+               error "error writing to file from $pid2"
+       fi
+       if (( rc3 != 0)); then
+               error "error writing to file from $pid3"
+       fi
+       if (( rc4 != 0)); then
+               error "error writing to file from $pid4"
+       fi
+
+       sync
+       cancel_lru_locks osc
+
+       # These files can be large-ish (~2 GiB total), so delete them rather
+       # than leave for later cleanup
+       rm -f $DIR/$tfile.*
+       return 0
+}
+run_test 411b "confirm Lustre can avoid OOM with reasonable cgroups limits"
  
  test_412() {
         (( $MDSCOUNT > 1 )) || skip_env "needs >= 2 MDTs"
author	Qian Yingjin <qian@ddn.com>
	Tue, 6 Jun 2023 08:11:30 +0000 (15:11 +0700)
committer	Oleg Drokin <green@whamcloud.com>
	Tue, 26 Sep 2023 14:33:35 +0000 (14:33 +0000)
lustre/autoconf/lustre-core.m4		patch \| blob \| history
lustre/include/cl_object.h		patch \| blob \| history
lustre/include/lustre_compat.h		patch \| blob \| history
lustre/llite/file.c		patch \| blob \| history
lustre/llite/llite_lib.c		patch \| blob \| history
lustre/llite/rw.c		patch \| blob \| history
lustre/llite/vvp_object.c		patch \| blob \| history
lustre/mdc/mdc_dev.c		patch \| blob \| history
lustre/obdclass/cl_object.c		patch \| blob \| history
lustre/obdclass/cl_page.c		patch \| blob \| history
lustre/osc/osc_io.c		patch \| blob \| history
lustre/osc/osc_request.c		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history