Whamcloud - gitweb
LU-11392 tests: check race for llog_process_thread 92/33192/3
authorAlexander Boyko <c17825@cray.com>
Tue, 18 Sep 2018 12:58:41 +0000 (08:58 -0400)
committerOleg Drokin <green@whamcloud.com>
Tue, 23 Oct 2018 05:16:37 +0000 (05:16 +0000)
The patch adds 10h test at llog_test which runs at 60a sanity.
It reproduces a race between llog_process_thread and llog_add.
The llog should be wrapped so it has old data on disk and zero at
bitmap.
1. llog_process_thread reads part of llog at buffer.
1. process a last record, checks the next record fields
2. llog_add adds a record and marks new record at bitmap
1. check bitmap flag and process the old record from buffer

Test-Parameters: testlist=sanity envdefinitions=ONLY=60a
Cray-bug-id: LUS-6287
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: Ic89c81dd918d856f441df4d3257377e09b91a8cc
Reviewed-on: https://review.whamcloud.com/33192
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/obdclass/llog.c
lustre/obdclass/llog_test.c

index 77157fc..d18c609 100644 (file)
@@ -531,6 +531,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_TIME_IN_CHLOG_USER                0x1314
 #define CFS_FAIL_CHLOG_USER_REG_UNREG_RACE         0x1315
 #define OBD_FAIL_FORCE_GC_THREAD                   0x1316
+#define OBD_FAIL_LLOG_PROCESS_TIMEOUT              0x1317
 
 #define OBD_FAIL_LLITE                              0x1400
 #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE             0x1401
index 7f491ac..9046c65 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/kthread.h>
 #include <llog_swab.h>
 #include <lustre_log.h>
+#include <obd_support.h>
 #include <obd_class.h>
 #include "llog_internal.h"
 /*
@@ -610,6 +611,8 @@ repeat:
                        loghandle->lgh_cur_offset = (char *)rec - (char *)buf +
                                                    chunk_offset;
 
+                       OBD_FAIL_TIMEOUT(OBD_FAIL_LLOG_PROCESS_TIMEOUT, 2);
+
                        /* if set, process the callback on this record */
                        if (ext2_test_bit(index, LLOG_HDR_BITMAP(llh))) {
                                struct llog_cookie *lgc;
index f9a0eb3..9b555cd 100644 (file)
@@ -39,6 +39,8 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
 
 #include <obd_class.h>
 #include <lustre_fid.h>
@@ -1454,6 +1456,62 @@ out:
        RETURN(rc);
 }
 
+struct llog_process_info {
+       struct llog_handle      *lpi_loghandle;
+       llog_cb_t                lpi_cb;
+       void                    *lpi_cbdata;
+       void                    *lpi_catdata;
+       int                      lpi_rc;
+       struct completion        lpi_completion;
+       const struct lu_env     *lpi_env;
+       struct task_struct      *lpi_reftask;
+};
+
+
+static int llog_test_process_thread(void *arg)
+{
+       struct llog_process_info        *lpi = arg;
+
+       llog_process(NULL, lpi->lpi_loghandle, lpi->lpi_cb, lpi->lpi_cbdata,
+                    lpi->lpi_catdata);
+
+       complete(&lpi->lpi_completion);
+
+       return 0;
+}
+
+static int cat_check_old_cb(const struct lu_env *env, struct llog_handle *llh,
+                       struct llog_rec_hdr *rec, void *data)
+{
+       struct llog_logid_rec   *lir = (struct llog_logid_rec *)rec;
+       struct lu_fid            fid = {0};
+       struct lu_fid            *prev_fid = data;
+
+       if (rec->lrh_type != LLOG_LOGID_MAGIC) {
+               CERROR("invalid record in catalog\n");
+               RETURN(-EINVAL);
+       }
+
+       logid_to_fid(&lir->lid_id, &fid);
+
+       CWARN("seeing record at index %d - "DFID" in log "DFID"\n",
+             rec->lrh_index, PFID(&fid),
+             PFID(lu_object_fid(&llh->lgh_obj->do_lu)));
+
+       if (prev_fid->f_oid > fid.f_oid) {
+               CWARN("processing old record, fail\n");
+               prev_fid->f_oid = 0xbad;
+               RETURN(LLOG_PROC_BREAK);
+       }
+
+       if (prev_fid->f_oid == 0)
+               cfs_fail_loc = OBD_FAIL_LLOG_PROCESS_TIMEOUT;
+
+       *prev_fid = fid;
+
+       RETURN(0);
+}
+
 /* test catalog wrap around */
 static int llog_test_10(const struct lu_env *env, struct obd_device *obd)
 {
@@ -1946,6 +2004,60 @@ static int llog_test_10(const struct lu_env *env, struct obd_device *obd)
 
        CWARN("10g: llh_cat_idx has also successfully wrapped!\n");
 
+       /* catalog has only one valid entry other slots has outdated
+        * records. Trying to race the llog_thread_process with llog_add
+        * llog_thread_process read buffer and loop record on it.
+        * llog_add adds a record and mark a record in bitmap.
+        * llog_thread_process process record with old data. */
+       {
+       struct llog_process_info lpi;
+       struct lu_fid test_fid = {0};
+
+       lpi.lpi_loghandle = cath;
+       lpi.lpi_cb = cat_check_old_cb;
+       lpi.lpi_catdata = NULL;
+       lpi.lpi_cbdata = &test_fid;
+       init_completion(&lpi.lpi_completion);
+
+       kthread_run(llog_test_process_thread, &lpi, "llog_test_process_thread");
+
+       msleep(1 * MSEC_PER_SEC);
+       enospc = 0;
+       eok = 0;
+       CWARN("10h: write %d more log records\n", LLOG_TEST_RECNUM);
+       for (i = 0; i < LLOG_TEST_RECNUM; i++) {
+               rc = llog_cat_add(env, cath, &lmr.lmr_hdr, NULL);
+               if (rc && rc != -ENOSPC) {
+                       CERROR("10h: write %d records failed at #%d: %d\n",
+                              LLOG_TEST_RECNUM, i + 1, rc);
+                       GOTO(out, rc);
+               }
+               /* after last added plain LLOG has filled up, all new
+                * records add should fail with -ENOSPC */
+               if (rc == -ENOSPC) {
+                       enospc++;
+               } else {
+                       enospc = 0;
+                       eok++;
+               }
+       }
+
+       if ((enospc == 0) && (enospc+eok != LLOG_TEST_RECNUM)) {
+               CERROR("10h: all last records adds should have failed with"
+                      " -ENOSPC\n");
+               GOTO(out, rc = -EINVAL);
+       }
+
+       CWARN("10h: wrote %d records then %d failed with ENOSPC\n", eok,
+             enospc);
+
+       wait_for_completion(&lpi.lpi_completion);
+
+       if (test_fid.f_oid == 0xbad) {
+               CERROR("10h: race happened, old record was processed\n");
+               GOTO(out, rc = -EINVAL);
+       }
+       }
 out:
        cfs_fail_loc = 0;
        cfs_fail_val = 0;