Whamcloud - gitweb
LU-11392 llog: fix race llog_process_thread vs llog_add 93/33193/3
authorAlexander Boyko <c17825@cray.com>
Tue, 18 Sep 2018 13:13:36 +0000 (09:13 -0400)
committerOleg Drokin <green@whamcloud.com>
Fri, 5 Oct 2018 22:25:55 +0000 (22:25 +0000)
The race requires a wrapped llog, so that old data is still present on
disk while the corresponding bits in the bitmap are zero.
  1. llog_process_thread reads part of the llog into a buffer.
  2. llog_process_thread processes the last record and checks the
     fields of the next record.
  3. llog_add adds a record and marks the new record in the bitmap.
  4. llog_process_thread checks the bitmap flag and processes the old
     record from the buffer.

After LU-7001 a small window for the race still exists. It is located
between the partial_chunk check and ext2_test_bit.
This patch closes that window.

Cray-bug-id: LUS-6287
Signed-off-by: Alexander Boyko <c17825@cray.com>
Change-Id: I45a742af0c987aa159a22a3255a4fb9d430b6a84
Reviewed-on: https://review.whamcloud.com/33193
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alexander Zarochentsev <c17826@cray.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/obdclass/llog.c

index 53887ae..dc33905 100644 (file)
@@ -531,16 +531,9 @@ repeat:
                        CDEBUG(D_OTHER, "after swabbing, type=%#x idx=%d\n",
                               rec->lrh_type, rec->lrh_index);
 
-                       /* the bitmap could be changed during processing
-                        * records from the chunk. For wrapped catalog
-                        * it means we can read deleted record and try to
-                        * process it. Check this case and reread the chunk. */
-
                        /* for partial chunk the end of it is zeroed, check
                         * for index 0 to distinguish it. */
-                       if ((partial_chunk && rec->lrh_index == 0) ||
-                            (index == lh_last_idx &&
-                             lh_last_idx != (loghandle->lgh_last_idx + 1))) {
+                       if (partial_chunk && rec->lrh_index == 0) {
                                /* concurrent llog_add() might add new records
                                 * while llog_processing, check this is not
                                 * the case and re-read the current chunk
@@ -616,7 +609,23 @@ repeat:
                                struct llog_cookie *lgc;
                                __u64   tmp_off;
                                int     tmp_idx;
+                       /* the bitmap could be changed during processing
+                        * records from the chunk. For wrapped catalog
+                        * it means we can read deleted record and try to
+                        * process it. Check this case and reread the chunk.
+                        * Checking the race with llog_add the bit is set
+                        * after incrementation of lgh_last_idx */
+                               if (index == lh_last_idx &&
+                                   lh_last_idx !=
+                                   (loghandle->lgh_last_idx + 1)) {
+                                       /* save offset inside buffer for
+                                        *  the re-read */
+                                       buf_offset = (char *)rec - (char *)buf;
+                                       cur_offset = chunk_offset;
+                                       repeated = true;
+                                       goto repeat;
 
+                               }
                                if (lti != NULL) {
                                        lgc = &lti->lgi_cookie;
                                        /* store lu_env for recursive calls */