Whamcloud - gitweb
LU-15938 llog: llog_reader to detect more corruptions 34/47934/6
author Mikhail Pershin <mpershin@whamcloud.com>
Tue, 12 Jul 2022 06:40:38 +0000 (09:40 +0300)
committer Oleg Drokin <green@whamcloud.com>
Mon, 8 Aug 2022 19:53:52 +0000 (19:53 +0000)
Improve llog_reader to detect more corruptions and report
errors
 - notify if llog bitmap has bits set with no records in llog
 - compare the header's record count with the number of records
   actually found
 - fix the number of records to output, preventing wrong output
   of a NOT SET record
 - list the missing records when a gap in record indices is found
 - count all errors found, add prefix 'error:' in output for
   better output processing by third-party scripts
 - don't exit immediately in case of an error but continue if
   possible and output all valid data that was read

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: Ic47dc6bb6cbdd9db6f888a0b892254403a628912
Reviewed-on: https://review.whamcloud.com/47934
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/utils/llog_reader.c

index 153a471..f0fb503 100644 (file)
@@ -219,7 +219,7 @@ int llog_pack_buffer(int fd, struct llog_log_hdr **llog,
        char *file_buf = NULL, *recs_buf = NULL;
        struct llog_rec_hdr **recs_pr = NULL;
        char *ptr = NULL;
-       int count;
+       int count, errors = 0;
        int i, last_idx;
 
        *recs = NULL;
@@ -229,7 +229,7 @@ int llog_pack_buffer(int fd, struct llog_log_hdr **llog,
        if (rc < 0) {
                rc = -errno;
                llapi_error(LLAPI_MSG_ERROR, rc, "Got file stat error.");
-               goto out;
+               return rc;
        }
 
        file_size = st.st_size;
@@ -238,14 +238,14 @@ int llog_pack_buffer(int fd, struct llog_log_hdr **llog,
                            "File too small for llog header: want=%zd got=%lld",
                            sizeof(**llog), file_size);
                rc = -EIO;
-               goto out;
+               return rc;
        }
 
        file_buf = malloc(file_size);
        if (!file_buf) {
                rc = -ENOMEM;
                llapi_error(LLAPI_MSG_ERROR, rc, "Memory Alloc for file_buf.");
-               goto out;
+               return rc;
        }
        *llog = (struct llog_log_hdr *)file_buf;
 
@@ -300,65 +300,89 @@ int llog_pack_buffer(int fd, struct llog_log_hdr **llog,
                int idx;
                unsigned long offset;
 
-               if (ptr + sizeof(**recs_pr) > file_buf + file_size) {
-                       rc = -EINVAL;
-                       llapi_error(LLAPI_MSG_ERROR, rc,
-                                   "The log is corrupt (too big at %d)", i);
-                       goto clear_recs_buf;
+               offset = (unsigned long)ptr - (unsigned long)file_buf;
+               if (offset + sizeof(**recs_pr) > file_size) {
+                       printf("error: rec header is trimmed by EOF, last idx #%d offset %lu\n",
+                              last_idx, offset);
+                       errors++;
+                       break;
                }
-
                cur_rec = (struct llog_rec_hdr *)ptr;
                idx = __le32_to_cpu(cur_rec->lrh_index);
-               recs_pr[i] = cur_rec;
-               offset = (unsigned long)ptr - (unsigned long)file_buf;
                if (cur_rec->lrh_len == 0 ||
                    cur_rec->lrh_len > (*llog)->llh_hdr.lrh_len) {
                        cur_rec->lrh_len = (*llog)->llh_hdr.lrh_len -
                                offset % (*llog)->llh_hdr.lrh_len;
                        printf("off %lu skip %u to next chunk.\n", offset,
                               cur_rec->lrh_len);
-                       i--;
                } else if (ext2_test_bit(idx, LLOG_HDR_BITMAP(*llog))) {
                        printf("rec #%d type=%x len=%u offset %lu\n", idx,
                               cur_rec->lrh_type, cur_rec->lrh_len, offset);
+                       recs_pr[i] = cur_rec;
+                       i++;
                } else {
                        cur_rec->lrh_id = CANCELLED;
                        if (cur_rec->lrh_type == LLOG_PAD_MAGIC &&
-                          ((offset + cur_rec->lrh_len) & 0x7) != 0)
-                               printf("rec #%d wrong padding len=%u offset %lu to 0x%lx\n",
+                          ((offset + cur_rec->lrh_len) & 0x7) != 0) {
+                               printf("error: rec #%d wrong padding len=%u offset %lu to 0x%lx\n",
                                       idx, cur_rec->lrh_len, offset,
                                       offset + cur_rec->lrh_len);
+                               errors++;
+                       }
                        /* The header counts only set records */
-                       i--;
                }
-               if (last_idx + 1 != idx) {
-                       printf("Previous index is %d, current %d, offset %lu\n",
-                              last_idx, idx, offset);
+
+               while (++last_idx < idx) {
+                       printf("error: rec #%d is missing%s set in bitmap\n",
+                              last_idx,
+                              ext2_test_bit(last_idx, LLOG_HDR_BITMAP(*llog)) ?
+                              " but" : ", not");
+                       errors++;
+               }
+               /* index may decrease only when crosses index zero in catalog */
+               if (last_idx > idx && idx != 1) {
+                       printf("error: rec #%d index is less than last #%d\n",
+                              idx, last_idx);
+                       errors++;
+                       last_idx = idx;
                }
-               last_idx = idx;
 
                ptr += __le32_to_cpu(cur_rec->lrh_len);
                if ((ptr - file_buf) > file_size) {
-                       printf("The log is corrupt (too big at %d)\n", i);
-                       rc = -EINVAL;
-                       goto clear_recs_buf;
+                       printf("error: rec #%d is trimmed by EOF, offset %lu\n",
+                              idx, offset);
+                       errors++;
+                       break;
                }
-               i++;
        }
 
+       while (++last_idx < LLOG_HDR_BITMAP_SIZE((*llog))) {
+               if (ext2_test_bit(last_idx, LLOG_HDR_BITMAP(*llog))) {
+                       printf("error: rec #%d is set in bitmap only\n",
+                              last_idx);
+                       errors++;
+               }
+       }
+       if (i != recs_num)
+               printf("error: header reports %d records but %d were found\n",
+                      recs_num, i);
+
+       /* don't set rc to output what was found */
+       if (errors)
+               llapi_error(LLAPI_MSG_NO_ERRNO, 0,
+                           "The llog is corrupted, %d errors found", errors);
+
        *recs = recs_pr;
-       *recs_number = recs_num;
-out:
-       return rc;
+       /* don't try to output more recs than was found or allocated */
+       *recs_number = i > recs_num ? recs_num : i;
 
-clear_recs_buf:
-       free(recs_buf);
+       return 0;
 
 clear_file_buf:
        free(file_buf);
-
        *llog = NULL;
-       goto out;
+
+       return rc;
 }
 
 void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf,