From 99d1f12c7c5ec2b4a8945559cffe3ef0c77093c0 Mon Sep 17 00:00:00 2001 From: Artem Blagodarenko Date: Wed, 7 Sep 2022 08:46:54 -0400 Subject: [PATCH] LU-15581 utils: add check_iam util A tool for parsing and checking IAM files. And a test to check utility works without segfaults for corrupted files. To process all files in OI catalog: for f in /root/md65_ldiskfs/oi.16.*; do echo $f; lustre/utils/check_iam -v $f; done > output.txt 2>&1 Test-Parameters: trivial testlist=conf-sanity env=ONLY=134 HPE-bug-id: LUS-10501 Change-Id: I7a8e83bc2720040e48c953511801816fd3dd6288 Signed-off-by: Artem Blagodarenko Signed-off-by: Alexander Boyko Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/46575 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Artem Blagodarenko Reviewed-by: Oleg Drokin --- lustre/include/lustre/libiam.h | 98 +++++++- lustre/tests/conf-sanity.sh | 39 ++++ lustre/utils/Makefile.am | 4 +- lustre/utils/check_iam.c | 520 +++++++++++++++++++++++++++++++++++++++++ lustre/utils/libiam.c | 68 +----- 5 files changed, 660 insertions(+), 69 deletions(-) create mode 100644 lustre/utils/check_iam.c diff --git a/lustre/include/lustre/libiam.h b/lustre/include/lustre/libiam.h index b0f20f7..3e400ec 100644 --- a/lustre/include/lustre/libiam.h +++ b/lustre/include/lustre/libiam.h @@ -50,11 +50,105 @@ #define DX_FMT_NAME_LEN 16 +#define IAM_LFIX_ROOT_MAGIC 0xbedabb1edULL +#define IAM_LVAR_ROOT_MAGIC 0xb01dfaceULL + +enum { + IAM_LEAF_HEADER_MAGIC = 0x1976, + IAM_LVAR_LEAF_MAGIC = 0x1973, + IAM_IDLE_HEADER_MAGIC = 0x7903 +}; + enum iam_fmt_t { - FMT_LFIX, - FMT_LVAR + FMT_LFIX = 0, + FMT_LVAR = 1 }; +struct dx_countlimit { + u_int16_t limit; + u_int16_t count; +} __attribute__((packed)); + +struct iam_lfix_root { + u_int64_t ilr_magic; + u_int16_t ilr_keysize; + u_int16_t ilr_recsize; + u_int16_t ilr_ptrsize; + u_int8_t ilr_indirect_levels; + u_int8_t ilr_padding; + struct dx_countlimit limit; + u_int32_t idle_blocks; + u_int8_t 
ilr_paddingdd2[12]; + unsigned char entries[0]; +} __attribute__((packed)); + +struct iam_leaf_head { + u_int16_t ill_magic; + u_int16_t ill_count; +} __attribute__((packed)); + +struct lvar_leaf_header { + u_int16_t vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */ + u_int16_t vlh_used; /* used bytes, including header */ +} __attribute__((packed)); + +/* + * Header structure to record idle blocks. + */ +struct iam_idle_head { + __le16 iih_magic; + __le16 iih_count; /* how many idle blocks in this head */ + __le32 iih_next; /* next head for idle blocks */ + __le32 iih_blks[0]; +} __attribute__((packed)); + +struct iam_index_head { + struct dx_countlimit limit; + u_int8_t paddingdd[16]; + unsigned char entries[0]; +} __attribute__((packed)); + +struct lvar_root { + u_int32_t vr_magic; + u_int16_t vr_recsize; + u_int16_t vr_ptrsize; + u_int8_t vr_indirect_levels; + u_int8_t vr_padding0; + u_int16_t vr_padding1; +} __attribute__((packed)); + + +struct lvar_leaf_entry { + u_int32_t vle_hash; + u_int16_t vle_keysize; + u_int8_t vle_key[0]; +} __attribute__((packed)); + +enum { + LVAR_PAD = 4, + LVAR_ROUND = LVAR_PAD - 1 +}; + +static inline unsigned int node_limit(unsigned int node_gap, + unsigned int block_size, + unsigned int size) +{ + return (block_size - node_gap) / size; +} + +static inline unsigned int root_limit(unsigned int root_gap, + unsigned int node_gap, + unsigned int block_size, + unsigned int size) +{ + unsigned int limit; + + limit = (block_size - root_gap) / size; + if (limit == node_limit(node_gap, block_size, size)) + limit--; + return limit; +} + struct iam_uapi_info { __u16 iui_keysize; __u16 iui_recsize; diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 61bf943..3e7be0f 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -10359,6 +10359,45 @@ test_133() { } run_test 133 "stripe QOS: free space balance in a pool" +test_134() { + [ "$mds1_FSTYPE" == "ldiskfs" ] || skip "ldiskfs only test" + local 
errors + local rc + local mdt_dev=$(facet_device mds1) + local tmp_dir=$TMP/$tdir + local out=$tmp_dir/check_iam.txt + local CHECK_IAM=${CHECK_IAM:-$(do_facet mds1 "which check_iam 2> /dev/null || true")} + + [[ -n "$CHECK_IAM" ]] || skip "check_iam not found" + + mkdir -p $tmp_dir + for ((i=0; i<64; i++)); do + local f=oi.16.$i + # cmd introduces a random corruption into the IAM file + local cmd="dd if=/dev/urandom of=$tmp_dir/$f bs=2 conv=notrunc count=1 seek=$((RANDOM % 36))" + do_facet mds1 "mkdir -p $tmp_dir; \ + $DEBUGFS -c -R 'dump $f $tmp_dir/$f' $mdt_dev 2>&1; \ + $CHECK_IAM -v $tmp_dir/$f 2>&1; \ + echo $cmd; eval $cmd 2>/dev/null; + $CHECK_IAM -v $tmp_dir/$f 2>&1; echo \\\$?" >> $out + done + + tail -n50 $out + + stack_trap "rm -rf $tmp_dir && do_facet mds1 rm -rf $tmp_dir" EXIT + + rc=$(grep -c "fault\|except" $out) + (( rc == 0 )) || { cat $out && + error "check_iam failed with fault or exception $rc"; } + + rc=$(grep -c "^255" $out) + errors=$(grep -c "FINISHED WITH ERRORS" $out) + + (( rc == errors )) || { cat $out && + error "check_iam errcode does not fit with errors $rc $errors"; } +} +run_test 134 "check_iam works without faults" + # # (This was sanity/802a) # diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 6a201fe..e1c078c 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -44,7 +44,7 @@ endif # TESTS if SERVER sbin_PROGRAMS += mkfs.lustre tunefs.lustre lr_reader lshowmount \ ll_decode_filter_fid llog_reader l_tunedisk \ - ofd_access_log_reader + ofd_access_log_reader check_iam endif if LIBPTHREAD sbin_PROGRAMS += lhsmtool_posix @@ -148,6 +148,8 @@ llog_reader_SOURCES = llog_reader.c llog_reader_LDADD := liblustreapi.la llog_reader_DEPENDENCIES := liblustreapi.la +check_iam_SOURCES = check_iam.c + lr_reader_SOURCES = lr_reader.c ofd_access_log_reader_SOURCES = \ diff --git a/lustre/utils/check_iam.c b/lustre/utils/check_iam.c new file mode 100644 index 0000000..0888dbf --- /dev/null +++ b/lustre/utils/check_iam.c
@@ -0,0 +1,520 @@ +/* + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ +/* + * + * User-level tool to check iam files sanity. + * + * Author: Artem Blagodarenko + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +static int verbose; + +enum { + ROOT_NODE, + INDEX_NODE, + LEAF_NODE, + IDLE_NODE +}; + +struct node_info { + int referenced; + int recycled; + int node_type; +}; + +void usage(char *str) +{ + printf("usage: %s [-h] [-v] iam_file\n", str); +} + +struct iam_params { + char *filename; + int blocksize; + int fmt; + int keysize; + int recsize; + int ptrsize; + int indirect_levels; + int root_gap; + int node_gap; + unsigned long idle_blocks; + unsigned long current_block; + unsigned long long file_size; + unsigned long blocks_count; + struct node_info *node_info; + int rc; +}; + +int check_idle_blocks(char *buf, struct iam_params *params) +{ + struct iam_idle_head *idle; + int i; + + idle = (struct iam_idle_head *)buf; + + if (idle->iih_magic != __cpu_to_le16(IAM_IDLE_HEADER_MAGIC)) { + printf("Wrong magic 0x%x\n", idle->iih_magic); + return -1; + } + + if (verbose) { + printf(", %i blocks, next table in block %i, idle blocks: ", + 
__le16_to_cpu(idle->iih_count), + __le32_to_cpu(idle->iih_next)); + } + /* iih_count is 16 bits wide; stop before iih_blks leaves the block */ + for (i = 0; i < __le16_to_cpu(idle->iih_count) && + i < (int)((params->blocksize - sizeof(*idle)) / + sizeof(idle->iih_blks[0])); i++) { + unsigned int blk = __le32_to_cpu(idle->iih_blks[i]); + if (verbose) + printf("%i, ", blk); + if (blk >= params->blocks_count) { + printf("Pointer to the idle block (%i) outside the file\n", + blk); + params->rc = -1; + } else { + if (params->node_info[blk].referenced && verbose) + printf("Reference to recycled node (%i)\n", + blk); + params->node_info[blk].recycled = 1; + } + } + if (verbose) + printf("\n"); + /* an early stop means the table overflows the block: report it */ + return i < __le16_to_cpu(idle->iih_count) ? -1 : 0; +} + +static int check_entries(unsigned char *entries, size_t size, int count, + struct iam_params *params) +{ + unsigned int ptr; + int i, j, rc; + + for (i = 0; i < count; i++) { + rc = 0; + /* size is unsigned: test before subtracting so it cannot wrap */ + if (size < (size_t)(params->keysize + params->ptrsize)) { + if (verbose) + printf("index outside of buffer\n"); + + return -1; + } + size -= (params->keysize + params->ptrsize); + + if (verbose) + printf("key:"); + + for (j = 0; j < params->keysize; j++, entries++) + if (verbose) + printf("%02x", *entries); + + ptr = __le32_to_cpu(*((__le32 *)entries)); + + if (ptr >= params->blocks_count) { + params->rc = -1; + rc = -1; + } + if (verbose) + printf(", ptr: %u%s\n", ptr, rc ?
" wrong" : ""); + + entries += params->ptrsize; + + if (rc) + continue; + + if (params->node_info[ptr].recycled && verbose) { + printf("Reference to recycled node (%u) from node %lu\n", + ptr, params->current_block); + } + params->node_info[ptr].referenced = 1; + } + + return 0; +} + +static int check_index(char *buf, struct iam_params *params) +{ + struct iam_index_head *index; + int counted_limit; + struct dx_countlimit *limit; + + index = (struct iam_index_head *)buf; + limit = &index->limit; + + params->node_info[params->current_block].node_type = INDEX_NODE; + + if (verbose) + printf("Index block, count %i, limit %i\n", + __le16_to_cpu(limit->count), + __le16_to_cpu(limit->limit)); + + counted_limit = node_limit(params->node_gap, params->blocksize, + params->keysize + params->ptrsize); + + if (__le16_to_cpu(limit->limit) != counted_limit) { + fprintf(stderr, "Wrong limit %i, counted limit %i\n", + __le16_to_cpu(limit->limit), counted_limit); + return -1; + } + + + if (__le16_to_cpu(limit->count) > __le16_to_cpu(limit->limit)) { + printf("More elements (%i) then limit (%i)\n", + __le16_to_cpu(limit->count), + __le16_to_cpu(limit->limit)); + return -1; + } + + /* count - 1, because limit is entry itself */ + if (check_entries(index->entries, + params->blocksize - offsetof(struct iam_index_head, + entries), + __le16_to_cpu(limit->count) - 1, params)) { + printf("Broken entries\n"); + return -1; + } + + return 0; +} + +static int check_root(char *buf, size_t size, struct iam_params *params) +{ + struct iam_lfix_root *root; + unsigned int counted_limit; + int min; + struct dx_countlimit *limit; + + if (verbose) + printf("Root format "); + + root = (struct iam_lfix_root *)buf; + if (root->ilr_magic == __cpu_to_le64(IAM_LFIX_ROOT_MAGIC)) { + params->fmt = FMT_LFIX; + if (verbose) + printf("LFIX,"); + } else if (root->ilr_magic == __cpu_to_le64(IAM_LVAR_ROOT_MAGIC)) { + params->fmt = FMT_LVAR; + if (verbose) + printf("LVAR,"); + } else { + printf("Bad magic %llu\n", 
__le64_to_cpu(root->ilr_magic)); + return -1; + } + + limit = &root->limit; + + params->keysize = __le16_to_cpu(root->ilr_keysize); + params->recsize = __le16_to_cpu(root->ilr_recsize); + params->ptrsize = __le16_to_cpu(root->ilr_ptrsize); + params->indirect_levels = root->ilr_indirect_levels; + + params->node_info[0].referenced = 1; //self referance + params->node_info[0].node_type = ROOT_NODE; + + params->idle_blocks = __le32_to_cpu(root->idle_blocks); + if (params->idle_blocks >= params->blocks_count) { + printf("Idle blocks number (%lu) is out of blocks range (%lu)\n", + params->idle_blocks, params->blocks_count); + params->rc = -1; + } else { + params->node_info[params->idle_blocks].referenced = 1; + params->node_info[params->idle_blocks].node_type = IDLE_NODE; + } + + if (verbose) { + printf("Idle blocks block number %lu\n", params->idle_blocks); + printf("keysize %i, recsize %i, ptrsize %i, indirect_levels %i\n", + params->keysize, params->recsize, params->ptrsize, + params->indirect_levels); + } + + if (params->ptrsize != 4 && params->ptrsize != 8) { + printf("Invalid ptrsize (%i). Only 4 and 8 are supported\n", + params->ptrsize); + return -1; + } + + if (params->keysize < 1 || params->recsize < 0) { + printf("Too small key(%i) or recorod(%i)\n", + params->keysize, params->recsize); + return -1; + } + + if ((params->keysize + params->recsize + + (int)sizeof(struct iam_leaf_head)) > (params->blocksize / 3)) { + printf("Too large record + key or too small block, %i, %i\n", + (params->keysize + params->recsize + + (int)sizeof(struct iam_leaf_head)), + params->blocksize); + return -1; + } + + counted_limit = root_limit(params->root_gap, params->node_gap, + params->blocksize, + params->keysize + params->ptrsize); + + + if (__le16_to_cpu(limit->limit) != counted_limit) { + fprintf(stderr, "Wrong limit %i, counted limit %i\n", + __le16_to_cpu(limit->limit), counted_limit); + params->rc = -1; + } + + min = (counted_limit < __le16_to_cpu(limit->limit)) ? 
+ counted_limit : __le16_to_cpu(limit->limit); + + if (__le16_to_cpu(limit->count) > __le16_to_cpu(limit->limit)) { + printf("More elements (%i) then limit (%i)\n", + __le16_to_cpu(root->limit.count), + __le16_to_cpu(root->limit.limit)); + params->rc = -1; + } + + min = (__le16_to_cpu(limit->count) < min) ? + __le16_to_cpu(limit->count) : min; + + + if (verbose) + printf("count %i, limit %i\n", + __le16_to_cpu(root->limit.count), + __le16_to_cpu(root->limit.limit)); + + /* count - 1, because limit is entry itself */ + if (check_entries(root->entries, + size - offsetof(struct iam_lfix_root, entries), + min - 1, params)) { + printf("Broken entries\n"); + return -1; + } + + return 0; +} + +static int check_block(char *buf, struct iam_params *params) +{ + struct iam_leaf_head *head; + + head = (struct iam_leaf_head *)buf; + + if (verbose) + printf("Block %lu,", params->current_block); + + switch (head->ill_magic) { + case __cpu_to_le16(IAM_LEAF_HEADER_MAGIC): + if (verbose) + printf("FIX leaf,"); + params->node_info[params->current_block].node_type = + LEAF_NODE; + break; + case __cpu_to_le16(IAM_LVAR_LEAF_MAGIC): /* lvar leaf header magic */ + if (verbose) + printf("LVAR leaf,"); + params->node_info[params->current_block].node_type = + LEAF_NODE; + break; + case __cpu_to_le16(IAM_IDLE_HEADER_MAGIC): + if (verbose) + printf("IDLE block"); + params->node_info[params->current_block].referenced = 1; + if (check_idle_blocks(buf, params)) { + printf("Broken idle blocks\n"); + params->rc = -1; + } + break; + default: + if (check_index(buf, params)) { + printf("Broken index node\n"); + params->rc = -1; + } + break; + } + if (verbose) + printf("count %i\n", __le16_to_cpu(head->ill_count)); + + return 0; +} + +static void print_node_type(int type) +{ + switch (type) { + case ROOT_NODE: + printf("ROOT\n"); + break; + case INDEX_NODE: + printf("INDEX\n"); + break; + case LEAF_NODE: + printf("LEAF\n"); + break; + case IDLE_NODE: + printf("IDLE\n"); + break; + default: + printf("UNKNOWN %i\n", type); + break; + } +} +static int check_unconnected(struct iam_params *params) +{ + unsigned
long i; + int rc = 0; + + for (i = 0; i < params->blocks_count; i++) { + if (params->node_info[i].referenced && + params->node_info[i].recycled) { + printf("Node %lu referenced and recycled. FAIL, ", i); + print_node_type(params->node_info[i].node_type); + } + + if (!params->node_info[i].referenced && + !params->node_info[i].recycled) { + printf("Unconnected node %lu. FAIL, ", i); + print_node_type(params->node_info[i].node_type); + rc = -1; + } + } + return rc; +} +int main(int argc, char **argv) +{ + struct iam_params params; + int rc = 0; + int opt; + void *buf; + int fd; + struct stat sb; + + params.rc = 0; + do { + opt = getopt(argc, argv, "hv"); + switch (opt) { + case 'v': + verbose++; + case -1: + break; + default: + fprintf(stderr, "Unable to parse options.\n"); + /* fallthrough: show usage, but exit with an error */ + case 'h': + usage(argv[0]); + return opt == 'h' ? 0 : -1; + } + } while (opt != -1); + + if (optind >= argc) { + fprintf(stderr, "Expected filename after options\n"); + return -1; + } + + params.filename = argv[optind]; + params.blocksize = 4096; + params.current_block = 0; + params.root_gap = sizeof(struct iam_lfix_root); + params.node_gap = 0; + + fd = open(params.filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Can not open file %s, %s\n", + params.filename, strerror(errno)); + return -1; + } + + if (fstat(fd, &sb) == -1) { + fprintf(stderr, "Error stat file.\n"); + close(fd); + return -1; + } + params.file_size = (unsigned long long)sb.st_size; + params.blocks_count = params.file_size / params.blocksize + + ((params.file_size % params.blocksize) ?
1 : 0); + + if (verbose) + printf("Filesize %llu, blocks count %lu\n", params.file_size, + params.blocks_count); + buf = malloc(params.blocksize); + if (buf == NULL) { + fprintf(stderr, "Can't allocate buffer\n"); + close(fd); + return -1; + } + params.node_info = calloc(params.blocks_count, + sizeof(struct node_info)); + if (params.node_info == NULL) { + fprintf(stderr, "Can't allocate node info\n"); + params.rc = -1; + goto err; + } + /* Read root block */ + if (read(fd, buf, params.blocksize) < params.blocksize) { + fprintf(stderr, "Can't read root block\n"); + params.rc = -1; + goto err; + } + rc = check_root(buf, params.blocksize, &params); + if (rc) { + printf("Root node is insane\n"); + params.rc = rc; + goto err; /* other blocks cannot be parsed without root params */ + } + params.current_block++; + /* Read the remaining blocks; stop at EOF, on error or at array end */ + while (params.current_block < params.blocks_count && + read(fd, buf, params.blocksize) > 0) { + rc = check_block(buf, &params); + if (rc) { + printf("Node with offset 0x%lx in %s is broken\n", + params.current_block * params.blocksize, + params.filename); + params.rc = rc; + } + params.current_block++; + } + rc = check_unconnected(&params); + if (rc) + printf("There are unconnected nodes\n"); +err: + if (!(rc ?
rc : params.rc)) + printf("NO ERRORS\n"); + else + printf("FINISHED WITH ERRORS\n"); + + free(params.node_info); + free(buf); + close(fd); + + return rc ?: params.rc; +} diff --git a/lustre/utils/libiam.c b/lustre/utils/libiam.c index af5472c..985f2c2 100644 --- a/lustre/utils/libiam.c +++ b/lustre/utils/libiam.c @@ -52,59 +52,6 @@ typedef __u32 lvar_hash_t; -enum { - IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL, - IAM_LVAR_ROOT_MAGIC = 0xb01dface -}; - -struct iam_lfix_root { - u_int64_t ilr_magic; - u_int16_t ilr_keysize; - u_int16_t ilr_recsize; - u_int16_t ilr_ptrsize; - u_int16_t ilr_indirect_levels; -}; - -enum { - IAM_LEAF_HEADER_MAGIC = 0x1976, - IAM_LVAR_LEAF_MAGIC = 0x1973 -}; - -struct iam_leaf_head { - u_int16_t ill_magic; - u_int16_t ill_count; -}; - -struct dx_countlimit { - u_int16_t limit; - u_int16_t count; -}; - -struct lvar_leaf_header { - u_int16_t vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */ - u_int16_t vlh_used; /* used bytes, including header */ -}; - -struct lvar_root { - u_int32_t vr_magic; - u_int16_t vr_recsize; - u_int16_t vr_ptrsize; - u_int8_t vr_indirect_levels; - u_int8_t vr_padding0; - u_int16_t vr_padding1; -}; - -struct lvar_leaf_entry { - u_int32_t vle_hash; - u_int16_t vle_keysize; - u_int8_t vle_key[0]; -}; - -enum { - LVAR_PAD = 4, - LVAR_ROUND = LVAR_PAD - 1 -}; - /** * Stores \a val at \a dst, where the latter is possibly unaligned. Uses * memcpy(). 
This macro is needed to avoid dependency of user level tools on @@ -117,21 +64,10 @@ enum { memcpy(dst, &__val, sizeof *(dst)); \ }) -static int root_limit(int rootgap, int blocksize, int size) -{ - int limit; - int nlimit; - - limit = (blocksize - rootgap) / size; - nlimit = blocksize / size; - if (limit == nlimit) - limit--; - return limit; -} static int lfix_root_limit(int blocksize, int size) { - return root_limit(sizeof(struct iam_lfix_root), blocksize, size); + return root_limit(sizeof(struct iam_lfix_root), 0, blocksize, size); } static void lfix_root(void *buf, @@ -199,7 +135,7 @@ static void lfix_leaf(void *buf, static int lvar_root_limit(int blocksize, int size) { - return root_limit(sizeof(struct lvar_root), blocksize, size); + return root_limit(sizeof(struct lvar_root), 0, blocksize, size); } static void lvar_root(void *buf, -- 1.8.3.1