From bbf0017fdea52f094c190f14fd82b9f5d0902c90 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Thu, 18 Aug 2022 22:32:36 -0400 Subject: [PATCH] LU-16096 recovery: upgrade reply data after recovery finish As the batched RPC protocol will change the disk format of the client reply data "REPLY_DATA" for recovery, we need to handle compatibility carefully during upgrade for this new reply data format. The solution is as follows: When client recovery has finished, the target truncates the reply data file to zero size and rewrites the header to use the new magic and reply data record size. New reply data records will then be written in the new format. Enable test cases conf-sanity/32 and 108 now that the compatibility issue is fixed. This patch also fixes the usage of struct lsd_reply_data in lustre/utils/lr_reader.c to support both struct versions. Signed-off-by: Qian Yingjin Change-Id: I26921d41915b8cad2d913e15f502f4543180c5c6 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/48261 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Mikhail Pershin Reviewed-by: Oleg Drokin --- lustre/include/uapi/linux/lustre/lustre_disk.h | 20 ++++- lustre/target/tgt_lastrcvd.c | 107 +++++++++++++++++++------ lustre/tests/conf-sanity.sh | 4 +- lustre/utils/lr_reader.c | 44 +++++++--- 4 files changed, 135 insertions(+), 40 deletions(-) diff --git a/lustre/include/uapi/linux/lustre/lustre_disk.h b/lustre/include/uapi/linux/lustre/lustre_disk.h index 8bbd2bd..674abed 100644 --- a/lustre/include/uapi/linux/lustre/lustre_disk.h +++ b/lustre/include/uapi/linux/lustre/lustre_disk.h @@ -209,7 +209,15 @@ struct lsd_client_data { * The lrd_client_gen field is assigned with lcd_generation value * to allow identify which client the reply data belongs to. 
*/ -struct lsd_reply_data { +struct lsd_reply_data_v1 { + __u64 lrd_transno; /* transaction number */ + __u64 lrd_xid; /* transmission id */ + __u64 lrd_data; /* per-operation data */ + __u32 lrd_result; /* request result */ + __u32 lrd_client_gen; /* client generation */ +}; + +struct lsd_reply_data_v2 { __u64 lrd_transno; /* transaction number */ __u64 lrd_xid; /* transmission id */ __u64 lrd_data; /* per-operation data */ @@ -219,13 +227,19 @@ struct lsd_reply_data { __u32 lrd_padding[7]; /* unused fields, total size is 8X __u64 */ }; +#define lsd_reply_data lsd_reply_data_v2 + /* Header of the reply_data file */ -#define LRH_MAGIC 0xbdabda01 +#define LRH_MAGIC_V1 0xbdabda01 +#define LRH_MAGIC_V2 0xbdabda02 +#define LRH_MAGIC LRH_MAGIC_V2 + +/* Don't change the header size for compatibility. */ struct lsd_reply_header { __u32 lrh_magic; __u32 lrh_header_size; __u32 lrh_reply_size; - __u8 lrh_pad[sizeof(struct lsd_reply_data) - 12]; + __u8 lrh_pad[sizeof(struct lsd_reply_data_v1) - 12]; }; /** @} disk */ diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 5e0ff3c..d0ae469 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -287,31 +287,42 @@ static int tgt_reply_data_write(const struct lu_env *env, struct lu_target *tgt, * into structure @lrd */ static int tgt_reply_data_read(const struct lu_env *env, struct lu_target *tgt, - struct lsd_reply_data *lrd, loff_t off) + struct lsd_reply_data *lrd, loff_t off, + __u32 magic) { - int rc; - struct tgt_thread_info *tti = tgt_th_info(env); - struct lsd_reply_data *buf = &tti->tti_lrd; + struct tgt_thread_info *tti = tgt_th_info(env); + struct lsd_reply_data *buf = &tti->tti_lrd; + int rc; tti->tti_off = off; tti->tti_buf.lb_buf = buf; - tti->tti_buf.lb_len = sizeof(*buf); + + if (magic == LRH_MAGIC) + tti->tti_buf.lb_len = sizeof(*buf); + else if (magic == LRH_MAGIC_V1) + tti->tti_buf.lb_len = sizeof(struct lsd_reply_data_v1); + else + return -EINVAL; rc = 
dt_record_read(env, tgt->lut_reply_data, &tti->tti_buf, &tti->tti_off); if (rc != 0) return rc; - lrd->lrd_transno = le64_to_cpu(buf->lrd_transno); - lrd->lrd_xid = le64_to_cpu(buf->lrd_xid); - lrd->lrd_data = le64_to_cpu(buf->lrd_data); - lrd->lrd_result = le32_to_cpu(buf->lrd_result); - lrd->lrd_client_gen = le32_to_cpu(buf->lrd_client_gen); - lrd->lrd_batch_idx = le32_to_cpu(buf->lrd_batch_idx); + lrd->lrd_transno = le64_to_cpu(buf->lrd_transno); + lrd->lrd_xid = le64_to_cpu(buf->lrd_xid); + lrd->lrd_data = le64_to_cpu(buf->lrd_data); + lrd->lrd_result = le32_to_cpu(buf->lrd_result); + lrd->lrd_client_gen = le32_to_cpu(buf->lrd_client_gen); + + if (magic == LRH_MAGIC) + lrd->lrd_batch_idx = le32_to_cpu(buf->lrd_batch_idx); + else + lrd->lrd_batch_idx = 0; + return 0; } - /* Free the in-memory reply data structure @trd and release * the corresponding slot in the reply_data file of target @lut * Called with ted_lcd_lock held @@ -740,10 +751,9 @@ out: } EXPORT_SYMBOL(tgt_server_data_update); -static int tgt_truncate_last_rcvd(const struct lu_env *env, - struct lu_target *tgt, loff_t size) +static int tgt_truncate_object(const struct lu_env *env, struct lu_target *tgt, + struct dt_object *dt, loff_t size) { - struct dt_object *dt = tgt->lut_last_rcvd; struct thandle *th; struct lu_attr attr; int rc; @@ -793,6 +803,48 @@ static void tgt_client_epoch_update(const struct lu_env *env, tgt_client_data_update(env, exp); } +static int tgt_reply_data_upgrade_check(const struct lu_env *env, + struct lu_target *tgt) +{ + struct lsd_reply_header lrh; + int rc; + + /* + * Reply data is supported by MDT targets only for now. + * When reply data object @lut_reply_data is NULL, it indicates the + * target type is OST and it should skip the upgrade check. 
+ */ + if (tgt->lut_reply_data == NULL) + RETURN(0); + + rc = tgt_reply_header_read(env, tgt, &lrh); + if (rc) { + CERROR("%s: failed to read %s: rc = %d\n", + tgt_name(tgt), REPLY_DATA, rc); + RETURN(rc); + } + + if (lrh.lrh_magic == LRH_MAGIC) + RETURN(0); + + rc = tgt_truncate_object(env, tgt, tgt->lut_reply_data, 0); + if (rc) { + CERROR("%s: failed to truncate %s: rc = %d\n", + tgt_name(tgt), REPLY_DATA, rc); + RETURN(rc); + } + + lrh.lrh_magic = LRH_MAGIC; + lrh.lrh_header_size = sizeof(struct lsd_reply_header); + lrh.lrh_reply_size = sizeof(struct lsd_reply_data); + rc = tgt_reply_header_write(env, tgt, &lrh); + if (rc) + CERROR("%s: failed to write header for %s: rc = %d\n", + tgt_name(tgt), REPLY_DATA, rc); + + RETURN(rc); +} + /** * Update boot epoch when recovery ends */ @@ -851,6 +903,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt) /** update server epoch */ tgt_server_data_update(&env, tgt, 1); + tgt_reply_data_upgrade_check(&env, tgt); lu_env_fini(&env); } @@ -1887,8 +1940,8 @@ int tgt_server_data_init(const struct lu_env *env, struct lu_target *tgt) LCONSOLE_WARN("%s: mounting at first time on 1.8 FS, " "remove all clients for interop needs\n", tgt_name(tgt)); - rc = tgt_truncate_last_rcvd(env, tgt, - lsd->lsd_client_start); + rc = tgt_truncate_object(env, tgt, tgt->lut_last_rcvd, + lsd->lsd_client_start); if (rc) RETURN(rc); last_rcvd_size = lsd->lsd_client_start; @@ -2115,20 +2168,28 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) GOTO(out, rc); } } else { + __u32 recsz = sizeof(struct lsd_reply_data); + rc = tgt_reply_header_read(env, tgt, lrh); if (rc) { CERROR("%s: error reading %s: rc = %d\n", tgt_name(tgt), REPLY_DATA, rc); GOTO(out, rc); } - if (lrh->lrh_magic != LRH_MAGIC || - lrh->lrh_header_size != sizeof(struct lsd_reply_header) || - lrh->lrh_reply_size != sizeof(struct lsd_reply_data)) { + if (!(lrh->lrh_magic == LRH_MAGIC && + lrh->lrh_reply_size == sizeof(struct lsd_reply_data) && + 
lrh->lrh_header_size == sizeof(struct lsd_reply_header)) && + !(lrh->lrh_magic == LRH_MAGIC_V1 && + lrh->lrh_reply_size == sizeof(struct lsd_reply_data_v1) && + lrh->lrh_header_size == sizeof(struct lsd_reply_header))) { CERROR("%s: invalid header in %s\n", tgt_name(tgt), REPLY_DATA); GOTO(out, rc = -EINVAL); } + if (lrh->lrh_magic == LRH_MAGIC_V1) + recsz = sizeof(struct lsd_reply_data_v1); + hash = cfs_hash_getref(tgt->lut_obd->obd_gen_hash); if (hash == NULL) GOTO(out, rc = -ENODEV); @@ -2139,9 +2200,9 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) /* Load reply_data from disk */ for (idx = 0, off = sizeof(struct lsd_reply_header); - off < reply_data_size; - idx++, off += sizeof(struct lsd_reply_data)) { - rc = tgt_reply_data_read(env, tgt, lrd, off); + off < reply_data_size; idx++, off += recsz) { + rc = tgt_reply_data_read(env, tgt, lrd, off, + lrh->lrh_magic); if (rc) { CERROR("%s: error reading %s: rc = %d\n", tgt_name(tgt), REPLY_DATA, rc); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 93e24b8..0e88560 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -14,8 +14,8 @@ init_logging # tool to create lustre filesystem images ALWAYS_EXCEPT="$CONF_SANITY_EXCEPT 32newtarball" -# bug number for skipped test: LU-11915 LU-14393 -ALWAYS_EXCEPT="$ALWAYS_EXCEPT 110 32 108" +# bug number for skipped test: LU-11915 +ALWAYS_EXCEPT="$ALWAYS_EXCEPT 110" # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
if $SHARED_KEY; then diff --git a/lustre/utils/lr_reader.c b/lustre/utils/lr_reader.c index 2a14f8f..f8e9cb6 100644 --- a/lustre/utils/lr_reader.c +++ b/lustre/utils/lr_reader.c @@ -255,6 +255,7 @@ int print_reply_data(FILE *fp) { struct lsd_reply_header lrh = {}; unsigned long long slot; + __u32 recsz; int rc = 0; int n; @@ -271,12 +272,6 @@ int print_reply_data(FILE *fp) lrh.lrh_magic = __le32_to_cpu(lrh.lrh_magic); lrh.lrh_header_size = __le32_to_cpu(lrh.lrh_header_size); lrh.lrh_reply_size = __le32_to_cpu(lrh.lrh_reply_size); - if (lrh.lrh_magic != LRH_MAGIC) { - fprintf(stderr, - "%s: invalid %s header: lrh_magic=0x%08x expected 0x%08x\n", - progname, REPLY_DATA, lrh.lrh_magic, LRH_MAGIC); - rc = EINVAL; - } if (lrh.lrh_header_size != sizeof(struct lsd_reply_header)) { fprintf(stderr, "%s: invalid %s header: lrh_header_size=0x%08x expected 0x%08x\n", @@ -284,11 +279,31 @@ int print_reply_data(FILE *fp) (unsigned int)sizeof(struct lsd_reply_header)); rc = EINVAL; } - if (lrh.lrh_reply_size != sizeof(struct lsd_reply_data)) { + if (lrh.lrh_magic == LRH_MAGIC) { + if (lrh.lrh_reply_size != sizeof(struct lsd_reply_data)) { + fprintf(stderr, + "%s: invalid %s header: lrh_reply_size=0x%08x expected 0x%08x\n", + progname, REPLY_DATA, lrh.lrh_reply_size, + (unsigned int)sizeof(struct lsd_reply_data)); + rc = EINVAL; + } else { + recsz = sizeof(struct lsd_reply_data); + } + } else if (lrh.lrh_magic == LRH_MAGIC_V1) { + if (lrh.lrh_reply_size != sizeof(struct lsd_reply_data_v1)) { + fprintf(stderr, + "%s: invalid %s header: lrh_reply_size=0x%08x expected 0x%08x\n", + progname, REPLY_DATA, lrh.lrh_reply_size, + (unsigned int)sizeof(struct lsd_reply_data)); + rc = EINVAL; + } else { + recsz = sizeof(struct lsd_reply_data_v1); + } + } else { fprintf(stderr, - "%s: invalid %s header: lrh_reply_size=0x%08x expected 0x%08x\n", - progname, REPLY_DATA, lrh.lrh_reply_size, - (unsigned int)sizeof(struct lsd_reply_data)); + "%s: invalid %s header: lrh_magic=0x%08x expected 0x%08x 
or 0x%08x\n", + progname, REPLY_DATA, lrh.lrh_magic, LRH_MAGIC, + LRH_MAGIC_V1); rc = EINVAL; } @@ -306,8 +321,8 @@ int print_reply_data(FILE *fp) struct lsd_reply_data lrd; /* read a reply data */ - n = fread(&lrd, 1, sizeof(lrd), fp); - if (n < sizeof(lrd)) { + n = fread(&lrd, 1, recsz, fp); + if (n < recsz) { if (feof(fp)) break; fprintf(stderr, "%s: Short read (%d of %d)\n", @@ -322,6 +337,9 @@ int print_reply_data(FILE *fp) lrd.lrd_result = __le32_to_cpu(lrd.lrd_result); lrd.lrd_client_gen = __le32_to_cpu(lrd.lrd_client_gen); + if (lrh.lrh_magic == LRH_MAGIC) + lrd.lrd_batch_idx = __le32_to_cpu(lrd.lrd_batch_idx); + printf(" %lld:\n", slot); printf(" client_generation: %u\n", lrd.lrd_client_gen); @@ -332,6 +350,8 @@ int print_reply_data(FILE *fp) printf(" last_result: %u\n", lrd.lrd_result); printf(" last_data: %llu\n\n", (unsigned long long)lrd.lrd_data); + if (lrh.lrh_magic == LRH_MAGIC) + printf(" batch_idx: %u\n", lrd.lrd_batch_idx); } return 0; -- 1.8.3.1