From ad5d6d47dcfcdad720ec3e73cd5be2d5afe83cb1 Mon Sep 17 00:00:00 2001 From: Mr NeilBrown Date: Wed, 21 Aug 2024 07:51:39 -0600 Subject: [PATCH] LU-13308 mdd: add LLOG_F_EXT_X_NID_BE The log already has enough space for a large nid thanks to extra space in struct changelog_ext_nid, but there is no way to tell if the provided nid is large. Also the nid is stored host-endian meaning the log is not portable across architectures. !!! A large nid is always big-endian. So add a flag to assert that the nid is big-endian and use it only for large nids. For 4-byte nids we continue to use host-endian and don't set the BE flag. LLOG_F_EXT_X_NID_BE indicates that the log implementation supports large big-endian nids. CLFE_NID_BE indicates that the NID in this record is big-endian and large. CHANGELOG_FLAG_NID_BE is sent by user land tools to tell the kernel to present NIDs in a large big-endian format. uc_nid in struct lu_ucred is now a large nid - struct lnet_nid Signed-off-by: Mr NeilBrown Signed-off-by: James Simmons Change-Id: I6938090df4f30e916b79d1f58b34b518ab6bdcf7 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/50099 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Sebastien Buisson Reviewed-by: Etienne AUJAMES Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustreapi.h | 37 ++++- lustre/include/md_object.h | 2 +- lustre/include/uapi/linux/lustre/lustre_idl.h | 4 +- lustre/include/uapi/linux/lustre/lustre_user.h | 199 ++---------------------- lustre/mdc/mdc_changelog.c | 8 +- lustre/mdd/mdd_dir.c | 11 +- lustre/mdd/mdd_internal.h | 2 +- lustre/mdd/mdd_object.c | 3 +- lustre/mdt/mdt_lib.c | 5 +- lustre/obdclass/llog_osd.c | 206 ++++++++++++++++++++++++- lustre/ptlrpc/wiretest.c | 1 + lustre/tests/lustre-rsync-test.sh | 0 lustre/utils/lfs.c | 15 +- lustre/utils/liblustreapi_chlg.c | 128 ++++++++++++--- lustre/utils/llog_reader.c | 39 +++-- lustre/utils/lustre_rsync.c | 5 +- lustre/utils/wirecheck.c | 1 + lustre/utils/wiretest.c | 1 + 18 files changed, 
418 insertions(+), 249 deletions(-) mode change 100644 => 100755 lustre/tests/lustre-rsync-test.sh diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index a155950..94ec470 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -602,6 +602,17 @@ int llapi_swap_layouts(const char *path1, const char *path2, __u64 dv1, */ #define HAVE_CHANGELOG_EXTEND_REC 1 +enum changelog_send_extra_flag { + /* Pack uid/gid into the changelog record */ + CHANGELOG_EXTRA_FLAG_UIDGID = CLFE_UIDGID, + /* Pack nid into the changelog record */ + CHANGELOG_EXTRA_FLAG_NID = CLFE_NID, + /* Pack open mode into the changelog record */ + CHANGELOG_EXTRA_FLAG_OMODE = CLFE_OPEN, + /* Pack xattr name into the changelog record */ + CHANGELOG_EXTRA_FLAG_XATTR = CLFE_XATTR, +}; + int llapi_changelog_start(void **priv, enum changelog_send_flag flags, const char *mdtname, long long startrec); int llapi_changelog_fini(void **priv); @@ -612,8 +623,30 @@ int llapi_changelog_get_fd(void *priv); /* Allow records up to endrec to be destroyed; requires registered id. 
*/ int llapi_changelog_clear(const char *mdtname, const char *idstr, long long endrec); -extern int llapi_changelog_set_xflags(void *priv, - enum changelog_send_extra_flag extra_flags); +int llapi_changelog_set_xflags(void *priv, + enum changelog_send_extra_flag extra_flags); +struct changelog_rec * +llapi_changelog_repack_rec(const struct changelog_rec *rec, + enum changelog_rec_flags crf_wanted, + enum changelog_rec_extra_flags cref_want); + +#ifndef changelog_remap_rec +static inline void changelog_remap_rec(struct changelog_rec *rec, + enum changelog_rec_flags crf_wanted, + enum changelog_rec_extra_flags cref_want) +{ + struct changelog_rec *new_rec; + + new_rec = llapi_changelog_repack_rec((const struct changelog_rec *)rec, + crf_wanted, cref_want); + if (new_rec) { + size_t len = changelog_rec_size(rec) + rec->cr_namelen; + + memcpy(rec, new_rec, len); + free(new_rec); + } +} +#endif /* HSM copytool interface. * priv is private state, managed internally by these functions diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index c2cc292..9c3e7c5 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -736,7 +736,7 @@ struct lu_ucred { struct group_info *uc_ginfo; struct md_identity *uc_identity; char uc_jobid[LUSTRE_JOBID_SIZE]; - lnet_nid_t uc_nid; + struct lnet_nid uc_nid; bool uc_enable_audit; int uc_rbac_file_perms:1; int uc_rbac_dne_ops:1; diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 108fe2b..874d677 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -3100,6 +3100,7 @@ enum llog_flag { LLOG_F_EXT_X_XATTR = 0x200, LLOG_F_RM_ON_ERR = 0x400, LLOG_F_MAX_AGE = 0x800, + LLOG_F_EXT_X_NID_BE = 0x1000, /* Note: Flags covered by LLOG_F_EXT_MASK will be inherited from * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here, @@ -3108,7 +3109,8 @@ enum llog_flag { */ LLOG_F_EXT_MASK = 
LLOG_F_EXT_JOBID | LLOG_F_EXT_EXTRA_FLAGS | LLOG_F_EXT_X_UIDGID | LLOG_F_EXT_X_NID | - LLOG_F_EXT_X_OMODE | LLOG_F_EXT_X_XATTR, + LLOG_F_EXT_X_OMODE | LLOG_F_EXT_X_XATTR | + LLOG_F_EXT_X_NID_BE, }; /* On-disk header structure of each log object, stored in little endian order */ diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 7c6d2d3..d84697f 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -1871,31 +1871,26 @@ enum changelog_rec_extra_flags { CLFE_NID = 0x0002, CLFE_OPEN = 0x0004, CLFE_XATTR = 0x0008, - CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR + /* NID is in network-byte-order and may be large. */ + CLFE_NID_BE = 0x0010, + + CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR | + CLFE_NID_BE, }; enum changelog_send_flag { /* Use changelog follow mode: llapi_changelog_recv() will not stop at * the end of records and wait for new records to be generated. */ - CHANGELOG_FLAG_FOLLOW = 0x01, + CHANGELOG_FLAG_FOLLOW = 0x01, /* Deprecated since Lustre 2.10 */ - CHANGELOG_FLAG_BLOCK = 0x02, + CHANGELOG_FLAG_BLOCK = 0x02, /* Pack jobid into the changelog records if available. 
*/ - CHANGELOG_FLAG_JOBID = 0x04, + CHANGELOG_FLAG_JOBID = 0x04, /* Pack additional flag bits into the changelog record */ - CHANGELOG_FLAG_EXTRA_FLAGS = 0x08, -}; - -enum changelog_send_extra_flag { - /* Pack uid/gid into the changelog record */ - CHANGELOG_EXTRA_FLAG_UIDGID = 0x01, - /* Pack nid into the changelog record */ - CHANGELOG_EXTRA_FLAG_NID = 0x02, - /* Pack open mode into the changelog record */ - CHANGELOG_EXTRA_FLAG_OMODE = 0x04, - /* Pack xattr name into the changelog record */ - CHANGELOG_EXTRA_FLAG_XATTR = 0x08, + CHANGELOG_FLAG_EXTRA_FLAGS = 0x08, + /* Request NIDs to be packed in large big-endian format */ + CHANGELOG_FLAG_NID_BE = 0x10, }; #define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \ @@ -1950,9 +1945,10 @@ struct changelog_ext_uidgid { /* Changelog extra extension to include NID. */ struct changelog_ext_nid { - /* have __u64 instead of lnet_nid_t type for use by client api */ + /* If CLFE_NID_BE is not set cr_nid is of the lnet_nid_t type. + * With CLFE_NID_BE set then all this data is struct lnet_nid + */ __u64 cr_nid; - /* for use when IPv6 support is added */ __u64 extra; __u32 padding; }; @@ -2154,173 +2150,6 @@ inline __kernel_size_t changelog_rec_snamelen(const struct changelog_rec *rec) (changelog_rec_sname(rec) - changelog_rec_name(rec)); } -/** - * Remap a record to the desired format as specified by the crf flags. - * The record must be big enough to contain the final remapped version. - * Superfluous extension fields are removed and missing ones are added - * and zeroed. The flags of the record are updated accordingly. - * - * The jobid and rename extensions can be added to a record, to match the - * format an application expects, typically. In this case, the newly added - * fields will be zeroed. - * The Jobid field can be removed, to guarantee compatibility with older - * clients that don't expect this field in the records they process. 
- * - * The following assumptions are being made: - * - CLF_RENAME will not be removed - * - CLF_JOBID will not be added without CLF_RENAME being added too - * - CLF_EXTRA_FLAGS will not be added without CLF_JOBID being added too - * - * @param[in,out] rec The record to remap. - * @param[in] crf_wanted Flags describing the desired extensions. - * @param[in] cref_want Flags describing the desired extra extensions. - */ -static inline void changelog_remap_rec(struct changelog_rec *rec, - enum changelog_rec_flags crf_wanted, - enum changelog_rec_extra_flags cref_want) -{ - char *xattr_mov = NULL; - char *omd_mov = NULL; - char *nid_mov = NULL; - char *uidgid_mov = NULL; - char *ef_mov; - char *jid_mov; - char *rnm_mov; - enum changelog_rec_extra_flags cref = CLFE_INVALID; - - crf_wanted = (enum changelog_rec_flags) - (crf_wanted & CLF_SUPPORTED); - cref_want = (enum changelog_rec_extra_flags) - (cref_want & CLFE_SUPPORTED); - - if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) { - if (!(rec->cr_flags & CLF_EXTRA_FLAGS) || - (rec->cr_flags & CLF_EXTRA_FLAGS && - (changelog_rec_extra_flags(rec)->cr_extra_flags & - CLFE_SUPPORTED) == - cref_want)) - return; - } - - /* First move the variable-length name field */ - memmove((char *)rec + changelog_rec_offset(crf_wanted, cref_want), - changelog_rec_name(rec), rec->cr_namelen); - - /* Locations of extensions in the remapped record */ - if (rec->cr_flags & CLF_EXTRA_FLAGS) { - xattr_mov = (char *)rec + - changelog_rec_offset( - (enum changelog_rec_flags) - (crf_wanted & CLF_SUPPORTED), - (enum changelog_rec_extra_flags) - (cref_want & ~CLFE_XATTR)); - omd_mov = (char *)rec + - changelog_rec_offset( - (enum changelog_rec_flags) - (crf_wanted & CLF_SUPPORTED), - (enum changelog_rec_extra_flags) - (cref_want & ~(CLFE_OPEN | CLFE_XATTR))); - nid_mov = (char *)rec + - changelog_rec_offset( - (enum changelog_rec_flags) - (crf_wanted & CLF_SUPPORTED), - (enum changelog_rec_extra_flags) - (cref_want & - ~(CLFE_NID | CLFE_OPEN | 
CLFE_XATTR))); - uidgid_mov = (char *)rec + - changelog_rec_offset( - (enum changelog_rec_flags) - (crf_wanted & CLF_SUPPORTED), - (enum changelog_rec_extra_flags) - (cref_want & ~(CLFE_UIDGID | - CLFE_NID | - CLFE_OPEN | - CLFE_XATTR))); - cref = (enum changelog_rec_extra_flags) - changelog_rec_extra_flags(rec)->cr_extra_flags; - } - - ef_mov = (char *)rec + - changelog_rec_offset( - (enum changelog_rec_flags) - (crf_wanted & ~CLF_EXTRA_FLAGS), CLFE_INVALID); - jid_mov = (char *)rec + - changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & - ~(CLF_EXTRA_FLAGS | CLF_JOBID)), - CLFE_INVALID); - rnm_mov = (char *)rec + - changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & - ~(CLF_EXTRA_FLAGS | - CLF_JOBID | - CLF_RENAME)), - CLFE_INVALID); - - /* Move the extension fields to the desired positions */ - if ((crf_wanted & CLF_EXTRA_FLAGS) && - (rec->cr_flags & CLF_EXTRA_FLAGS)) { - if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR)) - memmove(xattr_mov, changelog_rec_xattr(rec), - sizeof(struct changelog_ext_xattr)); - - if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN)) - memmove(omd_mov, changelog_rec_openmode(rec), - sizeof(struct changelog_ext_openmode)); - - if ((cref_want & CLFE_NID) && (cref & CLFE_NID)) - memmove(nid_mov, changelog_rec_nid(rec), - sizeof(struct changelog_ext_nid)); - - if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID)) - memmove(uidgid_mov, changelog_rec_uidgid(rec), - sizeof(struct changelog_ext_uidgid)); - - memmove(ef_mov, changelog_rec_extra_flags(rec), - sizeof(struct changelog_ext_extra_flags)); - } - - if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID)) - memmove(jid_mov, changelog_rec_jobid(rec), - sizeof(struct changelog_ext_jobid)); - - if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME)) - memmove(rnm_mov, changelog_rec_rename(rec), - sizeof(struct changelog_ext_rename)); - - /* Clear newly added fields */ - if (xattr_mov && (cref_want & CLFE_XATTR) && - !(cref & CLFE_XATTR)) - memset(xattr_mov, 
0, sizeof(struct changelog_ext_xattr)); - - if (omd_mov && (cref_want & CLFE_OPEN) && - !(cref & CLFE_OPEN)) - memset(omd_mov, 0, sizeof(struct changelog_ext_openmode)); - - if (nid_mov && (cref_want & CLFE_NID) && - !(cref & CLFE_NID)) - memset(nid_mov, 0, sizeof(struct changelog_ext_nid)); - - if (uidgid_mov && (cref_want & CLFE_UIDGID) && - !(cref & CLFE_UIDGID)) - memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid)); - - if ((crf_wanted & CLF_EXTRA_FLAGS) && - !(rec->cr_flags & CLF_EXTRA_FLAGS)) - memset(ef_mov, 0, sizeof(struct changelog_ext_extra_flags)); - - if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID)) - memset(jid_mov, 0, sizeof(struct changelog_ext_jobid)); - - if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME)) - memset(rnm_mov, 0, sizeof(struct changelog_ext_rename)); - - /* Update the record's flags accordingly */ - rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted; - if (rec->cr_flags & CLF_EXTRA_FLAGS) - changelog_rec_extra_flags(rec)->cr_extra_flags = - changelog_rec_extra_flags(rec)->cr_extra_flags | - cref_want; -} - enum changelog_message_type { CL_RECORD = 10, /* message is a changelog_rec */ CL_EOF = 11, /* at end of current changelog */ diff --git a/lustre/mdc/mdc_changelog.c b/lustre/mdc/mdc_changelog.c index 9594d25..a9bc2d4 100644 --- a/lustre/mdc/mdc_changelog.c +++ b/lustre/mdc/mdc_changelog.c @@ -277,12 +277,12 @@ static int chlg_load(void *args) struct obd_device *obd = NULL; struct llog_ctxt *ctx = NULL; struct llog_handle *llh = NULL; + enum llog_flag nid_be_flag = 0; int rc; - ENTRY; + ENTRY; crs->crs_last_catidx = 0; crs->crs_last_idx = 0; - again: obd = chlg_obd_get(ced); if (obd == NULL) @@ -302,13 +302,15 @@ again: GOTO(err_out, rc); } + if (crs->crs_flags & CLFE_NID_BE) + nid_be_flag = LLOG_F_EXT_X_NID_BE; rc = llog_init_handle(NULL, llh, LLOG_F_IS_CAT | LLOG_F_EXT_JOBID | LLOG_F_EXT_EXTRA_FLAGS | LLOG_F_EXT_X_UIDGID | - LLOG_F_EXT_X_NID | + LLOG_F_EXT_X_NID | nid_be_flag | 
LLOG_F_EXT_X_OMODE | LLOG_F_EXT_X_XATTR, NULL); diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 43532bd..984384a 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -1075,12 +1075,16 @@ void mdd_changelog_rec_extra_uidgid(struct changelog_rec *rec, uidgid->cr_gid = gid; } +/* To support the new large NID structure we use all the space in + * struct changelog_ext_nid to store struct lnet_nid. + */ void mdd_changelog_rec_extra_nid(struct changelog_rec *rec, - lnet_nid_t nid) + const struct lnet_nid *nid) { struct changelog_ext_nid *clnid = changelog_rec_nid(rec); - clnid->cr_nid = nid; + BUILD_BUG_ON(sizeof(*clnid) < sizeof(*nid)); + memcpy(clnid, nid, sizeof(*nid)); } void mdd_changelog_rec_extra_omode(struct changelog_rec *rec, u32 flags) @@ -1213,6 +1217,7 @@ int mdd_changelog_ns_store(const struct lu_env *env, clf_flags |= CLF_JOBID; xflags |= CLFE_UIDGID; xflags |= CLFE_NID; + xflags |= CLFE_NID_BE; } if (sname != NULL) @@ -1228,7 +1233,7 @@ int mdd_changelog_ns_store(const struct lu_env *env, mdd_changelog_rec_extra_uidgid(&rec->cr, uc->uc_uid, uc->uc_gid); if (xflags & CLFE_NID) - mdd_changelog_rec_extra_nid(&rec->cr, uc->uc_nid); + mdd_changelog_rec_extra_nid(&rec->cr, &uc->uc_nid); } rec->cr.cr_type = (__u32)type; diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 8707d54..d9db024 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -359,7 +359,7 @@ void mdd_changelog_rec_ext_extra_flags(struct changelog_rec *rec, __u64 eflags); void mdd_changelog_rec_extra_uidgid(struct changelog_rec *rec, __u64 uid, __u64 gid); void mdd_changelog_rec_extra_nid(struct changelog_rec *rec, - lnet_nid_t nid); + const struct lnet_nid *nid); void mdd_changelog_rec_extra_omode(struct changelog_rec *rec, u32 flags); void mdd_changelog_rec_extra_xattr(struct changelog_rec *rec, const char *xattr_name); diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index ab48418..6a3d9f8 100644 --- 
a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -944,6 +944,7 @@ static int mdd_changelog_data_store_by_fid(const struct lu_env *env, clf_flags |= CLF_JOBID; xflags |= CLFE_UIDGID; xflags |= CLFE_NID; + xflags |= CLFE_NID_BE; } if (type == CL_OPEN || type == CL_DN_OPEN) xflags |= CLFE_OPEN; @@ -975,7 +976,7 @@ static int mdd_changelog_data_store_by_fid(const struct lu_env *env, mdd_changelog_rec_extra_uidgid(&rec->cr, uc->uc_uid, uc->uc_gid); if (xflags & CLFE_NID) - mdd_changelog_rec_extra_nid(&rec->cr, uc->uc_nid); + mdd_changelog_rec_extra_nid(&rec->cr, &uc->uc_nid); if (xflags & CLFE_OPEN) mdd_changelog_rec_extra_omode(&rec->cr, clf_flags); if (xflags & CLFE_XATTR) { diff --git a/lustre/mdt/mdt_lib.c b/lustre/mdt/mdt_lib.c index 0558050..b8f8412 100644 --- a/lustre/mdt/mdt_lib.c +++ b/lustre/mdt/mdt_lib.c @@ -148,10 +148,9 @@ static void ucred_set_jobid(struct mdt_thread_info *info, struct lu_ucred *uc) static void ucred_set_nid(struct mdt_thread_info *info, struct lu_ucred *uc) { if (info && info->mti_exp && info->mti_exp->exp_connection) - uc->uc_nid = lnet_nid_to_nid4( - &info->mti_exp->exp_connection->c_peer.nid); + uc->uc_nid = info->mti_exp->exp_connection->c_peer.nid; else - uc->uc_nid = LNET_NID_ANY; + uc->uc_nid = LNET_ANY_NID; } static void ucred_set_audit_enabled(struct mdt_thread_info *info, diff --git a/lustre/obdclass/llog_osd.c b/lustre/obdclass/llog_osd.c index e56483b..4720eb6 100644 --- a/lustre/obdclass/llog_osd.c +++ b/lustre/obdclass/llog_osd.c @@ -817,6 +817,201 @@ static inline void llog_skip_over(struct llog_handle *lgh, __u64 *off, } /** + * Remap a record to the desired format as specified by the crf flags. + * The record must be big enough to contain the final remapped version. + * Superfluous extension fields are removed and missing ones are added + * and zeroed. The flags of the record are updated accordingly to what + * the calling llog layer can support. 
The only influence user land has is + * to store the NID in large NID format. The user land end user will + * receive all fields that are supported by the kernel. + * + * The jobid and rename extensions will be added to a record, to match the + * format an application expects, typically. In this case, the newly added + * fields will be zeroed. + * The Jobid field can be removed, to guarantee compatibility with older + * clients that don't expect this field in the records they process. + * + * The following assumptions are being made: + * - CLF_RENAME will not be removed + * - CLF_JOBID will not be added without CLF_RENAME being added too + * - CLF_EXTRA_FLAGS will not be added without CLF_JOBID being added too + * + * @rec: The record to remap. + * @crf_wanted: Flags describing the desired extensions. + * @cref_want: Flags describing the desired extra extensions. + */ +static void changelog_remap_rec(struct changelog_rec *rec, + enum changelog_rec_flags crf_wanted, + enum changelog_rec_extra_flags cref_want) +{ + char *xattr_mov = NULL; + char *omd_mov = NULL; + char *nid_mov = NULL; + char *uidgid_mov = NULL; + char *ef_mov; + char *jid_mov; + char *rnm_mov; + enum changelog_rec_extra_flags cref = CLFE_INVALID; + + crf_wanted = (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED); + cref_want = (enum changelog_rec_extra_flags) + (cref_want & CLFE_SUPPORTED); + + if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) { + if (!(rec->cr_flags & CLF_EXTRA_FLAGS) || + (rec->cr_flags & CLF_EXTRA_FLAGS && + (changelog_rec_extra_flags(rec)->cr_extra_flags & + CLFE_SUPPORTED) == + cref_want)) + return; + } + + /* First move the variable-length name field */ + memmove((char *)rec + changelog_rec_offset(crf_wanted, cref_want), + changelog_rec_name(rec), rec->cr_namelen); + + /* Locations of extensions in the remapped record */ + if (rec->cr_flags & CLF_EXTRA_FLAGS) { + xattr_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + 
(enum changelog_rec_extra_flags) + (cref_want & ~CLFE_XATTR)); + omd_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~(CLFE_OPEN | CLFE_XATTR))); + nid_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & + ~(CLFE_NID | CLFE_OPEN | CLFE_XATTR))); + uidgid_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~(CLFE_UIDGID | + CLFE_NID | + CLFE_OPEN | + CLFE_XATTR))); + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; + } + + ef_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & ~CLF_EXTRA_FLAGS), CLFE_INVALID); + jid_mov = (char *)rec + + changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & + ~(CLF_EXTRA_FLAGS | CLF_JOBID)), + CLFE_INVALID); + rnm_mov = (char *)rec + + changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & + ~(CLF_EXTRA_FLAGS | + CLF_JOBID | + CLF_RENAME)), + CLFE_INVALID); + + /* Move the extension fields to the desired positions */ + if ((crf_wanted & CLF_EXTRA_FLAGS) && + (rec->cr_flags & CLF_EXTRA_FLAGS)) { + if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR)) + memmove(xattr_mov, changelog_rec_xattr(rec), + sizeof(struct changelog_ext_xattr)); + + if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN)) + memmove(omd_mov, changelog_rec_openmode(rec), + sizeof(struct changelog_ext_openmode)); + + if ((cref_want & CLFE_NID) && (cref & CLFE_NID)) { + struct changelog_ext_nid *cen = changelog_rec_nid(rec); + + if ((cref_want & CLFE_NID_BE) != (cref & CLFE_NID_BE)) { + struct lnet_nid *nid; + + if (!(cref_want & CLFE_NID_BE)) { + nid = (struct lnet_nid *)cen; + if (nid_is_nid4(nid)) { + struct changelog_ext_nid *mov; + + mov = (struct changelog_ext_nid 
*)nid_mov; + mov->cr_nid = lnet_nid_to_nid4(nid); + cref &= ~CLFE_NID_BE; + } else { + cref &= ~(CLFE_NID | + CLFE_NID_BE); + } + } else { + nid = (struct lnet_nid *)nid_mov; + lnet_nid4_to_nid(cen->cr_nid, nid); + } + changelog_rec_extra_flags(rec)->cr_extra_flags = + cref; + } else { + memmove(nid_mov, cen, sizeof(*cen)); + } + } + + if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID)) + memmove(uidgid_mov, changelog_rec_uidgid(rec), + sizeof(struct changelog_ext_uidgid)); + + memmove(ef_mov, changelog_rec_extra_flags(rec), + sizeof(struct changelog_ext_extra_flags)); + } + + if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID)) + memmove(jid_mov, changelog_rec_jobid(rec), + sizeof(struct changelog_ext_jobid)); + + if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME)) + memmove(rnm_mov, changelog_rec_rename(rec), + sizeof(struct changelog_ext_rename)); + + /* Clear newly added fields */ + if (xattr_mov && (cref_want & CLFE_XATTR) && + !(cref & CLFE_XATTR)) + memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr)); + + if (omd_mov && (cref_want & CLFE_OPEN) && + !(cref & CLFE_OPEN)) + memset(omd_mov, 0, sizeof(struct changelog_ext_openmode)); + + if (nid_mov && (cref_want & CLFE_NID) && + !(cref & CLFE_NID)) + memset(nid_mov, 0, sizeof(struct changelog_ext_nid)); + + if (uidgid_mov && (cref_want & CLFE_UIDGID) && + !(cref & CLFE_UIDGID)) + memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid)); + + if ((crf_wanted & CLF_EXTRA_FLAGS) && + !(rec->cr_flags & CLF_EXTRA_FLAGS)) + memset(ef_mov, 0, sizeof(struct changelog_ext_extra_flags)); + + if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID)) + memset(jid_mov, 0, sizeof(struct changelog_ext_jobid)); + + if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME)) + memset(rnm_mov, 0, sizeof(struct changelog_ext_rename)); + + /* Update the record's flags accordingly */ + rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted; + if (rec->cr_flags & CLF_EXTRA_FLAGS) + 
changelog_rec_extra_flags(rec)->cr_extra_flags = + changelog_rec_extra_flags(rec)->cr_extra_flags | + cref_want; +} + +/** * Remove optional fields that the client doesn't expect. * This is typically in order to ensure compatibility with older clients. * It is assumed that since we exclusively remove fields, the block will be @@ -840,7 +1035,13 @@ static void changelog_block_trim_ext(struct llog_rec_hdr *hdr, if (!(loghandle->lgh_hdr->llh_flags & LLOG_F_EXT_X_OMODE)) extra_flags &= ~CLFE_OPEN; if (!(loghandle->lgh_hdr->llh_flags & LLOG_F_EXT_X_NID)) - extra_flags &= ~CLFE_NID; + extra_flags &= ~(CLFE_NID | CLFE_NID_BE); + if (!(loghandle->lgh_hdr->llh_flags & LLOG_F_EXT_X_NID_BE)) { + if (extra_flags & CLFE_NID_BE) { + /* The large nid won't be understood */ + extra_flags &= ~CLFE_NID_BE; + } + } if (!(loghandle->lgh_hdr->llh_flags & LLOG_F_EXT_X_UIDGID)) extra_flags &= ~CLFE_UIDGID; if (!(loghandle->lgh_hdr->llh_flags & LLOG_F_EXT_EXTRA_FLAGS)) @@ -879,6 +1080,9 @@ static void changelog_block_trim_ext(struct llog_rec_hdr *hdr, break; } + /* Fill up the changelog record with everything the kernel + * version supports. 
+ */ changelog_remap_rec(rec, rec->cr_flags & flags, xflag); hdr = llog_rec_hdr_next(hdr); /* Yield CPU to avoid soft-lockup if there are too many records diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index b81281b..31b2839 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -4659,6 +4659,7 @@ void lustre_assert_wire_constants(void) BUILD_BUG_ON(LLOG_F_EXT_X_NID != 0x00000080); BUILD_BUG_ON(LLOG_F_EXT_X_OMODE != 0x00000100); BUILD_BUG_ON(LLOG_F_EXT_X_XATTR != 0x00000200); + BUILD_BUG_ON(LLOG_F_EXT_X_NID_BE != 0x00001000); /* Checks for struct llogd_body */ LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n", diff --git a/lustre/tests/lustre-rsync-test.sh b/lustre/tests/lustre-rsync-test.sh old mode 100644 new mode 100755 diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index b29b349..3067a49 100755 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -10163,6 +10163,7 @@ static int lfs_changelog(int argc, char **argv) rc = llapi_changelog_start(&changelog_priv, CHANGELOG_FLAG_BLOCK | CHANGELOG_FLAG_JOBID | + CHANGELOG_FLAG_NID_BE | CHANGELOG_FLAG_EXTRA_FLAGS | (follow ? 
CHANGELOG_FLAG_FOLLOW : 0), mdd, startrec); @@ -10231,11 +10232,17 @@ static int lfs_changelog(int argc, char **argv) (unsigned long long)uidgid->cr_gid); } if (ef->cr_extra_flags & CLFE_NID) { - struct changelog_ext_nid *nid = - changelog_rec_nid(rec); + if (ef->cr_extra_flags & CLFE_NID_BE) { + struct lnet_nid *nid = + (void *)changelog_rec_nid(rec); + printf(" nid=%s", libcfs_nidstr(nid)); + } else { + struct changelog_ext_nid *nid = + changelog_rec_nid(rec); - printf(" nid=%s", - libcfs_nid2str(nid->cr_nid)); + printf(" nid=%s", + libcfs_nid2str(nid->cr_nid)); + } } if (ef->cr_extra_flags & CLFE_OPEN) { diff --git a/lustre/utils/liblustreapi_chlg.c b/lustre/utils/liblustreapi_chlg.c index ce1727e..b993e45 100644 --- a/lustre/utils/liblustreapi_chlg.c +++ b/lustre/utils/liblustreapi_chlg.c @@ -151,14 +151,13 @@ int llapi_changelog_start(void **priv, enum changelog_send_flag flags, warned_jobid = true; } - if (flags & CHANGELOG_FLAG_FOLLOW) { + if (flags & (CHANGELOG_FLAG_FOLLOW | CHANGELOG_FLAG_NID_BE)) { int rc; - rc = ioctl(cp->clp_fd, OBD_IOC_CHLG_SET_FLAGS, - CHANGELOG_FLAG_FOLLOW); + rc = ioctl(cp->clp_fd, OBD_IOC_CHLG_SET_FLAGS, flags); if (rc < 0) - llapi_err_noerrno(LLAPI_MSG_ERROR, "can't enable " - "CHANGELOG_FLAG_FOLLOW"); + llapi_err_noerrno(LLAPI_MSG_ERROR, + "can't enable kernel send flags"); } return 0; @@ -217,6 +216,97 @@ int llapi_changelog_get_fd(void *priv) return cp->clp_fd; } +/** + * Messages containing changelog records sent by the kernel are collected in + * llapi_changelog_recv(). Those messages contain everything that the kernel + * can support. Our user land application might have no interest in many of + * those extra fields so we then repack the rec with only what we want. 
+ */
+struct changelog_rec *
+llapi_changelog_repack_rec(const struct changelog_rec *rec,
+			   enum changelog_rec_flags crf_wanted,
+			   enum changelog_rec_extra_flags cref_want)
+{
+	enum changelog_rec_extra_flags cref = CLFE_INVALID;
+	struct changelog_rec *new_rec;
+
+	crf_wanted &= CLF_SUPPORTED;
+	cref_want &= CLFE_SUPPORTED;
+
+	new_rec = calloc(1, CR_MAXSIZE);
+	if (!new_rec)
+		return NULL;
+
+	/* Copy the changelog record header but reset the flags */
+	memcpy((char *)new_rec, (char *)rec, sizeof(struct changelog_rec));
+
+	/* Keep the lower bits of cr_flags */
+	new_rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | CLF_VERSION;
+	if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME)) {
+		new_rec->cr_flags |= CLF_RENAME;
+		memcpy(changelog_rec_rename(new_rec),
+		       changelog_rec_rename(rec),
+		       sizeof(struct changelog_ext_rename));
+	}
+
+	if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID)) {
+		new_rec->cr_flags |= CLF_JOBID;
+		memcpy(changelog_rec_jobid(new_rec),
+		       changelog_rec_jobid(rec),
+		       sizeof(struct changelog_ext_jobid));
+	}
+
+	if ((crf_wanted & CLF_EXTRA_FLAGS) && (rec->cr_flags & CLF_EXTRA_FLAGS)) {
+		cref = changelog_rec_extra_flags(rec)->cr_extra_flags;
+
+		changelog_rec_extra_flags(new_rec)->cr_extra_flags = CLFE_INVALID;
+		new_rec->cr_flags |= CLF_EXTRA_FLAGS;
+	}
+
+	if (cref == CLFE_INVALID)
+		goto no_extras;
+
+	if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID)) {
+		changelog_rec_extra_flags(new_rec)->cr_extra_flags |= CLFE_UIDGID;
+		memcpy(changelog_rec_uidgid(new_rec),
+		       changelog_rec_uidgid(rec),
+		       sizeof(struct changelog_ext_uidgid));
+	}
+
+	if ((cref_want & CLFE_NID) && (cref & CLFE_NID)) {
+		changelog_rec_extra_flags(new_rec)->cr_extra_flags |= CLFE_NID;
+		if ((cref_want & CLFE_NID_BE) && (cref & CLFE_NID_BE))
+			changelog_rec_extra_flags(new_rec)->cr_extra_flags |= CLFE_NID_BE;
+
+		memcpy(changelog_rec_nid(new_rec),
+		       changelog_rec_nid(rec),
+		       sizeof(struct changelog_ext_nid));
+	}
+
+	if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN)) {
+		changelog_rec_extra_flags(new_rec)->cr_extra_flags |= CLFE_OPEN;
+		memcpy(changelog_rec_openmode(new_rec),
+		       changelog_rec_openmode(rec),
+		       sizeof(struct changelog_ext_openmode));
+	} else {
+		cref &= ~CLFE_OPEN;
+	}
+
+	if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR)) {
+		changelog_rec_extra_flags(new_rec)->cr_extra_flags |= CLFE_XATTR;
+		memcpy(changelog_rec_xattr(new_rec),
+		       changelog_rec_xattr(rec),
+		       sizeof(struct changelog_ext_xattr));
+	}
+
+no_extras:
+	/* Lastly move the variable-length name field */
+	memcpy(changelog_rec_name(new_rec),
+	       changelog_rec_name(rec), rec->cr_namelen);
+
+	return new_rec;
+}
+
 /** Read the next changelog entry
  * @param priv Opaque private control structure
  * @param rech Changelog record handle; record will be allocated here
@@ -239,10 +329,7 @@ int llapi_changelog_recv(void *priv, struct changelog_rec **rech)
 	if (rech == NULL)
 		return -EINVAL;
 
-	*rech = malloc(CR_MAXSIZE);
-	if (*rech == NULL)
-		return -ENOMEM;
-
+	*rech = NULL;
 	if (cp->clp_send_flags & CHANGELOG_FLAG_JOBID)
 		rec_fmt |= CLF_JOBID;
 
@@ -250,8 +337,11 @@ int llapi_changelog_recv(void *priv, struct changelog_rec **rech)
 		rec_fmt |= CLF_EXTRA_FLAGS;
 	if (cp->clp_send_extra_flags & CHANGELOG_EXTRA_FLAG_UIDGID)
 		rec_extra_fmt |= CLFE_UIDGID;
-	if (cp->clp_send_extra_flags & CHANGELOG_EXTRA_FLAG_NID)
+	if (cp->clp_send_extra_flags & CHANGELOG_EXTRA_FLAG_NID) {
 		rec_extra_fmt |= CLFE_NID;
+		if (cp->clp_send_flags & CHANGELOG_FLAG_NID_BE)
+			rec_extra_fmt |= CLFE_NID_BE;
+	}
 	if (cp->clp_send_extra_flags & CHANGELOG_EXTRA_FLAG_OMODE)
 		rec_extra_fmt |= CLFE_OPEN;
 	if (cp->clp_send_extra_flags & CHANGELOG_EXTRA_FLAG_XATTR)
@@ -265,27 +355,23 @@ int llapi_changelog_recv(void *priv, struct changelog_rec **rech)
 		if (refresh == 0) {
 			/* EOF */
 			rc = 1;
-			goto out_free;
+			goto out;
 		} else if (refresh < 0) {
 			rc = refresh;
-			goto out_free;
+			goto out;
 		}
 	}
 
-	/* TODO check changelog_rec_size */
 	tmp = (struct changelog_rec *)cp->clp_buf_pos;
-
-	memcpy(*rech, cp->clp_buf_pos,
-	       changelog_rec_size(tmp) + tmp->cr_namelen);
-
+	*rech = llapi_changelog_repack_rec(tmp, rec_fmt, rec_extra_fmt);
+	if (*rech == NULL) {
+		/* allocation failed; do not consume the record */
+		rc = -ENOMEM;
+		goto out;
+	}
+
 	cp->clp_buf_pos += changelog_rec_size(tmp) + tmp->cr_namelen;
 
-	changelog_remap_rec(*rech, rec_fmt, rec_extra_fmt);
-
-	return 0;
-
-out_free:
-	free(*rech);
-	*rech = NULL;
+out:
 	return rc;
 }
diff --git a/lustre/utils/llog_reader.c b/lustre/utils/llog_reader.c
index c7d6da2..dfe35af 100644
--- a/lustre/utils/llog_reader.c
+++ b/lustre/utils/llog_reader.c
@@ -742,6 +742,7 @@ static void print_hsm_action(struct llog_agent_req_rec *larr)
 
 static void print_changelog_rec(struct llog_changelog_rec *rec)
 {
+	__u32 crf = __le32_to_cpu(rec->cr.cr_flags);
 	time_t secs;
 	struct tm ts;
 
@@ -750,15 +751,14 @@ static void print_changelog_rec(struct llog_changelog_rec *rec)
 	printf("changelog record id:0x%x index:%llu cr_flags:0x%x cr_type:%s(0x%x) date:'%02d:%02d:%02d.%09d %04d.%02d.%02d' target:"DFID,
 	       __le32_to_cpu(rec->cr_hdr.lrh_id),
 	       (unsigned long long)__le64_to_cpu(rec->cr.cr_index),
-	       __le32_to_cpu(rec->cr.cr_flags),
-	       changelog_type2str(__le32_to_cpu(rec->cr.cr_type)),
+	       crf, changelog_type2str(__le32_to_cpu(rec->cr.cr_type)),
 	       __le32_to_cpu(rec->cr.cr_type),
 	       ts.tm_hour, ts.tm_min, ts.tm_sec,
 	       (int)(__le64_to_cpu(rec->cr.cr_time) & ((1 << 30) - 1)),
 	       ts.tm_year + 1900, ts.tm_mon + 1, ts.tm_mday,
 	       PFID(&rec->cr.cr_tfid));
 
-	if (rec->cr.cr_flags & CLF_JOBID) {
+	if (crf & CLF_JOBID) {
 		struct changelog_ext_jobid *jid =
 			changelog_rec_jobid(&rec->cr);
 
@@ -766,14 +766,13 @@ static void print_changelog_rec(struct llog_changelog_rec *rec)
 			printf(" jobid:%s", jid->cr_jobid);
 	}
 
-	if (rec->cr.cr_flags & CLF_EXTRA_FLAGS) {
+	if (crf & CLF_EXTRA_FLAGS) {
 		struct changelog_ext_extra_flags *ef =
 			changelog_rec_extra_flags(&rec->cr);
+		unsigned long long cref = __le64_to_cpu(ef->cr_extra_flags);
 
-		printf(" cr_extra_flags:0x%llx",
-		       (unsigned long long)__le64_to_cpu(ef->cr_extra_flags));
-
-		if (ef->cr_extra_flags & CLFE_UIDGID) {
+		printf(" cr_extra_flags:0x%llx", cref);
+		if (cref & CLFE_UIDGID) {
 			struct changelog_ext_uidgid *uidgid =
 				changelog_rec_uidgid(&rec->cr);
 
@@ -781,15 +780,23 @@ static void print_changelog_rec(struct llog_changelog_rec *rec)
 			       __le32_to_cpu(uidgid->cr_uid),
 			       __le32_to_cpu(uidgid->cr_gid));
 		}
-		if (ef->cr_extra_flags & CLFE_NID) {
-			struct changelog_ext_nid *nid =
-				changelog_rec_nid(&rec->cr);
+		if (cref & CLFE_NID) {
+			if (cref & CLFE_NID_BE) {
+				struct changelog_ext_nid *cnid =
+					changelog_rec_nid(&rec->cr);
+				struct lnet_nid *nid = (void *)cnid;
+
+				printf(" nid:%s", libcfs_nidstr(nid));
+			} else {
+				struct changelog_ext_nid *nid =
+					changelog_rec_nid(&rec->cr);
 
-			printf(" nid:%s",
-			       libcfs_nid2str(nid->cr_nid));
+				printf(" nid:%s",
+				       libcfs_nid2str(nid->cr_nid));
+			}
 		}
 
-		if (ef->cr_extra_flags & CLFE_OPEN) {
+		if (cref & CLFE_OPEN) {
 			struct changelog_ext_openmode *omd =
 				changelog_rec_openmode(&rec->cr);
 			char mode[] = "---";
@@ -810,7 +817,7 @@ static void print_changelog_rec(struct llog_changelog_rec *rec)
 			printf(" mode:%s", mode);
 		}
 
-		if (ef->cr_extra_flags & CLFE_XATTR) {
+		if (cref & CLFE_XATTR) {
 			struct changelog_ext_xattr *xattr =
 				changelog_rec_xattr(&rec->cr);
 
@@ -824,7 +831,7 @@ static void print_changelog_rec(struct llog_changelog_rec *rec)
 		       __le32_to_cpu(rec->cr.cr_namelen),
 		       changelog_rec_name(&rec->cr));
 
-	if (rec->cr.cr_flags & CLF_RENAME) {
+	if (crf & CLF_RENAME) {
 		struct changelog_ext_rename *rnm =
 			changelog_rec_rename(&rec->cr);
 
diff --git a/lustre/utils/lustre_rsync.c b/lustre/utils/lustre_rsync.c
index 9ada5c3..a0c2efc 100644
--- a/lustre/utils/lustre_rsync.c
+++ b/lustre/utils/lustre_rsync.c
@@ -1619,10 +1619,7 @@ int lr_replicate(void)
 	}
 
 	rc = llapi_changelog_set_xflags(changelog_priv,
-					CHANGELOG_EXTRA_FLAG_UIDGID |
-					CHANGELOG_EXTRA_FLAG_NID |
-					CHANGELOG_EXTRA_FLAG_OMODE |
-					CHANGELOG_EXTRA_FLAG_XATTR);
+					CHANGELOG_EXTRA_FLAG_UIDGID);
 	if (rc < 0) {
 		fprintf(stderr, "Error setting xflag in changelog for fs %s.\n",
 			status->ls_source_fs);
diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c
index 9556d51..a23b87a 100644
--- a/lustre/utils/wirecheck.c
+++ b/lustre/utils/wirecheck.c
@@ -2147,6 +2147,7 @@ check_llog_log_hdr(void)
 	CHECK_CVALUE_X(LLOG_F_EXT_X_NID);
 	CHECK_CVALUE_X(LLOG_F_EXT_X_OMODE);
 	CHECK_CVALUE_X(LLOG_F_EXT_X_XATTR);
+	CHECK_CVALUE_X(LLOG_F_EXT_X_NID_BE);
 }
 
 static void
diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c
index 750a2b9..9d283d6 100644
--- a/lustre/utils/wiretest.c
+++ b/lustre/utils/wiretest.c
@@ -4684,6 +4684,7 @@ void lustre_assert_wire_constants(void)
 	BUILD_BUG_ON(LLOG_F_EXT_X_NID != 0x00000080);
 	BUILD_BUG_ON(LLOG_F_EXT_X_OMODE != 0x00000100);
 	BUILD_BUG_ON(LLOG_F_EXT_X_XATTR != 0x00000200);
+	BUILD_BUG_ON(LLOG_F_EXT_X_NID_BE != 0x00001000);
 
 	/* Checks for struct llogd_body */
 	LASSERTF((int)sizeof(struct llogd_body) == 48, "found %lld\n",
-- 
1.8.3.1