From: Bruno Faccini Date: Wed, 27 Feb 2019 21:59:52 +0000 (-0500) Subject: LU-11376 lov: new foreign LOV format X-Git-Tag: 2.12.54~99 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;ds=inline;h=6a20bdcc608bc2b933774b9f34ec25395e920a54;p=fs%2Flustre-release.git LU-11376 lov: new foreign LOV format This patch introduces a new layout/LOV format that allows an arbitrary external reference to be specified for a file in the Lustre namespace. The new LOV format is made of {newmagic, length, type, flags, string[length]} to be as flexible as possible. A foreign file can be created by using the open(O_LOV_DELAY_CREATE) + ioctl(LL_IOC_LOV_SETSTRIPE) operations, and it must be, and remain, an empty file until it is removed. A new API method llapi_file_create_foreign() has been introduced, and the "lfs getstripe", "lfs setstripe" and "lfs find" commands have been modified to understand the new layout. The idea behind this is to provide Lustre namespace support and layout prefetch/caching under layout protection, for user/external usage. Code has been added for lfsck to handle foreign files, and a new sub-test has been added in sanity-lfsck to verify that lfsck does not break foreign files and that the reverse is also true. 
Signed-off-by: Bruno Faccini Change-Id: I5d9c0642fe8e7009c30918bfa946cac7c00c9af8 Reviewed-on: https://review.whamcloud.com/33755 Tested-by: Jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Patrick Farrell --- diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am index 11e92bf..b18525c 100644 --- a/lustre/doc/Makefile.am +++ b/lustre/doc/Makefile.am @@ -63,6 +63,7 @@ MANFILES = \ llapi_create_volatile_param.3 \ llapi_fd2parent.3 \ llapi_file_create.3 \ + llapi_file_create_foreign.3 \ llapi_file_get_stripe.3 \ llapi_file_open.3 \ llapi_group_lock.3 \ diff --git a/lustre/doc/lfs-find.1 b/lustre/doc/lfs-find.1 index 4486b92..046d89a 100644 --- a/lustre/doc/lfs-find.1 +++ b/lustre/doc/lfs-find.1 @@ -12,6 +12,7 @@ lfs-find \- Lustre client utility to list files with specific attributes [[\fB!\fR] \fB--component-start|\fB--comp-start\fR [\fB+-\fR]\fIn\fR[\fBKMGTPE\fR]] [[\fB!\fR] \fB--gid\fR|\fB-g\fR|\fB--group\fR|\fB-G\fR <\fIgname\fR>|<\fIgid\fR>] [[\fB!\fR] \fB--layout\fR|\fB-L mdt\fR,\fBraid0\fR,\fBreleased\fR] + [[\fB!\fR] \fB--foreign\fR [<\fItype\fR>]] [\fB--maxdepth\fR|\fB-D\fI n\fR] [[\fB!\fR] \fB--mdt\fR|\fB--mdt-index\fR|\fB-m\fR <\fIuuid\fR|\fIindex\fR,...>] [[\fB!\fR] \fB--mdt-count\fR|\fB-T\fR [\fB+-\fR]\fIn\fR] @@ -107,7 +108,7 @@ File has specified numeric group ID. .BR --group | -G File belongs to specified group, numeric group ID allowed. .TP -.BR --layout +.BR --layout | -L File has a layout of the given type, one of: .RS 1.2i .TP @@ -121,6 +122,11 @@ HSM-archived files that are not resident in the filesystem. Files that have the first data component on an MDT. .RE .TP +.BR --foreign[=] +File is foreign (non-Lustre layout) and is of type if specified. +Presently only none or daos are defined types. +.RE +.TP .BR --maxdepth Limits find to decend at most \fIn\fR levels of directory tree. .TP @@ -290,6 +296,9 @@ Recursively list all mirrored files that have more than 2 mirrors. .TP .B $ lfs find ! 
--mirror-state=ro /mnt/lustre Recursively list all out-of-sync mirrored files. +.TP +.B $ lfs find ! --foreign=daos /mnt/lustre +Recursively list all but foreign files of daos type. .SH BUGS The .B lfs find diff --git a/lustre/doc/lfs-setstripe.1 b/lustre/doc/lfs-setstripe.1 index eaa109a..5bca972 100644 --- a/lustre/doc/lfs-setstripe.1 +++ b/lustre/doc/lfs-setstripe.1 @@ -23,6 +23,9 @@ lfs setstripe \- set striping pattern of a file or directory default .B lfs setstripe --yaml=\fR<\fIyaml_template_file.lyl\fR> <\fIfile\fR> .br .B lfs setstripe --copy=\fR<\fIsource_template_file\fR> <\fIfile\fR> +.br +.B lfs setstripe --foreign[=\fR<\fIforeign_type\fR>\fB] \ +[--flags=\fR<\fIhex\fR>\fB] --xattr=\fR<\fIlayout_string\fR> <\fIfile\fR> .SH DESCRIPTION The .B lfs setstripe @@ -123,6 +126,18 @@ This is similar to the option but avoids the need for the intermediate .B .lyl file. +.TP +.B lfs setstripe --foreign[=\fR<\fIforeign_type\fR>\fB] \ +[--flags=\fR<\fIhex\fR>\fB] --xattr=\fR<\fIlayout_string\fR> <\fIfile\fR> +.br +Create a new +.I file +with a foreign/non-lustre layout of type +.I foreign_type \fR(\fBnone\fR, \fBdaos\fR, ...) +with flags +.I hex +and a free-format layout value of +.I layout_string. .SH STRIPE_OPTIONS The various OST stripe related options are listed and explained below: .TP @@ -231,6 +246,29 @@ or .BR pool_name=none (since Lustre 2.11) to force a component to inherit the pool from the parent or root directory instead of the previous component. +.TP +.B --foreign \fR[<\fIforeign_type\fR>] +file layout is non-lustre/free-format and of type < +.IR foreign_type +>, if specified (see also +.IR --xattr +and +.IR --flags +options). +Current known types are +.BR none +and +.BR daos +, default is +.BR none +. +.B -x\fR, \fB--xattr \fR<\fIlayout_string\fR> +Mandatory non-lustre/free-format layout/LOV EA content for +.I foreign +file. +.TP +.B -F\fR, \fB--flags \fR<\fIhex\fR> +Optional bitmap of flags for foreign type. 
.SH COMPONENT_OPTIONS The various component related options are listed and explained below. The .B --component-* @@ -436,6 +474,18 @@ file which can be created with the .B lfs getstripe --yaml command. +.TP +.B lfs setstripe --foreign=daos --flags=0xda08 \ + --xattr=PUUID:CUUID /mnt/lustre/file1 +This creates foreign +.BR file1 +of type +.BR daos +with non-lustre/free-format +.BR PUUID:CUUID +layout/LOV EA and flags +.BR 0xda08 +. .SH SEE ALSO .BR lctl (1), .BR lfs (1), diff --git a/lustre/doc/llapi_file_create_foreign.3 b/lustre/doc/llapi_file_create_foreign.3 new file mode 100644 index 0000000..be03522 --- /dev/null +++ b/lustre/doc/llapi_file_create_foreign.3 @@ -0,0 +1,91 @@ +.TH lustreapi 3 "2009 Jul 10" The Lustre user application interface library +.SH NAME +llapi_file_create_foreign \- create a file with foreign layout on a Lustre filesystem +.SH SYNOPSIS +.nf +.B #include +.B #include +.B #include +.B #include +.sp +.BI "int llapi_file_create_foreign(const char *" name ", mode_t " mode "," +.BI " __u32 " type ", __u32 " flags ", char *" foreign_lov ); +.sp +.fi +.SH DESCRIPTION +.LP +.B llapi_file_create_foreign(\|) +creates a file with +.I foreign_lov +free format layout and +.I mode +and +.I name +on a Lustre filesystem, followed by file close. +.TP 15 +.I mode +specifies the permission bits. +Refer to +.BR open(2) +man page for a detailed description. +.TP 15 +.I type +specifies a type of foreign layout. +.TP 15 +.I flags +specifies any flags for foreign layout. +.TP 15 +.I foreign_lov +free-format string to be set as file layout/LOV EA. +.SH RETURN VALUES +.LP +.B llapi_file_create_foreign(\|) +return: +.TP +>=0 +on success, for +.B llapi_file_open +the return value is a file descriptor. +.TP +<0 +on failure, the absolute value is an error code. +.SH ERRORS +.TP 15 +.SM EEXIST +.I name +already exists. +.TP +.SM ENOTTY +.I name +may not point to a Lustre filesystem. 
+.SH "EXAMPLE" +.nf +#include +#include +#include +#include +#include +#include +int main(int argc, char *argv[]) +{ + int rc; + + if (argc != 2) + return -1; + + rc = llapi_file_create_foreign(argv[1], 0600, LOV_FOREIGN_TYPE_DAOS, + 0xda08, "PUUID@CUUID"); + if (rc < 0) { + fprintf(stderr, "file creation has failed, %s\\n", strerror(-rc)); + return -1; + } + printf("foreign file %s has been created with 'PUUID@CUUID' layout!\\n", + argv[1]); + return 0; +} +.fi +.SH "SEE ALSO" +.BR llapi_file_open (3) +.BR lustre (7), +.BR lustreapi (7) +.BR open (2), diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index 0e51d25..1f8dae2 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -128,6 +128,8 @@ struct llapi_stripe_param { int llapi_file_open_param(const char *name, int flags, mode_t mode, const struct llapi_stripe_param *param); +int llapi_file_create_foreign(const char *name, mode_t mode, __u32 type, + __u32 flags, char *foreign_lov); int llapi_file_create(const char *name, unsigned long long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern); int llapi_file_open(const char *name, int flags, int mode, @@ -263,7 +265,9 @@ struct find_param { fp_exclude_hash_type:1, fp_yaml:1, /* output layout in YAML */ fp_check_blocks:1, - fp_exclude_blocks:1; + fp_exclude_blocks:1, + fp_check_foreign:1, + fp_exclude_foreign:1; enum llapi_layout_verbose fp_verbose; int fp_quiet; @@ -321,6 +325,7 @@ struct find_param { unsigned int fp_depth; unsigned int fp_hash_type; unsigned int fp_time_margin; /* time margin in seconds */ + __u32 fp_foreign_type; }; int llapi_ostlist(char *path, struct find_param *param); diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 73b5cd2..e395e6c 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -1112,6 +1112,7 @@ enum obdo_flags { #define 
LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC) #define LOV_MAGIC LOV_MAGIC_V1 #define LOV_MAGIC_COMP_V1 (0x0BD60000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_FOREIGN (0x0BD70000 | LOV_MAGIC_MAGIC) /* * magic for fully defined striping diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 6229b61..3329aae 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -55,6 +55,7 @@ # include # include # include /* snprintf() */ +# include # include # define NEED_QUOTA_DEFS /* # include - this causes complaints about caddr_t */ @@ -540,6 +541,7 @@ struct fsxattr { /* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */ #define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */ #define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0 +#define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0 #define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */ #define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/ @@ -625,6 +627,21 @@ struct lov_user_md_v3 { /* LOV EA user data (host-endian) */ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ } __attribute__((packed)); +struct lov_foreign_md { + __u32 lfm_magic; /* magic number = LOV_MAGIC_FOREIGN */ + __u32 lfm_length; /* length of lfm_value */ + __u32 lfm_type; /* type, see LOV_FOREIGN_TYPE_ */ + __u32 lfm_flags; /* flags, type specific */ + char lfm_value[]; +}; + +#define foreign_size(lfm) (((struct lov_foreign_md *)lfm)->lfm_length + \ + offsetof(struct lov_foreign_md, lfm_value)) + +#define foreign_size_le(lfm) \ + (le32_to_cpu(((struct lov_foreign_md *)lfm)->lfm_length) + \ + offsetof(struct lov_foreign_md, lfm_value)) + struct lu_extent { __u64 e_start; __u64 e_end; @@ -782,6 +799,20 @@ enum lmv_hash_type { extern char *mdt_hash_name[LMV_HASH_TYPE_MAX]; +/** + * LOV foreign types + **/ +#define LOV_FOREIGN_TYPE_NONE 0 +#define LOV_FOREIGN_TYPE_DAOS 0xda05 +#define LOV_FOREIGN_TYPE_UNKNOWN UINT32_MAX + +struct 
lustre_foreign_type { + uint32_t lft_type; + const char *lft_name; +}; + +extern struct lustre_foreign_type lov_foreign_type[]; + /* Got this according to how get LOV_MAX_STRIPE_COUNT, see above, * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) */ #define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index 4f250e0..bc091f4 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -379,8 +379,29 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj, return 0; } +static int lfsck_layout_verify_header_foreign(struct dt_object *obj, + struct lov_foreign_md *lfm, + size_t len) +{ + /* magic has been verified already */ + __u32 value_len = le32_to_cpu(lfm->lfm_length); + /* type and flags are not checked for instance */ + + CDEBUG(D_INFO, "foreign LOV EA, magic %x, len %u, type %x, flags %x, for file "DFID"\n", + le32_to_cpu(lfm->lfm_magic), value_len, + le32_to_cpu(lfm->lfm_type), le32_to_cpu(lfm->lfm_flags), + PFID(lfsck_dto2fid(obj))); + + if (len != value_len + offsetof(typeof(*lfm), lfm_value)) + CDEBUG(D_LFSCK, "foreign LOV EA internal size %u does not match EA full size %zu for file "DFID"\n", + value_len, len, PFID(lfsck_dto2fid(obj))); + + /* nothing to repair */ + return -ENODATA; +} + static int lfsck_layout_verify_header(struct dt_object *obj, - struct lov_mds_md_v1 *lmm) + struct lov_mds_md_v1 *lmm, size_t len) { int rc = 0; @@ -430,6 +451,10 @@ static int lfsck_layout_verify_header(struct dt_object *obj, le32_to_cpu(lcme->lcme_offset)), start, comp_id); } + } else if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_FOREIGN) { + rc = lfsck_layout_verify_header_foreign(obj, + (struct lov_foreign_md *)lmm, + len); } else { rc = lfsck_layout_verify_header_v1v3(obj, lmm, 1, 0); } @@ -468,7 +493,7 @@ again: goto again; } - rc1 = lfsck_layout_verify_header(obj, buf->lb_buf); + rc1 = lfsck_layout_verify_header(obj, buf->lb_buf, rc); 
return rc1 ? rc1 : rc; } @@ -2881,7 +2906,7 @@ again: } lmm = buf->lb_buf; - rc1 = lfsck_layout_verify_header(parent, lmm); + rc1 = lfsck_layout_verify_header(parent, lmm, lovea_size); /* If the LOV EA crashed, the rebuild it. */ if (rc1 == -EINVAL) { diff --git a/lustre/llite/file.c b/lustre/llite/file.c index b5b09d9..8d2368b 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1975,7 +1975,8 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, if (lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V1) && lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_V3) && - lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_COMP_V1)) + lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_COMP_V1) && + lmm->lmm_magic != cpu_to_le32(LOV_MAGIC_FOREIGN)) GOTO(out, rc = -EPROTO); /* @@ -2014,6 +2015,15 @@ int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename, cpu_to_le32(LOV_MAGIC_COMP_V1)) { lustre_swab_lov_comp_md_v1( (struct lov_comp_md_v1 *)lmm); + } else if (lmm->lmm_magic == + cpu_to_le32(LOV_MAGIC_FOREIGN)) { + struct lov_foreign_md *lfm; + + lfm = (struct lov_foreign_md *)lmm; + __swab32s(&lfm->lfm_magic); + __swab32s(&lfm->lfm_length); + __swab32s(&lfm->lfm_type); + __swab32s(&lfm->lfm_flags); } } diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 99c8ab3..4a59769 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -1025,6 +1025,8 @@ static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum) LOV_USER_MAGIC_SPECIFIC); case LOV_USER_MAGIC_COMP_V1: return ((struct lov_comp_md_v1 *)lum)->lcm_size; + case LOV_USER_MAGIC_FOREIGN: + return foreign_size(lum); } return -EINVAL; diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 3958dd6..c98a852 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -472,6 +472,7 @@ static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size) }; struct lu_env *env; u16 refcheck; + u32 magic; if (!obj) RETURN(-ENODATA); @@ -500,7 
+501,8 @@ static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size) * recognizing layout gen as stripe offset when the * file is restored. See LU-2809. */ - if (((struct lov_mds_md *)buf)->lmm_magic == LOV_MAGIC_COMP_V1) + magic = ((struct lov_mds_md *)buf)->lmm_magic; + if (magic == LOV_MAGIC_COMP_V1 || magic == LOV_MAGIC_FOREIGN) goto out_env; ((struct lov_mds_md *)buf)->lmm_layout_gen = 0; diff --git a/lustre/llite/xattr26.c b/lustre/llite/xattr26.c index 917ea3f..152eb7f 100644 --- a/lustre/llite/xattr26.c +++ b/lustre/llite/xattr26.c @@ -451,6 +451,7 @@ static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size) .cl_buf.lb_len = buf_size, }; __u16 refcheck; + __u32 magic; if (obj == NULL) RETURN(-ENODATA); @@ -477,7 +478,8 @@ static ssize_t ll_getxattr_lov(struct inode *inode, void *buf, size_t buf_size) * otherwise it would confuse tar --xattr by * recognizing layout gen as stripe offset when the * file is restored. See LU-2809. */ - if (((struct lov_mds_md *)buf)->lmm_magic == LOV_MAGIC_COMP_V1) + magic = ((struct lov_mds_md *)buf)->lmm_magic; + if (magic == LOV_MAGIC_COMP_V1 || magic == LOV_MAGIC_FOREIGN) goto out_env; ((struct lov_mds_md *)buf)->lmm_layout_gen = 0; diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 83fb539..4304639 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -306,7 +306,8 @@ struct lod_object { struct lod_mirror_entry *ldo_mirrors; __u32 ldo_is_composite:1, ldo_flr_state:2, - ldo_comp_cached:1; + ldo_comp_cached:1, + ldo_is_foreign:1; }; /* directory stripe (LMV) */ struct { @@ -331,10 +332,19 @@ struct lod_object { struct lod_default_striping *ldo_def_striping; }; }; - /* file stripe (LOV) */ - struct lod_layout_component *ldo_comp_entries; - /* slave stripes of striped directory (LMV) */ - struct dt_object **ldo_stripe; + union { + struct { + /* foreign/raw format LOV */ + char *ldo_foreign_lov; + size_t ldo_foreign_lov_size; + }; + struct { + 
/* file stripe (LOV) */ + struct lod_layout_component *ldo_comp_entries; + /* slave stripes of striped directory (LMV) */ + struct dt_object **ldo_stripe; + }; + }; }; #define lod_foreach_mirror_comp(comp, lo, mirror_idx) \ @@ -519,6 +529,9 @@ static inline bool lod_obj_is_striped(struct dt_object *dt) if (S_ISDIR(dt->do_lu.lo_header->loh_attr)) return lo->ldo_dir_stripe_count != 0; + if (lo->ldo_is_foreign) + return false; + for (i = 0; i < lo->ldo_comp_cnt; i++) { if (lo->ldo_comp_entries[i].llc_stripe == NULL) continue; @@ -763,6 +776,8 @@ int lod_declare_striped_create(const struct lu_env *env, struct dt_object *dt, int lod_striped_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_object_format *dof, struct thandle *th); +int lod_alloc_foreign_lov(struct lod_object *lo, size_t size); +void lod_free_foreign_lov(struct lod_object *lo); void lod_striping_free_nolock(const struct lu_env *env, struct lod_object *lo); void lod_striping_free(const struct lu_env *env, struct lod_object *lo); diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index a990505..2197e41 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -945,7 +945,7 @@ int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo, struct lov_comp_md_v1 *lcm; struct lod_layout_component *comp_entries; __u16 comp_cnt, mirror_cnt; - bool is_composite; + bool is_composite, is_foreign = false; int i, rc = 0, offset; ENTRY; @@ -960,9 +960,27 @@ int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo, mirror_cnt = lo->ldo_mirror_count; comp_entries = lo->ldo_comp_entries; is_composite = lo->ldo_is_composite; + is_foreign = lo->ldo_is_foreign; } LASSERT(lmm_size != NULL); + + if (is_foreign) { + struct lov_foreign_md *lfm; + + lfm = (struct lov_foreign_md *)lmm; + memcpy(lfm, lo->ldo_foreign_lov, lo->ldo_foreign_lov_size); + /* need to store little-endian */ + if (cpu_to_le32(LOV_MAGIC_FOREIGN) != LOV_MAGIC_FOREIGN) { + 
__swab32s(&lfm->lfm_magic); + __swab32s(&lfm->lfm_length); + __swab32s(&lfm->lfm_type); + __swab32s(&lfm->lfm_flags); + } + *lmm_size = lo->ldo_foreign_lov_size; + RETURN(0); + } + LASSERT(comp_cnt != 0 && comp_entries != NULL); if (!is_composite) { @@ -1237,6 +1255,7 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo, { struct lov_mds_md_v1 *lmm; struct lov_comp_md_v1 *comp_v1 = NULL; + struct lov_foreign_md *foreign = NULL; struct lov_ost_data_v1 *objs; __u32 magic, pattern; int i, j, rc = 0; @@ -1253,10 +1272,13 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo, magic = le32_to_cpu(lmm->lmm_magic); if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3 && - magic != LOV_MAGIC_COMP_V1) + magic != LOV_MAGIC_COMP_V1 && magic != LOV_MAGIC_FOREIGN) GOTO(out, rc = -EINVAL); - lod_free_comp_entries(lo); + if (lo->ldo_is_foreign) + lod_free_foreign_lov(lo); + else + lod_free_comp_entries(lo); if (magic == LOV_MAGIC_COMP_V1) { comp_v1 = (struct lov_comp_md_v1 *)lmm; @@ -1268,6 +1290,25 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo, lo->ldo_flr_state = le16_to_cpu(comp_v1->lcm_flags) & LCM_FL_FLR_MASK; mirror_cnt = le16_to_cpu(comp_v1->lcm_mirror_count) + 1; + } else if (magic == LOV_MAGIC_FOREIGN) { + size_t length; + + foreign = (struct lov_foreign_md *)buf->lb_buf; + length = offsetof(typeof(*foreign), lfm_value); + if (buf->lb_len < length || + buf->lb_len < (length + le32_to_cpu(foreign->lfm_length))) { + CDEBUG(D_LAYOUT, + "buf len %zu too small for lov_foreign_md\n", + buf->lb_len); + GOTO(out, rc = -EINVAL); + } + + /* just cache foreign LOV EA raw */ + rc = lod_alloc_foreign_lov(lo, length); + if (rc) + GOTO(out, rc); + memcpy(lo->ldo_foreign_lov, buf->lb_buf, length); + GOTO(out, rc); } else { comp_cnt = 1; lo->ldo_layout_gen = le16_to_cpu(lmm->lmm_layout_gen); @@ -1810,15 +1851,47 @@ int lod_verify_striping(struct lod_device *d, struct lod_object *lo, int rc = 0; ENTRY; + if (buf->lb_len < 
sizeof(lum->lmm_magic)) { + CDEBUG(D_LAYOUT, "invalid buf len %zu\n", buf->lb_len); + RETURN(-EINVAL); + } + lum = buf->lb_buf; + magic = le32_to_cpu(lum->lmm_magic) & ~LOV_MAGIC_DEFINED; + /* treat foreign LOV EA/object case first + * XXX is it expected to try setting again a foreign? + * XXX should we care about different current vs new layouts ? + */ + if (unlikely(magic == LOV_USER_MAGIC_FOREIGN)) { + struct lov_foreign_md *lfm = buf->lb_buf; + + if (buf->lb_len < offsetof(typeof(*lfm), lfm_value)) { + CDEBUG(D_LAYOUT, + "buf len %zu < min lov_foreign_md size (%zu)\n", + buf->lb_len, offsetof(typeof(*lfm), + lfm_value)); + RETURN(-EINVAL); + } + + if (foreign_size_le(lfm) > buf->lb_len) { + CDEBUG(D_LAYOUT, + "buf len %zu < this lov_foreign_md size (%zu)\n", + buf->lb_len, foreign_size_le(lfm)); + RETURN(-EINVAL); + } + /* Don't do anything with foreign layouts */ + RETURN(0); + } + + /* normal LOV/layout cases */ + if (buf->lb_len < sizeof(*lum)) { CDEBUG(D_LAYOUT, "buf len %zu too small for lov_user_md\n", buf->lb_len); RETURN(-EINVAL); } - magic = le32_to_cpu(lum->lmm_magic) & ~LOV_MAGIC_DEFINED; if (magic != LOV_USER_MAGIC_V1 && magic != LOV_USER_MAGIC_V3 && magic != LOV_USER_MAGIC_SPECIFIC && diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 0b4a789..9bd60af 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -2578,7 +2578,7 @@ static int lod_comp_md_size(struct lod_object *lo, bool is_dir) int magic, size = 0, i; struct lod_layout_component *comp_entries; __u16 comp_cnt; - bool is_composite; + bool is_composite, is_foreign = false; if (is_dir) { comp_cnt = lo->ldo_def_striping->lds_def_comp_cnt; @@ -2589,8 +2589,11 @@ static int lod_comp_md_size(struct lod_object *lo, bool is_dir) comp_cnt = lo->ldo_comp_cnt; comp_entries = lo->ldo_comp_entries; is_composite = lo->ldo_is_composite; + is_foreign = lo->ldo_is_foreign; } + if (is_foreign) + return lo->ldo_foreign_lov_size; LASSERT(comp_cnt != 0 && comp_entries != NULL); 
if (is_composite) { @@ -3940,7 +3943,7 @@ static int lod_generate_and_set_lovea(const struct lu_env *env, LASSERT(lo); - if (lo->ldo_comp_cnt == 0) { + if (lo->ldo_comp_cnt == 0 && !lo->ldo_is_foreign) { lod_striping_free(env, lo); rc = lod_sub_xattr_del(env, next, XATTR_NAME_LOV, th); RETURN(rc); @@ -5256,7 +5259,8 @@ int lod_striped_create(const struct lu_env *env, struct dt_object *dt, int rc = 0, i, j; ENTRY; - LASSERT(lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL); + LASSERT((lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL) || + lo->ldo_is_foreign); mirror_id = 0; /* non-flr file's mirror_id is 0 */ if (lo->ldo_mirror_count > 1) { @@ -6819,6 +6823,41 @@ static int lod_object_init(const struct lu_env *env, struct lu_object *lo, /** * + * Alloc cached foreign LOV + * + * \param[in] lo object + * \param[in] size size of foreign LOV + * + * \retval 0 on success + * \retval negative if failed + */ +int lod_alloc_foreign_lov(struct lod_object *lo, size_t size) +{ + OBD_ALLOC_LARGE(lo->ldo_foreign_lov, size); + if (lo->ldo_foreign_lov == NULL) + return -ENOMEM; + lo->ldo_foreign_lov_size = size; + lo->ldo_is_foreign = 1; + return 0; +} + +/** + * + * Free cached foreign LOV + * + * \param[in] lo object + */ +void lod_free_foreign_lov(struct lod_object *lo) +{ + if (lo->ldo_foreign_lov != NULL) + OBD_FREE_LARGE(lo->ldo_foreign_lov, lo->ldo_foreign_lov_size); + lo->ldo_foreign_lov = NULL; + lo->ldo_foreign_lov_size = 0; + lo->ldo_is_foreign = 0; +} + +/** + * * Release resources associated with striping. 
* * If the object is striped (regular or directory), then release @@ -6832,7 +6871,10 @@ void lod_striping_free_nolock(const struct lu_env *env, struct lod_object *lo) struct lod_layout_component *lod_comp; int i, j; - if (lo->ldo_stripe != NULL) { + if (unlikely(lo->ldo_is_foreign)) { + lod_free_foreign_lov(lo); + lo->ldo_comp_cached = 0; + } else if (lo->ldo_stripe != NULL) { LASSERT(lo->ldo_comp_entries == NULL); LASSERT(lo->ldo_dir_stripes_allocated > 0); diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index 3749b38..934d6bc 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -1835,7 +1835,7 @@ int lod_use_defined_striping(const struct lu_env *env, magic = le32_to_cpu(v1->lmm_magic) & ~LOV_MAGIC_DEFINED; if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3 && - magic != LOV_MAGIC_COMP_V1) + magic != LOV_MAGIC_COMP_V1 && magic != LOV_MAGIC_FOREIGN) GOTO(unlock, rc = -EINVAL); if (magic == LOV_MAGIC_COMP_V1) { @@ -1847,6 +1847,32 @@ int lod_use_defined_striping(const struct lu_env *env, mo->ldo_flr_state = le16_to_cpu(comp_v1->lcm_flags) & LCM_FL_FLR_MASK; mo->ldo_is_composite = 1; + } else if (magic == LOV_MAGIC_FOREIGN) { + struct lov_foreign_md *foreign; + size_t length; + + if (buf->lb_len < offsetof(typeof(*foreign), lfm_value)) { + CDEBUG(D_LAYOUT, + "buf len %zu < min lov_foreign_md size (%zu)\n", + buf->lb_len, + offsetof(typeof(*foreign), lfm_value)); + GOTO(out, rc = -EINVAL); + } + foreign = (struct lov_foreign_md *)buf->lb_buf; + length = foreign_size_le(foreign); + if (buf->lb_len < length) { + CDEBUG(D_LAYOUT, + "buf len %zu < this lov_foreign_md size (%zu)\n", + buf->lb_len, length); + GOTO(out, rc = -EINVAL); + } + + /* just cache foreign LOV EA raw */ + rc = lod_alloc_foreign_lov(mo, length); + if (rc) + GOTO(out, rc); + memcpy(mo->ldo_foreign_lov, buf->lb_buf, length); + GOTO(out, rc); } else { mo->ldo_is_composite = 0; comp_cnt = 1; @@ -1951,16 +1977,17 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object 
*lo, const struct lu_buf *buf) { struct lod_layout_component *lod_comp; - struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev); - struct lov_desc *desc = &d->lod_desc; - struct lov_user_md_v1 *v1 = NULL; - struct lov_user_md_v3 *v3 = NULL; - struct lov_comp_md_v1 *comp_v1 = NULL; - char def_pool[LOV_MAXPOOLNAME + 1]; - __u32 magic; - __u16 comp_cnt; - __u16 mirror_cnt; - int i, rc; + struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev); + struct lov_desc *desc = &d->lod_desc; + struct lov_user_md_v1 *v1 = NULL; + struct lov_user_md_v3 *v3 = NULL; + struct lov_comp_md_v1 *comp_v1 = NULL; + struct lov_foreign_md *lfm = NULL; + char def_pool[LOV_MAXPOOLNAME + 1]; + __u32 magic; + __u16 comp_cnt; + __u16 mirror_cnt; + int i, rc; ENTRY; if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0) @@ -1972,7 +1999,10 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo, def_pool, sizeof(def_pool)); /* free default striping info */ - lod_free_comp_entries(lo); + if (lo->ldo_is_foreign) + lod_free_foreign_lov(lo); + else + lod_free_comp_entries(lo); rc = lod_verify_striping(d, lo, buf, false); if (rc) @@ -1981,6 +2011,7 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo, v3 = buf->lb_buf; v1 = buf->lb_buf; comp_v1 = buf->lb_buf; + /* {lmm,lfm}_magic position/length work for all LOV formats */ magic = v1->lmm_magic; if (unlikely(le32_to_cpu(magic) & LOV_MAGIC_DEFINED)) { @@ -2016,6 +2047,22 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo, /* fall trhough */ case LOV_USER_MAGIC_COMP_V1: break; + case __swab32(LOV_USER_MAGIC_FOREIGN): + lfm = buf->lb_buf; + __swab32s(&lfm->lfm_magic); + __swab32s(&lfm->lfm_length); + __swab32s(&lfm->lfm_type); + __swab32s(&lfm->lfm_flags); + magic = lfm->lfm_magic; + /* fall through */ + case LOV_USER_MAGIC_FOREIGN: + if (!lfm) + lfm = buf->lb_buf; + rc = lod_alloc_foreign_lov(lo, foreign_size(lfm)); + if (rc) + RETURN(rc); + memcpy(lo->ldo_foreign_lov, 
buf->lb_buf, foreign_size(lfm)); + RETURN(0); default: CERROR("%s: unrecognized magic %X\n", lod2obd(d)->obd_name, magic); diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index b28ff35..179e450 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -122,6 +122,7 @@ enum lov_layout_type { LLT_EMPTY, /** empty file without body (mknod + truncate) */ LLT_RELEASED, /** file with no objects (data in HSM) */ LLT_COMP, /** support composite layout */ + LLT_FOREIGN, /** foreign layout */ LLT_NR }; @@ -134,6 +135,8 @@ static inline char *llt2str(enum lov_layout_type llt) return "RELEASED"; case LLT_COMP: return "COMPOSITE"; + case LLT_FOREIGN: + return "FOREIGN"; case LLT_NR: LBUG(); } @@ -623,9 +626,12 @@ int lov_page_init_empty (const struct lu_env *env, struct cl_object *obj, struct cl_page *page, pgoff_t index); int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, struct cl_page *page, pgoff_t index); +int lov_page_init_foreign(const struct lu_env *env, struct cl_object *obj, + struct cl_page *page, pgoff_t index); struct lu_object *lov_object_alloc (const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); + struct lu_object *lovsub_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index b44cbd1..49673c5 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -132,10 +132,16 @@ void lsm_free(struct lov_stripe_md *lsm) unsigned int i; size_t lsm_size; - for (i = 0; i < entry_count; i++) - lsme_free(lsm->lsm_entries[i]); + if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) { + OBD_FREE_LARGE(lsm_foreign(lsm), lsm->lsm_foreign_size); + } else { + for (i = 0; i < entry_count; i++) + lsme_free(lsm->lsm_entries[i]); + } - lsm_size = offsetof(typeof(*lsm), lsm_entries[entry_count]); + lsm_size = lsm->lsm_magic == LOV_MAGIC_FOREIGN ? 
+ offsetof(typeof(*lsm), lsm_entries[1]) : + offsetof(typeof(*lsm), lsm_entries[entry_count]); OBD_FREE(lsm, lsm_size); } @@ -510,16 +516,64 @@ const struct lsm_operations lsm_comp_md_v1_ops = { .lsm_unpackmd = lsm_unpackmd_comp_md_v1, }; +static struct +lov_stripe_md *lsm_unpackmd_foreign(struct lov_obd *lov, void *buf, + size_t buf_size) +{ + struct lov_foreign_md *lfm = buf; + struct lov_stripe_md *lsm; + size_t lsm_size; + struct lov_stripe_md_entry *lsme; + + lsm_size = offsetof(typeof(*lsm), lsm_entries[1]); + OBD_ALLOC(lsm, lsm_size); + if (lsm == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + atomic_set(&lsm->lsm_refc, 1); + spin_lock_init(&lsm->lsm_lock); + lsm->lsm_magic = le32_to_cpu(lfm->lfm_magic); + lsm->lsm_foreign_size = foreign_size_le(lfm); + + /* alloc for full foreign EA including format fields */ + OBD_ALLOC_LARGE(lsme, lsm->lsm_foreign_size); + if (lsme == NULL) { + OBD_FREE(lsm, lsm_size); + RETURN(ERR_PTR(-ENOMEM)); + } + + /* copy full foreign EA including format fields */ + memcpy(lsme, buf, lsm->lsm_foreign_size); + + lsm_foreign(lsm) = lsme; + + return lsm; +} + +const struct lsm_operations lsm_foreign_ops = { + .lsm_unpackmd = lsm_unpackmd_foreign, +}; + void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm) { int i, j; - CDEBUG(level, "lsm %p, objid "DOSTID", maxbytes %#llx, magic 0x%08X, " - "refc: %d, entry: %u, layout_gen %u\n", + CDEBUG_LIMIT(level, + "lsm %p, objid "DOSTID", maxbytes %#llx, magic 0x%08X, refc: %d, entry: %u, layout_gen %u\n", lsm, POSTID(&lsm->lsm_oi), lsm->lsm_maxbytes, lsm->lsm_magic, atomic_read(&lsm->lsm_refc), lsm->lsm_entry_count, lsm->lsm_layout_gen); + if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) { + struct lov_foreign_md *lfm = (void *)lsm_foreign(lsm); + + CDEBUG_LIMIT(level, + "foreign LOV EA, magic %x, length %u, type %x, flags %x, value '%.*s'\n", + lfm->lfm_magic, lfm->lfm_length, lfm->lfm_type, + lfm->lfm_flags, lfm->lfm_length, lfm->lfm_value); + return; + } + for (i = 0; i < lsm->lsm_entry_count; 
i++) { struct lov_stripe_md_entry *lse = lsm->lsm_entries[i]; diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 05bfc12..33aa806 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -75,9 +75,14 @@ struct lov_stripe_md { spinlock_t lsm_lock; pid_t lsm_lock_owner; /* debugging */ - /* maximum possible file size, might change as OSTs status changes, - * e.g. disconnected, deactivated */ - loff_t lsm_maxbytes; + union { + /* maximum possible file size, might change as OSTs status + * changes, e.g. disconnected, deactivated + */ + loff_t lsm_maxbytes; + /* size of full foreign LOV */ + size_t lsm_foreign_size; + }; struct ost_id lsm_oi; u32 lsm_magic; u32 lsm_layout_gen; @@ -88,6 +93,8 @@ struct lov_stripe_md { struct lov_stripe_md_entry *lsm_entries[]; }; +#define lsm_foreign(lsm) (lsm->lsm_entries[0]) + static inline bool lsme_inited(const struct lov_stripe_md_entry *lsme) { return lsme->lsme_flags & LCME_FL_INIT; @@ -113,6 +120,9 @@ static inline size_t lov_comp_md_size(const struct lov_stripe_md *lsm) return lov_mds_md_size(lsm->lsm_entries[0]->lsme_stripe_count, lsm->lsm_entries[0]->lsme_magic); + if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) + return lsm->lsm_foreign_size; + LASSERT(lsm->lsm_magic == LOV_MAGIC_COMP_V1); size = sizeof(struct lov_comp_md_v1); @@ -164,6 +174,7 @@ struct lsm_operations { extern const struct lsm_operations lsm_v1_ops; extern const struct lsm_operations lsm_v3_ops; extern const struct lsm_operations lsm_comp_md_v1_ops; +extern const struct lsm_operations lsm_foreign_ops; static inline const struct lsm_operations *lsm_op_find(int magic) { switch (magic) { @@ -173,6 +184,8 @@ static inline const struct lsm_operations *lsm_op_find(int magic) return &lsm_v3_ops; case LOV_MAGIC_COMP_V1: return &lsm_comp_md_v1_ops; + case LOV_MAGIC_FOREIGN: + return &lsm_foreign_ops; default: CERROR("unrecognized lsm_magic %08x\n", magic); return NULL; diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 
200f943..884f744 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -811,10 +811,25 @@ static int lov_init_released(const struct lu_env *env, return 0; } +static int lov_init_foreign(const struct lu_env *env, + struct lov_device *dev, struct lov_object *lov, + struct lov_stripe_md *lsm, + const struct cl_object_conf *conf, + union lov_layout_state *state) +{ + LASSERT(lsm != NULL); + LASSERT(lov->lo_type == LLT_FOREIGN); + LASSERT(lov->lo_lsm == NULL); + + lov->lo_lsm = lsm_addref(lsm); + return 0; +} + static int lov_delete_empty(const struct lu_env *env, struct lov_object *lov, union lov_layout_state *state) { - LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED); + LASSERT(lov->lo_type == LLT_EMPTY || lov->lo_type == LLT_RELEASED || + lov->lo_type == LLT_FOREIGN); lov_layout_wait(env, lov); return 0; @@ -936,6 +951,23 @@ static int lov_print_released(const struct lu_env *env, void *cookie, return 0; } +static int lov_print_foreign(const struct lu_env *env, void *cookie, + lu_printer_t p, const struct lu_object *o) +{ + struct lov_object *lov = lu2lov(o); + struct lov_stripe_md *lsm = lov->lo_lsm; + + (*p)(env, cookie, + "foreign: %s, lsm{%p 0x%08X %d %u}:\n", + lov->lo_layout_invalid ? "invalid" : "valid", lsm, + lsm->lsm_magic, atomic_read(&lsm->lsm_refc), + lsm->lsm_layout_gen); + (*p)(env, cookie, + "raw_ea_content '%.*s'\n", + (int)lsm->lsm_foreign_size, (char *)lsm_foreign(lsm)); + return 0; +} + /** * Implements cl_object_operations::coo_attr_get() method for an object * without stripes (LLT_EMPTY layout type). 
@@ -1034,6 +1066,16 @@ const static struct lov_layout_operations lov_dispatch[] = { .llo_io_init = lov_io_init_composite, .llo_getattr = lov_attr_get_composite, }, + [LLT_FOREIGN] = { + .llo_init = lov_init_foreign, + .llo_delete = lov_delete_empty, + .llo_fini = lov_fini_released, + .llo_print = lov_print_foreign, + .llo_page_init = lov_page_init_foreign, + .llo_lock_init = lov_lock_init_empty, + .llo_io_init = lov_io_init_empty, + .llo_getattr = lov_attr_get_empty, + }, }; /** @@ -1065,6 +1107,9 @@ static enum lov_layout_type lov_type(struct lov_stripe_md *lsm) lsm->lsm_magic == LOV_MAGIC_COMP_V1) return LLT_COMP; + if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) + return LLT_FOREIGN; + return LLT_EMPTY; } @@ -2136,6 +2181,8 @@ int lov_read_and_clear_async_rc(struct cl_object *clob) } case LLT_RELEASED: case LLT_EMPTY: + /* fall through */ + case LLT_FOREIGN: break; default: LBUG(); diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index 3fe19fe..23ffe6c 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -183,6 +183,28 @@ ssize_t lov_lsm_pack_v1v3(const struct lov_stripe_md *lsm, void *buf, RETURN(lmm_size); } +ssize_t lov_lsm_pack_foreign(const struct lov_stripe_md *lsm, void *buf, + size_t buf_size) +{ + struct lov_foreign_md *lfm = buf; + size_t lfm_size; + + lfm_size = lsm->lsm_foreign_size; + + if (buf_size == 0) + RETURN(lfm_size); + + if (buf_size < lfm_size) + RETURN(-ERANGE); + + /* full foreign LOV is already avail in its cache + * no need to translate format fields to little-endian + */ + memcpy(lfm, lsm_foreign(lsm), lsm->lsm_foreign_size); + + RETURN(lfm_size); +} + ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, size_t buf_size) { @@ -200,6 +222,9 @@ ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf, if (lsm->lsm_magic == LOV_MAGIC_V1 || lsm->lsm_magic == LOV_MAGIC_V3) return lov_lsm_pack_v1v3(lsm, buf, buf_size); + if (lsm->lsm_magic == LOV_MAGIC_FOREIGN) + return lov_lsm_pack_foreign(lsm, buf, 
buf_size); + lmm_size = lov_comp_md_size(lsm); if (buf_size == 0) RETURN(lmm_size); @@ -362,6 +387,7 @@ int lov_getstripe(const struct lu_env *env, struct lov_object *obj, { /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */ struct lov_mds_md *lmmk, *lmm; + struct lov_foreign_md *lfm; struct lov_user_md_v1 lum; size_t lmmk_size; ssize_t lmm_size, lum_size = 0; @@ -371,7 +397,8 @@ int lov_getstripe(const struct lu_env *env, struct lov_object *obj, ENTRY; if (lsm->lsm_magic != LOV_MAGIC_V1 && lsm->lsm_magic != LOV_MAGIC_V3 && - lsm->lsm_magic != LOV_MAGIC_COMP_V1) { + lsm->lsm_magic != LOV_MAGIC_COMP_V1 && + lsm->lsm_magic != LOV_MAGIC_FOREIGN) { CERROR("bad LSM MAGIC: 0x%08X != 0x%08X nor 0x%08X\n", lsm->lsm_magic, LOV_MAGIC_V1, LOV_MAGIC_V3); GOTO(out, rc = -EIO); @@ -405,6 +432,12 @@ int lov_getstripe(const struct lu_env *env, struct lov_object *obj, } else if (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_COMP_V1)) { lustre_swab_lov_comp_md_v1( (struct lov_comp_md_v1 *)lmmk); + } else if (lmmk->lmm_magic == cpu_to_le32(LOV_MAGIC_FOREIGN)) { + lfm = (struct lov_foreign_md *)lmmk; + __swab32s(&lfm->lfm_magic); + __swab32s(&lfm->lfm_length); + __swab32s(&lfm->lfm_type); + __swab32s(&lfm->lfm_flags); } } @@ -412,8 +445,9 @@ int lov_getstripe(const struct lu_env *env, struct lov_object *obj, * Legacy appication passes limited buffer, we need to figure out * the user buffer size by the passed in lmm_stripe_count. 
*/ - if (copy_from_user(&lum, lump, sizeof(struct lov_user_md_v1))) - GOTO(out_free, rc = -EFAULT); + memset(&lum, 0, sizeof(lum)); /* foreign: no user copy below */ + if (lsm->lsm_magic != LOV_MAGIC_FOREIGN && copy_from_user(&lum, lump, sizeof(lum))) + GOTO(out_free, rc = -EFAULT); if (lum.lmm_magic == LOV_USER_MAGIC_V1 || lum.lmm_magic == LOV_USER_MAGIC_V3) diff --git a/lustre/lov/lov_page.c b/lustre/lov/lov_page.c index 34fbc66..cea48a1 100644 --- a/lustre/lov/lov_page.c +++ b/lustre/lov/lov_page.c @@ -148,6 +148,13 @@ int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, RETURN(0); } +int lov_page_init_foreign(const struct lu_env *env, struct cl_object *obj, + struct cl_page *page, pgoff_t index) +{ + CDEBUG(D_PAGE, DFID" has no data\n", PFID(lu_object_fid(&obj->co_lu))); + RETURN(-ENODATA); +} + bool lov_page_is_empty(const struct cl_page *page) { const struct cl_page_slice *slice = cl_page_at(page, &lov_device_type); diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 1b36587..5d4b23e 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -75,6 +75,7 @@ THETESTS += listxattr_size_check check_fhandle_syscalls badarea_io THETESTS += llapi_layout_test orphan_linkea_check llapi_hsm_test THETESTS += group_lock_test llapi_fid_test sendfile_grouplock mmap_cat THETESTS += swap_lock_test lockahead_test mirror_io mmap_mknod_test +THETESTS += create_foreign_file parse_foreign_file if TESTS if MPITESTS diff --git a/lustre/tests/create_foreign_file.c b/lustre/tests/create_foreign_file.c new file mode 100644 index 0000000..e270135 --- /dev/null +++ b/lustre/tests/create_foreign_file.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int main(int argc, char **argv) +{ + int c, fd; + char *fname = "FILE"; + char *xval = "UUID@UUID"; + size_t len; + struct lov_foreign_md *lfm; + char *end; + __u32 type = LOV_FOREIGN_TYPE_DAOS, flags = 0xda08; + + while ((c = getopt(argc, argv, "f:x:t:F:h")) != -1) { +
switch (c) { + case 'f': + fname = optarg; + break; + case 'x': + xval = optarg; + break; + case 't': + type = strtoul(optarg, &end, 0); + if (*end != '\0') { + fprintf(stderr, + "%s: invalid type '%s'\n", argv[0], + optarg); + exit(1); + } + break; + case 'F': + flags = strtoul(optarg, &end, 0); + if (*end != '\0') { + fprintf(stderr, + "%s: invalid flags '%s'\n", argv[0], + optarg); + exit(1); + } + break; + case 'h': + fprintf(stderr, + "Usage: %s -f -x \n", + argv[0]); + exit(0); + } + } + + len = strlen(xval); + if (len > XATTR_SIZE_MAX || len <= 0) { + fprintf(stderr, + "invalid LOV EA length %zu > XATTR_SIZE_MAX (%u)\n", + len, XATTR_SIZE_MAX); + exit(1); + } + + fd = open(fname, O_WRONLY|O_CREAT|O_LOV_DELAY_CREATE, 0644); + if (fd == -1) { + perror("open()"); + exit(1); + } + + lfm = malloc(len + offsetof(struct lov_foreign_md, lfm_value)); + if (lfm == NULL) { + perror("malloc()"); + exit(1); + } + + lfm->lfm_magic = LOV_USER_MAGIC_FOREIGN; + lfm->lfm_length = len; + lfm->lfm_type = type; + lfm->lfm_flags = flags; + memcpy(lfm->lfm_value, xval, len); + + if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, lfm) != 0) { + perror("ioctl(LL_IOC_LOV_SETSTRIPE)"); + exit(1); + } + + close(fd); + return 0; +} diff --git a/lustre/tests/parse_foreign_file.c b/lustre/tests/parse_foreign_file.c new file mode 100644 index 0000000..5b2cddd --- /dev/null +++ b/lustre/tests/parse_foreign_file.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int main(int argc, char **argv) +{ + int c, i; + char *fname = "FILE"; + size_t len, len2; + struct lov_foreign_md *lfm; + + while ((c = getopt(argc, argv, "f:h")) != -1) { + switch (c) { + case 'f': + fname = optarg; + break; + case 'h': + fprintf(stderr, "Usage: %s -f \n", argv[0]); + exit(0); + } + } + + len = getxattr(fname, "lustre.lov", NULL, 0); + if (len == -1) { + perror("getxattr()"); + exit(1); + } + if (len > XATTR_SIZE_MAX || len <= 0) { +
fprintf(stderr, + "invalid LOV EA length %zu > XATTR_SIZE_MAX (%u)\n", + len, XATTR_SIZE_MAX); + exit(1); + } + + lfm = malloc(len); + if (lfm == NULL) { + perror("malloc()"); + exit(1); + } + + len2 = getxattr(fname, "lustre.lov", lfm, len); + if (len2 == -1) { + perror("getxattr()"); + exit(1); + } + + if (len != len2) + fprintf(stderr, + "trusted.lov xattr size changed, before=%zu now=%zu\n", + len, len2); + + if (len2 < offsetof(struct lov_foreign_md, lfm_value)) { + fprintf(stderr, "trusted.lov size=%zu too small\n", len2); + fprintf(stderr, "printing its content in hex anyway:\n"); + for (i = 0; i < len2; i++) /* unsigned char: no sign-extension in %02x */ + fprintf(stderr, "%02x", *((unsigned char *)lfm + i)); + exit(1); + } + + + if (lfm->lfm_magic != LOV_USER_MAGIC_FOREIGN) { + if (lfm->lfm_magic == bswap_32(LOV_USER_MAGIC_FOREIGN)) + fprintf(stderr, "magic is swapped\n"); + else + fprintf(stderr, "wrong magic=(0x%x)\n", lfm->lfm_magic); + } + + if (lfm->lfm_length != len2 - offsetof(typeof(*lfm), lfm_value)) { + if (bswap_32(lfm->lfm_length) == len2 - offsetof(typeof(*lfm), + lfm_value)) + fprintf(stderr, "length is swapped\n"); + else + fprintf(stderr, + "wrong internal length=%u vs xattr size=%zu\n", + lfm->lfm_length, len2); + } + + fprintf(stdout, "lov_xattr_size: %zu\n", len2); + fprintf(stdout, "lov_foreign_magic: 0x%08X\n", lfm->lfm_magic); + fprintf(stdout, "lov_foreign_size: %u\n", lfm->lfm_length); + fprintf(stdout, "lov_foreign_type: %u\n", lfm->lfm_type); + fprintf(stdout, "lov_foreign_flags: 0x%08X\n", lfm->lfm_flags); + fprintf(stdout, "lov_foreign_value: 0x"); + for (i = 0; i < len2 - offsetof(typeof(*lfm), lfm_value); i++) + fprintf(stdout, "%02x", (unsigned char)lfm->lfm_value[i]); + + fprintf(stdout, "\n"); + + return 0; +} diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 3058f44..27bbb01 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -5574,6 +5574,92 @@ test_37() } run_test 37 "LFSCK must skip a ORPHAN" +test_38() +{ + [[ $MDS1_VERSION -le
$(version_code 2.12.51) ]] && + skip "Need MDS version newer than 2.12.51" + + test_mkdir $DIR/$tdir + local uuid1=$(cat /proc/sys/kernel/random/uuid) + local uuid2=$(cat /proc/sys/kernel/random/uuid) + + # create foreign file + $LFS setstripe --foreign=daos --flags 0xda05 \ + -x "${uuid1}@${uuid2}" $DIR/$tdir/$tfile || + error "$DIR/$tdir/$tfile: create failed" + + $LFS getstripe -v $DIR/$tdir/$tfile | + grep "lfm_magic:.*0x0BD70BD0" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign magic" + # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length) + $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign size" + $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*daos" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign type" + $LFS getstripe -v $DIR/$tdir/$tfile | + grep "lfm_flags:.*0x0000DA05" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign flags" + $LFS getstripe $DIR/$tdir/$tfile | + grep "lfm_value:.*${uuid1}@${uuid2}" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign value" + + # modify striping should fail + $LFS setstripe -c 2 $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: setstripe should fail" + + $START_NAMESPACE -r -A || error "Fail to start LFSCK for namespace" + + wait_all_targets_blocked namespace completed 1 + + # check that "global" namespace_repaired == 0 !!! + local repaired=$(do_facet mds1 \ + "$LCTL lfsck_query -t all -M ${FSNAME}-MDT0000 | + awk '/^namespace_repaired/ { print \\\$2 }'") + [ $repaired -eq 0 ] || + error "(2) Expect no namespace repair, but got: $repaired" + + $START_LAYOUT -A -r || error "Fail to start LFSCK for layout" + + wait_all_targets_blocked layout completed 2 + + # check that "global" layout_repaired == 0 !!! 
+ local repaired=$(do_facet mds1 \ + "$LCTL lfsck_query -t all -M ${FSNAME}-MDT0000 | + awk '/^layout_repaired/ { print \\\$2 }'") + [ $repaired -eq 0 ] || + error "(2) Expect no layout repair, but got: $repaired" + + echo "post-lfsck checks of foreign file" + + $LFS getstripe -v $DIR/$tdir/$tfile | + grep "lfm_magic:.*0x0BD70BD0" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign magic" + # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length) + $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_length:.*73" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign size" + $LFS getstripe -v $DIR/$tdir/$tfile | grep "lfm_type:.*daos" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign type" + $LFS getstripe -v $DIR/$tdir/$tfile | + grep "lfm_flags:.*0x0000DA05" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign flags" + $LFS getstripe $DIR/$tdir/$tfile | + grep "lfm_value:.*${uuid1}@${uuid2}" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign value" + + # modify striping should fail + $LFS setstripe -c 2 $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: setstripe should fail" + + # R/W should fail + cat $DIR/$tdir/$tfile && error "$DIR/$tdir/$tfile: read should fail" + cat /etc/passwd > $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: write should fail" + + #remove foreign file + rm $DIR/$tdir/$tfile || + error "$DIR/$tdir/$tfile: remove of foreign file has failed" +} +run_test 38 "LFSCK does not break foreign file and reverse is also true" # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d78ded2..4befadb 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2405,6 +2405,101 @@ test_27I() { } run_test 27I "check that root dir striping does not break parent dir one" +test_27J() { + [[ $(lustre_version_code $SINGLEMDS) -le $(version_code 2.12.51) ]] && + skip "Need MDS version newer than 2.12.51" + + test_mkdir $DIR/$tdir + local uuid1=$(cat /proc/sys/kernel/random/uuid) +
local uuid2=$(cat /proc/sys/kernel/random/uuid) + + # create foreign file (raw way) + create_foreign_file -f $DIR/$tdir/$tfile -x "${uuid1}@${uuid2}" \ + -t 1 -F 0xda08 || error "create_foreign_file failed" + + # verify foreign file (raw way) + parse_foreign_file -f $DIR/$tdir/$tfile | + grep "lov_foreign_magic: 0x0BD70BD0" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign magic" + parse_foreign_file -f $DIR/$tdir/$tfile | grep "lov_xattr_size: 89" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign size" + parse_foreign_file -f $DIR/$tdir/$tfile | + grep "lov_foreign_size: 73" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign size" + parse_foreign_file -f $DIR/$tdir/$tfile | + grep "lov_foreign_type: 1" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign type" + parse_foreign_file -f $DIR/$tdir/$tfile | + grep "lov_foreign_flags: 0x0000DA08" || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign flags" + local lov=$(parse_foreign_file -f $DIR/$tdir/$tfile | + grep "lov_foreign_value: 0x" | + sed -e 's/lov_foreign_value: 0x//') + local lov2=$(echo -n "${uuid1}@${uuid2}" | od -A n -t x1 -w160) + [[ $lov = ${lov2// /} ]] || + error "$DIR/$tdir/$tfile: invalid LOV EA foreign value" + + # create foreign file (lfs + API) + $LFS setstripe --foreign=daos --flags 0xda08 \ + -x "${uuid1}@${uuid2}" $DIR/$tdir/${tfile}2 || + error "$DIR/$tdir/${tfile}2: create failed" + + $LFS getstripe -v $DIR/$tdir/${tfile}2 | + grep "lfm_magic:.*0x0BD70BD0" || + error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign magic" + # lfm_length is LOV EA size - sizeof(lfm_magic) - sizeof(lfm_length) + $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_length:.*73" || + error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign size" + $LFS getstripe -v $DIR/$tdir/${tfile}2 | grep "lfm_type:.*daos" || + error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign type" + $LFS getstripe -v $DIR/$tdir/${tfile}2 | + grep "lfm_flags:.*0x0000DA08" || + error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign flags" 
+ $LFS getstripe -v $DIR/$tdir/${tfile}2 | + grep "lfm_value:.*${uuid1}@${uuid2}" || + error "$DIR/$tdir/${tfile}2: invalid LOV EA foreign value" + + # modify striping should fail + $LFS setstripe -c 2 $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: setstripe should fail" + $LFS setstripe -c 2 $DIR/$tdir/${tfile}2 && + error "$DIR/$tdir/${tfile}2: setstripe should fail" + + # R/W should fail + cat $DIR/$tdir/$tfile && error "$DIR/$tdir/$tfile: read should fail" + cat $DIR/$tdir/${tfile}2 && + error "$DIR/$tdir/${tfile}2: read should fail" + cat /etc/passwd > $DIR/$tdir/$tfile && + error "$DIR/$tdir/$tfile: write should fail" + cat /etc/passwd > $DIR/$tdir/${tfile}2 && + error "$DIR/$tdir/${tfile}2: write should fail" + + # chmod should work + chmod 222 $DIR/$tdir/$tfile || + error "$DIR/$tdir/$tfile: chmod failed" + chmod 222 $DIR/$tdir/${tfile}2 || + error "$DIR/$tdir/${tfile}2: chmod failed" + + # chown should work + chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/$tfile || + error "$DIR/$tdir/$tfile: chown failed" + chown $RUNAS_ID:$RUNAS_GID $DIR/$tdir/${tfile}2 || + error "$DIR/$tdir/${tfile}2: chown failed" + + # rename should work + mv $DIR/$tdir/$tfile $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/$tfile: rename of foreign file has failed" + mv $DIR/$tdir/${tfile}2 $DIR/$tdir/${tfile}2.new || + error "$DIR/$tdir/${tfile}2: rename of foreign file has failed" + + #remove foreign file + rm $DIR/$tdir/${tfile}.new || + error "$DIR/$tdir/${tfile}.new: remove of foreign file has failed" + rm $DIR/$tdir/${tfile}2.new || + error "$DIR/$tdir/${tfile}2.new: remove of foreign file has failed" +} +run_test 27J "basic ops on file with foreign LOV" + # createtest also checks that device nodes are created and # then visible correctly (#2091) test_28() { # bug 2091 diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index d2413ff..b897626 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -321,6 +321,7 @@ command_t cmdlist[] = { "To create a file with specified 
striping/composite layout, or\n" "create/replace the default layout on an existing directory:\n" SSM_CMD_COMMON("setstripe") + " [--mode ]\n" " \n" " or\n" "To add component(s) to an existing composite file:\n" @@ -340,7 +341,11 @@ command_t cmdlist[] = { "\tcomp_id: Unique component ID to delete\n" "\tcomp_flags: 'init' indicating all instantiated components\n" "\t '^init' indicating all uninstantiated components\n" - "\t-I and -F cannot be specified at the same time\n"}, + "\t-I and -F cannot be specified at the same time\n" + "To create a file with a foreign (free format) layout:\n" + "usage: setstripe --foreign[=]\n" + " --xattr|-x [--flags ]\n" + " [--mode ] \n"}, {"getstripe", lfs_getstripe, 0, "To list the layout pattern for a given file or files in a\n" "directory or recursively for all files in a directory tree.\n" @@ -398,6 +403,7 @@ command_t cmdlist[] = { " [[!] --uid|-u|--user|-U |] [[!] --pool ]\n" " [[!] --projid ]\n" " [[!] --layout|-L released,raid0,mdt]\n" + " [[!] --foreign[=]]\n" " [[!] --component-count [+-]]\n" " [[!] --component-start [+-]N[kMGTPE]]\n" " [[!] 
--component-end|-E [+-]N[kMGTPE]]\n" @@ -631,6 +637,20 @@ static int check_hashtype(const char *hashtype) return 0; } +static uint32_t lov_check_foreign_type_name(const char *foreign_type_name) +{ + uint32_t i; + + for (i = 0; i < LOV_FOREIGN_TYPE_UNKNOWN; i++) { + if (lov_foreign_type[i].lft_name == NULL) + break; + if (strcmp(foreign_type_name, + lov_foreign_type[i].lft_name) == 0) + return lov_foreign_type[i].lft_type; + } + + return LOV_FOREIGN_TYPE_UNKNOWN; +} static const char *error_loc = "syserror"; @@ -2559,11 +2579,13 @@ enum { LFS_COMP_ADD_OPT, LFS_COMP_NO_VERIFY_OPT, LFS_PROJID_OPT, - LFS_MIRROR_FLAGS_OPT, + LFS_LAYOUT_FLAGS_OPT, /* used for mirror and foreign flags */ LFS_MIRROR_ID_OPT, LFS_MIRROR_STATE_OPT, LFS_LAYOUT_COPY, LFS_MIRROR_INDEX_OPT, + LFS_LAYOUT_FOREIGN_OPT, + LFS_MODE_OPT, }; /* functions */ @@ -2585,6 +2607,7 @@ static int lfs_setstripe_internal(int argc, char **argv, unsigned long long size_units = 1; bool migrate_mode = false; bool migrate_mdt_mode = false; + bool setstripe_mode = false; bool migration_block = false; __u64 migration_flags = 0; __u32 tgts[LOV_MAX_STRIPE_COUNT] = { 0 }; @@ -2605,6 +2628,12 @@ static int lfs_setstripe_internal(int argc, char **argv, bool from_yaml = false; bool from_copy = false; char *template = NULL; + bool foreign_mode = false; + char *xattr = NULL; + uint32_t type = LOV_FOREIGN_TYPE_NONE, flags = 0; + char *mode_opt = NULL; + mode_t previous_umask = 0; + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; struct option long_opts[] = { /* find { .val = '0', .name = "null", .has_arg = no_argument }, */ @@ -2631,10 +2660,14 @@ static int lfs_setstripe_internal(int argc, char **argv, .has_arg = no_argument}, { .val = LFS_COMP_NO_VERIFY_OPT, .name = "no-verify", .has_arg = no_argument}, - { .val = LFS_MIRROR_FLAGS_OPT, + { .val = LFS_LAYOUT_FLAGS_OPT, .name = "flags", .has_arg = required_argument}, + { .val = LFS_LAYOUT_FOREIGN_OPT, + .name = "foreign", .has_arg = optional_argument}, 
{ .val = LFS_MIRROR_ID_OPT, .name = "mirror-id", .has_arg = required_argument}, + { .val = LFS_MODE_OPT, + .name = "mode", .has_arg = required_argument}, { .val = LFS_LAYOUT_COPY, .name = "copy", .has_arg = required_argument}, { .val = 'c', .name = "stripe-count", .has_arg = required_argument}, @@ -2683,6 +2716,7 @@ static int lfs_setstripe_internal(int argc, char **argv, /* find { .val = 'U', .name = "user", .has_arg = required_argument }*/ /* --verbose is only valid in migrate mode */ { .val = 'v', .name = "verbose", .has_arg = no_argument}, + { .val = 'x', .name = "xattr", .has_arg = required_argument }, { .val = 'y', .name = "yaml", .has_arg = required_argument }, { .name = NULL } }; @@ -2690,12 +2724,13 @@ static int lfs_setstripe_internal(int argc, char **argv, migrate_mode = (opc == SO_MIGRATE); mirror_mode = (opc == SO_MIRROR_CREATE || opc == SO_MIRROR_EXTEND); + setstripe_mode = (opc == SO_SETSTRIPE); snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]); progname = cmd; while ((c = getopt_long(argc, argv, - "bc:dDE:f:H:i:I:m:N::no:p:L:s:S:vy:", long_opts, - NULL)) >= 0) { + "bc:dDE:f:H:i:I:m:N::no:p:L:s:S:vx:y:", + long_opts, NULL)) >= 0) { switch (c) { case 0: /* Long options. 
*/ @@ -2740,8 +2775,20 @@ static int lfs_setstripe_internal(int argc, char **argv, goto usage_error; } break; - case LFS_MIRROR_FLAGS_OPT: { - __u32 flags; + case LFS_LAYOUT_FLAGS_OPT: { + uint32_t neg_flags; + + /* check for numeric flags (foreign and mirror cases) */ + if (setstripe_mode && !mirror_mode && !last_mirror) { + flags = strtoul(optarg, &end, 16); + if (*end != '\0') { + fprintf(stderr, + "%s %s: bad flags '%s'\n", + progname, argv[0], optarg); + return CMD_HELP; + } + break; + } if (!mirror_mode || !last_mirror) { fprintf(stderr, "error: %s: --flags must be specified with --mirror-count|-N option\n", @@ -2750,11 +2797,11 @@ static int lfs_setstripe_internal(int argc, char **argv, } result = comp_str2flags(optarg, &last_mirror->m_flags, - &flags); + &neg_flags); if (result != 0) goto usage_error; - if (flags) { + if (neg_flags) { fprintf(stderr, "%s: inverted flags are not supported\n", progname); result = -EINVAL; @@ -2769,6 +2816,37 @@ static int lfs_setstripe_internal(int argc, char **argv, } break; } + case LFS_LAYOUT_FOREIGN_OPT: + if (optarg != NULL) { + /* check pure numeric */ + type = strtoul(optarg, &end, 0); + if (*end) { + /* check name */ + type = lov_check_foreign_type_name(optarg); + if (type == LOV_FOREIGN_TYPE_UNKNOWN) { + fprintf(stderr, + "%s %s: unrecognized foreign type '%s'\n", + progname, argv[0], + optarg); + return CMD_HELP; + } + } + } + foreign_mode = true; + break; + case LFS_MODE_OPT: + mode_opt = optarg; + if (mode_opt != NULL) { + mode = strtoul(mode_opt, &end, 8); + if (*end != '\0') { + fprintf(stderr, + "%s %s: bad MODE '%s'\n", + progname, argv[0], mode_opt); + return CMD_HELP; + } + previous_umask = umask(0); + } + break; case LFS_LAYOUT_COPY: from_copy = true; template = optarg; @@ -3050,6 +3128,9 @@ static int lfs_setstripe_internal(int argc, char **argv, migrate_mdt_param.fp_verbose = VERBOSE_DETAIL; migration_flags = MIGRATION_VERBOSE; break; + case 'x': + xattr = optarg; + break; case 'y': from_yaml = true; 
template = optarg; @@ -3069,6 +3150,29 @@ static int lfs_setstripe_internal(int argc, char **argv, goto usage_error; } + if (xattr && !foreign_mode) { + /* only print a warning as this is harmless and will be ignored + */ + fprintf(stderr, + "%s %s: xattr has been specified for non-foreign layout\n", + progname, argv[0]); + } else if (foreign_mode && !xattr) { + fprintf(stderr, + "%s %s: xattr must be provided in foreign mode\n", + progname, argv[0]); + goto usage_error; + } + + if (foreign_mode && (!setstripe_mode || comp_add | comp_del || + comp_set || comp_id || delete || from_copy || + setstripe_args_specified(&lsa) || lsa.lsa_nr_tgts || + lsa.lsa_tgts)) { + fprintf(stderr, + "%s %s: only --xattr/--flags/--mode options are valid with --foreign\n", + progname, argv[0]); + return CMD_HELP; + } + if (mirror_mode && mirror_count == 0) { fprintf(stderr, "error: %s: --mirror-count|-N option is required\n", @@ -3364,7 +3468,14 @@ static int lfs_setstripe_internal(int argc, char **argv, NULL); } else if (layout != NULL) { result = lfs_component_create(fname, O_CREAT | O_WRONLY, - 0666, layout); + mode, layout); + if (result >= 0) { + close(result); + result = 0; + } + } else if (foreign_mode) { + result = llapi_file_create_foreign(fname, mode, type, + flags, xattr); if (result >= 0) { close(result); result = 0; @@ -3372,7 +3483,7 @@ static int lfs_setstripe_internal(int argc, char **argv, } else { result = llapi_file_open_param(fname, O_CREAT | O_WRONLY, - 0666, param); + mode, param); if (result >= 0) { close(result); result = 0; @@ -3386,6 +3497,9 @@ static int lfs_setstripe_internal(int argc, char **argv, } } + if (mode_opt != NULL && previous_umask != 0) + umask(previous_umask); + free(param); free(migrate_mdt_param.fp_lmv_md); llapi_layout_free(layout); @@ -3572,6 +3686,8 @@ static int lfs_find(int argc, char **argv) .has_arg = required_argument }, { .val = LFS_MIRROR_STATE_OPT, .name = "mirror-state", .has_arg = required_argument }, + { .val = 
LFS_LAYOUT_FOREIGN_OPT, + .name = "foreign", .has_arg = optional_argument}, { .val = 'c', .name = "stripe-count", .has_arg = required_argument }, { .val = 'c', .name = "stripe_count", .has_arg = required_argument }, { .val = 'C', .name = "ctime", .has_arg = required_argument }, @@ -3780,6 +3896,30 @@ static int lfs_find(int argc, char **argv) param.fp_mirror_state = state; } break; + case LFS_LAYOUT_FOREIGN_OPT: { + /* all types by default */ + uint32_t type = LOV_FOREIGN_TYPE_UNKNOWN; + + if (optarg != NULL) { + /* check pure numeric */ + type = strtoul(optarg, &endptr, 0); + if (*endptr) { + /* check name */ + type = lov_check_foreign_type_name(optarg); + if (type == LOV_FOREIGN_TYPE_UNKNOWN) { + fprintf(stderr, + "%s %s: unrecognized foreign type '%s'\n", + progname, argv[0], + optarg); + return CMD_HELP; + } + } + } + param.fp_foreign_type = type; + param.fp_check_foreign = 1; + param.fp_exclude_foreign = !!neg_opt; + break; + } case 'c': if (optarg[0] == '+') { param.fp_stripe_count_sign = -1; diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index c5098fd..7063285 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -86,6 +86,14 @@ char *mdt_hash_name[] = { "none", LMV_HASH_NAME_ALL_CHARS, LMV_HASH_NAME_FNV_1A_64 }; +struct lustre_foreign_type lov_foreign_type[] = { + {.lft_type = LOV_FOREIGN_TYPE_NONE, .lft_name = "none"}, + {.lft_type = LOV_FOREIGN_TYPE_DAOS, .lft_name = "daos"}, + /* must be the last element */ + {.lft_type = LOV_FOREIGN_TYPE_UNKNOWN, .lft_name = NULL} + /* array max dimension must be <= UINT32_MAX */ +}; + void llapi_msg_set_level(int level) { /* ensure level is in the good range */ @@ -798,6 +806,76 @@ int llapi_file_open(const char *name, int flags, int mode, stripe_pattern, NULL); } +int llapi_file_create_foreign(const char *name, mode_t mode, __u32 type, + __u32 flags, char *foreign_lov) +{ + size_t len; + struct lov_foreign_md *lfm; + int fd, rc; + + if (foreign_lov == NULL) { + 
llapi_error(LLAPI_MSG_ERROR, -EINVAL, + "foreign LOV EA content must be provided"); + return -EINVAL; + } + + len = strlen(foreign_lov); + if (len > XATTR_SIZE_MAX - offsetof(struct lov_foreign_md, lfm_value) || + len <= 0) { + llapi_error(LLAPI_MSG_ERROR, -EINVAL, + "foreign LOV EA size %zu (must be 0 < len < %zu)", + len, XATTR_SIZE_MAX - + offsetof(struct lov_foreign_md, lfm_value)); + return -EINVAL; + } + + lfm = malloc(len + offsetof(struct lov_foreign_md, lfm_value)); + if (lfm == NULL) { + llapi_error(LLAPI_MSG_ERROR, -ENOMEM, + "failed to allocate lov_foreign_md"); + return -ENOMEM; + } + + fd = open(name, O_WRONLY|O_CREAT|O_LOV_DELAY_CREATE, mode); + if (fd == -1) { + perror("open()"); + rc = -errno; + goto out_free; + } + + lfm->lfm_magic = LOV_USER_MAGIC_FOREIGN; + lfm->lfm_length = len; + lfm->lfm_type = type; + lfm->lfm_flags = flags; + memcpy(lfm->lfm_value, foreign_lov, len); + + if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, lfm) != 0) { + char *errmsg = "stripe already set"; + char fsname[MAX_OBD_NAME + 1] = { 0 }; + + rc = -errno; + if (errno != EEXIST && errno != EALREADY) + errmsg = strerror(errno); + + llapi_err_noerrno(LLAPI_MSG_ERROR, + "setstripe error for '%s': %s", name, errmsg); + + /* Make sure we are on a Lustre file system */ + if (rc == -ENOTTY && llapi_search_fsname(name, fsname)) + llapi_error(LLAPI_MSG_ERROR, rc, + "'%s' is not on a Lustre filesystem", + name); + + close(fd); + fd = rc; + } + +out_free: + free(lfm); + + return fd; +} + int llapi_file_create(const char *name, unsigned long long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern) { @@ -3519,6 +3597,53 @@ static void lov_dump_plain_user_lmm(struct find_param *param, char *path, } } +static uint32_t lov_check_foreign_type(uint32_t foreign_type) +{ + uint32_t i; + + for (i = 0; i < LOV_FOREIGN_TYPE_UNKNOWN; i++) { + if (lov_foreign_type[i].lft_name == NULL) + break; + if (foreign_type == lov_foreign_type[i].lft_type) + return i; + } + + return 
LOV_FOREIGN_TYPE_UNKNOWN; +} + +static void lov_dump_foreign_lmm(struct find_param *param, char *path, + enum lov_dump_flags flags) +{ + struct lov_foreign_md *lfm = (void *)&param->fp_lmd->lmd_lmm; + bool yaml = flags & LDF_YAML; + + if (!yaml && param->fp_depth && path) + llapi_printf(LLAPI_MSG_NORMAL, "%s\n", path); + + if (param->fp_verbose & VERBOSE_DETAIL) { + uint32_t type = lov_check_foreign_type(lfm->lfm_type); + + llapi_printf(LLAPI_MSG_NORMAL, "lfm_magic: 0x%08X\n", + lfm->lfm_magic); + llapi_printf(LLAPI_MSG_NORMAL, "lfm_length: %u\n", + lfm->lfm_length); + llapi_printf(LLAPI_MSG_NORMAL, "lfm_type: 0x%08X", + lfm->lfm_type); + if (type < LOV_FOREIGN_TYPE_UNKNOWN) + llapi_printf(LLAPI_MSG_NORMAL, " (%s)\n", + lov_foreign_type[type].lft_name); + else + llapi_printf(LLAPI_MSG_NORMAL, " (unknown)\n"); + + llapi_printf(LLAPI_MSG_NORMAL, "lfm_flags: 0x%08X\n", + lfm->lfm_flags); + } + + llapi_printf(LLAPI_MSG_NORMAL, "lfm_value: '%.*s'\n", + lfm->lfm_length, lfm->lfm_value); + llapi_printf(LLAPI_MSG_NORMAL, "\n"); +} + static void llapi_lov_dump_user_lmm(struct find_param *param, char *path, enum lov_dump_flags flags) { @@ -3540,6 +3665,9 @@ static void llapi_lov_dump_user_lmm(struct find_param *param, char *path, case LOV_USER_MAGIC_SPECIFIC: lov_dump_plain_user_lmm(param, path, flags); break; + case LOV_USER_MAGIC_FOREIGN: + lov_dump_foreign_lmm(param, path, flags); + break; case LMV_MAGIC_V1: case LMV_USER_MAGIC: { char pool_name[LOV_MAXPOOLNAME + 1]; @@ -3707,6 +3835,10 @@ static int check_obd_match(struct find_param *param) if (!S_ISREG(st->st_mode)) return 0; + /* exclude foreign */ + if (v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) + return param->fp_exclude_obd; + /* Only those files should be accepted, which have a * stripe on the specified OST. 
*/ if (v1->lmm_magic == LOV_USER_MAGIC_COMP_V1) { @@ -3789,6 +3921,9 @@ static int find_check_stripe_size(struct find_param *param) struct lov_user_md_v1 *v1 = &param->fp_lmd->lmd_lmm; int ret, i, count = 1; + if (v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) + return param->fp_exclude_stripe_size ? 1 : -1; + if (v1->lmm_magic == LOV_USER_MAGIC_COMP_V1) { comp_v1 = (struct lov_comp_md_v1 *)v1; count = comp_v1->lcm_entry_count; @@ -3818,6 +3953,9 @@ static __u32 find_get_stripe_count(struct find_param *param) int i, count = 1; __u32 stripe_count = 0; + if (v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) + return 0; + if (v1->lmm_magic == LOV_USER_MAGIC_COMP_V1) { comp_v1 = (struct lov_comp_md_v1 *)v1; count = comp_v1->lcm_entry_count; @@ -3850,6 +3988,10 @@ static int find_check_layout(struct find_param *param) if (comp_v1) v1 = lov_comp_entry(comp_v1, i); + /* foreign file have a special magic but no pattern field */ + if (v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) + continue; + if (v1->lmm_pattern == LOV_PATTERN_INVALID) continue; @@ -3870,6 +4012,29 @@ static int find_check_layout(struct find_param *param) return -1; } +/* if no type specified, check/exclude all foreign + * if type specified, check all foreign&type and exclude !foreign + foreign&type + */ +static int find_check_foreign(struct find_param *param) +{ + if (S_ISREG(param->fp_lmd->lmd_st.st_mode)) { + struct lov_foreign_md *lfm; + + lfm = (void *)&param->fp_lmd->lmd_lmm; + if (lfm->lfm_magic != LOV_USER_MAGIC_FOREIGN) { + if (param->fp_foreign_type == LOV_FOREIGN_TYPE_UNKNOWN) + return param->fp_exclude_foreign ? 1 : -1; + return -1; + } else { + if (param->fp_foreign_type == LOV_FOREIGN_TYPE_UNKNOWN || + lfm->lfm_type == param->fp_foreign_type) + return param->fp_exclude_foreign ? -1 : 1; + return param->fp_exclude_foreign ? 
1 : -1; + } + } + return -1; +} + static int find_check_pool(struct find_param *param) { struct lov_comp_md_v1 *comp_v1 = NULL; @@ -3890,6 +4055,9 @@ static int find_check_pool(struct find_param *param) if (comp_v1 != NULL) v1 = lov_comp_entry(comp_v1, i); + if (v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) + continue; + if (((v1->lmm_magic == LOV_USER_MAGIC_V1) && (param->fp_poolname[0] == '\0')) || ((v1->lmm_magic == LOV_USER_MAGIC_V3) && @@ -3917,6 +4085,9 @@ static int find_check_comp_options(struct find_param *param) struct lov_comp_md_entry_v1 *entry; int i, ret = 0; + if (v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) + return -1; + if (v1->lmm_magic == LOV_USER_MAGIC_COMP_V1) { comp_v1 = (struct lov_comp_md_v1 *)v1; } else { @@ -4025,7 +4196,7 @@ static bool find_check_lmm_info(struct find_param *param) param->fp_check_stripe_size || param->fp_check_layout || param->fp_check_comp_count || param->fp_check_comp_end || param->fp_check_comp_start || param->fp_check_comp_flags || - param->fp_check_mirror_count || + param->fp_check_mirror_count || param->fp_check_foreign || param->fp_check_mirror_state || param->fp_check_projid; } @@ -4201,6 +4372,12 @@ static int cb_find_init(char *path, DIR *parent, DIR **dirp, } } + if (param->fp_check_foreign) { + decision = find_check_foreign(param); + if (decision == -1) + goto decided; + } + if (param->fp_check_stripe_size) { decision = find_check_stripe_size(param); if (decision == -1) diff --git a/lustre/utils/liblustreapi.map b/lustre/utils/liblustreapi.map index 45b8927..bdb832a 100644 --- a/lustre/utils/liblustreapi.map +++ b/lustre/utils/liblustreapi.map @@ -6,6 +6,7 @@ liblustreapi_initialized; l_ioctl; mdt_hash_name; + lov_foreign_type; Parser_*; register_ioc_*; local: