X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fuapi%2Flinux%2Flustre%2Flustre_user.h;h=5c5cd41d7d4a7200ef7bfa267e6210960694fef7;hp=6a2cacb96f94fa806b1ca7a135a2522fcc43f4e9;hb=c1d0a355a6;hpb=361edea4707254f4752ffd8c2db6c77a3ab9539c;ds=sidebyside diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 6a2cacb..5c5cd41 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -23,7 +23,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2010, 2016, Intel Corporation. + * Copyright (c) 2010, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -42,6 +42,7 @@ * @{ */ +#include #include #ifdef __KERNEL__ @@ -51,16 +52,23 @@ # include # include #else /* !__KERNEL__ */ -# define NEED_QUOTA_DEFS # include # include # include /* snprintf() */ +# include # include -# include +# define NEED_QUOTA_DEFS +/* # include - this causes complaints about caddr_t */ # include # include #endif /* __KERNEL__ */ +/* Handle older distros */ +#ifndef __ALIGN_KERNEL +# define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +# define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#endif + #if defined(__cplusplus) extern "C" { #endif @@ -78,66 +86,208 @@ extern "C" { "project", /* PRJQUOTA */ \ "undefined", \ }; +#ifndef USRQUOTA +#define USRQUOTA 0 +#endif +#ifndef GRPQUOTA +#define GRPQUOTA 1 +#endif +#ifndef PRJQUOTA #define PRJQUOTA 2 +#endif -#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \ - defined(__craynv) || defined(__mips64__) || defined(__powerpc64__) || \ - defined(__aarch64__) -typedef struct stat lstat_t; -# define lstat_f lstat -# define fstat_f fstat -# define fstatat_f fstatat -# define HAVE_LOV_USER_MDS_DATA -#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__) -typedef struct stat64 lstat_t; -# define lstat_f lstat64 -# define fstat_f fstat64 -# define fstatat_f fstatat64 -# define HAVE_LOV_USER_MDS_DATA +/* + * We need to always use 64bit version because the structure + * is shared across entire cluster where 32bit and 64bit machines + * are co-existing. + */ +#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) +typedef struct stat64 lstat_t; +#define lstat_f lstat64 +#define fstat_f fstat64 +#define fstatat_f fstatat64 +#else +typedef struct stat lstat_t; +#define lstat_f lstat +#define fstat_f fstat +#define fstatat_f fstatat #endif +#ifndef STATX_BASIC_STATS +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __u32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). + */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ + +/* + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + +#endif + +typedef struct statx lstatx_t; + +#define HAVE_LOV_USER_MDS_DATA + #define LUSTRE_EOF 0xffffffffffffffffULL /* for statfs() */ #define LL_SUPER_MAGIC 0x0BD00BD0 -#ifndef FSFILT_IOC_GETFLAGS -#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long) -#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long) -#define FSFILT_IOC_GETVERSION _IOR('f', 3, long) -#define FSFILT_IOC_SETVERSION _IOW('f', 4, long) -#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long) -#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long) -#endif +#define FSFILT_IOC_GETVERSION _IOR('f', 3, long) /* FIEMAP flags supported by Lustre */ #define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER) enum obd_statfs_state { - OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ - OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */ + OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ + OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */ + OS_STATE_NOPRECREATE = 0x00000004, /**< no object precreation */ + OS_STATE_UNUSED1 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */ + OS_STATE_UNUSED2 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */ OS_STATE_ENOSPC = 0x00000020, /**< not enough free space */ OS_STATE_ENOINO = 0x00000040, /**< not enough inodes */ + OS_STATE_SUM = 0x00000100, /**< aggregated for all tagrets */ + OS_STATE_NONROT = 0x00000200, /**< non-rotational device */ }; +/** filesystem statistics/attributes for target device */ struct obd_statfs { - __u64 os_type; - __u64 os_blocks; - __u64 os_bfree; - __u64 os_bavail; - __u64 os_files; - __u64 os_ffree; - __u8 os_fsid[40]; - __u32 os_bsize; - __u32 os_namelen; - __u64 os_maxbytes; - __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */ - __u32 os_fprecreated; /* objs available now to the caller */ + __u64 os_type; /* EXT4_SUPER_MAGIC, UBERBLOCK_MAGIC */ + __u64 os_blocks; /* total size in #os_bsize blocks */ + __u64 os_bfree; /* number of unused blocks */ + __u64 os_bavail; /* blocks available for allocation */ + __u64 os_files; /* total number of objects */ + __u64 os_ffree; /* # objects that could be created */ + __u8 os_fsid[40]; /* identifier for filesystem */ + __u32 os_bsize; /* block size in bytes for os_blocks */ + __u32 os_namelen; /* maximum length of filename in bytes*/ + __u64 os_maxbytes; /* maximum object size in bytes */ + __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */ + __u32 os_fprecreated; /* objs available now to the caller */ /* used in QoS code to find preferred * OSTs */ - __u32 os_spare2; - __u32 os_spare3; - __u32 os_spare4; + __u32 os_granted; /* space granted for MDS */ + __u32 os_spare3; /* Unused padding fields. Remember */ + __u32 os_spare4; /* to fix lustre_swab_obd_statfs() */ __u32 os_spare5; __u32 os_spare6; __u32 os_spare7; @@ -145,6 +295,13 @@ struct obd_statfs { __u32 os_spare9; }; +/** additional filesystem attributes for target device */ +struct obd_statfs_info { + __u32 os_reserved_mb_low; /* reserved mb low */ + __u32 os_reserved_mb_high; /* reserved mb high */ + bool os_enable_pre; /* enable pre create logic */ +}; + /** * File IDentifier. * @@ -187,15 +344,47 @@ struct ost_layout { __u32 ol_comp_id; } __attribute__((packed)); -/* keep this one for compatibility */ -struct filter_fid_old { - struct lu_fid ff_parent; - __u64 ff_objid; - __u64 ff_seq; +/* The filter_fid structure has changed several times over its lifetime. + * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and + * stripe_index and the "self FID" (objid/seq) to be able to recover the + * OST objects in case of corruption. With the move to 2.4 and OSD-API for + * the OST, the "trusted.lma" xattr was added to the OST objects to store + * the "self FID" to be consistent with the MDT on-disk format, and the + * filter_fid only stored the MDT inode parent FID and stripe index. + * + * In 2.10, the addition of PFL composite layouts required more information + * to be stored into the filter_fid in order to be able to identify which + * component the OST object belonged. As well, the stripe size may vary + * between components, so it was no longer safe to assume the stripe size + * or stripe_count of a file. This is also more robust for plain layouts. + * + * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not + * enough space to store both the filter_fid and LMA in the inode, so they + * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid + * an extra seek for every OST object access. + * + * In 2.11, FLR mirror layouts also need to store the layout version and + * range so that writes to old versions of the layout are not allowed. + * That ensures that mirrored objects are not modified by evicted clients, + * and ensures that the components are correctly marked stale on the MDT. + */ +struct filter_fid_18_23 { + struct lu_fid ff_parent; /* stripe_idx in f_ver */ + __u64 ff_objid; + __u64 ff_seq; +}; + +struct filter_fid_24_29 { + struct lu_fid ff_parent; /* stripe_idx in f_ver */ +}; + +struct filter_fid_210 { + struct lu_fid ff_parent; /* stripe_idx in f_ver */ + struct ost_layout ff_layout; }; struct filter_fid { - struct lu_fid ff_parent; + struct lu_fid ff_parent; /* stripe_idx in f_ver */ struct ost_layout ff_layout; __u32 ff_layout_version; __u32 ff_range; /* range of layout version that @@ -215,6 +404,7 @@ enum lma_compat { * under /O//d. */ LMAC_STRIPE_INFO = 0x00000010, /* stripe info in the LMA EA. */ LMAC_COMP_INFO = 0x00000020, /* Component info in the LMA EA. */ + LMAC_IDX_BACKUP = 0x00000040, /* Has index backup. */ }; /** @@ -281,8 +471,17 @@ struct lustre_ost_attrs { */ #define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64)) -enum { - LSOM_FL_VALID = 1 << 0, +enum lustre_som_flags { + /* Unknow or no SoM data, must get size from OSTs. */ + SOM_FL_UNKNOWN = 0x0000, + /* Known strictly correct, FLR or DoM file (SoM guaranteed). */ + SOM_FL_STRICT = 0x0001, + /* Known stale - was right at some point in the past, but it is + * known (or likely) to be incorrect now (e.g. opened for write). */ + SOM_FL_STALE = 0x0002, + /* Approximate, may never have been strictly correct, + * need to sync SOM data to achieve eventual consistency. */ + SOM_FL_LAZY = 0x0004, }; struct lustre_som_attrs { @@ -333,6 +532,9 @@ enum ll_lease_mode { enum ll_lease_flags { LL_LEASE_RESYNC = 0x1, LL_LEASE_RESYNC_DONE = 0x2, + LL_LEASE_LAYOUT_MERGE = 0x4, + LL_LEASE_LAYOUT_SPLIT = 0x8, + LL_LEASE_PCC_ATTACH = 0x10, }; #define IOC_IDS_MAX 4096 @@ -343,6 +545,16 @@ struct ll_ioc_lease { __u32 lil_ids[0]; }; +struct ll_ioc_lease_id { + __u32 lil_mode; + __u32 lil_flags; + __u32 lil_count; + __u16 lil_mirror_id; + __u16 lil_padding1; + __u64 lil_padding2; + __u32 lil_ids[0]; +}; + /* * The ioctl naming rules: * LL_* - works on the currently opened filehandle instead of parent dir @@ -400,6 +612,7 @@ struct ll_ioc_lease { #define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md) #define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) #define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64) +#define LL_IOC_RMFID _IOR('f', 242, struct fid_array) #define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease) #define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long) #define LL_IOC_GET_LEASE _IO('f', 244) @@ -409,6 +622,11 @@ struct ll_ioc_lease { #define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid) #define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent) #define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise) +#define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat) +#define LL_IOC_HEAT_SET _IOW('f', 251, __u64) +#define LL_IOC_PCC_DETACH _IOW('f', 252, struct lu_pcc_detach) +#define LL_IOC_PCC_DETACH_BY_FID _IOW('f', 252, struct lu_pcc_detach_fid) +#define LL_IOC_PCC_STATE _IOR('f', 252, struct lu_pcc_state) #ifndef FS_IOC_FSGETXATTR /* @@ -426,17 +644,24 @@ struct fsxattr { #endif #define LL_IOC_FSGETXATTR FS_IOC_FSGETXATTR #define LL_IOC_FSSETXATTR FS_IOC_FSSETXATTR +#ifndef FS_XFLAG_PROJINHERIT +#define FS_XFLAG_PROJINHERIT 0x00000200 +#endif #define LL_STATFS_LMV 1 #define LL_STATFS_LOV 2 #define LL_STATFS_NODELAY 4 -#define IOC_MDC_TYPE 'i' -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *) -#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *) -#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *) +#define IOC_MDC_TYPE 'i' +#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) +#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *) +#ifdef HAVE_LOV_USER_MDS_DATA +#define IOC_MDC_GETFILEINFO_OLD _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data_v1 *) +#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data) +#define LL_IOC_MDC_GETINFO_OLD _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data_v1 *) +#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data) +#endif #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */ @@ -447,6 +672,9 @@ struct fsxattr { /* To be compatible with old statically linked binary we keep the check for * the older 0100000000 flag. This is already removed upstream. LU-812. */ #define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */ +#ifndef FASYNC +#define FASYNC 00020000 /* fcntl, for BSD compatibility */ +#endif #define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC) #define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \ O_LOV_DELAY_CREATE_MASK) @@ -456,6 +684,7 @@ struct fsxattr { #define LL_FILE_READAHEA 0x00000004 #define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */ #define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */ +#define LL_FILE_FLOCK_WARNING 0x00000020 /* warned about disabled flock */ #define LOV_USER_MAGIC_V1 0x0BD10BD0 #define LOV_USER_MAGIC LOV_USER_MAGIC_V1 @@ -464,26 +693,44 @@ struct fsxattr { /* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */ #define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */ #define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0 +#define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0 +#define LOV_USER_MAGIC_SEL 0x0BD80BD0 #define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */ #define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/ +#define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0 -#define LOV_PATTERN_NONE 0x000 -#define LOV_PATTERN_RAID0 0x001 -#define LOV_PATTERN_RAID1 0x002 -#define LOV_PATTERN_MDT 0x100 -#define LOV_PATTERN_CMOBD 0x200 +#define LOV_PATTERN_NONE 0x000 +#define LOV_PATTERN_RAID0 0x001 +#define LOV_PATTERN_RAID1 0x002 +#define LOV_PATTERN_MDT 0x100 +#define LOV_PATTERN_OVERSTRIPING 0x200 #define LOV_PATTERN_F_MASK 0xffff0000 #define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */ #define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */ #define LOV_PATTERN_DEFAULT 0xffffffff +#define LOV_OFFSET_DEFAULT ((__u16)-1) +#define LMV_OFFSET_DEFAULT ((__u32)-1) + static inline bool lov_pattern_supported(__u32 pattern) { + return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 || + (pattern & ~LOV_PATTERN_F_RELEASED) == + (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) || + (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT; +} + +/* RELEASED and MDT patterns are not valid in many places, so rather than + * having many extra checks on lov_pattern_supported, we have this separate + * check for non-released, non-DOM components + */ +static inline bool lov_pattern_supported_normal_comp(__u32 pattern) +{ return pattern == LOV_PATTERN_RAID0 || - pattern == LOV_PATTERN_MDT || - pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED); + pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING); + } #define LOV_MAXPOOLNAME 15 @@ -501,7 +748,7 @@ static inline bool lov_pattern_supported(__u32 pattern) * allocation that is sufficient for the current generation of systems. * * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */ -#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ +#define LOV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */ #define LOV_ALL_STRIPES 0xffff /* only valid for directories */ #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ @@ -547,13 +794,34 @@ struct lov_user_md_v3 { /* LOV EA user data (host-endian) */ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ } __attribute__((packed)); +struct lov_foreign_md { + __u32 lfm_magic; /* magic number = LOV_MAGIC_FOREIGN */ + __u32 lfm_length; /* length of lfm_value */ + __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */ + __u32 lfm_flags; /* flags, type specific */ + char lfm_value[]; +}; + +#define foreign_size(lfm) (((struct lov_foreign_md *)lfm)->lfm_length + \ + offsetof(struct lov_foreign_md, lfm_value)) + +#define foreign_size_le(lfm) \ + (le32_to_cpu(((struct lov_foreign_md *)lfm)->lfm_length) + \ + offsetof(struct lov_foreign_md, lfm_value)) + +/** + * The stripe size fields are shared for the extension size storage, however + * the extension size is stored in KB, not bytes. + */ +#define SEL_UNIT_SIZE 1024llu + struct lu_extent { __u64 e_start; __u64 e_end; }; #define DEXT "[%#llx, %#llx)" -#define PEXT(ext) (ext)->e_start, (ext)->e_end +#define PEXT(ext) (unsigned long long)(ext)->e_start, (unsigned long long)(ext)->e_end static inline bool lu_extent_is_overlapped(struct lu_extent *e1, struct lu_extent *e2) @@ -567,16 +835,41 @@ static inline bool lu_extent_is_whole(struct lu_extent *e) } enum lov_comp_md_entry_flags { - LCME_FL_PRIMARY = 0x00000001, /* Not used */ - LCME_FL_STALE = 0x00000002, /* Not used */ - LCME_FL_OFFLINE = 0x00000004, /* Not used */ - LCME_FL_PREFERRED = 0x00000008, /* Not used */ - LCME_FL_INIT = 0x00000010, /* instantiated */ - LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag, - won't be stored on disk */ -}; - -#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT) + LCME_FL_STALE = 0x00000001, /* FLR: stale data */ + LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */ + LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */ + LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR, + LCME_FL_OFFLINE = 0x00000008, /* Not used */ + LCME_FL_INIT = 0x00000010, /* instantiated */ + LCME_FL_NOSYNC = 0x00000020, /* FLR: no sync for the mirror */ + LCME_FL_EXTENSION = 0x00000040, /* extension comp, never init */ + LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag, + * won't be stored on disk + */ +}; + +#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \ + LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) + +/* The component flags can be set by users at creation/modification time. */ +#define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) + +/* The mirror flags can be set by users at creation time. */ +#define LCME_USER_MIRROR_FLAGS (LCME_FL_PREF_RW) + +/* The allowed flags obtained from the client at component creation time. */ +#define LCME_CL_COMP_FLAGS (LCME_USER_MIRROR_FLAGS | LCME_FL_EXTENSION) + +/* The mirror flags sent by client */ +#define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC) + +/* These flags have meaning when set in a default layout and will be inherited + * from the default/template layout set on a directory. + */ +#define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) /* the highest bit in obdo::o_layout_version is used to mark if the file is * being resynced. */ @@ -602,13 +895,16 @@ struct lov_comp_md_entry_v1 { __u32 lcme_offset; /* offset of component blob, start from lov_comp_md_v1 */ __u32 lcme_size; /* size of component blob */ - __u64 lcme_padding[2]; + __u32 lcme_layout_gen; + __u64 lcme_timestamp; /* snapshot time if applicable*/ + __u32 lcme_padding_1; } __attribute__((packed)); #define SEQ_ID_MAX 0x0000FFFF #define SEQ_ID_MASK SEQ_ID_MAX /* bit 30:16 of lcme_id is used to store mirror id */ #define MIRROR_ID_MASK 0x7FFF0000 +#define MIRROR_ID_NEG 0x8000 #define MIRROR_ID_SHIFT 16 static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid) @@ -626,7 +922,7 @@ static inline __u16 mirror_id_of(__u32 id) */ enum lov_comp_md_flags { /* the least 2 bits are used by FLR to record file state */ - LCM_FL_NOT_FLR = 0, + LCM_FL_NONE = 0, LCM_FL_RDONLY = 1, LCM_FL_WRITE_PENDING = 2, LCM_FL_SYNC_PENDING = 3, @@ -647,11 +943,6 @@ struct lov_comp_md_v1 { struct lov_comp_md_entry_v1 lcm_entries[0]; } __attribute__((packed)); -/* - * Maximum number of mirrors Lustre can support. - */ -#define LUSTRE_MIRROR_COUNT_MAX 16 - static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) { if (stripes == (__u16)-1) @@ -668,15 +959,19 @@ static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) * use this. It is unsafe to #define those values in this header as it * is possible the application has already #included . */ #ifdef HAVE_LOV_USER_MDS_DATA -#define lov_user_mds_data lov_user_mds_data_v1 +#define lov_user_mds_data lov_user_mds_data_v2 struct lov_user_mds_data_v1 { lstat_t lmd_st; /* MDS stat struct */ struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */ } __attribute__((packed)); -struct lov_user_mds_data_v3 { - lstat_t lmd_st; /* MDS stat struct */ - struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */ +struct lov_user_mds_data_v2 { + struct lu_fid lmd_fid; /* Lustre FID */ + lstatx_t lmd_stx; /* MDS statx struct */ + __u64 lmd_flags; /* MDS stat flags */ + __u32 lmd_lmmsize; /* LOV EA size */ + __u32 lmd_padding; /* unused */ + struct lov_user_md_v1 lmd_lmm; /* LOV EA user data */ } __attribute__((packed)); #endif @@ -693,11 +988,53 @@ enum lmv_hash_type { LMV_HASH_TYPE_MAX, }; +#define LMV_HASH_TYPE_DEFAULT LMV_HASH_TYPE_FNV_1A_64 + #define LMV_HASH_NAME_ALL_CHARS "all_char" #define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64" +/* not real hash type, but exposed to user as "space" hash type */ +#define LMV_HASH_NAME_SPACE "space" + +/* Right now only the lower part(0-16bits) of lmv_hash_type is being used, + * and the higher part will be the flag to indicate the status of object, + * for example the object is being migrated. And the hash function + * might be interpreted differently with different flags. */ +#define LMV_HASH_TYPE_MASK 0x0000ffff + +static inline bool lmv_is_known_hash_type(__u32 type) +{ + return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 || + (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS; +} + +/* The striped directory has ever lost its master LMV EA, then LFSCK + * re-generated it. This flag is used to indicate such case. It is an + * on-disk flag. */ +#define LMV_HASH_FLAG_LOST_LMV 0x10000000 + +#define LMV_HASH_FLAG_BAD_TYPE 0x20000000 +#define LMV_HASH_FLAG_MIGRATION 0x80000000 + extern char *mdt_hash_name[LMV_HASH_TYPE_MAX]; +struct lustre_foreign_type { + uint32_t lft_type; + const char *lft_name; +}; + +/** + * LOV/LMV foreign types + **/ +enum lustre_foreign_types { + LU_FOREIGN_TYPE_NONE = 0, + LU_FOREIGN_TYPE_DAOS = 0xda05, + /* must be the max/last one */ + LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff, +}; + +extern struct lustre_foreign_type lu_foreign_types[]; + /* Got this according to how get LOV_MAX_STRIPE_COUNT, see above, * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) */ #define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ @@ -707,7 +1044,7 @@ struct lmv_user_md_v1 { __u32 lum_stripe_count; /* dirstripe count */ __u32 lum_stripe_offset; /* MDT idx for default dirstripe */ __u32 lum_hash_type; /* Dir stripe policy */ - __u32 lum_type; /* LMV type: default or normal */ + __u32 lum_type; /* LMV type: default */ __u32 lum_padding1; __u32 lum_padding2; __u32 lum_padding3; @@ -715,10 +1052,33 @@ struct lmv_user_md_v1 { struct lmv_user_mds_data lum_objects[0]; } __attribute__((packed)); +static inline __u32 lmv_foreign_to_md_stripes(__u32 size) +{ + if (size <= sizeof(struct lmv_user_md)) + return 0; + + size -= sizeof(struct lmv_user_md); + return (size + sizeof(struct lmv_user_mds_data) - 1) / + sizeof(struct lmv_user_mds_data); +} + +/* + * NB, historically default layout didn't set type, but use XATTR name to differ + * from normal layout, for backward compatibility, define LMV_TYPE_DEFAULT 0x0, + * and still use the same method. + */ +enum lmv_type { + LMV_TYPE_DEFAULT = 0x0000, +}; + static inline int lmv_user_md_size(int stripes, int lmm_magic) { - return sizeof(struct lmv_user_md) + - stripes * sizeof(struct lmv_user_mds_data); + int size = sizeof(struct lmv_user_md); + + if (lmm_magic == LMV_USER_MAGIC_SPECIFIC) + size += stripes * sizeof(struct lmv_user_mds_data); + + return size; } struct ll_recreate_obj { @@ -793,14 +1153,14 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) #define FID_NOBRACE_LEN 40 #define FID_LEN (FID_NOBRACE_LEN + 2) #define DFID_NOBRACE "%#llx:0x%x:0x%x" -#define DFID "["DFID_NOBRACE"]" +#define DFID "[" DFID_NOBRACE "]" #define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver /* scanf input parse format for fids in DFID_NOBRACE format * Need to strip '[' from DFID format first or use "["SFID"]" at caller. * usage: sscanf(fidstr, SFID, RFID(&fid)); */ #define SFID "0x%llx:0x%x:0x%x" -#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) +#define RFID(fid) (unsigned long long *)&((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) /********* Quotas **********/ @@ -828,9 +1188,31 @@ static inline __u64 lustre_stoqb(size_t space) /* lustre-specific control commands */ #define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */ #define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */ +#define LUSTRE_Q_GETDEFAULT 0x80000d /* get default quota */ +#define LUSTRE_Q_SETDEFAULT 0x80000e /* set default quota */ + +/* In the current Lustre implementation, the grace time is either the time + * or the timestamp to be used after some quota ID exceeds the soft limt, + * 48 bits should be enough, its high 16 bits can be used as quota flags. + * */ +#define LQUOTA_GRACE_BITS 48 +#define LQUOTA_GRACE_MASK ((1ULL << LQUOTA_GRACE_BITS) - 1) +#define LQUOTA_GRACE_MAX LQUOTA_GRACE_MASK +#define LQUOTA_GRACE(t) (t & LQUOTA_GRACE_MASK) +#define LQUOTA_FLAG(t) (t >> LQUOTA_GRACE_BITS) +#define LQUOTA_GRACE_FLAG(t, f) ((__u64)t | (__u64)f << LQUOTA_GRACE_BITS) + +/* different quota flags */ + +/* the default quota flag, the corresponding quota ID will use the default + * quota setting, the hardlimit and softlimit of its quota record in the global + * quota file will be set to 0, the low 48 bits of the grace will be set to 0 + * and high 16 bits will contain this flag (see above comment). + * */ +#define LQUOTA_FLAG_DEFAULT 0x0001 #define ALLQUOTA 255 /* set all quota */ -static inline char *qtype_name(int qtype) +static inline const char *qtype_name(int qtype) { switch (qtype) { case USRQUOTA: @@ -844,6 +1226,7 @@ static inline char *qtype_name(int qtype) } #define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629 +#define SEPOL_DOWNCALL_MAGIC 0x8b8bb842 /* permission */ #define N_PERMS_MAX 64 @@ -865,6 +1248,13 @@ struct identity_downcall_data { __u32 idd_groups[0]; }; +struct sepol_downcall_data { + __u32 sdd_magic; + time_t sdd_sepol_mtime; + __u16 sdd_sepol_len; + char sdd_sepol[0]; +}; + #ifdef NEED_QUOTA_DEFS #ifndef QIF_BLIMITS #define QIF_BLIMITS 1 @@ -882,14 +1272,14 @@ struct identity_downcall_data { #endif /* !__KERNEL__ */ /* lustre volatile file support - * file name header: .^L^S^T^R:volatile" + * file name header: ".^L^S^T^R:volatile" */ #define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE" #define LUSTRE_VOLATILE_HDR_LEN 14 -typedef enum lustre_quota_version { +enum lustre_quota_version { LUSTRE_QUOTA_V2 = 1 -} lustre_quota_version_t; +}; /* XXX: same as if_dqinfo struct in kernel */ struct obd_dqinfo { @@ -939,8 +1329,6 @@ struct if_quotactl { #define SWAP_LAYOUTS_KEEP_MTIME (1 << 2) #define SWAP_LAYOUTS_KEEP_ATIME (1 << 3) #define SWAP_LAYOUTS_CLOSE (1 << 4) -#define MERGE_LAYOUTS_CLOSE (1 << 5) -#define INTENT_LAYOUTS_CLOSE (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE) /* Swap XATTR_NAME_HSM as well, only on the MDT so far */ #define SWAP_LAYOUTS_MDS_HSM (1 << 31) @@ -952,10 +1340,95 @@ struct lustre_swap_layouts { __u64 sl_dv2; }; +/** Bit-mask of valid attributes */ +/* The LA_* flags are written to disk as part of the ChangeLog records + * so they are part of the on-disk and network protocol, and cannot be changed. + * Only the first 12 bits are currently saved. + */ +enum la_valid { + LA_ATIME = 1 << 0, /* 0x00001 */ + LA_MTIME = 1 << 1, /* 0x00002 */ + LA_CTIME = 1 << 2, /* 0x00004 */ + LA_SIZE = 1 << 3, /* 0x00008 */ + LA_MODE = 1 << 4, /* 0x00010 */ + LA_UID = 1 << 5, /* 0x00020 */ + LA_GID = 1 << 6, /* 0x00040 */ + LA_BLOCKS = 1 << 7, /* 0x00080 */ + LA_TYPE = 1 << 8, /* 0x00100 */ + LA_FLAGS = 1 << 9, /* 0x00200 */ + LA_NLINK = 1 << 10, /* 0x00400 */ + LA_RDEV = 1 << 11, /* 0x00800 */ + LA_BLKSIZE = 1 << 12, /* 0x01000 */ + LA_KILL_SUID = 1 << 13, /* 0x02000 */ + LA_KILL_SGID = 1 << 14, /* 0x04000 */ + LA_PROJID = 1 << 15, /* 0x08000 */ + LA_LAYOUT_VERSION = 1 << 16, /* 0x10000 */ + LA_LSIZE = 1 << 17, /* 0x20000 */ + LA_LBLOCKS = 1 << 18, /* 0x40000 */ + /** + * Attributes must be transmitted to OST objects + */ + LA_REMOTE_ATTR_SET = (LA_UID | LA_GID | LA_PROJID | LA_LAYOUT_VERSION) +}; + +#define MDS_FMODE_READ 00000001 +#define MDS_FMODE_WRITE 00000002 + +#define MDS_FMODE_CLOSED 00000000 +#define MDS_FMODE_EXEC 00000004 +/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */ +/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */ +/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ + +#define MDS_OPEN_CREATED 00000010 +/* MDS_OPEN_CROSS 00000020 obsolete in 2.12, internal use only */ + +#define MDS_OPEN_CREAT 00000100 +#define MDS_OPEN_EXCL 00000200 +#define MDS_OPEN_TRUNC 00001000 +#define MDS_OPEN_APPEND 00002000 +#define MDS_OPEN_SYNC 00010000 +#define MDS_OPEN_DIRECTORY 00200000 + +#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */ +#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ +#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ +#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file. + * We do not support JOIN FILE + * anymore, reserve this flags + * just for preventing such bit + * to be reused. */ + +#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ +#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ +#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ +#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */ +#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or + * hsm restore) */ +#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created + unlinked */ +#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease + * delegation, succeed if it's not + * being opened with conflict mode. + */ +#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ + +#define MDS_OPEN_RESYNC 04000000000000ULL /* FLR: file resync */ +#define MDS_OPEN_PCC 010000000000000ULL /* PCC: auto RW-PCC cache attach + * for newly created file */ + +/* lustre internal open flags, which should not be set from user space */ +#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ + MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \ + MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \ + MDS_OPEN_RELEASE | MDS_OPEN_RESYNC | \ + MDS_OPEN_PCC) + /********* Changelogs **********/ /** Changelog record types */ enum changelog_rec_type { + CL_NONE = -1, CL_MARK = 0, CL_CREATE = 1, /* namespace */ CL_MKDIR = 2, /* namespace */ @@ -971,7 +1444,8 @@ enum changelog_rec_type { CL_LAYOUT = 12, /* file layout/striping modified */ CL_TRUNC = 13, CL_SETATTR = 14, - CL_XATTR = 15, + CL_SETXATTR = 15, + CL_XATTR = CL_SETXATTR, /* Deprecated name */ CL_HSM = 16, /* HSM specific events, see flags */ CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */ CL_CTIME = 18, @@ -979,6 +1453,8 @@ enum changelog_rec_type { CL_MIGRATE = 20, CL_FLRW = 21, /* FLR: file was firstly written */ CL_RESYNC = 22, /* FLR: file was resync-ed */ + CL_GETXATTR = 23, + CL_DN_OPEN = 24, /* denied open */ CL_LAST }; @@ -987,7 +1463,7 @@ static inline const char *changelog_type2str(int type) { "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK", "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC", "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT", - "FLRW", "RESYNC", + "FLRW", "RESYNC","GXATR", "NOPEN", }; if (type >= 0 && type < CL_LAST) @@ -995,16 +1471,17 @@ static inline const char *changelog_type2str(int type) { return NULL; } -/* per-record flags */ +/* 12 bits of per-record data can be stored in the bottom of the flags */ #define CLF_FLAGSHIFT 12 -#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1) -#define CLF_VERMASK (~CLF_FLAGMASK) enum changelog_rec_flags { CLF_VERSION = 0x1000, CLF_RENAME = 0x2000, CLF_JOBID = 0x4000, CLF_EXTRA_FLAGS = 0x8000, - CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS + CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID | + CLF_EXTRA_FLAGS, + CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1, + CLF_VERMASK = ~CLF_FLAGMASK, }; @@ -1064,35 +1541,44 @@ static inline enum hsm_event hsm_get_cl_event(__u16 flags) CLF_HSM_EVENT_L); } -static inline void hsm_set_cl_event(int *flags, enum hsm_event he) +static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags, + enum hsm_event he) { - *flags |= (he << CLF_HSM_EVENT_L); + *clf_flags = (enum changelog_rec_flags) + (*clf_flags | (he << CLF_HSM_EVENT_L)); } -static inline __u16 hsm_get_cl_flags(int flags) +static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags) { - return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); + return CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); } -static inline void hsm_set_cl_flags(int *flags, int bits) +static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags, + unsigned int bits) { - *flags |= (bits << CLF_HSM_FLAG_L); + *clf_flags = (enum changelog_rec_flags) + (*clf_flags | (bits << CLF_HSM_FLAG_L)); } -static inline int hsm_get_cl_error(int flags) +static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags) { - return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); + return CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); } -static inline void hsm_set_cl_error(int *flags, int error) +static inline void hsm_set_cl_error(enum changelog_rec_flags *clf_flags, + unsigned int error) { - *flags |= (error << CLF_HSM_ERR_L); + *clf_flags = (enum changelog_rec_flags) + (*clf_flags | (error << CLF_HSM_ERR_L)); } enum changelog_rec_extra_flags { CLFE_INVALID = 0, CLFE_UIDGID = 0x0001, - CLFE_SUPPORTED = CLFE_UIDGID + CLFE_NID = 0x0002, + CLFE_OPEN = 0x0004, + CLFE_XATTR = 0x0008, + CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR }; enum changelog_send_flag { @@ -1111,11 +1597,17 @@ enum changelog_send_flag { enum changelog_send_extra_flag { /* Pack uid/gid into the changelog record */ CHANGELOG_EXTRA_FLAG_UIDGID = 0x01, + /* Pack nid into the changelog record */ + CHANGELOG_EXTRA_FLAG_NID = 0x02, + /* Pack open mode into the changelog record */ + CHANGELOG_EXTRA_FLAG_OMODE = 0x04, + /* Pack xattr name into the changelog record */ + CHANGELOG_EXTRA_FLAG_XATTR = 0x08, }; -#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \ +#define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \ changelog_rec_offset(CLF_SUPPORTED, \ - CLFE_SUPPORTED)) + CLFE_SUPPORTED), 8) /* 31 usable bytes string + null terminator. */ #define LUSTRE_JOBID_SIZE 32 @@ -1163,6 +1655,25 @@ struct changelog_ext_uidgid { __u64 cr_gid; }; +/* Changelog extra extension to include NID. */ +struct changelog_ext_nid { + /* have __u64 instead of lnet_nid_t type for use by client api */ + __u64 cr_nid; + /* for use when IPv6 support is added */ + __u64 extra; + __u32 padding; +}; + +/* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags. */ +struct changelog_ext_openmode { + __u32 cr_openflags; +}; + +/* Changelog extra extension to include xattr */ +struct changelog_ext_xattr { + char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */ +}; + static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags( const struct changelog_rec *rec); @@ -1181,6 +1692,12 @@ static inline size_t changelog_rec_offset(enum changelog_rec_flags crf, size += sizeof(struct changelog_ext_extra_flags); if (cref & CLFE_UIDGID) size += sizeof(struct changelog_ext_uidgid); + if (cref & CLFE_NID) + size += sizeof(struct changelog_ext_nid); + if (cref & CLFE_OPEN) + size += sizeof(struct changelog_ext_openmode); + if (cref & CLFE_XATTR) + size += sizeof(struct changelog_ext_xattr); } return size; @@ -1191,9 +1708,11 @@ static inline size_t changelog_rec_size(const struct changelog_rec *rec) enum changelog_rec_extra_flags cref = CLFE_INVALID; if (rec->cr_flags & CLF_EXTRA_FLAGS) - cref = changelog_rec_extra_flags(rec)->cr_extra_flags; + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; - return changelog_rec_offset(rec->cr_flags, cref); + return changelog_rec_offset( + (enum changelog_rec_flags)rec->cr_flags, cref); } static inline size_t changelog_rec_varsize(const struct changelog_rec *rec) @@ -1204,7 +1723,8 @@ static inline size_t changelog_rec_varsize(const struct changelog_rec *rec) static inline struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION; + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & CLF_VERSION); return (struct changelog_ext_rename *)((char *)rec + changelog_rec_offset(crf, @@ -1215,8 +1735,8 @@ struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *re static inline struct changelog_ext_jobid *changelog_rec_jobid(const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & (CLF_VERSION | CLF_RENAME)); return (struct changelog_ext_jobid *)((char *)rec + changelog_rec_offset(crf, @@ -1228,8 +1748,8 @@ static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags( const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME | CLF_JOBID); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID)); return (struct changelog_ext_extra_flags *)((char *)rec + changelog_rec_offset(crf, @@ -1241,24 +1761,84 @@ static inline struct changelog_ext_uidgid *changelog_rec_uidgid( const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); return (struct changelog_ext_uidgid *)((char *)rec + changelog_rec_offset(crf, CLFE_INVALID)); } +/* The nid is the second extra extension */ +static inline +struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec) +{ + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); + enum changelog_rec_extra_flags cref = CLFE_INVALID; + + if (rec->cr_flags & CLF_EXTRA_FLAGS) + cref = (enum changelog_rec_extra_flags) + (changelog_rec_extra_flags(rec)->cr_extra_flags & + CLFE_UIDGID); + + return (struct changelog_ext_nid *)((char *)rec + + changelog_rec_offset(crf, cref)); +} + +/* The OPEN mode is the third extra extension */ +static inline +struct changelog_ext_openmode *changelog_rec_openmode( + const struct changelog_rec *rec) +{ + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); + enum changelog_rec_extra_flags cref = CLFE_INVALID; + + if (rec->cr_flags & CLF_EXTRA_FLAGS) { + cref = (enum changelog_rec_extra_flags) + (changelog_rec_extra_flags(rec)->cr_extra_flags & + (CLFE_UIDGID | CLFE_NID)); + } + + return (struct changelog_ext_openmode *)((char *)rec + + changelog_rec_offset(crf, cref)); +} + +/* The xattr name is the fourth extra extension */ +static inline +struct changelog_ext_xattr *changelog_rec_xattr( + const struct changelog_rec *rec) +{ + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); + enum changelog_rec_extra_flags cref = CLFE_INVALID; + + if (rec->cr_flags & CLF_EXTRA_FLAGS) + cref = (enum changelog_rec_extra_flags) + (changelog_rec_extra_flags(rec)->cr_extra_flags & + (CLFE_UIDGID | CLFE_NID | CLFE_OPEN)); + + return (struct changelog_ext_xattr *)((char *)rec + + changelog_rec_offset(crf, cref)); +} + /* The name follows the rename, jobid and extra flags extns, if present */ static inline char *changelog_rec_name(const struct changelog_rec *rec) { enum changelog_rec_extra_flags cref = CLFE_INVALID; if (rec->cr_flags & CLF_EXTRA_FLAGS) - cref = changelog_rec_extra_flags(rec)->cr_extra_flags; + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; - return (char *)rec + changelog_rec_offset(rec->cr_flags & CLF_SUPPORTED, - cref & CLFE_SUPPORTED); + return (char *)rec + changelog_rec_offset( + (enum changelog_rec_flags)(rec->cr_flags & CLF_SUPPORTED), + (enum changelog_rec_extra_flags)(cref & CLFE_SUPPORTED)); } static inline size_t changelog_rec_snamelen(const struct changelog_rec *rec) @@ -1298,14 +1878,19 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, enum changelog_rec_flags crf_wanted, enum changelog_rec_extra_flags cref_want) { + char *xattr_mov = NULL; + char *omd_mov = NULL; + char *nid_mov = NULL; char *uidgid_mov = NULL; char *ef_mov; char *jid_mov; char *rnm_mov; enum changelog_rec_extra_flags cref = CLFE_INVALID; - crf_wanted &= CLF_SUPPORTED; - cref_want &= CLFE_SUPPORTED; + crf_wanted = (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED); + cref_want = (enum changelog_rec_extra_flags) + (cref_want & CLFE_SUPPORTED); if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) { if (!(rec->cr_flags & CLF_EXTRA_FLAGS) || @@ -1322,29 +1907,68 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, /* Locations of extensions in the remapped record */ if (rec->cr_flags & CLF_EXTRA_FLAGS) { + xattr_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~CLFE_XATTR)); + omd_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~(CLFE_OPEN | CLFE_XATTR))); + nid_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & + ~(CLFE_NID | CLFE_OPEN | CLFE_XATTR))); uidgid_mov = (char *)rec + - changelog_rec_offset(crf_wanted & CLF_SUPPORTED, - CLFE_INVALID); - cref = changelog_rec_extra_flags(rec)->cr_extra_flags; + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~(CLFE_UIDGID | + CLFE_NID | + CLFE_OPEN | + CLFE_XATTR))); + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; } ef_mov = (char *)rec + - changelog_rec_offset(crf_wanted & ~CLF_EXTRA_FLAGS, - CLFE_INVALID); + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & ~CLF_EXTRA_FLAGS), CLFE_INVALID); jid_mov = (char *)rec + - changelog_rec_offset(crf_wanted & - ~(CLF_EXTRA_FLAGS | CLF_JOBID), + changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & + ~(CLF_EXTRA_FLAGS | CLF_JOBID)), CLFE_INVALID); rnm_mov = (char *)rec + - changelog_rec_offset(crf_wanted & + changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & ~(CLF_EXTRA_FLAGS | CLF_JOBID | - CLF_RENAME), + CLF_RENAME)), CLFE_INVALID); /* Move the extension fields to the desired positions */ if ((crf_wanted & CLF_EXTRA_FLAGS) && (rec->cr_flags & CLF_EXTRA_FLAGS)) { + if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR)) + memmove(xattr_mov, changelog_rec_xattr(rec), + sizeof(struct changelog_ext_xattr)); + + if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN)) + memmove(omd_mov, changelog_rec_openmode(rec), + sizeof(struct changelog_ext_openmode)); + + if ((cref_want & CLFE_NID) && (cref & CLFE_NID)) + memmove(nid_mov, changelog_rec_nid(rec), + sizeof(struct changelog_ext_nid)); + if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID)) memmove(uidgid_mov, changelog_rec_uidgid(rec), sizeof(struct changelog_ext_uidgid)); @@ -1362,6 +1986,18 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, sizeof(struct changelog_ext_rename)); /* Clear newly added fields */ + if (xattr_mov && (cref_want & CLFE_XATTR) && + !(cref & CLFE_XATTR)) + memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr)); + + if (omd_mov && (cref_want & CLFE_OPEN) && + !(cref & CLFE_OPEN)) + memset(omd_mov, 0, sizeof(struct changelog_ext_openmode)); + + if (nid_mov && (cref_want & CLFE_NID) && + !(cref & CLFE_NID)) + memset(nid_mov, 0, sizeof(struct changelog_ext_nid)); + if (uidgid_mov && (cref_want & CLFE_UIDGID) && !(cref & CLFE_UIDGID)) memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid)); @@ -1438,14 +2074,14 @@ enum hsm_states { #define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK) /** - * HSM request progress state + * HSM request progress state */ enum hsm_progress_states { + HPS_NONE = 0, HPS_WAITING = 1, HPS_RUNNING = 2, HPS_DONE = 3, }; -#define HPS_NONE 0 static inline const char *hsm_progress_state2name(enum hsm_progress_states s) { @@ -1575,7 +2211,7 @@ static inline ssize_t hur_len(struct hsm_user_request *hur) (__u64)hur->hur_request.hr_itemcount * sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len; - if (size != (ssize_t)size) + if ((ssize_t)size < 0) return -1; return size; @@ -1656,7 +2292,7 @@ static inline char *hai_dump_data_field(const struct hsm_action_item *hai, struct hsm_action_list { __u32 hal_version; __u32 hal_count; /* number of hai's to follow */ - __u64 hal_compound_id; /* returned by coordinator */ + __u64 hal_compound_id; /* returned by coordinator, ignored */ __u64 hal_flags; __u32 hal_archive_id; /* which archive backend */ __u32 padding1; @@ -1665,27 +2301,20 @@ struct hsm_action_list { boundaries. See hai_zero */ } __attribute__((packed)); -#ifndef HAVE_CFS_SIZE_ROUND -static inline int cfs_size_round (int val) -{ - return (val + 7) & (~0x7); -} -#define HAVE_CFS_SIZE_ROUND -#endif - /* Return pointer to first hai in action list */ static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) { - return (struct hsm_action_item *)(hal->hal_fsname + - cfs_size_round(strlen(hal-> \ - hal_fsname) - + 1)); + size_t offset = __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8); + + return (struct hsm_action_item *)(hal->hal_fsname + offset); } + /* Return pointer to next hai */ static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai) { - return (struct hsm_action_item *)((char *)hai + - cfs_size_round(hai->hai_len)); + size_t offset = __ALIGN_KERNEL(hai->hai_len, 8); + + return (struct hsm_action_item *)((char *)hai + offset); } /* Return size of an hsm_action_list */ @@ -1695,10 +2324,10 @@ static inline size_t hal_size(struct hsm_action_list *hal) size_t sz; struct hsm_action_item *hai; - sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); + sz = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8); hai = hai_first(hal); for (i = 0; i < hal->hal_count ; i++, hai = hai_next(hai)) - sz += cfs_size_round(hai->hai_len); + sz += __ALIGN_KERNEL(hai->hai_len, 8); return sz; } @@ -1830,7 +2459,6 @@ enum sk_crypt_alg { SK_CRYPT_INVALID = -1, SK_CRYPT_EMPTY = 0, SK_CRYPT_AES256_CTR = 1, - SK_CRYPT_MAX = 2, }; enum sk_hmac_alg { @@ -1838,17 +2466,16 @@ enum sk_hmac_alg { SK_HMAC_EMPTY = 0, SK_HMAC_SHA256 = 1, SK_HMAC_SHA512 = 2, - SK_HMAC_MAX = 3, }; struct sk_crypt_type { - char *sct_name; - size_t sct_bytes; + const char *sct_name; + int sct_type; }; struct sk_hmac_type { - char *sht_name; - size_t sht_bytes; + const char *sht_name; + int sht_type; }; enum lock_mode_user { @@ -1868,6 +2495,111 @@ enum lockahead_results { LLA_RESULT_SAME, }; +enum lu_heat_flag_bit { + LU_HEAT_FLAG_BIT_INVALID = 0, + LU_HEAT_FLAG_BIT_OFF, + LU_HEAT_FLAG_BIT_CLEAR, +}; + +enum lu_heat_flag { + LU_HEAT_FLAG_OFF = 1ULL << LU_HEAT_FLAG_BIT_OFF, + LU_HEAT_FLAG_CLEAR = 1ULL << LU_HEAT_FLAG_BIT_CLEAR, +}; + +enum obd_heat_type { + OBD_HEAT_READSAMPLE = 0, + OBD_HEAT_WRITESAMPLE = 1, + OBD_HEAT_READBYTE = 2, + OBD_HEAT_WRITEBYTE = 3, + OBD_HEAT_COUNT +}; + +#define LU_HEAT_NAMES { \ + [OBD_HEAT_READSAMPLE] = "readsample", \ + [OBD_HEAT_WRITESAMPLE] = "writesample", \ + [OBD_HEAT_READBYTE] = "readbyte", \ + [OBD_HEAT_WRITEBYTE] = "writebyte", \ +} + +struct lu_heat { + __u32 lh_count; + __u32 lh_flags; + __u64 lh_heat[0]; +}; + +enum lu_pcc_type { + LU_PCC_NONE = 0, + LU_PCC_READWRITE, + LU_PCC_MAX +}; + +static inline const char *pcc_type2string(enum lu_pcc_type type) +{ + switch (type) { + case LU_PCC_NONE: + return "none"; + case LU_PCC_READWRITE: + return "readwrite"; + default: + return "fault"; + } +} + +struct lu_pcc_attach { + __u32 pcca_type; /* PCC type */ + __u32 pcca_id; /* archive ID for readwrite, group ID for readonly */ +}; + +enum lu_pcc_detach_opts { + PCC_DETACH_OPT_NONE = 0, /* Detach only, keep the PCC copy */ + PCC_DETACH_OPT_UNCACHE, /* Remove the cached file after detach */ +}; + +struct lu_pcc_detach_fid { + /* fid of the file to detach */ + struct lu_fid pccd_fid; + __u32 pccd_opt; +}; + +struct lu_pcc_detach { + __u32 pccd_opt; +}; + +enum lu_pcc_state_flags { + PCC_STATE_FL_NONE = 0x0, + /* The inode attr is cached locally */ + PCC_STATE_FL_ATTR_VALID = 0x01, + /* The file is being attached into PCC */ + PCC_STATE_FL_ATTACHING = 0x02, + /* Allow to auto attach at open */ + PCC_STATE_FL_OPEN_ATTACH = 0x04, + /* Allow to auto attach during I/O after layout lock revocation */ + PCC_STATE_FL_IO_ATTACH = 0x08, + /* Allow to auto attach at stat */ + PCC_STATE_FL_STAT_ATTACH = 0x10, + /* Allow to auto attach at the next open or layout refresh */ + PCC_STATE_FL_AUTO_ATTACH = PCC_STATE_FL_OPEN_ATTACH | + PCC_STATE_FL_IO_ATTACH | + PCC_STATE_FL_STAT_ATTACH, +}; + +struct lu_pcc_state { + __u32 pccs_type; /* enum lu_pcc_type */ + __u32 pccs_open_count; + __u32 pccs_flags; /* enum lu_pcc_state_flags */ + __u32 pccs_padding; + char pccs_path[PATH_MAX]; +}; + +struct fid_array { + __u32 fa_nr; + /* make header's size equal lu_fid */ + __u32 fa_padding0; + __u64 fa_padding1; + struct lu_fid fa_fids[0]; +}; +#define OBD_MAX_FIDS_IN_ARRAY 4096 + #if defined(__cplusplus) } #endif