X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fuapi%2Flinux%2Flustre%2Flustre_user.h;h=495335bc4d6fa3e3bb054a3ada352a153b123185;hp=8479c1147f4ea5d02fe336a22a48b42b2ca0b530;hb=b31792b0e72425c8c7850d69837f08c9f3e95a9c;hpb=fa15e5347c2eccd432924e57440829606c7339f6 diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 8479c11..495335b 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lustre/include/lustre/lustre_user.h * @@ -42,29 +41,32 @@ * @{ */ +#include +#include +#include +#include +#include #include +#include +#include +#include -#ifdef __KERNEL__ -# include -# include -# include /* snprintf() */ -# include -# include -#else /* !__KERNEL__ */ -# define NEED_QUOTA_DEFS -# include +#ifndef __KERNEL__ +# define __USE_ISOC99 1 # include # include /* snprintf() */ -# include -# include # include -# include -#endif /* __KERNEL__ */ +# define FILEID_LUSTRE 0x97 /* for name_to_handle_at() (and llapi_fd2fid()) */ +#endif /* !__KERNEL__ */ #if defined(__cplusplus) extern "C" { #endif +#ifdef __STRICT_ANSI__ +#define typeof __typeof__ +#endif + /* * This is a temporary solution of adding quota type. * Should be removed as soon as system header is updated. @@ -78,46 +80,190 @@ extern "C" { "project", /* PRJQUOTA */ \ "undefined", \ }; +#ifndef USRQUOTA +#define USRQUOTA 0 +#endif +#ifndef GRPQUOTA +#define GRPQUOTA 1 +#endif +#ifndef PRJQUOTA #define PRJQUOTA 2 +#endif -#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \ - defined(__craynv) || defined(__mips64__) || defined(__powerpc64__) || \ - defined(__aarch64__) -typedef struct stat lstat_t; -# define lstat_f lstat -# define fstat_f fstat -# define fstatat_f fstatat -# define HAVE_LOV_USER_MDS_DATA -#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__) -typedef struct stat64 lstat_t; -# define lstat_f lstat64 -# define fstat_f fstat64 -# define fstatat_f fstatat64 -# define HAVE_LOV_USER_MDS_DATA +/* + * We need to always use 64bit version because the structure + * is shared across entire cluster where 32bit and 64bit machines + * are co-existing. + */ +#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64) +typedef struct stat64 lstat_t; +#define lstat_f lstat64 +#define fstat_f fstat64 +#define fstatat_f fstatat64 +#else +typedef struct stat lstat_t; +#define lstat_f lstat +#define fstat_f fstat +#define fstatat_f fstatat #endif +#ifndef STATX_BASIC_STATS +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __u32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). + */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ +#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ + +/* + * Attributes to be found in stx_attributes and masked in stx_attributes_mask. + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#endif /* STATX_BASIC_STATS */ + +typedef struct statx lstatx_t; + #define LUSTRE_EOF 0xffffffffffffffffULL /* for statfs() */ #define LL_SUPER_MAGIC 0x0BD00BD0 -#ifndef FSFILT_IOC_GETFLAGS -#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long) -#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long) -#define FSFILT_IOC_GETVERSION _IOR('f', 3, long) -#define FSFILT_IOC_SETVERSION _IOW('f', 4, long) -#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long) -#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long) -#endif +#define FSFILT_IOC_GETVERSION _IOR('f', 3, long) /* FIEMAP flags supported by Lustre */ #define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER) enum obd_statfs_state { - OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ - OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */ - OS_STATE_ENOSPC = 0x00000020, /**< not enough free space */ - OS_STATE_ENOINO = 0x00000040, /**< not enough inodes */ + OS_STATFS_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */ + OS_STATFS_READONLY = 0x00000002, /**< filesystem is read-only */ + OS_STATFS_NOPRECREATE = 0x00000004, /**< no object precreation */ + OS_STATFS_UNUSED1 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */ + OS_STATFS_UNUSED2 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */ + OS_STATFS_ENOSPC = 0x00000020, /**< not enough free space */ + OS_STATFS_ENOINO = 0x00000040, /**< not enough inodes */ + OS_STATFS_SUM = 0x00000100, /**< aggregated for all tagrets */ + OS_STATFS_NONROT = 0x00000200, /**< non-rotational device */ }; /** filesystem statistics/attributes for target device */ @@ -132,18 +278,25 @@ struct obd_statfs { __u32 os_bsize; /* block size in bytes for os_blocks */ __u32 os_namelen; /* maximum length of filename in bytes*/ __u64 os_maxbytes; /* maximum object size in bytes */ - __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */ + __u32 os_state; /**< obd_statfs_state OS_STATFS_* */ __u32 os_fprecreated; /* objs available now to the caller */ /* used in QoS code to find preferred * OSTs */ - __u32 os_spare2; /* Unused padding fields. Remember */ - __u32 os_spare3; /* to fix lustre_swab_obd_statfs() */ - __u32 os_spare4; - __u32 os_spare5; - __u32 os_spare6; - __u32 os_spare7; - __u32 os_spare8; - __u32 os_spare9; + __u32 os_granted; /* space granted for MDS */ + __u32 os_spare3; /* Unused padding fields. Remember */ + __u32 os_spare4; /* to fix lustre_swab_obd_statfs() */ + __u32 os_spare5; + __u32 os_spare6; + __u32 os_spare7; + __u32 os_spare8; + __u32 os_spare9; +}; + +/** additional filesystem attributes for target device */ +struct obd_statfs_info { + __u32 os_reserved_mb_low; /* reserved mb low */ + __u32 os_reserved_mb_high; /* reserved mb high */ + bool os_enable_pre; /* enable pre create logic */ }; /** @@ -168,13 +321,19 @@ struct lu_fid { * used. **/ __u32 f_ver; -}; +} __attribute__((packed)); static inline bool fid_is_zero(const struct lu_fid *fid) { return fid->f_seq == 0 && fid->f_oid == 0; } +/* The data name_to_handle_at() places in a struct file_handle (at f_handle) */ +struct lustre_file_handle { + struct lu_fid lfh_child; + struct lu_fid lfh_parent; +}; + /* Currently, the filter_fid::ff_parent::f_ver is not the real parent * MDT-object's FID::f_ver, instead it is the OST-object index in its * parent MDT-object's layout EA. */ @@ -188,15 +347,47 @@ struct ost_layout { __u32 ol_comp_id; } __attribute__((packed)); -/* keep this one for compatibility */ -struct filter_fid_old { - struct lu_fid ff_parent; - __u64 ff_objid; - __u64 ff_seq; +/* The filter_fid structure has changed several times over its lifetime. + * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and + * stripe_index and the "self FID" (objid/seq) to be able to recover the + * OST objects in case of corruption. With the move to 2.4 and OSD-API for + * the OST, the "trusted.lma" xattr was added to the OST objects to store + * the "self FID" to be consistent with the MDT on-disk format, and the + * filter_fid only stored the MDT inode parent FID and stripe index. + * + * In 2.10, the addition of PFL composite layouts required more information + * to be stored into the filter_fid in order to be able to identify which + * component the OST object belonged. As well, the stripe size may vary + * between components, so it was no longer safe to assume the stripe size + * or stripe_count of a file. This is also more robust for plain layouts. + * + * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not + * enough space to store both the filter_fid and LMA in the inode, so they + * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid + * an extra seek for every OST object access. + * + * In 2.11, FLR mirror layouts also need to store the layout version and + * range so that writes to old versions of the layout are not allowed. + * That ensures that mirrored objects are not modified by evicted clients, + * and ensures that the components are correctly marked stale on the MDT. + */ +struct filter_fid_18_23 { + struct lu_fid ff_parent; /* stripe_idx in f_ver */ + __u64 ff_objid; + __u64 ff_seq; +}; + +struct filter_fid_24_29 { + struct lu_fid ff_parent; /* stripe_idx in f_ver */ +}; + +struct filter_fid_210 { + struct lu_fid ff_parent; /* stripe_idx in f_ver */ + struct ost_layout ff_layout; }; struct filter_fid { - struct lu_fid ff_parent; + struct lu_fid ff_parent; /* stripe_idx in f_ver */ struct ost_layout ff_layout; __u32 ff_layout_version; __u32 ff_range; /* range of layout version that @@ -231,8 +422,9 @@ enum lma_incompat { is on the remote MDT */ LMAI_STRIPED = 0x00000008, /* striped directory inode */ LMAI_ORPHAN = 0x00000010, /* inode is orphan */ + LMAI_ENCRYPT = 0x00000020, /* inode is encrypted */ LMA_INCOMPAT_SUPP = (LMAI_AGENT | LMAI_REMOTE_PARENT | \ - LMAI_STRIPED | LMAI_ORPHAN) + LMAI_STRIPED | LMAI_ORPHAN | LMAI_ENCRYPT) }; @@ -283,8 +475,17 @@ struct lustre_ost_attrs { */ #define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64)) -enum { - LSOM_FL_VALID = 1 << 0, +enum lustre_som_flags { + /* Unknow or no SoM data, must get size from OSTs. */ + SOM_FL_UNKNOWN = 0x0000, + /* Known strictly correct, FLR or DoM file (SoM guaranteed). */ + SOM_FL_STRICT = 0x0001, + /* Known stale - was right at some point in the past, but it is + * known (or likely) to be incorrect now (e.g. opened for write). */ + SOM_FL_STALE = 0x0002, + /* Approximate, may never have been strictly correct, + * need to sync SOM data to achieve eventual consistency. */ + SOM_FL_LAZY = 0x0004, }; struct lustre_som_attrs { @@ -305,7 +506,7 @@ struct ost_id { } oi; struct lu_fid oi_fid; }; -}; +} __attribute__((packed)); #define DOSTID "%#llx:%llu" #define POSTID(oi) ((unsigned long long)ostid_seq(oi)), \ @@ -337,6 +538,7 @@ enum ll_lease_flags { LL_LEASE_RESYNC_DONE = 0x2, LL_LEASE_LAYOUT_MERGE = 0x4, LL_LEASE_LAYOUT_SPLIT = 0x8, + LL_LEASE_PCC_ATTACH = 0x10, }; #define IOC_IDS_MAX 4096 @@ -347,6 +549,16 @@ struct ll_ioc_lease { __u32 lil_ids[0]; }; +struct ll_ioc_lease_id { + __u32 lil_mode; + __u32 lil_flags; + __u32 lil_count; + __u16 lil_mirror_id; + __u16 lil_padding1; + __u64 lil_padding2; + __u32 lil_ids[0]; +}; + /* * The ioctl naming rules: * LL_* - works on the currently opened filehandle instead of parent dir @@ -358,6 +570,7 @@ struct ll_ioc_lease { * *INFO - set/get lov_user_mds_data */ /* lustre_ioctl.h 101-150 */ +/* ioctl codes 128-143 are reserved for fsverity */ #define LL_IOC_GETFLAGS _IOR ('f', 151, long) #define LL_IOC_SETFLAGS _IOW ('f', 152, long) #define LL_IOC_CLRFLAGS _IOW ('f', 153, long) @@ -404,6 +617,8 @@ struct ll_ioc_lease { #define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md) #define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) #define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64) +#define LL_IOC_RMFID _IOR('f', 242, struct fid_array) +#define LL_IOC_UNLOCK_FOREIGN _IO('f', 242) #define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease) #define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long) #define LL_IOC_GET_LEASE _IO('f', 244) @@ -413,6 +628,12 @@ struct ll_ioc_lease { #define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid) #define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent) #define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise) +#define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat) +#define LL_IOC_HEAT_SET _IOW('f', 251, __u64) +#define LL_IOC_PCC_DETACH _IOW('f', 252, struct lu_pcc_detach) +#define LL_IOC_PCC_DETACH_BY_FID _IOW('f', 252, struct lu_pcc_detach_fid) +#define LL_IOC_PCC_STATE _IOR('f', 252, struct lu_pcc_state) +#define LL_IOC_PROJECT _IOW('f', 253, struct lu_project) #ifndef FS_IOC_FSGETXATTR /* @@ -428,20 +649,24 @@ struct fsxattr { #define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) #define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) #endif -#define LL_IOC_FSGETXATTR FS_IOC_FSGETXATTR -#define LL_IOC_FSSETXATTR FS_IOC_FSSETXATTR -#define LL_PROJINHERIT_FL 0x20000000 +#ifndef FS_XFLAG_PROJINHERIT +#define FS_XFLAG_PROJINHERIT 0x00000200 +#endif #define LL_STATFS_LMV 1 #define LL_STATFS_LOV 2 #define LL_STATFS_NODELAY 4 -#define IOC_MDC_TYPE 'i' -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *) -#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *) -#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *) +#define IOC_MDC_TYPE 'i' +#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) +#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *) +#define IOC_MDC_GETFILEINFO_V1 _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data_v1 *) +#define IOC_MDC_GETFILEINFO_V2 _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data) +#define LL_IOC_MDC_GETINFO_V1 _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data_v1 *) +#define LL_IOC_MDC_GETINFO_V2 _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data) +#define IOC_MDC_GETFILEINFO IOC_MDC_GETFILEINFO_V1 +#define LL_IOC_MDC_GETINFO LL_IOC_MDC_GETINFO_V1 #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */ @@ -452,6 +677,9 @@ struct fsxattr { /* To be compatible with old statically linked binary we keep the check for * the older 0100000000 flag. This is already removed upstream. LU-812. */ #define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */ +#ifndef FASYNC +#define FASYNC 00020000 /* fcntl, for BSD compatibility */ +#endif #define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC) #define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \ O_LOV_DELAY_CREATE_MASK) @@ -460,7 +688,7 @@ struct fsxattr { #define LL_FILE_GROUP_LOCKED 0x00000002 #define LL_FILE_READAHEA 0x00000004 #define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */ -#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */ +#define LL_FILE_FLOCK_WARNING 0x00000020 /* warned about disabled flock */ #define LOV_USER_MAGIC_V1 0x0BD10BD0 #define LOV_USER_MAGIC LOV_USER_MAGIC_V1 @@ -469,27 +697,45 @@ struct fsxattr { /* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */ #define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */ #define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0 +#define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0 +#define LOV_USER_MAGIC_SEL 0x0BD80BD0 #define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */ #define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/ #define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0 -#define LOV_PATTERN_NONE 0x000 -#define LOV_PATTERN_RAID0 0x001 -#define LOV_PATTERN_RAID1 0x002 -#define LOV_PATTERN_MDT 0x100 -#define LOV_PATTERN_CMOBD 0x200 +#define LOV_PATTERN_NONE 0x000 +#define LOV_PATTERN_RAID0 0x001 +#define LOV_PATTERN_RAID1 0x002 +#define LOV_PATTERN_MDT 0x100 +#define LOV_PATTERN_OVERSTRIPING 0x200 +#define LOV_PATTERN_FOREIGN 0x400 #define LOV_PATTERN_F_MASK 0xffff0000 #define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */ #define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */ #define LOV_PATTERN_DEFAULT 0xffffffff +#define LOV_OFFSET_DEFAULT ((__u16)-1) +#define LMV_OFFSET_DEFAULT ((__u32)-1) + static inline bool lov_pattern_supported(__u32 pattern) { + return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 || + (pattern & ~LOV_PATTERN_F_RELEASED) == + (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) || + (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT; +} + +/* RELEASED and MDT patterns are not valid in many places, so rather than + * having many extra checks on lov_pattern_supported, we have this separate + * check for non-released, non-DOM components + */ +static inline bool lov_pattern_supported_normal_comp(__u32 pattern) +{ return pattern == LOV_PATTERN_RAID0 || - pattern == LOV_PATTERN_MDT || - pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED); + pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING); + } #define LOV_MAXPOOLNAME 15 @@ -507,13 +753,19 @@ static inline bool lov_pattern_supported(__u32 pattern) * allocation that is sufficient for the current generation of systems. * * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */ -#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ +#define LOV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */ #define LOV_ALL_STRIPES 0xffff /* only valid for directories */ #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ #define XATTR_LUSTRE_PREFIX "lustre." #define XATTR_LUSTRE_LOV XATTR_LUSTRE_PREFIX"lov" +/* Please update if XATTR_LUSTRE_LOV".set" groks more flags in the future */ +#define allowed_lustre_lov(att) (strcmp((att), XATTR_LUSTRE_LOV".add") == 0 || \ + strcmp((att), XATTR_LUSTRE_LOV".set") == 0 || \ + strcmp((att), XATTR_LUSTRE_LOV".set.flags") == 0 || \ + strcmp((att), XATTR_LUSTRE_LOV".del") == 0) + #define lov_user_ost_data lov_user_ost_data_v1 struct lov_user_ost_data_v1 { /* per-stripe data structure */ struct ost_id l_ost_oi; /* OST object ID */ @@ -535,7 +787,7 @@ struct lov_user_md_v1 { /* LOV EA user data (host-endian) */ * used when reading */ }; struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ -} __attribute__((packed, __may_alias__)); +} __attribute__((packed, __may_alias__)); struct lov_user_md_v3 { /* LOV EA user data (host-endian) */ __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */ @@ -551,15 +803,36 @@ struct lov_user_md_v3 { /* LOV EA user data (host-endian) */ }; char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ +} __attribute__((packed, __may_alias__)); + +struct lov_foreign_md { + __u32 lfm_magic; /* magic number = LOV_MAGIC_FOREIGN */ + __u32 lfm_length; /* length of lfm_value */ + __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */ + __u32 lfm_flags; /* flags, type specific */ + char lfm_value[]; } __attribute__((packed)); +#define foreign_size(lfm) (((struct lov_foreign_md *)lfm)->lfm_length + \ + offsetof(struct lov_foreign_md, lfm_value)) + +#define foreign_size_le(lfm) \ + (le32_to_cpu(((struct lov_foreign_md *)lfm)->lfm_length) + \ + offsetof(struct lov_foreign_md, lfm_value)) + +/** + * The stripe size fields are shared for the extension size storage, however + * the extension size is stored in KB, not bytes. + */ +#define SEL_UNIT_SIZE 1024llu + struct lu_extent { __u64 e_start; __u64 e_end; -}; +} __attribute__((packed)); #define DEXT "[%#llx, %#llx)" -#define PEXT(ext) (ext)->e_start, (ext)->e_end +#define PEXT(ext) (unsigned long long)(ext)->e_start, (unsigned long long)(ext)->e_end static inline bool lu_extent_is_overlapped(struct lu_extent *e1, struct lu_extent *e2) @@ -573,20 +846,41 @@ static inline bool lu_extent_is_whole(struct lu_extent *e) } enum lov_comp_md_entry_flags { - LCME_FL_STALE = 0x00000001, /* FLR: stale data */ - LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */ - LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */ - LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR, - LCME_FL_OFFLINE = 0x00000008, /* Not used */ - LCME_FL_INIT = 0x00000010, /* instantiated */ - LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag, - won't be stored on disk */ + LCME_FL_STALE = 0x00000001, /* FLR: stale data */ + LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */ + LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */ + LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR, + LCME_FL_OFFLINE = 0x00000008, /* Not used */ + LCME_FL_INIT = 0x00000010, /* instantiated */ + LCME_FL_NOSYNC = 0x00000020, /* FLR: no sync for the mirror */ + LCME_FL_EXTENSION = 0x00000040, /* extension comp, never init */ + LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag, + * won't be stored on disk + */ }; #define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \ - LCME_FL_PREF_RW) -/* The flags can be set by users at mirror creation time. */ -#define LCME_USER_FLAGS (LCME_FL_PREF_RW) + LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) + +/* The component flags can be set by users at creation/modification time. */ +#define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) + +/* The mirror flags can be set by users at creation time. */ +#define LCME_USER_MIRROR_FLAGS (LCME_FL_PREF_RW) + +/* The allowed flags obtained from the client at component creation time. */ +#define LCME_CL_COMP_FLAGS (LCME_USER_MIRROR_FLAGS | LCME_FL_EXTENSION) + +/* The mirror flags sent by client */ +#define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC) + +/* These flags have meaning when set in a default layout and will be inherited + * from the default/template layout set on a directory. + */ +#define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ + LCME_FL_EXTENSION) /* the highest bit in obdo::o_layout_version is used to mark if the file is * being resynced. */ @@ -612,13 +906,16 @@ struct lov_comp_md_entry_v1 { __u32 lcme_offset; /* offset of component blob, start from lov_comp_md_v1 */ __u32 lcme_size; /* size of component blob */ - __u64 lcme_padding[2]; + __u32 lcme_layout_gen; + __u64 lcme_timestamp; /* snapshot time if applicable*/ + __u32 lcme_padding_1; } __attribute__((packed)); #define SEQ_ID_MAX 0x0000FFFF #define SEQ_ID_MASK SEQ_ID_MAX /* bit 30:16 of lcme_id is used to store mirror id */ #define MIRROR_ID_MASK 0x7FFF0000 +#define MIRROR_ID_NEG 0x8000 #define MIRROR_ID_SHIFT 16 static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid) @@ -672,58 +969,196 @@ static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic) /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to * use this. It is unsafe to #define those values in this header as it * is possible the application has already #included . */ -#ifdef HAVE_LOV_USER_MDS_DATA -#define lov_user_mds_data lov_user_mds_data_v1 +#define lov_user_mds_data lov_user_mds_data_v2 struct lov_user_mds_data_v1 { lstat_t lmd_st; /* MDS stat struct */ struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */ } __attribute__((packed)); -struct lov_user_mds_data_v3 { - lstat_t lmd_st; /* MDS stat struct */ - struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */ +struct lov_user_mds_data_v2 { + struct lu_fid lmd_fid; /* Lustre FID */ + lstatx_t lmd_stx; /* MDS statx struct */ + __u64 lmd_flags; /* MDS stat flags */ + __u32 lmd_lmmsize; /* LOV EA size */ + __u32 lmd_padding; /* unused */ + struct lov_user_md_v1 lmd_lmm; /* LOV EA user data */ } __attribute__((packed)); -#endif struct lmv_user_mds_data { struct lu_fid lum_fid; __u32 lum_padding; __u32 lum_mds; -}; +} __attribute__((packed, __may_alias__)); enum lmv_hash_type { LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */ LMV_HASH_TYPE_ALL_CHARS = 1, LMV_HASH_TYPE_FNV_1A_64 = 2, + LMV_HASH_TYPE_CRUSH = 3, LMV_HASH_TYPE_MAX, }; -#define LMV_HASH_NAME_ALL_CHARS "all_char" -#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64" +static __attribute__((unused)) const char *mdt_hash_name[] = { + "none", + "all_char", + "fnv_1a_64", + "crush", +}; + +#define LMV_HASH_TYPE_DEFAULT LMV_HASH_TYPE_CRUSH + +/* Right now only the lower part(0-16bits) of lmv_hash_type is being used, + * and the higher part will be the flag to indicate the status of object, + * for example the object is being migrated. And the hash function + * might be interpreted differently with different flags. */ +#define LMV_HASH_TYPE_MASK 0x0000ffff + +static inline bool lmv_is_known_hash_type(__u32 type) +{ + return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 || + (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS || + (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_CRUSH; +} + +#define LMV_HASH_FLAG_MERGE 0x04000000 +#define LMV_HASH_FLAG_SPLIT 0x08000000 + +/* The striped directory has ever lost its master LMV EA, then LFSCK + * re-generated it. This flag is used to indicate such case. It is an + * on-disk flag. */ +#define LMV_HASH_FLAG_LOST_LMV 0x10000000 + +#define LMV_HASH_FLAG_BAD_TYPE 0x20000000 +#define LMV_HASH_FLAG_MIGRATION 0x80000000 + +#define LMV_HASH_FLAG_LAYOUT_CHANGE \ + (LMV_HASH_FLAG_MIGRATION | LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MERGE) + +/* both SPLIT and MIGRATION are set for directory split */ +static inline bool lmv_hash_is_splitting(__u32 hash) +{ + return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == + (LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MIGRATION); +} + +/* both MERGE and MIGRATION are set for directory merge */ +static inline bool lmv_hash_is_merging(__u32 hash) +{ + return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == + (LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION); +} + +/* only MIGRATION is set for directory migration */ +static inline bool lmv_hash_is_migrating(__u32 hash) +{ + return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == LMV_HASH_FLAG_MIGRATION; +} + +static inline bool lmv_hash_is_restriping(__u32 hash) +{ + return lmv_hash_is_splitting(hash) || lmv_hash_is_merging(hash); +} + +static inline bool lmv_hash_is_layout_changing(__u32 hash) +{ + return lmv_hash_is_splitting(hash) || lmv_hash_is_merging(hash) || + lmv_hash_is_migrating(hash); +} + +struct lustre_foreign_type { + __u32 lft_type; + const char *lft_name; +}; + +/** + * LOV/LMV foreign types + **/ +enum lustre_foreign_types { + LU_FOREIGN_TYPE_NONE = 0, + LU_FOREIGN_TYPE_SYMLINK = 0xda05, + /* must be the max/last one */ + LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff, +}; -extern char *mdt_hash_name[LMV_HASH_TYPE_MAX]; +extern struct lustre_foreign_type lu_foreign_types[]; /* Got this according to how get LOV_MAX_STRIPE_COUNT, see above, * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) */ #define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */ #define lmv_user_md lmv_user_md_v1 struct lmv_user_md_v1 { - __u32 lum_magic; /* must be the first field */ + __u32 lum_magic; /* must be the first field */ __u32 lum_stripe_count; /* dirstripe count */ __u32 lum_stripe_offset; /* MDT idx for default dirstripe */ __u32 lum_hash_type; /* Dir stripe policy */ - __u32 lum_type; /* LMV type: default or normal */ - __u32 lum_padding1; + __u32 lum_type; /* LMV type: default */ + __u8 lum_max_inherit; /* inherit depth of default LMV */ + __u8 lum_max_inherit_rr; /* inherit depth of default LMV to round-robin mkdir */ + __u16 lum_padding1; __u32 lum_padding2; __u32 lum_padding3; char lum_pool_name[LOV_MAXPOOLNAME + 1]; struct lmv_user_mds_data lum_objects[0]; } __attribute__((packed)); +static inline __u32 lmv_foreign_to_md_stripes(__u32 size) +{ + if (size <= sizeof(struct lmv_user_md)) + return 0; + + size -= sizeof(struct lmv_user_md); + return (size + sizeof(struct lmv_user_mds_data) - 1) / + sizeof(struct lmv_user_mds_data); +} + +/* + * NB, historically default layout didn't set type, but use XATTR name to differ + * from normal layout, for backward compatibility, define LMV_TYPE_DEFAULT 0x0, + * and still use the same method. + */ +enum lmv_type { + LMV_TYPE_DEFAULT = 0x0000, +}; + +/* lum_max_inherit will be decreased by 1 after each inheritance if it's not + * LMV_INHERIT_UNLIMITED or > LMV_INHERIT_MAX. + */ +enum { + /* for historical reason, 0 means unlimited inheritance */ + LMV_INHERIT_UNLIMITED = 0, + /* unlimited lum_max_inherit by default */ + LMV_INHERIT_DEFAULT = 0, + /* not inherit any more */ + LMV_INHERIT_END = 1, + /* max inherit depth */ + LMV_INHERIT_MAX = 250, + /* [251, 254] are reserved */ + /* not set, or when inherit depth goes beyond end, */ + LMV_INHERIT_NONE = 255, +}; + +enum { + /* not set, or when inherit_rr depth goes beyond end, */ + LMV_INHERIT_RR_NONE = 0, + /* disable lum_max_inherit_rr by default */ + LMV_INHERIT_RR_DEFAULT = 0, + /* not inherit any more */ + LMV_INHERIT_RR_END = 1, + /* max inherit depth */ + LMV_INHERIT_RR_MAX = 250, + /* [251, 254] are reserved */ + /* unlimited inheritance */ + LMV_INHERIT_RR_UNLIMITED = 255, +}; + static inline int lmv_user_md_size(int stripes, int lmm_magic) { - return sizeof(struct lmv_user_md) + - stripes * sizeof(struct lmv_user_mds_data); + int size = sizeof(struct lmv_user_md); + + if (lmm_magic == LMV_USER_MAGIC_SPECIFIC) + size += stripes * sizeof(struct lmv_user_mds_data); + + return size; } struct ll_recreate_obj { @@ -778,6 +1213,7 @@ static inline char *obd_uuid2str(const struct obd_uuid *uuid) } #define LUSTRE_MAXFSNAME 8 +#define LUSTRE_MAXINSTANCE 16 /* Extract fsname from uuid (or target name) of a target e.g. (myfs-OST0007_UUID -> myfs) @@ -798,21 +1234,21 @@ static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen) #define FID_NOBRACE_LEN 40 #define FID_LEN (FID_NOBRACE_LEN + 2) #define DFID_NOBRACE "%#llx:0x%x:0x%x" -#define DFID "["DFID_NOBRACE"]" +#define DFID "[" DFID_NOBRACE "]" #define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver /* scanf input parse format for fids in DFID_NOBRACE format * Need to strip '[' from DFID format first or use "["SFID"]" at caller. * usage: sscanf(fidstr, SFID, RFID(&fid)); */ #define SFID "0x%llx:0x%x:0x%x" -#define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) +#define RFID(fid) (unsigned long long *)&((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver) /********* Quotas **********/ #define LUSTRE_QUOTABLOCK_BITS 10 #define LUSTRE_QUOTABLOCK_SIZE (1 << LUSTRE_QUOTABLOCK_BITS) -static inline __u64 lustre_stoqb(size_t space) +static inline __u64 lustre_stoqb(__kernel_size_t space) { return (space + LUSTRE_QUOTABLOCK_SIZE - 1) >> LUSTRE_QUOTABLOCK_BITS; } @@ -833,9 +1269,49 @@ static inline __u64 lustre_stoqb(size_t space) /* lustre-specific control commands */ #define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */ #define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */ +#define LUSTRE_Q_GETDEFAULT 0x80000d /* get default quota */ +#define LUSTRE_Q_SETDEFAULT 0x80000e /* set default quota */ +#define LUSTRE_Q_GETQUOTAPOOL 0x80000f /* get user pool quota */ +#define LUSTRE_Q_SETQUOTAPOOL 0x800010 /* set user pool quota */ +#define LUSTRE_Q_GETINFOPOOL 0x800011 /* get pool quota info */ +#define LUSTRE_Q_SETINFOPOOL 0x800012 /* set pool quota info */ +#define LUSTRE_Q_GETDEFAULT_POOL 0x800013 /* get default pool quota*/ +#define LUSTRE_Q_SETDEFAULT_POOL 0x800014 /* set default pool quota */ +/* In the current Lustre implementation, the grace time is either the time + * or the timestamp to be used after some quota ID exceeds the soft limt, + * 48 bits should be enough, its high 16 bits can be used as quota flags. + * */ +#define LQUOTA_GRACE_BITS 48 +#define LQUOTA_GRACE_MASK ((1ULL << LQUOTA_GRACE_BITS) - 1) +#define LQUOTA_GRACE_MAX LQUOTA_GRACE_MASK +#define LQUOTA_GRACE(t) (t & LQUOTA_GRACE_MASK) +#define LQUOTA_FLAG(t) (t >> LQUOTA_GRACE_BITS) +#define LQUOTA_GRACE_FLAG(t, f) ((__u64)t | (__u64)f << LQUOTA_GRACE_BITS) + +/* special grace time, only notify the user when its quota is over soft limit + * but doesn't block new writes until the hard limit is reached. */ +#define NOTIFY_GRACE "notify" +#define NOTIFY_GRACE_TIME LQUOTA_GRACE_MASK + +/* different quota flags */ + +/* the default quota flag, the corresponding quota ID will use the default + * quota setting, the hardlimit and softlimit of its quota record in the global + * quota file will be set to 0, the low 48 bits of the grace will be set to 0 + * and high 16 bits will contain this flag (see above comment). + * */ +#define LQUOTA_FLAG_DEFAULT 0x0001 + +#define LUSTRE_Q_CMD_IS_POOL(cmd) \ + (cmd == LUSTRE_Q_GETQUOTAPOOL || \ + cmd == LUSTRE_Q_SETQUOTAPOOL || \ + cmd == LUSTRE_Q_SETINFOPOOL || \ + cmd == LUSTRE_Q_GETINFOPOOL || \ + cmd == LUSTRE_Q_SETDEFAULT_POOL || \ + cmd == LUSTRE_Q_GETDEFAULT_POOL) #define ALLQUOTA 255 /* set all quota */ -static inline char *qtype_name(int qtype) +static inline const char *qtype_name(int qtype) { switch (qtype) { case USRQUOTA: @@ -870,6 +1346,26 @@ struct identity_downcall_data { __u32 idd_groups[0]; }; +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0) +/* old interface struct is deprecated in 2.14 */ +#define SEPOL_DOWNCALL_MAGIC_OLD 0x8b8bb842 +struct sepol_downcall_data_old { + __u32 sdd_magic; + __s64 sdd_sepol_mtime; + __u16 sdd_sepol_len; + char sdd_sepol[0]; +}; +#endif + +#define SEPOL_DOWNCALL_MAGIC 0x8b8bb843 +struct sepol_downcall_data { + __u32 sdd_magic; + __u16 sdd_sepol_len; + __u16 sdd_padding1; + __s64 sdd_sepol_mtime; + char sdd_sepol[0]; +}; + #ifdef NEED_QUOTA_DEFS #ifndef QIF_BLIMITS #define QIF_BLIMITS 1 @@ -892,9 +1388,9 @@ struct identity_downcall_data { #define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE" #define LUSTRE_VOLATILE_HDR_LEN 14 -typedef enum lustre_quota_version { +enum lustre_quota_version { LUSTRE_QUOTA_V2 = 1 -} lustre_quota_version_t; +}; /* XXX: same as if_dqinfo struct in kernel */ struct obd_dqinfo { @@ -906,9 +1402,9 @@ struct obd_dqinfo { /* XXX: same as if_dqblk struct in kernel, plus one padding */ struct obd_dqblk { - __u64 dqb_bhardlimit; - __u64 dqb_bsoftlimit; - __u64 dqb_curspace; + __u64 dqb_bhardlimit; /* kbytes unit */ + __u64 dqb_bsoftlimit; /* kbytes unit */ + __u64 dqb_curspace; /* bytes unit */ __u64 dqb_ihardlimit; __u64 dqb_isoftlimit; __u64 dqb_curinodes; @@ -936,6 +1432,7 @@ struct if_quotactl { struct obd_dqblk qc_dqblk; char obd_type[16]; struct obd_uuid obd_uuid; + char qc_poolname[0]; }; /* swap layout flags */ @@ -961,33 +1458,34 @@ struct lustre_swap_layouts { * Only the first 12 bits are currently saved. */ enum la_valid { - LA_ATIME = 1 << 0, - LA_MTIME = 1 << 1, - LA_CTIME = 1 << 2, - LA_SIZE = 1 << 3, - LA_MODE = 1 << 4, - LA_UID = 1 << 5, - LA_GID = 1 << 6, - LA_BLOCKS = 1 << 7, - LA_TYPE = 1 << 8, - LA_FLAGS = 1 << 9, - LA_NLINK = 1 << 10, - LA_RDEV = 1 << 11, - LA_BLKSIZE = 1 << 12, - LA_KILL_SUID = 1 << 13, - LA_KILL_SGID = 1 << 14, - LA_PROJID = 1 << 15, - LA_LAYOUT_VERSION = 1 << 16, + LA_ATIME = 1 << 0, /* 0x00001 */ + LA_MTIME = 1 << 1, /* 0x00002 */ + LA_CTIME = 1 << 2, /* 0x00004 */ + LA_SIZE = 1 << 3, /* 0x00008 */ + LA_MODE = 1 << 4, /* 0x00010 */ + LA_UID = 1 << 5, /* 0x00020 */ + LA_GID = 1 << 6, /* 0x00040 */ + LA_BLOCKS = 1 << 7, /* 0x00080 */ + LA_TYPE = 1 << 8, /* 0x00100 */ + LA_FLAGS = 1 << 9, /* 0x00200 */ + LA_NLINK = 1 << 10, /* 0x00400 */ + LA_RDEV = 1 << 11, /* 0x00800 */ + LA_BLKSIZE = 1 << 12, /* 0x01000 */ + LA_KILL_SUID = 1 << 13, /* 0x02000 */ + LA_KILL_SGID = 1 << 14, /* 0x04000 */ + LA_PROJID = 1 << 15, /* 0x08000 */ + LA_LAYOUT_VERSION = 1 << 16, /* 0x10000 */ + LA_LSIZE = 1 << 17, /* 0x20000 */ + LA_LBLOCKS = 1 << 18, /* 0x40000 */ + LA_BTIME = 1 << 19, /* 0x80000 */ /** * Attributes must be transmitted to OST objects */ LA_REMOTE_ATTR_SET = (LA_UID | LA_GID | LA_PROJID | LA_LAYOUT_VERSION) }; -#ifndef FMODE_READ -#define FMODE_READ 00000001 -#define FMODE_WRITE 00000002 -#endif +#define MDS_FMODE_READ 00000001 +#define MDS_FMODE_WRITE 00000002 #define MDS_FMODE_CLOSED 00000000 #define MDS_FMODE_EXEC 00000004 @@ -996,7 +1494,7 @@ enum la_valid { /* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ #define MDS_OPEN_CREATED 00000010 -#define MDS_OPEN_CROSS 00000020 +/* MDS_OPEN_CROSS 00000020 obsolete in 2.12, internal use only */ #define MDS_OPEN_CREAT 00000100 #define MDS_OPEN_EXCL 00000200 @@ -1029,17 +1527,21 @@ enum la_valid { #define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ #define MDS_OPEN_RESYNC 04000000000000ULL /* FLR: file resync */ +#define MDS_OPEN_PCC 010000000000000ULL /* PCC: auto RW-PCC cache attach + * for newly created file */ /* lustre internal open flags, which should not be set from user space */ #define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \ MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \ - MDS_OPEN_RELEASE | MDS_OPEN_RESYNC) + MDS_OPEN_RELEASE | MDS_OPEN_RESYNC | \ + MDS_OPEN_PCC) /********* Changelogs **********/ /** Changelog record types */ enum changelog_rec_type { + CL_NONE = -1, CL_MARK = 0, CL_CREATE = 1, /* namespace */ CL_MKDIR = 2, /* namespace */ @@ -1055,7 +1557,8 @@ enum changelog_rec_type { CL_LAYOUT = 12, /* file layout/striping modified */ CL_TRUNC = 13, CL_SETATTR = 14, - CL_XATTR = 15, + CL_SETXATTR = 15, + CL_XATTR = CL_SETXATTR, /* Deprecated name */ CL_HSM = 16, /* HSM specific events, see flags */ CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */ CL_CTIME = 18, @@ -1063,15 +1566,17 @@ enum changelog_rec_type { CL_MIGRATE = 20, CL_FLRW = 21, /* FLR: file was firstly written */ CL_RESYNC = 22, /* FLR: file was resync-ed */ - CL_LAST + CL_GETXATTR = 23, + CL_DN_OPEN = 24, /* denied open */ + CL_LAST, }; static inline const char *changelog_type2str(int type) { - static const char *changelog_str[] = { + static const char *const changelog_str[] = { "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK", "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC", "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT", - "FLRW", "RESYNC", + "FLRW", "RESYNC","GXATR", "NOPEN", }; if (type >= 0 && type < CL_LAST) @@ -1079,16 +1584,17 @@ static inline const char *changelog_type2str(int type) { return NULL; } -/* per-record flags */ +/* 12 bits of per-record data can be stored in the bottom of the flags */ #define CLF_FLAGSHIFT 12 -#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1) -#define CLF_VERMASK (~CLF_FLAGMASK) enum changelog_rec_flags { CLF_VERSION = 0x1000, CLF_RENAME = 0x2000, CLF_JOBID = 0x4000, CLF_EXTRA_FLAGS = 0x8000, - CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS + CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID | + CLF_EXTRA_FLAGS, + CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1, + CLF_VERMASK = ~CLF_FLAGMASK, }; @@ -1148,29 +1654,35 @@ static inline enum hsm_event hsm_get_cl_event(__u16 flags) CLF_HSM_EVENT_L); } -static inline void hsm_set_cl_event(int *flags, enum hsm_event he) +static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags, + enum hsm_event he) { - *flags |= (he << CLF_HSM_EVENT_L); + *clf_flags = (enum changelog_rec_flags) + (*clf_flags | (he << CLF_HSM_EVENT_L)); } -static inline __u16 hsm_get_cl_flags(int flags) +static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags) { - return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); + return CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); } -static inline void hsm_set_cl_flags(int *flags, int bits) +static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags, + unsigned int bits) { - *flags |= (bits << CLF_HSM_FLAG_L); + *clf_flags = (enum changelog_rec_flags) + (*clf_flags | (bits << CLF_HSM_FLAG_L)); } -static inline int hsm_get_cl_error(int flags) +static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags) { - return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); + return CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); } -static inline void hsm_set_cl_error(int *flags, int error) +static inline void hsm_set_cl_error(enum changelog_rec_flags *clf_flags, + unsigned int error) { - *flags |= (error << CLF_HSM_ERR_L); + *clf_flags = (enum changelog_rec_flags) + (*clf_flags | (error << CLF_HSM_ERR_L)); } enum changelog_rec_extra_flags { @@ -1178,7 +1690,8 @@ enum changelog_rec_extra_flags { CLFE_UIDGID = 0x0001, CLFE_NID = 0x0002, CLFE_OPEN = 0x0004, - CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN + CLFE_XATTR = 0x0008, + CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR }; enum changelog_send_flag { @@ -1201,11 +1714,13 @@ enum changelog_send_extra_flag { CHANGELOG_EXTRA_FLAG_NID = 0x02, /* Pack open mode into the changelog record */ CHANGELOG_EXTRA_FLAG_OMODE = 0x04, + /* Pack xattr name into the changelog record */ + CHANGELOG_EXTRA_FLAG_XATTR = 0x08, }; -#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \ +#define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \ changelog_rec_offset(CLF_SUPPORTED, \ - CLFE_SUPPORTED)) + CLFE_SUPPORTED), 8) /* 31 usable bytes string + null terminator. */ #define LUSTRE_JOBID_SIZE 32 @@ -1229,7 +1744,7 @@ struct changelog_rec { __u32 cr_markerflags; /**< CL_MARK flags */ }; struct lu_fid cr_pfid; /**< parent fid */ -}; +} __attribute__ ((packed)); /* Changelog extension for RENAME. */ struct changelog_ext_rename { @@ -1262,18 +1777,23 @@ struct changelog_ext_nid { __u32 padding; }; -/* Changelog extra extension to include OPEN mode. */ +/* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags. */ struct changelog_ext_openmode { __u32 cr_openflags; }; +/* Changelog extra extension to include xattr */ +struct changelog_ext_xattr { + char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */ +}; + static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags( const struct changelog_rec *rec); -static inline size_t changelog_rec_offset(enum changelog_rec_flags crf, +static inline __kernel_size_t changelog_rec_offset(enum changelog_rec_flags crf, enum changelog_rec_extra_flags cref) { - size_t size = sizeof(struct changelog_rec); + __kernel_size_t size = sizeof(struct changelog_rec); if (crf & CLF_RENAME) size += sizeof(struct changelog_ext_rename); @@ -1289,22 +1809,26 @@ static inline size_t changelog_rec_offset(enum changelog_rec_flags crf, size += sizeof(struct changelog_ext_nid); if (cref & CLFE_OPEN) size += sizeof(struct changelog_ext_openmode); + if (cref & CLFE_XATTR) + size += sizeof(struct changelog_ext_xattr); } return size; } -static inline size_t changelog_rec_size(const struct changelog_rec *rec) +static inline __kernel_size_t changelog_rec_size(const struct changelog_rec *rec) { enum changelog_rec_extra_flags cref = CLFE_INVALID; if (rec->cr_flags & CLF_EXTRA_FLAGS) - cref = changelog_rec_extra_flags(rec)->cr_extra_flags; + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; - return changelog_rec_offset(rec->cr_flags, cref); + return changelog_rec_offset( + (enum changelog_rec_flags)rec->cr_flags, cref); } -static inline size_t changelog_rec_varsize(const struct changelog_rec *rec) +static inline __kernel_size_t changelog_rec_varsize(const struct changelog_rec *rec) { return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen; } @@ -1312,7 +1836,8 @@ static inline size_t changelog_rec_varsize(const struct changelog_rec *rec) static inline struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION; + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & CLF_VERSION); return (struct changelog_ext_rename *)((char *)rec + changelog_rec_offset(crf, @@ -1323,8 +1848,8 @@ struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *re static inline struct changelog_ext_jobid *changelog_rec_jobid(const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & (CLF_VERSION | CLF_RENAME)); return (struct changelog_ext_jobid *)((char *)rec + changelog_rec_offset(crf, @@ -1336,8 +1861,8 @@ static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags( const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME | CLF_JOBID); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID)); return (struct changelog_ext_extra_flags *)((char *)rec + changelog_rec_offset(crf, @@ -1349,8 +1874,9 @@ static inline struct changelog_ext_uidgid *changelog_rec_uidgid( const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); return (struct changelog_ext_uidgid *)((char *)rec + changelog_rec_offset(crf, @@ -1361,13 +1887,15 @@ struct changelog_ext_uidgid *changelog_rec_uidgid( static inline struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); enum changelog_rec_extra_flags cref = CLFE_INVALID; if (rec->cr_flags & CLF_EXTRA_FLAGS) - cref = changelog_rec_extra_flags(rec)->cr_extra_flags & - CLFE_UIDGID; + cref = (enum changelog_rec_extra_flags) + (changelog_rec_extra_flags(rec)->cr_extra_flags & + CLFE_UIDGID); return (struct changelog_ext_nid *)((char *)rec + changelog_rec_offset(crf, cref)); @@ -1378,31 +1906,55 @@ static inline struct changelog_ext_openmode *changelog_rec_openmode( const struct changelog_rec *rec) { - enum changelog_rec_flags crf = rec->cr_flags & - (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS); + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); enum changelog_rec_extra_flags cref = CLFE_INVALID; - if (rec->cr_flags & CLF_EXTRA_FLAGS) - cref = changelog_rec_extra_flags(rec)->cr_extra_flags & - (CLFE_UIDGID | CLFE_NID); + if (rec->cr_flags & CLF_EXTRA_FLAGS) { + cref = (enum changelog_rec_extra_flags) + (changelog_rec_extra_flags(rec)->cr_extra_flags & + (CLFE_UIDGID | CLFE_NID)); + } return (struct changelog_ext_openmode *)((char *)rec + changelog_rec_offset(crf, cref)); } +/* The xattr name is the fourth extra extension */ +static inline +struct changelog_ext_xattr *changelog_rec_xattr( + const struct changelog_rec *rec) +{ + enum changelog_rec_flags crf = (enum changelog_rec_flags) + (rec->cr_flags & + (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS)); + enum changelog_rec_extra_flags cref = CLFE_INVALID; + + if (rec->cr_flags & CLF_EXTRA_FLAGS) + cref = (enum changelog_rec_extra_flags) + (changelog_rec_extra_flags(rec)->cr_extra_flags & + (CLFE_UIDGID | CLFE_NID | CLFE_OPEN)); + + return (struct changelog_ext_xattr *)((char *)rec + + changelog_rec_offset(crf, cref)); +} + /* The name follows the rename, jobid and extra flags extns, if present */ static inline char *changelog_rec_name(const struct changelog_rec *rec) { enum changelog_rec_extra_flags cref = CLFE_INVALID; if (rec->cr_flags & CLF_EXTRA_FLAGS) - cref = changelog_rec_extra_flags(rec)->cr_extra_flags; + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; - return (char *)rec + changelog_rec_offset(rec->cr_flags & CLF_SUPPORTED, - cref & CLFE_SUPPORTED); + return (char *)rec + changelog_rec_offset( + (enum changelog_rec_flags)(rec->cr_flags & CLF_SUPPORTED), + (enum changelog_rec_extra_flags)(cref & CLFE_SUPPORTED)); } -static inline size_t changelog_rec_snamelen(const struct changelog_rec *rec) +static inline __kernel_size_t changelog_rec_snamelen(const struct changelog_rec *rec) { return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1; } @@ -1439,6 +1991,7 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, enum changelog_rec_flags crf_wanted, enum changelog_rec_extra_flags cref_want) { + char *xattr_mov = NULL; char *omd_mov = NULL; char *nid_mov = NULL; char *uidgid_mov = NULL; @@ -1447,8 +2000,10 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, char *rnm_mov; enum changelog_rec_extra_flags cref = CLFE_INVALID; - crf_wanted &= CLF_SUPPORTED; - cref_want &= CLFE_SUPPORTED; + crf_wanted = (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED); + cref_want = (enum changelog_rec_extra_flags) + (cref_want & CLFE_SUPPORTED); if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) { if (!(rec->cr_flags & CLF_EXTRA_FLAGS) || @@ -1465,38 +2020,60 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, /* Locations of extensions in the remapped record */ if (rec->cr_flags & CLF_EXTRA_FLAGS) { + xattr_mov = (char *)rec + + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~CLFE_XATTR)); omd_mov = (char *)rec + - changelog_rec_offset(crf_wanted & CLF_SUPPORTED, - cref_want & ~CLFE_OPEN); + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~(CLFE_OPEN | CLFE_XATTR))); nid_mov = (char *)rec + - changelog_rec_offset(crf_wanted & CLF_SUPPORTED, - cref_want & ~(CLFE_NID | - CLFE_OPEN)); + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & + ~(CLFE_NID | CLFE_OPEN | CLFE_XATTR))); uidgid_mov = (char *)rec + - changelog_rec_offset(crf_wanted & CLF_SUPPORTED, - cref_want & ~(CLFE_UIDGID | + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & CLF_SUPPORTED), + (enum changelog_rec_extra_flags) + (cref_want & ~(CLFE_UIDGID | CLFE_NID | - CLFE_OPEN)); - cref = changelog_rec_extra_flags(rec)->cr_extra_flags; + CLFE_OPEN | + CLFE_XATTR))); + cref = (enum changelog_rec_extra_flags) + changelog_rec_extra_flags(rec)->cr_extra_flags; } ef_mov = (char *)rec + - changelog_rec_offset(crf_wanted & ~CLF_EXTRA_FLAGS, - CLFE_INVALID); + changelog_rec_offset( + (enum changelog_rec_flags) + (crf_wanted & ~CLF_EXTRA_FLAGS), CLFE_INVALID); jid_mov = (char *)rec + - changelog_rec_offset(crf_wanted & - ~(CLF_EXTRA_FLAGS | CLF_JOBID), + changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & + ~(CLF_EXTRA_FLAGS | CLF_JOBID)), CLFE_INVALID); rnm_mov = (char *)rec + - changelog_rec_offset(crf_wanted & + changelog_rec_offset((enum changelog_rec_flags)(crf_wanted & ~(CLF_EXTRA_FLAGS | CLF_JOBID | - CLF_RENAME), + CLF_RENAME)), CLFE_INVALID); /* Move the extension fields to the desired positions */ if ((crf_wanted & CLF_EXTRA_FLAGS) && (rec->cr_flags & CLF_EXTRA_FLAGS)) { + if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR)) + memmove(xattr_mov, changelog_rec_xattr(rec), + sizeof(struct changelog_ext_xattr)); + if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN)) memmove(omd_mov, changelog_rec_openmode(rec), sizeof(struct changelog_ext_openmode)); @@ -1522,6 +2099,10 @@ static inline void changelog_remap_rec(struct changelog_rec *rec, sizeof(struct changelog_ext_rename)); /* Clear newly added fields */ + if (xattr_mov && (cref_want & CLFE_XATTR) && + !(cref & CLFE_XATTR)) + memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr)); + if (omd_mov && (cref_want & CLFE_OPEN) && !(cref & CLFE_OPEN)) memset(omd_mov, 0, sizeof(struct changelog_ext_openmode)); @@ -1606,14 +2187,14 @@ enum hsm_states { #define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK) /** - * HSM request progress state + * HSM request progress state */ enum hsm_progress_states { + HPS_NONE = 0, HPS_WAITING = 1, HPS_RUNNING = 2, HPS_DONE = 3, }; -#define HPS_NONE 0 static inline const char *hsm_progress_state2name(enum hsm_progress_states s) { @@ -1730,11 +2311,12 @@ static inline void *hur_data(struct hsm_user_request *hur) /** * Compute the current length of the provided hsm_user_request. This returns -1 - * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ] + * instead of an errno because __kernel_ssize_t is defined to be only + * [ -1, SSIZE_MAX ] * * return -1 on bounds check error. */ -static inline ssize_t hur_len(struct hsm_user_request *hur) +static inline __kernel_size_t hur_len(struct hsm_user_request *hur) { __u64 size; @@ -1743,7 +2325,7 @@ static inline ssize_t hur_len(struct hsm_user_request *hur) (__u64)hur->hur_request.hr_itemcount * sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len; - if (size != (ssize_t)size) + if ((__kernel_ssize_t)size < 0) return -1; return size; @@ -1799,7 +2381,7 @@ struct hsm_action_item { * \retval buffer */ static inline char *hai_dump_data_field(const struct hsm_action_item *hai, - char *buffer, size_t len) + char *buffer, __kernel_size_t len) { int i; int data_len; @@ -1824,7 +2406,7 @@ static inline char *hai_dump_data_field(const struct hsm_action_item *hai, struct hsm_action_list { __u32 hal_version; __u32 hal_count; /* number of hai's to follow */ - __u64 hal_compound_id; /* returned by coordinator */ + __u64 hal_compound_id; /* returned by coordinator, ignored */ __u64 hal_flags; __u32 hal_archive_id; /* which archive backend */ __u32 padding1; @@ -1833,40 +2415,33 @@ struct hsm_action_list { boundaries. See hai_zero */ } __attribute__((packed)); -#ifndef HAVE_CFS_SIZE_ROUND -static inline int cfs_size_round (int val) -{ - return (val + 7) & (~0x7); -} -#define HAVE_CFS_SIZE_ROUND -#endif - /* Return pointer to first hai in action list */ static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal) { - return (struct hsm_action_item *)(hal->hal_fsname + - cfs_size_round(strlen(hal-> \ - hal_fsname) - + 1)); + __kernel_size_t offset = __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8); + + return (struct hsm_action_item *)(hal->hal_fsname + offset); } + /* Return pointer to next hai */ static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai) { - return (struct hsm_action_item *)((char *)hai + - cfs_size_round(hai->hai_len)); + __kernel_size_t offset = __ALIGN_KERNEL(hai->hai_len, 8); + + return (struct hsm_action_item *)((char *)hai + offset); } /* Return size of an hsm_action_list */ -static inline size_t hal_size(struct hsm_action_list *hal) +static inline __kernel_size_t hal_size(struct hsm_action_list *hal) { __u32 i; - size_t sz; + __kernel_size_t sz; struct hsm_action_item *hai; - sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1); + sz = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8); hai = hai_first(hal); for (i = 0; i < hal->hal_count ; i++, hai = hai_next(hai)) - sz += cfs_size_round(hai->hai_len); + sz += __ALIGN_KERNEL(hai->hai_len, 8); return sz; } @@ -1907,31 +2482,6 @@ struct hsm_copy { struct hsm_action_item hc_hai; }; -/* JSON objects */ -enum llapi_json_types { - LLAPI_JSON_INTEGER = 1, - LLAPI_JSON_BIGNUM, - LLAPI_JSON_REAL, - LLAPI_JSON_STRING -}; - -struct llapi_json_item { - char *lji_key; - __u32 lji_type; - union { - int lji_integer; - __u64 lji_u64; - double lji_real; - char *lji_string; - }; - struct llapi_json_item *lji_next; -}; - -struct llapi_json_item_list { - int ljil_item_count; - struct llapi_json_item *ljil_items; -}; - enum lu_ladvise_type { LU_LADVISE_INVALID = 0, LU_LADVISE_WILLREAD = 1, @@ -1998,7 +2548,6 @@ enum sk_crypt_alg { SK_CRYPT_INVALID = -1, SK_CRYPT_EMPTY = 0, SK_CRYPT_AES256_CTR = 1, - SK_CRYPT_MAX = 2, }; enum sk_hmac_alg { @@ -2006,17 +2555,16 @@ enum sk_hmac_alg { SK_HMAC_EMPTY = 0, SK_HMAC_SHA256 = 1, SK_HMAC_SHA512 = 2, - SK_HMAC_MAX = 3, }; struct sk_crypt_type { - char *sct_name; - size_t sct_bytes; + const char *sct_name; + int sct_type; }; struct sk_hmac_type { - char *sht_name; - size_t sht_bytes; + const char *sht_name; + int sht_type; }; enum lock_mode_user { @@ -2036,6 +2584,162 @@ enum lockahead_results { LLA_RESULT_SAME, }; +enum lu_heat_flag_bit { + LU_HEAT_FLAG_BIT_INVALID = 0, + LU_HEAT_FLAG_BIT_OFF, + LU_HEAT_FLAG_BIT_CLEAR, +}; + +enum lu_heat_flag { + LU_HEAT_FLAG_OFF = 1ULL << LU_HEAT_FLAG_BIT_OFF, + LU_HEAT_FLAG_CLEAR = 1ULL << LU_HEAT_FLAG_BIT_CLEAR, +}; + +enum obd_heat_type { + OBD_HEAT_READSAMPLE = 0, + OBD_HEAT_WRITESAMPLE = 1, + OBD_HEAT_READBYTE = 2, + OBD_HEAT_WRITEBYTE = 3, + OBD_HEAT_COUNT +}; + +#define LU_HEAT_NAMES { \ + [OBD_HEAT_READSAMPLE] = "readsample", \ + [OBD_HEAT_WRITESAMPLE] = "writesample", \ + [OBD_HEAT_READBYTE] = "readbyte", \ + [OBD_HEAT_WRITEBYTE] = "writebyte", \ +} + +struct lu_heat { + __u32 lh_count; + __u32 lh_flags; + __u64 lh_heat[0]; +}; + +enum lu_pcc_type { + LU_PCC_NONE = 0, + LU_PCC_READWRITE, + LU_PCC_MAX +}; + +static inline const char *pcc_type2string(enum lu_pcc_type type) +{ + switch (type) { + case LU_PCC_NONE: + return "none"; + case LU_PCC_READWRITE: + return "readwrite"; + default: + return "fault"; + } +} + +struct lu_pcc_attach { + __u32 pcca_type; /* PCC type */ + __u32 pcca_id; /* archive ID for readwrite, group ID for readonly */ +}; + +enum lu_pcc_detach_opts { + PCC_DETACH_OPT_NONE = 0, /* Detach only, keep the PCC copy */ + PCC_DETACH_OPT_UNCACHE, /* Remove the cached file after detach */ +}; + +struct lu_pcc_detach_fid { + /* fid of the file to detach */ + struct lu_fid pccd_fid; + __u32 pccd_opt; +}; + +struct lu_pcc_detach { + __u32 pccd_opt; +}; + +enum lu_pcc_state_flags { + PCC_STATE_FL_NONE = 0x0, + /* The inode attr is cached locally */ + PCC_STATE_FL_ATTR_VALID = 0x01, + /* The file is being attached into PCC */ + PCC_STATE_FL_ATTACHING = 0x02, +}; + +struct lu_pcc_state { + __u32 pccs_type; /* enum lu_pcc_type */ + __u32 pccs_open_count; + __u32 pccs_flags; /* enum lu_pcc_state_flags */ + __u32 pccs_padding; + char pccs_path[PATH_MAX]; +}; + +enum lu_project_type { + LU_PROJECT_NONE = 0, + LU_PROJECT_SET, + LU_PROJECT_GET, + LU_PROJECT_MAX +}; + +struct lu_project { + __u32 project_type; /* enum lu_project_type */ + __u32 project_id; + __u32 project_xflags; + __u32 project_reserved; + char project_name[NAME_MAX + 1]; +}; + +struct fid_array { + __u32 fa_nr; + /* make header's size equal lu_fid */ + __u32 fa_padding0; + __u64 fa_padding1; + struct lu_fid fa_fids[0]; +}; +#define OBD_MAX_FIDS_IN_ARRAY 4096 + +/* more types could be defined upon need for more complex + * format to be used in foreign symlink LOV/LMV EAs, like + * one to describe a delimiter string and occurence number + * of delimited sub-string, ... + */ +enum ll_foreign_symlink_upcall_item_type { + EOB_TYPE = 1, + STRING_TYPE = 2, + POSLEN_TYPE = 3, +}; + +/* may need to be modified to allow for more format items to be defined, and + * like for ll_foreign_symlink_upcall_item_type enum + */ +struct ll_foreign_symlink_upcall_item { + __u32 type; + union { + struct { + __u32 pos; + __u32 len; + }; + struct { + size_t size; + union { + /* internal storage of constant string */ + char *string; + /* upcall stores constant string in a raw */ + char bytestring[0]; + }; + }; + }; +}; + +#define POSLEN_ITEM_SZ (offsetof(struct ll_foreign_symlink_upcall_item, len) + \ + sizeof(((struct ll_foreign_symlink_upcall_item *)0)->len)) +#define STRING_ITEM_SZ(sz) ( \ + offsetof(struct ll_foreign_symlink_upcall_item, bytestring) + \ + (sz + sizeof(__u32) - 1) / sizeof(__u32) * sizeof(__u32)) + +/* presently limited to not cause max stack frame size to be reached + * because of temporary automatic array of + * "struct ll_foreign_symlink_upcall_item" presently used in + * foreign_symlink_upcall_info_store() + */ +#define MAX_NB_UPCALL_ITEMS 32 + #if defined(__cplusplus) } #endif