# include <linux/version.h>
# include <uapi/linux/lustre/lustre_fiemap.h>
#else /* !__KERNEL__ */
-# define NEED_QUOTA_DEFS
# include <limits.h>
# include <stdbool.h>
# include <stdio.h> /* snprintf() */
+# include <stdint.h>
# include <string.h>
-# include <sys/quota.h>
+# define NEED_QUOTA_DEFS
+/* # include <sys/quota.h> - this causes complaints about caddr_t */
# include <sys/stat.h>
# include <linux/lustre/lustre_fiemap.h>
#endif /* __KERNEL__ */
"project", /* PRJQUOTA */ \
"undefined", \
};
+#ifndef USRQUOTA
+#define USRQUOTA 0
+#endif
+#ifndef GRPQUOTA
+#define GRPQUOTA 1
+#endif
+#ifndef PRJQUOTA
#define PRJQUOTA 2
+#endif
/*
* We need to always use 64bit version because the structure
OS_STATE_ENOSPC = 0x00000020, /**< not enough free space */
OS_STATE_ENOINO = 0x00000040, /**< not enough inodes */
OS_STATE_SUM = 0x00000100, /**< aggregated for all tagrets */
+ OS_STATE_NONROT = 0x00000200, /**< non-rotational device */
};
/** filesystem statistics/attributes for target device */
__u32 ol_comp_id;
} __attribute__((packed));
-/* keep this one for compatibility */
-struct filter_fid_old {
- struct lu_fid ff_parent;
- __u64 ff_objid;
- __u64 ff_seq;
+/* The filter_fid structure has changed several times over its lifetime.
+ * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
+ * stripe_index and the "self FID" (objid/seq) to be able to recover the
+ * OST objects in case of corruption. With the move to 2.4 and OSD-API for
+ * the OST, the "trusted.lma" xattr was added to the OST objects to store
+ * the "self FID" to be consistent with the MDT on-disk format, and the
+ * filter_fid only stored the MDT inode parent FID and stripe index.
+ *
+ * In 2.10, the addition of PFL composite layouts required more information
+ * to be stored into the filter_fid in order to be able to identify which
+ * component the OST object belonged to. As well, the stripe size may vary
+ * between components, so it was no longer safe to assume the stripe size
+ * or stripe_count of a file. This is also more robust for plain layouts.
+ *
+ * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
+ * enough space to store both the filter_fid and LMA in the inode, so they
+ * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
+ * an extra seek for every OST object access.
+ *
+ * In 2.11, FLR mirror layouts also need to store the layout version and
+ * range so that writes to old versions of the layout are not allowed.
+ * That ensures that mirrored objects are not modified by evicted clients,
+ * and ensures that the components are correctly marked stale on the MDT.
+ */
+struct filter_fid_18_23 { /* parent FID plus the object's "self FID" (objid/seq) */
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+ __u64 ff_objid; /* "self FID" object id */
+ __u64 ff_seq; /* "self FID" sequence */
+};
+
+struct filter_fid_24_29 { /* parent FID only; the "self FID" is kept in trusted.lma */
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+};
+
+struct filter_fid_210 { /* 2.10 format: parent FID plus PFL component layout */
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+ struct ost_layout ff_layout; /* layout of the component this object belongs to */
};
struct filter_fid {
- struct lu_fid ff_parent;
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
struct ost_layout ff_layout;
__u32 ff_layout_version;
__u32 ff_range; /* range of layout version that
LL_LEASE_RESYNC_DONE = 0x2,
LL_LEASE_LAYOUT_MERGE = 0x4,
LL_LEASE_LAYOUT_SPLIT = 0x8,
+ LL_LEASE_PCC_ATTACH = 0x10,
};
#define IOC_IDS_MAX 4096
__u32 lil_ids[0];
};
+struct ll_ioc_lease_id { /* fixed header followed by a variable-length array of ids */
+ __u32 lil_mode;
+ __u32 lil_flags;
+ __u32 lil_count; /* presumably the number of lil_ids entries - confirm */
+ __u16 lil_mirror_id;
+ __u16 lil_padding1; /* padding; no use visible here */
+ __u64 lil_padding2; /* padding; no use visible here */
+ __u32 lil_ids[0]; /* variable-length trailing array */
+};
+
/*
* The ioctl naming rules:
* LL_* - works on the currently opened filehandle instead of parent dir
#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
#define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise)
+#define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat)
+#define LL_IOC_HEAT_SET _IOW('f', 251, __u64) /* same nr 251 as HEAT_GET; direction and argument type differ */
+#define LL_IOC_PCC_DETACH _IOW('f', 252, struct lu_pcc_detach)
+#define LL_IOC_PCC_DETACH_BY_FID _IOW('f', 252, struct lu_pcc_detach_fid) /* nr 252 again; argument type differs */
+#define LL_IOC_PCC_STATE _IOR('f', 252, struct lu_pcc_state) /* nr 252 again; direction and argument type differ */
#ifndef FS_IOC_FSGETXATTR
/*
/* To be compatible with old statically linked binary we keep the check for
* the older 0100000000 flag. This is already removed upstream. LU-812. */
#define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */
+#ifndef FASYNC
+#define FASYNC 00020000 /* fcntl, for BSD compatibility */
+#endif
#define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC)
#define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \
O_LOV_DELAY_CREATE_MASK)
#define LL_FILE_READAHEA 0x00000004
#define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */
+#define LL_FILE_FLOCK_WARNING 0x00000020 /* warned about disabled flock */
#define LOV_USER_MAGIC_V1 0x0BD10BD0
#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
#define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0
+#define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0
#define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */
#define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/
#define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0
-#define LOV_PATTERN_NONE 0x000
-#define LOV_PATTERN_RAID0 0x001
-#define LOV_PATTERN_RAID1 0x002
-#define LOV_PATTERN_MDT 0x100
-#define LOV_PATTERN_CMOBD 0x200
+#define LOV_PATTERN_NONE 0x000
+#define LOV_PATTERN_RAID0 0x001
+#define LOV_PATTERN_RAID1 0x002
+#define LOV_PATTERN_MDT 0x100
+#define LOV_PATTERN_OVERSTRIPING 0x200
#define LOV_PATTERN_F_MASK 0xffff0000
#define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */
#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
#define LOV_PATTERN_DEFAULT 0xffffffff
+#define LOV_OFFSET_DEFAULT ((__u16)-1)
+
static inline bool lov_pattern_supported(__u32 pattern)
{
return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
+ (pattern & ~LOV_PATTERN_F_RELEASED) ==
+ (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) || /* RAID0 with overstriping is accepted too */
(pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT;
}
+/* RELEASED and MDT patterns are not valid in many places, so rather than
+ * having many extra checks on lov_pattern_supported, we have this separate
+ * check for non-released, non-DOM components.
+ *
+ * Returns true only when the pattern is exactly RAID0, or exactly RAID0
+ * combined with OVERSTRIPING. The comparison is on the whole value, so any
+ * additional bit (for example LOV_PATTERN_F_RELEASED) yields false.
+ */
+static inline bool lov_pattern_supported_normal_comp(__u32 pattern)
+{
+ return pattern == LOV_PATTERN_RAID0 ||
+ pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING);
+
+}
+
#define LOV_MAXPOOLNAME 15
#define LOV_POOLNAMEF "%.15s"
* allocation that is sufficient for the current generation of systems.
*
* (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */
-#define LOV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
+#define LOV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */
#define LOV_ALL_STRIPES 0xffff /* only valid for directories */
#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
} __attribute__((packed));
+struct lov_foreign_md { /* header of a "foreign" layout; the payload follows in lfm_value */
+ __u32 lfm_magic; /* magic number = LOV_MAGIC_FOREIGN */
+ __u32 lfm_length; /* length of lfm_value */
+ __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */
+ __u32 lfm_flags; /* flags, type specific */
+ char lfm_value[]; /* lfm_length bytes of payload (flexible array member) */
+};
+
+/* Total size in bytes of a foreign layout: the fixed header up to lfm_value
+ * plus the lfm_length bytes of payload that follow it. foreign_size() reads
+ * lfm_length as stored; foreign_size_le() runs it through le32_to_cpu() first.
+ * The argument is parenthesized before the cast so that an expression such
+ * as "buf + off" is cast as a whole rather than only its first operand.
+ */
+#define foreign_size(lfm) (((struct lov_foreign_md *)(lfm))->lfm_length + \
+ offsetof(struct lov_foreign_md, lfm_value))
+
+#define foreign_size_le(lfm) \
+ (le32_to_cpu(((struct lov_foreign_md *)(lfm))->lfm_length) + \
+ offsetof(struct lov_foreign_md, lfm_value))
+
struct lu_extent {
__u64 e_start;
__u64 e_end;
/* The flags are for mirrors */
#define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC)
+/* These flags have meaning when set in a default layout and will be inherited
+ * from the default/template layout set on a directory.
+ */
+#define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC)
+
/* the highest bit in obdo::o_layout_version is used to mark if the file is
* being resynced. */
#define LU_LAYOUT_RESYNC LCME_FL_NEG
#define SEQ_ID_MASK SEQ_ID_MAX
/* bit 30:16 of lcme_id is used to store mirror id */
#define MIRROR_ID_MASK 0x7FFF0000
+#define MIRROR_ID_NEG 0x8000
#define MIRROR_ID_SHIFT 16
static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
LMV_HASH_TYPE_ALL_CHARS = 1,
LMV_HASH_TYPE_FNV_1A_64 = 2,
+ LMV_HASH_TYPE_SPACE = 3, /*
+ * distribute subdirs among all MDTs
+ * with balanced space usage.
+ */
LMV_HASH_TYPE_MAX,
};
#define LMV_HASH_NAME_ALL_CHARS "all_char"
#define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64"
+#define LMV_HASH_NAME_SPACE "space"
extern char *mdt_hash_name[LMV_HASH_TYPE_MAX];
+struct lustre_foreign_type { /* pairs a foreign type value with its name */
+ uint32_t lft_type; /* presumably an enum lustre_foreign_types value - confirm */
+ const char *lft_name; /* name string for lft_type */
+};
+
+/**
+ * LOV/LMV foreign types
+ *
+ * NOTE(review): 0xffffffff does not fit in int. ISO C before C23 requires
+ * enumerator values to be representable as int, so LU_FOREIGN_TYPE_UNKNOWN
+ * relies on a compiler extension - confirm every supported compiler
+ * accepts it.
+ **/
+enum lustre_foreign_types {
+ LU_FOREIGN_TYPE_NONE = 0,
+ LU_FOREIGN_TYPE_DAOS = 0xda05,
+ /* must be the max/last one */
+ LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
+};
+
+extern struct lustre_foreign_type lu_foreign_types[];
+
/* Got this according to how get LOV_MAX_STRIPE_COUNT, see above,
* (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) */
#define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
__u32 lum_stripe_count; /* dirstripe count */
__u32 lum_stripe_offset; /* MDT idx for default dirstripe */
__u32 lum_hash_type; /* Dir stripe policy */
- __u32 lum_type; /* LMV type: default or normal */
+ __u32 lum_type; /* LMV type: default */
__u32 lum_padding1;
__u32 lum_padding2;
__u32 lum_padding3;
struct lmv_user_mds_data lum_objects[0];
} __attribute__((packed));
+static inline __u32 lmv_foreign_to_md_stripes(__u32 size)
+{ /* Returns how many struct lmv_user_mds_data entries are needed to cover
+   * the part of size (in bytes) that exceeds struct lmv_user_md, rounding
+   * up; returns 0 when size does not exceed that header.
+   */
+ if (size <= sizeof(struct lmv_user_md))
+ return 0;
+
+ size -= sizeof(struct lmv_user_md); /* bytes left after the header */
+ return (size + sizeof(struct lmv_user_mds_data) - 1) / /* round up */
+ sizeof(struct lmv_user_mds_data);
+}
+
+/*
+ * NB: historically the default layout did not set a type; it is told apart
+ * from a normal layout by its XATTR name instead. For backward compatibility
+ * LMV_TYPE_DEFAULT is defined as 0x0 and that same method is still used.
+ */
+enum lmv_type {
+ LMV_TYPE_DEFAULT = 0x0000,
+};
+
static inline int lmv_user_md_size(int stripes, int lmm_magic)
{
int size = sizeof(struct lmv_user_md);
}
#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
+#define SEPOL_DOWNCALL_MAGIC 0x8b8bb842
/* permission */
#define N_PERMS_MAX 64
__u32 idd_groups[0];
};
+struct sepol_downcall_data { /* fixed header followed by the sdd_sepol bytes */
+ __u32 sdd_magic; /* presumably SEPOL_DOWNCALL_MAGIC - confirm */
+ time_t sdd_sepol_mtime; /* NOTE(review): time_t width is ABI dependent, so
+   * this layout may differ between 32- and 64-bit
+   * user space - confirm both sides agree */
+ __u16 sdd_sepol_len; /* presumably the length of sdd_sepol - confirm */
+ char sdd_sepol[0]; /* variable-length trailing array */
+};
+
#ifdef NEED_QUOTA_DEFS
#ifndef QIF_BLIMITS
#define QIF_BLIMITS 1
* Only the first 12 bits are currently saved.
*/
enum la_valid {
- LA_ATIME = 1 << 0,
- LA_MTIME = 1 << 1,
- LA_CTIME = 1 << 2,
- LA_SIZE = 1 << 3,
- LA_MODE = 1 << 4,
- LA_UID = 1 << 5,
- LA_GID = 1 << 6,
- LA_BLOCKS = 1 << 7,
- LA_TYPE = 1 << 8,
- LA_FLAGS = 1 << 9,
- LA_NLINK = 1 << 10,
- LA_RDEV = 1 << 11,
- LA_BLKSIZE = 1 << 12,
- LA_KILL_SUID = 1 << 13,
- LA_KILL_SGID = 1 << 14,
- LA_PROJID = 1 << 15,
- LA_LAYOUT_VERSION = 1 << 16,
- LA_LSIZE = 1 << 17,
- LA_LBLOCKS = 1 << 18,
+ LA_ATIME = 1 << 0, /* 0x00001 */
+ LA_MTIME = 1 << 1, /* 0x00002 */
+ LA_CTIME = 1 << 2, /* 0x00004 */
+ LA_SIZE = 1 << 3, /* 0x00008 */
+ LA_MODE = 1 << 4, /* 0x00010 */
+ LA_UID = 1 << 5, /* 0x00020 */
+ LA_GID = 1 << 6, /* 0x00040 */
+ LA_BLOCKS = 1 << 7, /* 0x00080 */
+ LA_TYPE = 1 << 8, /* 0x00100 */
+ LA_FLAGS = 1 << 9, /* 0x00200 */
+ LA_NLINK = 1 << 10, /* 0x00400 */
+ LA_RDEV = 1 << 11, /* 0x00800 */
+ LA_BLKSIZE = 1 << 12, /* 0x01000 */
+ LA_KILL_SUID = 1 << 13, /* 0x02000 */
+ LA_KILL_SGID = 1 << 14, /* 0x04000 */
+ LA_PROJID = 1 << 15, /* 0x08000 */
+ LA_LAYOUT_VERSION = 1 << 16, /* 0x10000 */
+ LA_LSIZE = 1 << 17, /* 0x20000 */
+ LA_LBLOCKS = 1 << 18, /* 0x40000 */
/**
* Attributes must be transmitted to OST objects
*/
/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
#define MDS_OPEN_CREATED 00000010
-#define MDS_OPEN_CROSS 00000020
+/* MDS_OPEN_CROSS 00000020 obsolete in 2.12, internal use only */
#define MDS_OPEN_CREAT 00000100
#define MDS_OPEN_EXCL 00000200
#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
#define MDS_OPEN_RESYNC 04000000000000ULL /* FLR: file resync */
+#define MDS_OPEN_PCC 010000000000000ULL /* PCC: auto RW-PCC cache attach
+ * for newly created file */
/* lustre internal open flags, which should not be set from user space */
#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
- MDS_OPEN_RELEASE | MDS_OPEN_RESYNC)
+ MDS_OPEN_RELEASE | MDS_OPEN_RESYNC | \
+ MDS_OPEN_PCC)
/********* Changelogs **********/
/** Changelog record types */
enum changelog_rec_type {
+ CL_NONE = -1,
CL_MARK = 0,
CL_CREATE = 1, /* namespace */
CL_MKDIR = 2, /* namespace */
return NULL;
}
-/* per-record flags */
+/* 12 bits of per-record data can be stored in the bottom of the flags */
#define CLF_FLAGSHIFT 12
-#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1)
-#define CLF_VERMASK (~CLF_FLAGMASK)
enum changelog_rec_flags {
CLF_VERSION = 0x1000,
CLF_RENAME = 0x2000,
CLF_JOBID = 0x4000,
CLF_EXTRA_FLAGS = 0x8000,
- CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS
+ CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID |
+ CLF_EXTRA_FLAGS, /* union of the four flag bits above */
+ CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1, /* all bits below CLF_FLAGSHIFT */
+ CLF_VERMASK = ~CLF_FLAGMASK, /* every bit not in CLF_FLAGMASK */
};
CLF_HSM_EVENT_L);
}
-static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
+/* OR the HSM event he into *clf_flags, shifted up to bit CLF_HSM_EVENT_L.
+ * Bits already set in *clf_flags are kept. The result is cast back to the
+ * enum explicitly: C accepts "enum |= int", but C++ has no implicit
+ * int-to-enum conversion and this header carries a __cplusplus guard.
+ */
+static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags,
+ enum hsm_event he)
{
- *flags |= (he << CLF_HSM_EVENT_L);
+ *clf_flags = (enum changelog_rec_flags)
+ (*clf_flags | (he << CLF_HSM_EVENT_L));
}
-static inline __u16 hsm_get_cl_flags(int flags)
+static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags)
{
- return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
+ return CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L); /* HSM flag field; presumably bits CLF_HSM_FLAG_L..CLF_HSM_FLAG_H - see CLF_GET_BITS */
}
-static inline void hsm_set_cl_flags(int *flags, int bits)
+/* OR bits into *clf_flags, shifted up to bit CLF_HSM_FLAG_L. Bits already
+ * set in *clf_flags are kept. The result is cast back to the enum
+ * explicitly: C accepts "enum |= unsigned", but C++ has no implicit
+ * integer-to-enum conversion and this header carries a __cplusplus guard.
+ */
+static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags,
+ unsigned int bits)
{
- *flags |= (bits << CLF_HSM_FLAG_L);
+ *clf_flags = (enum changelog_rec_flags)
+ (*clf_flags | (bits << CLF_HSM_FLAG_L));
}
-static inline int hsm_get_cl_error(int flags)
+static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags)
{
- return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
+ return CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L); /* HSM error field; presumably bits CLF_HSM_ERR_L..CLF_HSM_ERR_H - see CLF_GET_BITS */
}
-static inline void hsm_set_cl_error(int *flags, int error)
+/* OR error into *clf_flags, shifted up to bit CLF_HSM_ERR_L. Bits already
+ * set in *clf_flags are kept. The result is cast back to the enum
+ * explicitly: C accepts "enum |= unsigned", but C++ has no implicit
+ * integer-to-enum conversion and this header carries a __cplusplus guard.
+ */
+static inline void hsm_set_cl_error(enum changelog_rec_flags *clf_flags,
+ unsigned int error)
{
- *flags |= (error << CLF_HSM_ERR_L);
+ *clf_flags = (enum changelog_rec_flags)
+ (*clf_flags | (error << CLF_HSM_ERR_L));
}
enum changelog_rec_extra_flags {
__u32 padding;
};
-/* Changelog extra extension to include OPEN mode. */
+/* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags. */
struct changelog_ext_openmode {
__u32 cr_openflags;
};
SK_CRYPT_INVALID = -1,
SK_CRYPT_EMPTY = 0,
SK_CRYPT_AES256_CTR = 1,
- SK_CRYPT_MAX = 2,
};
enum sk_hmac_alg {
SK_HMAC_EMPTY = 0,
SK_HMAC_SHA256 = 1,
SK_HMAC_SHA512 = 2,
- SK_HMAC_MAX = 3,
};
struct sk_crypt_type {
- char *sct_name;
- size_t sct_bytes;
+ const char *sct_name; /* name string; read-only */
+ int sct_type; /* presumably an enum sk_crypt_alg value - confirm */
};
struct sk_hmac_type {
- char *sht_name;
- size_t sht_bytes;
+ const char *sht_name; /* name string; read-only */
+ int sht_type; /* presumably an enum sk_hmac_alg value - confirm */
};
enum lock_mode_user {
LLA_RESULT_SAME,
};
+enum lu_heat_flag_bit { /* bit positions used to build enum lu_heat_flag */
+ LU_HEAT_FLAG_BIT_INVALID = 0,
+ LU_HEAT_FLAG_BIT_OFF, /* bit 1 */
+ LU_HEAT_FLAG_BIT_CLEAR, /* bit 2 */
+};
+
+enum lu_heat_flag { /* masks built from the enum lu_heat_flag_bit positions */
+ LU_HEAT_FLAG_OFF = 1ULL << LU_HEAT_FLAG_BIT_OFF, /* 0x2 */
+ LU_HEAT_FLAG_CLEAR = 1ULL << LU_HEAT_FLAG_BIT_CLEAR, /* 0x4 */
+};
+
+enum obd_heat_type { /* heat counter kinds; the values also index LU_HEAT_NAMES */
+ OBD_HEAT_READSAMPLE = 0,
+ OBD_HEAT_WRITESAMPLE = 1,
+ OBD_HEAT_READBYTE = 2,
+ OBD_HEAT_WRITEBYTE = 3,
+ OBD_HEAT_COUNT /* number of heat types above (4) */
+};
+
+#define LU_HEAT_NAMES { \
+ [OBD_HEAT_READSAMPLE] = "readsample", \
+ [OBD_HEAT_WRITESAMPLE] = "writesample", \
+ [OBD_HEAT_READBYTE] = "readbyte", \
+ [OBD_HEAT_WRITEBYTE] = "writebyte", \
+} /* array initializer: one name string per enum obd_heat_type value */
+
+struct lu_heat { /* fixed header followed by a variable-length array of heat values */
+ __u32 lh_count; /* presumably the number of lh_heat entries - confirm */
+ __u32 lh_flags; /* presumably enum lu_heat_flag bits - confirm */
+ __u64 lh_heat[0]; /* variable-length trailing array */
+};
+
+enum lu_pcc_type { /* PCC types; names are given by pcc_type2string() */
+ LU_PCC_NONE = 0,
+ LU_PCC_READWRITE, /* 1 */
+ LU_PCC_MAX /* 2; one past the last type defined here */
+};
+
+static inline const char *pcc_type2string(enum lu_pcc_type type)
+{ /* Map a PCC type to a constant name string; never returns NULL. */
+ switch (type) {
+ case LU_PCC_NONE:
+ return "none";
+ case LU_PCC_READWRITE:
+ return "readwrite";
+ default: /* LU_PCC_MAX and any other value */
+ return "fault";
+ }
+}
+
+struct lu_pcc_attach { /* NOTE(review): no readonly value is visible in enum lu_pcc_type here */
+ __u32 pcca_type; /* PCC type */
+ __u32 pcca_id; /* archive ID for readwrite, group ID for readonly */
+};
+
+enum lu_pcc_detach_opts { /* presumably the values carried in pccd_opt - confirm */
+ PCC_DETACH_OPT_NONE = 0, /* Detach only, keep the PCC copy */
+ PCC_DETACH_OPT_UNCACHE, /* Remove the cached file after detach */
+};
+
+struct lu_pcc_detach_fid { /* argument type of LL_IOC_PCC_DETACH_BY_FID */
+ /* fid of the file to detach */
+ struct lu_fid pccd_fid;
+ __u32 pccd_opt; /* presumably an enum lu_pcc_detach_opts value - confirm */
+};
+
+struct lu_pcc_detach { /* argument type of LL_IOC_PCC_DETACH */
+ __u32 pccd_opt; /* presumably an enum lu_pcc_detach_opts value - confirm */
+};
+
+enum lu_pcc_state_flags { /* bit flags stored in lu_pcc_state::pccs_flags */
+ PCC_STATE_FL_NONE = 0x0,
+ /* The inode attr is cached locally */
+ PCC_STATE_FL_ATTR_VALID = 0x01,
+ /* The file is being attached into PCC */
+ PCC_STATE_FL_ATTACHING = 0x02,
+ /* Allow to auto attach at open */
+ PCC_STATE_FL_OPEN_ATTACH = 0x04,
+};
+
+struct lu_pcc_state { /* argument type of LL_IOC_PCC_STATE */
+ __u32 pccs_type; /* enum lu_pcc_type */
+ __u32 pccs_open_count;
+ __u32 pccs_flags; /* enum lu_pcc_state_flags */
+ __u32 pccs_padding; /* padding; no use visible here */
+ char pccs_path[PATH_MAX]; /* fixed-size path buffer of PATH_MAX bytes */
+};
+
#if defined(__cplusplus)
}
#endif