* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2016, Intel Corporation.
+ * Copyright (c) 2010, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* @{
*/
+#include <linux/kernel.h>
#include <linux/types.h>
#ifdef __KERNEL__
# include <linux/version.h>
# include <uapi/linux/lustre/lustre_fiemap.h>
#else /* !__KERNEL__ */
-# define NEED_QUOTA_DEFS
# include <limits.h>
# include <stdbool.h>
# include <stdio.h> /* snprintf() */
# include <string.h>
-# include <sys/quota.h>
+# define NEED_QUOTA_DEFS
+/* # include <sys/quota.h> - this causes complaints about caddr_t */
# include <sys/stat.h>
# include <linux/lustre/lustre_fiemap.h>
#endif /* __KERNEL__ */
+/*
+ * Handle older distros whose kernel headers do not provide the
+ * __ALIGN_KERNEL() helpers.
+ *
+ * __ALIGN_KERNEL(x, a) rounds x up to the next multiple of a, where a must
+ * be a power of two.  The mask is cast to the type of x so the arithmetic
+ * is done at the width of x.  __typeof__ is used instead of typeof so that
+ * userspace built in strict ISO C mode (e.g. -std=c99 or -std=c11), where
+ * the plain typeof keyword is not available, can still use this header.
+ */
+#ifndef __ALIGN_KERNEL
+# define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
+# define __ALIGN_KERNEL(x, a)	__ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)
+#endif
+
#if defined(__cplusplus)
extern "C" {
#endif
"project", /* PRJQUOTA */ \
"undefined", \
};
+#ifndef USRQUOTA
+#define USRQUOTA 0
+#endif
+#ifndef GRPQUOTA
+#define GRPQUOTA 1
+#endif
+#ifndef PRJQUOTA
#define PRJQUOTA 2
+#endif
-#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
- defined(__craynv) || defined(__mips64__) || defined(__powerpc64__) || \
- defined(__aarch64__)
-typedef struct stat lstat_t;
-# define lstat_f lstat
-# define fstat_f fstat
-# define fstatat_f fstatat
-# define HAVE_LOV_USER_MDS_DATA
-#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__)
-typedef struct stat64 lstat_t;
-# define lstat_f lstat64
-# define fstat_f fstat64
-# define fstatat_f fstatat64
-# define HAVE_LOV_USER_MDS_DATA
+/*
+ * We need to always use 64bit version because the structure
+ * is shared across entire cluster where 32bit and 64bit machines
+ * are co-existing.
+ */
+#if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
+typedef struct stat64 lstat_t;
+#define lstat_f lstat64
+#define fstat_f fstat64
+#define fstatat_f fstatat64
+#else
+typedef struct stat lstat_t;
+#define lstat_f lstat
+#define fstat_f fstat
+#define fstatat_f fstatat
#endif
+#define HAVE_LOV_USER_MDS_DATA
+
#define LUSTRE_EOF 0xffffffffffffffffULL
/* for statfs() */
#define LL_SUPER_MAGIC 0x0BD00BD0
-#ifndef FSFILT_IOC_GETFLAGS
-#define FSFILT_IOC_GETFLAGS _IOR('f', 1, long)
-#define FSFILT_IOC_SETFLAGS _IOW('f', 2, long)
-#define FSFILT_IOC_GETVERSION _IOR('f', 3, long)
-#define FSFILT_IOC_SETVERSION _IOW('f', 4, long)
-#define FSFILT_IOC_GETVERSION_OLD _IOR('v', 1, long)
-#define FSFILT_IOC_SETVERSION_OLD _IOW('v', 2, long)
-#endif
+#define FSFILT_IOC_GETVERSION _IOR('f', 3, long)
/* FIEMAP flags supported by Lustre */
#define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
+/** Target state flags; every value is a distinct bit so they can be OR-ed. */
enum obd_statfs_state {
OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */
OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */
+ OS_STATE_NOPRECREATE = 0x00000004, /**< no object precreation */
OS_STATE_ENOSPC = 0x00000020, /**< not enough free space */
OS_STATE_ENOINO = 0x00000040, /**< not enough inodes */
+ OS_STATE_SUM = 0x00000100, /**< aggregated for all targets */
};
/** filesystem statistics/attributes for target device */
__u32 os_fprecreated; /* objs available now to the caller */
/* used in QoS code to find preferred
* OSTs */
- __u32 os_spare2; /* Unused padding fields. Remember */
- __u32 os_spare3; /* to fix lustre_swab_obd_statfs() */
- __u32 os_spare4;
- __u32 os_spare5;
- __u32 os_spare6;
- __u32 os_spare7;
- __u32 os_spare8;
- __u32 os_spare9;
+ __u32 os_granted; /* space granted for MDS */
+ __u32 os_spare3; /* Unused padding fields. Remember */
+ __u32 os_spare4; /* to fix lustre_swab_obd_statfs() */
+ __u32 os_spare5;
+ __u32 os_spare6;
+ __u32 os_spare7;
+ __u32 os_spare8;
+ __u32 os_spare9;
};
/**
__u32 ol_comp_id;
} __attribute__((packed));
-/* keep this one for compatibility */
-struct filter_fid_old {
- struct lu_fid ff_parent;
- __u64 ff_objid;
- __u64 ff_seq;
+/* The filter_fid structure has changed several times over its lifetime.
+ * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
+ * stripe_index and the "self FID" (objid/seq) to be able to recover the
+ * OST objects in case of corruption. With the move to 2.4 and OSD-API for
+ * the OST, the "trusted.lma" xattr was added to the OST objects to store
+ * the "self FID" to be consistent with the MDT on-disk format, and the
+ * filter_fid only stored the MDT inode parent FID and stripe index.
+ *
+ * In 2.10, the addition of PFL composite layouts required more information
+ * to be stored into the filter_fid in order to be able to identify which
+ * component the OST object belonged. As well, the stripe size may vary
+ * between components, so it was no longer safe to assume the stripe size
+ * or stripe_count of a file. This is also more robust for plain layouts.
+ *
+ * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
+ * enough space to store both the filter_fid and LMA in the inode, so they
+ * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
+ * an extra seek for every OST object access.
+ *
+ * In 2.11, FLR mirror layouts also need to store the layout version and
+ * range so that writes to old versions of the layout are not allowed.
+ * That ensures that mirrored objects are not modified by evicted clients,
+ * and ensures that the components are correctly marked stale on the MDT.
+ */
+/* Layout that carries the object's own objid/seq next to the parent FID. */
+struct filter_fid_18_23 {
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+ __u64 ff_objid;
+ __u64 ff_seq;
+};
+
+/* Layout that holds only the parent FID (stripe index kept in its f_ver). */
+struct filter_fid_24_29 {
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+};
+
+/* Layout that adds the struct ost_layout description to the parent FID. */
+struct filter_fid_210 {
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
+ struct ost_layout ff_layout;
};
struct filter_fid {
- struct lu_fid ff_parent;
+ struct lu_fid ff_parent; /* stripe_idx in f_ver */
struct ost_layout ff_layout;
__u32 ff_layout_version;
__u32 ff_range; /* range of layout version that
* under /O/<seq>/d<x>. */
LMAC_STRIPE_INFO = 0x00000010, /* stripe info in the LMA EA. */
LMAC_COMP_INFO = 0x00000020, /* Component info in the LMA EA. */
+ LMAC_IDX_BACKUP = 0x00000040, /* Has index backup. */
};
/**
*/
#define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
-enum {
- LSOM_FL_VALID = 1 << 0,
+/* State of the Size-on-MDT (SoM) data; UNKNOWN is 0, the others are bits. */
+enum lustre_som_flags {
+ /* Unknown or no SoM data, must get size from OSTs. */
+ SOM_FL_UNKNOWN = 0x0000,
+ /* Known strictly correct, FLR or DoM file (SoM guaranteed). */
+ SOM_FL_STRICT = 0x0001,
+ /* Known stale - was right at some point in the past, but it is
+ * known (or likely) to be incorrect now (e.g. opened for write). */
+ SOM_FL_STALE = 0x0002,
+ /* Approximate, may never have been strictly correct,
+ * need to sync SOM data to achieve eventual consistency. */
+ SOM_FL_LAZY = 0x0004,
};
struct lustre_som_attrs {
enum ll_lease_flags {
LL_LEASE_RESYNC = 0x1,
LL_LEASE_RESYNC_DONE = 0x2,
+ LL_LEASE_LAYOUT_MERGE = 0x4,
+ LL_LEASE_LAYOUT_SPLIT = 0x8,
};
#define IOC_IDS_MAX 4096
__u32 lil_ids[0];
};
+/*
+ * Lease request carrying a mirror id and a trailing list of ids.
+ * NOTE(review): the field semantics noted below are inferred from the
+ * names only - confirm against the ioctl handler.
+ */
+struct ll_ioc_lease_id {
+ __u32 lil_mode;
+ __u32 lil_flags;
+ __u32 lil_count; /* presumably number of lil_ids[] entries - TODO confirm */
+ __u16 lil_mirror_id;
+ __u16 lil_padding1;
+ __u64 lil_padding2;
+ __u32 lil_ids[0]; /* variable-length trailing array */
+};
+
/*
* The ioctl naming rules:
* LL_* - works on the currently opened filehandle instead of parent dir
#define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
#define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
#define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise)
+#define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat)
+#define LL_IOC_HEAT_SET _IOW('f', 252, long)
#ifndef FS_IOC_FSGETXATTR
/*
#endif
#define LL_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define LL_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+#ifndef FS_XFLAG_PROJINHERIT
+#define FS_XFLAG_PROJINHERIT 0x00000200
+#endif
#define LL_STATFS_LMV 1
#define LL_STATFS_LOV 2
#define LL_STATFS_NODELAY 4
-#define IOC_MDC_TYPE 'i'
-#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
-#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
-#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
+/*
+ * The *_OLD requests reuse command numbers 22 and 23 of the current
+ * requests, but are declared with a pointer to struct lov_user_mds_data_v1
+ * as argument type while the current ones are declared with
+ * struct lov_user_mds_data itself.  _IOWR() encodes the size of its type
+ * argument in the request code, so old and new requests stay distinct as
+ * long as those two sizes differ.
+ */
+#define IOC_MDC_TYPE 'i'
+#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
+#define IOC_MDC_GETFILEINFO_OLD _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data_v1 *)
+#define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data)
+#define LL_IOC_MDC_GETINFO_OLD _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data_v1 *)
+#define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data)
#define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
/* To be compatible with old statically linked binary we keep the check for
* the older 0100000000 flag. This is already removed upstream. LU-812. */
#define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */
+#ifndef FASYNC
+#define FASYNC 00020000 /* fcntl, for BSD compatibility */
+#endif
#define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC)
#define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \
O_LOV_DELAY_CREATE_MASK)
#define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */
#define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/
+#define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0
#define LOV_PATTERN_NONE 0x000
#define LOV_PATTERN_RAID0 0x001
#define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
#define LOV_PATTERN_DEFAULT 0xffffffff
+#define LOV_OFFSET_DEFAULT ((__u16)-1)
+
static inline bool lov_pattern_supported(__u32 pattern)
{
- return pattern == LOV_PATTERN_RAID0 ||
- pattern == LOV_PATTERN_MDT ||
- pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED);
+	/* the HSM released flag is ignored when checking the base pattern */
+	__u32 base = pattern & ~LOV_PATTERN_F_RELEASED;
+
+	return base == LOV_PATTERN_RAID0 || base == LOV_PATTERN_MDT;
}
#define LOV_MAXPOOLNAME 15
}
enum lov_comp_md_entry_flags {
- LCME_FL_PRIMARY = 0x00000001, /* Not used */
- LCME_FL_STALE = 0x00000002, /* Not used */
- LCME_FL_OFFLINE = 0x00000004, /* Not used */
- LCME_FL_PREFERRED = 0x00000008, /* Not used */
+ LCME_FL_STALE = 0x00000001, /* FLR: stale data */
+ LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */
+ LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */
+ LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR,
+ LCME_FL_OFFLINE = 0x00000008, /* Not used */
LCME_FL_INIT = 0x00000010, /* instantiated */
+ LCME_FL_NOSYNC = 0x00000020, /* FLR: no sync for the mirror */
LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag,
won't be stored on disk */
};
-#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT)
+#define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \
+ LCME_FL_PREF_RW | LCME_FL_NOSYNC)
+/* The flags can be set by users at mirror creation time. */
+#define LCME_USER_FLAGS (LCME_FL_PREF_RW)
+
+/* The flags are for mirrors */
+#define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC)
+
+/* These flags have meaning when set in a default layout and will be inherited
+ * from the default/template layout set on a directory.
+ */
+#define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC)
/* the highest bit in obdo::o_layout_version is used to mark if the file is
* being resynced. */
__u32 lcme_offset; /* offset of component blob,
start from lov_comp_md_v1 */
__u32 lcme_size; /* size of component blob */
- __u64 lcme_padding[2];
+ __u32 lcme_layout_gen;
+ __u64 lcme_timestamp; /* snapshot time if applicable*/
+ __u32 lcme_padding_1;
} __attribute__((packed));
#define SEQ_ID_MAX 0x0000FFFF
#define SEQ_ID_MASK SEQ_ID_MAX
/* bit 30:16 of lcme_id is used to store mirror id */
#define MIRROR_ID_MASK 0x7FFF0000
+#define MIRROR_ID_NEG 0x8000
#define MIRROR_ID_SHIFT 16
static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
*/
enum lov_comp_md_flags {
/* the least 2 bits are used by FLR to record file state */
- LCM_FL_NOT_FLR = 0,
+ LCM_FL_NONE = 0,
LCM_FL_RDONLY = 1,
LCM_FL_WRITE_PENDING = 2,
LCM_FL_SYNC_PENDING = 3,
struct lov_comp_md_entry_v1 lcm_entries[0];
} __attribute__((packed));
-/*
- * Maximum number of mirrors Lustre can support.
- */
-#define LUSTRE_MIRROR_COUNT_MAX 16
-
static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
{
if (stripes == (__u16)-1)
* use this. It is unsafe to #define those values in this header as it
* is possible the application has already #included <sys/stat.h>. */
#ifdef HAVE_LOV_USER_MDS_DATA
-#define lov_user_mds_data lov_user_mds_data_v1
+#define lov_user_mds_data lov_user_mds_data_v2
struct lov_user_mds_data_v1 {
lstat_t lmd_st; /* MDS stat struct */
struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
} __attribute__((packed));
-struct lov_user_mds_data_v3 {
- lstat_t lmd_st; /* MDS stat struct */
- struct lov_user_md_v3 lmd_lmm; /* LOV EA V3 user data */
+struct lov_user_mds_data_v2 {
+ lstat_t lmd_st; /* MDS stat struct */
+ __u64 lmd_flags; /* MDS stat flags */
+ struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
} __attribute__((packed));
#endif
static inline int lmv_user_md_size(int stripes, int lmm_magic)
{
- return sizeof(struct lmv_user_md) +
- stripes * sizeof(struct lmv_user_mds_data);
+	/* only the "specific" layout carries per-stripe entries after the header */
+	if (lmm_magic != LMV_USER_MAGIC_SPECIFIC)
+		return sizeof(struct lmv_user_md);
+
+	return sizeof(struct lmv_user_md) +
+	       stripes * sizeof(struct lmv_user_mds_data);
}
struct ll_recreate_obj {
/* lustre-specific control commands */
#define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
#define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
+#define LUSTRE_Q_GETDEFAULT 0x80000d /* get default quota */
+#define LUSTRE_Q_SETDEFAULT 0x80000e /* set default quota */
+
+/* In the current Lustre implementation, the grace time is either the time
+ * or the timestamp to be used after some quota ID exceeds the soft limit,
+ * 48 bits should be enough, its high 16 bits can be used as quota flags.
+ *
+ * LQUOTA_GRACE(t)         - low 48 bits of t (the grace time itself)
+ * LQUOTA_FLAG(t)          - bits above the low 48 of t (the quota flags)
+ * LQUOTA_GRACE_FLAG(t, f) - t OR-ed with f shifted above the low 48 bits
+ *
+ * Macro arguments are parenthesized so that compound expressions such as
+ * LQUOTA_GRACE(a | b) are evaluated as a whole before masking/shifting.
+ */
+#define LQUOTA_GRACE_BITS	48
+#define LQUOTA_GRACE_MASK	((1ULL << LQUOTA_GRACE_BITS) - 1)
+#define LQUOTA_GRACE_MAX	LQUOTA_GRACE_MASK
+#define LQUOTA_GRACE(t)		((t) & LQUOTA_GRACE_MASK)
+#define LQUOTA_FLAG(t)		((t) >> LQUOTA_GRACE_BITS)
+#define LQUOTA_GRACE_FLAG(t, f)	((__u64)(t) | ((__u64)(f) << LQUOTA_GRACE_BITS))
+
+/* different quota flags */
+
+/* the default quota flag, the corresponding quota ID will use the default
+ * quota setting, the hardlimit and softlimit of its quota record in the global
+ * quota file will be set to 0, the low 48 bits of the grace will be set to 0
+ * and high 16 bits will contain this flag (see above comment).
+ * */
+#define LQUOTA_FLAG_DEFAULT 0x0001
#define ALLQUOTA 255 /* set all quota */
static inline char *qtype_name(int qtype)
}
#define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
+#define SEPOL_DOWNCALL_MAGIC 0x8b8bb842
/* permission */
#define N_PERMS_MAX 64
__u32 idd_groups[0];
};
+/*
+ * Downcall payload identified by sdd_magic and ending in variable-length
+ * data.
+ * NOTE(review): sdd_sepol_mtime is a time_t and the struct is not packed,
+ * so the member offsets and total size depend on the ABI of whoever
+ * compiles this header - confirm both ends of the downcall always agree.
+ */
+struct sepol_downcall_data {
+ __u32 sdd_magic;
+ time_t sdd_sepol_mtime;
+ __u16 sdd_sepol_len; /* presumably length of sdd_sepol[] - TODO confirm */
+ char sdd_sepol[0]; /* variable-length trailing data */
+};
+
#ifdef NEED_QUOTA_DEFS
#ifndef QIF_BLIMITS
#define QIF_BLIMITS 1
#endif /* !__KERNEL__ */
/* lustre volatile file support
- * file name header: .^L^S^T^R:volatile"
+ * file name header: ".^L^S^T^R:volatile"
*/
#define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
#define LUSTRE_VOLATILE_HDR_LEN 14
-typedef enum lustre_quota_version {
+enum lustre_quota_version {
LUSTRE_QUOTA_V2 = 1
-} lustre_quota_version_t;
+};
/* XXX: same as if_dqinfo struct in kernel */
struct obd_dqinfo {
#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
#define SWAP_LAYOUTS_CLOSE (1 << 4)
-#define MERGE_LAYOUTS_CLOSE (1 << 5)
-#define INTENT_LAYOUTS_CLOSE (SWAP_LAYOUTS_CLOSE | MERGE_LAYOUTS_CLOSE)
/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
__u64 sl_dv2;
};
+/** Bit-mask of valid attributes */
+/* The LA_* flags are written to disk as part of the ChangeLog records
+ * so they are part of the on-disk and network protocol, and cannot be changed.
+ * Only the first 12 bits are currently saved.
+ */
+enum la_valid {
+	LA_ATIME		= 0x00001,	/* 1 << 0 */
+	LA_MTIME		= 0x00002,	/* 1 << 1 */
+	LA_CTIME		= 0x00004,	/* 1 << 2 */
+	LA_SIZE			= 0x00008,	/* 1 << 3 */
+	LA_MODE			= 0x00010,	/* 1 << 4 */
+	LA_UID			= 0x00020,	/* 1 << 5 */
+	LA_GID			= 0x00040,	/* 1 << 6 */
+	LA_BLOCKS		= 0x00080,	/* 1 << 7 */
+	LA_TYPE			= 0x00100,	/* 1 << 8 */
+	LA_FLAGS		= 0x00200,	/* 1 << 9 */
+	LA_NLINK		= 0x00400,	/* 1 << 10 */
+	LA_RDEV			= 0x00800,	/* 1 << 11 */
+	LA_BLKSIZE		= 0x01000,	/* 1 << 12 */
+	LA_KILL_SUID		= 0x02000,	/* 1 << 13 */
+	LA_KILL_SGID		= 0x04000,	/* 1 << 14 */
+	LA_PROJID		= 0x08000,	/* 1 << 15 */
+	LA_LAYOUT_VERSION	= 0x10000,	/* 1 << 16 */
+	LA_LSIZE		= 0x20000,	/* 1 << 17 */
+	LA_LBLOCKS		= 0x40000,	/* 1 << 18 */
+	/**
+	 * Attributes must be transmitted to OST objects
+	 */
+	LA_REMOTE_ATTR_SET	= (LA_UID | LA_GID | LA_PROJID |
+				   LA_LAYOUT_VERSION)
+};
+
+#define MDS_FMODE_READ 00000001
+#define MDS_FMODE_WRITE 00000002
+
+#define MDS_FMODE_CLOSED 00000000
+#define MDS_FMODE_EXEC 00000004
+/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
+/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
+/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
+
+#define MDS_OPEN_CREATED 00000010
+/* MDS_OPEN_CROSS 00000020 obsolete in 2.12, internal use only */
+
+#define MDS_OPEN_CREAT 00000100
+#define MDS_OPEN_EXCL 00000200
+#define MDS_OPEN_TRUNC 00001000
+#define MDS_OPEN_APPEND 00002000
+#define MDS_OPEN_SYNC 00010000
+#define MDS_OPEN_DIRECTORY 00200000
+
+#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */
+#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
+#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
+#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file.
+ * We do not support JOIN FILE
+ * anymore, reserve this flags
+ * just for preventing such bit
+ * to be reused. */
+
+#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */
+#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */
+#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */
+#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */
+#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or
+ * hsm restore) */
+#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created
+ unlinked */
+#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease
+ * delegation, succeed if it's not
+ * being opened with conflict mode.
+ */
+#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
+
+#define MDS_OPEN_RESYNC 04000000000000ULL /* FLR: file resync */
+
+/* lustre internal open flags, which should not be set from user space */
+#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
+ MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
+ MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
+ MDS_OPEN_RELEASE | MDS_OPEN_RESYNC)
+
/********* Changelogs **********/
/** Changelog record types */
enum changelog_rec_type {
+ CL_NONE = -1,
CL_MARK = 0,
CL_CREATE = 1, /* namespace */
CL_MKDIR = 2, /* namespace */
CL_LAYOUT = 12, /* file layout/striping modified */
CL_TRUNC = 13,
CL_SETATTR = 14,
- CL_XATTR = 15,
+ CL_SETXATTR = 15,
+ CL_XATTR = CL_SETXATTR, /* Deprecated name */
CL_HSM = 16, /* HSM specific events, see flags */
CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
CL_CTIME = 18,
CL_MIGRATE = 20,
CL_FLRW = 21, /* FLR: file was firstly written */
CL_RESYNC = 22, /* FLR: file was resync-ed */
+ CL_GETXATTR = 23,
+ CL_DN_OPEN = 24, /* denied open */
CL_LAST
};
"MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
"RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
"SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT",
- "FLRW", "RESYNC",
+ "FLRW", "RESYNC","GXATR", "NOPEN",
};
if (type >= 0 && type < CL_LAST)
return NULL;
}
-/* per-record flags */
+/* 12 bits of per-record data can be stored in the bottom of the flags */
#define CLF_FLAGSHIFT 12
-#define CLF_FLAGMASK ((1U << CLF_FLAGSHIFT) - 1)
-#define CLF_VERMASK (~CLF_FLAGMASK)
enum changelog_rec_flags {
CLF_VERSION = 0x1000,
CLF_RENAME = 0x2000,
CLF_JOBID = 0x4000,
CLF_EXTRA_FLAGS = 0x8000,
- CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS
+ CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID |
+ CLF_EXTRA_FLAGS,
+ CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1,
+ CLF_VERMASK = ~CLF_FLAGMASK,
};
CLF_HSM_EVENT_L);
}
-static inline void hsm_set_cl_event(int *flags, enum hsm_event he)
+/*
+ * OR the HSM event \a he, shifted to bit CLF_HSM_EVENT_L, into \a clf_flags.
+ * Bits that are already set are left untouched.
+ *
+ * The result is assigned through an explicit cast rather than "|=" because
+ * C++ does not implicitly convert the integer result back to the enum type.
+ */
+static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags,
+				    enum hsm_event he)
{
- *flags |= (he << CLF_HSM_EVENT_L);
+	*clf_flags = (enum changelog_rec_flags)
+		(*clf_flags | (he << CLF_HSM_EVENT_L));
}
-static inline __u16 hsm_get_cl_flags(int flags)
+/*
+ * Return the field of \a clf_flags selected by CLF_GET_BITS() with the
+ * CLF_HSM_FLAG_H / CLF_HSM_FLAG_L bounds (presumably the inclusive bit
+ * range L..H - the macro is defined outside this excerpt).
+ */
+static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags)
{
- return CLF_GET_BITS(flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
+ return CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
}
-static inline void hsm_set_cl_flags(int *flags, int bits)
+/*
+ * OR \a bits, shifted to bit CLF_HSM_FLAG_L, into \a clf_flags.
+ * Bits that are already set are left untouched.
+ *
+ * The result is assigned through an explicit cast rather than "|=" because
+ * C++ does not implicitly convert the integer result back to the enum type.
+ */
+static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags,
+				    unsigned int bits)
{
- *flags |= (bits << CLF_HSM_FLAG_L);
+	*clf_flags = (enum changelog_rec_flags)
+		(*clf_flags | (bits << CLF_HSM_FLAG_L));
}
-static inline int hsm_get_cl_error(int flags)
+/*
+ * Return the field of \a clf_flags selected by CLF_GET_BITS() with the
+ * CLF_HSM_ERR_H / CLF_HSM_ERR_L bounds (presumably the inclusive bit
+ * range L..H - the macro is defined outside this excerpt).
+ */
+static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags)
{
- return CLF_GET_BITS(flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
+ return CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
}
-static inline void hsm_set_cl_error(int *flags, int error)
+/*
+ * OR \a error, shifted to bit CLF_HSM_ERR_L, into \a clf_flags.
+ * Bits that are already set are left untouched.
+ *
+ * The result is assigned through an explicit cast rather than "|=" because
+ * C++ does not implicitly convert the integer result back to the enum type.
+ */
+static inline void hsm_set_cl_error(enum changelog_rec_flags *clf_flags,
+				    unsigned int error)
{
- *flags |= (error << CLF_HSM_ERR_L);
+	*clf_flags = (enum changelog_rec_flags)
+		(*clf_flags | (error << CLF_HSM_ERR_L));
}
enum changelog_rec_extra_flags {
CLFE_INVALID = 0,
CLFE_UIDGID = 0x0001,
- CLFE_SUPPORTED = CLFE_UIDGID
+ CLFE_NID = 0x0002,
+ CLFE_OPEN = 0x0004,
+ CLFE_XATTR = 0x0008,
+ CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR
};
enum changelog_send_flag {
enum changelog_send_extra_flag {
/* Pack uid/gid into the changelog record */
CHANGELOG_EXTRA_FLAG_UIDGID = 0x01,
+ /* Pack nid into the changelog record */
+ CHANGELOG_EXTRA_FLAG_NID = 0x02,
+ /* Pack open mode into the changelog record */
+ CHANGELOG_EXTRA_FLAG_OMODE = 0x04,
+ /* Pack xattr name into the changelog record */
+ CHANGELOG_EXTRA_FLAG_XATTR = 0x08,
};
-#define CR_MAXSIZE cfs_size_round(2 * NAME_MAX + 2 + \
+#define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \
changelog_rec_offset(CLF_SUPPORTED, \
- CLFE_SUPPORTED))
+ CLFE_SUPPORTED), 8)
/* 31 usable bytes string + null terminator. */
#define LUSTRE_JOBID_SIZE 32
__u64 cr_gid;
};
+/* Changelog extra extension to include NID. */
+/*
+ * NOTE(review): the members add up to 20 bytes and the struct is not
+ * packed, so sizeof() (used when computing record offsets) depends on how
+ * the ABI aligns __u64 - confirm records never cross such ABIs.
+ */
+struct changelog_ext_nid {
+ /* have __u64 instead of lnet_nid_t type for use by client api */
+ __u64 cr_nid;
+ /* for use when IPv6 support is added */
+ __u64 extra;
+ __u32 padding;
+};
+
+/* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags. */
+struct changelog_ext_openmode {
+ __u32 cr_openflags;
+};
+
+/* Changelog extra extension to include xattr */
+struct changelog_ext_xattr {
+ char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */
+};
+
static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags(
const struct changelog_rec *rec);
size += sizeof(struct changelog_ext_extra_flags);
if (cref & CLFE_UIDGID)
size += sizeof(struct changelog_ext_uidgid);
+ if (cref & CLFE_NID)
+ size += sizeof(struct changelog_ext_nid);
+ if (cref & CLFE_OPEN)
+ size += sizeof(struct changelog_ext_openmode);
+ if (cref & CLFE_XATTR)
+ size += sizeof(struct changelog_ext_xattr);
}
return size;
CLFE_INVALID));
}
+/*
+ * The nid is the second extra extension.
+ *
+ * Returns the address inside \a rec at changelog_rec_offset() computed from
+ * the record's own flags and, of the extra flags, only CLFE_UIDGID (the
+ * extension stored before the nid).  This helper does not check that
+ * CLFE_NID is actually set in the record.
+ *
+ * The masked flag values are cast back to their enum types explicitly so
+ * the header also compiles as C++, which has no implicit int-to-enum
+ * conversion.
+ */
+static inline
+struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec)
+{
+	enum changelog_rec_flags crf = (enum changelog_rec_flags)
+		(rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID |
+				  CLF_EXTRA_FLAGS));
+	enum changelog_rec_extra_flags cref = CLFE_INVALID;
+
+	if (rec->cr_flags & CLF_EXTRA_FLAGS)
+		cref = (enum changelog_rec_extra_flags)
+			(changelog_rec_extra_flags(rec)->cr_extra_flags &
+			 CLFE_UIDGID);
+
+	return (struct changelog_ext_nid *)((char *)rec +
+					    changelog_rec_offset(crf, cref));
+}
+
+/*
+ * The OPEN mode is the third extra extension.
+ *
+ * Returns the address inside \a rec at changelog_rec_offset() computed from
+ * the record's own flags and, of the extra flags, only CLFE_UIDGID and
+ * CLFE_NID (the extensions stored before the open mode).  This helper does
+ * not check that CLFE_OPEN is actually set in the record.
+ *
+ * The masked flag values are cast back to their enum types explicitly so
+ * the header also compiles as C++, which has no implicit int-to-enum
+ * conversion.
+ */
+static inline
+struct changelog_ext_openmode *changelog_rec_openmode(
+					const struct changelog_rec *rec)
+{
+	enum changelog_rec_flags crf = (enum changelog_rec_flags)
+		(rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID |
+				  CLF_EXTRA_FLAGS));
+	enum changelog_rec_extra_flags cref = CLFE_INVALID;
+
+	if (rec->cr_flags & CLF_EXTRA_FLAGS)
+		cref = (enum changelog_rec_extra_flags)
+			(changelog_rec_extra_flags(rec)->cr_extra_flags &
+			 (CLFE_UIDGID | CLFE_NID));
+
+	return (struct changelog_ext_openmode *)((char *)rec +
+					changelog_rec_offset(crf, cref));
+}
+
+/*
+ * The xattr name is the fourth extra extension.
+ *
+ * Returns the address inside \a rec at changelog_rec_offset() computed from
+ * the record's own flags and, of the extra flags, only CLFE_UIDGID,
+ * CLFE_NID and CLFE_OPEN (the extensions stored before the xattr name).
+ * This helper does not check that CLFE_XATTR is actually set in the record.
+ *
+ * The masked flag values are cast back to their enum types explicitly so
+ * the header also compiles as C++, which has no implicit int-to-enum
+ * conversion.
+ */
+static inline
+struct changelog_ext_xattr *changelog_rec_xattr(
+					const struct changelog_rec *rec)
+{
+	enum changelog_rec_flags crf = (enum changelog_rec_flags)
+		(rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID |
+				  CLF_EXTRA_FLAGS));
+	enum changelog_rec_extra_flags cref = CLFE_INVALID;
+
+	if (rec->cr_flags & CLF_EXTRA_FLAGS)
+		cref = (enum changelog_rec_extra_flags)
+			(changelog_rec_extra_flags(rec)->cr_extra_flags &
+			 (CLFE_UIDGID | CLFE_NID | CLFE_OPEN));
+
+	return (struct changelog_ext_xattr *)((char *)rec +
+					changelog_rec_offset(crf, cref));
+}
+
/* The name follows the rename, jobid and extra flags extns, if present */
static inline char *changelog_rec_name(const struct changelog_rec *rec)
{
enum changelog_rec_flags crf_wanted,
enum changelog_rec_extra_flags cref_want)
{
+ char *xattr_mov = NULL;
+ char *omd_mov = NULL;
+ char *nid_mov = NULL;
char *uidgid_mov = NULL;
char *ef_mov;
char *jid_mov;
/* Locations of extensions in the remapped record */
if (rec->cr_flags & CLF_EXTRA_FLAGS) {
+ xattr_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
+ cref_want & ~CLFE_XATTR);
+ omd_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
+ cref_want & ~(CLFE_OPEN |
+ CLFE_XATTR));
+ nid_mov = (char *)rec +
+ changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
+ cref_want & ~(CLFE_NID |
+ CLFE_OPEN |
+ CLFE_XATTR));
uidgid_mov = (char *)rec +
changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
- CLFE_INVALID);
+ cref_want & ~(CLFE_UIDGID |
+ CLFE_NID |
+ CLFE_OPEN |
+ CLFE_XATTR));
cref = changelog_rec_extra_flags(rec)->cr_extra_flags;
}
/* Move the extension fields to the desired positions */
if ((crf_wanted & CLF_EXTRA_FLAGS) &&
(rec->cr_flags & CLF_EXTRA_FLAGS)) {
+ if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR))
+ memmove(xattr_mov, changelog_rec_xattr(rec),
+ sizeof(struct changelog_ext_xattr));
+
+ if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN))
+ memmove(omd_mov, changelog_rec_openmode(rec),
+ sizeof(struct changelog_ext_openmode));
+
+ if ((cref_want & CLFE_NID) && (cref & CLFE_NID))
+ memmove(nid_mov, changelog_rec_nid(rec),
+ sizeof(struct changelog_ext_nid));
+
if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID))
memmove(uidgid_mov, changelog_rec_uidgid(rec),
sizeof(struct changelog_ext_uidgid));
sizeof(struct changelog_ext_rename));
/* Clear newly added fields */
+ if (xattr_mov && (cref_want & CLFE_XATTR) &&
+ !(cref & CLFE_XATTR))
+ memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr));
+
+ if (omd_mov && (cref_want & CLFE_OPEN) &&
+ !(cref & CLFE_OPEN))
+ memset(omd_mov, 0, sizeof(struct changelog_ext_openmode));
+
+ if (nid_mov && (cref_want & CLFE_NID) &&
+ !(cref & CLFE_NID))
+ memset(nid_mov, 0, sizeof(struct changelog_ext_nid));
+
if (uidgid_mov && (cref_want & CLFE_UIDGID) &&
!(cref & CLFE_UIDGID))
memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid));
#define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK)
/**
- * HSM request progress state
+ * HSM request progress state
*/
enum hsm_progress_states {
+ HPS_NONE = 0,
HPS_WAITING = 1,
HPS_RUNNING = 2,
HPS_DONE = 3,
};
-#define HPS_NONE 0
static inline const char *hsm_progress_state2name(enum hsm_progress_states s)
{
struct hsm_action_list {
__u32 hal_version;
__u32 hal_count; /* number of hai's to follow */
- __u64 hal_compound_id; /* returned by coordinator */
+ __u64 hal_compound_id; /* returned by coordinator, ignored */
__u64 hal_flags;
__u32 hal_archive_id; /* which archive backend */
__u32 padding1;
boundaries. See hai_zero */
} __attribute__((packed));
-#ifndef HAVE_CFS_SIZE_ROUND
-static inline int cfs_size_round (int val)
-{
- return (val + 7) & (~0x7);
-}
-#define HAVE_CFS_SIZE_ROUND
-#endif
-
/* Return pointer to first hai in action list */
static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
{
- return (struct hsm_action_item *)(hal->hal_fsname +
- cfs_size_round(strlen(hal-> \
- hal_fsname)
- + 1));
+	/* skip the NUL-terminated fsname, rounded up to a multiple of 8 */
+	return (struct hsm_action_item *)(hal->hal_fsname +
+			__ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8));
}
+
/* Return pointer to next hai */
static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai)
{
- return (struct hsm_action_item *)((char *)hai +
- cfs_size_round(hai->hai_len));
+	/* step over this item, its length rounded up to a multiple of 8 */
+	return (struct hsm_action_item *)((char *)hai +
+					  __ALIGN_KERNEL(hai->hai_len, 8));
}
/* Return size of an hsm_action_list */
size_t sz;
struct hsm_action_item *hai;
- sz = sizeof(*hal) + cfs_size_round(strlen(hal->hal_fsname) + 1);
+ sz = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);
hai = hai_first(hal);
for (i = 0; i < hal->hal_count ; i++, hai = hai_next(hai))
- sz += cfs_size_round(hai->hai_len);
+ sz += __ALIGN_KERNEL(hai->hai_len, 8);
return sz;
}
SK_CRYPT_INVALID = -1,
SK_CRYPT_EMPTY = 0,
SK_CRYPT_AES256_CTR = 1,
- SK_CRYPT_MAX = 2,
};
enum sk_hmac_alg {
SK_HMAC_EMPTY = 0,
SK_HMAC_SHA256 = 1,
SK_HMAC_SHA512 = 2,
- SK_HMAC_MAX = 3,
};
struct sk_crypt_type {
- char *sct_name;
- size_t sct_bytes;
+ const char *sct_name;
+ int sct_type;
};
struct sk_hmac_type {
- char *sht_name;
- size_t sht_bytes;
+ const char *sht_name;
+ int sht_type;
};
enum lock_mode_user {
LLA_RESULT_SAME,
};
+/* Bit positions of the heat flags; LU_HEAT_FLAG_* are 1 << position. */
+enum lu_heat_flag_bit {
+ LU_HEAT_FLAG_BIT_INVALID = 0,
+ LU_HEAT_FLAG_BIT_OFF, /* bit 1 */
+ LU_HEAT_FLAG_BIT_CLEAR, /* bit 2 */
+};
+
+#define LU_HEAT_FLAG_CLEAR (1 << LU_HEAT_FLAG_BIT_CLEAR) /* 0x4 */
+#define LU_HEAT_FLAG_OFF (1 << LU_HEAT_FLAG_BIT_OFF) /* 0x2 */
+
+enum obd_heat_type {
+ OBD_HEAT_READSAMPLE = 0,
+ OBD_HEAT_WRITESAMPLE = 1,
+ OBD_HEAT_READBYTE = 2,
+ OBD_HEAT_WRITEBYTE = 3,
+ OBD_HEAT_COUNT
+};
+
+#define LU_HEAT_NAMES { \
+ [OBD_HEAT_READSAMPLE] = "readsample", \
+ [OBD_HEAT_WRITESAMPLE] = "writesample", \
+ [OBD_HEAT_READBYTE] = "readbyte", \
+ [OBD_HEAT_WRITEBYTE] = "writebyte", \
+}
+
+/*
+ * Heat values with a fixed two-word header followed by a variable-length
+ * array of 64-bit entries.
+ */
+struct lu_heat {
+ __u32 lh_count; /* presumably number of lh_heat[] entries - TODO confirm */
+ __u32 lh_flags; /* presumably LU_HEAT_FLAG_* bits - TODO confirm */
+ __u64 lh_heat[0]; /* variable-length trailing array */
+};
+
#if defined(__cplusplus)
}
#endif