#include <asm/byteorder.h>
#include <linux/errno.h>
+#include <linux/fiemap.h>
#include <linux/types.h>
-
/*
* This is due to us being out of kernel and the way the OpenSFS branch
* handles CFLAGS.
*/
#ifdef __KERNEL__
# include <uapi/linux/lnet/lnet-types.h>
-# include <uapi/linux/lustre/lustre_user.h> /* Defn's shared with user-space. */
-# include <uapi/linux/lustre/lustre_ver.h>
#else
# include <linux/lnet/lnet-types.h>
-# include <linux/lustre/lustre_user.h>
-# include <linux/lustre/lustre_ver.h>
#endif
+#include <linux/lustre/lustre_user.h>
+#include <linux/lustre/lustre_ver.h>
#if defined(__cplusplus)
extern "C" {
return next;
}
-static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
+static inline __kernel_size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
{
- size_t size;
+ __kernel_size_t size;
if (attr & LUDA_TYPE) {
- const size_t align = sizeof(struct luda_type) - 1;
+ const __kernel_size_t align = sizeof(struct luda_type) - 1;
size = (sizeof(struct lu_dirent) + namelen + 1 + align) &
~align;
RPCs in parallel */
#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL /* striped DNE dir */
#define OBD_CONNECT_SUBTREE 0x800000000000000ULL /* fileset mount */
-#define OBD_CONNECT_LOCKAHEAD_OLD 0x1000000000000000ULL /* Old Cray lockahead */
+/* was OBD_CONNECT_LOCKAHEAD_OLD 0x1000000000000000ULL, old lockahead 2.12-2.13 */
/** bulk matchbits is sent within ptlrpc_body */
#define OBD_CONNECT_BULK_MBITS 0x2000000000000000ULL
#define OBD_CONNECT2_LOCKAHEAD 0x2ULL /* ladvise lockahead v2 */
#define OBD_CONNECT2_DIR_MIGRATE 0x4ULL /* migrate striped dir */
#define OBD_CONNECT2_SUM_STATFS 0x8ULL /* MDT return aggregated stats */
+#define OBD_CONNECT2_OVERSTRIPING 0x10ULL /* OST overstriping support */
#define OBD_CONNECT2_FLR 0x20ULL /* FLR support */
#define OBD_CONNECT2_WBC_INTENTS 0x40ULL /* create/unlink/... intents for wbc, also operations under client-held parent locks */
#define OBD_CONNECT2_LOCK_CONVERT 0x80ULL /* IBITS lock convert support */
#define OBD_CONNECT2_ARCHIVE_ID_ARRAY 0x100ULL /* store HSM archive_id in array */
+#define OBD_CONNECT2_INC_XID 0x200ULL /* Increasing xid */
#define OBD_CONNECT2_SELINUX_POLICY 0x400ULL /* has client SELinux policy */
#define OBD_CONNECT2_LSOM 0x800ULL /* LSOM support */
#define OBD_CONNECT2_PCC 0x1000ULL /* Persistent Client Cache */
-#define OBD_CONNECT2_PLAIN_LAYOUT 0x2000ULL /* Plain Directory Layout */
-
+#define OBD_CONNECT2_CRUSH 0x2000ULL /* crush hash striped directory */
+#define OBD_CONNECT2_ASYNC_DISCARD 0x4000ULL /* support async DoM data discard */
+#define OBD_CONNECT2_ENCRYPT 0x8000ULL /* client-to-disk encrypt */
/* XXX README XXX:
* Please DO NOT add flag values here before first ensuring that this same
* flag value is not in use on some other branch. Please clear any such
* changes with senior engineers before starting to use a new flag. Then,
* submit a small patch against EVERY branch that ONLY adds the new flag,
- * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the
- * flag to check_obd_connect_data(), and updates wiretests accordingly, so it
- * can be approved and landed easily to reserve the flag for future use. */
+ * updates obd_connect_names[], adds the flag to check_obd_connect_data(),
+ * and updates wiretests accordingly, so it can be approved and landed easily
+ * to reserve the flag for future use.
+ */
/* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS
* connection. It is a temporary bug fix for Imperative Recovery interop
OBD_CONNECT_GRANT_PARAM | \
OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
-#define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | OBD_CONNECT2_FLR | \
- OBD_CONNECT2_SUM_STATFS | \
- OBD_CONNECT2_LOCK_CONVERT | \
+#define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | \
OBD_CONNECT2_DIR_MIGRATE | \
+ OBD_CONNECT2_SUM_STATFS | \
+ OBD_CONNECT2_OVERSTRIPING | \
+ OBD_CONNECT2_FLR |\
+ OBD_CONNECT2_LOCK_CONVERT | \
OBD_CONNECT2_ARCHIVE_ID_ARRAY | \
+ OBD_CONNECT2_INC_XID | \
OBD_CONNECT2_SELINUX_POLICY | \
- OBD_CONNECT2_LSOM)
+ OBD_CONNECT2_LSOM | \
+ OBD_CONNECT2_ASYNC_DISCARD | \
+ OBD_CONNECT2_PCC | \
+ OBD_CONNECT2_CRUSH)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_GRANT_PARAM | \
OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
-#define OST_CONNECT_SUPPORTED2 OBD_CONNECT2_LOCKAHEAD
+#define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID)
#define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID)
#define ECHO_CONNECT_SUPPORTED2 0
OST_SYNC = 16,
OST_SET_INFO = 17,
OST_QUOTACHECK = 18, /* not used since 2.4 */
- OST_QUOTACTL = 19,
+ OST_QUOTACTL = 19,
OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
OST_LADVISE = 21,
+ OST_FALLOCATE = 22,
OST_LAST_OPC /* must be < 33 to avoid MDS_GETATTR */
};
#define OST_FIRST_OPC OST_REPLY
#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC)
#define LOV_MAGIC LOV_MAGIC_V1
#define LOV_MAGIC_COMP_V1 (0x0BD60000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_FOREIGN (0x0BD70000 | LOV_MAGIC_MAGIC)
+#define LOV_MAGIC_SEL (0x0BD80000 | LOV_MAGIC_MAGIC)
/*
* magic for fully defined striping
}
static inline __u32
-lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic)
+lov_mds_md_max_stripe_count(__kernel_size_t buf_size, __u32 lmm_magic)
{
switch (lmm_magic) {
case LOV_MAGIC_V1: {
#define OBD_MD_FLPROJID (0x0100000000000000ULL) /* project ID */
#define OBD_MD_SECCTX (0x0200000000000000ULL) /* embed security xattr */
+#define OBD_MD_FLLAZYSIZE (0x0400000000000000ULL) /* Lazy size */
+#define OBD_MD_FLLAZYBLOCKS (0x0800000000000000ULL) /* Lazy blocks */
+
#define OBD_MD_FLALLQUOTA (OBD_MD_FLUSRQUOTA | \
OBD_MD_FLGRPQUOTA | \
OBD_MD_FLPRJQUOTA)
#define OBD_BRW_CHECK 0x10
#define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */
#define OBD_BRW_GRANTED 0x40 /* the ost manages this */
+/* OBD_BRW_NOCACHE is currently neither set nor tested */
#define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */
#define OBD_BRW_NOQUOTA 0x100
#define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */
* space for unstable pages; asking
* it to sync quickly */
#define OBD_BRW_OVER_PRJQUOTA 0x8000 /* Running out of project quota */
+#define OBD_BRW_RDMA_ONLY 0x20000 /* RPC contains RDMA-only pages */
#define OBD_BRW_OVER_ALLQUOTA (OBD_BRW_OVER_USRQUOTA | \
OBD_BRW_OVER_GRPQUOTA | \
MDS_HSM_CT_REGISTER = 59,
MDS_HSM_CT_UNREGISTER = 60,
MDS_SWAP_LAYOUTS = 61,
+ MDS_RMFID = 62,
MDS_LAST_OPC
};
#define DISP_OPEN_DENY 0x10000000
/* INODE LOCK PARTS */
-#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also
- * was used to protect permission (mode,
- * owner, group etc) before 2.4. */
-#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */
-#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */
-#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */
-
-/* The PERM bit is added int 2.4, and it is used to protect permission(mode,
- * owner, group, acl etc), so to separate the permission from LOOKUP lock.
- * Because for remote directories(in DNE), these locks will be granted by
- * different MDTs(different ldlm namespace).
- *
- * For local directory, MDT will always grant UPDATE_LOCK|PERM_LOCK together.
- * For Remote directory, the master MDT, where the remote directory is, will
- * grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where the name entry is,
- * will grant LOOKUP_LOCK. */
-#define MDS_INODELOCK_PERM 0x000010
-#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */
-#define MDS_INODELOCK_DOM 0x000040 /* Data for data-on-mdt files */
-
-#define MDS_INODELOCK_MAXSHIFT 6
+enum mds_ibits_locks { /* Inode lock bits. Each enumerator below is a
+ * distinct single-bit mask, so several can be
+ * combined with bitwise OR (e.g. UPDATE|PERM). */
+ MDS_INODELOCK_LOOKUP = 0x000001, /* For namespace, dentry, etc. Was
+ * also used to protect permission
+ * (mode, owner, group, etc.) before
+ * 2.4. */
+ MDS_INODELOCK_UPDATE = 0x000002, /* size, links, timestamps */
+ MDS_INODELOCK_OPEN = 0x000004, /* for opened files */
+ MDS_INODELOCK_LAYOUT = 0x000008, /* for layout */
+
+ /* The PERM bit was added in 2.4 and protects permission
+ * (mode, owner, group, ACL, etc.) separately from the LOOKUP lock.
+ * For remote directories (in DNE) these locks will be granted by
+ * different MDTs (different LDLM namespaces).
+ *
+ * For a local directory, the MDT always grants UPDATE|PERM together.
+ * For a remote directory, the master MDT (where the remote directory
+ * is) grants UPDATE|PERM, and the remote MDT (where the name entry is)
+ * grants LOOKUP.
+ */
+ MDS_INODELOCK_PERM = 0x000010,
+ MDS_INODELOCK_XATTR = 0x000020, /* non-permission extended attrs */
+ MDS_INODELOCK_DOM = 0x000040, /* Data for Data-on-MDT files */
+ /* MDS_INODELOCK_NUMBITS must equal the number of bits defined above;
+ * do not forget to increase it when adding bits. */
+};
+#define MDS_INODELOCK_NUMBITS 7
/* This FULL lock is useful to take on unlink sort of operations */
-#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
+#define MDS_INODELOCK_FULL ((1 << MDS_INODELOCK_NUMBITS) - 1)
/* DOM lock shouldn't be canceled early, use this macro for ELC */
#define MDS_INODELOCK_ELC (MDS_INODELOCK_FULL & ~MDS_INODELOCK_DOM)
LUSTRE_INDEX_FL = 0x00001000, /* hash-indexed directory */
LUSTRE_DIRSYNC_FL = 0x00010000, /* dirsync behaviour (dir only) */
LUSTRE_TOPDIR_FL = 0x00020000, /* Top of directory hierarchies*/
- LUSTRE_DIRECTIO_FL = 0x00100000, /* Use direct i/o */
LUSTRE_INLINE_DATA_FL = 0x10000000, /* Inode has inline data. */
LUSTRE_PROJINHERIT_FL = 0x20000000, /* Create with parents projid */
MDS_CLOSE_RESYNC_DONE = 1 << 16,
MDS_CLOSE_LAYOUT_SPLIT = 1 << 17,
MDS_TRUNC_KEEP_LEASE = 1 << 18,
+ MDS_PCC_ATTACH = 1 << 19,
+ MDS_CLOSE_UPDATE_TIMES = 1 << 20,
};
#define MDS_CLOSE_INTENT (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | \
struct lu_fid cr_fid2;
struct lustre_handle cr_open_handle_old; /* in case of open replay */
__s64 cr_time;
- __u64 cr_rdev;
+ union {
+ __u64 cr_rdev;
+ __u32 cr_archive_id;
+ };
__u64 cr_ioepoch;
__u64 cr_padding_1; /* rr_blocks */
__u32 cr_mode;
__u16 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */
};
+#define LMV_DESC_QOS_MAXAGE_DEFAULT 60 /* Seconds */
+
/* lmv structures */
struct lmv_desc {
__u32 ld_tgt_count; /* how many MDS's */
struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */
};
+#define LMV_DEBUG(mask, lmv, msg) \
+ CDEBUG(mask, "%s LMV: magic %#x count %u index %u hash %#x version %u migrate offset %u migrate hash %u.\n", \
+ msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count, \
+ (lmv)->lmv_master_mdt_index, (lmv)->lmv_hash_type, \
+ (lmv)->lmv_layout_version, (lmv)->lmv_migrate_offset, \
+ (lmv)->lmv_migrate_hash)
+
+/* Foreign LMV EA: a fixed header of four __u32 fields followed by a
+ * free-format, variable-length value. lfm_value is a flexible array member,
+ * so sizeof(struct lmv_foreign_md) covers the header only; lfm_length
+ * records the length of the value (presumably in bytes - TODO confirm).
+ */
+struct lmv_foreign_md {
+ __u32 lfm_magic; /* magic number = LMV_MAGIC_FOREIGN */
+ __u32 lfm_length; /* length of lfm_value */
+ __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_* */
+ __u32 lfm_flags; /* flags, type specific */
+ char lfm_value[]; /* free format value */
+};
+
#define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */
#define LMV_MAGIC LMV_MAGIC_V1
/* #define LMV_USER_MAGIC 0x0CD30CD0 */
#define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */
-
-/* Right now only the lower part(0-16bits) of lmv_hash_type is being used,
- * and the higher part will be the flag to indicate the status of object,
- * for example the object is being migrated. And the hash function
- * might be interpreted differently with different flags. */
-#define LMV_HASH_TYPE_MASK 0x0000ffff
-
-#define LMV_HASH_FLAG_MIGRATION 0x80000000
-
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 12, 55, 0)
-/* Since lustre 2.8, this flag will not be needed, instead this DEAD
- * and orphan flags will be stored in LMA (see LMAI_ORPHAN)
- * Keep this flag just for LFSCK, because it still might meet such
- * flag when it checks the old FS */
-#define LMV_HASH_FLAG_DEAD 0x40000000
-#endif
-#define LMV_HASH_FLAG_BAD_TYPE 0x20000000
-
-/* The striped directory has ever lost its master LMV EA, then LFSCK
- * re-generated it. This flag is used to indicate such case. It is an
- * on-disk flag. */
-#define LMV_HASH_FLAG_LOST_LMV 0x10000000
+#define LMV_MAGIC_FOREIGN 0x0CD50CD0 /* magic for lmv foreign */
/**
* The FNV-1a hash algorithm is as follows:
**/
#define LUSTRE_FNV_1A_64_PRIME 0x100000001b3ULL
#define LUSTRE_FNV_1A_64_OFFSET_BIAS 0xcbf29ce484222325ULL
-static inline __u64 lustre_hash_fnv_1a_64(const void *buf, size_t size)
+static inline __u64 lustre_hash_fnv_1a_64(const void *buf, __kernel_size_t size)
{
__u64 hash = LUSTRE_FNV_1A_64_OFFSET_BIAS;
const unsigned char *p = buf;
- size_t i;
+ __kernel_size_t i;
for (i = 0; i < size; i++) {
hash ^= p[i];
return hash;
}
+/* CRUSH placement group count */
+#define LMV_CRUSH_PG_COUNT 4096
+
union lmv_mds_md {
__u32 lmv_magic;
struct lmv_mds_md_v1 lmv_md_v1;
struct lmv_user_md lmv_user_md;
+ struct lmv_foreign_md lmv_foreign_md;
};
static inline int lmv_mds_md_size(int stripe_count, unsigned int lmm_magic)
MGS_FIRST_OPC = MGS_CONNECT
};
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
-#define MGS_PARAM_MAXLEN 1024
-#define KEY_SET_INFO "set_info"
-
-struct mgs_send_param {
- char mgs_param[MGS_PARAM_MAXLEN];
-};
-#endif
-
/* We pass this info to the MGS so it can write config logs */
#define MTI_NAME_MAXLEN 64
#define MTI_PARAM_MAXLEN 4096
__u32 lrh_index;
__u32 lrh_type;
__u32 lrh_id;
-};
+} __attribute__((packed));
struct llog_rec_tail {
__u32 lrt_len;
__u32 lrt_index;
-};
+} __attribute__((packed));
/* Where data follow just after header */
#define REC_DATA(ptr) \
#define o_dropped o_misc
#define o_cksum o_nlink
#define o_grant_used o_data_version
+#define o_falloc_mode o_nlink
struct lfsck_request {
__u32 lr_event;
SEC_FIRST_OPC = SEC_CTX_INIT
};
-/*
- * capa related definitions
- */
-#define CAPA_HMAC_MAX_LEN 64
-#define CAPA_HMAC_KEY_MAX_LEN 56
-
-/* NB take care when changing the sequence of elements this struct,
- * because the offset info is used in find_capa() */
-struct lustre_capa {
- struct lu_fid lc_fid; /** fid */
- __u64 lc_opc; /** operations allowed */
- __u64 lc_uid; /** file owner */
- __u64 lc_gid; /** file group */
- __u32 lc_flags; /** HMAC algorithm & flags */
- __u32 lc_keyid; /** key# used for the capability */
- __u32 lc_timeout; /** capa timeout value (sec) */
- __u32 lc_expiry; /** expiry time (sec) */
- __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /** HMAC */
-} __attribute__((packed));
-
-/** lustre_capa::lc_opc */
-enum {
- CAPA_OPC_BODY_WRITE = 1<<0, /**< write object data */
- CAPA_OPC_BODY_READ = 1<<1, /**< read object data */
- CAPA_OPC_INDEX_LOOKUP = 1<<2, /**< lookup object fid */
- CAPA_OPC_INDEX_INSERT = 1<<3, /**< insert object fid */
- CAPA_OPC_INDEX_DELETE = 1<<4, /**< delete object fid */
- CAPA_OPC_OSS_WRITE = 1<<5, /**< write oss object data */
- CAPA_OPC_OSS_READ = 1<<6, /**< read oss object data */
- CAPA_OPC_OSS_TRUNC = 1<<7, /**< truncate oss object */
- CAPA_OPC_OSS_DESTROY = 1<<8, /**< destroy oss object */
- CAPA_OPC_META_WRITE = 1<<9, /**< write object meta data */
- CAPA_OPC_META_READ = 1<<10, /**< read object meta data */
-};
-
-#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE)
-#define CAPA_OPC_MDS_ONLY \
- (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \
- CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE)
-#define CAPA_OPC_OSS_ONLY \
- (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC | \
- CAPA_OPC_OSS_DESTROY)
-#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
-#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
-
-/* lustre_capa::lc_hmac_alg */
-enum {
- CAPA_HMAC_ALG_SHA1 = 1, /**< sha1 algorithm */
- CAPA_HMAC_ALG_MAX,
-};
-
-#define CAPA_FL_MASK 0x00ffffff
-#define CAPA_HMAC_ALG_MASK 0xff000000
-
-struct lustre_capa_key {
- __u64 lk_seq; /**< mds# */
- __u32 lk_keyid; /**< key# */
- __u32 lk_padding;
- __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /**< key */
-} __attribute__((packed));
-
/** The link ea holds 1 \a link_ea_entry for each hardlink */
#define LINK_EA_MAGIC 0x11EAF1DFUL
struct link_ea_header {
unsigned char lee_reclen[2];
unsigned char lee_parent_fid[sizeof(struct lu_fid)];
char lee_name[0];
-}__attribute__((packed));
+} __attribute__((packed));
/** fid2path request/reply structure */
struct getinfo_fid2path {
struct close_data_resync_done cd_resync;
/* split close */
__u16 cd_mirror_id;
+ /* PCC release */
+ __u32 cd_archive_id;
};
};
__u16 uop_type;
__u16 uop_param_count;
__u16 uop_params_off[0];
-};
+} __attribute__((packed));
struct update_ops {
struct update_op uops_op[0];