X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fuapi%2Flinux%2Flustre%2Flustre_idl.h;h=2843da8727f43306b55fc2e4a41e6a1fc56adcf2;hp=313e2eb27a6ac88e569dbc73eec6d45cc01c1272;hb=749b641e89db51dff01ae2f6ffe416b6d4d4f23a;hpb=c7a833830de691967081cd7a42199b924ea7efdc diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 313e2eb..2843da8 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -23,7 +23,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2016, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -132,28 +132,8 @@ extern "C" { #define SEQ_DATA_PORTAL 31 #define SEQ_CONTROLLER_PORTAL 32 #define MGS_BULK_PORTAL 33 - -/* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, n8851@cray.com */ - -/* packet types */ -#define PTL_RPC_MSG_REQUEST 4711 -#define PTL_RPC_MSG_ERR 4712 -#define PTL_RPC_MSG_REPLY 4713 - -/* DON'T use swabbed values of MAGIC as magic! */ -#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3 -#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B - -#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2 - -#define PTLRPC_MSG_VERSION 0x00000003 -#define LUSTRE_VERSION_MASK 0xffff0000 -#define LUSTRE_OBD_VERSION 0x00010000 -#define LUSTRE_MDS_VERSION 0x00020000 -#define LUSTRE_OST_VERSION 0x00030000 -#define LUSTRE_DLM_VERSION 0x00040000 -#define LUSTRE_LOG_VERSION 0x00050000 -#define LUSTRE_MGS_VERSION 0x00060000 +/* #define DVS_PORTAL 63 */ +/* reserved for Cray DVS - spitzcor@cray.com, roe@cray.com, n8851@cray.com */ /** * Describes a range of sequence, lsr_start is included but lsr_end is @@ -193,12 +173,14 @@ extern void lustre_loa_init(struct lustre_ost_attrs *loa, const struct lu_fid *fid, __u32 compat, __u32 incompat); -/* copytool uses a 32b bitmask field to encode archive-Ids during register - * with MDT thru kuc. +/* copytool can use any nonnegative integer to represent archive-Ids during + * register with MDT thru kuc. * archive num = 0 => all - * archive num from 1 to 32 + * archive num from 1 to MAX_U32 */ -#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8) +#define LL_HSM_ORIGIN_MAX_ARCHIVE (sizeof(__u32) * 8) +/* the max count of archive ids that one agent can support */ +#define LL_HSM_MAX_ARCHIVES_PER_AGENT 1024 /** * HSM on-disk attributes stored in a separate xattr. @@ -404,6 +386,23 @@ struct lu_orphan_ent_v2 { struct lu_orphan_rec_v2 loe_rec; }; +struct lu_orphan_rec_v3 { + struct lu_orphan_rec lor_rec; + struct ost_layout lor_layout; + /* The OST-object declared layout version in PFID EA.*/ + __u32 lor_layout_version; + /* The OST-object declared layout range (of version) in PFID EA.*/ + __u32 lor_range; + __u32 lor_padding_1; + __u64 lor_padding_2; +}; + +struct lu_orphan_ent_v3 { + /* The orphan OST-object's FID */ + struct lu_fid loe_key; + struct lu_orphan_rec_v3 loe_rec; +}; + /** @} lu_fid */ /** \defgroup lu_dir lu_dir @@ -535,12 +534,15 @@ static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr) if (attr & LUDA_TYPE) { const size_t align = sizeof(struct luda_type) - 1; - size = (sizeof(struct lu_dirent) + namelen + align) & ~align; - size += sizeof(struct luda_type); - } else - size = sizeof(struct lu_dirent) + namelen; - return (size + 7) & ~7; + size = (sizeof(struct lu_dirent) + namelen + 1 + align) & + ~align; + size += sizeof(struct luda_type); + } else { + size = sizeof(struct lu_dirent) + namelen + 1; + } + + return (size + 7) & ~7; } #define MDS_DIR_END_OFF 0xfffffffffffffffeULL @@ -584,59 +586,109 @@ static inline void lustre_handle_copy(struct lustre_handle *tgt, tgt->cookie = src->cookie; } -struct lustre_handle_array { - unsigned int count; - struct lustre_handle handles[0]; +/* lustre_msg struct magic. DON'T use swabbed values of MAGIC as magic! */ +enum lustre_msg_magic { + LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3, + LUSTRE_MSG_MAGIC_V2_SWABBED = 0xD30BD00B, + LUSTRE_MSG_MAGIC = LUSTRE_MSG_MAGIC_V2 }; /* flags for lm_flags */ -#define MSGHDR_AT_SUPPORT 0x1 -#define MSGHDR_CKSUM_INCOMPAT18 0x2 +enum lustre_msghdr { + MSGHDR_AT_SUPPORT = 0x1, /* adaptive timeouts, lm_cksum valid + * in early reply messages */ + MSGHDR_CKSUM_INCOMPAT18 = 0x2, /* compat for 1.8, needs to be set well + * beyond 2.8.0 for compatibility */ +}; #define lustre_msg lustre_msg_v2 /* we depend on this structure to be 8-byte aligned */ /* this type is only endian-adjusted in lustre_unpack_msg() */ struct lustre_msg_v2 { - __u32 lm_bufcount; - __u32 lm_secflvr; - __u32 lm_magic; - __u32 lm_repsize; - __u32 lm_cksum; - __u32 lm_flags; - __u32 lm_padding_2; - __u32 lm_padding_3; - __u32 lm_buflens[0]; -}; - -/* without gss, ptlrpc_body is put at the first buffer. */ + __u32 lm_bufcount; /* number of buffers in lm_buflens[] */ + __u32 lm_secflvr; /* 0 = no crypto, or sptlrpc security flavour */ + __u32 lm_magic; /* RPC version magic = LUSTRE_MSG_MAGIC_V2 */ + __u32 lm_repsize; /* size of preallocated reply buffer */ + __u32 lm_cksum; /* CRC32 of ptlrpc_body early reply messages */ + __u32 lm_flags; /* enum lustre_msghdr MSGHDR_* flags */ + __u32 lm_padding_2; /* unused */ + __u32 lm_padding_3; /* unused */ + __u32 lm_buflens[0]; /* length of additional buffers in bytes, + * padded to a multiple of 8 bytes. */ + /* + * message buffers are packed after padded lm_buflens[] array, + * padded to a multiple of 8 bytes each to align contents. + */ +}; + +/* ptlrpc_body packet pb_types */ +#define PTL_RPC_MSG_REQUEST 4711 /* normal RPC request message */ +#define PTL_RPC_MSG_ERR 4712 /* error reply if request unprocessed */ +#define PTL_RPC_MSG_REPLY 4713 /* normal RPC reply message */ + +/* ptlrpc_body pb_version ((target_version << 16) | rpc_version) */ +enum lustre_msg_version { + PTLRPC_MSG_VERSION = 0x00000003, + LUSTRE_VERSION_MASK = 0xffff0000, + LUSTRE_OBD_VERSION = 0x00010000, + LUSTRE_MDS_VERSION = 0x00020000, + LUSTRE_OST_VERSION = 0x00030000, + LUSTRE_DLM_VERSION = 0x00040000, + LUSTRE_LOG_VERSION = 0x00050000, + LUSTRE_MGS_VERSION = 0x00060000, +}; + +/* pb_flags that apply to all request messages */ +/* #define MSG_LAST_REPLAY 0x0001 obsolete 2.0 => {REQ,LOCK}_REPLAY_DONE */ +#define MSG_RESENT 0x0002 /* was previously sent, no reply seen */ +#define MSG_REPLAY 0x0004 /* was processed, got reply, recovery */ +/* #define MSG_AT_SUPPORT 0x0008 obsolete since 1.5, AT always enabled */ +/* #define MSG_DELAY_REPLAY 0x0010 obsolete since 2.0 */ +/* #define MSG_VERSION_REPLAY 0x0020 obsolete since 1.8.2, VBR always on */ +#define MSG_REQ_REPLAY_DONE 0x0040 /* request replay over, locks next */ +#define MSG_LOCK_REPLAY_DONE 0x0080 /* lock replay over, client done */ + +/* pb_op_flags for connect opcodes: MDS_CONNECT, OST_CONNECT, MGS_CONNECT */ +#define MSG_CONNECT_RECOVERING 0x00000001 /* target is in recovery */ +#define MSG_CONNECT_RECONNECT 0x00000002 /* tgt already has client import */ +#define MSG_CONNECT_REPLAYABLE 0x00000004 /* target supports RPC replay */ +/* #define MSG_CONNECT_PEER 0x00000008 obsolete since 1.2, removed in 1.5 */ +#define MSG_CONNECT_LIBCLIENT 0x00000010 /* obsolete since 2.3, removed 2.6 */ +#define MSG_CONNECT_INITIAL 0x00000020 /* first client connection attempt */ +/* #define MSG_CONNECT_ASYNC 0x00000040 obsolete since 1.5 */ +#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */ +#define MSG_CONNECT_TRANSNO 0x00000100 /* client sent transno in replay */ + +/* number of previous object versions in pb_pre_versions[] */ #define PTLRPC_NUM_VERSIONS 4 +/* without gss, ptlrpc_body is put at the first buffer. */ struct ptlrpc_body_v3 { struct lustre_handle pb_handle; - __u32 pb_type; - __u32 pb_version; - __u32 pb_opc; - __u32 pb_status; - __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */ - __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */ + __u32 pb_type; /* request/reply/err type: PTL_RPC_MSG_* */ + __u32 pb_version; /* LUSTRE_*_VERSION | PTLRPC_MSG_VERSION */ + __u32 pb_opc; /* RPC opcodes: MDS_*, OST_*, LDLM_, ... */ + __u32 pb_status; /* negative Linux x86 error number */ + __u64 pb_last_xid; /* highest replied XID w/o lower unreplied XID*/ + __u16 pb_tag; /* multiple modifying RPCs virtual slot index */ __u16 pb_padding0; __u32 pb_padding1; - __u64 pb_last_committed; - __u64 pb_transno; - __u32 pb_flags; - __u32 pb_op_flags; - __u32 pb_conn_cnt; - __u32 pb_timeout; /* for req, the deadline, for rep, the service est */ - __u32 pb_service_time; /* for rep, actual service time */ - __u32 pb_limit; - __u64 pb_slv; - /* VBR: pre-versions */ + __u64 pb_last_committed;/* rep: highest pb_transno committed to disk */ + __u64 pb_transno; /* server-assigned transno for modifying RPCs */ + __u32 pb_flags; /* req: MSG_* flags */ + __u32 pb_op_flags; /* req: MSG_CONNECT_* flags */ + __u32 pb_conn_cnt; /* connect instance of this client on server */ + __u32 pb_timeout; /* req: max wait time; rep: service estimate */ + __u32 pb_service_time; /* rep: server arrival to reply in seconds */ + __u32 pb_limit; /* rep: dynamic DLM LRU lock count limit */ + __u64 pb_slv; /* rep: dynamic DLM LRU server lock volume */ + /* VBR: rep: previous pb_version(s) of objects modified by this RPC */ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS]; __u64 pb_mbits; /**< match bits for bulk request */ - /* padding for future needs */ + /* padding for future needs - fix lustre_swab_ptlrpc_body() also */ __u64 pb_padding64_0; __u64 pb_padding64_1; __u64 pb_padding64_2; - char pb_jobid[LUSTRE_JOBID_SIZE]; + char pb_jobid[LUSTRE_JOBID_SIZE]; /* req: ASCII jobid from env + NUL */ }; #define ptlrpc_body ptlrpc_body_v3 @@ -692,38 +744,6 @@ struct ptlrpc_body_v2 { /** only use in req->rq_{req,rep}_swab_mask */ #define MSG_PTLRPC_HEADER_OFF 31 -/* Flags that are operation-specific go in the top 16 bits. */ -#define MSG_OP_FLAG_MASK 0xffff0000 -#define MSG_OP_FLAG_SHIFT 16 - -/* Flags that apply to all requests are in the bottom 16 bits */ -#define MSG_GEN_FLAG_MASK 0x0000ffff -#define MSG_LAST_REPLAY 0x0001 -#define MSG_RESENT 0x0002 -#define MSG_REPLAY 0x0004 -/* #define MSG_AT_SUPPORT 0x0008 - * This was used in early prototypes of adaptive timeouts, and while there - * shouldn't be any users of that code there also isn't a need for using this - * bits. Defer usage until at least 1.10 to avoid potential conflict. */ -#define MSG_DELAY_REPLAY 0x0010 -#define MSG_VERSION_REPLAY 0x0020 -#define MSG_REQ_REPLAY_DONE 0x0040 -#define MSG_LOCK_REPLAY_DONE 0x0080 - -/* - * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) - */ - -#define MSG_CONNECT_RECOVERING 0x00000001 -#define MSG_CONNECT_RECONNECT 0x00000002 -#define MSG_CONNECT_REPLAYABLE 0x00000004 -/* #define MSG_CONNECT_PEER 0x00000008 removed 1.5 */ -#define MSG_CONNECT_LIBCLIENT 0x00000010 -#define MSG_CONNECT_INITIAL 0x00000020 -#define MSG_CONNECT_ASYNC 0x00000040 -#define MSG_CONNECT_NEXT_VER 0x00000080 /* use next version of lustre_msg */ -#define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */ - /* Connect flags */ #define OBD_CONNECT_RDONLY 0x1ULL /*client has read-only access*/ #define OBD_CONNECT_INDEX 0x2ULL /*connect specific LOV idx */ @@ -805,17 +825,31 @@ struct ptlrpc_body_v2 { #define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */ #define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */ /* ocd_connect_flags2 flags */ -#define OBD_CONNECT2_FILE_SECCTX 0x1ULL /* set file security context at create */ -#define OBD_CONNECT2_LOCKAHEAD 0x2ULL /* ladvise lockahead v2 */ - +#define OBD_CONNECT2_FILE_SECCTX 0x1ULL /* set file security context at create */ +#define OBD_CONNECT2_LOCKAHEAD 0x2ULL /* ladvise lockahead v2 */ +#define OBD_CONNECT2_DIR_MIGRATE 0x4ULL /* migrate striped dir */ +#define OBD_CONNECT2_SUM_STATFS 0x8ULL /* MDT return aggregated stats */ +#define OBD_CONNECT2_OVERSTRIPING 0x10ULL /* OST overstriping support */ +#define OBD_CONNECT2_FLR 0x20ULL /* FLR support */ +#define OBD_CONNECT2_WBC_INTENTS 0x40ULL /* create/unlink/... intents for wbc, also operations under client-held parent locks */ +#define OBD_CONNECT2_LOCK_CONVERT 0x80ULL /* IBITS lock convert support */ +#define OBD_CONNECT2_ARCHIVE_ID_ARRAY 0x100ULL /* store HSM archive_id in array */ +#define OBD_CONNECT2_INC_XID 0x200ULL /* Increasing xid */ +#define OBD_CONNECT2_SELINUX_POLICY 0x400ULL /* has client SELinux policy */ +#define OBD_CONNECT2_LSOM 0x800ULL /* LSOM support */ +#define OBD_CONNECT2_PCC 0x1000ULL /* Persistent Client Cache */ +#define OBD_CONNECT2_CRUSH 0x2000ULL /* crush hash striped directory */ +#define OBD_CONNECT2_ASYNC_DISCARD 0x4000ULL /* support async DoM data discard */ +#define OBD_CONNECT2_ENCRYPT 0x8000ULL /* client-to-disk encrypt */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same * flag value is not in use on some other branch. Please clear any such * changes with senior engineers before starting to use a new flag. Then, * submit a small patch against EVERY branch that ONLY adds the new flag, - * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the - * flag to check_obd_connect_data(), and updates wiretests accordingly, so it - * can be approved and landed easily to reserve the flag for future use. */ + * updates obd_connect_names[], adds the flag to check_obd_connect_data(), + * and updates wiretests accordingly, so it can be approved and landed easily + * to reserve the flag for future use. + */ /* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS * connection. It is a temporary bug fix for Imperative Recovery interop @@ -855,9 +889,20 @@ struct ptlrpc_body_v2 { OBD_CONNECT_MULTIMODRPCS | \ OBD_CONNECT_SUBTREE | OBD_CONNECT_LARGE_ACL | \ OBD_CONNECT_GRANT_PARAM | \ - OBD_CONNECT_FLAGS2) - -#define MDT_CONNECT_SUPPORTED2 OBD_CONNECT2_FILE_SECCTX + OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2) + +#define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | \ + OBD_CONNECT2_DIR_MIGRATE | \ + OBD_CONNECT2_SUM_STATFS | \ + OBD_CONNECT2_OVERSTRIPING | \ + OBD_CONNECT2_FLR |\ + OBD_CONNECT2_LOCK_CONVERT | \ + OBD_CONNECT2_ARCHIVE_ID_ARRAY | \ + OBD_CONNECT2_INC_XID | \ + OBD_CONNECT2_SELINUX_POLICY | \ + OBD_CONNECT2_LSOM | \ + OBD_CONNECT2_ASYNC_DISCARD | \ + OBD_CONNECT2_PCC) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ @@ -875,11 +920,12 @@ struct ptlrpc_body_v2 { OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_FID | \ OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK | \ OBD_CONNECT_BULK_MBITS | \ - OBD_CONNECT_GRANT_PARAM | OBD_CONNECT_FLAGS2) + OBD_CONNECT_GRANT_PARAM | \ + OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2) -#define OST_CONNECT_SUPPORTED2 OBD_CONNECT2_LOCKAHEAD +#define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID) -#define ECHO_CONNECT_SUPPORTED 0 +#define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID) #define ECHO_CONNECT_SUPPORTED2 0 #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \ @@ -891,6 +937,7 @@ struct ptlrpc_body_v2 { /* Features required for this version of the client to work with server */ #define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_FID | \ + OBD_CONNECT_ATTRFID | \ OBD_CONNECT_FULL20) /* This structure is used for both request and reply. @@ -947,21 +994,43 @@ struct obd_connect_data { /* * Supported checksum algorithms. Up to 32 checksum types are supported. * (32-bit mask stored in obd_connect_data::ocd_cksum_types) - * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new - * algorithm and also the OBD_FL_CKSUM* flags. + * Please update DECLARE_CKSUM_NAME in obd_cksum.h when adding a new + * algorithm and also the OBD_FL_CKSUM* flags, OBD_CKSUM_ALL flag, + * OBD_FL_CKSUM_ALL flag and potentially OBD_CKSUM_T10_ALL flag. + */ +enum cksum_types { + OBD_CKSUM_CRC32 = 0x00000001, + OBD_CKSUM_ADLER = 0x00000002, + OBD_CKSUM_CRC32C = 0x00000004, + OBD_CKSUM_RESERVED = 0x00000008, + OBD_CKSUM_T10IP512 = 0x00000010, + OBD_CKSUM_T10IP4K = 0x00000020, + OBD_CKSUM_T10CRC512 = 0x00000040, + OBD_CKSUM_T10CRC4K = 0x00000080, +}; + +#define OBD_CKSUM_T10_ALL (OBD_CKSUM_T10IP512 | OBD_CKSUM_T10IP4K | \ + OBD_CKSUM_T10CRC512 | OBD_CKSUM_T10CRC4K) + +#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | OBD_CKSUM_ADLER | OBD_CKSUM_CRC32C | \ + OBD_CKSUM_T10_ALL) + +/* + * The default checksum algorithm used on top of T10PI GRD tags for RPC. + * Considering that the checksum-of-checksums is only computing CRC32 on a + * 4KB chunk of GRD tags for a 1MB RPC for 512B sectors, or 16KB of GRD + * tags for 16MB of 4KB sectors, this is only 1/256 or 1/1024 of the + * total data being checksummed, so the checksum type used here should not + * affect overall system performance noticeably. */ -typedef enum cksum_types { - OBD_CKSUM_CRC32 = 0x00000001, - OBD_CKSUM_ADLER = 0x00000002, - OBD_CKSUM_CRC32C= 0x00000004, -} cksum_type_t; +#define OBD_CKSUM_T10_TOP OBD_CKSUM_ADLER /* * OST requests: OBDO & OBD request records */ /* opcodes */ -typedef enum { +enum ost_cmd { OST_REPLY = 0, /* reply ? */ OST_GETATTR = 1, OST_SETATTR = 2, @@ -979,11 +1048,12 @@ typedef enum { OST_SYNC = 16, OST_SET_INFO = 17, OST_QUOTACHECK = 18, /* not used since 2.4 */ - OST_QUOTACTL = 19, + OST_QUOTACTL = 19, OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */ OST_LADVISE = 21, + OST_FALLOCATE = 22, OST_LAST_OPC /* must be < 33 to avoid MDS_GETATTR */ -} ost_cmd_t; +}; #define OST_FIRST_OPC OST_REPLY enum obdo_flags { @@ -1000,13 +1070,16 @@ enum obdo_flags { OBD_FL_NO_GRPQUOTA = 0x00000200, /* the object's group is over quota */ OBD_FL_CREATE_CROW = 0x00000400, /* object should be create on write */ OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */ - OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ - OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ - OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ - OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ - OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ - OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ - OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. + OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ + OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ + OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ + OBD_FL_CKSUM_T10IP512 = 0x00005000, /* T10PI IP cksum, 512B sector */ + OBD_FL_CKSUM_T10IP4K = 0x00006000, /* T10PI IP cksum, 4KB sector */ + OBD_FL_CKSUM_T10CRC512 = 0x00007000, /* T10PI CRC cksum, 512B sector */ + OBD_FL_CKSUM_T10CRC4K = 0x00008000, /* T10PI CRC cksum, 4KB sector */ + OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ + OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ + OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. * XXX: obsoleted - reserved for old * clients prior than 2.2 */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ @@ -1015,10 +1088,18 @@ enum obdo_flags { OBD_FL_SHORT_IO = 0x00400000, /* short io request */ /* OBD_FL_LOCAL_MASK = 0xF0000000, was local-only flags until 2.10 */ - /* Note that while these checksum values are currently separate bits, - * in 2.x we can actually allow all values from 1-31 if we wanted. */ + /* + * Note that while the original checksum values were separate bits, + * in 2.x we can actually allow all values from 1-31. T10-PI checksum + * types already use values which are not separate bits. + */ OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER | - OBD_FL_CKSUM_CRC32C, + OBD_FL_CKSUM_CRC32C | OBD_FL_CKSUM_T10IP512 | + OBD_FL_CKSUM_T10IP4K | OBD_FL_CKSUM_T10CRC512 | + OBD_FL_CKSUM_T10CRC4K, + + OBD_FL_NO_QUOTA_ALL = OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA | + OBD_FL_NO_PRJQUOTA, }; /* @@ -1039,6 +1120,8 @@ enum obdo_flags { #define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC) #define LOV_MAGIC LOV_MAGIC_V1 #define LOV_MAGIC_COMP_V1 (0x0BD60000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_FOREIGN (0x0BD70000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_SEL (0x0BD80000 | LOV_MAGIC_MAGIC) /* * magic for fully defined striping @@ -1082,7 +1165,11 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ }; -#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) +#define MAX_MD_SIZE_OLD (sizeof(struct lov_mds_md) + \ + 4 * sizeof(struct lov_ost_data)) +#define MAX_MD_SIZE (sizeof(struct lov_comp_md_v1) + \ + 4 * (sizeof(struct lov_comp_md_entry_v1) + \ + MAX_MD_SIZE_OLD)) #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) /* This is the default MDT reply size allocated, should the striping be bigger, @@ -1097,6 +1184,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define XATTR_TRUSTED_PREFIX "trusted." #define XATTR_SECURITY_PREFIX "security." +#define XATTR_NAME_SOM "trusted.som" #define XATTR_NAME_LOV "trusted.lov" #define XATTR_NAME_LMA "trusted.lma" #define XATTR_NAME_LMV "trusted.lmv" @@ -1176,19 +1264,19 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */ #define OBD_MD_DOM_SIZE (0X00001000ULL) /* Data-on-MDT component size */ #define OBD_MD_FLNLINK (0x00002000ULL) /* link count */ -#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */ -/*#define OBD_MD_FLINLINE (0x00008000ULL) inline data. used until 1.6.5 */ +#define OBD_MD_FLPARENT (0x00004000ULL) /* parent FID */ +#define OBD_MD_LAYOUT_VERSION (0x00008000ULL) /* OST object layout version */ #define OBD_MD_FLRDEV (0x00010000ULL) /* device number */ #define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */ #define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */ #define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */ #define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ -#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ -/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ +/* OBD_MD_FLQOS (0x00200000ULL) has never been used */ +/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ #define OBD_MD_FLPRJQUOTA (0x00400000ULL) /* over quota flags sent from ost */ #define OBD_MD_FLGROUP (0x01000000ULL) /* group */ #define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ -#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */ +/* OBD_MD_FLEPOCH (0x04000000ULL) obsolete 2.7.50 */ /* ->mds if epoch opens or closes */ #define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ #define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ @@ -1197,7 +1285,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */ #define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ -#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ +/* OBD_MD_REINT (0x0000000200000000ULL) obsolete 1.8 */ #define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */ #define OBD_MD_TSTATE (0x0000000800000000ULL) /* transient state field */ @@ -1205,10 +1293,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ -/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */ -#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ -#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ -#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ +#define OBD_MD_FLAGSTATFS (0x0000010000000000ULL) /* aggregated statfs */ +/* OBD_MD_FLMDSCAPA (0x0000020000000000ULL) obsolete 2.7.54 */ +/* OBD_MD_FLOSSCAPA (0x0000040000000000ULL) obsolete 2.7.54 */ +/* OBD_MD_FLCKSPLIT (0x0000080000000000ULL) obsolete 2.3.58*/ #define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ #define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes * under lock; for xattr @@ -1223,6 +1311,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */ #define OBD_MD_FLOSTLAYOUT (0x0080000000000000ULL) /* contain ost_layout */ #define OBD_MD_FLPROJID (0x0100000000000000ULL) /* project ID */ +#define OBD_MD_SECCTX (0x0200000000000000ULL) /* embed security xattr */ + +#define OBD_MD_FLLAZYSIZE (0x0400000000000000ULL) /* Lazy size */ +#define OBD_MD_FLLAZYBLOCKS (0x0800000000000000ULL) /* Lazy blocks */ #define OBD_MD_FLALLQUOTA (OBD_MD_FLUSRQUOTA | \ OBD_MD_FLGRPQUOTA | \ @@ -1232,7 +1324,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \ OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \ - OBD_MD_FLGENER | OBD_MD_FLRDEV | OBD_MD_FLGROUP | \ + OBD_MD_FLPARENT | OBD_MD_FLRDEV | OBD_MD_FLGROUP | \ OBD_MD_FLPROJID) #define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS) @@ -1258,12 +1350,16 @@ struct hsm_state_set { #define OBD_BRW_READ 0x01 #define OBD_BRW_WRITE 0x02 #define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) +#define OBD_BRW_NDELAY 0x04 /* Non-delay RPC should be issued for + * this page. Non-delay RPCs have bit + * rq_no_delay set. */ #define OBD_BRW_SYNC 0x08 /* this page is a part of synchronous * transfer and is not accounted in * the grant. */ #define OBD_BRW_CHECK 0x10 #define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ #define OBD_BRW_GRANTED 0x40 /* the ost manages this */ +/* OBD_BRW_NOCACHE is currently neither set nor tested */ #define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */ #define OBD_BRW_NOQUOTA 0x100 #define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */ @@ -1276,6 +1372,7 @@ struct hsm_state_set { * space for unstable pages; asking * it to sync quickly */ #define OBD_BRW_OVER_PRJQUOTA 0x8000 /* Running out of project quota */ +#define OBD_BRW_RDMA_ONLY 0x20000 /* RPC contains RDMA-only pages*/ #define OBD_BRW_OVER_ALLQUOTA (OBD_BRW_OVER_USRQUOTA | \ OBD_BRW_OVER_GRPQUOTA | \ @@ -1288,6 +1385,8 @@ struct hsm_state_set { #define OBD_BRW_LOCALS (OBD_BRW_LOCAL1) +#define OBD_MAX_GRANT 0x7fffffffUL /* Max grant allowed to one client: 2 GiB */ + #define OBD_OBJECT_EOF LUSTRE_EOF #define OST_MIN_PRECREATE 32 @@ -1510,11 +1609,11 @@ struct lquota_lvb { #define lvb_glb_ver lvb_id_may_rel /* current version of the global index */ /* op codes */ -typedef enum { +enum quota_cmd { QUOTA_DQACQ = 601, QUOTA_DQREL = 602, QUOTA_LAST_OPC -} quota_cmd_t; +}; #define QUOTA_FIRST_OPC QUOTA_DQACQ /* @@ -1522,7 +1621,7 @@ typedef enum { */ /* opcodes */ -typedef enum { +enum mds_cmd { MDS_GETATTR = 33, MDS_GETATTR_NAME = 34, MDS_CLOSE = 35, @@ -1552,17 +1651,18 @@ typedef enum { MDS_HSM_CT_REGISTER = 59, MDS_HSM_CT_UNREGISTER = 60, MDS_SWAP_LAYOUTS = 61, + MDS_RMFID = 62, MDS_LAST_OPC -} mds_cmd_t; +}; #define MDS_FIRST_OPC MDS_GETATTR /* opcodes for object update */ -typedef enum { +enum update_cmd { OUT_UPDATE = 1000, OUT_UPDATE_LAST_OPC -} update_cmd_t; +}; #define OUT_UPDATE_FIRST_OPC OUT_UPDATE @@ -1580,7 +1680,8 @@ enum mds_reint_op { REINT_SETXATTR = 7, REINT_RMENTRY = 8, REINT_MIGRATE = 9, - REINT_MAX + REINT_RESYNC = 10, + REINT_MAX }; /* the disposition of the intent outlines what was executed */ @@ -1599,29 +1700,31 @@ enum mds_reint_op { #define DISP_OPEN_DENY 0x10000000 /* INODE LOCK PARTS */ -#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also - * was used to protect permission (mode, - * owner, group etc) before 2.4. */ -#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ -#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */ -#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */ - -/* The PERM bit is added int 2.4, and it is used to protect permission(mode, - * owner, group, acl etc), so to separate the permission from LOOKUP lock. - * Because for remote directories(in DNE), these locks will be granted by - * different MDTs(different ldlm namespace). - * - * For local directory, MDT will always grant UPDATE_LOCK|PERM_LOCK together. - * For Remote directory, the master MDT, where the remote directory is, will - * grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where the name entry is, - * will grant LOOKUP_LOCK. */ -#define MDS_INODELOCK_PERM 0x000010 -#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */ -#define MDS_INODELOCK_DOM 0x000040 /* Data for data-on-mdt files */ - -#define MDS_INODELOCK_MAXSHIFT 6 +enum mds_ibits_locks { + MDS_INODELOCK_LOOKUP = 0x000001, /* For namespace, dentry etc. Was + * used to protect permission (mode, + * owner, group, etc) before 2.4. */ + MDS_INODELOCK_UPDATE = 0x000002, /* size, links, timestamps */ + MDS_INODELOCK_OPEN = 0x000004, /* For opened files */ + MDS_INODELOCK_LAYOUT = 0x000008, /* for layout */ + + /* The PERM bit is added in 2.4, and is used to protect permission + * (mode, owner, group, ACL, etc.) separate from LOOKUP lock. + * For remote directories (in DNE) these locks will be granted by + * different MDTs (different LDLM namespace). + * + * For local directory, the MDT always grants UPDATE|PERM together. + * For remote directory, master MDT (where remote directory is) grants + * UPDATE|PERM, and remote MDT (where name entry is) grants LOOKUP_LOCK. + */ + MDS_INODELOCK_PERM = 0x000010, + MDS_INODELOCK_XATTR = 0x000020, /* non-permission extended attrs */ + MDS_INODELOCK_DOM = 0x000040, /* Data for Data-on-MDT files */ + /* Do not forget to increase MDS_INODELOCK_NUMBITS when adding bits */ +}; +#define MDS_INODELOCK_NUMBITS 7 /* This FULL lock is useful to take on unlink sort of operations */ -#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1) +#define MDS_INODELOCK_FULL ((1 << MDS_INODELOCK_NUMBITS) - 1) /* DOM lock shouldn't be canceled early, use this macro for ELC */ #define MDS_INODELOCK_ELC (MDS_INODELOCK_FULL & ~MDS_INODELOCK_DOM) @@ -1643,17 +1746,16 @@ enum { enum { /* these should be identical to their EXT4_*_FL counterparts, they are * redefined here only to avoid dragging in fs/ext4/ext4.h */ - LUSTRE_SYNC_FL = 0x00000008, /* Synchronous updates */ - LUSTRE_IMMUTABLE_FL = 0x00000010, /* Immutable file */ - LUSTRE_APPEND_FL = 0x00000020, /* writes to file may only append */ - LUSTRE_NODUMP_FL = 0x00000040, /* do not dump file */ - LUSTRE_NOATIME_FL = 0x00000080, /* do not update atime */ - LUSTRE_INDEX_FL = 0x00001000, /* hash-indexed directory */ - LUSTRE_DIRSYNC_FL = 0x00010000, /* dirsync behaviour (dir only) */ - LUSTRE_TOPDIR_FL = 0x00020000, /* Top of directory hierarchies*/ - LUSTRE_DIRECTIO_FL = 0x00100000, /* Use direct i/o */ - LUSTRE_INLINE_DATA_FL = 0x10000000, /* Inode has inline data. */ - LUSTRE_PROJINHERIT_FL = 0x20000000, /* Create with parents projid */ + LUSTRE_SYNC_FL = 0x00000008, /* Synchronous updates */ + LUSTRE_IMMUTABLE_FL = 0x00000010, /* Immutable file */ + LUSTRE_APPEND_FL = 0x00000020, /* file writes may only append */ + LUSTRE_NODUMP_FL = 0x00000040, /* do not dump file */ + LUSTRE_NOATIME_FL = 0x00000080, /* do not update atime */ + LUSTRE_INDEX_FL = 0x00001000, /* hash-indexed directory */ + LUSTRE_DIRSYNC_FL = 0x00010000, /* dirsync behaviour (dir only) */ + LUSTRE_TOPDIR_FL = 0x00020000, /* Top of directory hierarchies*/ + LUSTRE_INLINE_DATA_FL = 0x10000000, /* Inode has inline data. */ + LUSTRE_PROJINHERIT_FL = 0x20000000, /* Create with parents projid */ /* These flags will not be identical to any EXT4_*_FL counterparts, * and only reserved for lustre purpose. Note: these flags might @@ -1662,11 +1764,24 @@ enum { * wired by la_flags see osd_attr_get(). * 2. If these flags needs to be stored into inode, they will be * stored in LMA. see LMAI_XXXX */ - LUSTRE_ORPHAN_FL = 0x00002000, + LUSTRE_ORPHAN_FL = 0x00002000, + LUSTRE_SET_SYNC_FL = 0x00040000, /* Synchronous setattr on OSTs */ - LUSTRE_LMA_FL_MASKS = LUSTRE_ORPHAN_FL, + LUSTRE_LMA_FL_MASKS = LUSTRE_ORPHAN_FL, }; +#ifndef FS_XFLAG_SYNC +#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#endif +#ifndef FS_XFLAG_NOATIME +#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#endif +#ifndef FS_XFLAG_IMMUTABLE +#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#endif +#ifndef FS_XFLAG_APPEND +#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ +#endif #ifndef FS_XFLAG_PROJINHERIT #define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ #endif @@ -1686,8 +1801,7 @@ static inline int ll_ext_to_inode_flags(int flags) #if defined(S_DIRSYNC) ((flags & LUSTRE_DIRSYNC_FL) ? S_DIRSYNC : 0) | #endif - ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0) | - ((flags & LUSTRE_PROJINHERIT_FL) ? FS_XFLAG_PROJINHERIT : 0)); + ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0)); } static inline int ll_inode_to_ext_flags(int iflags) @@ -1698,8 +1812,23 @@ static inline int ll_inode_to_ext_flags(int iflags) #if defined(S_DIRSYNC) ((iflags & S_DIRSYNC) ? LUSTRE_DIRSYNC_FL : 0) | #endif - ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0) | - ((iflags & FS_XFLAG_PROJINHERIT) ? LUSTRE_PROJINHERIT_FL : 0)); + ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0)); +} + +static inline int ll_xflags_to_inode_flags(int xflags) +{ + return ((xflags & FS_XFLAG_SYNC) ? S_SYNC : 0) | + ((xflags & FS_XFLAG_NOATIME) ? S_NOATIME : 0) | + ((xflags & FS_XFLAG_APPEND) ? S_APPEND : 0) | + ((xflags & FS_XFLAG_IMMUTABLE) ? S_IMMUTABLE : 0); +} + +static inline int ll_inode_flags_to_xflags(int flags) +{ + return ((flags & S_SYNC) ? FS_XFLAG_SYNC : 0) | + ((flags & S_NOATIME) ? FS_XFLAG_NOATIME : 0) | + ((flags & S_APPEND) ? FS_XFLAG_APPEND : 0) | + ((flags & S_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0); } #endif @@ -1711,14 +1840,14 @@ enum md_transient_state { struct mdt_body { struct lu_fid mbo_fid1; struct lu_fid mbo_fid2; - struct lustre_handle mbo_handle; + struct lustre_handle mbo_open_handle; __u64 mbo_valid; __u64 mbo_size; /* Offset, in the case of MDS_READPAGE */ __s64 mbo_mtime; __s64 mbo_atime; __s64 mbo_ctime; __u64 mbo_blocks; /* XID, in the case of MDS_READPAGE */ - __u64 mbo_ioepoch; + __u64 mbo_version; /* was mbo_ioepoch before 2.11 */ __u64 mbo_t_state; /* transient file state defined in * enum md_transient_state * was "ino" until 2.4.0 */ @@ -1731,7 +1860,7 @@ struct mdt_body { __u32 mbo_flags; /* LUSTRE_*_FL file attributes */ __u32 mbo_rdev; __u32 mbo_nlink; /* #bytes to read in the case of MDS_READPAGE */ - __u32 mbo_unused2; /* was "generation" until 2.4.0 */ + __u32 mbo_layout_gen; /* was "generation" until 2.4.0 */ __u32 mbo_suppgid; __u32 mbo_eadatasize; __u32 mbo_aclsize; @@ -1748,7 +1877,7 @@ struct mdt_body { }; /* 216 */ struct mdt_ioepoch { - struct lustre_handle mio_handle; + struct lustre_handle mio_open_handle; __u64 mio_unused1; /* was ioepoch */ __u32 mio_unused2; /* was flags */ __u32 mio_padding; @@ -1812,103 +1941,69 @@ struct mdt_rec_setattr { #define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, ie O_TRUNC */ #define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */ #define MDS_ATTR_PROJID 0x10000ULL /* = 65536 */ - -#ifndef FMODE_READ -#define FMODE_READ 00000001 -#define FMODE_WRITE 00000002 -#endif - -#define MDS_FMODE_CLOSED 00000000 -#define MDS_FMODE_EXEC 00000004 -/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */ -/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */ -/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */ - -#define MDS_OPEN_CREATED 00000010 -#define MDS_OPEN_CROSS 00000020 - -#define MDS_OPEN_CREAT 00000100 -#define MDS_OPEN_EXCL 00000200 -#define MDS_OPEN_TRUNC 00001000 -#define MDS_OPEN_APPEND 00002000 -#define MDS_OPEN_SYNC 00010000 -#define MDS_OPEN_DIRECTORY 00200000 - -#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */ -#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ -#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ -#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file. - * We do not support JOIN FILE - * anymore, reserve this flags - * just for preventing such bit - * to be reused. */ - -#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ -#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ -#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ -#define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */ -#define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or - * hsm restore) */ -#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created - unlinked */ -#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease - * delegation, succeed if it's not - * being opened with conflict mode. - */ -#define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ - -/* lustre internal open flags, which should not be set from user space */ -#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ - MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \ - MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \ - MDS_OPEN_RELEASE) +#define MDS_ATTR_LSIZE 0x20000ULL /* = 131072 */ +#define MDS_ATTR_LBLOCKS 0x40000ULL /* = 262144 */ +#define MDS_ATTR_OVERRIDE 0x2000000ULL /* = 33554432 */ enum mds_op_bias { - MDS_CHECK_SPLIT = 1 << 0, +/* MDS_CHECK_SPLIT = 1 << 0, obsolete before 2.3.58 */ MDS_CROSS_REF = 1 << 1, - MDS_VTX_BYPASS = 1 << 2, +/* MDS_VTX_BYPASS = 1 << 2, obsolete since 2.3.54 */ MDS_PERM_BYPASS = 1 << 3, /* MDS_SOM = 1 << 4, obsolete since 2.8.0 */ MDS_QUOTA_IGNORE = 1 << 5, - /* Was MDS_CLOSE_CLEANUP (1 << 6), No more used */ +/* MDS_CLOSE_CLEANUP = 1 << 6, obsolete since 2.3.51 */ MDS_KEEP_ORPHAN = 1 << 7, MDS_RECOV_OPEN = 1 << 8, MDS_DATA_MODIFIED = 1 << 9, MDS_CREATE_VOLATILE = 1 << 10, MDS_OWNEROVERRIDE = 1 << 11, MDS_HSM_RELEASE = 1 << 12, - MDS_RENAME_MIGRATE = 1 << 13, + MDS_CLOSE_MIGRATE = 1 << 13, MDS_CLOSE_LAYOUT_SWAP = 1 << 14, + MDS_CLOSE_LAYOUT_MERGE = 1 << 15, + MDS_CLOSE_RESYNC_DONE = 1 << 16, + MDS_CLOSE_LAYOUT_SPLIT = 1 << 17, + MDS_TRUNC_KEEP_LEASE = 1 << 18, + MDS_PCC_ATTACH = 1 << 19, + MDS_CLOSE_UPDATE_TIMES = 1 << 20, }; +#define MDS_CLOSE_INTENT (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | \ + MDS_CLOSE_LAYOUT_MERGE | MDS_CLOSE_LAYOUT_SPLIT | \ + MDS_CLOSE_RESYNC_DONE) + /* instance of mdt_reint_rec */ struct mdt_rec_create { - __u32 cr_opcode; - __u32 cr_cap; - __u32 cr_fsuid; - __u32 cr_fsuid_h; - __u32 cr_fsgid; - __u32 cr_fsgid_h; - __u32 cr_suppgid1; - __u32 cr_suppgid1_h; - __u32 cr_suppgid2; - __u32 cr_suppgid2_h; - struct lu_fid cr_fid1; - struct lu_fid cr_fid2; - struct lustre_handle cr_old_handle; /* handle in case of open replay */ + __u32 cr_opcode; + __u32 cr_cap; + __u32 cr_fsuid; + __u32 cr_fsuid_h; + __u32 cr_fsgid; + __u32 cr_fsgid_h; + __u32 cr_suppgid1; + __u32 cr_suppgid1_h; + __u32 cr_suppgid2; + __u32 cr_suppgid2_h; + struct lu_fid cr_fid1; + struct lu_fid cr_fid2; + struct lustre_handle cr_open_handle_old; /* in case of open replay */ __s64 cr_time; - __u64 cr_rdev; - __u64 cr_ioepoch; - __u64 cr_padding_1; /* rr_blocks */ - __u32 cr_mode; - __u32 cr_bias; - /* use of helpers set/get_mrc_cr_flags() is needed to access - * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to - * extend cr_flags size without breaking 1.8 compat */ - __u32 cr_flags_l; /* for use with open, low 32 bits */ - __u32 cr_flags_h; /* for use with open, high 32 bits */ - __u32 cr_umask; /* umask for create */ - __u32 cr_padding_4; /* rr_padding_4 */ + union { + __u64 cr_rdev; + __u32 cr_archive_id; + }; + __u64 cr_ioepoch; + __u64 cr_padding_1; /* rr_blocks */ + __u32 cr_mode; + __u32 cr_bias; + /* use of helpers set/get_mrc_cr_flags() is needed to access + * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to + * extend cr_flags size without breaking 1.8 compat */ + __u32 cr_flags_l; /* for use with open, low 32 bits */ + __u32 cr_flags_h; /* for use with open, high 32 bits */ + __u32 cr_umask; /* umask for create */ + __u32 cr_padding_4; /* rr_padding_4 */ }; /* instance of mdt_reint_rec */ @@ -2021,6 +2116,35 @@ struct mdt_rec_setxattr { __u32 sx_padding_11; /* rr_padding_4 */ }; +/* instance of mdt_reint_rec + * FLR: for file resync MDS_REINT_RESYNC RPC. */ +struct mdt_rec_resync { + __u32 rs_opcode; + __u32 rs_cap; + __u32 rs_fsuid; + __u32 rs_fsuid_h; + __u32 rs_fsgid; + __u32 rs_fsgid_h; + __u32 rs_suppgid1; + __u32 rs_suppgid1_h; + __u32 rs_suppgid2; + __u32 rs_suppgid2_h; + struct lu_fid rs_fid; + __u8 rs_padding0[sizeof(struct lu_fid)]; + struct lustre_handle rs_lease_handle; /* rr_mtime */ + __s64 rs_padding1; /* rr_atime */ + __s64 rs_padding2; /* rr_ctime */ + __u64 rs_padding3; /* rr_size */ + __u64 rs_padding4; /* rr_blocks */ + __u32 rs_bias; + __u32 rs_padding5; /* rr_mode */ + __u32 rs_padding6; /* rr_flags */ + __u32 rs_padding7; /* rr_flags_h */ + __u32 rs_padding8; /* rr_umask */ + __u16 rs_mirror_id; + __u16 rs_padding9; /* rr_padding_4 */ +}; + /* * mdt_rec_reint is the template for all mdt_reint_xxx structures. * Do NOT change the size of various members, otherwise the value @@ -2052,9 +2176,12 @@ struct mdt_rec_reint { __u32 rr_flags; __u32 rr_flags_h; __u32 rr_umask; - __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ + __u16 rr_mirror_id; + __u16 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ }; +#define LMV_DESC_QOS_MAXAGE_DEFAULT 60 /* Seconds */ + /* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ @@ -2083,41 +2210,36 @@ struct lmv_mds_md_v1 { * used for now. Higher 16 bits will * be used to mark the object status, * for example migrating or dead. */ - __u32 lmv_layout_version; /* Used for directory restriping */ - __u32 lmv_padding1; - __u64 lmv_padding2; + __u32 lmv_layout_version; /* increased each time layout changed, + * by directory migration, restripe + * and LFSCK. */ + __u32 lmv_migrate_offset; /* once this is set, it means this + * directory is been migrated, stripes + * before this offset belong to target, + * from this to source. */ + __u32 lmv_migrate_hash; /* hash type of source stripes of + * migrating directory */ + __u32 lmv_padding2; __u64 lmv_padding3; char lmv_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */ struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */ }; +/* foreign LMV EA */ +struct lmv_foreign_md { + __u32 lfm_magic; /* magic number = LMV_MAGIC_FOREIGN */ + __u32 lfm_length; /* length of lfm_value */ + __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */ + __u32 lfm_flags; /* flags, type specific */ + char lfm_value[]; /* free format value */ +}; + #define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */ #define LMV_MAGIC LMV_MAGIC_V1 /* #define LMV_USER_MAGIC 0x0CD30CD0 */ #define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */ - -/* Right now only the lower part(0-16bits) of lmv_hash_type is being used, - * and the higher part will be the flag to indicate the status of object, - * for example the object is being migrated. And the hash function - * might be interpreted differently with different flags. */ -#define LMV_HASH_TYPE_MASK 0x0000ffff - -#define LMV_HASH_FLAG_MIGRATION 0x80000000 - -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 11, 56, 0) -/* Since lustre 2.8, this flag will not be needed, instead this DEAD - * and orphan flags will be stored in LMA (see LMAI_ORPHAN) - * Keep this flag just for LFSCK, because it still might meet such - * flag when it checks the old FS */ -#define LMV_HASH_FLAG_DEAD 0x40000000 -#endif -#define LMV_HASH_FLAG_BAD_TYPE 0x20000000 - -/* The striped directory has ever lost its master LMV EA, then LFSCK - * re-generated it. This flag is used to indicate such case. It is an - * on-disk flag. */ -#define LMV_HASH_FLAG_LOST_LMV 0x10000000 +#define LMV_MAGIC_FOREIGN 0x0CD50CD0 /* magic for lmv foreign */ /** * The FNV-1a hash algorithm is as follows: @@ -2151,6 +2273,7 @@ union lmv_mds_md { __u32 lmv_magic; struct lmv_mds_md_v1 lmv_md_v1; struct lmv_user_md lmv_user_md; + struct lmv_foreign_md lmv_foreign_md; }; static inline int lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) @@ -2216,12 +2339,12 @@ enum fld_op { }; /* LFSCK opcodes */ -typedef enum { +enum lfsck_cmd { LFSCK_NOTIFY = 1101, LFSCK_QUERY = 1102, LFSCK_LAST_OPC, - LFSCK_FIRST_OPC = LFSCK_NOTIFY -} lfsck_cmd_t; + LFSCK_FIRST_OPC = LFSCK_NOTIFY +}; /* * LOV data structures @@ -2257,7 +2380,7 @@ struct lov_desc { * LDLM requests: */ /* opcodes -- MUST be distinct from OST/MDS opcodes */ -typedef enum { +enum ldlm_cmd { LDLM_ENQUEUE = 101, LDLM_CONVERT = 102, LDLM_CANCEL = 103, @@ -2266,7 +2389,7 @@ typedef enum { LDLM_GL_CALLBACK = 106, LDLM_SET_INFO = 107, LDLM_LAST_OPC -} ldlm_cmd_t; +}; #define LDLM_FIRST_OPC LDLM_ENQUEUE #define RES_NAME_SIZE 4 @@ -2281,7 +2404,7 @@ struct ldlm_res_id { (unsigned long long)(res)->lr_name.name[3] /* lock types */ -typedef enum ldlm_mode { +enum ldlm_mode { LCK_MINMODE = 0, LCK_EX = 1, LCK_PW = 2, @@ -2292,17 +2415,17 @@ typedef enum ldlm_mode { LCK_GROUP = 64, LCK_COS = 128, LCK_MAXMODE -} ldlm_mode_t; +}; #define LCK_MODE_NUM 8 -typedef enum ldlm_type { +enum ldlm_type { LDLM_PLAIN = 10, LDLM_EXTENT = 11, LDLM_FLOCK = 12, LDLM_IBITS = 13, LDLM_MAX_TYPE -} ldlm_type_t; +}; #define LDLM_MIN_TYPE LDLM_PLAIN @@ -2319,8 +2442,11 @@ static inline bool ldlm_extent_equal(const struct ldlm_extent *ex1, } struct ldlm_inodebits { - __u64 bits; - __u64 try_bits; /* optional bits to try */ + __u64 bits; + union { + __u64 try_bits; /* optional bits to try */ + __u64 cancel_bits; /* for lock convert */ + }; }; struct ldlm_flock_wire { @@ -2337,11 +2463,11 @@ struct ldlm_flock_wire { * this ever changes we will need to swab the union differently based * on the resource type. */ -typedef union ldlm_wire_policy_data { +union ldlm_wire_policy_data { struct ldlm_extent l_extent; struct ldlm_flock_wire l_flock; struct ldlm_inodebits l_inodebits; -} ldlm_wire_policy_data_t; +}; struct barrier_lvb { __u32 lvb_status; @@ -2363,19 +2489,19 @@ union ldlm_gl_desc { enum ldlm_intent_flags { IT_OPEN = 0x00000001, IT_CREAT = 0x00000002, - IT_OPEN_CREAT = 0x00000003, - IT_READDIR = 0x00000004, + IT_OPEN_CREAT = IT_OPEN | IT_CREAT, /* To allow case label. */ + IT_READDIR = 0x00000004, /* Used by mdc, not put on the wire. */ IT_GETATTR = 0x00000008, IT_LOOKUP = 0x00000010, - IT_UNLINK = 0x00000020, - IT_TRUNC = 0x00000040, +/* IT_UNLINK = 0x00000020, Obsolete. */ +/* IT_TRUNC = 0x00000040, Obsolete. */ IT_GETXATTR = 0x00000080, - IT_EXEC = 0x00000100, - IT_PIN = 0x00000200, +/* IT_EXEC = 0x00000100, Obsolete. */ +/* IT_PIN = 0x00000200, Obsolete. */ IT_LAYOUT = 0x00000400, IT_QUOTA_DQACQ = 0x00000800, IT_QUOTA_CONN = 0x00001000, - IT_SETXATTR = 0x00002000, +/* IT_SETXATTR = 0x00002000, Obsolete. */ IT_GLIMPSE = 0x00004000, IT_BRW = 0x00008000, }; @@ -2401,10 +2527,10 @@ struct ldlm_lock_desc { #define LDLM_ENQUEUE_CANCEL_OFF 1 struct ldlm_request { - __u32 lock_flags; - __u32 lock_count; - struct ldlm_lock_desc lock_desc; - struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; + __u32 lock_flags; /* LDLM_FL_*, see lustre_dlm_flags.h */ + __u32 lock_count; /* number of locks in lock_handle[] */ + struct ldlm_lock_desc lock_desc;/* lock descriptor */ + struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; }; struct ldlm_reply { @@ -2422,17 +2548,17 @@ struct ldlm_reply { /* * Opcodes for mountconf (mgs and mgc) */ -typedef enum { - MGS_CONNECT = 250, - MGS_DISCONNECT, - MGS_EXCEPTION, /* node died, etc. */ - MGS_TARGET_REG, /* whenever target starts up */ - MGS_TARGET_DEL, - MGS_SET_INFO, - MGS_CONFIG_READ, - MGS_LAST_OPC -} mgs_cmd_t; -#define MGS_FIRST_OPC MGS_CONNECT +enum mgs_cmd { + MGS_CONNECT = 250, + MGS_DISCONNECT = 251, + MGS_EXCEPTION = 252, /* node died, etc. */ + MGS_TARGET_REG = 253, /* whenever target starts up */ + MGS_TARGET_DEL = 254, + MGS_SET_INFO = 255, + MGS_CONFIG_READ = 256, + MGS_LAST_OPC, + MGS_FIRST_OPC = MGS_CONNECT +}; #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) #define MGS_PARAM_MAXLEN 1024 @@ -2448,17 +2574,17 @@ struct mgs_send_param { #define MTI_PARAM_MAXLEN 4096 #define MTI_NIDS_MAX 32 struct mgs_target_info { - __u32 mti_lustre_ver; - __u32 mti_stripe_index; - __u32 mti_config_ver; - __u32 mti_flags; - __u32 mti_nid_count; - __u32 mti_instance; /* Running instance of target */ - char mti_fsname[MTI_NAME_MAXLEN]; - char mti_svname[MTI_NAME_MAXLEN]; - char mti_uuid[sizeof(struct obd_uuid)]; - __u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t)*/ - char mti_params[MTI_PARAM_MAXLEN]; + __u32 mti_lustre_ver; + __u32 mti_stripe_index; + __u32 mti_config_ver; + __u32 mti_flags; /* LDD_F_* */ + __u32 mti_nid_count; + __u32 mti_instance; /* Running instance of target */ + char mti_fsname[MTI_NAME_MAXLEN]; + char mti_svname[MTI_NAME_MAXLEN]; + char mti_uuid[sizeof(struct obd_uuid)]; + __u64 mti_nids[MTI_NIDS_MAX]; /* host nids (lnet_nid_t) */ + char mti_params[MTI_PARAM_MAXLEN]; }; struct mgs_nidtbl_entry { @@ -2524,15 +2650,14 @@ struct cfg_marker { /* * Opcodes for multiple servers. */ - -typedef enum { - OBD_PING = 400, - OBD_LOG_CANCEL, - OBD_QC_CALLBACK, /* not used since 2.4 */ - OBD_IDX_READ, - OBD_LAST_OPC -} obd_cmd_t; -#define OBD_FIRST_OPC OBD_PING +enum obd_cmd { + OBD_PING = 400, +/* OBD_LOG_CANCEL = 401, obsolete since 1.5 */ +/* OBD_QC_CALLBACK = 402, obsolete since 2.4 */ + OBD_IDX_READ = 403, + OBD_LAST_OPC, + OBD_FIRST_OPC = OBD_PING +}; /** * llog contexts indices. @@ -2581,7 +2706,7 @@ struct llog_catid { #define LLOG_OP_MAGIC 0x10600000 #define LLOG_OP_MASK 0xfff00000 -typedef enum { +enum llog_op_type { LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000, OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00, /* OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, never used */ @@ -2602,7 +2727,7 @@ typedef enum { UPDATE_REC = LLOG_OP_MAGIC | 0xa0000, LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, -} llog_op_type; +}; #define LLOG_REC_HDR_NEEDS_SWABBING(r) \ (((r)->lrh_type & __swab32(LLOG_OP_MASK)) == __swab32(LLOG_OP_MAGIC)) @@ -2616,12 +2741,12 @@ struct llog_rec_hdr { __u32 lrh_index; __u32 lrh_type; __u32 lrh_id; -}; +} __attribute__((packed)); struct llog_rec_tail { __u32 lrt_len; __u32 lrt_index; -}; +} __attribute__((packed)); /* Where data follow just after header */ #define REC_DATA(ptr) \ @@ -2679,7 +2804,7 @@ struct llog_setattr64_rec_v2 { __u32 lsr_gid_h; __u64 lsr_valid; __u32 lsr_projid; - __u32 lsr_padding1; + __u32 lsr_layout_version; __u64 lsr_padding2; __u64 lsr_padding3; struct llog_rec_tail lsr_tail; @@ -2703,8 +2828,13 @@ struct llog_size_change_rec { #define CHANGELOG_ALLMASK 0XFFFFFFFF /** default \a changelog_rec_type mask. Allow all of them, except * CL_ATIME since it can really be time consuming, and not necessary - * under normal use. */ -#define CHANGELOG_DEFMASK (CHANGELOG_ALLMASK & ~(1 << CL_ATIME)) + * under normal use. + * Remove also CL_OPEN, CL_GETXATTR and CL_DN_OPEN from default list as it can + * be costly and only necessary for audit purpose. + */ +#define CHANGELOG_DEFMASK (CHANGELOG_ALLMASK & \ + ~(1 << CL_ATIME | 1 << CL_OPEN | 1 << CL_GETXATTR | \ + 1 << CL_DN_OPEN)) /* changelog llog name, needed by client replicators */ #define CHANGELOG_CATALOG "changelog_catalog" @@ -2724,11 +2854,13 @@ struct llog_changelog_rec { #define CHANGELOG_USER_PREFIX "cl" struct llog_changelog_user_rec { - struct llog_rec_hdr cur_hdr; - __u32 cur_id; - __u32 cur_padding; - __u64 cur_endrec; - struct llog_rec_tail cur_tail; + struct llog_rec_hdr cur_hdr; + __u32 cur_id; + /* only intended to be used in relative time comparisons to + * detect idle users */ + __u32 cur_time; + __u64 cur_endrec; + struct llog_rec_tail cur_tail; } __attribute__((packed)); enum agent_req_status { @@ -2764,7 +2896,7 @@ struct llog_agent_req_rec { * agent_req_status */ __u32 arr_archive_id; /**< backend archive number */ __u64 arr_flags; /**< req flags */ - __u64 arr_compound_id; /**< compound cookie */ + __u64 arr_compound_id; /**< compound cookie, ignored */ __u64 arr_req_create; /**< req. creation time */ __u64 arr_req_change; /**< req. status change time */ struct hsm_action_item arr_hai; /**< req. to the agent */ @@ -2794,12 +2926,23 @@ enum llog_flag { LLOG_F_EXT_JOBID = 0x8, LLOG_F_IS_FIXSIZE = 0x10, LLOG_F_EXT_EXTRA_FLAGS = 0x20, + LLOG_F_EXT_X_UIDGID = 0x40, + LLOG_F_EXT_X_NID = 0x80, + LLOG_F_EXT_X_OMODE = 0x100, + LLOG_F_EXT_X_XATTR = 0x200, /* Note: Flags covered by LLOG_F_EXT_MASK will be inherited from * catlog to plain log, so do not add LLOG_F_IS_FIXSIZE here, * because the catlog record is usually fixed size, but its plain * log record can be variable */ - LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID | LLOG_F_EXT_EXTRA_FLAGS, + LLOG_F_EXT_MASK = LLOG_F_EXT_JOBID | LLOG_F_EXT_EXTRA_FLAGS | + LLOG_F_EXT_X_UIDGID | LLOG_F_EXT_X_NID | + LLOG_F_EXT_X_OMODE | LLOG_F_EXT_X_XATTR, +}; + +/* means first record of catalog */ +enum { + LLOG_CAT_FIRST = -1, }; /* On-disk header structure of each log object, stored in little endian order */ @@ -2845,9 +2988,13 @@ struct llog_log_hdr { llh->llh_hdr.lrh_len - \ sizeof(llh->llh_tail))) -/** log cookies are used to reference a specific log file and a record therein */ +/** log cookies are used to reference a specific log file and a record therein, + and pass record offset from llog_process_thread to llog_write */ struct llog_cookie { - struct llog_logid lgc_lgl; + union { + struct llog_logid lgc_lgl; + __u64 lgc_offset; + }; __u32 lgc_subsys; __u32 lgc_index; __u32 lgc_padding; @@ -2855,17 +3002,17 @@ struct llog_cookie { /** llog protocol */ enum llogd_rpc_ops { - LLOG_ORIGIN_HANDLE_CREATE = 501, - LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, - LLOG_ORIGIN_HANDLE_READ_HEADER = 503, - LLOG_ORIGIN_HANDLE_WRITE_REC = 504, - LLOG_ORIGIN_HANDLE_CLOSE = 505, - LLOG_ORIGIN_CONNECT = 506, - LLOG_CATINFO = 507, /* deprecated */ - LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, - LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ - LLOG_LAST_OPC, - LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE + LLOG_ORIGIN_HANDLE_CREATE = 501, + LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, + LLOG_ORIGIN_HANDLE_READ_HEADER = 503, +/* LLOG_ORIGIN_HANDLE_WRITE_REC = 504, Obsolete by 2.1. */ +/* LLOG_ORIGIN_HANDLE_CLOSE = 505, Obsolete by 1.8. */ +/* LLOG_ORIGIN_CONNECT = 506, Obsolete by 2.4. */ +/* LLOG_CATINFO = 507, Obsolete by 2.3. */ + LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, + LLOG_ORIGIN_HANDLE_DESTROY = 509, /* Obsolete by 2.11. */ + LLOG_LAST_OPC, + LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE }; struct llogd_body { @@ -2919,7 +3066,7 @@ struct obdo { * * sizeof(ost_layout) + sieof(__u32) == sizeof(llog_cookie). */ struct ost_layout o_layout; - __u32 o_padding_3; + __u32 o_layout_version; __u32 o_uid_h; __u32 o_gid_h; @@ -2939,6 +3086,7 @@ struct obdo { #define o_dropped o_misc #define o_cksum o_nlink #define o_grant_used o_data_version +#define o_falloc_mode o_nlink struct lfsck_request { __u32 lr_event; @@ -3092,13 +3240,13 @@ union lu_page { }; /* security opcodes */ -typedef enum { +enum sec_cmd { SEC_CTX_INIT = 801, SEC_CTX_INIT_CONT = 802, SEC_CTX_FINI = 803, SEC_LAST_OPC, SEC_FIRST_OPC = SEC_CTX_INIT -} sec_cmd_t; +}; /* * capa related definitions @@ -3179,7 +3327,7 @@ struct link_ea_entry { unsigned char lee_reclen[2]; unsigned char lee_parent_fid[sizeof(struct lu_fid)]; char lee_name[0]; -}__attribute__((packed)); +} __attribute__((packed)); /** fid2path request/reply structure */ struct getinfo_fid2path { @@ -3201,7 +3349,7 @@ struct getparent { char gp_name[0]; /**< zero-terminated link name */ } __attribute__((packed)); -enum { +enum layout_intent_opc { LAYOUT_INTENT_ACCESS = 0, /** generic access */ LAYOUT_INTENT_READ = 1, /** not used */ LAYOUT_INTENT_WRITE = 2, /** write file, for comp layout */ @@ -3215,8 +3363,7 @@ enum { struct layout_intent { __u32 li_opc; /* intent operation for enqueue, read, write etc */ __u32 li_flags; - __u64 li_start; - __u64 li_end; + struct lu_extent li_extent; } __attribute__((packed)); /** @@ -3291,6 +3438,7 @@ enum update_type { OUT_PUNCH = 14, OUT_READ = 15, OUT_NOOP = 16, + OUT_XATTR_LIST = 17, OUT_LAST }; @@ -3381,11 +3529,24 @@ struct mdc_swap_layouts { __u64 msl_flags; } __attribute__((packed)); +#define INLINE_RESYNC_ARRAY_SIZE 15 +struct close_data_resync_done { + __u32 resync_count; + __u32 resync_ids_inline[INLINE_RESYNC_ARRAY_SIZE]; +}; + struct close_data { struct lustre_handle cd_handle; struct lu_fid cd_fid; __u64 cd_data_version; - __u64 cd_reserved[8]; + union { + __u64 cd_reserved[8]; + struct close_data_resync_done cd_resync; + /* split close */ + __u16 cd_mirror_id; + /* PCC release */ + __u32 cd_archive_id; + }; }; /* Update llog format */ @@ -3394,7 +3555,7 @@ struct update_op { __u16 uop_type; __u16 uop_param_count; __u16 uop_params_off[0]; -}; +} __attribute__((packed)); struct update_ops { struct update_op uops_op[0]; @@ -3445,6 +3606,19 @@ struct llog_update_record { */ }; +/* sepol string format is: + * <1-digit for SELinux status>::: + */ +/* Max length of the sepol string + * Should be large enough to contain a sha512sum of the policy + */ +#define SELINUX_MODE_LEN 1 +#define SELINUX_POLICY_VER_LEN 3 /* 3 chars to leave room for the future */ +#define SELINUX_POLICY_HASH_LEN 64 +#define LUSTRE_NODEMAP_SEPOL_LENGTH (SELINUX_MODE_LEN + NAME_MAX + \ + SELINUX_POLICY_VER_LEN + \ + SELINUX_POLICY_HASH_LEN + 3) + /* nodemap records, uses 32 byte record length */ #define LUSTRE_NODEMAP_NAME_LENGTH 16 struct nodemap_cluster_rec {