X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Fuapi%2Flinux%2Flustre%2Flustre_idl.h;h=df2ce2b1726e64be95c0e6bdf9b4184e8f900914;hb=ff168481a1b2e549df22059db364cae7a315ec9b;hp=2803aa0be5c1f68bfdc5e1aa3acdf89cfe46061a;hpb=c0246d8878097a618efa9296d90896ac2cc2e9e4;p=fs%2Flustre-release.git diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 2803aa0..df2ce2b 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -173,12 +173,14 @@ extern void lustre_loa_init(struct lustre_ost_attrs *loa, const struct lu_fid *fid, __u32 compat, __u32 incompat); -/* copytool uses a 32b bitmask field to encode archive-Ids during register - * with MDT thru kuc. +/* copytool can use any nonnegative integer to represent archive-Ids during + * register with MDT thru kuc. * archive num = 0 => all - * archive num from 1 to 32 + * archive num from 1 to MAX_U32 */ -#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8) +#define LL_HSM_ORIGIN_MAX_ARCHIVE (sizeof(__u32) * 8) +/* the max count of archive ids that one agent can support */ +#define LL_HSM_MAX_ARCHIVES_PER_AGENT 1024 /** * HSM on-disk attributes stored in a separate xattr. @@ -384,6 +386,23 @@ struct lu_orphan_ent_v2 { struct lu_orphan_rec_v2 loe_rec; }; +struct lu_orphan_rec_v3 { + struct lu_orphan_rec lor_rec; + struct ost_layout lor_layout; + /* The OST-object declared layout version in PFID EA.*/ + __u32 lor_layout_version; + /* The OST-object declared layout range (of version) in PFID EA.*/ + __u32 lor_range; + __u32 lor_padding_1; + __u64 lor_padding_2; +}; + +struct lu_orphan_ent_v3 { + /* The orphan OST-object's FID */ + struct lu_fid loe_key; + struct lu_orphan_rec_v3 loe_rec; +}; + /** @} lu_fid */ /** \defgroup lu_dir lu_dir @@ -515,12 +534,15 @@ static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr) if (attr & LUDA_TYPE) { const size_t align = sizeof(struct luda_type) - 1; - size = (sizeof(struct lu_dirent) + namelen + align) & ~align; - size += sizeof(struct luda_type); - } else - size = sizeof(struct lu_dirent) + namelen; - return (size + 7) & ~7; + size = (sizeof(struct lu_dirent) + namelen + 1 + align) & + ~align; + size += sizeof(struct luda_type); + } else { + size = sizeof(struct lu_dirent) + namelen + 1; + } + + return (size + 7) & ~7; } #define MDS_DIR_END_OFF 0xfffffffffffffffeULL @@ -564,11 +586,6 @@ static inline void lustre_handle_copy(struct lustre_handle *tgt, tgt->cookie = src->cookie; } -struct lustre_handle_array { - unsigned int count; - struct lustre_handle handles[0]; -}; - /* lustre_msg struct magic. DON'T use swabbed values of MAGIC as magic! */ enum lustre_msg_magic { LUSTRE_MSG_MAGIC_V2 = 0x0BD00BD3, @@ -808,18 +825,31 @@ struct ptlrpc_body_v2 { #define OBD_CONNECT_OBDOPACK 0x4000000000000000ULL /* compact OUT obdo */ #define OBD_CONNECT_FLAGS2 0x8000000000000000ULL /* second flags word */ /* ocd_connect_flags2 flags */ -#define OBD_CONNECT2_FILE_SECCTX 0x1ULL /* set file security context at create */ -#define OBD_CONNECT2_LOCKAHEAD 0x2ULL /* ladvise lockahead v2 */ -#define OBD_CONNECT2_FLR 0x20ULL /* FLR support */ - +#define OBD_CONNECT2_FILE_SECCTX 0x1ULL /* set file security context at create */ +#define OBD_CONNECT2_LOCKAHEAD 0x2ULL /* ladvise lockahead v2 */ +#define OBD_CONNECT2_DIR_MIGRATE 0x4ULL /* migrate striped dir */ +#define OBD_CONNECT2_SUM_STATFS 0x8ULL /* MDT return aggregated stats */ +#define OBD_CONNECT2_OVERSTRIPING 0x10ULL /* OST overstriping support */ +#define OBD_CONNECT2_FLR 0x20ULL /* FLR support */ +#define OBD_CONNECT2_WBC_INTENTS 0x40ULL /* create/unlink/... intents for wbc, also operations under client-held parent locks */ +#define OBD_CONNECT2_LOCK_CONVERT 0x80ULL /* IBITS lock convert support */ +#define OBD_CONNECT2_ARCHIVE_ID_ARRAY 0x100ULL /* store HSM archive_id in array */ +#define OBD_CONNECT2_INC_XID 0x200ULL /* Increasing xid */ +#define OBD_CONNECT2_SELINUX_POLICY 0x400ULL /* has client SELinux policy */ +#define OBD_CONNECT2_LSOM 0x800ULL /* LSOM support */ +#define OBD_CONNECT2_PCC 0x1000ULL /* Persistent Client Cache */ +#define OBD_CONNECT2_CRUSH 0x2000ULL /* crush hash striped directory */ +#define OBD_CONNECT2_ASYNC_DISCARD 0x4000ULL /* support async DoM data discard */ +#define OBD_CONNECT2_ENCRYPT 0x8000ULL /* client-to-disk encrypt */ /* XXX README XXX: * Please DO NOT add flag values here before first ensuring that this same * flag value is not in use on some other branch. Please clear any such * changes with senior engineers before starting to use a new flag. Then, * submit a small patch against EVERY branch that ONLY adds the new flag, - * updates obd_connect_names[] for lprocfs_rd_connect_flags(), adds the - * flag to check_obd_connect_data(), and updates wiretests accordingly, so it - * can be approved and landed easily to reserve the flag for future use. */ + * updates obd_connect_names[], adds the flag to check_obd_connect_data(), + * and updates wiretests accordingly, so it can be approved and landed easily + * to reserve the flag for future use. + */ /* The MNE_SWAB flag is overloading the MDS_MDS bit only for the MGS * connection. It is a temporary bug fix for Imperative Recovery interop @@ -861,7 +891,18 @@ struct ptlrpc_body_v2 { OBD_CONNECT_GRANT_PARAM | \ OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2) -#define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | OBD_CONNECT2_FLR) +#define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | \ + OBD_CONNECT2_DIR_MIGRATE | \ + OBD_CONNECT2_SUM_STATFS | \ + OBD_CONNECT2_OVERSTRIPING | \ + OBD_CONNECT2_FLR |\ + OBD_CONNECT2_LOCK_CONVERT | \ + OBD_CONNECT2_ARCHIVE_ID_ARRAY | \ + OBD_CONNECT2_INC_XID | \ + OBD_CONNECT2_SELINUX_POLICY | \ + OBD_CONNECT2_LSOM | \ + OBD_CONNECT2_ASYNC_DISCARD | \ + OBD_CONNECT2_PCC) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ @@ -882,7 +923,7 @@ struct ptlrpc_body_v2 { OBD_CONNECT_GRANT_PARAM | \ OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2) -#define OST_CONNECT_SUPPORTED2 OBD_CONNECT2_LOCKAHEAD +#define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID) #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID) #define ECHO_CONNECT_SUPPORTED2 0 @@ -953,15 +994,37 @@ struct obd_connect_data { /* * Supported checksum algorithms. Up to 32 checksum types are supported. * (32-bit mask stored in obd_connect_data::ocd_cksum_types) - * Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new - * algorithm and also the OBD_FL_CKSUM* flags. + * Please update DECLARE_CKSUM_NAME in obd_cksum.h when adding a new + * algorithm and also the OBD_FL_CKSUM* flags, OBD_CKSUM_ALL flag, + * OBD_FL_CKSUM_ALL flag and potentially OBD_CKSUM_T10_ALL flag. */ enum cksum_types { - OBD_CKSUM_CRC32 = 0x00000001, - OBD_CKSUM_ADLER = 0x00000002, - OBD_CKSUM_CRC32C= 0x00000004, + OBD_CKSUM_CRC32 = 0x00000001, + OBD_CKSUM_ADLER = 0x00000002, + OBD_CKSUM_CRC32C = 0x00000004, + OBD_CKSUM_RESERVED = 0x00000008, + OBD_CKSUM_T10IP512 = 0x00000010, + OBD_CKSUM_T10IP4K = 0x00000020, + OBD_CKSUM_T10CRC512 = 0x00000040, + OBD_CKSUM_T10CRC4K = 0x00000080, }; +#define OBD_CKSUM_T10_ALL (OBD_CKSUM_T10IP512 | OBD_CKSUM_T10IP4K | \ + OBD_CKSUM_T10CRC512 | OBD_CKSUM_T10CRC4K) + +#define OBD_CKSUM_ALL (OBD_CKSUM_CRC32 | OBD_CKSUM_ADLER | OBD_CKSUM_CRC32C | \ + OBD_CKSUM_T10_ALL) + +/* + * The default checksum algorithm used on top of T10PI GRD tags for RPC. + * Considering that the checksum-of-checksums is only computing CRC32 on a + * 4KB chunk of GRD tags for a 1MB RPC for 512B sectors, or 16KB of GRD + * tags for 16MB of 4KB sectors, this is only 1/256 or 1/1024 of the + * total data being checksummed, so the checksum type used here should not + * affect overall system performance noticeably. + */ +#define OBD_CKSUM_T10_TOP OBD_CKSUM_ADLER + /* * OST requests: OBDO & OBD request records */ @@ -1006,13 +1069,16 @@ enum obdo_flags { OBD_FL_NO_GRPQUOTA = 0x00000200, /* the object's group is over quota */ OBD_FL_CREATE_CROW = 0x00000400, /* object should be create on write */ OBD_FL_SRVLOCK = 0x00000800, /* delegate DLM locking to server */ - OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ - OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ - OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ - OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */ - OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ - OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ - OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. + OBD_FL_CKSUM_CRC32 = 0x00001000, /* CRC32 checksum type */ + OBD_FL_CKSUM_ADLER = 0x00002000, /* ADLER checksum type */ + OBD_FL_CKSUM_CRC32C = 0x00004000, /* CRC32C checksum type */ + OBD_FL_CKSUM_T10IP512 = 0x00005000, /* T10PI IP cksum, 512B sector */ + OBD_FL_CKSUM_T10IP4K = 0x00006000, /* T10PI IP cksum, 4KB sector */ + OBD_FL_CKSUM_T10CRC512 = 0x00007000, /* T10PI CRC cksum, 512B sector */ + OBD_FL_CKSUM_T10CRC4K = 0x00008000, /* T10PI CRC cksum, 4KB sector */ + OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */ + OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */ + OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client. * XXX: obsoleted - reserved for old * clients prior than 2.2 */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ @@ -1021,10 +1087,18 @@ enum obdo_flags { OBD_FL_SHORT_IO = 0x00400000, /* short io request */ /* OBD_FL_LOCAL_MASK = 0xF0000000, was local-only flags until 2.10 */ - /* Note that while these checksum values are currently separate bits, - * in 2.x we can actually allow all values from 1-31 if we wanted. */ + /* + * Note that while the original checksum values were separate bits, + * in 2.x we can actually allow all values from 1-31. T10-PI checksum + * types already use values which are not separate bits. + */ OBD_FL_CKSUM_ALL = OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER | - OBD_FL_CKSUM_CRC32C, + OBD_FL_CKSUM_CRC32C | OBD_FL_CKSUM_T10IP512 | + OBD_FL_CKSUM_T10IP4K | OBD_FL_CKSUM_T10CRC512 | + OBD_FL_CKSUM_T10CRC4K, + + OBD_FL_NO_QUOTA_ALL = OBD_FL_NO_USRQUOTA | OBD_FL_NO_GRPQUOTA | + OBD_FL_NO_PRJQUOTA, }; /* @@ -1045,6 +1119,8 @@ enum obdo_flags { #define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC) #define LOV_MAGIC LOV_MAGIC_V1 #define LOV_MAGIC_COMP_V1 (0x0BD60000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_FOREIGN (0x0BD70000 | LOV_MAGIC_MAGIC) +#define LOV_MAGIC_SEL (0x0BD80000 | LOV_MAGIC_MAGIC) /* * magic for fully defined striping @@ -1088,7 +1164,11 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ }; -#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) +#define MAX_MD_SIZE_OLD (sizeof(struct lov_mds_md) + \ + 4 * sizeof(struct lov_ost_data)) +#define MAX_MD_SIZE (sizeof(struct lov_comp_md_v1) + \ + 4 * (sizeof(struct lov_comp_md_entry_v1) + \ + MAX_MD_SIZE_OLD)) #define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) /* This is the default MDT reply size allocated, should the striping be bigger, @@ -1183,20 +1263,19 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */ #define OBD_MD_DOM_SIZE (0X00001000ULL) /* Data-on-MDT component size */ #define OBD_MD_FLNLINK (0x00002000ULL) /* link count */ -#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */ -#define OBD_MD_LAYOUT_VERSION (0x00008000ULL) /* layout version for - * OST objects */ +#define OBD_MD_FLPARENT (0x00004000ULL) /* parent FID */ +#define OBD_MD_LAYOUT_VERSION (0x00008000ULL) /* OST object layout version */ #define OBD_MD_FLRDEV (0x00010000ULL) /* device number */ #define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */ #define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */ #define OBD_MD_FLHANDLE (0x00080000ULL) /* file/lock handle */ #define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ -#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ -/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ +/* OBD_MD_FLQOS (0x00200000ULL) has never been used */ +/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */ #define OBD_MD_FLPRJQUOTA (0x00400000ULL) /* over quota flags sent from ost */ #define OBD_MD_FLGROUP (0x01000000ULL) /* group */ #define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ -#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */ +/* OBD_MD_FLEPOCH (0x04000000ULL) obsolete 2.7.50 */ /* ->mds if epoch opens or closes */ #define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ #define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ @@ -1205,7 +1284,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */ #define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ -#define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ +/* OBD_MD_REINT (0x0000000200000000ULL) obsolete 1.8 */ #define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */ #define OBD_MD_TSTATE (0x0000000800000000ULL) /* transient state field */ @@ -1213,10 +1292,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ -/* OBD_MD_FLRMTPERM (0x0000010000000000ULL) remote perm, obsolete */ -#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ -#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ -#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ +#define OBD_MD_FLAGSTATFS (0x0000010000000000ULL) /* aggregated statfs */ +/* OBD_MD_FLMDSCAPA (0x0000020000000000ULL) obsolete 2.7.54 */ +/* OBD_MD_FLOSSCAPA (0x0000040000000000ULL) obsolete 2.7.54 */ +/* OBD_MD_FLCKSPLIT (0x0000080000000000ULL) obsolete 2.3.58*/ #define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ #define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes * under lock; for xattr @@ -1231,6 +1310,10 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) #define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */ #define OBD_MD_FLOSTLAYOUT (0x0080000000000000ULL) /* contain ost_layout */ #define OBD_MD_FLPROJID (0x0100000000000000ULL) /* project ID */ +#define OBD_MD_SECCTX (0x0200000000000000ULL) /* embed security xattr */ + +#define OBD_MD_FLLAZYSIZE (0x0400000000000000ULL) /* Lazy size */ +#define OBD_MD_FLLAZYBLOCKS (0x0800000000000000ULL) /* Lazy blocks */ #define OBD_MD_FLALLQUOTA (OBD_MD_FLUSRQUOTA | \ OBD_MD_FLGRPQUOTA | \ @@ -1240,7 +1323,7 @@ lov_mds_md_max_stripe_count(size_t buf_size, __u32 lmm_magic) OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \ OBD_MD_FLGID | OBD_MD_FLFLAGS | OBD_MD_FLNLINK | \ - OBD_MD_FLGENER | OBD_MD_FLRDEV | OBD_MD_FLGROUP | \ + OBD_MD_FLPARENT | OBD_MD_FLRDEV | OBD_MD_FLGROUP | \ OBD_MD_FLPROJID) #define OBD_MD_FLXATTRALL (OBD_MD_FLXATTR | OBD_MD_FLXATTRLS) @@ -1275,6 +1358,7 @@ struct hsm_state_set { #define OBD_BRW_CHECK 0x10 #define OBD_BRW_FROM_GRANT 0x20 /* the osc manages this under llite */ #define OBD_BRW_GRANTED 0x40 /* the ost manages this */ +/* OBD_BRW_NOCACHE is currently neither set nor tested */ #define OBD_BRW_NOCACHE 0x80 /* this page is a part of non-cached IO */ #define OBD_BRW_NOQUOTA 0x100 #define OBD_BRW_SRVLOCK 0x200 /* Client holds no lock over this page */ @@ -1565,6 +1649,7 @@ enum mds_cmd { MDS_HSM_CT_REGISTER = 59, MDS_HSM_CT_UNREGISTER = 60, MDS_SWAP_LAYOUTS = 61, + MDS_RMFID = 62, MDS_LAST_OPC }; @@ -1613,29 +1698,31 @@ enum mds_reint_op { #define DISP_OPEN_DENY 0x10000000 /* INODE LOCK PARTS */ -#define MDS_INODELOCK_LOOKUP 0x000001 /* For namespace, dentry etc, and also - * was used to protect permission (mode, - * owner, group etc) before 2.4. */ -#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ -#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */ -#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */ - -/* The PERM bit is added int 2.4, and it is used to protect permission(mode, - * owner, group, acl etc), so to separate the permission from LOOKUP lock. - * Because for remote directories(in DNE), these locks will be granted by - * different MDTs(different ldlm namespace). - * - * For local directory, MDT will always grant UPDATE_LOCK|PERM_LOCK together. - * For Remote directory, the master MDT, where the remote directory is, will - * grant UPDATE_LOCK|PERM_LOCK, and the remote MDT, where the name entry is, - * will grant LOOKUP_LOCK. */ -#define MDS_INODELOCK_PERM 0x000010 -#define MDS_INODELOCK_XATTR 0x000020 /* extended attributes */ -#define MDS_INODELOCK_DOM 0x000040 /* Data for data-on-mdt files */ - -#define MDS_INODELOCK_MAXSHIFT 6 +enum mds_ibits_locks { + MDS_INODELOCK_LOOKUP = 0x000001, /* For namespace, dentry etc. Was + * used to protect permission (mode, + * owner, group, etc) before 2.4. */ + MDS_INODELOCK_UPDATE = 0x000002, /* size, links, timestamps */ + MDS_INODELOCK_OPEN = 0x000004, /* For opened files */ + MDS_INODELOCK_LAYOUT = 0x000008, /* for layout */ + + /* The PERM bit is added in 2.4, and is used to protect permission + * (mode, owner, group, ACL, etc.) separate from LOOKUP lock. + * For remote directories (in DNE) these locks will be granted by + * different MDTs (different LDLM namespace). + * + * For local directory, the MDT always grants UPDATE|PERM together. + * For remote directory, master MDT (where remote directory is) grants + * UPDATE|PERM, and remote MDT (where name entry is) grants LOOKUP_LOCK. + */ + MDS_INODELOCK_PERM = 0x000010, + MDS_INODELOCK_XATTR = 0x000020, /* non-permission extended attrs */ + MDS_INODELOCK_DOM = 0x000040, /* Data for Data-on-MDT files */ + /* Do not forget to increase MDS_INODELOCK_NUMBITS when adding bits */ +}; +#define MDS_INODELOCK_NUMBITS 7 /* This FULL lock is useful to take on unlink sort of operations */ -#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1) +#define MDS_INODELOCK_FULL ((1 << MDS_INODELOCK_NUMBITS) - 1) /* DOM lock shouldn't be canceled early, use this macro for ELC */ #define MDS_INODELOCK_ELC (MDS_INODELOCK_FULL & ~MDS_INODELOCK_DOM) @@ -1657,17 +1744,16 @@ enum { enum { /* these should be identical to their EXT4_*_FL counterparts, they are * redefined here only to avoid dragging in fs/ext4/ext4.h */ - LUSTRE_SYNC_FL = 0x00000008, /* Synchronous updates */ - LUSTRE_IMMUTABLE_FL = 0x00000010, /* Immutable file */ - LUSTRE_APPEND_FL = 0x00000020, /* writes to file may only append */ - LUSTRE_NODUMP_FL = 0x00000040, /* do not dump file */ - LUSTRE_NOATIME_FL = 0x00000080, /* do not update atime */ - LUSTRE_INDEX_FL = 0x00001000, /* hash-indexed directory */ - LUSTRE_DIRSYNC_FL = 0x00010000, /* dirsync behaviour (dir only) */ - LUSTRE_TOPDIR_FL = 0x00020000, /* Top of directory hierarchies*/ - LUSTRE_DIRECTIO_FL = 0x00100000, /* Use direct i/o */ - LUSTRE_INLINE_DATA_FL = 0x10000000, /* Inode has inline data. */ - LUSTRE_PROJINHERIT_FL = 0x20000000, /* Create with parents projid */ + LUSTRE_SYNC_FL = 0x00000008, /* Synchronous updates */ + LUSTRE_IMMUTABLE_FL = 0x00000010, /* Immutable file */ + LUSTRE_APPEND_FL = 0x00000020, /* file writes may only append */ + LUSTRE_NODUMP_FL = 0x00000040, /* do not dump file */ + LUSTRE_NOATIME_FL = 0x00000080, /* do not update atime */ + LUSTRE_INDEX_FL = 0x00001000, /* hash-indexed directory */ + LUSTRE_DIRSYNC_FL = 0x00010000, /* dirsync behaviour (dir only) */ + LUSTRE_TOPDIR_FL = 0x00020000, /* Top of directory hierarchies*/ + LUSTRE_INLINE_DATA_FL = 0x10000000, /* Inode has inline data. */ + LUSTRE_PROJINHERIT_FL = 0x20000000, /* Create with parents projid */ /* These flags will not be identical to any EXT4_*_FL counterparts, * and only reserved for lustre purpose. Note: these flags might @@ -1676,12 +1762,24 @@ enum { * wired by la_flags see osd_attr_get(). * 2. If these flags needs to be stored into inode, they will be * stored in LMA. see LMAI_XXXX */ - LUSTRE_ORPHAN_FL = 0x00002000, - LUSTRE_SET_SYNC_FL = 0x00040000, /* Synchronous setattr on OSTs */ + LUSTRE_ORPHAN_FL = 0x00002000, + LUSTRE_SET_SYNC_FL = 0x00040000, /* Synchronous setattr on OSTs */ - LUSTRE_LMA_FL_MASKS = LUSTRE_ORPHAN_FL, + LUSTRE_LMA_FL_MASKS = LUSTRE_ORPHAN_FL, }; +#ifndef FS_XFLAG_SYNC +#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#endif +#ifndef FS_XFLAG_NOATIME +#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#endif +#ifndef FS_XFLAG_IMMUTABLE +#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#endif +#ifndef FS_XFLAG_APPEND +#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ +#endif #ifndef FS_XFLAG_PROJINHERIT #define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ #endif @@ -1701,8 +1799,7 @@ static inline int ll_ext_to_inode_flags(int flags) #if defined(S_DIRSYNC) ((flags & LUSTRE_DIRSYNC_FL) ? S_DIRSYNC : 0) | #endif - ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0) | - ((flags & LUSTRE_PROJINHERIT_FL) ? FS_XFLAG_PROJINHERIT : 0)); + ((flags & LUSTRE_IMMUTABLE_FL) ? S_IMMUTABLE : 0)); } static inline int ll_inode_to_ext_flags(int iflags) @@ -1713,8 +1810,23 @@ static inline int ll_inode_to_ext_flags(int iflags) #if defined(S_DIRSYNC) ((iflags & S_DIRSYNC) ? LUSTRE_DIRSYNC_FL : 0) | #endif - ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0) | - ((iflags & FS_XFLAG_PROJINHERIT) ? LUSTRE_PROJINHERIT_FL : 0)); + ((iflags & S_IMMUTABLE) ? LUSTRE_IMMUTABLE_FL : 0)); +} + +static inline int ll_xflags_to_inode_flags(int xflags) +{ + return ((xflags & FS_XFLAG_SYNC) ? S_SYNC : 0) | + ((xflags & FS_XFLAG_NOATIME) ? S_NOATIME : 0) | + ((xflags & FS_XFLAG_APPEND) ? S_APPEND : 0) | + ((xflags & FS_XFLAG_IMMUTABLE) ? S_IMMUTABLE : 0); +} + +static inline int ll_inode_flags_to_xflags(int flags) +{ + return ((flags & S_SYNC) ? FS_XFLAG_SYNC : 0) | + ((flags & S_NOATIME) ? FS_XFLAG_NOATIME : 0) | + ((flags & S_APPEND) ? FS_XFLAG_APPEND : 0) | + ((flags & S_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0); } #endif @@ -1726,7 +1838,7 @@ enum md_transient_state { struct mdt_body { struct lu_fid mbo_fid1; struct lu_fid mbo_fid2; - struct lustre_handle mbo_handle; + struct lustre_handle mbo_open_handle; __u64 mbo_valid; __u64 mbo_size; /* Offset, in the case of MDS_READPAGE */ __s64 mbo_mtime; @@ -1763,7 +1875,7 @@ struct mdt_body { }; /* 216 */ struct mdt_ioepoch { - struct lustre_handle mio_handle; + struct lustre_handle mio_open_handle; __u64 mio_unused1; /* was ioepoch */ __u32 mio_unused2; /* was flags */ __u32 mio_padding; @@ -1827,26 +1939,32 @@ struct mdt_rec_setattr { #define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, ie O_TRUNC */ #define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */ #define MDS_ATTR_PROJID 0x10000ULL /* = 65536 */ +#define MDS_ATTR_LSIZE 0x20000ULL /* = 131072 */ +#define MDS_ATTR_LBLOCKS 0x40000ULL /* = 262144 */ +#define MDS_ATTR_OVERRIDE 0x2000000ULL /* = 33554432 */ enum mds_op_bias { - MDS_CHECK_SPLIT = 1 << 0, +/* MDS_CHECK_SPLIT = 1 << 0, obsolete before 2.3.58 */ MDS_CROSS_REF = 1 << 1, - MDS_VTX_BYPASS = 1 << 2, +/* MDS_VTX_BYPASS = 1 << 2, obsolete since 2.3.54 */ MDS_PERM_BYPASS = 1 << 3, /* MDS_SOM = 1 << 4, obsolete since 2.8.0 */ MDS_QUOTA_IGNORE = 1 << 5, - /* Was MDS_CLOSE_CLEANUP (1 << 6), No more used */ +/* MDS_CLOSE_CLEANUP = 1 << 6, obsolete since 2.3.51 */ MDS_KEEP_ORPHAN = 1 << 7, MDS_RECOV_OPEN = 1 << 8, MDS_DATA_MODIFIED = 1 << 9, MDS_CREATE_VOLATILE = 1 << 10, MDS_OWNEROVERRIDE = 1 << 11, MDS_HSM_RELEASE = 1 << 12, - MDS_RENAME_MIGRATE = 1 << 13, + MDS_CLOSE_MIGRATE = 1 << 13, MDS_CLOSE_LAYOUT_SWAP = 1 << 14, MDS_CLOSE_LAYOUT_MERGE = 1 << 15, MDS_CLOSE_RESYNC_DONE = 1 << 16, MDS_CLOSE_LAYOUT_SPLIT = 1 << 17, + MDS_TRUNC_KEEP_LEASE = 1 << 18, + MDS_PCC_ATTACH = 1 << 19, + MDS_CLOSE_UPDATE_TIMES = 1 << 20, }; #define MDS_CLOSE_INTENT (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP | \ @@ -1855,32 +1973,35 @@ enum mds_op_bias { /* instance of mdt_reint_rec */ struct mdt_rec_create { - __u32 cr_opcode; - __u32 cr_cap; - __u32 cr_fsuid; - __u32 cr_fsuid_h; - __u32 cr_fsgid; - __u32 cr_fsgid_h; - __u32 cr_suppgid1; - __u32 cr_suppgid1_h; - __u32 cr_suppgid2; - __u32 cr_suppgid2_h; - struct lu_fid cr_fid1; - struct lu_fid cr_fid2; - struct lustre_handle cr_old_handle; /* handle in case of open replay */ + __u32 cr_opcode; + __u32 cr_cap; + __u32 cr_fsuid; + __u32 cr_fsuid_h; + __u32 cr_fsgid; + __u32 cr_fsgid_h; + __u32 cr_suppgid1; + __u32 cr_suppgid1_h; + __u32 cr_suppgid2; + __u32 cr_suppgid2_h; + struct lu_fid cr_fid1; + struct lu_fid cr_fid2; + struct lustre_handle cr_open_handle_old; /* in case of open replay */ __s64 cr_time; - __u64 cr_rdev; - __u64 cr_ioepoch; - __u64 cr_padding_1; /* rr_blocks */ - __u32 cr_mode; - __u32 cr_bias; - /* use of helpers set/get_mrc_cr_flags() is needed to access - * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to - * extend cr_flags size without breaking 1.8 compat */ - __u32 cr_flags_l; /* for use with open, low 32 bits */ - __u32 cr_flags_h; /* for use with open, high 32 bits */ - __u32 cr_umask; /* umask for create */ - __u32 cr_padding_4; /* rr_padding_4 */ + union { + __u64 cr_rdev; + __u32 cr_archive_id; + }; + __u64 cr_ioepoch; + __u64 cr_padding_1; /* rr_blocks */ + __u32 cr_mode; + __u32 cr_bias; + /* use of helpers set/get_mrc_cr_flags() is needed to access + * 64 bits cr_flags [cr_flags_l, cr_flags_h], this is done to + * extend cr_flags size without breaking 1.8 compat */ + __u32 cr_flags_l; /* for use with open, low 32 bits */ + __u32 cr_flags_h; /* for use with open, high 32 bits */ + __u32 cr_umask; /* umask for create */ + __u32 cr_padding_4; /* rr_padding_4 */ }; /* instance of mdt_reint_rec */ @@ -2008,7 +2129,7 @@ struct mdt_rec_resync { __u32 rs_suppgid2_h; struct lu_fid rs_fid; __u8 rs_padding0[sizeof(struct lu_fid)]; - struct lustre_handle rs_handle; /* rr_mtime */ + struct lustre_handle rs_lease_handle; /* rr_mtime */ __s64 rs_padding1; /* rr_atime */ __s64 rs_padding2; /* rr_ctime */ __u64 rs_padding3; /* rr_size */ @@ -2018,7 +2139,8 @@ struct mdt_rec_resync { __u32 rs_padding6; /* rr_flags */ __u32 rs_padding7; /* rr_flags_h */ __u32 rs_padding8; /* rr_umask */ - __u32 rs_padding9; /* rr_padding_4 */ + __u16 rs_mirror_id; + __u16 rs_padding9; /* rr_padding_4 */ }; /* @@ -2052,9 +2174,12 @@ struct mdt_rec_reint { __u32 rr_flags; __u32 rr_flags_h; __u32 rr_umask; - __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ + __u16 rr_mirror_id; + __u16 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ }; +#define LMV_DESC_QOS_MAXAGE_DEFAULT 60 /* Seconds */ + /* lmv structures */ struct lmv_desc { __u32 ld_tgt_count; /* how many MDS's */ @@ -2083,41 +2208,36 @@ struct lmv_mds_md_v1 { * used for now. Higher 16 bits will * be used to mark the object status, * for example migrating or dead. */ - __u32 lmv_layout_version; /* Used for directory restriping */ - __u32 lmv_padding1; - __u64 lmv_padding2; + __u32 lmv_layout_version; /* increased each time layout changed, + * by directory migration, restripe + * and LFSCK. */ + __u32 lmv_migrate_offset; /* once this is set, it means this + * directory is been migrated, stripes + * before this offset belong to target, + * from this to source. */ + __u32 lmv_migrate_hash; /* hash type of source stripes of + * migrating directory */ + __u32 lmv_padding2; __u64 lmv_padding3; char lmv_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */ struct lu_fid lmv_stripe_fids[0]; /* FIDs for each stripe */ }; +/* foreign LMV EA */ +struct lmv_foreign_md { + __u32 lfm_magic; /* magic number = LMV_MAGIC_FOREIGN */ + __u32 lfm_length; /* length of lfm_value */ + __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */ + __u32 lfm_flags; /* flags, type specific */ + char lfm_value[]; /* free format value */ +}; + #define LMV_MAGIC_V1 0x0CD20CD0 /* normal stripe lmv magic */ #define LMV_MAGIC LMV_MAGIC_V1 /* #define LMV_USER_MAGIC 0x0CD30CD0 */ #define LMV_MAGIC_STRIPE 0x0CD40CD0 /* magic for dir sub_stripe */ - -/* Right now only the lower part(0-16bits) of lmv_hash_type is being used, - * and the higher part will be the flag to indicate the status of object, - * for example the object is being migrated. And the hash function - * might be interpreted differently with different flags. */ -#define LMV_HASH_TYPE_MASK 0x0000ffff - -#define LMV_HASH_FLAG_MIGRATION 0x80000000 - -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 11, 56, 0) -/* Since lustre 2.8, this flag will not be needed, instead this DEAD - * and orphan flags will be stored in LMA (see LMAI_ORPHAN) - * Keep this flag just for LFSCK, because it still might meet such - * flag when it checks the old FS */ -#define LMV_HASH_FLAG_DEAD 0x40000000 -#endif -#define LMV_HASH_FLAG_BAD_TYPE 0x20000000 - -/* The striped directory has ever lost its master LMV EA, then LFSCK - * re-generated it. This flag is used to indicate such case. It is an - * on-disk flag. */ -#define LMV_HASH_FLAG_LOST_LMV 0x10000000 +#define LMV_MAGIC_FOREIGN 0x0CD50CD0 /* magic for lmv foreign */ /** * The FNV-1a hash algorithm is as follows: @@ -2151,6 +2271,7 @@ union lmv_mds_md { __u32 lmv_magic; struct lmv_mds_md_v1 lmv_md_v1; struct lmv_user_md lmv_user_md; + struct lmv_foreign_md lmv_foreign_md; }; static inline int lmv_mds_md_size(int stripe_count, unsigned int lmm_magic) @@ -2319,8 +2440,11 @@ static inline bool ldlm_extent_equal(const struct ldlm_extent *ex1, } struct ldlm_inodebits { - __u64 bits; - __u64 try_bits; /* optional bits to try */ + __u64 bits; + union { + __u64 try_bits; /* optional bits to try */ + __u64 cancel_bits; /* for lock convert */ + }; }; struct ldlm_flock_wire { @@ -2363,19 +2487,19 @@ union ldlm_gl_desc { enum ldlm_intent_flags { IT_OPEN = 0x00000001, IT_CREAT = 0x00000002, - IT_OPEN_CREAT = 0x00000003, - IT_READDIR = 0x00000004, + IT_OPEN_CREAT = IT_OPEN | IT_CREAT, /* To allow case label. */ + IT_READDIR = 0x00000004, /* Used by mdc, not put on the wire. */ IT_GETATTR = 0x00000008, IT_LOOKUP = 0x00000010, - IT_UNLINK = 0x00000020, - IT_TRUNC = 0x00000040, +/* IT_UNLINK = 0x00000020, Obsolete. */ +/* IT_TRUNC = 0x00000040, Obsolete. */ IT_GETXATTR = 0x00000080, - IT_EXEC = 0x00000100, - IT_PIN = 0x00000200, +/* IT_EXEC = 0x00000100, Obsolete. */ +/* IT_PIN = 0x00000200, Obsolete. */ IT_LAYOUT = 0x00000400, IT_QUOTA_DQACQ = 0x00000800, IT_QUOTA_CONN = 0x00001000, - IT_SETXATTR = 0x00002000, +/* IT_SETXATTR = 0x00002000, Obsolete. */ IT_GLIMPSE = 0x00004000, IT_BRW = 0x00008000, }; @@ -2423,16 +2547,16 @@ struct ldlm_reply { * Opcodes for mountconf (mgs and mgc) */ enum mgs_cmd { - MGS_CONNECT = 250, - MGS_DISCONNECT, - MGS_EXCEPTION, /* node died, etc. */ - MGS_TARGET_REG, /* whenever target starts up */ - MGS_TARGET_DEL, - MGS_SET_INFO, - MGS_CONFIG_READ, - MGS_LAST_OPC + MGS_CONNECT = 250, + MGS_DISCONNECT = 251, + MGS_EXCEPTION = 252, /* node died, etc. */ + MGS_TARGET_REG = 253, /* whenever target starts up */ + MGS_TARGET_DEL = 254, + MGS_SET_INFO = 255, + MGS_CONFIG_READ = 256, + MGS_LAST_OPC, + MGS_FIRST_OPC = MGS_CONNECT }; -#define MGS_FIRST_OPC MGS_CONNECT #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) #define MGS_PARAM_MAXLEN 1024 @@ -2525,13 +2649,13 @@ struct cfg_marker { * Opcodes for multiple servers. */ enum obd_cmd { - OBD_PING = 400, - OBD_LOG_CANCEL, - OBD_QC_CALLBACK, /* not used since 2.4 */ - OBD_IDX_READ, - OBD_LAST_OPC + OBD_PING = 400, +/* OBD_LOG_CANCEL = 401, obsolete since 1.5 */ +/* OBD_QC_CALLBACK = 402, obsolete since 2.4 */ + OBD_IDX_READ = 403, + OBD_LAST_OPC, + OBD_FIRST_OPC = OBD_PING }; -#define OBD_FIRST_OPC OBD_PING /** * llog contexts indices. @@ -2615,12 +2739,12 @@ struct llog_rec_hdr { __u32 lrh_index; __u32 lrh_type; __u32 lrh_id; -}; +} __attribute__((packed)); struct llog_rec_tail { __u32 lrt_len; __u32 lrt_index; -}; +} __attribute__((packed)); /* Where data follow just after header */ #define REC_DATA(ptr) \ @@ -2814,6 +2938,11 @@ enum llog_flag { LLOG_F_EXT_X_OMODE | LLOG_F_EXT_X_XATTR, }; +/* means first record of catalog */ +enum { + LLOG_CAT_FIRST = -1, +}; + /* On-disk header structure of each log object, stored in little endian order */ #define LLOG_MIN_CHUNK_SIZE 8192 #define LLOG_HEADER_SIZE (96) /* sizeof (llog_log_hdr) + sizeof(llh_tail) @@ -2871,17 +3000,17 @@ struct llog_cookie { /** llog protocol */ enum llogd_rpc_ops { - LLOG_ORIGIN_HANDLE_CREATE = 501, - LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, - LLOG_ORIGIN_HANDLE_READ_HEADER = 503, - LLOG_ORIGIN_HANDLE_WRITE_REC = 504, - LLOG_ORIGIN_HANDLE_CLOSE = 505, - LLOG_ORIGIN_CONNECT = 506, - LLOG_CATINFO = 507, /* deprecated */ - LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, - LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ - LLOG_LAST_OPC, - LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE + LLOG_ORIGIN_HANDLE_CREATE = 501, + LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502, + LLOG_ORIGIN_HANDLE_READ_HEADER = 503, +/* LLOG_ORIGIN_HANDLE_WRITE_REC = 504, Obsolete by 2.1. */ +/* LLOG_ORIGIN_HANDLE_CLOSE = 505, Obsolete by 1.8. */ +/* LLOG_ORIGIN_CONNECT = 506, Obsolete by 2.4. */ +/* LLOG_CATINFO = 507, Obsolete by 2.3. */ + LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, + LLOG_ORIGIN_HANDLE_DESTROY = 509, /* Obsolete by 2.11. */ + LLOG_LAST_OPC, + LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE }; struct llogd_body { @@ -3195,7 +3324,7 @@ struct link_ea_entry { unsigned char lee_reclen[2]; unsigned char lee_parent_fid[sizeof(struct lu_fid)]; char lee_name[0]; -}__attribute__((packed)); +} __attribute__((packed)); /** fid2path request/reply structure */ struct getinfo_fid2path { @@ -3306,6 +3435,7 @@ enum update_type { OUT_PUNCH = 14, OUT_READ = 15, OUT_NOOP = 16, + OUT_XATTR_LIST = 17, OUT_LAST }; @@ -3411,6 +3541,8 @@ struct close_data { struct close_data_resync_done cd_resync; /* split close */ __u16 cd_mirror_id; + /* PCC release */ + __u32 cd_archive_id; }; }; @@ -3420,7 +3552,7 @@ struct update_op { __u16 uop_type; __u16 uop_param_count; __u16 uop_params_off[0]; -}; +} __attribute__((packed)); struct update_ops { struct update_op uops_op[0]; @@ -3471,6 +3603,19 @@ struct llog_update_record { */ }; +/* sepol string format is: + * <1-digit for SELinux status>::: + */ +/* Max length of the sepol string + * Should be large enough to contain a sha512sum of the policy + */ +#define SELINUX_MODE_LEN 1 +#define SELINUX_POLICY_VER_LEN 3 /* 3 chars to leave room for the future */ +#define SELINUX_POLICY_HASH_LEN 64 +#define LUSTRE_NODEMAP_SEPOL_LENGTH (SELINUX_MODE_LEN + NAME_MAX + \ + SELINUX_POLICY_VER_LEN + \ + SELINUX_POLICY_HASH_LEN + 3) + /* nodemap records, uses 32 byte record length */ #define LUSTRE_NODEMAP_NAME_LENGTH 16 struct nodemap_cluster_rec {