X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Finclude%2Flustre%2Flustre_idl.h;h=6b2bf7c9b4124dcba13cad347cd0421c0484b2bd;hb=fd908da9;hp=149610bd41d4bad45d34ee929cd3dda1d07063a6;hpb=113303973ec9f8484eb2355a1a6ef3c4c7fd6a56;p=fs%2Flustre-release.git diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 149610b..6b2bf7c 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -5,6 +5,11 @@ * * Lustre wire protocol definitions. * + * + * We assume all nodes are either little-endian or big-endian, and we + * always send messages in the sender's native format. The receiver + * detects the message format by checking the 'magic' field of the message + * (see lustre_msg_swabbed() below). * ALL structs passing over the wire should be declared here. Structs * that are used in interfaces with userspace should go in lustre_user.h. * @@ -31,11 +36,6 @@ * in the code to ensure that new/old clients that see this larger struct * do not fail, otherwise you need to implement protocol compatibility). * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines, * implemented either here, inline (trivial implementations) or in * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other" @@ -52,6 +52,8 @@ #ifndef _LUSTRE_IDL_H_ #define _LUSTRE_IDL_H_ +#include + #if defined(__linux__) #include #elif defined(__APPLE__) @@ -66,14 +68,6 @@ #include /* - * this file contains all data structures used in Lustre interfaces: - * - obdo and obd_request records - * - mds_request records - * - ldlm data - * - ioctl's - */ - -/* * GENERAL STUFF */ /* FOO_REQUEST_PORTAL is for incoming requests on the FOO @@ -104,11 +98,16 @@ //#define PTLBD_BULK_PORTAL 21 #define MDS_SETATTR_PORTAL 22 #define MDS_READPAGE_PORTAL 23 +#define MDS_MDS_PORTAL 24 #define MGC_REPLY_PORTAL 25 #define MGS_REQUEST_PORTAL 26 #define MGS_REPLY_PORTAL 27 #define OST_REQUEST_PORTAL 28 +#define FLD_REQUEST_PORTAL 29 +#define SEQ_METADATA_PORTAL 30 +#define SEQ_DATA_PORTAL 31 +#define SEQ_CONTROLLER_PORTAL 32 #define SVC_KILLED 1 #define SVC_EVENT 2 @@ -140,6 +139,267 @@ #define LUSTRE_LOG_VERSION 0x00050000 #define LUSTRE_MGS_VERSION 0x00060000 +typedef __u64 mdsno_t; +typedef __u64 seqno_t; + +struct lu_range { + __u64 lr_start; + __u64 lr_end; +}; + +static inline __u64 range_space(struct lu_range *r) +{ + return r->lr_end - r->lr_start; +} + +static inline void range_zero(struct lu_range *r) +{ + r->lr_start = r->lr_end = 0; +} + +static inline int range_within(struct lu_range *r, + __u64 s) +{ + return s >= r->lr_start && s < r->lr_end; +} + +static inline void range_alloc(struct lu_range *r, + struct lu_range *s, + __u64 w) +{ + r->lr_start = s->lr_start; + r->lr_end = s->lr_start + w; + s->lr_start += w; +} + +static inline int range_is_sane(struct lu_range *r) +{ + return (r->lr_end >= r->lr_start); +} + +static inline int range_is_zero(struct lu_range *r) +{ + return (r->lr_start == 0 && r->lr_end == 0); +} + +static inline int range_is_exhausted(struct lu_range *r) +{ + return range_space(r) == 0; +} + +#define DRANGE "[%#16.16"LPF64"x-%#16.16"LPF64"x]" + +#define PRANGE(range) \ + (range)->lr_start, \ + (range)->lr_end + +struct lu_fid { + __u64 f_seq; /* holds fid sequence. Lustre should support 2 ^ 64 + * objects, thus even if one sequence has one object we + * reach this value. */ + __u32 f_oid; /* fid number within its sequence. */ + __u32 f_ver; /* holds fid version. */ +}; + +/* + * fid constants + */ +enum { + LUSTRE_ROOT_FID_SEQ = 1ULL, /* XXX: should go into mkfs. */ + + /* initial fid id value */ + LUSTRE_FID_INIT_OID = 1UL +}; + +/* get object sequence */ +static inline __u64 fid_seq(const struct lu_fid *fid) +{ + return fid->f_seq; +} + +/* get object id */ +static inline __u32 fid_oid(const struct lu_fid *fid) +{ + return fid->f_oid; +} + +/* get object version */ +static inline __u32 fid_ver(const struct lu_fid *fid) +{ + return fid->f_ver; +} + +static inline int fid_seq_is_sane(__u64 seq) +{ + return seq != 0; +} + +static inline void fid_zero(struct lu_fid *fid) +{ + memset(fid, 0, sizeof(*fid)); +} + +static inline int fid_is_igif(const struct lu_fid *fid) +{ + return fid_seq(fid) == LUSTRE_ROOT_FID_SEQ; +} + +#define DFID "[0x%16.16"LPF64"x/0x%8.8x:0x%8.8x]" + +#define PFID(fid) \ + fid_seq(fid), \ + fid_oid(fid), \ + fid_ver(fid) + +static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); + dst->f_seq = cpu_to_le64(fid_seq(src)); + dst->f_oid = cpu_to_le32(fid_oid(src)); + dst->f_ver = cpu_to_le32(fid_ver(src)); +} + +static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = le64_to_cpu(fid_seq(src)); + dst->f_oid = le32_to_cpu(fid_oid(src)); + dst->f_ver = le32_to_cpu(fid_ver(src)); + LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); +} + +static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + LASSERTF(fid_is_igif(src) || fid_ver(src) == 0, DFID"\n", PFID(src)); + dst->f_seq = cpu_to_be64(fid_seq(src)); + dst->f_oid = cpu_to_be32(fid_oid(src)); + dst->f_ver = cpu_to_be32(fid_ver(src)); +} + +static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src) +{ + /* check that all fields are converted */ + CLASSERT(sizeof *src == + sizeof fid_seq(src) + + sizeof fid_oid(src) + sizeof fid_ver(src)); + dst->f_seq = be64_to_cpu(fid_seq(src)); + dst->f_oid = be32_to_cpu(fid_oid(src)); + dst->f_ver = be32_to_cpu(fid_ver(src)); + LASSERTF(fid_is_igif(dst) || fid_ver(dst) == 0, DFID"\n", PFID(dst)); +} + +#ifdef __KERNEL__ +/* + * Storage representation for fids. + * + * Variable size, first byte contains the length of the whole record. + */ + +struct lu_fid_pack { + char fp_len; + char fp_area[sizeof(struct lu_fid)]; +}; + +void fid_pack(struct lu_fid_pack *pack, const struct lu_fid *fid, + struct lu_fid *befider); +int fid_unpack(const struct lu_fid_pack *pack, struct lu_fid *fid); + +/* __KERNEL__ */ +#endif + +static inline int fid_is_sane(const struct lu_fid *fid) +{ + return + fid != NULL && + ((fid_seq_is_sane(fid_seq(fid)) && fid_oid(fid) != 0 + && fid_ver(fid) == 0) || + fid_is_igif(fid)); +} + +static inline int fid_is_zero(const struct lu_fid *fid) +{ + return fid_seq(fid) == 0 && fid_oid(fid) == 0; +} + +extern void lustre_swab_lu_fid(struct lu_fid *fid); +extern void lustre_swab_lu_range(struct lu_range *range); + +static inline int lu_fid_eq(const struct lu_fid *f0, + const struct lu_fid *f1) +{ + /* Check that there is no alignment padding. */ + CLASSERT(sizeof *f0 == + sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver); + LASSERTF(fid_is_igif(f0) || fid_ver(f0) == 0, DFID, PFID(f0)); + LASSERTF(fid_is_igif(f1) || fid_ver(f1) == 0, DFID, PFID(f1)); + return memcmp(f0, f1, sizeof *f0) == 0; +} + +/* + * Layout of readdir pages, as transmitted on wire. + */ +struct lu_dirent { + struct lu_fid lde_fid; + __u32 lde_hash; + __u16 lde_reclen; + __u16 lde_namelen; + char lde_name[0]; +}; + +struct lu_dirpage { + __u32 ldp_hash_start; + __u32 ldp_hash_end; + __u16 ldp_flags; + __u32 ldp_pad0; + struct lu_dirent ldp_entries[0]; +}; + +enum lu_dirpage_flags { + LDF_EMPTY = 1 << 0 +}; + +static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp) +{ + if (le16_to_cpu(dp->ldp_flags) & LDF_EMPTY) + return NULL; + else + return dp->ldp_entries; +} + +static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent) +{ + struct lu_dirent *next; + + if (le16_to_cpu(ent->lde_reclen) != 0) + next = ((void *)ent) + le16_to_cpu(ent->lde_reclen); + else + next = NULL; + + return next; +} + +static inline int lu_dirent_size(struct lu_dirent *ent) +{ + if (le16_to_cpu(ent->lde_reclen) == 0) { + return (sizeof(*ent) + + le16_to_cpu(ent->lde_namelen) + 3) & ~3; + } + return le16_to_cpu(ent->lde_reclen); +} + +#define DIR_END_OFF 0xfffffffeUL + struct lustre_handle { __u64 cookie; }; @@ -195,7 +455,7 @@ struct lustre_msg_v2 { __u32 lm_buflens[0]; }; -/* without security, ptlrpc_body is put in the first buffer. */ +/* without gss, ptlrpc_body is put at the first buffer. */ struct ptlrpc_body { struct lustre_handle pb_handle; __u32 pb_type; @@ -211,9 +471,8 @@ struct ptlrpc_body { __u32 pb_conn_cnt; __u32 pb_padding_1; __u32 pb_padding_2; - __u32 pb_padding_3; - __u32 pb_padding_4; - __u32 pb_padding_5; + __u32 pb_limit; + __u64 pb_slv; }; extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); @@ -247,6 +506,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define MSG_LAST_REPLAY 1 #define MSG_RESENT 2 #define MSG_REPLAY 4 +#define MSG_REQ_REPLAY_DONE 8 +#define MSG_LOCK_REPLAY_DONE 16 /* * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) @@ -259,45 +520,68 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define MSG_CONNECT_LIBCLIENT 0x10 #define MSG_CONNECT_INITIAL 0x20 #define MSG_CONNECT_ASYNC 0x40 -#define MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */ +#define MSG_CONNECT_NEXT_VER 0x80 /* use next version of lustre_msg */ +#define MSG_CONNECT_TRANSNO 0x100 /* report transno */ /* Connect flags */ -#define OBD_CONNECT_RDONLY 0x1ULL /* client allowed read-only access */ -#define OBD_CONNECT_INDEX 0x2ULL /* connect to specific LOV idx */ -#define OBD_CONNECT_GRANT 0x8ULL /* OSC acquires grant at connect */ -#define OBD_CONNECT_SRVLOCK 0x10ULL /* server takes locks for client */ -#define OBD_CONNECT_VERSION 0x20ULL /* Server supports versions in ocd */ -#define OBD_CONNECT_REQPORTAL 0x40ULL /* Separate portal for non-IO reqs */ -#define OBD_CONNECT_ACL 0x80ULL /* client uses access control lists */ -#define OBD_CONNECT_XATTR 0x100ULL /* client using extended attributes*/ -#define OBD_CONNECT_CROW 0x200ULL /* MDS+OST create objects on write */ -#define OBD_CONNECT_TRUNCLOCK 0x400ULL /* locks on server for punch b=9528 */ -#define OBD_CONNECT_TRANSNO 0x800ULL /* replay sends initial transno */ -#define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ -#define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ -#define OBD_CONNECT_ATTRFID 0x4000ULL /* Server supports GetAttr By Fid */ -#define OBD_CONNECT_NODEVOH 0x8000ULL /* No open handle for special nodes */ -#define OBD_CONNECT_LCL_CLIENT 0x10000ULL /* local 1.8 client */ -#define OBD_CONNECT_RMT_CLIENT 0x20000ULL /* Remote 1.8 client */ -#define OBD_CONNECT_BRW_SIZE 0x40000ULL /* Max bytes per rpc */ -#define OBD_CONNECT_QUOTA64 0x80000ULL /* 64bit qunit_data.qd_count b=10707*/ -#define OBD_CONNECT_FID_CAPA 0x100000ULL /* fid capability */ -#define OBD_CONNECT_OSS_CAPA 0x200000ULL /* OSS capability */ +#define OBD_CONNECT_RDONLY 0x00000001ULL /* client allowed read-only access */ +#define OBD_CONNECT_INDEX 0x00000002ULL /* connect to specific LOV idx */ +#define OBD_CONNECT_GRANT 0x00000008ULL /* OSC acquires grant at connect */ +#define OBD_CONNECT_SRVLOCK 0x00000010ULL /* server takes locks for client */ +#define OBD_CONNECT_VERSION 0x00000020ULL /* Server supports versions in ocd */ +#define OBD_CONNECT_REQPORTAL 0x00000040ULL /* Separate portal for non-IO reqs */ +#define OBD_CONNECT_ACL 0x00000080ULL /* client uses access control lists */ +#define OBD_CONNECT_XATTR 0x00000100ULL /* client using extended attributes*/ +#define OBD_CONNECT_TRUNCLOCK 0x00000400ULL /* locks on server for punch b=9528 */ +#define OBD_CONNECT_IBITS 0x00001000ULL /* support for inodebits locks */ +#define OBD_CONNECT_JOIN 0x00002000ULL /* files can be concatenated */ +#define OBD_CONNECT_ATTRFID 0x00004000ULL /* Server supports GetAttr By Fid */ +#define OBD_CONNECT_NODEVOH 0x00008000ULL /* No open handle for special nodes */ +#define OBD_CONNECT_LCL_CLIENT 0x00010000ULL /* local 1.8 client */ +#define OBD_CONNECT_RMT_CLIENT 0x00020000ULL /* Remote 1.8 client */ +#define OBD_CONNECT_BRW_SIZE 0x00040000ULL /* Max bytes per rpc */ +#define OBD_CONNECT_QUOTA64 0x00080000ULL /* 64bit qunit_data.qd_count b=10707*/ +#define OBD_CONNECT_MDS_CAPA 0x00100000ULL /* MDS capability */ +#define OBD_CONNECT_OSS_CAPA 0x00200000ULL /* OSS capability */ +#define OBD_CONNECT_CANCELSET 0x00400000ULL /* Early batched cancels. */ +#define OBD_CONNECT_SOM 0x00800000ULL /* SOM feature */ +#define OBD_CONNECT_AT 0x01000000ULL /* client uses adaptive timeouts */ +#define OBD_CONNECT_LRU_RESIZE 0x02000000ULL /* Lru resize feature. */ +#define OBD_CONNECT_MDS_MDS 0x04000000ULL /* MDS-MDS connection*/ +#define OBD_CONNECT_REAL 0x08000000ULL /* real connection */ +#define OBD_CONNECT_FID 0x10000000ULL /* FID is supported by server */ +#define OBD_CONNECT_CKSUM 0x20000000ULL /* support several cksum algos */ + /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ -#define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ +#ifdef HAVE_LRU_RESIZE_SUPPORT +#define LRU_RESIZE_CONNECT_FLAG OBD_CONNECT_LRU_RESIZE +#else +#define LRU_RESIZE_CONNECT_FLAG 0 +#endif + +#define MDT_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ - OBD_CONNECT_NODEVOH | OBD_CONNECT_ATTRFID) + OBD_CONNECT_NODEVOH |/* OBD_CONNECT_ATTRFID |*/\ + OBD_CONNECT_LCL_CLIENT | \ + OBD_CONNECT_RMT_CLIENT | \ + OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \ + OBD_CONNECT_MDS_MDS | OBD_CONNECT_CANCELSET | \ + OBD_CONNECT_FID | \ + LRU_RESIZE_CONNECT_FLAG) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ - OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64) + OBD_CONNECT_BRW_SIZE | OBD_CONNECT_QUOTA64 | \ + OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET | \ + OBD_CONNECT_FID | \ + LRU_RESIZE_CONNECT_FLAG) #define ECHO_CONNECT_SUPPORTED (0) -#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION) +#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_FID) -#define MAX_QUOTA_COUNT32 ((0xffffffffULL >> QUOTABLOCK_BITS) << QUOTABLOCK_BITS) +#define MAX_QUOTA_COUNT32 (0xffffffffULL) #define OBD_OCD_VERSION(major,minor,patch,fix) (((major)<<24) + ((minor)<<16) +\ ((patch)<<8) + (fix)) @@ -311,18 +595,19 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); * If we eventually have separate connect data for different types, which we * almost certainly will, then perhaps we stick a union in here. */ struct obd_connect_data { - __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ - __u32 ocd_version; /* lustre release version number */ - __u32 ocd_grant; /* initial cache grant amount (bytes) */ - __u32 ocd_index; /* LOV index to connect to */ - __u32 ocd_brw_size; /* Maximum BRW size in bytes */ - __u64 ocd_ibits_known; /* inode bits this client understands */ - __u32 ocd_nllu; /* non-local-lustre-user */ - __u32 ocd_nllg; /* non-local-lustre-group */ - __u64 padding1; /* also fix lustre_swab_connect */ - __u64 padding2; /* also fix lustre_swab_connect */ - __u64 padding3; /* also fix lustre_swab_connect */ - __u64 padding4; /* also fix lustre_swab_connect */ + __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ + __u32 ocd_version; /* lustre release version number */ + __u32 ocd_grant; /* initial cache grant amount (bytes) */ + __u32 ocd_index; /* LOV index to connect to */ + __u32 ocd_brw_size; /* Maximum BRW size in bytes */ + __u64 ocd_ibits_known; /* inode bits this client understands */ + __u32 ocd_nllu; /* non-local-lustre-user */ + __u32 ocd_nllg; /* non-local-lustre-group */ + __u64 ocd_transno; /* first transno from client to be replayed */ + __u32 ocd_group; /* MDS group on OST */ + __u32 padding1; /* also fix lustre_swab_connect */ + __u64 padding2; /* also fix lustre_swab_connect */ + __u64 padding3; /* also fix lustre_swab_connect */ }; extern void lustre_swab_connect(struct obd_connect_data *ocd); @@ -347,8 +632,6 @@ typedef enum { OST_OPEN = 11, OST_CLOSE = 12, OST_STATFS = 13, -/* OST_SAN_READ = 14, deprecated */ -/* OST_SAN_WRITE = 15, deprecated */ OST_SYNC = 16, OST_SET_INFO = 17, OST_QUOTACHECK = 18, @@ -380,18 +663,19 @@ typedef uint32_t obd_count; #define OBD_FL_DEBUG_CHECK (0x00000040) /* echo client/server debug check */ #define OBD_FL_NO_USRQUOTA (0x00000100) /* the object's owner is over quota */ #define OBD_FL_NO_GRPQUOTA (0x00000200) /* the object's group is over quota */ -#define OBD_FL_CREATE_CROW (0x00000400) /* object should be create on write */ /* - * set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs + * Set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs * that declared OBD_CONNECT_TRUNCLOCK in their connect flags support this * functionality. */ #define OBD_FL_TRUNCLOCK (0x00000800) -/* this should be not smaller than sizeof(struct lustre_handle) + sizeof(struct - * llog_cookie) + sizeof(ll_fid). Nevertheless struct ll_fid is not longer - * stored in o_inline, we keep this just for case. */ +/* + * This should not be smaller than sizeof(struct lustre_handle) + sizeof(struct + * llog_cookie) + sizeof(struct ll_fid). Nevertheless struct ll_fid is not + * longer stored in o_inline, we keep this just for case. + */ #define OBD_INLINESZ 80 /* Note: 64-bit types are 64-bit aligned in structure */ @@ -441,6 +725,9 @@ extern void lustre_swab_obdo (struct obdo *o); #define LOV_PATTERN_FIRST 0x100 /* first stripe is not in round-robin */ #define LOV_PATTERN_CMOBD 0x200 +#define LOV_OBJECT_GROUP_DEFAULT ~0ULL +#define LOV_OBJECT_GROUP_CLEAR 0ULL + #define lov_ost_data lov_ost_data_v1 struct lov_ost_data_v1 { /* per-stripe data structure (little-endian)*/ __u64 l_object_id; /* OST object ID */ @@ -460,6 +747,15 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ }; +extern void lustre_swab_lov_mds_md(struct lov_mds_md *llm); + +#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data)) +#define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data)) + +#define XATTR_NAME_ACL_ACCESS "system.posix_acl_access" +#define XATTR_NAME_ACL_DEFAULT "system.posix_acl_default" +#define XATTR_NAME_LOV "trusted.lov" + #define OBD_MD_FLID (0x00000001ULL) /* object ID */ #define OBD_MD_FLATIME (0x00000002ULL) /* access time */ #define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ @@ -486,6 +782,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLGROUP (0x01000000ULL) /* group */ #define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ #define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write easize is epoch */ + /* ->mds if epoch opens or closes */ #define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ #define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ #define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */ @@ -494,11 +791,22 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ #define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ +#define OBD_MD_MEA (0x0000000400000000ULL) /* CMD split EA */ #define OBD_MD_FLXATTR (0x0000001000000000ULL) /* xattr */ #define OBD_MD_FLXATTRLS (0x0000002000000000ULL) /* xattr list */ #define OBD_MD_FLXATTRRM (0x0000004000000000ULL) /* xattr remove */ #define OBD_MD_FLACL (0x0000008000000000ULL) /* ACL */ +#define OBD_MD_FLRMTPERM (0x0000010000000000ULL) /* remote permission */ +#define OBD_MD_FLMDSCAPA (0x0000020000000000ULL) /* MDS capability */ +#define OBD_MD_FLOSSCAPA (0x0000040000000000ULL) /* OSS capability */ +#define OBD_MD_FLCKSPLIT (0x0000080000000000ULL) /* Check split on server */ +#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */ + +#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */ +#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */ +#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */ +#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */ #define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \ OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \ @@ -591,6 +899,16 @@ extern void lustre_swab_ost_last_id(obd_id *id); /* lock value block communicated between the filter and llite */ +/* OST_LVB_ERR_INIT is needed because the return code in rc is + * negative, i.e. because ((MASK + rc) & MASK) != MASK. */ +#define OST_LVB_ERR_INIT 0xffbadbad80000000ULL +#define OST_LVB_ERR_MASK 0xffbadbad00000000ULL +#define OST_LVB_IS_ERR(blocks) \ + ((blocks & OST_LVB_ERR_MASK) == OST_LVB_ERR_MASK) +#define OST_LVB_SET_ERR(blocks, rc) \ + do { blocks = OST_LVB_ERR_INIT + rc; } while (0) +#define OST_LVB_GET_ERR(blocks) (int)(blocks - OST_LVB_ERR_INIT) + struct ost_lvb { __u64 lvb_size; __u64 lvb_mtime; @@ -624,7 +942,9 @@ typedef enum { MDS_QUOTACHECK = 47, MDS_QUOTACTL = 48, MDS_GETXATTR = 49, - MDS_SETXATTR = 50, + MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */ + MDS_WRITEPAGE = 51, + MDS_IS_SUBDIR = 52, MDS_LAST_OPC } mds_cmd_t; @@ -641,10 +961,13 @@ typedef enum { REINT_UNLINK = 4, REINT_RENAME = 5, REINT_OPEN = 6, -// REINT_CLOSE = 7, -// REINT_WRITE = 8, + REINT_SETXATTR = 7, +// REINT_CLOSE = 8, +// REINT_WRITE = 9, REINT_MAX -} mds_reint_t; +} mds_reint_t, mdt_reint_t; + +extern void lustre_swab_generic_32s (__u32 *val); /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 0x00000001 @@ -688,6 +1011,22 @@ struct mds_status_req { extern void lustre_swab_mds_status_req (struct mds_status_req *r); +/* mdt_thread_info.mti_flags. */ +enum md_op_flags { + /* The flag indicates Size-on-MDS attributes are changed. */ + MF_SOM_CHANGE = (1 << 0), + /* Flags indicates an epoch opens or closes. */ + MF_EPOCH_OPEN = (1 << 1), + MF_EPOCH_CLOSE = (1 << 2), + MF_MDC_CANCEL_FID1 = (1 << 3), + MF_MDC_CANCEL_FID2 = (1 << 4), + MF_MDC_CANCEL_FID3 = (1 << 5), + MF_MDC_CANCEL_FID4 = (1 << 6), +}; + +#define MF_SOM_LOCAL_FLAGS (MF_MDC_CANCEL_FID1 | MF_MDC_CANCEL_FID2 | \ + MF_MDC_CANCEL_FID3 | MF_MDC_CANCEL_FID4) + #define MDS_BFLAG_UNCOMMITTED_WRITES 0x1 #define MDS_BFLAG_EXT_FLAGS 0x80000000 /* == EXT3_RESERVED_FL */ @@ -732,6 +1071,36 @@ static inline int ll_inode_to_ext_flags(int oflags, int iflags) } #endif +struct mdt_body { + struct lu_fid fid1; + struct lu_fid fid2; + struct lustre_handle handle; + __u64 valid; + __u64 size; /* Offset, in the case of MDS_READPAGE */ + __u64 mtime; + __u64 atime; + __u64 ctime; + __u64 blocks; /* XID, in the case of MDS_READPAGE */ + __u64 ioepoch; + __u64 ino; /* for 1.6 compatibility */ + __u32 fsuid; + __u32 fsgid; + __u32 capability; + __u32 mode; + __u32 uid; + __u32 gid; + __u32 flags; /* from vfs for pin/unpin, MDS_BFLAG for close */ + __u32 rdev; + __u32 nlink; /* #bytes to read in the case of MDS_READPAGE */ + __u32 generation; /* for 1.6 compatibility */ + __u32 suppgid; + __u32 eadatasize; + __u32 aclsize; + __u32 max_mdsize; + __u32 max_cookiesize; + __u32 padding_4; /* also fix lustre_swab_mdt_body */ +}; + struct mds_body { struct ll_fid fid1; struct ll_fid fid2; @@ -763,6 +1132,16 @@ struct mds_body { }; extern void lustre_swab_mds_body (struct mds_body *b); +extern void lustre_swab_mdt_body (struct mdt_body *b); + +struct mdt_epoch { + struct lustre_handle handle; + __u64 ioepoch; + __u32 flags; + __u32 padding; +}; + +extern void lustre_swab_mdt_epoch (struct mdt_epoch *b); #define Q_QUOTACHECK 0x800100 #define Q_INITQUOTA 0x800101 /* init slave limits */ @@ -786,6 +1165,36 @@ struct obd_quotactl { extern void lustre_swab_obd_quotactl(struct obd_quotactl *q); +/* inode access permission for remote user, the inode info are omitted, + * for client knows them. */ +struct mds_remote_perm { + __u32 rp_uid; + __u32 rp_gid; + __u32 rp_fsuid; + __u32 rp_fsgid; + __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ +}; + +/* permissions for md_perm.mp_perm */ +enum { + CFS_SETUID_PERM = 0x01, + CFS_SETGID_PERM = 0x02, + CFS_SETGRP_PERM = 0x04, + CFS_RMTACL_PERM = 0x08 +}; + +extern void lustre_swab_mds_remote_perm(struct mds_remote_perm *p); + +struct mdt_remote_perm { + __u32 rp_uid; + __u32 rp_gid; + __u32 rp_fsuid; + __u32 rp_fsgid; + __u32 rp_access_perm; /* MAY_READ/WRITE/EXEC */ +}; + +extern void lustre_swab_mdt_remote_perm(struct mdt_remote_perm *p); + struct mds_rec_setattr { __u32 sa_opcode; __u32 sa_fsuid; @@ -794,7 +1203,7 @@ struct mds_rec_setattr { __u32 sa_suppgid; __u32 sa_mode; struct ll_fid sa_fid; - __u64 sa_valid; + __u64 sa_valid; /* MDS_ATTR_* attributes */ __u64 sa_size; __u64 sa_mtime; __u64 sa_atime; @@ -805,17 +1214,70 @@ struct mds_rec_setattr { __u32 sa_padding; /* also fix lustre_swab_mds_rec_setattr */ }; -/* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */ -#ifndef ATTR_CTIME_SET -#define ATTR_CTIME_SET 0x2000 -#endif - extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); +struct mdt_rec_setattr { + __u32 sa_opcode; + __u32 sa_fsuid; + __u32 sa_fsgid; + __u32 sa_cap; + __u32 sa_suppgid; + __u32 sa_padding_1; + struct lu_fid sa_fid; + __u64 sa_valid; + __u32 sa_uid; + __u32 sa_gid; + __u64 sa_size; + __u64 sa_blocks; + __u64 sa_mtime; + __u64 sa_atime; + __u64 sa_ctime; + __u32 sa_attr_flags; + __u32 sa_mode; + __u32 sa_padding_2; + __u32 sa_padding_3; + __u32 sa_padding_4; + __u32 sa_padding_5; +}; + +extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa); + +/* + * Attribute flags used in mds_rec_setattr::sa_valid. + * The kernel's #defines for ATTR_* should not be used over the network + * since the client and MDS may run different kernels (see bug 13828) + * Therefore, we should only use MDS_ATTR_* attributes for sa_valid. + */ +#define MDS_ATTR_MODE 0x1ULL /* = 1 */ +#define MDS_ATTR_UID 0x2ULL /* = 2 */ +#define MDS_ATTR_GID 0x4ULL /* = 4 */ +#define MDS_ATTR_SIZE 0x8ULL /* = 8 */ +#define MDS_ATTR_ATIME 0x10ULL /* = 16 */ +#define MDS_ATTR_MTIME 0x20ULL /* = 32 */ +#define MDS_ATTR_CTIME 0x40ULL /* = 64 */ +#define MDS_ATTR_ATIME_SET 0x80ULL /* = 128 */ +#define MDS_ATTR_MTIME_SET 0x100ULL /* = 256 */ +#define MDS_ATTR_FORCE 0x200ULL /* = 512, Not a change, but a change it */ +#define MDS_ATTR_ATTR_FLAG 0x400ULL /* = 1024 */ +#define MDS_ATTR_KILL_SUID 0x800ULL /* = 2048 */ +#define MDS_ATTR_KILL_SGID 0x1000ULL /* = 4096 */ +#define MDS_ATTR_CTIME_SET 0x2000ULL /* = 8192 */ +#define MDS_ATTR_FROM_OPEN 0x4000ULL /* = 16384, called from open path, ie O_TRUNC */ +#define MDS_ATTR_BLOCKS 0x8000ULL /* = 32768 */ + #ifndef FMODE_READ #define FMODE_READ 00000001 #define FMODE_WRITE 00000002 #endif + +#define FMODE_EPOCH 01000000 +#define FMODE_EPOCHLCK 02000000 +#define FMODE_SOM 04000000 +#define FMODE_CLOSED 0 + +#define MDS_OPEN_CREATED 00000010 +#define MDS_OPEN_CROSS 00000020 + #define MDS_FMODE_EXEC 00000004 #define MDS_OPEN_CREAT 00000100 #define MDS_OPEN_EXCL 00000200 @@ -827,10 +1289,53 @@ extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); #define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ #define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ #define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file*/ +#define MDS_CREATE_RMT_ACL 01000000000 /* indicate create on remote server + * with default ACL */ +#define MDS_CREATE_SLAVE_OBJ 02000000000 /* indicate create slave object + * actually, this is for create, not + * conflict with other open flags */ #define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ #define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ #define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ +/* permission for create non-directory file */ +#define MAY_CREATE (1 << 7) +/* permission for create directory file */ +#define MAY_LINK (1 << 8) +/* permission for delete from the directory */ +#define MAY_UNLINK (1 << 9) +/* source's permission for rename */ +#define MAY_RENAME_SRC (1 << 10) +/* target's permission for rename */ +#define MAY_RENAME_TAR (1 << 11) +/* part (parent's) VTX permission check */ +#define MAY_VTX_PART (1 << 12) +/* full VTX permission check */ +#define MAY_VTX_FULL (1 << 13) +/* lfs rgetfacl permission check */ +#define MAY_RGETFACL (1 << 14) + +enum { + MDS_CHECK_SPLIT = 1 << 0, + MDS_CROSS_REF = 1 << 1, + MDS_VTX_BYPASS = 1 << 2, + MDS_PERM_BYPASS = 1 << 3 +}; + +struct mds_rec_join { + struct ll_fid jr_fid; + __u64 jr_headsize; +}; + +extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr); + +struct mdt_rec_join { + struct lu_fid jr_fid; + __u64 jr_headsize; +}; + +extern void lustre_swab_mdt_rec_join (struct mdt_rec_join *jr); + struct mds_rec_create { __u32 cr_opcode; __u32 cr_fsuid; @@ -852,12 +1357,29 @@ struct mds_rec_create { extern void lustre_swab_mds_rec_create (struct mds_rec_create *cr); -struct mds_rec_join { - struct ll_fid jr_fid; - __u64 jr_headsize; +struct mdt_rec_create { + __u32 cr_opcode; + __u32 cr_fsuid; + __u32 cr_fsgid; + __u32 cr_cap; + __u32 cr_suppgid1; + __u32 cr_suppgid2; + struct lu_fid cr_fid1; + struct lu_fid cr_fid2; + struct lustre_handle cr_old_handle; /* u64 handle in case of open replay */ + __u64 cr_time; + __u64 cr_rdev; + __u64 cr_ioepoch; + __u64 cr_padding_1; /* pad for 64 bits*/ + __u32 cr_mode; + __u32 cr_bias; + __u32 cr_flags; /* for use with open */ + __u32 cr_padding_2; + __u32 cr_padding_3; + __u32 cr_padding_4; }; -extern void lustre_swab_mds_rec_join (struct mds_rec_join *jr); +extern void lustre_swab_mdt_rec_create (struct mdt_rec_create *cr); struct mds_rec_link { __u32 lk_opcode; @@ -877,6 +1399,28 @@ struct mds_rec_link { extern void lustre_swab_mds_rec_link (struct mds_rec_link *lk); +struct mdt_rec_link { + __u32 lk_opcode; + __u32 lk_fsuid; + __u32 lk_fsgid; + __u32 lk_cap; + __u32 lk_suppgid1; + __u32 lk_suppgid2; + struct lu_fid lk_fid1; + struct lu_fid lk_fid2; + __u64 lk_time; + __u64 lk_padding_1; + __u64 lk_padding_2; + __u64 lk_padding_3; + __u64 lk_padding_4; + __u32 lk_bias; + __u32 lk_padding_5; + __u32 lk_padding_6; + __u32 lk_padding_7; + __u32 lk_padding_8; + __u32 lk_padding_9; +}; + struct mds_rec_unlink { __u32 ul_opcode; __u32 ul_fsuid; @@ -895,6 +1439,28 @@ struct mds_rec_unlink { extern void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul); +struct mdt_rec_unlink { + __u32 ul_opcode; + __u32 ul_fsuid; + __u32 ul_fsgid; + __u32 ul_cap; + __u32 ul_suppgid1; + __u32 ul_suppgid2; + struct lu_fid ul_fid1; + struct lu_fid ul_fid2; + __u64 ul_time; + __u64 ul_padding_2; + __u64 ul_padding_3; + __u64 ul_padding_4; + __u64 ul_padding_5; + __u32 ul_bias; + __u32 ul_mode; + __u32 ul_padding_6; + __u32 ul_padding_7; + __u32 ul_padding_8; + __u32 ul_padding_9; +}; + struct mds_rec_rename { __u32 rn_opcode; __u32 rn_fsuid; @@ -913,12 +1479,115 @@ struct mds_rec_rename { extern void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn); +struct mdt_rec_rename { + __u32 rn_opcode; + __u32 rn_fsuid; + __u32 rn_fsgid; + __u32 rn_cap; + __u32 rn_suppgid1; + __u32 rn_suppgid2; + struct lu_fid rn_fid1; + struct lu_fid rn_fid2; + __u64 rn_time; + __u64 rn_padding_1; + __u64 rn_padding_2; + __u64 rn_padding_3; + __u64 rn_padding_4; + __u32 rn_bias; /* some operation flags */ + __u32 rn_mode; /* cross-ref rename has mode */ + __u32 rn_padding_5; + __u32 rn_padding_6; + __u32 rn_padding_7; + __u32 rn_padding_8; +}; + +struct mdt_rec_setxattr { + __u32 sx_opcode; + __u32 sx_fsuid; + __u32 sx_fsgid; + __u32 sx_cap; + __u32 sx_suppgid1; + __u32 sx_suppgid2; + struct lu_fid sx_fid; + __u64 sx_padding_1; /* These three members are lu_fid size */ + __u32 sx_padding_2; + __u32 sx_padding_3; + __u64 sx_valid; + __u64 sx_padding_4; + __u64 sx_padding_5; + __u64 sx_padding_6; + __u64 sx_padding_7; + __u32 sx_size; + __u32 sx_flags; + __u32 sx_padding_8; + __u32 sx_padding_9; + __u32 sx_padding_10; + __u32 sx_padding_11; +}; + +struct mdt_rec_reint { + __u32 rr_opcode; + __u32 rr_fsuid; + __u32 rr_fsgid; + __u32 rr_cap; + __u32 rr_suppgid1; + __u32 rr_suppgid2; + struct lu_fid rr_fid1; + struct lu_fid rr_fid2; + __u64 rr_mtime; + __u64 rr_atime; + __u64 rr_ctime; + __u64 rr_size; + __u64 rr_blocks; + __u32 rr_bias; + __u32 rr_mode; + __u32 rr_padding_1; /* also fix lustre_swab_mdt_rec_reint */ + __u32 rr_padding_2; /* also fix lustre_swab_mdt_rec_reint */ + __u32 rr_padding_3; /* also fix lustre_swab_mdt_rec_reint */ + __u32 rr_padding_4; /* also fix lustre_swab_mdt_rec_reint */ +}; + +extern void lustre_swab_mdt_rec_reint(struct mdt_rec_reint *rr); + +struct lmv_desc { + __u32 ld_tgt_count; /* how many MDS's */ + __u32 ld_active_tgt_count; /* how many active */ + struct obd_uuid ld_uuid; +}; + +extern void lustre_swab_lmv_desc (struct lmv_desc *ld); + +struct md_fld { + seqno_t mf_seq; + mdsno_t mf_mds; +}; + +extern void lustre_swab_md_fld (struct md_fld *mf); + +enum fld_rpc_opc { + FLD_QUERY = 600, + FLD_LAST_OPC, + FLD_FIRST_OPC = FLD_QUERY +}; + +enum seq_rpc_opc { + SEQ_QUERY = 700, + SEQ_LAST_OPC, + SEQ_FIRST_OPC = SEQ_QUERY +}; + +enum seq_op { + SEQ_ALLOC_SUPER = 0, + SEQ_ALLOC_META = 1 +}; + /* * LOV data structures */ #define LOV_MIN_STRIPE_SIZE 65536 /* maximum PAGE_SIZE (ia64), power of 2 */ #define LOV_MAX_STRIPE_COUNT 160 /* until bug 4424 is fixed */ +#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */ #define LOV_MAX_UUID_BUFFER_SIZE 8192 /* The size of the buffer the lov/mdc reserves for the @@ -971,13 +1640,13 @@ extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id); /* lock types */ typedef enum { LCK_MINMODE = 0, - LCK_EX = 1, - LCK_PW = 2, - LCK_PR = 4, - LCK_CW = 8, - LCK_CR = 16, - LCK_NL = 32, - LCK_GROUP = 64, + LCK_EX = 1, + LCK_PW = 2, + LCK_PR = 4, + LCK_CW = 8, + LCK_CR = 16, + LCK_NL = 32, + LCK_GROUP = 64, LCK_MAXMODE } ldlm_mode_t; @@ -1046,16 +1715,29 @@ struct ldlm_lock_desc { extern void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l); +#define LDLM_LOCKREQ_HANDLES 2 +#define LDLM_ENQUEUE_CANCEL_OFF 1 + struct ldlm_request { __u32 lock_flags; - __u32 lock_padding; /* also fix lustre_swab_ldlm_request */ + __u32 lock_count; struct ldlm_lock_desc lock_desc; - struct lustre_handle lock_handle1; - struct lustre_handle lock_handle2; + struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES]; }; extern void lustre_swab_ldlm_request (struct ldlm_request *rq); +/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available. + * Otherwise, 2 are available. */ +#define ldlm_request_bufsize(count,type) \ +({ \ + int _avail = LDLM_LOCKREQ_HANDLES; \ + _avail -= (type == LDLM_ENQUEUE ? LDLM_ENQUEUE_CANCEL_OFF : 0); \ + sizeof(struct ldlm_request) + \ + (count > _avail ? count - _avail : 0) * \ + sizeof(struct lustre_handle); \ +}) + struct ldlm_reply { __u32 lock_flags; __u32 lock_padding; /* also fix lustre_swab_ldlm_reply */ @@ -1067,7 +1749,6 @@ struct ldlm_reply { extern void lustre_swab_ldlm_reply (struct ldlm_reply *r); - /* * Opcodes for mountconf (mgs and mgc) */ @@ -1077,8 +1758,17 @@ typedef enum { MGS_EXCEPTION, /* node died, etc. */ MGS_TARGET_REG, /* whenever target starts up */ MGS_TARGET_DEL, + MGS_SET_INFO, MGS_LAST_OPC } mgs_cmd_t; +#define MGS_FIRST_OPC MGS_CONNECT + +#define MGS_PARAM_MAXLEN 1024 +#define KEY_SET_INFO "set_info" + +struct mgs_send_param { + char mgs_param[MGS_PARAM_MAXLEN]; +}; /* We pass this info to the MGS so it can write config logs */ #define MTI_NAME_MAXLEN 64 @@ -1267,7 +1957,7 @@ struct llog_setattr_rec { struct llog_size_change_rec { struct llog_rec_hdr lsc_hdr; struct ll_fid lsc_fid; - __u32 lsc_io_epoch; + __u32 lsc_ioepoch; __u32 padding; struct llog_rec_tail lsc_tail; } __attribute__((packed)); @@ -1332,6 +2022,8 @@ enum llogd_rpc_ops { LLOG_CATINFO = 507, /* for lfs catinfo */ LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/ + LLOG_LAST_OPC, + LLOG_FIRST_OPC = LLOG_ORIGIN_HANDLE_CREATE }; struct llogd_body { @@ -1416,4 +2108,98 @@ typedef enum { } quota_cmd_t; #define JOIN_FILE_ALIGN 4096 + +/* security opcodes */ +typedef enum { + SEC_CTX_INIT = 801, + SEC_CTX_INIT_CONT = 802, + SEC_CTX_FINI = 803, + SEC_LAST_OPC, + SEC_FIRST_OPC = SEC_CTX_INIT +} sec_cmd_t; + +/* + * capa related definitions + */ +#define CAPA_HMAC_MAX_LEN 64 +#define CAPA_HMAC_KEY_MAX_LEN 56 + +/* NB take care when changing the sequence of elements this struct, + * because the offset info is used in find_capa() */ +struct lustre_capa { + struct lu_fid lc_fid; /* fid */ + __u64 lc_opc; /* operations allowed */ + __u32 lc_uid; /* uid, it is obsolete, but maybe used in + * future, reserve it for 64-bits aligned.*/ + __u32 lc_flags; /* HMAC algorithm & flags */ + __u32 lc_keyid; /* key used for the capability */ + __u32 lc_timeout; /* capa timeout value (sec) */ + __u64 lc_expiry; /* expiry time (sec) */ + __u8 lc_hmac[CAPA_HMAC_MAX_LEN]; /* HMAC */ +} __attribute__((packed)); + +extern void lustre_swab_lustre_capa(struct lustre_capa *c); + +/* lustre_capa.lc_opc */ +enum { + CAPA_OPC_BODY_WRITE = 1<<0, /* write object data */ + CAPA_OPC_BODY_READ = 1<<1, /* read object data */ + CAPA_OPC_INDEX_LOOKUP = 1<<2, /* lookup object fid */ + CAPA_OPC_INDEX_INSERT = 1<<3, /* insert object fid */ + CAPA_OPC_INDEX_DELETE = 1<<4, /* delete object fid */ + CAPA_OPC_OSS_WRITE = 1<<5, /* write oss object data */ + CAPA_OPC_OSS_READ = 1<<6, /* read oss object data */ + CAPA_OPC_OSS_TRUNC = 1<<7, /* truncate oss object */ + CAPA_OPC_META_WRITE = 1<<8, /* write object meta data */ + CAPA_OPC_META_READ = 1<<9, /* read object meta data */ + +}; + +#define CAPA_OPC_OSS_RW (CAPA_OPC_OSS_READ | CAPA_OPC_OSS_WRITE) +#define CAPA_OPC_MDS_ONLY \ + (CAPA_OPC_BODY_WRITE | CAPA_OPC_BODY_READ | CAPA_OPC_INDEX_LOOKUP | \ + CAPA_OPC_INDEX_INSERT | CAPA_OPC_INDEX_DELETE) +#define CAPA_OPC_OSS_ONLY \ + (CAPA_OPC_OSS_WRITE | CAPA_OPC_OSS_READ | CAPA_OPC_OSS_TRUNC) +#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY +#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY) + +/* MDS capability covers object capability for operations of body r/w + * (dir readpage/sendpage), index lookup/insert/delete and meta data r/w, + * while OSS capability only covers object capability for operations of + * oss data(file content) r/w/truncate. + */ +static inline int capa_for_mds(struct lustre_capa *c) +{ + return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) != 0; +} + +static inline int capa_for_oss(struct lustre_capa *c) +{ + return (c->lc_opc & CAPA_OPC_INDEX_LOOKUP) == 0; +} + +/* lustre_capa.lc_hmac_alg */ +enum { + CAPA_HMAC_ALG_SHA1 = 1, /* sha1 algorithm */ + CAPA_HMAC_ALG_MAX, +}; + +#define CAPA_FL_MASK 0x00ffffff +#define CAPA_HMAC_ALG_MASK 0xff000000 + +struct lustre_capa_key { + __u64 lk_mdsid; /* mds# */ + __u32 lk_keyid; /* key# */ + __u32 lk_padding; + __u8 lk_key[CAPA_HMAC_KEY_MAX_LEN]; /* key */ +} __attribute__((packed)); + +extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k); + +/* quota check function */ +#define QUOTA_RET_OK 0 /* return successfully */ +#define QUOTA_RET_NOQUOTA 1 /* not support quota */ +#define QUOTA_RET_NOLIMIT 2 /* quota limit isn't set */ +#define QUOTA_RET_ACQUOTA 3 /* need to acquire extra quota */ #endif