#define PTL_RPC_MSG_REPLY 4713
/* DON'T use swabbed values of MAGIC as magic! */
-#define LUSTRE_MSG_MAGIC_V1 0x0BD00BD0
#define LUSTRE_MSG_MAGIC_V2 0x0BD00BD3
-
-#define LUSTRE_MSG_MAGIC_V1_SWABBED 0xD00BD00B
#define LUSTRE_MSG_MAGIC_V2_SWABBED 0xD30BD00B
#define LUSTRE_MSG_MAGIC LUSTRE_MSG_MAGIC_V2
#define LUSTRE_LOG_VERSION 0x00050000
#define LUSTRE_MGS_VERSION 0x00060000
-/* TODO: All obd_* typedefs will be removed in last patch in series */
-typedef __u64 obd_id;
-typedef __u64 obd_seq;
-typedef __s64 obd_time;
-typedef __u64 obd_size;
-typedef __u64 obd_off;
-typedef __u64 obd_blocks;
-typedef __u64 obd_valid;
-typedef __u32 obd_blksize;
-typedef __u32 obd_mode;
-typedef __u32 obd_uid;
-typedef __u32 obd_gid;
-typedef __u32 obd_flag;
-typedef __u32 obd_count;
-
/**
* Describes a range of sequence, lsr_start is included but lsr_end is
* not in the range.
*/
enum lma_compat {
LMAC_HSM = 0x00000001,
- LMAC_SOM = 0x00000002,
+/* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
* under /O/<seq>/d<x>. */
extern void lustre_lma_init(struct lustre_mdt_attrs *lma,
const struct lu_fid *fid,
__u32 compat, __u32 incompat);
-/**
- * SOM on-disk attributes stored in a separate xattr.
- */
-struct som_attrs {
- /** Bitfield for supported data in this structure. For future use. */
- __u32 som_compat;
-
- /** Incompat feature list. The supported feature mask is availabe in
- * SOM_INCOMPAT_SUPP */
- __u32 som_incompat;
-
- /** IO Epoch SOM attributes belongs to */
- __u64 som_ioepoch;
- /** total file size in objects */
- __u64 som_size;
- /** total fs blocks in objects */
- __u64 som_blocks;
- /** mds mount id the size is valid for */
- __u64 som_mountid;
-};
-extern void lustre_som_swab(struct som_attrs *attrs);
-#define SOM_INCOMPAT_SUPP 0x0
+/* copytool uses a 32b bitmask field to encode archive-Ids during register
+ * with MDT thru kuc.
+ * archive num = 0 => all
+ * archive num from 1 to 32
+ */
+#define LL_HSM_MAX_ARCHIVE (sizeof(__u32) * 8)
/**
* HSM on-disk attributes stored in a separate xattr.
FID_SEQ_OST_MDT0 = 0,
FID_SEQ_LLOG = 1, /* unnamed llogs */
FID_SEQ_ECHO = 2,
- FID_SEQ_OST_MDT1 = 3,
- FID_SEQ_OST_MAX = 9, /* Max MDT count before OST_on_FID */
+ FID_SEQ_UNUSED_START = 3,
+ FID_SEQ_UNUSED_END = 9,
FID_SEQ_LLOG_NAME = 10, /* named llogs */
FID_SEQ_RSVD = 11,
FID_SEQ_IGIF = 12,
FID_SEQ_QUOTA_GLB = 0x200000006ULL,
FID_SEQ_ROOT = 0x200000007ULL, /* Located on MDT0 */
FID_SEQ_LAYOUT_RBTREE = 0x200000008ULL,
+ /* sequence is used for update logs of cross-MDT operation */
+ FID_SEQ_UPDATE_LOG = 0x200000009ULL,
+ /* Sequence is used for the directory under which update logs
+ * are created. */
+ FID_SEQ_UPDATE_LOG_DIR = 0x20000000aULL,
FID_SEQ_NORMAL = 0x200000400ULL,
FID_SEQ_LOV_DEFAULT = 0xffffffffffffffffULL
};
FID_OID_DOT_LUSTRE_LPF = 3UL,
};
+/** OID for FID_SEQ_ROOT */
+enum root_oid {
+ FID_OID_ROOT = 1UL,
+ FID_OID_ECHO_ROOT = 2UL,
+};
+
static inline bool fid_seq_is_mdt0(__u64 seq)
{
return seq == FID_SEQ_OST_MDT0;
static inline void lu_root_fid(struct lu_fid *fid)
{
fid->f_seq = FID_SEQ_ROOT;
- fid->f_oid = 1;
+ fid->f_oid = FID_OID_ROOT;
+ fid->f_ver = 0;
+}
+
+static inline void lu_echo_root_fid(struct lu_fid *fid)
+{
+ fid->f_seq = FID_SEQ_ROOT;
+ fid->f_oid = FID_OID_ECHO_ROOT;
+ fid->f_ver = 0;
+}
+
+static inline void lu_update_log_fid(struct lu_fid *fid, __u32 index)
+{
+ fid->f_seq = FID_SEQ_UPDATE_LOG;
+ fid->f_oid = index;
+ fid->f_ver = 0;
+}
+
+static inline void lu_update_log_dir_fid(struct lu_fid *fid, __u32 index)
+{
+ fid->f_seq = FID_SEQ_UPDATE_LOG_DIR;
+ fid->f_oid = index;
fid->f_ver = 0;
}
/**
* Check if a fid is igif or not.
* \param fid the fid to be tested.
- * \return true if the fid is a igif; otherwise false.
+ * \return true if the fid is an igif; otherwise false.
*/
static inline bool fid_seq_is_igif(__u64 seq)
{
/**
* Check if a fid is idif or not.
* \param fid the fid to be tested.
- * \return true if the fid is a idif; otherwise false.
+ * \return true if the fid is an idif; otherwise false.
*/
static inline bool fid_seq_is_idif(__u64 seq)
{
return fid_seq(fid) == FID_SEQ_LAYOUT_RBTREE;
}
+static inline bool fid_seq_is_update_log(__u64 seq)
+{
+ return seq == FID_SEQ_UPDATE_LOG;
+}
+
+static inline bool fid_is_update_log(const struct lu_fid *fid)
+{
+ return fid_seq_is_update_log(fid_seq(fid));
+}
+
+static inline bool fid_seq_is_update_log_dir(__u64 seq)
+{
+ return seq == FID_SEQ_UPDATE_LOG_DIR;
+}
+
+static inline bool fid_is_update_log_dir(const struct lu_fid *fid)
+{
+ return fid_seq_is_update_log_dir(fid_seq(fid));
+}
+
/* convert an OST objid into an IDIF FID SEQ number */
static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx)
{
/* Check whether the fid is for LAST_ID */
static inline bool fid_is_last_id(const struct lu_fid *fid)
{
- return fid_oid(fid) == 0;
+ return fid_oid(fid) == 0 && fid_seq(fid) != FID_SEQ_UPDATE_LOG &&
+ fid_seq(fid) != FID_SEQ_UPDATE_LOG_DIR;
}
/**
- * Get inode number from a igif.
- * \param fid a igif to get inode number from.
+ * Get inode number from an igif.
+ * \param fid an igif to get inode number from.
* \return inode number for the igif.
*/
static inline ino_t lu_igif_ino(const struct lu_fid *fid)
extern void lustre_swab_ost_id(struct ost_id *oid);
/**
- * Get inode generation from a igif.
- * \param fid a igif to get inode generation from.
+ * Get inode generation from an igif.
+ * \param fid an igif to get inode generation from.
* \return inode generation for the igif.
*/
static inline __u32 lu_igif_gen(const struct lu_fid *fid)
};
void lustre_swab_orphan_ent(struct lu_orphan_ent *ent);
+struct update_ops;
+void lustre_swab_update_ops(struct update_ops *uops, unsigned int op_count);
+
/** @} lu_fid */
/** \defgroup lu_dir lu_dir
LUDA_TYPE = 0x0002,
LUDA_64BITHASH = 0x0004,
- /* The following attrs are used for MDT interanl only,
+ /* The following attrs are used for MDT internal only,
* not visible to client */
/* Verify the dirent consistency */
__u32 pb_version;
__u32 pb_opc;
__u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
+ __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+ __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
+ __u16 pb_padding0;
+ __u32 pb_padding1;
__u64 pb_last_committed;
__u64 pb_transno;
__u32 pb_flags;
__u32 pb_version;
__u32 pb_opc;
__u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
+ __u64 pb_last_xid; /* highest replied XID without lower unreplied XID */
+ __u16 pb_tag; /* virtual slot idx for multiple modifying RPCs */
+ __u16 pb_padding0;
+ __u32 pb_padding1;
__u64 pb_last_committed;
__u64 pb_transno;
__u32 pb_flags;
name in request */
#define OBD_CONNECT_LFSCK 0x40000000000000ULL/* support online LFSCK */
#define OBD_CONNECT_UNLINK_CLOSE 0x100000000000000ULL/* close file in unlink */
+#define OBD_CONNECT_MULTIMODRPCS 0x200000000000000ULL /* support multiple modify
+ RPCs in parallel */
#define OBD_CONNECT_DIR_STRIPE 0x400000000000000ULL /* striped DNE dir */
/* XXX README XXX:
OBD_CONNECT_OSS_CAPA | OBD_CONNECT_MDS_MDS | \
OBD_CONNECT_FID | LRU_RESIZE_CONNECT_FLAG | \
OBD_CONNECT_VBR | OBD_CONNECT_LOV_V3 | \
- OBD_CONNECT_SOM | OBD_CONNECT_FULL20 | \
+ OBD_CONNECT_FULL20 | \
OBD_CONNECT_64BITHASH | OBD_CONNECT_JOBSTATS | \
OBD_CONNECT_EINPROGRESS | \
OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_UMASK | \
*
* If we eventually have separate connect data for different types, which we
* almost certainly will, then perhaps we stick a union in here. */
-struct obd_connect_data_v1 {
- __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
- __u32 ocd_version; /* lustre release version number */
- __u32 ocd_grant; /* initial cache grant amount (bytes) */
- __u32 ocd_index; /* LOV index to connect to */
- __u32 ocd_brw_size; /* Maximum BRW size in bytes, must be 2^n */
- __u64 ocd_ibits_known; /* inode bits this client understands */
- __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
- __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
- __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
- __u32 ocd_unused; /* also fix lustre_swab_connect */
- __u64 ocd_transno; /* first transno from client to be replayed */
- __u32 ocd_group; /* MDS group on OST */
- __u32 ocd_cksum_types; /* supported checksum algorithms */
- __u32 ocd_max_easize; /* How big LOV EA can be on MDS */
- __u32 ocd_instance; /* also fix lustre_swab_connect */
- __u64 ocd_maxbytes; /* Maximum stripe size in bytes */
-};
-
struct obd_connect_data {
__u64 ocd_connect_flags; /* OBD_CONNECT_* per above */
__u32 ocd_version; /* lustre release version number */
* if the corresponding flag in ocd_connect_flags is set. Accessing
* any field after ocd_maxbytes on the receiver without a valid flag
* may result in out-of-bound memory access and kernel oops. */
- __u64 padding1; /* added 2.1.0. also fix lustre_swab_connect */
+ __u16 ocd_maxmodrpcs; /* Maximum modify RPCs in parallel */
+ __u16 padding0; /* added 2.1.0. also fix lustre_swab_connect */
+ __u32 padding1; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding2; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding3; /* added 2.1.0. also fix lustre_swab_connect */
__u64 padding4; /* added 2.1.0. also fix lustre_swab_connect */
OST_STATFS = 13,
OST_SYNC = 16,
OST_SET_INFO = 17,
- OST_QUOTACHECK = 18,
+ OST_QUOTACHECK = 18, /* not used since 2.4 */
OST_QUOTACTL = 19,
OST_QUOTA_ADJUST_QUNIT = 20, /* not used since 2.4 */
OST_LAST_OPC
#define XATTR_NAME_SOM "trusted.som"
#define XATTR_NAME_HSM "trusted.hsm"
#define XATTR_NAME_LFSCK_BITMAP "trusted.lfsck_bitmap"
+#define XATTR_NAME_DUMMY "trusted.dummy"
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 8, 53, 0)
# define XATTR_NAME_LFSCK_NAMESPACE_OLD "trusted.lfsck_namespace"
#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */
#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */
/*#define OBD_MD_FLOSCOPQ (0x00400000ULL) osc opaque data, never used */
-#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */
+/* OBD_MD_FLCOOKIE (0x00800000ULL) obsolete in 2.8 */
#define OBD_MD_FLGROUP (0x01000000ULL) /* group */
#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */
#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write with ioepoch */
#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */
+#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
+ executed */
#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */
MDS_PIN = 42, /* obsolete, never used in a release */
MDS_UNPIN = 43, /* obsolete, never used in a release */
MDS_SYNC = 44,
- MDS_DONE_WRITING = 45,
+ MDS_DONE_WRITING = 45, /* obsolete since 2.8.0 */
MDS_SET_INFO = 46,
- MDS_QUOTACHECK = 47,
+ MDS_QUOTACHECK = 47, /* not used since 2.4 */
MDS_QUOTACTL = 48,
MDS_GETXATTR = 49,
MDS_SETXATTR = 50, /* obsolete, now it's MDS_REINT op */
#define MDS_STATUS_CONN 1
#define MDS_STATUS_LOV 2
-/* mdt_thread_info.mti_flags. */
-enum md_op_flags {
- /* The flag indicates Size-on-MDS attributes are changed. */
- MF_SOM_CHANGE = (1 << 0),
- /* Flags indicates an epoch opens or closes. */
- MF_EPOCH_OPEN = (1 << 1),
- MF_EPOCH_CLOSE = (1 << 2),
- MF_MDC_CANCEL_FID1 = (1 << 3),
- MF_MDC_CANCEL_FID2 = (1 << 4),
- MF_MDC_CANCEL_FID3 = (1 << 5),
- MF_MDC_CANCEL_FID4 = (1 << 6),
- /* There is a pending attribute update. */
- MF_SOM_AU = (1 << 7),
- /* Cancel OST locks while getattr OST attributes. */
- MF_GETATTR_LOCK = (1 << 8),
- MF_GET_MDT_IDX = (1 << 9),
-};
-
-#define MF_SOM_LOCAL_FLAGS (MF_SOM_CHANGE | MF_EPOCH_OPEN | MF_EPOCH_CLOSE)
-
#define LUSTRE_BFLAG_UNCOMMITTED_WRITES 0x1
/* these should be identical to their EXT4_*_FL counterparts, they are
__u32 mbo_eadatasize;
__u32 mbo_aclsize;
__u32 mbo_max_mdsize;
- __u32 mbo_max_cookiesize;
+ __u32 mbo_unused3; /* was max_cookiesize until 2.8 */
__u32 mbo_uid_h; /* high 32-bits of uid, for FUID */
__u32 mbo_gid_h; /* high 32-bits of gid, for FUID */
__u32 mbo_padding_5; /* also fix lustre_swab_mdt_body */
extern void lustre_swab_mdt_body (struct mdt_body *b);
struct mdt_ioepoch {
- struct lustre_handle handle;
- __u64 ioepoch;
- __u32 flags;
- __u32 padding;
+ struct lustre_handle mio_handle;
+ __u64 mio_unused1; /* was ioepoch */
+ __u32 mio_unused2; /* was flags */
+ __u32 mio_padding;
};
extern void lustre_swab_mdt_ioepoch (struct mdt_ioepoch *b);
#define MDS_FMODE_CLOSED 00000000
#define MDS_FMODE_EXEC 00000004
-/* IO Epoch is opened on a closed file. */
-#define MDS_FMODE_EPOCH 01000000
-/* IO Epoch is opened on a file truncate. */
-#define MDS_FMODE_TRUNC 02000000
-/* Size-on-MDS Attribute Update is pending. */
-#define MDS_FMODE_SOM 04000000
+/* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
+/* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
+/* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
#define MDS_OPEN_CREATED 00000010
#define MDS_OPEN_CROSS 00000020
MDS_CROSS_REF = 1 << 1,
MDS_VTX_BYPASS = 1 << 2,
MDS_PERM_BYPASS = 1 << 3,
- MDS_SOM = 1 << 4,
+/* MDS_SOM = 1 << 4, obsolete since 2.8.0 */
MDS_QUOTA_IGNORE = 1 << 5,
/* Was MDS_CLOSE_CLEANUP (1 << 6), No more used */
MDS_KEEP_ORPHAN = 1 << 7,
MDS_OWNEROVERRIDE = 1 << 11,
MDS_HSM_RELEASE = 1 << 12,
MDS_RENAME_MIGRATE = 1 << 13,
+ MDS_CLOSE_LAYOUT_SWAP = 1 << 14,
};
/* instance of mdt_reint_rec */
typedef enum {
OBD_PING = 400,
OBD_LOG_CANCEL,
- OBD_QC_CALLBACK,
+ OBD_QC_CALLBACK, /* not used since 2.4 */
OBD_IDX_READ,
OBD_LAST_OPC
} obd_cmd_t;
/* for multiple changelog consumers */
LLOG_CHANGELOG_USER_ORIG_CTXT = 14,
LLOG_AGENT_ORIG_CTXT = 15, /**< agent requests generation on cdt */
+ LLOG_UPDATELOG_ORIG_CTXT = 16, /* update log */
+ LLOG_UPDATELOG_REPL_CTXT = 17, /* update log */
LLOG_MAX_CTXTS
};
CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000,
CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000,
HSM_AGENT_REC = LLOG_OP_MAGIC | 0x80000,
+ UPDATE_REC = LLOG_OP_MAGIC | 0xa0000,
LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539,
LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b,
} llog_op_type;
#define CHANGELOG_MINMASK (1 << CL_MARK)
/** bits covering all \a changelog_rec_type's */
#define CHANGELOG_ALLMASK 0XFFFFFFFF
-/** default \a changelog_rec_type mask */
-#define CHANGELOG_DEFMASK CHANGELOG_ALLMASK & ~(1 << CL_ATIME | 1 << CL_CLOSE)
+/** default \a changelog_rec_type mask. Allow all of them, except
+ * CL_ATIME since it can really be time consuming, and not necessary
+ * under normal use. */
+#define CHANGELOG_DEFMASK (CHANGELOG_ALLMASK & ~(1 << CL_ATIME))
/* changelog llog name, needed by client replicators */
#define CHANGELOG_CATALOG "changelog_catalog"
struct lustre_handle o_handle; /* brw: lock handle to prolong
* locks */
struct llog_cookie o_lcookie; /* destroy: unlink cookie from
- * MDS */
+ * MDS, obsolete in 2.8, reused
+ * in OSP */
__u32 o_uid_h;
__u32 o_gid_h;
/* Key for FIEMAP to be used in get_info calls */
struct ll_fiemap_info_key {
- char name[8];
- struct obdo oa;
- struct ll_user_fiemap fiemap;
+ char lfik_name[8];
+ struct obdo lfik_oa;
+ struct fiemap lfik_fiemap;
};
extern void lustre_swab_ost_body (struct ost_body *b);
extern void lustre_swab_ost_last_id(__u64 *id);
-extern void lustre_swab_fiemap(struct ll_user_fiemap *fiemap);
+extern void lustre_swab_fiemap(struct fiemap *fiemap);
extern void lustre_swab_lov_user_md_v1(struct lov_user_md_v1 *lum);
extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
*/
/**
- * Type of each update
+ * Type of each update, if adding/deleting update, please also update
+ * update_opcode in lustre/target/out_lib.c.
*/
enum update_type {
+ OUT_START = 0,
OUT_CREATE = 1,
OUT_DESTROY = 2,
OUT_REF_ADD = 3,
OUT_INDEX_DELETE = 11,
OUT_WRITE = 12,
OUT_XATTR_DEL = 13,
+ OUT_PUNCH = 14,
+ OUT_READ = 15,
OUT_LAST
};
void lustre_swab_object_update_request(struct object_update_request *our);
static inline size_t
-object_update_size(const struct object_update *update)
+object_update_params_size(const struct object_update *update)
{
- const struct object_update_param *param;
- size_t size;
- unsigned int i;
+ const struct object_update_param *param;
+ size_t total_size = 0;
+ unsigned int i;
- size = offsetof(struct object_update, ou_params[0]);
+ param = &update->ou_params[0];
for (i = 0; i < update->ou_params_count; i++) {
- param = (struct object_update_param *)((char *)update + size);
- size += object_update_param_size(param);
+ size_t size = object_update_param_size(param);
+
+ param = (struct object_update_param *)((char *)param + size);
+ total_size += size;
}
- return size;
+ return total_size;
+}
+
+static inline size_t
+object_update_size(const struct object_update *update)
+{
+ return offsetof(struct object_update, ou_params[0]) +
+ object_update_params_size(update);
}
static inline struct object_update *
return ptr;
}
+/* read update result */
+struct out_read_reply {
+ __u32 orr_size;
+ __u32 orr_padding;
+ __u64 orr_offset;
+ char orr_data[0];
+};
+
+static inline void orr_cpu_to_le(struct out_read_reply *orr_dst,
+ const struct out_read_reply *orr_src)
+{
+ orr_dst->orr_size = cpu_to_le32(orr_src->orr_size);
+ orr_dst->orr_padding = cpu_to_le32(orr_src->orr_padding);
+ orr_dst->orr_offset = cpu_to_le64(orr_dst->orr_offset);
+}
+
+static inline void orr_le_to_cpu(struct out_read_reply *orr_dst,
+ const struct out_read_reply *orr_src)
+{
+ orr_dst->orr_size = le32_to_cpu(orr_src->orr_size);
+ orr_dst->orr_padding = le32_to_cpu(orr_src->orr_padding);
+ orr_dst->orr_offset = le64_to_cpu(orr_dst->orr_offset);
+}
+
/** layout swap request structure
* fid1 and fid2 are in mdt_body
*/