-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
r1->lsr_flags != r2->lsr_flags;
}
-#define DRANGE "[%#16.16"LPF64"x-%#16.16"LPF64"x):%x:%x"
+#define DRANGE "[%#16.16"LPF64"x-%#16.16"LPF64"x):%x:%s"
#define PRANGE(range) \
- (range)->lsr_start, \
- (range)->lsr_end, \
- (range)->lsr_index, \
- (range)->lsr_flags
+ (range)->lsr_start, \
+ (range)->lsr_end, \
+ (range)->lsr_index, \
+ (range)->lsr_flags == LU_SEQ_RANGE_MDT ? "mdt" : "ost"
+
/** \defgroup lu_fid lu_fid
* @{ */
FID_SEQ_START = 0x200000000ULL,
FID_SEQ_LOCAL_FILE = 0x200000001ULL,
FID_SEQ_DOT_LUSTRE = 0x200000002ULL,
+ /* XXX 0x200000003ULL is reserved for FID_SEQ_LLOG_OBJ */
+ FID_SEQ_SPECIAL = 0x200000004ULL,
+ FID_SEQ_QUOTA = 0x200000005ULL,
+ FID_SEQ_QUOTA_GLB = 0x200000006ULL,
FID_SEQ_NORMAL = 0x200000400ULL,
FID_SEQ_LOV_DEFAULT= 0xffffffffffffffffULL
};
#define IDIF_MAX_OID (1ULL << IDIF_OID_MAX_BITS)
#define IDIF_OID_MASK ((1ULL << IDIF_OID_MAX_BITS) - 1)
+/** OID for FID_SEQ_SPECIAL
+ *
+ * Object IDs that are used together with the FID_SEQ_SPECIAL sequence. */
+enum special_oid {
+ /* Big Filesystem Lock to serialize rename operations */
+ FID_OID_SPECIAL_BFL = 1UL,
+};
+
+/** OID for FID_SEQ_DOT_LUSTRE
+ *
+ * Object IDs that are used together with the FID_SEQ_DOT_LUSTRE sequence.
+ * NOTE(review): the "OBF" suffix presumably names the "object by FID" entry
+ * under .lustre -- confirm against the users of this constant. */
+enum dot_lustre_oid {
+ FID_OID_DOT_LUSTRE = 1UL,
+ FID_OID_DOT_LUSTRE_OBF = 2UL,
+};
static inline int fid_seq_is_mdt0(obd_seq seq)
{
static inline int fid_seq_is_rsvd(const __u64 seq)
{
- return seq <= FID_SEQ_RSVD;
+ return (seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD);
};
static inline int fid_is_mdt0(const struct lu_fid *fid)
}
/* pack an IDIF FID into an ostid (id/seq) for the wire/disk */
-static inline void ostid_idif_pack(struct lu_fid *fid, struct ost_id *ostid)
+static inline void ostid_idif_pack(const struct lu_fid *fid,
+ struct ost_id *ostid)
{
ostid->oi_seq = FID_SEQ_OST_MDT0;
ostid->oi_id = fid_idif_id(fid->f_seq, fid->f_oid, fid->f_ver);
}
/* pack a non-IDIF FID into an ostid (id/seq) for the wire/disk */
-static inline void ostid_fid_pack(struct lu_fid *fid, struct ost_id *ostid)
+static inline void ostid_fid_pack(const struct lu_fid *fid,
+ struct ost_id *ostid)
{
ostid->oi_seq = fid_seq(fid);
ostid->oi_id = fid_ver_oid(fid);
}
/* pack any OST FID into an ostid (id/seq) for the wire/disk */
-static inline int fid_ostid_pack(struct lu_fid *fid, struct ost_id *ostid)
+static inline int fid_ostid_pack(const struct lu_fid *fid,
+ struct ost_id *ostid)
{
if (unlikely(fid_seq_is_igif(fid->f_seq))) {
CERROR("bad IGIF, "DFID"\n", PFID(fid));
fid != NULL &&
((fid_seq(fid) >= FID_SEQ_START && fid_oid(fid) != 0
&& fid_ver(fid) == 0) ||
- fid_is_igif(fid));
+ fid_is_igif(fid) || fid_seq_is_rsvd(fid_seq(fid)));
}
static inline int fid_is_zero(const struct lu_fid *fid)
/* Check that there is no alignment padding. */
CLASSERT(sizeof *f0 ==
sizeof f0->f_seq + sizeof f0->f_oid + sizeof f0->f_ver);
- LASSERTF(fid_is_igif(f0) || fid_ver(f0) == 0, DFID, PFID(f0));
- LASSERTF(fid_is_igif(f1) || fid_ver(f1) == 0, DFID, PFID(f1));
+ LASSERTF((fid_is_igif(f0) || fid_is_idif(f0)) ||
+ fid_ver(f0) == 0, DFID, PFID(f0));
+ LASSERTF((fid_is_igif(f1) || fid_is_idif(f1)) ||
+ fid_ver(f1) == 0, DFID, PFID(f1));
return memcmp(f0, f1, sizeof *f0) == 0;
}
/* without gss, ptlrpc_body is put at the first buffer. */
#define PTLRPC_NUM_VERSIONS 4
-struct ptlrpc_body {
+#define JOBSTATS_JOBID_SIZE 32 /* size in bytes of the pb_jobid[] buffer */
+struct ptlrpc_body_v3 {
+ struct lustre_handle pb_handle;
+ __u32 pb_type;
+ __u32 pb_version;
+ __u32 pb_opc;
+ __u32 pb_status;
+ __u64 pb_last_xid;
+ __u64 pb_last_seen;
+ __u64 pb_last_committed;
+ __u64 pb_transno;
+ __u32 pb_flags;
+ __u32 pb_op_flags;
+ __u32 pb_conn_cnt;
+ __u32 pb_timeout; /* for a request, the deadline; for a reply, the
+ * service estimate */
+ __u32 pb_service_time; /* for rep, actual service time */
+ __u32 pb_limit;
+ __u64 pb_slv;
+ /* VBR: pre-versions, PTLRPC_NUM_VERSIONS slots */
+ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ /* padding for future needs */
+ __u64 pb_padding[4];
+ char pb_jobid[JOBSTATS_JOBID_SIZE]; /* job ID carried in the body */
+};
+#define ptlrpc_body ptlrpc_body_v3 /* the plain name refers to the v3 layout */
+
+struct ptlrpc_body_v2 {
struct lustre_handle pb_handle;
__u32 pb_type;
__u32 pb_version;
__u32 pb_op_flags;
__u32 pb_conn_cnt;
__u32 pb_timeout; /* for req, the deadline, for rep, the service est */
- __u32 pb_service_time; /* for rep, actual service time */
+ __u32 pb_service_time; /* for rep, actual service time, also used for
+ net_latency of req */
__u32 pb_limit;
__u64 pb_slv;
/* VBR: pre-versions */
#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */
#define OBD_CONNECT_MAX_EASIZE 0x800000000ULL /* preserved for large EA */
#define OBD_CONNECT_FULL20 0x1000000000ULL /* it is 2.0 client */
-#define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client supports layout lock */
+#define OBD_CONNECT_LAYOUTLOCK 0x2000000000ULL /* client uses layout lock */
#define OBD_CONNECT_64BITHASH 0x4000000000ULL /* client supports 64-bits
* directory hash */
#define OBD_CONNECT_MAXBYTES 0x8000000000ULL /* max stripe size */
#define OBD_CONNECT_IMP_RECOV 0x10000000000ULL /* imp recovery support */
+#define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */
+#define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */
+#define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS
+ * write RPC error properly */
+#define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for
+ * finer space reservation */
+#define OBD_CONNECT_NANOSECOND_TIMES 0x200000000000ULL /* nanosec resolution
+ * timestamps supported
+ */
+#define OBD_CONNECT_LVB_TYPE 0x400000000000ULL /* variable type of LVB */
+
+/* XXX README XXX:
+ * Please DO NOT add flag values here before first ensuring that this same
+ * flag value is not in use on some other branch. Please clear any such
+ * changes with senior engineers before starting to use a new flag. Then,
+ * submit a small patch against EVERY branch that ONLY adds the new flag
+ * and updates obd_connect_names[] for lprocfs_rd_connect_flags(), so it
+ * can be approved and landed easily to reserve the flag for future use. */
#define OCD_HAS_FLAG(ocd, flg) \
(!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
-/* also update obd_connect_names[] for lprocfs_rd_connect_flags()
- * and lustre/utils/wirecheck.c */
#ifdef HAVE_LRU_RESIZE_SUPPORT
#define LRU_RESIZE_CONNECT_FLAG OBD_CONNECT_LRU_RESIZE
OBD_CONNECT_FID | LRU_RESIZE_CONNECT_FLAG | \
OBD_CONNECT_VBR | OBD_CONNECT_LOV_V3 | \
OBD_CONNECT_SOM | OBD_CONNECT_FULL20 | \
- OBD_CONNECT_64BITHASH)
+ OBD_CONNECT_64BITHASH | \
+ OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \
OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
OBD_CONNECT_CHANGE_QS | \
- OBD_CONNECT_OSS_CAPA | OBD_CONNECT_RMT_CLIENT | \
+ OBD_CONNECT_OSS_CAPA | \
+ OBD_CONNECT_RMT_CLIENT | \
OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_VBR | \
OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \
- OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES)
+ OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \
+ OBD_CONNECT_MAX_EASIZE | \
+ OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS)
#define ECHO_CONNECT_SUPPORTED (0)
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
__u32 ocd_index; /* LOV index to connect to */
__u32 ocd_brw_size; /* Maximum BRW size in bytes */
__u64 ocd_ibits_known; /* inode bits this client understands */
- __u32 ocd_nllu; /* non-local-lustre-user */
- __u32 ocd_nllg; /* non-local-lustre-group */
+ __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
+ __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
+ __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
+ __u32 ocd_unused; /* also fix lustre_swab_connect */
__u64 ocd_transno; /* first transno from client to be replayed */
__u32 ocd_group; /* MDS group on OST */
__u32 ocd_cksum_types; /* supported checksum algorithms */
__u32 ocd_index; /* LOV index to connect to */
__u32 ocd_brw_size; /* Maximum BRW size in bytes */
__u64 ocd_ibits_known; /* inode bits this client understands */
- __u32 ocd_nllu; /* non-local-lustre-user */
- __u32 ocd_nllg; /* non-local-lustre-group */
+ __u8 ocd_blocksize; /* log2 of the backend filesystem blocksize */
+ __u8 ocd_inodespace; /* log2 of the per-inode space consumption */
+ __u16 ocd_grant_extent; /* per-extent grant overhead, in 1K blocks */
+ __u32 ocd_unused; /* also fix lustre_swab_connect */
__u64 ocd_transno; /* first transno from client to be replayed */
__u32 ocd_group; /* MDS group on OST */
__u32 ocd_cksum_types; /* supported checksum algorithms */
__u64 paddingE; /* added 2.1.0. also fix lustre_swab_connect */
__u64 paddingF; /* added 2.1.0. also fix lustre_swab_connect */
};
+/* XXX README XXX:
+ * Please DO NOT use any fields here before first ensuring that this same
+ * field is not in use on some other branch. Please clear any such changes
+ * with senior engineers before starting to use a new field. Then, submit
+ * a small patch against EVERY branch that ONLY adds the new field along with
+ * the matching OBD_CONNECT flag, so that it can be approved and landed easily to
+ * reserve the flag for future use. */
extern void lustre_swab_connect(struct obd_connect_data *ocd);
OBD_FL_CKSUM_RSVD2 = 0x00008000, /* for future cksum types */
OBD_FL_CKSUM_RSVD3 = 0x00010000, /* for future cksum types */
OBD_FL_SHRINK_GRANT = 0x00020000, /* object shrink the grant */
- OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client */
+ OBD_FL_MMAP = 0x00040000, /* object is mmapped on the client.
+ * XXX: obsoleted - reserved for old
+ * clients prior to 2.2 */
OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */
OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */
__u64 lmm_object_id; /* LOV object ID */
__u64 lmm_object_seq; /* LOV object seq number */
__u32 lmm_stripe_size; /* size of stripe in bytes */
- __u32 lmm_stripe_count; /* num stripes in use for this object */
+ /* lmm_stripe_count used to be __u32 */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ __u16 lmm_layout_gen; /* layout generation number */
struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
};
#define XATTR_NAME_LMA "trusted.lma"
#define XATTR_NAME_LMV "trusted.lmv"
#define XATTR_NAME_LINK "trusted.link"
+#define XATTR_NAME_FID "trusted.fid"
+#define XATTR_NAME_VERSION "trusted.version"
struct lov_mds_md_v3 { /* LOV EA mds/wire data (little-endian) */
__u64 lmm_object_id; /* LOV object ID */
__u64 lmm_object_seq; /* LOV object seq number */
__u32 lmm_stripe_size; /* size of stripe in bytes */
- __u32 lmm_stripe_count; /* num stripes in use for this object */
+ /* lmm_stripe_count used to be __u32 */
+ __u16 lmm_stripe_count; /* num stripes in use for this object */
+ __u16 lmm_layout_gen; /* layout generation number */
char lmm_pool_name[LOV_MAXPOOLNAME]; /* must be 32bit aligned */
struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
};
#define OBD_MD_FLCROSSREF (0x0000100000000000ULL) /* Cross-ref case */
#define OBD_MD_FLGETATTRLOCK (0x0000200000000000ULL) /* Get IOEpoch attributes
* under lock */
+#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */
#define OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) /* lfs lsetfacl case */
#define OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) /* lfs lgetfacl case */
#define OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) /* lfs rsetfacl case */
#define OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) /* lfs rgetfacl case */
+#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
+
#define OBD_MD_FLGETATTR (OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | \
OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLKSZ | \
OBD_MD_FLMODE | OBD_MD_FLTYPE | OBD_MD_FLUID | \
__u64 lvb_blocks;
};
-extern void lustre_swab_ost_lvb(struct ost_lvb *);
-
/*
* MDS REQ RECORDS
*/
#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */
#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */
#define MDS_INODELOCK_OPEN 0x000004 /* For opened files */
+#define MDS_INODELOCK_LAYOUT 0x000008 /* for layout */
-/* Do not forget to increase MDS_INODELOCK_MAXSHIFT when adding new bits */
+/* Do not forget to increase MDS_INODELOCK_MAXSHIFT when adding new bits
+ * XXX: MDS_INODELOCK_MAXSHIFT should be increased to 3 once the layout lock is
+ * supported */
#define MDS_INODELOCK_MAXSHIFT 2
/* This FULL lock is useful to take on unlink sort of operations */
#define MDS_INODELOCK_FULL ((1<<(MDS_INODELOCK_MAXSHIFT+1))-1)
extern void lustre_swab_ll_fid (struct ll_fid *fid);
+/* Positions of the FID-derived fields of a resource name (presumably indices
+ * into the ldlm_res_id name[] array -- confirm against the users).
+ *
+ * NOTE: until Lustre 1.8.7/2.1.1 the fid_ver() was packed into name[2],
+ * but was moved into name[1] along with the OID to avoid consuming the
+ * name[2,3] fields that need to be used for the quota id (also a FID). */
+enum {
+ LUSTRE_RES_ID_SEQ_OFF = 0,
+ LUSTRE_RES_ID_VER_OID_OFF = 1,
+ LUSTRE_RES_ID_WAS_VER_OFF = 2, /* old fid_ver() slot, see note above */
+ LUSTRE_RES_ID_HSH_OFF = 3
+};
+
#define MDS_STATUS_CONN 1
#define MDS_STATUS_LOV 2
#define MDS_OPEN_SYNC 00010000
#define MDS_OPEN_DIRECTORY 00200000
+#define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */
#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file.
MDS_SOM = 1 << 4,
MDS_QUOTA_IGNORE = 1 << 5,
MDS_CLOSE_CLEANUP = 1 << 6,
- MDS_KEEP_ORPHAN = 1 << 7
+ MDS_KEEP_ORPHAN = 1 << 7,
+ MDS_RECOV_OPEN = 1 << 8,
+ MDS_UNLINK_DESTROY = 1 << 9, /* Destroy OST object in mdd_unlink */
};
/* instance of mdt_reint_rec */
* LOV data structures
*/
-#define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
-#define LOV_MIN_STRIPE_SIZE (1<<LOV_MIN_STRIPE_BITS)
-#define LOV_MAX_STRIPE_COUNT 160 /* until bug 4424 is fixed */
-#define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
-
#define LOV_MAX_UUID_BUFFER_SIZE 8192
/* The size of the buffer the lov/mdc reserves for the
* array of UUIDs returned by the MDS. With the current
extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id);
+/**
+ * Compare two LDLM resource identifiers byte-for-byte.
+ *
+ * \param[in] res0  first resource identifier
+ * \param[in] res1  second resource identifier
+ *
+ * \retval 1 if both structures hold identical bytes
+ * \retval 0 otherwise
+ */
+static inline int ldlm_res_eq(const struct ldlm_res_id *res0,
+                              const struct ldlm_res_id *res1)
+{
+        return memcmp(res0, res1, sizeof(*res0)) == 0;
+}
+
/* lock types */
typedef enum {
LCK_MINMODE = 0,
return (ex1->start <= ex2->end) && (ex2->start <= ex1->end);
}
+/**
+ * Check whether extent \a ex1 fully contains extent \a ex2.
+ *
+ * Both bounds are compared inclusively, so two identical extents contain
+ * each other. Neither extent is modified, hence the const qualifiers.
+ *
+ * \param[in] ex1  candidate enclosing extent
+ * \param[in] ex2  extent that must lie within \a ex1
+ *
+ * \retval 1 if ex1->start <= ex2->start and ex1->end >= ex2->end
+ * \retval 0 otherwise
+ */
+static inline int ldlm_extent_contain(const struct ldlm_extent *ex1,
+                                      const struct ldlm_extent *ex2)
+{
+        return ex1->start <= ex2->start && ex1->end >= ex2->end;
+}
+
+
struct ldlm_inodebits {
__u64 bits;
};
extern void lustre_swab_ldlm_policy_data (ldlm_wire_policy_data_t *d);
+/* Wire form of an LVB, as a union over the supported LVB layouts.
+ *
+ * Similarly to ldlm_wire_policy_data_t, there is one common swabber for all
+ * LVB types (lustre_swab_lvb(), declared below). As a result, any new LVB
+ * structure added to this union must match the fields of the ost_lvb
+ * structure. */
+union ldlm_wire_lvb {
+ struct ost_lvb l_ost; /* the only LVB layout defined here so far */
+};
+
+extern void lustre_swab_lvb(union ldlm_wire_lvb *);
+
struct ldlm_intent {
__u64 opc;
};
/** bits covering all \a changelog_rec_type's */
#define CHANGELOG_ALLMASK 0XFFFFFFFF
/** default \a changelog_rec_type mask */
-#define CHANGELOG_DEFMASK CHANGELOG_ALLMASK & ~(1 << CL_ATIME)
+/* parenthesized as a whole so it stays a single operand wherever it expands */
+#define CHANGELOG_DEFMASK (CHANGELOG_ALLMASK & ~(1 << CL_ATIME | 1 << CL_CLOSE))
/* changelog llog name, needed by client replicators */
#define CHANGELOG_CATALOG "changelog_catalog"
struct llog_rec_tail cr_tail; /**< for_sizezof_only */
} __attribute__((packed));
+struct llog_changelog_ext_rec {
+ struct llog_rec_hdr cr_hdr; /**< llog record header */
+ struct changelog_ext_rec cr; /**< the changelog_ext_rec payload */
+ struct llog_rec_tail cr_tail; /**< for_sizeof_only */
+} __attribute__((packed));
+
#define CHANGELOG_USER_PREFIX "cl"
struct llog_changelog_user_rec {
LLOG_ORIGIN_HANDLE_WRITE_REC = 504,
LLOG_ORIGIN_HANDLE_CLOSE = 505,
LLOG_ORIGIN_CONNECT = 506,
- LLOG_CATINFO = 507, /* for lfs catinfo */
+ LLOG_CATINFO = 507, /* deprecated */
LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508,
LLOG_ORIGIN_HANDLE_DESTROY = 509, /* for destroy llog object*/
LLOG_LAST_OPC,
__u64 o_ioepoch; /* epoch in ost writes */
__u32 o_stripe_idx; /* holds stripe idx */
__u32 o_parent_ver;
- struct lustre_handle o_handle; /* brw: lock handle to prolong locks */
- struct llog_cookie o_lcookie; /* destroy: unlink cookie from MDS */
-
+ struct lustre_handle o_handle; /* brw: lock handle to prolong
+ * locks */
+ struct llog_cookie o_lcookie; /* destroy: unlink cookie from
+ * MDS */
__u32 o_uid_h;
__u32 o_gid_h;
- __u64 o_padding_3;
+
+ __u64 o_data_version; /* getattr: sum of iversion for
+ * each stripe.
+ * brw: grant space consumed on
+ * the client for the write */
__u64 o_padding_4;
__u64 o_padding_5;
__u64 o_padding_6;
#define o_undirty o_mode
#define o_dropped o_misc
#define o_cksum o_nlink
+#define o_grant_used o_data_version
static inline void lustre_set_wire_obdo(struct obdo *wobdo, struct obdo *lobdo)
{