*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2015, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#ifndef _LUSTRE_IDL_H_
#define _LUSTRE_IDL_H_
+#include <asm/byteorder.h>
#include <linux/types.h>
-#include <libcfs/libcfs.h>
+
#include <lnet/types.h>
#include <lustre/lustre_user.h> /* Defn's shared with user-space. */
-#include <lustre/lustre_errno.h>
#include <lustre_ver.h>
/*
LUSTRE_FID_INIT_OID = 1UL
};
-/** returns fid object sequence */
-static inline __u64 fid_seq(const struct lu_fid *fid)
-{
- return fid->f_seq;
-}
-
-/** returns fid object id */
-static inline __u32 fid_oid(const struct lu_fid *fid)
-{
- return fid->f_oid;
-}
-
-/** returns fid object version */
-static inline __u32 fid_ver(const struct lu_fid *fid)
-{
- return fid->f_ver;
-}
-
-static inline void fid_zero(struct lu_fid *fid)
-{
- memset(fid, 0, sizeof(*fid));
-}
-
-static inline __u64 fid_ver_oid(const struct lu_fid *fid)
-{
- return ((__u64)fid_ver(fid) << 32 | fid_oid(fid));
-}
+/**
+ * Different FID Format
+ * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0
+ *
+ * FID:
+ * File IDentifier generated by client from range allocated by the seq service.
+ * First 0x400 sequences [2^33, 2^33 + 0x400] are reserved for system use. Note
+ * that on ldiskfs MDTs IGIF FIDs can use inode numbers starting at 12,
+ * but this is in the IGIF SEQ range and does not conflict with assigned FIDs.
+ *
+ * IGIF:
+ * Inode and Generation In FID, a surrogate FID used to globally identify an
+ * existing object on OLD formatted MDT file system. This would only be used on
+ * MDT0 in a DNE filesystem, because there are not expected to be any OLD
+ * formatted DNE filesystems. Belongs to a sequence in [12, 2^32 - 1] range,
+ * where sequence number is inode number, and inode generation is used as OID.
+ * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem,
+ * which is the maximum possible for an ldiskfs backend. NOTE: This assumes
+ * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible
+ * to clients, which has always been true.
+ *
+ * IDIF:
+ * Object ID in FID, a surrogate FID used to globally identify an existing
+ * object on OLD formatted OST file system. Belongs to a sequence in
+ * [2^32, 2^33 - 1]. Sequence number is calculated as:
+ * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff)
+ * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object ID.
+ * The generation of unique SEQ values per OST allows the IDIF FIDs to be
+ * identified in the FLD correctly. The OID field is calculated as:
+ * objid & 0xffffffff
+ * that is, it consists of lower 32 bits of object ID. NOTE: This assumes that
+ * no more than 2^48-1 objects have ever been created on an OST, and that no
+ * more than 65535 OSTs are in use. Both are very reasonable assumptions (can
+ * uniquely map all objects on an OST that created 1M objects per second for 9
+ * years, or combinations thereof).
+ *
+ * OST_MDT0:
+ * Surrogate FID used to identify an existing object on OLD formatted OST
+ * filesystem. Belongs to the reserved sequence 0, and is used internally prior
+ * to the introduction of FID-on-OST, at which point IDIF will be used to
+ * identify objects as residing on a specific OST.
+ *
+ * LLOG:
+ * For Lustre Log objects the object sequence 1 is used. This is compatible with
+ * both OLD and NEW.1 namespaces, as this SEQ number is in the ext3/ldiskfs
+ * reserved inode range and does not conflict with IGIF sequence numbers.
+ *
+ * ECHO:
+ * For testing OST IO performance the object sequence 2 is used. This is
+ * compatible with both OLD and NEW.1 namespaces, as this SEQ number is in the
+ * ext3/ldiskfs reserved inode range and does not conflict with IGIF sequence
+ * numbers.
+ *
+ * OST_MDT1 .. OST_MAX:
+ * For testing with multiple MDTs the object sequence 3 through 9 is used,
+ * allowing direct mapping of MDTs 1 through 7 respectively, for a total of 8
+ * MDTs including OST_MDT0. This matches the legacy CMD project "group"
+ * mappings. However, this SEQ range is only for testing prior to any production
+ * DNE release, as the objects in this range conflict across all OSTs, as the
+ * OST index is not part of the FID.
+ *
+ *
+ * For compatibility with existing OLD OST network protocol structures, the FID
+ * must map onto the o_id and o_gr in a manner that ensures existing objects are
+ * identified consistently for IO, as well as onto the lock namespace to ensure
+ * both IDIFs map onto the same objects for IO as well as resources in the DLM.
+ *
+ * DLM OLD OBIF/IDIF:
+ * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases
+ *
+ * DLM NEW.1 FID (this is the same for both the MDT and OST):
+ * resource[] = {SEQ, OID, VER, HASH};
+ *
+ * Note that for mapping IDIF values to DLM resource names the o_id may be
+ * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible
+ * for the o_id numbers to overlap FID SEQ numbers in the resource. However, in
+ * all production releases the OLD o_seq field is always zero, and all valid FID
+ * OID values are non-zero, so the lock resources will not collide.
+ *
+ * For objects within the IDIF range, group extraction (non-CMD) will be:
+ * o_id = (fid->f_seq & 0xffff) << 32 | fid->f_oid;
+ * o_seq = 0; // formerly group number
+ */
/**
* Note that reserved SEQ numbers below 12 will conflict with ldiskfs
* inodes in the IGIF namespace, so these reserved SEQ numbers can be
* used for other purposes and not risk collisions with existing inodes.
- *
- * Different FID Format
- * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0
*/
enum fid_seq {
FID_SEQ_OST_MDT0 = 0,
FID_OID_ECHO_ROOT = 2UL,
};
-static inline bool fid_seq_is_mdt0(__u64 seq)
-{
- return seq == FID_SEQ_OST_MDT0;
-}
-
-static inline bool fid_seq_is_mdt(__u64 seq)
-{
- return seq == FID_SEQ_OST_MDT0 || seq >= FID_SEQ_NORMAL;
-};
-
-static inline bool fid_seq_is_echo(__u64 seq)
-{
- return seq == FID_SEQ_ECHO;
-}
-
-static inline bool fid_is_echo(const struct lu_fid *fid)
-{
- return fid_seq_is_echo(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_llog(__u64 seq)
-{
- return seq == FID_SEQ_LLOG;
-}
-
-static inline bool fid_is_llog(const struct lu_fid *fid)
-{
- /* file with OID == 0 is not llog but contains last oid */
- return fid_seq_is_llog(fid_seq(fid)) && fid_oid(fid) > 0;
-}
-
-static inline bool fid_seq_is_rsvd(__u64 seq)
-{
- return seq > FID_SEQ_OST_MDT0 && seq <= FID_SEQ_RSVD;
-};
-
-static inline bool fid_seq_is_special(__u64 seq)
-{
- return seq == FID_SEQ_SPECIAL;
-};
-
-static inline bool fid_seq_is_local_file(__u64 seq)
-{
- return seq == FID_SEQ_LOCAL_FILE ||
- seq == FID_SEQ_LOCAL_NAME;
-};
-
-static inline bool fid_seq_is_root(__u64 seq)
-{
- return seq == FID_SEQ_ROOT;
-}
-
-static inline bool fid_seq_is_dot(__u64 seq)
-{
- return seq == FID_SEQ_DOT_LUSTRE;
-}
-
-static inline bool fid_seq_is_default(__u64 seq)
-{
- return seq == FID_SEQ_LOV_DEFAULT;
-}
-
-static inline bool fid_is_mdt0(const struct lu_fid *fid)
-{
- return fid_seq_is_mdt0(fid_seq(fid));
-}
-
-static inline void lu_root_fid(struct lu_fid *fid)
-{
- fid->f_seq = FID_SEQ_ROOT;
- fid->f_oid = FID_OID_ROOT;
- fid->f_ver = 0;
-}
-
-static inline void lu_echo_root_fid(struct lu_fid *fid)
-{
- fid->f_seq = FID_SEQ_ROOT;
- fid->f_oid = FID_OID_ECHO_ROOT;
- fid->f_ver = 0;
-}
-
-static inline void lu_update_log_fid(struct lu_fid *fid, __u32 index)
-{
- fid->f_seq = FID_SEQ_UPDATE_LOG;
- fid->f_oid = index;
- fid->f_ver = 0;
-}
-
-static inline void lu_update_log_dir_fid(struct lu_fid *fid, __u32 index)
-{
- fid->f_seq = FID_SEQ_UPDATE_LOG_DIR;
- fid->f_oid = index;
- fid->f_ver = 0;
-}
-
-/**
- * Check if a fid is igif or not.
- * \param fid the fid to be tested.
- * \return true if the fid is an igif; otherwise false.
- */
-static inline bool fid_seq_is_igif(__u64 seq)
-{
- return seq >= FID_SEQ_IGIF && seq <= FID_SEQ_IGIF_MAX;
-}
-
-static inline bool fid_is_igif(const struct lu_fid *fid)
-{
- return fid_seq_is_igif(fid_seq(fid));
-}
-
-/**
- * Check if a fid is idif or not.
- * \param fid the fid to be tested.
- * \return true if the fid is an idif; otherwise false.
- */
-static inline bool fid_seq_is_idif(__u64 seq)
-{
- return seq >= FID_SEQ_IDIF && seq <= FID_SEQ_IDIF_MAX;
-}
-
-static inline bool fid_is_idif(const struct lu_fid *fid)
-{
- return fid_seq_is_idif(fid_seq(fid));
-}
-
-static inline bool fid_is_local_file(const struct lu_fid *fid)
-{
- return fid_seq_is_local_file(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_norm(__u64 seq)
-{
- return (seq >= FID_SEQ_NORMAL);
-}
-
-static inline bool fid_is_norm(const struct lu_fid *fid)
-{
- return fid_seq_is_norm(fid_seq(fid));
-}
-
-static inline int fid_is_layout_rbtree(const struct lu_fid *fid)
-{
- return fid_seq(fid) == FID_SEQ_LAYOUT_RBTREE;
-}
-
-static inline bool fid_seq_is_update_log(__u64 seq)
-{
- return seq == FID_SEQ_UPDATE_LOG;
-}
-
-static inline bool fid_is_update_log(const struct lu_fid *fid)
-{
- return fid_seq_is_update_log(fid_seq(fid));
-}
-
-static inline bool fid_seq_is_update_log_dir(__u64 seq)
-{
- return seq == FID_SEQ_UPDATE_LOG_DIR;
-}
-
-static inline bool fid_is_update_log_dir(const struct lu_fid *fid)
-{
- return fid_seq_is_update_log_dir(fid_seq(fid));
-}
-
-/* convert an OST objid into an IDIF FID SEQ number */
-static inline __u64 fid_idif_seq(__u64 id, __u32 ost_idx)
-{
- return FID_SEQ_IDIF | (ost_idx << 16) | ((id >> 32) & 0xffff);
-}
-
-/* convert a packed IDIF FID into an OST objid */
-static inline __u64 fid_idif_id(__u64 seq, __u32 oid, __u32 ver)
-{
- return ((__u64)ver << 48) | ((seq & 0xffff) << 32) | oid;
-}
-
-static inline __u32 idif_ost_idx(__u64 seq)
-{
- return (seq >> 16) & 0xffff;
-}
-
-/* extract ost index from IDIF FID */
-static inline __u32 fid_idif_ost_idx(const struct lu_fid *fid)
-{
- return idif_ost_idx(fid_seq(fid));
-}
-
-/* extract OST sequence (group) from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_seq(const struct ost_id *ostid)
-{
- if (fid_seq_is_mdt0(ostid->oi.oi_seq))
- return FID_SEQ_OST_MDT0;
-
- if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
- return FID_SEQ_LOV_DEFAULT;
-
- if (fid_is_idif(&ostid->oi_fid))
- return FID_SEQ_OST_MDT0;
-
- return fid_seq(&ostid->oi_fid);
-}
-
-/* extract OST objid from a wire ost_id (id/seq) pair */
-static inline __u64 ostid_id(const struct ost_id *ostid)
-{
- if (fid_seq_is_mdt0(ostid->oi.oi_seq))
- return ostid->oi.oi_id & IDIF_OID_MASK;
-
- if (unlikely(fid_seq_is_default(ostid->oi.oi_seq)))
- return ostid->oi.oi_id;
-
- if (fid_is_idif(&ostid->oi_fid))
- return fid_idif_id(fid_seq(&ostid->oi_fid),
- fid_oid(&ostid->oi_fid), 0);
-
- return fid_oid(&ostid->oi_fid);
-}
-
-static inline void ostid_set_seq(struct ost_id *oi, __u64 seq)
-{
- if (fid_seq_is_mdt0(seq) || fid_seq_is_default(seq)) {
- oi->oi.oi_seq = seq;
- } else {
- oi->oi_fid.f_seq = seq;
- /* Note: if f_oid + f_ver is zero, we need init it
- * to be 1, otherwise, ostid_seq will treat this
- * as old ostid (oi_seq == 0) */
- if (oi->oi_fid.f_oid == 0 && oi->oi_fid.f_ver == 0)
- oi->oi_fid.f_oid = LUSTRE_FID_INIT_OID;
- }
-}
-
-static inline void ostid_set_seq_mdt0(struct ost_id *oi)
-{
- ostid_set_seq(oi, FID_SEQ_OST_MDT0);
-}
-
-static inline void ostid_set_seq_echo(struct ost_id *oi)
-{
- ostid_set_seq(oi, FID_SEQ_ECHO);
-}
-
-static inline void ostid_set_seq_llog(struct ost_id *oi)
-{
- ostid_set_seq(oi, FID_SEQ_LLOG);
-}
-
-/**
- * Note: we need check oi_seq to decide where to set oi_id,
- * so oi_seq should always be set ahead of oi_id.
- */
-static inline void ostid_set_id(struct ost_id *oi, __u64 oid)
-{
- if (fid_seq_is_mdt0(oi->oi.oi_seq)) {
- if (oid >= IDIF_MAX_OID) {
- CERROR("Too large OID %#llx to set MDT0 "DOSTID"\n",
- (unsigned long long)oid, POSTID(oi));
- return;
- }
- oi->oi.oi_id = oid;
- } else if (fid_is_idif(&oi->oi_fid)) {
- if (oid >= IDIF_MAX_OID) {
- CERROR("Too large OID %#llx to set IDIF "DOSTID"\n",
- (unsigned long long)oid, POSTID(oi));
- return;
- }
- oi->oi_fid.f_seq = fid_idif_seq(oid,
- fid_idif_ost_idx(&oi->oi_fid));
- oi->oi_fid.f_oid = oid;
- oi->oi_fid.f_ver = oid >> 48;
- } else {
- if (oid > OBIF_MAX_OID) {
- CERROR("Too large oid %#llx to set REG "DOSTID"\n",
- (unsigned long long)oid, POSTID(oi));
- return;
- }
- oi->oi_fid.f_oid = oid;
- }
-}
-
-static inline int fid_set_id(struct lu_fid *fid, __u64 oid)
-{
- if (unlikely(fid_seq_is_igif(fid->f_seq))) {
- CERROR("bad IGIF, "DFID"\n", PFID(fid));
- return -EBADF;
- }
-
- if (fid_is_idif(fid)) {
- if (oid >= IDIF_MAX_OID) {
- CERROR("Too large OID %#llx to set IDIF "DFID"\n",
- (unsigned long long)oid, PFID(fid));
- return -EBADF;
- }
- fid->f_seq = fid_idif_seq(oid, fid_idif_ost_idx(fid));
- fid->f_oid = oid;
- fid->f_ver = oid >> 48;
- } else {
- if (oid > OBIF_MAX_OID) {
- CERROR("Too large OID %#llx to set REG "DFID"\n",
- (unsigned long long)oid, PFID(fid));
- return -EBADF;
- }
- fid->f_oid = oid;
- }
- return 0;
-}
-
-/**
- * Unpack an OST object id/seq (group) into a FID. This is needed for
- * converting all obdo, lmm, lsm, etc. 64-bit id/seq pairs into proper
- * FIDs. Note that if an id/seq is already in FID/IDIF format it will
- * be passed through unchanged. Only legacy OST objects in "group 0"
- * will be mapped into the IDIF namespace so that they can fit into the
- * struct lu_fid fields without loss. For reference see:
- * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs
- */
-static inline int ostid_to_fid(struct lu_fid *fid, const struct ost_id *ostid,
- __u32 ost_idx)
-{
- __u64 seq = ostid_seq(ostid);
-
- if (ost_idx > 0xffff) {
- CERROR("bad ost_idx, "DOSTID" ost_idx:%u\n", POSTID(ostid),
- ost_idx);
- return -EBADF;
- }
-
- if (fid_seq_is_mdt0(seq)) {
- __u64 oid = ostid_id(ostid);
-
- /* This is a "legacy" (old 1.x/2.early) OST object in "group 0"
- * that we map into the IDIF namespace. It allows up to 2^48
- * objects per OST, as this is the object namespace that has
- * been in production for years. This can handle create rates
- * of 1M objects/s/OST for 9 years, or combinations thereof. */
- if (oid >= IDIF_MAX_OID) {
- CERROR("bad MDT0 id(1), "DOSTID" ost_idx:%u\n",
- POSTID(ostid), ost_idx);
- return -EBADF;
- }
- fid->f_seq = fid_idif_seq(oid, ost_idx);
- /* truncate to 32 bits by assignment */
- fid->f_oid = oid;
- /* in theory, not currently used */
- fid->f_ver = oid >> 48;
- } else if (likely(!fid_seq_is_default(seq)))
- /* if (fid_seq_is_idif(seq) || fid_seq_is_norm(seq)) */ {
- /* This is either an IDIF object, which identifies objects
- * across all OSTs, or a regular FID. The IDIF namespace maps
- * legacy OST objects into the FID namespace. In both cases,
- * we just pass the FID through, no conversion needed. */
- if (ostid->oi_fid.f_ver != 0) {
- CERROR("bad MDT0 id(2), "DOSTID" ost_idx:%u\n",
- POSTID(ostid), ost_idx);
- return -EBADF;
- }
- *fid = ostid->oi_fid;
- }
-
- return 0;
-}
-
-/* pack any OST FID into an ostid (id/seq) for the wire/disk */
-static inline int fid_to_ostid(const struct lu_fid *fid, struct ost_id *ostid)
-{
- if (unlikely(fid_seq_is_igif(fid->f_seq))) {
- CERROR("bad IGIF, "DFID"\n", PFID(fid));
- return -EBADF;
- }
-
- if (fid_is_idif(fid)) {
- ostid_set_seq_mdt0(ostid);
- ostid_set_id(ostid, fid_idif_id(fid_seq(fid), fid_oid(fid),
- fid_ver(fid)));
- } else {
- ostid->oi_fid = *fid;
- }
-
- return 0;
-}
-
-/* Check whether the fid is for LAST_ID */
-static inline bool fid_is_last_id(const struct lu_fid *fid)
-{
- return fid_oid(fid) == 0 && fid_seq(fid) != FID_SEQ_UPDATE_LOG &&
- fid_seq(fid) != FID_SEQ_UPDATE_LOG_DIR;
-}
-
-/**
- * Get inode number from an igif.
- * \param fid an igif to get inode number from.
- * \return inode number for the igif.
- */
-static inline ino_t lu_igif_ino(const struct lu_fid *fid)
-{
- return fid_seq(fid);
-}
-
-/**
- * Get inode generation from an igif.
- * \param fid an igif to get inode generation from.
- * \return inode generation for the igif.
- */
-static inline __u32 lu_igif_gen(const struct lu_fid *fid)
-{
- return fid_oid(fid);
-}
-
-/**
- * Build igif from the inode number/generation.
- */
-static inline void lu_igif_build(struct lu_fid *fid, __u32 ino, __u32 gen)
-{
- fid->f_seq = ino;
- fid->f_oid = gen;
- fid->f_ver = 0;
-}
-
-/*
- * Fids are transmitted across network (in the sender byte-ordering),
- * and stored on disk in big-endian order.
- */
-static inline void fid_cpu_to_le(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = cpu_to_le64(fid_seq(src));
- dst->f_oid = cpu_to_le32(fid_oid(src));
- dst->f_ver = cpu_to_le32(fid_ver(src));
-}
-
-static inline void fid_le_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = le64_to_cpu(fid_seq(src));
- dst->f_oid = le32_to_cpu(fid_oid(src));
- dst->f_ver = le32_to_cpu(fid_ver(src));
-}
-
-static inline void fid_cpu_to_be(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = cpu_to_be64(fid_seq(src));
- dst->f_oid = cpu_to_be32(fid_oid(src));
- dst->f_ver = cpu_to_be32(fid_ver(src));
-}
-
-static inline void fid_be_to_cpu(struct lu_fid *dst, const struct lu_fid *src)
-{
- dst->f_seq = be64_to_cpu(fid_seq(src));
- dst->f_oid = be32_to_cpu(fid_oid(src));
- dst->f_ver = be32_to_cpu(fid_ver(src));
-}
-
-static inline bool fid_is_sane(const struct lu_fid *fid)
-{
- return fid != NULL &&
- ((fid_seq(fid) >= FID_SEQ_START && fid_ver(fid) == 0) ||
- fid_is_igif(fid) || fid_is_idif(fid) ||
- fid_seq_is_rsvd(fid_seq(fid)));
-}
-
-static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
-{
- return memcmp(f0, f1, sizeof *f0) == 0;
-}
-
-#define __diff_normalize(val0, val1) \
-({ \
- typeof(val0) __val0 = (val0); \
- typeof(val1) __val1 = (val1); \
- \
- (__val0 == __val1 ? 0 : __val0 > __val1 ? +1 : -1); \
-})
-
-static inline int lu_fid_cmp(const struct lu_fid *f0,
- const struct lu_fid *f1)
-{
- return
- __diff_normalize(fid_seq(f0), fid_seq(f1)) ?:
- __diff_normalize(fid_oid(f0), fid_oid(f1)) ?:
- __diff_normalize(fid_ver(f0), fid_ver(f1));
-}
-
-static inline void ostid_cpu_to_le(const struct ost_id *src_oi,
- struct ost_id *dst_oi)
-{
- if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
- dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
- } else {
- fid_cpu_to_le(&dst_oi->oi_fid, &src_oi->oi_fid);
- }
-}
-
-static inline void ostid_le_to_cpu(const struct ost_id *src_oi,
- struct ost_id *dst_oi)
-{
- if (fid_seq_is_mdt0(src_oi->oi.oi_seq)) {
- dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
- } else {
- fid_le_to_cpu(&dst_oi->oi_fid, &src_oi->oi_fid);
- }
-}
-
struct lu_orphan_rec {
/* The MDT-object's FID referenced by the orphan OST-object */
struct lu_fid lor_fid;
static inline struct lu_dirent *lu_dirent_start(struct lu_dirpage *dp)
{
- if (le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
- return NULL;
- else
- return dp->ldp_entries;
+ /* ldp_flags is converted from little-endian before testing; a page
+  * flagged LDF_EMPTY yields NULL instead of its entry array */
+ if (__le32_to_cpu(dp->ldp_flags) & LDF_EMPTY)
+ return NULL;
+ else
+ return dp->ldp_entries;
}
static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
{
- struct lu_dirent *next;
+ struct lu_dirent *next;
- if (le16_to_cpu(ent->lde_reclen) != 0)
- next = ((void *)ent) + le16_to_cpu(ent->lde_reclen);
- else
- next = NULL;
+ /* lde_reclen (little-endian) is the byte distance from this entry to
+  * the next one; zero means there is no next entry and NULL is
+  * returned.  The void * arithmetic is a GNU C extension that steps
+  * in bytes. */
+ if (__le16_to_cpu(ent->lde_reclen) != 0)
+ next = ((void *)ent) + __le16_to_cpu(ent->lde_reclen);
+ else
+ next = NULL;
- return next;
+ return next;
}
static inline size_t lu_dirent_calc_size(size_t namelen, __u16 attr)
* MDS_READPAGE page size
*
* This is the directory page size packed in MDS_READPAGE RPC.
- * It's different than PAGE_CACHE_SIZE because the client needs to
+ * It's different than PAGE_SIZE because the client needs to
* access the struct lu_dirpage header packed at the beginning of
* the "page" and without this there isn't any way to know find the
- * lu_dirpage header is if client and server PAGE_CACHE_SIZE differ.
+ * lu_dirpage header is if client and server PAGE_SIZE differ.
*/
#define LU_PAGE_SHIFT 12
#define LU_PAGE_SIZE (1UL << LU_PAGE_SHIFT)
#define LU_PAGE_MASK (~(LU_PAGE_SIZE - 1))
-#define LU_PAGE_COUNT (1 << (PAGE_CACHE_SHIFT - LU_PAGE_SHIFT))
+#define LU_PAGE_COUNT (1 << (PAGE_SHIFT - LU_PAGE_SHIFT))
/** @} lu_dir */
#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */
#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends init transno */
#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks*/
-#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated.
- *We do not support JOIN FILE
- *anymore, reserve this flags
- *just for preventing such bit
- *to be reused.*/
+#define OBD_CONNECT_BARRIER 0x2000ULL /* write barrier */
#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server can GetAttr By Fid*/
#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open hndl on specl nodes*/
#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client, never used
#define OBD_CONNECT_AT 0x1000000ULL /*client uses AT */
#define OBD_CONNECT_LRU_RESIZE 0x2000000ULL /*LRU resize feature. */
#define OBD_CONNECT_MDS_MDS 0x4000000ULL /*MDS-MDS connection */
-#define OBD_CONNECT_REAL 0x8000000ULL /*real connection */
+#define OBD_CONNECT_REAL 0x8000000ULL /* obsolete since 2.8 */
#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*Not used since 2.4 */
#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos*/
#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */
#define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | \
OBD_CONNECT_MNE_SWAB | OBD_CONNECT_PINGLESS |\
- OBD_CONNECT_BULK_MBITS)
+ OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER)
#define MGS_CONNECT_SUPPORTED2 0
* Please update DECLARE_CKSUM_NAME/OBD_CKSUM_ALL in obd.h when adding a new
* algorithm and also the OBD_FL_CKSUM* flags.
*/
-typedef enum {
+typedef enum cksum_types {
OBD_CKSUM_CRC32 = 0x00000001,
OBD_CKSUM_ADLER = 0x00000002,
OBD_CKSUM_CRC32C= 0x00000004,
/* reserved for specifying OSTs */
#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC)
#define LOV_MAGIC LOV_MAGIC_V1
+#define LOV_MAGIC_COMP_V1 (0x0BD60000 | LOV_MAGIC_MAGIC)
/*
* magic for fully defined striping
* depending on the case (replay uses ready striping, non-replay req uses
* hints), so MDT replaces magic with appropriate one and now LOD can
* easily understand what's inside -bzzz
+ *
+ * those *_DEF magics are only used on server side internally, they
+ * won't be put on wire or disk.
*/
-#define LOV_MAGIC_V1_DEF 0x0CD10BD0
-#define LOV_MAGIC_V3_DEF 0x0CD30BD0
+#define LOV_MAGIC_DEF 0x10000000
+#define LOV_MAGIC_V1_DEF (LOV_MAGIC_DEF | LOV_MAGIC_V1)
+#define LOV_MAGIC_V3_DEF (LOV_MAGIC_DEF | LOV_MAGIC_V3)
+#define LOV_MAGIC_COMP_V1_DEF (LOV_MAGIC_DEF | LOV_MAGIC_COMP_V1)
#define lov_pattern(pattern) (pattern & ~LOV_PATTERN_F_MASK)
#define lov_pattern_flags(pattern) (pattern & LOV_PATTERN_F_MASK)
struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
};
-/**
- * Sigh, because pre-2.4 uses
- * struct lov_mds_md_v1 {
- * ........
- * __u64 lmm_object_id;
- * __u64 lmm_object_seq;
- * ......
- * }
- * to identify the LOV(MDT) object, and lmm_object_seq will
- * be normal_fid, which make it hard to combine these conversion
- * to ostid_to FID. so we will do lmm_oi/fid conversion separately
- *
- * We can tell the lmm_oi by this way,
- * 1.8: lmm_object_id = {inode}, lmm_object_gr = 0
- * 2.1: lmm_object_id = {oid < 128k}, lmm_object_seq = FID_SEQ_NORMAL
- * 2.4: lmm_oi.f_seq = FID_SEQ_NORMAL, lmm_oi.f_oid = {oid < 128k},
- * lmm_oi.f_ver = 0
- *
- * But currently lmm_oi/lsm_oi does not have any "real" usages,
- * except for printing some information, and the user can always
- * get the real FID from LMA, besides this multiple case check might
- * make swab more complicate. So we will keep using id/seq for lmm_oi.
- */
-
-static inline void fid_to_lmm_oi(const struct lu_fid *fid,
- struct ost_id *oi)
-{
- oi->oi.oi_id = fid_oid(fid);
- oi->oi.oi_seq = fid_seq(fid);
-}
-
-static inline void lmm_oi_set_seq(struct ost_id *oi, __u64 seq)
-{
- oi->oi.oi_seq = seq;
-}
-
-static inline void lmm_oi_set_id(struct ost_id *oi, __u64 oid)
-{
- oi->oi.oi_id = oid;
-}
-
-static inline __u64 lmm_oi_id(const struct ost_id *oi)
-{
- return oi->oi.oi_id;
-}
-
-static inline __u64 lmm_oi_seq(const struct ost_id *oi)
-{
- return oi->oi.oi_seq;
-}
-
-static inline void lmm_oi_le_to_cpu(struct ost_id *dst_oi,
- const struct ost_id *src_oi)
-{
- dst_oi->oi.oi_id = le64_to_cpu(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = le64_to_cpu(src_oi->oi.oi_seq);
-}
-
-static inline void lmm_oi_cpu_to_le(struct ost_id *dst_oi,
- const struct ost_id *src_oi)
-{
- dst_oi->oi.oi_id = cpu_to_le64(src_oi->oi.oi_id);
- dst_oi->oi.oi_seq = cpu_to_le64(src_oi->oi.oi_seq);
-}
-
#define MAX_MD_SIZE (sizeof(struct lov_mds_md) + 4 * sizeof(struct lov_ost_data))
#define MIN_MD_SIZE (sizeof(struct lov_mds_md) + 1 * sizeof(struct lov_ost_data))
* client holds the lock */
#define OBD_MD_FLOBJCOUNT (0x0000400000000000ULL) /* for multiple destroy */
-/* OBD_MD_FLRMTLSETFACL (0x0001000000000000ULL) lfs lsetfacl, obsolete */
-/* OBD_MD_FLRMTLGETFACL (0x0002000000000000ULL) lfs lgetfacl, obsolete */
-/* OBD_MD_FLRMTRSETFACL (0x0004000000000000ULL) lfs rsetfacl, obsolete */
-/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
-
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
executed */
LUSTRE_LMA_FL_MASKS = LUSTRE_ORPHAN_FL,
};
-/* LUSTRE_LMA_FL_MASKS defines which flags will be stored in LMA */
-
-static inline int lma_to_lustre_flags(__u32 lma_flags)
-{
- return (lma_flags & LMAI_ORPHAN) ? LUSTRE_ORPHAN_FL : 0;
-}
-
-static inline int lustre_to_lma_flags(__u32 la_flags)
-{
- return (la_flags & LUSTRE_ORPHAN_FL) ? LMAI_ORPHAN : 0;
-}
-
-
#ifdef __KERNEL__
/* Convert wire LUSTRE_*_FL to corresponding client local VFS S_* values
* for the client inode i_flags. The LUSTRE_*_FL are the Lustre wire
__u32 cr_padding_4; /* rr_padding_4 */
};
-static inline void set_mrc_cr_flags(struct mdt_rec_create *mrc, __u64 flags)
-{
- mrc->cr_flags_l = (__u32)(flags & 0xFFFFFFFFUll);
- mrc->cr_flags_h = (__u32)(flags >> 32);
-}
-
-static inline __u64 get_mrc_cr_flags(struct mdt_rec_create *mrc)
-{
- return ((__u64)(mrc->cr_flags_l) | ((__u64)mrc->cr_flags_h << 32));
-}
-
/* instance of mdt_reint_rec */
struct mdt_rec_link {
__u32 lk_opcode;
static inline int lmv_mds_md_stripe_count_get(const union lmv_mds_md *lmm)
{
- switch (le32_to_cpu(lmm->lmv_magic)) {
+ /* every field is read through __le32_to_cpu(); the magic selects
+  * which member of the union holds the stripe count, and an
+  * unrecognised magic is reported as -EINVAL */
+ switch (__le32_to_cpu(lmm->lmv_magic)) {
case LMV_MAGIC_V1:
- return le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
+ return __le32_to_cpu(lmm->lmv_md_v1.lmv_stripe_count);
case LMV_USER_MAGIC:
- return le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
+ return __le32_to_cpu(lmm->lmv_user_md.lum_stripe_count);
default:
return -EINVAL;
}
}
-static inline int lmv_mds_md_stripe_count_set(union lmv_mds_md *lmm,
- unsigned int stripe_count)
+/**
+ * Read the hash type out of an LMV metadata union.
+ *
+ * The magic (converted from little-endian) selects the member to read:
+ * lmv_md_v1.lmv_hash_type for LMV_MAGIC_V1, lmv_user_md.lum_hash_type for
+ * LMV_USER_MAGIC.  The selected field is converted from little-endian.
+ *
+ * \param lmm the LMV metadata to inspect; it is not modified.
+ * \return the hash type, or -EINVAL if the magic is not one of the above.
+ */
+static inline int lmv_mds_md_hash_type_get(const union lmv_mds_md *lmm)
{
- switch (le32_to_cpu(lmm->lmv_magic)) {
+ switch (__le32_to_cpu(lmm->lmv_magic)) {
case LMV_MAGIC_V1:
- lmm->lmv_md_v1.lmv_stripe_count = cpu_to_le32(stripe_count);
- break;
+ return __le32_to_cpu(lmm->lmv_md_v1.lmv_hash_type);
case LMV_USER_MAGIC:
- lmm->lmv_user_md.lum_stripe_count = cpu_to_le32(stripe_count);
- break;
+ return __le32_to_cpu(lmm->lmv_user_md.lum_hash_type);
default:
return -EINVAL;
}
- return 0;
}
enum fld_rpc_opc {
(unsigned long long)(res)->lr_name.name[2], \
(unsigned long long)(res)->lr_name.name[3]
-static inline bool ldlm_res_eq(const struct ldlm_res_id *res0,
- const struct ldlm_res_id *res1)
-{
- return memcmp(res0, res1, sizeof(*res0)) == 0;
-}
-
/* lock types */
typedef enum ldlm_mode {
LCK_MINMODE = 0,
__u64 gid;
};
-static inline int ldlm_extent_overlap(const struct ldlm_extent *ex1,
- const struct ldlm_extent *ex2)
-{
- return ex1->start <= ex2->end && ex2->start <= ex1->end;
-}
-
-/* check if @ex1 contains @ex2 */
-static inline int ldlm_extent_contain(const struct ldlm_extent *ex1,
- const struct ldlm_extent *ex2)
-{
- return ex1->start <= ex2->start && ex1->end >= ex2->end;
-}
-
struct ldlm_inodebits {
__u64 bits;
};
struct ldlm_inodebits l_inodebits;
} ldlm_wire_policy_data_t;
+/*
+ * NOTE(review): presumably the lock value block (LVB) used with barrier
+ * locks, judging by the name and the lvb_ prefix -- confirm against the
+ * barrier code.  Field sizes add up to 16 bytes.
+ */
+struct barrier_lvb {
+ __u32 lvb_status; /* presumably the barrier status -- confirm */
+ __u32 lvb_index; /* NOTE(review): index of what is not visible here */
+ __u64 lvb_padding; /* named as padding; no other use visible here */
+};
+
+/*
+ * Barrier variant of union ldlm_gl_desc, where it sits next to
+ * struct ldlm_gl_lquota_desc.  Field sizes add up to 16 bytes.
+ * NOTE(review): "gl" presumably stands for glimpse -- confirm.
+ */
+struct ldlm_gl_barrier_desc {
+ __u32 lgbd_status; /* presumably the barrier status -- confirm */
+ __u32 lgbd_timeout; /* NOTE(review): time unit not visible here */
+ __u64 lgbd_padding; /* named as padding; no other use visible here */
+};
+
union ldlm_gl_desc {
struct ldlm_gl_lquota_desc lquota_desc;
+ struct ldlm_gl_barrier_desc barrier_desc;
};
enum ldlm_intent_flags {
struct lustre_handle lock_handle[LDLM_LOCKREQ_HANDLES];
};
-/* If LDLM_ENQUEUE, 1 slot is already occupied, 1 is available.
- * Otherwise, 2 are available. */
-#define ldlm_request_bufsize(count,type) \
-({ \
- int _avail = LDLM_LOCKREQ_HANDLES; \
- _avail -= (type == LDLM_ENQUEUE ? LDLM_ENQUEUE_CANCEL_OFF : 0); \
- sizeof(struct ldlm_request) + \
- (count > _avail ? count - _avail : 0) * \
- sizeof(struct lustre_handle); \
-})
-
struct ldlm_reply {
__u32 lock_flags;
__u32 lock_padding; /* also fix lustre_swab_ldlm_reply */
} mgs_cmd_t;
#define MGS_FIRST_OPC MGS_CONNECT
+/* MGS_PARAM_MAXLEN, KEY_SET_INFO and struct mgs_send_param are only
+ * provided while LUSTRE_VERSION_CODE is below 2.13.53.0 */
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
#define MGS_PARAM_MAXLEN 1024
#define KEY_SET_INFO "set_info"
struct mgs_send_param {
char mgs_param[MGS_PARAM_MAXLEN];
};
+#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0) */
/* We pass this info to the MGS so it can write config logs */
#define MTI_NAME_MAXLEN 64
} u;
};
+/*
+ * Config log types.  The comment on mgs_config_body::mcb_type refers to
+ * these values (CONFIG_T_[CONFIG|RECOVER]).  The explicit numbers keep each
+ * type's value fixed; CONFIG_T_MAX is left implicit so that it is always
+ * one past the last defined type.
+ */
+enum {
+ CONFIG_T_CONFIG = 0,
+ CONFIG_T_SPTLRPC = 1,
+ CONFIG_T_RECOVER = 2,
+ CONFIG_T_PARAMS = 3,
+ CONFIG_T_NODEMAP = 4,
+ CONFIG_T_BARRIER = 5,
+ CONFIG_T_MAX
+};
+
struct mgs_config_body {
- char mcb_name[MTI_NAME_MAXLEN]; /* logname */
- __u64 mcb_offset; /* next index of config log to request */
- __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */
- __u8 mcb_reserved;
- __u8 mcb_bits; /* bits unit size of config log */
- __u32 mcb_units; /* # of units for bulk transfer */
+ char mcb_name[MTI_NAME_MAXLEN]; /* logname */
+ __u64 mcb_offset; /* next index of config log to request */
+ __u16 mcb_type; /* type of log: CONFIG_T_[CONFIG|RECOVER] */
+ /* mcb_nm_cur_pass occupies the byte that used to be mcb_reserved, so
+  * the struct layout is unchanged.
+  * NOTE(review): presumably the nodemap config pass, mirroring
+  * mgs_config_res::mcr_nm_cur_pass -- confirm. */
+ __u8 mcb_nm_cur_pass;
+ __u8 mcb_bits; /* bits unit size of config log */
+ __u32 mcb_units; /* # of units for bulk transfer */
};
struct mgs_config_res {
- __u64 mcr_offset; /* index of last config log */
- __u64 mcr_size; /* size of the log */
+ __u64 mcr_offset; /* index of last config log */
+ /* anonymous union: both members share one 64-bit slot, so the struct
+  * keeps the size of the previous two-field version */
+ union {
+ __u64 mcr_size; /* size of the log */
+ __u64 mcr_nm_cur_pass; /* current nodemap config pass */
+ };
};
/* Config marker flags (in config log) */
(rec->lrh_len - sizeof(struct llog_rec_hdr) - \
sizeof(struct llog_rec_tail))
-static inline void *rec_tail(struct llog_rec_hdr *rec)
-{
- return (void *)((char *)rec + rec->lrh_len -
- sizeof(struct llog_rec_tail));
-}
-
struct llog_logid_rec {
struct llog_rec_hdr lid_hdr;
struct llog_logid lid_id;
}
}
-static inline bool agent_req_in_final_state(enum agent_req_status ars)
-{
- return ((ars == ARS_SUCCEED) || (ars == ARS_FAILED) ||
- (ars == ARS_CANCELED));
-}
-
struct llog_agent_req_rec {
struct llog_rec_hdr arr_hdr; /**< record header */
__u32 arr_status; /**< status of the request */
struct lustre_handle o_handle; /* brw: lock handle to prolong
* locks */
struct llog_cookie o_lcookie; /* destroy: unlink cookie from
- * MDS, obsolete in 2.8, reused
- * in OSP */
+ * MDS, obsolete in 2.8 */
__u32 o_uid_h;
__u32 o_gid_h;
LE_PEER_EXIT = 9,
LE_CONDITIONAL_DESTROY = 10,
LE_PAIRS_VERIFY = 11,
- LE_SKIP_NLINK_DECLARE = 13,
- LE_SKIP_NLINK = 14,
LE_SET_LMV_MASTER = 15,
LE_SET_LMV_SLAVE = 16,
};
struct fiemap lfik_fiemap;
};
-void lustre_print_user_md(unsigned int level, struct lov_user_md *lum,
- const char *msg);
-
-/* Functions for dumping PTLRPC fields */
-void dump_rniobuf(struct niobuf_remote *rnb);
-void dump_ioo(struct obd_ioobj *nb);
-void dump_ost_body(struct ost_body *ob);
-void dump_rcs(__u32 *rc);
-
#define IDX_INFO_MAGIC 0x3D37CC37
/* Index file transfer through the network. The server serializes the index into
#define CAPA_OPC_MDS_DEFAULT ~CAPA_OPC_OSS_ONLY
#define CAPA_OPC_OSS_DEFAULT ~(CAPA_OPC_MDS_ONLY | CAPA_OPC_OSS_ONLY)
-static inline bool lovea_slot_is_dummy(const struct lov_ost_data_v1 *obj)
-{
- /* zero area does not care about the bytes-order. */
- if (obj->l_ost_oi.oi.oi_id == 0 && obj->l_ost_oi.oi.oi_seq == 0 &&
- obj->l_ost_idx == 0 && obj->l_ost_gen == 0)
- return true;
-
- return false;
-}
-
/* lustre_capa::lc_hmac_alg */
enum {
CAPA_HMAC_ALG_SHA1 = 1, /**< sha1 algorithm */
/** The link ea holds 1 \a link_ea_entry for each hardlink */
#define LINK_EA_MAGIC 0x11EAF1DFUL
struct link_ea_header {
- __u32 leh_magic;
- __u32 leh_reccount;
- __u64 leh_len; /* total size */
- /* future use */
- __u32 padding1;
- __u32 padding2;
+ __u32 leh_magic;
+ __u32 leh_reccount;
+ __u64 leh_len; /* total size */
+ /* the two fields below occupy the slots formerly named padding1 and
+  * padding2, so the header size is unchanged.
+  * NOTE(review): semantics of leh_overflow_time are not visible here. */
+ __u32 leh_overflow_time;
+ __u32 leh_padding;
};
/** Hardlink data is name and parent fid.
*/
struct mdc_swap_layouts {
__u64 msl_flags;
-} __packed;
+} __attribute__((packed));
struct close_data {
struct lustre_handle cd_handle;
struct nodemap_global_rec ngr;
};
+/* This is the lu_ladvise struct which goes out on the wire.
+ * Corresponds to the userspace arg llapi_lu_ladvise.
+ * value[1-4] are unspecified fields, used differently by different advices */
+struct lu_ladvise {
+ __u16 lla_advice; /* advice type */
+ __u16 lla_value1; /* values for different advice types */
+ __u32 lla_value2; /* advice-specific, like lla_value1 */
+ __u64 lla_start; /* first byte of extent for advice */
+ __u64 lla_end; /* last byte of extent for advice */
+ __u32 lla_value3; /* advice-specific, like lla_value1 */
+ __u32 lla_value4; /* advice-specific, like lla_value1 */
+};
+
+/* This is the ladvise_hdr which goes on the wire, corresponds to the userspace
+ * arg llapi_ladvise_hdr.
+ * value[1-3] are unspecified fields, used differently by different advices */
+struct ladvise_hdr {
+ __u32 lah_magic; /* LADVISE_MAGIC */
+ __u32 lah_count; /* number of advices */
+ __u64 lah_flags; /* from enum ladvise_flag */
+ __u32 lah_value1; /* unused */
+ __u32 lah_value2; /* unused */
+ __u64 lah_value3; /* unused */
+ /* zero-length trailing array: adds nothing to sizeof(struct ladvise_hdr).
+  * NOTE(review): presumably lah_count entries follow the header --
+  * confirm against the packing code. */
+ struct lu_ladvise lah_advise[0]; /* advices in this header */
+};
+
#endif
/** @} lustreidl */