Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / include / lustre / lustre_idl.h
index 7d710ef..04e3064 100644 (file)
@@ -372,6 +372,7 @@ static inline __u32 lu_igif_gen(const struct lu_fid *fid)
 }
 
 #define DFID "["LPX64":0x%x:0x%x]"
+#define SFID "0x%llx:0x%x:0x%x"
 
 #define PFID(fid)     \
         fid_seq(fid), \
@@ -489,18 +490,69 @@ static inline int lu_fid_cmp(const struct lu_fid *f0,
 
 /** \defgroup lu_dir lu_dir
  * @{ */
+
+/**
+ * Enumeration of possible directory entry attributes.
+ *
+ * Attributes follow directory entry header in the order they appear in this
+ * enumeration.
+ */
+enum lu_dirent_attrs {
+        LUDA_FID    = 0x0001,
+        LUDA_TYPE   = 0x0002,
+};
+
 /**
  * Layout of readdir pages, as transmitted on wire.
  */
 struct lu_dirent {
+        /** valid if LUDA_FID is set. */
         struct lu_fid lde_fid;
+        /** a unique entry identifier: a hash or an offset. */
         __u64         lde_hash;
+        /** total record length, including all attributes. */
         __u16         lde_reclen;
+        /** name length */
         __u16         lde_namelen;
-        __u32         lde_pad0;
+        /** optional variable size attributes following this entry.
+         *  taken from enum lu_dirent_attrs.
+         */
+        __u32         lde_attrs;
+        /** name is followed by the attributes indicated in ->ldp_attrs, in
+         *  their natural order. After the last attribute, padding bytes are
+         *  added to make ->lde_reclen a multiple of 8.
+         */
         char          lde_name[0];
 };
 
+/*
+ * Definitions of optional directory entry attributes formats.
+ *
+ * Individual attributes do not have their length encoded in a generic way. It
+ * is assumed that consumer of an attribute knows its format. This means that
+ * it is impossible to skip over an unknown attribute, except by skipping over all
+ * remaining attributes (by using ->lde_reclen), which is not too
+ * constraining, because new server versions will append new attributes at
+ * the end of an entry.
+ */
+
+/**
+ * Fid directory attribute: a fid of an object referenced by the entry. This
+ * will be almost always requested by the client and supplied by the server.
+ *
+ * Aligned to 8 bytes.
+ */
+/* To have compatibility with 1.8, lets have fid in lu_dirent struct. */
+
+/**
+ * File type.
+ *
+ * Aligned to 2 bytes.
+ */
+struct luda_type {
+        __u16 lt_type;
+};
+
 struct lu_dirpage {
         __u64            ldp_hash_start;
         __u64            ldp_hash_end;
@@ -533,11 +585,25 @@ static inline struct lu_dirent *lu_dirent_next(struct lu_dirent *ent)
         return next;
 }
 
+static inline int lu_dirent_calc_size(int namelen, __u16 attr)
+{
+        int size;
+
+        if (attr & LUDA_TYPE) {
+                const unsigned align = sizeof(struct luda_type) - 1;
+                size = (sizeof(struct lu_dirent) + namelen + align) & ~align;
+                size += sizeof(struct luda_type);
+        } else
+                size = sizeof(struct lu_dirent) + namelen;
+
+        return (size + 7) & ~7;
+}
+
 static inline int lu_dirent_size(struct lu_dirent *ent)
 {
         if (le16_to_cpu(ent->lde_reclen) == 0) {
-                return (sizeof(*ent) +
-                        le16_to_cpu(ent->lde_namelen) + 7) & ~7;
+                return lu_dirent_calc_size(le16_to_cpu(ent->lde_namelen),
+                                           le32_to_cpu(ent->lde_attrs));
         }
         return le16_to_cpu(ent->lde_reclen);
 }
@@ -666,39 +732,41 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define MSG_CONNECT_TRANSNO     0x00000100 /* report transno */
 
 /* Connect flags */
-#define OBD_CONNECT_RDONLY            0x1ULL /*client allowed read-only access*/
-#define OBD_CONNECT_INDEX             0x2ULL /*connect to specific LOV idx */
-#define OBD_CONNECT_MDS               0x4ULL /*connect from MDT to OST */
-#define OBD_CONNECT_GRANT             0x8ULL /*OSC acquires grant at connect */
-#define OBD_CONNECT_SRVLOCK          0x10ULL /*server takes locks for client */
-#define OBD_CONNECT_VERSION          0x20ULL /*Lustre versions in ocd */
-#define OBD_CONNECT_REQPORTAL        0x40ULL /*Separate non-IO request portal */
-#define OBD_CONNECT_ACL              0x80ULL /*access control lists */
-#define OBD_CONNECT_XATTR           0x100ULL /*client use extended attributes */
-#define OBD_CONNECT_CROW            0x200ULL /*MDS+OST create objects on write*/
-#define OBD_CONNECT_TRUNCLOCK       0x400ULL /*locks on server for punch */
-#define OBD_CONNECT_TRANSNO         0x800ULL /*replay sends initial transno */
-#define OBD_CONNECT_IBITS          0x1000ULL /*support for inodebits locks */
-#define OBD_CONNECT_JOIN           0x2000ULL /*files can be concatenated */
-#define OBD_CONNECT_ATTRFID        0x4000ULL /*Server supports GetAttr By Fid */
-#define OBD_CONNECT_NODEVOH        0x8000ULL /*No open handle on special nodes*/
-#define OBD_CONNECT_RMT_CLIENT 0x00010000ULL /*Remote client */
-#define OBD_CONNECT_RMT_CLIENT_FORCE 0x00020000ULL /*Remote client by force */
-#define OBD_CONNECT_BRW_SIZE      0x40000ULL /*Max bytes per rpc */
-#define OBD_CONNECT_QUOTA64       0x80000ULL /*64bit qunit_data.qd_count */
-#define OBD_CONNECT_MDS_CAPA     0x100000ULL /*MDS capability */
-#define OBD_CONNECT_OSS_CAPA     0x200000ULL /*OSS capability */
-#define OBD_CONNECT_CANCELSET    0x400000ULL /*Early batched cancels. */
-#define OBD_CONNECT_SOM        0x00800000ULL /*Size on MDS */
-#define OBD_CONNECT_AT         0x01000000ULL /*client uses adaptive timeouts */
-#define OBD_CONNECT_LRU_RESIZE 0x02000000ULL /*LRU resize feature. */
-#define OBD_CONNECT_MDS_MDS    0x04000000ULL /*MDS-MDS connection */
-#define OBD_CONNECT_REAL       0x08000000ULL /*real connection */
-#define OBD_CONNECT_CHANGE_QS  0x10000000ULL /*shrink/enlarge qunit b=10600 */
-#define OBD_CONNECT_CKSUM      0x20000000ULL /*support several cksum algos */
-#define OBD_CONNECT_FID        0x40000000ULL /*FID is supported by server */
-#define OBD_CONNECT_VBR        0x80000000ULL /*version based recovery */
-#define OBD_CONNECT_LOV_V3    0x100000000ULL /*client supports LOV v3 EA */
+#define OBD_CONNECT_RDONLY                0x1ULL /*client has read-only access*/
+#define OBD_CONNECT_INDEX                 0x2ULL /*connect specific LOV idx */
+#define OBD_CONNECT_MDS                   0x4ULL /*connect from MDT to OST */
+#define OBD_CONNECT_GRANT                 0x8ULL /*OSC gets grant at connect */
+#define OBD_CONNECT_SRVLOCK              0x10ULL /*server takes locks for cli */
+#define OBD_CONNECT_VERSION              0x20ULL /*Lustre versions in ocd */
+#define OBD_CONNECT_REQPORTAL            0x40ULL /*Separate non-IO req portal */
+#define OBD_CONNECT_ACL                  0x80ULL /*access control lists */
+#define OBD_CONNECT_XATTR               0x100ULL /*client use extended attr */
+#define OBD_CONNECT_CROW                0x200ULL /*MDS+OST create obj on write*/
+#define OBD_CONNECT_TRUNCLOCK           0x400ULL /*locks on server for punch */
+#define OBD_CONNECT_TRANSNO             0x800ULL /*replay sends init transno */
+#define OBD_CONNECT_IBITS              0x1000ULL /*support for inodebits locks*/
+#define OBD_CONNECT_JOIN               0x2000ULL /*files can be concatenated */
+#define OBD_CONNECT_ATTRFID            0x4000ULL /*Server can GetAttr By Fid*/
+#define OBD_CONNECT_NODEVOH            0x8000ULL /*No open hndl on specl nodes*/
+#define OBD_CONNECT_RMT_CLIENT        0x10000ULL /*Remote client */
+#define OBD_CONNECT_RMT_CLIENT_FORCE  0x20000ULL /*Remote client by force */
+#define OBD_CONNECT_BRW_SIZE          0x40000ULL /*Max bytes per rpc */
+#define OBD_CONNECT_QUOTA64           0x80000ULL /*64bit qunit_data.qd_count */
+#define OBD_CONNECT_MDS_CAPA         0x100000ULL /*MDS capability */
+#define OBD_CONNECT_OSS_CAPA         0x200000ULL /*OSS capability */
+#define OBD_CONNECT_CANCELSET        0x400000ULL /*Early batched cancels. */
+#define OBD_CONNECT_SOM              0x800000ULL /*Size on MDS */
+#define OBD_CONNECT_AT              0x1000000ULL /*client uses AT */
+#define OBD_CONNECT_LRU_RESIZE      0x2000000ULL /*LRU resize feature. */
+#define OBD_CONNECT_MDS_MDS         0x4000000ULL /*MDS-MDS connection */
+#define OBD_CONNECT_REAL            0x8000000ULL /*real connection */
+#define OBD_CONNECT_CHANGE_QS      0x10000000ULL /*shrink/enlarge qunit */
+#define OBD_CONNECT_CKSUM          0x20000000ULL /*support several cksum algos*/
+#define OBD_CONNECT_FID            0x40000000ULL /*FID is supported by server */
+#define OBD_CONNECT_VBR            0x80000000ULL /*version based recovery */
+#define OBD_CONNECT_LOV_V3        0x100000000ULL /*client supports LOV v3 EA */
+#define OBD_CONNECT_GRANT_SHRINK  0x200000000ULL /* support grant shrink */
+#define OBD_CONNECT_SKIP_ORPHAN   0x400000000ULL /* don't reuse orphan objids */
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags()
  * and lustre/utils/wirecheck.c */
 
@@ -717,7 +785,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 OBD_CONNECT_RMT_CLIENT_FORCE | \
                                 OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | \
                                 OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID | \
-                                LRU_RESIZE_CONNECT_FLAG | \
+                                LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_VBR | \
                                 OBD_CONNECT_LOV_V3)
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
@@ -727,8 +795,9 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
                                 OBD_CONNECT_CHANGE_QS | \
                                 OBD_CONNECT_OSS_CAPA  | OBD_CONNECT_RMT_CLIENT | \
-                                OBD_CONNECT_RMT_CLIENT_FORCE | \
-                                OBD_CONNECT_MDS)
+                                OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_VBR | \
+                                OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \
+                                OBD_CONNECT_GRANT_SHRINK)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT)
 
@@ -826,18 +895,14 @@ typedef __u32 obd_count;
 #define OBD_FL_NO_GRPQUOTA   (0x00000200) /* the object's group is over quota */
 #define OBD_FL_CREATE_CROW   (0x00000400) /* object should be create on write */
 
-/**
- * Set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs
- * that declared OBD_CONNECT_TRUNCLOCK in their connect flags support this
- * functionality.
- */
-#define OBD_FL_TRUNCLOCK     (0x00000800)
+#define OBD_FL_TRUNCLOCK     (0x00000800) /* delegate DLM locking during punch */
+#define OBD_FL_CKSUM_CRC32   (0x00001000) /* CRC32 checksum type */
+#define OBD_FL_CKSUM_ADLER   (0x00002000) /* ADLER checksum type */
+#define OBD_FL_CKSUM_RESV1   (0x00004000) /* reserved for future checksum type */
+#define OBD_FL_CKSUM_RESV2   (0x00008000) /* reserved for future checksum type */
+#define OBD_FL_CKSUM_RESV3   (0x00010000) /* reserved for future checksum type */
+#define OBD_FL_SHRINK_GRANT  (0x00020000) /* object shrink the grant */
 
-/*
- * Checksum types
- */
-#define OBD_FL_CKSUM_CRC32    (0x00001000)
-#define OBD_FL_CKSUM_ADLER    (0x00002000)
 #define OBD_FL_CKSUM_ALL      (OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER)
 
 #define LOV_MAGIC_V1      0x0BD10BD0
@@ -965,6 +1030,14 @@ struct lov_mds_md_v3 {            /* LOV EA mds/wire data (little-endian) */
 /* don't forget obdo_fid which is way down at the bottom so it can
  * come after the definition of llog_cookie */
 
+enum obd_statfs_state {
+        OS_STATE_DEGRADED       = 0x00000001, /**< RAID degraded/rebuilding */
+        OS_STATE_READONLY       = 0x00000002, /**< filesystem is read-only */
+        OS_STATE_RDONLY_1       = 0x00000004, /**< obsolete 1.6, was EROFS=30 */
+        OS_STATE_RDONLY_2       = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
+        OS_STATE_RDONLY_3       = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
+};
+
 struct obd_statfs {
         __u64           os_type;
         __u64           os_blocks;
@@ -976,7 +1049,7 @@ struct obd_statfs {
         __u32           os_bsize;
         __u32           os_namelen;
         __u64           os_maxbytes;
-        __u32           os_state;       /* positive error code on server */
+        __u32           os_state;       /**< obd_statfs_state OS_STATE_* flag */
         __u32           os_spare1;
         __u32           os_spare2;
         __u32           os_spare3;
@@ -991,16 +1064,23 @@ struct obd_statfs {
 extern void lustre_swab_obd_statfs (struct obd_statfs *os);
 #define OBD_STATFS_NODELAY      0x0001  /* requests should be send without delay
                                          * and resends for avoid deadlocks */
-
 #define OBD_STATFS_FROM_CACHE   0x0002  /* the statfs callback should not update
                                          * obd_osfs_age */
+#define OBD_STATFS_PTLRPCD      0x0004  /* requests will be sent via ptlrpcd
+                                         * instead of a specific set. This
+                                         * means that we cannot rely on the set
+                                         * interpret routine to be called.
+                                         * lov_statfs_fini() must thus be called
+                                         * by the request interpret routine */
 
 /* ost_body.data values for OST_BRW */
 
 #define OBD_BRW_READ            0x01
 #define OBD_BRW_WRITE           0x02
 #define OBD_BRW_RWMASK          (OBD_BRW_READ | OBD_BRW_WRITE)
-#define OBD_BRW_SYNC            0x08
+#define OBD_BRW_SYNC            0x08 /* this page is a part of synchronous
+                                      * transfer and is not accounted in
+                                      * the grant. */
 #define OBD_BRW_CHECK           0x10
 #define OBD_BRW_FROM_GRANT      0x20 /* the osc manages this under llite */
 #define OBD_BRW_GRANTED         0x40 /* the ost manages this */
@@ -2119,20 +2199,20 @@ struct lov_mds_md_join {
 #define LLOG_OP_MASK  0xfff00000
 
 typedef enum {
-        LLOG_PAD_MAGIC   = LLOG_OP_MAGIC | 0x00000,
-        OST_SZ_REC       = LLOG_OP_MAGIC | 0x00f00,
-        OST_RAID1_REC    = LLOG_OP_MAGIC | 0x01000,
-        MDS_UNLINK_REC   = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK,
-        MDS_SETATTR_REC  = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR,
-        MDS_SETATTR64_REC= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | REINT_SETATTR,
-        OBD_CFG_REC      = LLOG_OP_MAGIC | 0x20000,
-        PTL_CFG_REC      = LLOG_OP_MAGIC | 0x30000, /* obsolete */
-        LLOG_GEN_REC     = LLOG_OP_MAGIC | 0x40000,
-        LLOG_JOIN_REC    = LLOG_OP_MAGIC | 0x50000,
-         /** changelog record type */
-        CHANGELOG_REC    = LLOG_OP_MAGIC | 0x60000,
-        LLOG_HDR_MAGIC   = LLOG_OP_MAGIC | 0x45539,
-        LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b,
+        LLOG_PAD_MAGIC     = LLOG_OP_MAGIC | 0x00000,
+        OST_SZ_REC         = LLOG_OP_MAGIC | 0x00f00,
+        OST_RAID1_REC      = LLOG_OP_MAGIC | 0x01000,
+        MDS_UNLINK_REC     = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK,
+        MDS_SETATTR_REC    = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR,
+        MDS_SETATTR64_REC  = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | REINT_SETATTR,
+        OBD_CFG_REC        = LLOG_OP_MAGIC | 0x20000,
+        PTL_CFG_REC        = LLOG_OP_MAGIC | 0x30000, /* obsolete */
+        LLOG_GEN_REC       = LLOG_OP_MAGIC | 0x40000,
+        LLOG_JOIN_REC      = LLOG_OP_MAGIC | 0x50000,
+        CHANGELOG_REC      = LLOG_OP_MAGIC | 0x60000,
+        CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000,
+        LLOG_HDR_MAGIC     = LLOG_OP_MAGIC | 0x45539,
+        LLOG_LOGID_MAGIC   = LLOG_OP_MAGIC | 0x4553b,
 } llog_op_type;
 
 /*
@@ -2267,18 +2347,33 @@ enum changelog_rec_type {
         CL_LAST
 };
 
+/** Changelog entry type names. Must be defined in the same order as the
+ * \a changelog_rec_type enum.
+ */
+#define DECLARE_CHANGELOG_NAMES static const char *changelog_str[] =         \
+       {"MARK","CREAT","MKDIR","HLINK","SLINK","MKNOD","UNLNK","RMDIR",      \
+        "RNMFM","RNMTO","OPEN","CLOSE","IOCTL","TRUNC","SATTR","XATTR"}
+
 /** \a changelog_rec_type's that can't be masked */
-#define CL_MINMASK (1 << CL_MARK)
+#define CHANGELOG_MINMASK (1 << CL_MARK)
 /** bits covering all \a changelog_rec_type's */
-#define CL_ALLMASK 0XFFFF
+#define CHANGELOG_ALLMASK 0XFFFF
 /** default \a changelog_rec_type mask */
-#define CL_DEFMASK CL_ALLMASK
+#define CHANGELOG_DEFMASK CHANGELOG_ALLMASK
 
 /* per-record flags */
 #define CLF_VERSION  0x1000
 #define CLF_FLAGMASK 0x0FFF
 #define CLF_HSM      0x0001
 
+/* changelog llog name, needed by client replicators */
+#define CHANGELOG_CATALOG "changelog_catalog"
+
+struct changelog_setinfo {
+        __u64 cs_recno;
+        __u32 cs_id;
+} __attribute__((packed));
+
 /** changelog record */
 struct llog_changelog_rec {
         struct llog_rec_hdr   cr_hdr;
@@ -2299,6 +2394,16 @@ struct llog_changelog_rec {
         };
 } __attribute__((packed));
 
+#define CHANGELOG_USER_PREFIX "cl"
+
+struct llog_changelog_user_rec {
+        struct llog_rec_hdr   cur_hdr;
+        __u32                 cur_id;
+        __u32                 cur_padding;
+        __u64                 cur_endrec;
+        struct llog_rec_tail  cur_tail;
+} __attribute__((packed));
+
 struct llog_gen {
         __u64 mnt_cnt;
         __u64 conn_cnt;
@@ -2466,6 +2571,7 @@ extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
 extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
                                             int stripe_count);
 extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj);
+extern void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
 
 /* llog_swab.c */
 extern void lustre_swab_llogd_body (struct llogd_body *d);
@@ -2631,7 +2737,7 @@ struct lustre_capa_key {
 extern void lustre_swab_lustre_capa_key(struct lustre_capa_key *k);
 
 /** The link ea holds 1 \a link_ea_entry for each hardlink */
-#define LINK_EA_MAGIC 0x01EA0000
+#define LINK_EA_MAGIC 0x11EAF1DFUL
 struct link_ea_header {
         __u32 leh_magic;
         __u32 leh_reccount;