X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flustre%2Flustre_idl.h;h=bc3b762d087ad5bec254b43fe3563b5681164154;hp=ea1814c453e78af26b4c1a11db5574f0cb241b81;hb=12a130c71b88351bc361d1f741cad0a4c1fbb257;hpb=427ced3e5f49dae928bc8ef1dbb0c5ef620c3fd6;ds=sidebyside diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index ea1814c..bc3b762d 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -236,12 +236,93 @@ enum { }; /** + * Different FID Format + * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0 + * + * FID: + * File IDentifier generated by client from range allocated by the seq service. + * First 0x400 sequences [2^33, 2^33 + 0x400] are reserved for system use. Note + * that on ldiskfs MDTs that IGIF FIDs can use inode numbers starting at 12, + * but this is in the IGIF SEQ rangeand does not conflict with assigned FIDs. + * + * IGIF: + * Inode and Generation In FID, a surrogate FID used to globally identify an + * existing object on OLD formatted MDT file system. This would only be used on + * MDT0 in a DNE filesystem, because there are not expected to be any OLD + * formatted DNE filesystems. Belongs to a sequence in [12, 2^32 - 1] range, + * where sequence number is inode number, and inode generation is used as OID. + * NOTE: This assumes no more than 2^32-1 inodes exist in the MDT filesystem, + * which is the maximum possible for an ldiskfs backend. NOTE: This assumes + * that the reserved ext3/ext4/ldiskfs inode numbers [0-11] are never visible + * to clients, which has always been true. + * + * IDIF: + * Object ID in FID, a surrogate FID used to globally identify an existing + * object on OLD formatted OST file system. Belongs to a sequence in + * [2^32, 2^33 - 1]. Sequence number is calculated as: + * 1 << 32 | (ost_index << 16) | ((objid >> 32) & 0xffff) + * that is, SEQ consists of 16-bit OST index, and higher 16 bits of object ID. + * The generation of unique SEQ values per OST allows the IDIF FIDs to be + * identified in the FLD correctly. The OID field is calculated as: + * objid & 0xffffffff + * that is, it consists of lower 32 bits of object ID. NOTE This assumes that + * no more than 2^48-1 objects have ever been created on an OST, and that no + * more than 65535 OSTs are in use. Both are very reasonable assumptions (can + * uniquely map all objects on an OST that created 1M objects per second for 9 + * years, or combinations thereof). + * + * OST_MDT0: + * Surrogate FID used to identify an existing object on OLD formatted OST + * filesystem. Belongs to the reserved sequence 0, and is used internally prior + * to the introduction of FID-on-OST, at which point IDIF will be used to + * identify objects as residing on a specific OST. + * + * LLOG: + * For Lustre Log objects the object sequence 1 is used. This is compatible with + * both OLD and NEW.1 namespaces, as this SEQ number is in the ext3/ldiskfs + * reserved inode range and does not conflict with IGIF sequence numbers. + * + * ECHO: + * For testing OST IO performance the object sequence 2 is used. This is + * compatible with both OLD and NEW.1 namespaces, as this SEQ number is in the + * ext3/ldiskfs reserved inode range and does not conflict with IGIF sequence + * numbers. + * + * OST_MDT1 .. OST_MAX: + * For testing with multiple MDTs the object sequence 3 through 9 is used, + * allowing direct mapping of MDTs 1 through 7 respectively, for a total of 8 + * MDTs including OST_MDT0. This matches the legacy CMD project "group" + * mappings. However, this SEQ range is only for testing prior to any production + * DNE release, as the objects in this range conflict across all OSTs, as the + * OST index is not part of the FID. + * + * + * For compatibility with existing OLD OST network protocol structures, the FID + * must map onto the o_id and o_gr in a manner that ensures existing objects are + * identified consistently for IO, as well as onto the lock namespace to ensure + * both IDIFs map onto the same objects for IO as well as resources in the DLM. + * + * DLM OLD OBIF/IDIF: + * resource[] = {o_id, o_seq, 0, 0}; // o_seq == 0 for production releases + * + * DLM NEW.1 FID (this is the same for both the MDT and OST): + * resource[] = {SEQ, OID, VER, HASH}; + * + * Note that for mapping IDIF values to DLM resource names the o_id may be + * larger than the 2^33 reserved sequence numbers for IDIF, so it is possible + * for the o_id numbers to overlap FID SEQ numbers in the resource. However, in + * all production releases the OLD o_seq field is always zero, and all valid FID + * OID values are non-zero, so the lock resources will not collide. + * + * For objects within the IDIF range, group extraction (non-CMD) will be: + * o_id = (fid->f_seq & 0x7fff) << 16 | fid->f_oid; + * o_seq = 0; // formerly group number + */ + +/** * Note that reserved SEQ numbers below 12 will conflict with ldiskfs * inodes in the IGIF namespace, so these reserved SEQ numbers can be * used for other purposes and not risk collisions with existing inodes. - * - * Different FID Format - * http://arch.lustre.org/index.php?title=Interoperability_fids_zfs#NEW.0 */ enum fid_seq { FID_SEQ_OST_MDT0 = 0, @@ -962,9 +1043,14 @@ enum obdo_flags { * depending on the case (replay uses ready striping, non-replay req uses * hints), so MDT replaces magic with appropriate one and now LOD can * easily understand what's inside -bzzz + * + * those *_DEF magics are only used on server side internally, they + * won't be put on wire or disk. */ -#define LOV_MAGIC_V1_DEF 0x0CD10BD0 -#define LOV_MAGIC_V3_DEF 0x0CD30BD0 +#define LOV_MAGIC_DEF 0x10000000 +#define LOV_MAGIC_V1_DEF (LOV_MAGIC_DEF | LOV_MAGIC_V1) +#define LOV_MAGIC_V3_DEF (LOV_MAGIC_DEF | LOV_MAGIC_V3) +#define LOV_MAGIC_COMP_V1_DEF (LOV_MAGIC_DEF | LOV_MAGIC_COMP_V1) #define lov_pattern(pattern) (pattern & ~LOV_PATTERN_F_MASK) #define lov_pattern_flags(pattern) (pattern & LOV_PATTERN_F_MASK) @@ -2756,8 +2842,7 @@ struct obdo { struct lustre_handle o_handle; /* brw: lock handle to prolong * locks */ struct llog_cookie o_lcookie; /* destroy: unlink cookie from - * MDS, obsolete in 2.8, reused - * in OSP */ + * MDS, obsolete in 2.8 */ __u32 o_uid_h; __u32 o_gid_h;