*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* Lustre disk format definitions.
*
* @{
*/
#include <linux/types.h>
+#include <linux/uuid.h>
+#include <linux/lnet/lnet-types.h> /* for lnet_nid_t */
/****************** on-disk files ********************/
#define INDEX_BACKUP_DIR "index_backup"
#define MDT_ORPHAN_DIR "PENDING"
+/* On-disk configuration file. In host-endian order.
+ *
+ * The numbers in the trailing comments of the last four fields ('200',
+ * 1024, '4096', '8192') are the byte offsets of those fields: the eight
+ * __u32 fields occupy bytes 0-31, ldd_fsname starts at 32, ldd_svname at 96,
+ * ldd_uuid at 160, ldd_userdata at 200, ldd_padding at 1024, ldd_mount_opts
+ * at 4096 and ldd_params at 8192, for a total of 12288 bytes. The array
+ * sizes written as differences (1024 - 200, 4096 - 1024) are what keep the
+ * following field on its round offset.
+ */
+struct lustre_disk_data {
+ __u32 ldd_magic; /* presumably LDD_MAGIC - TODO confirm against users */
+ __u32 ldd_feature_compat; /* compatible feature flags */
+ __u32 ldd_feature_rocompat; /* read-only compatible feature flags */
+ __u32 ldd_feature_incompat; /* incompatible feature flags */
+
+ __u32 ldd_config_ver; /* config rewrite count - not used */
+ __u32 ldd_flags; /* LDD_SV_TYPE; presumably a mask of the LDD_F_* bits
+ * (e.g. LDD_F_SV_TYPE_MDT) - TODO confirm
+ */
+ __u32 ldd_svindex; /* server index (0001), must match
+ * svname
+ */
+ __u32 ldd_mount_type; /* target fs type LDD_MT_*, see enum ldd_mount_type */
+ char ldd_fsname[64]; /* filesystem this server is part of,
+ * MTI_NAME_MAXLEN
+ */
+ char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
+ __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
+
+ char ldd_userdata[1024 - 200]; /* arbitrary user string, at offset '200' */
+ __u8 ldd_padding[4096 - 1024]; /* padding from offset 1024 up to 4096 */
+ char ldd_mount_opts[4096]; /* target fs mount opts, at offset '4096' */
+ char ldd_params[4096]; /* key=value pairs, at offset '8192' */
+};
+
/****************** persistent mount data *********************/
#define LDD_F_SV_TYPE_MDT 0x0001 /* bit 0 */
#define LDD_F_ERROR 0x4000 /* bit 14 */
/** process at lctl conf_param */
#define LDD_F_PARAM2 0x8000 /* bit 15 */
+/** the target shouldn't use local logs */
+#define LDD_F_NO_LOCAL_LOGS 0x10000 /* bit 16: first flag that needs more than 16 bits */
#define LDD_MAGIC 0x1dd00001 /* presumably the expected ldd_magic value - TODO confirm */
enum ldd_mount_type { /* LDD_MT_* target filesystem types; added lines pin each value explicitly */
LDD_MT_EXT3 = 0,
- LDD_MT_LDISKFS,
- LDD_MT_SMFS,
- LDD_MT_REISERFS,
- LDD_MT_LDISKFS2,
- LDD_MT_ZFS,
+ LDD_MT_LDISKFS = 1,
+ LDD_MT_REISERFS = 3, /* 2 is left unassigned; it was LDD_MT_SMFS in the removed lines */
+ LDD_MT_LDISKFS2 = 4,
+ LDD_MT_ZFS = 5,
LDD_MT_LAST /* implicit value 6, one past LDD_MT_ZFS */
};
* The lrd_client_gen field is assigned the lcd_generation value
* so that the client the reply data belongs to can be identified.
*/
-struct lsd_reply_data {
+struct lsd_reply_data_v1 { /* 32 bytes: 3 x __u64 + 2 x __u32 */
+ __u64 lrd_transno; /* transaction number */
+ __u64 lrd_xid; /* transmission id */
+ __u64 lrd_data; /* per-operation data */
+ __u32 lrd_result; /* request result */
+ __u32 lrd_client_gen; /* client generation */
+};
+
+struct lsd_reply_data_v2 { /* 64 bytes; the first five fields match lsd_reply_data_v1 */
__u64 lrd_transno; /* transaction number */
__u64 lrd_xid; /* transmission id */
__u64 lrd_data; /* per-operation data */
__u32 lrd_result; /* request result */
__u32 lrd_client_gen; /* client generation */
+ __u32 lrd_batch_idx; /* sub request index in the batched RPC */
+ __u32 lrd_padding[7]; /* unused fields, total size is 8X __u64 */
};
+#define lsd_reply_data lsd_reply_data_v2 /* the unversioned name refers to the v2 layout */
+
/* Header of the reply_data file */
-#define LRH_MAGIC 0xbdabda01
+#define LRH_MAGIC_V1 0xbdabda01 /* the value LRH_MAGIC had before the V1/V2 split */
+#define LRH_MAGIC_V2 0xbdabda02
+#define LRH_MAGIC LRH_MAGIC_V2 /* the unversioned name now means V2 */
+
+/* Don't change the header size for compatibility.
+ * lrh_pad is sized from struct lsd_reply_data_v1 (32 bytes) rather than from
+ * the lsd_reply_data alias, so the header stays 12 + 20 = 32 bytes even
+ * though lsd_reply_data now names the 64-byte v2 layout.
+ */
struct lsd_reply_header {
__u32 lrh_magic; /* presumably LRH_MAGIC_V1 or LRH_MAGIC_V2 - TODO confirm */
__u32 lrh_header_size;
__u32 lrh_reply_size;
- __u8 lrh_pad[sizeof(struct lsd_reply_data) - 12];
+ __u8 lrh_pad[sizeof(struct lsd_reply_data_v1) - 12]; /* 12 = the three __u32 fields above */
+};
+
+/****************** nodemap *********************/
+
+enum nodemap_idx_type { /* values 6-14 are not assigned here; 15 is the largest 4-bit value */
+ NODEMAP_EMPTY_IDX = 0, /* index created with blank record */
+ NODEMAP_CLUSTER_IDX = 1, /* a nodemap cluster of nodes */
+ NODEMAP_RANGE_IDX = 2, /* nid range assigned to a nm cluster */
+ NODEMAP_UIDMAP_IDX = 3, /* uid map assigned to a nm cluster */
+ NODEMAP_GIDMAP_IDX = 4, /* gid map assigned to a nm cluster */
+ NODEMAP_PROJIDMAP_IDX = 5, /* projid map assigned to nm cluster */
+ NODEMAP_GLOBAL_IDX = 15, /* stores nodemap activation status */
+};
+
+/* lu_nodemap flags.
+ * Each value is a single distinct bit and the highest is 0x80, so the whole
+ * set fits the 8-bit ncr_flags bitfield of struct nodemap_cluster_rec.
+ */
+enum nm_flag_bits {
+ NM_FL_ALLOW_ROOT_ACCESS = 0x1,
+ NM_FL_TRUST_CLIENT_IDS = 0x2,
+ NM_FL_DENY_UNKNOWN = 0x4,
+ NM_FL_MAP_UID = 0x8,
+ NM_FL_MAP_GID = 0x10,
+ NM_FL_ENABLE_AUDIT = 0x20,
+ NM_FL_FORBID_ENCRYPT = 0x40,
+ NM_FL_MAP_PROJID = 0x80, /* last free bit of an 8-bit field is used here */
+};
+
+enum nm_flag2_bits { /* second flag set; type of the 8-bit ncr_flags2 bitfield */
+ NM_FL2_READONLY_MOUNT = 0x1,
+};
+
+/* Nodemap records, uses 32 byte record length.
+ * New nodemap config records can be added into NODEMAP_CLUSTER_IDX
+ * with a new nk_cluster_subid value, as long as the records are
+ * kept at 32 bytes in size. New global config records can be added
+ * into NODEMAP_GLOBAL_IDX with a new nk_global_subid. This avoids
+ * breaking compatibility. Do not change the record size. If a
+ * new ID type or range is needed, a new IDX type should be used.
+ *
+ * NOTE(review): the size of this particular record depends on
+ * LUSTRE_NODEMAP_NAME_LENGTH, which is defined outside this section, and on
+ * how the compiler packs the two enum-typed bitfields - confirm it is 32.
+ */
+struct nodemap_cluster_rec {
+ char ncr_name[LUSTRE_NODEMAP_NAME_LENGTH + 1]; /* + 1 presumably for a NUL terminator - TODO confirm */
+ enum nm_flag_bits ncr_flags:8; /* NM_FL_* bits, 8-bit bitfield */
+ enum nm_flag2_bits ncr_flags2:8; /* NM_FL2_* bits, 8-bit bitfield */
+ __u8 ncr_padding1;
+ __u32 ncr_squash_projid;
+ __u32 ncr_squash_uid;
+ __u32 ncr_squash_gid;
+};
+
+/* lnet_nid_t is 8 bytes, so this record is 4 x 8 = 32 bytes:
+ * two NIDs bounding the range followed by two padding words.
+ */
+struct nodemap_range_rec {
+ lnet_nid_t nrr_start_nid;
+ lnet_nid_t nrr_end_nid;
+ __u64 nrr_padding1;
+ __u64 nrr_padding2;
+};
+
+struct nodemap_id_rec { /* 4 + 4 + 3 x 8 = 32 bytes; only the first field carries data */
+ __u32 nir_id_fs; /* presumably the filesystem-side ID paired with
+ * nk_id_client in struct nodemap_key - TODO confirm
+ */
+ __u32 nir_padding1;
+ __u64 nir_padding2;
+ __u64 nir_padding3;
+ __u64 nir_padding4;
+};
+
+struct nodemap_global_rec { /* 1 + 1 + 2 + 4 + 3 x 8 = 32 bytes */
+ __u8 ngr_is_active; /* see NODEMAP_GLOBAL_IDX: "stores nodemap activation status" */
+ __u8 ngr_padding1;
+ __u16 ngr_padding2;
+ __u32 ngr_padding3;
+ __u64 ngr_padding4;
+ __u64 ngr_padding5;
+ __u64 ngr_padding6;
+};
+
+struct nodemap_cluster_roles_rec { /* 4 x 8 = 32 bytes; sub-id NODEMAP_CLUSTER_ROLES */
+ __u64 ncrr_roles; /* enum nodemap_rbac_roles (declared outside this section) */
+ __u64 ncrr_unused1;
+ __u64 ncrr_unused2;
+ __u64 ncrr_unused3;
+};
+
+union nodemap_rec { /* overlay of every nodemap record layout declared above */
+ struct nodemap_cluster_rec ncr; /* NODEMAP_CLUSTER_IDX, sub-id NODEMAP_CLUSTER_REC */
+ struct nodemap_range_rec nrr;
+ struct nodemap_id_rec nir;
+ struct nodemap_global_rec ngr;
+ struct nodemap_cluster_roles_rec ncrr; /* NODEMAP_CLUSTER_IDX, sub-id NODEMAP_CLUSTER_ROLES */
+};
+
+/* sub-keys for records of type NODEMAP_CLUSTER_IDX; each value names the
+ * record struct stored under that sub-key (nk_cluster_subid in struct
+ * nodemap_key)
+ */
+enum nodemap_cluster_rec_subid {
+ NODEMAP_CLUSTER_REC = 0, /* nodemap_cluster_rec */
+ NODEMAP_CLUSTER_ROLES = 1, /* nodemap_cluster_roles_rec */
+};
+
+/* first 4 bits of the nodemap_id is the index type
+ *
+ * The key is 8 bytes: the id word followed by one __u32 whose name depends
+ * on the record type (the anonymous union members all alias the same word).
+ * NOTE(review): with NM_TYPE_SHIFT == 28 the "first 4 bits" are presumably
+ * the most significant bits 28-31 of nk_nodemap_id - confirm against users.
+ */
+struct nodemap_key {
+ __u32 nk_nodemap_id;
+ union {
+ __u32 nk_cluster_subid; /* see enum nodemap_cluster_rec_subid */
+ __u32 nk_range_id;
+ __u32 nk_id_client;
+ __u32 nk_unused;
+ };
+};
+
+#define NM_TYPE_MASK 0x0FFFFFFF /* low 28 bits set, i.e. every bit below NM_TYPE_SHIFT;
+ * NOTE(review): despite the name this selects the
+ * non-type bits, presumably to strip the type - confirm
+ */
+#define NM_TYPE_SHIFT 28 /* leaves 32 - 28 = 4 bits above the shift in a __u32 */
+
+/* file structure used for saving OI scrub bookmark state for restart
+ * (the constants below size and tag struct scrub_file)
+ */
+#define OSD_OI_FID_OID_BITS_MAX 10
+#define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX) /* 1024 */
+#define SCRUB_OI_BITMAP_SIZE (OSD_OI_FID_NR_MAX >> 3) /* 128 bytes = 1024 bits */
+
+#define SCRUB_MAGIC_V1 0x4C5FD252 /* values for sf_magic */
+#define SCRUB_MAGIC_V2 0x4C5FE253
+
+enum scrub_flags { /* bit flags kept in the 64-bit sf_flags field of struct scrub_file */
+ /* OI files have been recreated, OI mappings should be re-inserted. */
+ SF_RECREATED = 0x0000000000000001ULL,
+
+ /* OI files are invalid and should be rebuilt ASAP. */
+ SF_INCONSISTENT = 0x0000000000000002ULL,
+
+ /* OI scrub is triggered automatically. */
+ SF_AUTO = 0x0000000000000004ULL,
+
+ /* The device is upgraded from 1.8 format. */
+ SF_UPGRADE = 0x0000000000000008ULL,
+};
+
+enum scrub_status { /* state kept in the 16-bit sf_status field of struct scrub_file */
+ /* The scrub file is newly created: for a new MDT, when upgrading from an
+ * old disk, or when the scrub file is re-created manually. */
+ SS_INIT = 0,
+
+ /* The scrub is checking/repairing the OI files. */
+ SS_SCANNING = 1,
+
+ /* The scrub checked/repaired the OI files successfully. */
+ SS_COMPLETED = 2,
+
+ /* The scrub failed to check/repair the OI files. */
+ SS_FAILED = 3,
+
+ /* The scrub is stopped manually, the OI files may be inconsistent. */
+ SS_STOPPED = 4,
+
+ /* The scrub is paused automatically at umount. */
+ SS_PAUSED = 5,
+
+ /* The scrub crashed during the scanning, should be restarted. */
+ SS_CRASHED = 6,
+};
+
+enum scrub_param { /* bit flags kept in the 16-bit sf_param field of struct scrub_file */
+ /* Exit on failure. */
+ SP_FAILOUT = 0x0001,
+
+ /* Check only without repairing. */
+ SP_DRYRUN = 0x0002,
+};
+
+#ifdef __KERNEL__
+/* v6.2-rc5-72-g5e6a51787fef kernel APIs need type to be guid_t.
+ * In kernel builds every later use of uuid_le (e.g. sf_uuid in struct
+ * scrub_file) therefore becomes guid_t; outside __KERNEL__ the name is left
+ * alone and must come from an included header (presumably <linux/uuid.h> -
+ * TODO confirm).
+ */
+#define uuid_le guid_t
+#endif
+
+struct scrub_file { /* persistent OI scrub state; ends with a fixed-size OI bitmap */
+ uuid_le sf_uuid; /* 128-bit uuid for volume */
+ __u64 sf_flags; /* see 'enum scrub_flags' */
+ __u32 sf_magic; /* SCRUB_MAGIC_V1/V2 */
+ __u16 sf_status; /* see 'enum scrub_status' */
+ __u16 sf_param; /* see 'enum scrub_param' */
+ __s64 sf_time_last_complete; /* wallclock of last scrub finish */
+ __s64 sf_time_latest_start; /* wallclock of last scrub run */
+ __s64 sf_time_last_checkpoint; /* wallclock of last checkpoint */
+ __u64 sf_pos_latest_start; /* OID of last scrub start */
+ __u64 sf_pos_last_checkpoint; /* OID of last scrub checkpoint */
+ __u64 sf_pos_first_inconsistent; /* OID first object to update */
+ __u64 sf_items_checked; /* number objects checked */
+ __u64 sf_items_updated; /* number objects updated */
+ __u64 sf_items_failed; /* number objects unrepairable */
+ __u64 sf_items_updated_prior; /* num objects fixed before scan */
+ __u64 sf_items_noscrub; /* number of objects skipped due to
+ * LDISKFS_STATE_LUSTRE_NOSCRUB */
+ __u64 sf_items_igif; /* number of IGIF(no FID) objects */
+ __u32 sf_run_time; /* scrub runtime in seconds */
+ __u32 sf_success_count; /* number of completed runs */
+ __u16 sf_oi_count; /* number of OI files */
+ __u16 sf_internal_flags; /* flags to keep after reset, see
+ * 'enum scrub_internal_flags'
+ * (not declared in this section) */
+ __u32 sf_reserved_1;
+ __u64 sf_reserved_2[16]; /* 128 reserved bytes */
+ __u8 sf_oi_bitmap[SCRUB_OI_BITMAP_SIZE]; /* OI files recreated; 128 bytes,
+ * i.e. OSD_OI_FID_NR_MAX (1024) bits */
};
/** @} disk */