4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2011, 2016, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * Lustre disk format definitions.
33 * Author: Nathan Rutman <nathan.rutman@seagate.com>
36 #ifndef _UAPI_LUSTRE_DISK_H
37 #define _UAPI_LUSTRE_DISK_H
39 /** \defgroup disk disk
43 #include <linux/types.h>
44 #include <linux/uuid.h>
45 #include <linux/lnet/lnet-types.h> /* for lnet_nid_t */
47 /****************** on-disk files ********************/
49 #define MDT_LOGS_DIR "LOGS" /* COMPAT_146 */
50 #define MOUNT_CONFIGS_DIR "CONFIGS" /* directory component of MOUNT_DATA_FILE */
51 #define CONFIGS_FILE "mountdata" /* file-name component of MOUNT_DATA_FILE */
52 /** Persistent mount data are stored on the disk in this file. */
53 #define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/"CONFIGS_FILE /* literal concatenation: "CONFIGS/mountdata" */
54 #define LAST_RCVD "last_rcvd" /* per-server data at the head, then per-client data; see struct lr_server_data and struct lsd_client_data */
55 #define REPLY_DATA "reply_data" /* file described by struct lsd_reply_header and the lsd_reply_data slots */
56 #define LOV_OBJID "lov_objid"
57 #define LOV_OBJSEQ "lov_objseq"
58 #define HEALTH_CHECK "health_check"
59 #define CAPA_KEYS "capa_keys"
60 #define CHANGELOG_USERS "changelog_users"
61 #define MGS_NIDTBL_DIR "NIDTBL_VERSIONS"
62 #define QMT_DIR "quota_master"
63 #define QSD_DIR "quota_slave"
64 #define QSD_DIR_DT "quota_slave_dt"
65 #define QSD_DIR_MD "quota_slave_md"
66 #define HSM_ACTIONS "hsm_actions"
67 #define LFSCK_DIR "LFSCK"
68 #define LFSCK_BOOKMARK "lfsck_bookmark"
69 #define LFSCK_LAYOUT "lfsck_layout"
70 #define LFSCK_NAMESPACE "lfsck_namespace"
71 #define REMOTE_PARENT_DIR "REMOTE_PARENT_DIR"
72 #define INDEX_BACKUP_DIR "index_backup"
73 #define MDT_ORPHAN_DIR "PENDING"
75 /* On-disk configuration file. In host-endian order. */
76 struct lustre_disk_data {
78 __u32 ldd_feature_compat; /* compatible feature flags */
79 __u32 ldd_feature_rocompat; /* read-only compatible feature flags */
80 __u32 ldd_feature_incompat; /* incompatible feature flags */
82 __u32 ldd_config_ver; /* config rewrite count - not used */
83 __u32 ldd_flags; /* LDD_SV_TYPE */
84 __u32 ldd_svindex; /* server index (0001), must match
87 __u32 ldd_mount_type; /* target fs type LDD_MT_* */
88 char ldd_fsname[64]; /* filesystem this server is part of,
91 char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
92 __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
94 char ldd_userdata[1024 - 200]; /* arbitrary user string '200' */
95 __u8 ldd_padding[4096 - 1024]; /* 1024 */
96 char ldd_mount_opts[4096]; /* target fs mount opts '4096' */
97 char ldd_params[4096]; /* key=value pairs '8192' */
100 /****************** persistent mount data *********************/
102 #define LDD_F_SV_TYPE_MDT 0x0001
103 #define LDD_F_SV_TYPE_OST 0x0002
104 #define LDD_F_SV_TYPE_MGS 0x0004
105 #define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \
106 LDD_F_SV_TYPE_OST | \
108 #define LDD_F_SV_ALL 0x0008
109 /** need an index assignment */
110 #define LDD_F_NEED_INDEX 0x0010
111 /** never registered */
112 #define LDD_F_VIRGIN 0x0020
113 /** update the config logs for this server */
114 #define LDD_F_UPDATE 0x0040
115 /** rewrite the LDD */
116 #define LDD_F_REWRITE_LDD 0x0080
117 /** regenerate config logs for this fs or server */
118 #define LDD_F_WRITECONF 0x0100
120 /*#define LDD_F_UPGRADE14 0x0200 deprecated since 1.8 */
121 /** process as lctl conf_param */
122 #define LDD_F_PARAM 0x0400
123 /** all nodes are specified as service nodes */
124 #define LDD_F_NO_PRIMNODE 0x1000 /* note: bit 0x0800 is not assigned in this list */
125 /** IR enable flag */
126 #define LDD_F_IR_CAPABLE 0x2000
127 /** the MGS refused to register the target. */
128 #define LDD_F_ERROR 0x4000
129 /** process at lctl conf_param (NOTE(review): near-duplicate of the LDD_F_PARAM comment; confirm how the two flags differ) */
130 #define LDD_F_PARAM2 0x8000
131 /** the target shouldn't use local logs */
132 #define LDD_F_NO_LOCAL_LOGS 0x10000 /* first flag above 16 bits; ldd_flags is declared __u32 */
134 #define LDD_MAGIC 0x1dd00001
136 #define XATTR_TARGET_RENAME "trusted.rename_tgt"
138 enum ldd_mount_type {
147 /****************** last_rcvd file *********************/
149 #define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
150 #define LR_SERVER_SIZE 512 /* struct lr_server_data is padded up to this many bytes (see lsd_padding) */
151 #define LR_CLIENT_START 8192 /* must not be smaller than LR_SERVER_SIZE; enforced by the #if/#error check that follows */
152 #define LR_CLIENT_SIZE 128 /* struct lsd_client_data is padded up to this many bytes (see lcd_padding) */
153 #if LR_CLIENT_START < LR_SERVER_SIZE
154 #error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
158 * Data stored per server at the head of the last_rcvd file. In le32 order.
160 struct lr_server_data { /* fixed-size record: LR_SERVER_SIZE bytes once lsd_padding is counted */
161 __u8 lsd_uuid[40]; /* server UUID */
162 __u64 lsd_last_transno; /* last completed transaction ID */
163 __u64 lsd_compat14; /* reserved - compat with old last_rcvd */
164 __u64 lsd_mount_count; /* incarnation number */
165 __u32 lsd_feature_compat; /* compatible feature flags */
166 __u32 lsd_feature_rocompat;/* read-only compatible feature flags */
167 __u32 lsd_feature_incompat;/* incompatible feature flags */
168 __u32 lsd_server_size; /* size of server data area */
169 __u32 lsd_client_start; /* start of per-client data area */
170 __u16 lsd_client_size; /* size of per-client data area */
171 __u16 lsd_subdir_count; /* number of subdirectories for objects */
172 __u64 lsd_catalog_oid; /* recovery catalog object id */
173 __u32 lsd_catalog_ogen; /* recovery catalog inode generation */
174 __u8 lsd_peeruuid[40]; /* UUID of MDS associated with this OST */
175 __u32 lsd_osd_index; /* index number of OST in LOV */
176 __u32 lsd_padding1; /* was lsd_mdt_index, unused in 2.4.0 */
177 __u32 lsd_start_epoch; /* VBR: start epoch from last boot */
178 /** transaction values since lsd_trans_table_time */
179 __u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
180 /** start point of the transno table (lsd_trans_table, declared just above) */
181 __u32 lsd_trans_table_time; /* time of first slot in table above */
182 __u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
183 __u8 lsd_padding[LR_SERVER_SIZE - 288]; /* pads to LR_SERVER_SIZE; 288 is the summed size of the fields above, so it must be updated if a field is added */
186 /* Data stored per client in the last_rcvd file. In le32 order. */
187 struct lsd_client_data {
188 __u8 lcd_uuid[40]; /* client UUID */
189 __u64 lcd_last_transno; /* last completed transaction ID */
190 __u64 lcd_last_xid; /* xid for the last transaction */
191 __u32 lcd_last_result; /* result from last RPC */
192 __u32 lcd_last_data; /* per-op data (disposition for
195 /* for MDS_CLOSE requests */
196 __u64 lcd_last_close_transno; /* last completed transaction ID */
197 __u64 lcd_last_close_xid; /* xid for the last transaction */
198 __u32 lcd_last_close_result; /* result from last RPC */
199 __u32 lcd_last_close_data; /* per-op data */
200 /* VBR: last versions */
201 __u64 lcd_pre_versions[4];
202 __u32 lcd_last_epoch;
203 /* generation counter of client slot in last_rcvd */
204 __u32 lcd_generation;
205 __u8 lcd_padding[LR_CLIENT_SIZE - 128];
208 /* Data stored in each slot of the reply_data file.
210 * The lrd_client_gen field is assigned the lcd_generation value
211 * to allow identifying which client the reply data belongs to.
213 struct lsd_reply_data_v1 { /* version 1 slot layout; the fields below total 32 bytes */
214 __u64 lrd_transno; /* transaction number */
215 __u64 lrd_xid; /* transmission id */
216 __u64 lrd_data; /* per-operation data */
217 __u32 lrd_result; /* request result */
218 __u32 lrd_client_gen; /* client generation, assigned from lcd_generation */
221 struct lsd_reply_data_v2 { /* version 2 slot layout: same leading fields as v1, then lrd_batch_idx and padding */
222 __u64 lrd_transno; /* transaction number */
223 __u64 lrd_xid; /* transmission id */
224 __u64 lrd_data; /* per-operation data */
225 __u32 lrd_result; /* request result */
226 __u32 lrd_client_gen; /* client generation, assigned from lcd_generation */
227 __u32 lrd_batch_idx; /* sub request index in the batched RPC */
228 __u32 lrd_padding[7]; /* unused fields; bring the struct to 64 bytes (8 x __u64) */
231 #define lsd_reply_data lsd_reply_data_v2 /* the unsuffixed name resolves to the v2 layout */
233 /* Header of the reply_data file */
234 #define LRH_MAGIC_V1 0xbdabda01
235 #define LRH_MAGIC_V2 0xbdabda02
236 #define LRH_MAGIC LRH_MAGIC_V2 /* the unsuffixed magic resolves to the V2 value */
238 /* Don't change the header size for compatibility. */
239 struct lsd_reply_header {
241 __u32 lrh_header_size;
242 __u32 lrh_reply_size;
243 __u8 lrh_pad[sizeof(struct lsd_reply_data_v1) - 12];
246 /****************** nodemap *********************/
248 enum nodemap_idx_type { /* NOTE(review): the key comment further down says the index type is the first 4 bits of the nodemap_id, so values presumably must stay <= 15 - confirm */
249 NODEMAP_EMPTY_IDX = 0, /* index created with blank record */
250 NODEMAP_CLUSTER_IDX = 1, /* a nodemap cluster of nodes */
251 NODEMAP_RANGE_IDX = 2, /* nid range assigned to a nm cluster */
252 NODEMAP_UIDMAP_IDX = 3, /* uid map assigned to a nm cluster */
253 NODEMAP_GIDMAP_IDX = 4, /* gid map assigned to a nm cluster */
254 NODEMAP_PROJIDMAP_IDX = 5, /* projid map assigned to nm cluster */
255 NODEMAP_GLOBAL_IDX = 15, /* stores nodemap activation status */
258 /* lu_nodemap flags */
260 NM_FL_ALLOW_ROOT_ACCESS = 0x1,
261 NM_FL_TRUST_CLIENT_IDS = 0x2,
262 NM_FL_DENY_UNKNOWN = 0x4,
264 NM_FL_MAP_GID = 0x10,
265 NM_FL_ENABLE_AUDIT = 0x20,
266 NM_FL_FORBID_ENCRYPT = 0x40,
267 NM_FL_MAP_PROJID = 0x80,
271 NM_FL2_READONLY_MOUNT = 0x1,
274 /* Nodemap records, uses 32 byte record length.
275 * New nodemap config records can be added into NODEMAP_CLUSTER_IDX
276 * with a new nk_cluster_subid value, as long as the records are
277 * kept at 32 bytes in size. New global config records can be added
278 * into NODEMAP_GLOBAL_IDX with a new nk_global_subid. This avoids
279 * breaking compatibility. Do not change the record size. If a
280 * new ID type or range is needed, a new IDX type should be used.
282 struct nodemap_cluster_rec {
283 char ncr_name[LUSTRE_NODEMAP_NAME_LENGTH + 1];
284 enum nm_flag_bits ncr_flags:8;
285 enum nm_flag2_bits ncr_flags2:8;
287 __u32 ncr_squash_projid;
288 __u32 ncr_squash_uid;
289 __u32 ncr_squash_gid;
292 /* lnet_nid_t is 8 bytes */
293 struct nodemap_range_rec {
294 lnet_nid_t nrr_start_nid;
295 lnet_nid_t nrr_end_nid;
300 struct nodemap_id_rec {
308 struct nodemap_global_rec {
318 struct nodemap_cluster_roles_rec {
319 __u64 ncrr_roles; /* enum nodemap_rbac_roles */
326 struct nodemap_cluster_rec ncr;
327 struct nodemap_range_rec nrr;
328 struct nodemap_id_rec nir;
329 struct nodemap_global_rec ngr;
330 struct nodemap_cluster_roles_rec ncrr;
333 /* sub-keys for records of type NODEMAP_CLUSTER_IDX */
334 enum nodemap_cluster_rec_subid {
335 NODEMAP_CLUSTER_REC = 0, /* nodemap_cluster_rec */
336 NODEMAP_CLUSTER_ROLES = 1, /* nodemap_cluster_roles_rec */
339 /* first 4 bits of the nodemap_id is the index type */
343 __u32 nk_cluster_subid;
350 #define NM_TYPE_MASK 0x0FFFFFFF /* selects the low 28 bits, i.e. everything below NM_TYPE_SHIFT (the non-type bits, despite the name) */
351 #define NM_TYPE_SHIFT 28 /* bit position where the 4-bit index type starts (bits 28-31) */
353 /* file structure used for saving OI scrub bookmark state for restart */
354 #define OSD_OI_FID_OID_BITS_MAX 10
355 #define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX) /* 2^10 = 1024 */
356 #define SCRUB_OI_BITMAP_SIZE (OSD_OI_FID_NR_MAX >> 3) /* 1024 / 8 = 128 bytes, i.e. one bit per possible entry; sizes sf_oi_bitmap */
358 #define SCRUB_MAGIC_V1 0x4C5FD252 /* value for sf_magic */
359 #define SCRUB_MAGIC_V2 0x4C5FE253 /* value for sf_magic */
362 /* OI files have been recreated, OI mappings should be re-inserted. */
363 SF_RECREATED = 0x0000000000000001ULL,
365 /* OI files are invalid, should be rebuilt ASAP */
366 SF_INCONSISTENT = 0x0000000000000002ULL,
368 /* OI scrub is triggered automatically. */
369 SF_AUTO = 0x0000000000000004ULL,
371 /* The device is upgraded from 1.8 format. */
372 SF_UPGRADE = 0x0000000000000008ULL,
376 /* The scrub file is newly created, for a new MDT, when upgrading from an old disk,
377 * or when re-creating the scrub file manually. */
380 /* The scrub is checking/repairing the OI files. */
383 /* The scrub checked/repaired the OI files successfully. */
386 /* The scrub failed to check/repair the OI files. */
389 /* The scrub is stopped manually, the OI files may be inconsistent. */
392 /* The scrub is paused automatically when umount. */
395 /* The scrub crashed during the scanning, should be restarted. */
400 /* Exit when fail. */
403 /* Check only without repairing. */
408 /* v6.2-rc5-72-g5e6a51787fef kernel APIs need type to be guid_t */
409 #define uuid_le guid_t
413 uuid_le sf_uuid; /* 128-bit uuid for volume */
414 __u64 sf_flags; /* see 'enum scrub_flags' */
415 __u32 sf_magic; /* SCRUB_MAGIC_V1/V2 */
416 __u16 sf_status; /* see 'enum scrub_status' */
417 __u16 sf_param; /* see 'enum scrub_param' */
418 __s64 sf_time_last_complete; /* wallclock of last scrub finish */
419 __s64 sf_time_latest_start; /* wallclock of last scrub run */
420 __s64 sf_time_last_checkpoint; /* wallclock of last checkpoint */
421 __u64 sf_pos_latest_start; /* OID of last scrub start */
422 __u64 sf_pos_last_checkpoint; /* OID of last scrub checkpoint */
423 __u64 sf_pos_first_inconsistent; /* OID first object to update */
424 __u64 sf_items_checked; /* number objects checked */
425 __u64 sf_items_updated; /* number objects updated */
426 __u64 sf_items_failed; /* number objects unrepairable */
427 __u64 sf_items_updated_prior; /* num objects fixed before scan */
428 __u64 sf_items_noscrub; /* number of objects skipped due to
429 * LDISKFS_STATE_LUSTRE_NOSCRUB */
430 __u64 sf_items_igif; /* number of IGIF(no FID) objects */
431 __u32 sf_run_time; /* scrub runtime in seconds */
432 __u32 sf_success_count; /* number of completed runs */
433 __u16 sf_oi_count; /* number of OI files */
434 __u16 sf_internal_flags; /* flags to keep after reset, see
435 * 'enum scrub_internal_flags' */
437 __u64 sf_reserved_2[16];
438 __u8 sf_oi_bitmap[SCRUB_OI_BITMAP_SIZE]; /* OI files recreated */
443 #endif /* _UAPI_LUSTRE_DISK_H */