1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/include/lustre_disk.h
38 * Lustre disk format definitions.
40 * Author: Nathan Rutman <nathan@clusterfs.com>
43 #ifndef _LUSTRE_DISK_H
44 #define _LUSTRE_DISK_H
46 #include <lnet/types.h>
48 /****************** on-disk files *********************/
/* String constants naming the on-disk directories and files used by the
 * code that includes this header.  All of them are plain string literals. */
50 #define MDT_LOGS_DIR "LOGS" /* COMPAT_146 */
51 #define MOUNT_CONFIGS_DIR "CONFIGS"
52 /* Persistent mount data are stored on the disk in this file.
 * The path is formed by adjacent string-literal concatenation of
 * MOUNT_CONFIGS_DIR and "/mountdata", i.e. it expands to
 * "CONFIGS/mountdata". */
53 #define MOUNT_DATA_FILE MOUNT_CONFIGS_DIR"/mountdata"
54 #define LAST_RCVD "last_rcvd"
55 #define LOV_OBJID "lov_objid"
56 #define HEALTH_CHECK "health_check"
59 /****************** persistent mount data *********************/
/* LDD_F_* flag bits.  Every value is a distinct single bit, so the flags
 * can be OR-ed together; the IS_MDT()/IS_OST()/IS_MGS() macros in this
 * header test the three LDD_F_SV_TYPE_* bits against ldd_flags.
 * Bit 0x0008 is not assigned in this list. */
61 #define LDD_F_SV_TYPE_MDT 0x0001 /* bit tested by IS_MDT() */
62 #define LDD_F_SV_TYPE_OST 0x0002 /* bit tested by IS_OST() */
63 #define LDD_F_SV_TYPE_MGS 0x0004 /* bit tested by IS_MGS() */
64 #define LDD_F_NEED_INDEX 0x0010 /* need an index assignment */
65 #define LDD_F_VIRGIN 0x0020 /* never registered */
66 #define LDD_F_UPDATE 0x0040 /* update all related config logs */
67 #define LDD_F_REWRITE_LDD 0x0080 /* rewrite the LDD */
68 #define LDD_F_WRITECONF 0x0100 /* regenerate all logs for this fs */
69 #define LDD_F_UPGRADE14 0x0200 /* COMPAT_14 */
70 #define LDD_F_PARAM 0x0400 /* process as lctl conf_param */
/* Return the name of a mount type by indexing the function-local static
 * table mount_type_string[] with mt.  No range check on mt is visible in
 * the lines below.
 * NOTE(review): the definition of enum ldd_mount_type, the entries of the
 * table and the function's braces are not present in this copy of the
 * file -- confirm against the complete header before relying on this. */
81 static inline char *mt_str(enum ldd_mount_type mt)
83 static char *mount_type_string[] = {
90 return mount_type_string[mt];
/* NOTE(review): by their names these look like the masks of supported
 * incompatible / read-only-compatible feature flags (both currently 0);
 * no user of them is visible in this header -- confirm. */
93 #define LDD_INCOMPAT_SUPP 0
94 #define LDD_ROCOMPAT_SUPP 0
/* NOTE(review): presumably the magic number identifying a
 * struct lustre_disk_data record; the member that would hold it is not
 * visible in this copy -- confirm. */
96 #define LDD_MAGIC 0x1dd00001
98 /* On-disk configuration file. In host-endian order.
 * The numeric markers written in front of the last four members
 * presumably record each member's byte offset within the record --
 * confirm against the complete header.  Two array sizes are spelled as
 * differences: ldd_userdata is 1024 - 200 = 824 bytes and ldd_padding is
 * 4096 - 1024 = 3072 bytes.
 * NOTE(review): in this copy the comment on ldd_svindex is not closed on
 * its own line, and neither the first member(s) nor the closing brace of
 * the struct are visible. */
99 struct lustre_disk_data {
101 __u32 ldd_feature_compat; /* compatible feature flags */
102 __u32 ldd_feature_rocompat;/* read-only compatible feature flags */
103 __u32 ldd_feature_incompat;/* incompatible feature flags */
105 __u32 ldd_config_ver; /* config rewrite count - not used */
106 __u32 ldd_flags; /* LDD_F_* bits, e.g. LDD_F_SV_TYPE_* */
107 __u32 ldd_svindex; /* server index (0001), must match
109 __u32 ldd_mount_type; /* target fs type LDD_MT_* */
110 char ldd_fsname[64]; /* filesystem this server is part of */
111 char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/
112 __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */
114 /*200*/ char ldd_userdata[1024 - 200]; /* arbitrary user string */
115 /*1024*/__u8 ldd_padding[4096 - 1024]; /* filler between the 1024 and 4096 markers */
116 /*4096*/char ldd_mount_opts[4096]; /* target fs mount opts */
117 /*8192*/char ldd_params[4096]; /* key=value pairs */
/* Server-type tests.  data must be a pointer to a structure with an
 * ldd_flags member (struct lustre_disk_data in this header).  Each macro
 * expands to ldd_flags masked with one LDD_F_SV_TYPE_* bit, so the result
 * is either 0 or that bit's value -- it is not normalised to 0/1. */
120 #define IS_MDT(data) ((data)->ldd_flags & LDD_F_SV_TYPE_MDT)
121 #define IS_OST(data) ((data)->ldd_flags & LDD_F_SV_TYPE_OST)
122 #define IS_MGS(data) ((data)->ldd_flags & LDD_F_SV_TYPE_MGS)
/* Name of the mount type stored in data->ldd_mount_type, via mt_str(). */
123 #define MT_STR(data) mt_str((data)->ldd_mount_type)
125 /* Make the mdt/ost server obd name based on the filesystem name.
 *
 * Visible behaviour, by server-type bits in flags:
 *  - LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST set: name is written with the
 *    format "%.8s-%s%04x" -- at most the first 8 characters of fs, a
 *    dash, "MDT" if the MDT bit is set and "OST" otherwise, then a value
 *    printed as at least four lower-case hex digits (the argument itself
 *    is not visible in this copy; presumably index -- confirm).
 *  - otherwise, LDD_F_SV_TYPE_MGS set: name is set to "MGS".
 *  - otherwise: CERROR() reports the unknown flags value.
 *
 * NOTE(review): sprintf() is unbounded, so the caller must supply a name
 * buffer large enough for the result; the required size is not stated
 * here.  The remainder of the parameter list, the braces and the return
 * statements are missing from this copy of the file. */
126 static inline int server_make_name(__u32 flags, __u16 index, char *fs,
129 if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
130 sprintf(name, "%.8s-%s%04x", fs,
131 (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
133 } else if (flags & LDD_F_SV_TYPE_MGS) {
134 sprintf(name, "MGS");
136 CERROR("unknown server type %#x\n", flags);
142 /* Get the index from the obd name.
 * Declaration only; the definition is not in this header.
 * NOTE(review): presumably the parsed index is stored through idx and
 * endptr receives the position where parsing stopped (strtoul-style) --
 * confirm against the implementation. */
143 int server_name2index(char *svname, __u32 *idx, char **endptr);
146 /****************** mount command *********************/
148 /* The lmd is only used internally by Lustre; mount simply passes
149 everything as string options */
/* NOTE(review): presumably the magic value identifying a
 * struct lustre_mount_data; the member holding it is not visible in this
 * copy -- confirm. */
151 #define LMD_MAGIC 0xbdacbd03
153 /* gleaned from the mount command - no persistent info here */
/* NOTE(review): the first member(s) and the closing brace of this struct
 * are not visible in this copy of the file. */
154 struct lustre_mount_data {
156 __u32 lmd_flags; /* lustre mount flags */
157 int lmd_mgs_failnodes; /* mgs failover node count */
158 int lmd_exclude_count; /* presumably entries in lmd_exclude - confirm */
159 int lmd_recovery_time_soft; /* units not shown here - TODO confirm */
160 int lmd_recovery_time_hard; /* units not shown here - TODO confirm */
161 char *lmd_dev; /* device name */
162 char *lmd_profile; /* client only */
163 char *lmd_opts; /* lustre mount options (as opposed to
164 _device_ mount options) */
165 __u32 *lmd_exclude; /* array of OSTs to ignore */
/* Bit values for lustre_mount_data::lmd_flags (lmd_is_client() below
 * tests LMD_FLG_CLIENT against that member).  Bit 0x0004 is not assigned
 * in this list.
 * NOTE(review): in this copy the comment that starts on the
 * LMD_FLG_NOSVC line is not closed until the end of the LMD_FLG_NOMGS
 * comment, so the LMD_FLG_NOMGS definition sits inside a comment --
 * compare with the complete header. */
168 #define LMD_FLG_SERVER 0x0001 /* Mounting a server */
169 #define LMD_FLG_CLIENT 0x0002 /* Mounting a client */
170 #define LMD_FLG_ABORT_RECOV 0x0008 /* Abort recovery */
171 #define LMD_FLG_NOSVC 0x0010 /* Only start MGS/MGC for servers,
173 #define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, reusing
174 existing MGS services */
/* Non-zero (the LMD_FLG_CLIENT bit value) when x->lmd_flags has the
 * client bit set, 0 otherwise. */
176 #define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
179 /****************** last_rcvd file *********************/
/* Size constants for the last_rcvd file.  In this header LR_SERVER_SIZE
 * sizes the padding of struct lr_server_data and LR_CLIENT_SIZE sizes the
 * padding of struct lsd_client_data.  Units are presumably bytes --
 * confirm. */
181 #define LR_SERVER_SIZE 512
182 #define LR_CLIENT_START 8192
183 #define LR_CLIENT_SIZE 128
/* Build-time check that LR_CLIENT_START is not smaller than
 * LR_SERVER_SIZE.
 * NOTE(review): the matching #endif is not visible in this copy. */
184 #if LR_CLIENT_START < LR_SERVER_SIZE
185 #error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
/*
189 * This limit is arbitrary (131072 clients on x86), but it is convenient to use
190 * 2^n * CFS_PAGE_SIZE * 8 for the number of bits that fit an order-n allocation.
191 * If we need more than 131072 clients (order-2 allocation on x86) then this
192 * should become an array of single-page pointers that are allocated on demand.
 */
/* The larger of 128 * 1024 (= 131072) and CFS_PAGE_SIZE * 8.  max() and
 * CFS_PAGE_SIZE are defined outside this header. */
194 #define LR_MAX_CLIENTS max(128 * 1024UL, CFS_PAGE_SIZE * 8)
195 /* version recovery */
196 #define LR_EPOCH_BITS 32
/* lr_epoch(a): a shifted right by LR_EPOCH_BITS (32) bits, i.e. the bits
 * of a above the low 32.  a must be wider than 32 bits for the shift to
 * be meaningful. */
197 #define lr_epoch(a) ((a) >> LR_EPOCH_BITS)
/* Feature flag bits in three groups: OBD_COMPAT_*, OBD_ROCOMPAT_* and
 * OBD_INCOMPAT_*.  NOTE(review): presumably stored in the
 * lsd_feature_compat / lsd_feature_rocompat / lsd_feature_incompat
 * members of struct lr_server_data respectively -- confirm.  Within each
 * group every value is a distinct single bit; values repeat across
 * groups. */
200 #define OBD_COMPAT_OST 0x00000002 /* this is an OST (temporary) */
201 #define OBD_COMPAT_MDT 0x00000004 /* this is an MDT (temporary) */
203 /* interop flag to show server 20 was used */
204 #define OBD_COMPAT_20 0x00000008
206 #define OBD_ROCOMPAT_LOVOBJID 0x00000001 /* MDS handles LOV_OBJID file */
207 #define OBD_ROCOMPAT_CROW 0x00000002 /* OST will CROW create objects */
209 #define OBD_INCOMPAT_GROUPS 0x00000001 /* OST handles group subdirs */
210 #define OBD_INCOMPAT_OST 0x00000002 /* this is an OST */
211 #define OBD_INCOMPAT_MDT 0x00000004 /* this is an MDT */
212 #define OBD_INCOMPAT_COMMON_LR 0x00000008 /* common last_rcvd format */
213 #define OBD_INCOMPAT_FID 0x00000010 /* FID is enabled */
214 #define OBD_INCOMPAT_SOM 0x00000020 /* Size-On-MDS is enabled */
216 #define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
217 /* Data stored per server at the head of the last_rcvd file. In le32 order.
218 This should be common to filter_internal.h, lustre_mds.h
 *
 * lsd_padding is sized as LR_SERVER_SIZE - 288.  288 equals the summed
 * sizes of the members listed before it when __u8/__u16/__u32/__u64 are
 * 1/2/4/8 bytes and the compiler inserts no padding (assumption -- the
 * type definitions are outside this header), which would make the whole
 * struct LR_SERVER_SIZE (512) bytes.
 * NOTE(review): the closing brace of the struct is not visible in this
 * copy of the file. */
219 struct lr_server_data {
220 __u8 lsd_uuid[40]; /* server UUID */
221 __u64 lsd_last_transno; /* last completed transaction ID */
222 __u64 lsd_compat14; /* reserved - compat with old last_rcvd */
223 __u64 lsd_mount_count; /* incarnation number */
224 __u32 lsd_feature_compat; /* compatible feature flags */
225 __u32 lsd_feature_rocompat;/* read-only compatible feature flags */
226 __u32 lsd_feature_incompat;/* incompatible feature flags */
227 __u32 lsd_server_size; /* size of server data area */
228 __u32 lsd_client_start; /* start of per-client data area */
229 __u16 lsd_client_size; /* size of per-client data area */
230 __u16 lsd_subdir_count; /* number of subdirectories for objects */
231 __u64 lsd_catalog_oid; /* recovery catalog object id */
232 __u32 lsd_catalog_ogen; /* recovery catalog inode generation */
233 __u8 lsd_peeruuid[40]; /* UUID of MDS associated with this OST */
234 __u32 lsd_ost_index; /* index number of OST in LOV */
235 __u32 lsd_mdt_index; /* index number of MDT in LMV */
236 __u32 lsd_start_epoch; /* VBR: start epoch from last boot */
237 /** transaction values since lsd_trans_table_time */
238 __u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
239 /** start point of transno table below */
240 __u32 lsd_trans_table_time; /* time of first slot in table above */
241 __u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
242 __u8 lsd_padding[LR_SERVER_SIZE - 288];
245 /* Data stored per client in the last_rcvd file. In le32 order.
 *
 * lcd_padding is sized as LR_CLIENT_SIZE - 128; with LR_CLIENT_SIZE
 * defined as 128 in this header that is a zero-length array.  128 equals
 * the summed sizes of the members listed before it when
 * __u8/__u32/__u64 are 1/4/8 bytes and no padding is inserted
 * (assumption -- the type definitions are outside this header).
 * NOTE(review): the closing brace of the struct is not visible in this
 * copy of the file. */
246 struct lsd_client_data {
247 __u8 lcd_uuid[40]; /* client UUID */
248 __u64 lcd_last_transno; /* last completed transaction ID */
249 __u64 lcd_last_xid; /* xid for the last transaction */
250 __u32 lcd_last_result; /* result from last RPC */
251 __u32 lcd_last_data; /* per-op data (disposition for open &c.) */
252 /* for MDS_CLOSE requests */
253 __u64 lcd_last_close_transno; /* last completed transaction ID */
254 __u64 lcd_last_close_xid; /* xid for the last transaction */
255 __u32 lcd_last_close_result; /* result from last RPC */
256 __u32 lcd_last_close_data; /* per-op data */
257 /* VBR: last versions */
258 __u64 lcd_pre_versions[4];
259 __u32 lcd_last_epoch;
260 /** orphans handling for delayed export rely on that */
261 __u32 lcd_first_epoch;
262 __u8 lcd_padding[LR_CLIENT_SIZE - 128];
265 /* bug20354: the lcd_uuid for export of clients may be wrong.
 *
 * Sanity-check the UUID stored in a per-client record.  If no NUL byte is
 * found within the sizeof(lcd->lcd_uuid) bytes of lcd_uuid, the last byte
 * of the array is overwritten with '\0' (so the record is modified in
 * place) and an error naming the UUID, obd_name and index is reported
 * through LCONSOLE_ERROR().
 *
 * obd_name and index are used only in the message.
 *
 * NOTE(review): the two adjacent string literals in the message are
 * concatenated without a separating space, so the text reads
 * "...for exportsstored in last_rcvd..." -- a space is probably missing.
 * NOTE(review): the function's braces are not visible in this copy of
 * the file. */
266 static inline void check_lcd(char *obd_name, int index,
267 struct lsd_client_data *lcd)
269 int length = sizeof(lcd->lcd_uuid);
270 if (strnlen((const char *)lcd->lcd_uuid, length) == length) { /* no NUL inside the array */
271 lcd->lcd_uuid[length - 1] = '\0'; /* force termination so %s below is bounded */
273 LCONSOLE_ERROR("the client UUID (%s) on %s for exports"
274 "stored in last_rcvd(index = %d) is bad!\n",
275 lcd->lcd_uuid, obd_name, index);
/* Return the larger of lcd->lcd_last_transno and
 * lcd->lcd_last_close_transno, each converted with le64_to_cpu() before
 * the comparison; the returned value is the converted one.  When the two
 * are equal the close transno is returned (same value).
 * Note that, despite the lsd_ prefix, the argument is a per-client
 * struct lsd_client_data.
 * NOTE(review): the function's braces are not visible in this copy of
 * the file. */
279 static inline __u64 lsd_last_transno(struct lsd_client_data *lcd)
281 return le64_to_cpu(lcd->lcd_last_transno) >
282 le64_to_cpu(lcd->lcd_last_close_transno) ?
283 le64_to_cpu(lcd->lcd_last_transno) :
284 le64_to_cpu(lcd->lcd_last_close_transno);
288 /****************** superblock additional info *********************/
/* Additional per-superblock information; s2lsi() below obtains it by
 * casting a super block's s_fs_info pointer.
 * NOTE(review): the first member(s), the closing brace, the #endif for
 * HAVE_NEW_BACKING_DEV_INFO and the #ifdef matching the __KERNEL__ #endif
 * are not visible in this copy of the file. */
291 struct lustre_sb_info {
293 struct obd_device *lsi_mgc; /* mgc obd */
294 struct lustre_mount_data *lsi_lmd; /* mount command info */
295 struct lustre_disk_data *lsi_ldd; /* mount info on-disk */
296 struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */
297 struct vfsmount *lsi_srv_mnt; /* the one server mount */
298 atomic_t lsi_mounts; /* references to the srv_mnt */
299 #ifdef HAVE_NEW_BACKING_DEV_INFO
300 struct backing_dev_info bdi; /* Each client mountpoint needs own backing_dev_info */
/* LSI_* bit values.  NOTE(review): presumably for a flags member of
 * struct lustre_sb_info that is not visible in this copy -- confirm. */
304 #define LSI_SERVER 0x00000001
305 #define LSI_UMOUNT_FORCE 0x00000010
306 #define LSI_UMOUNT_FAILOVER 0x00000020
/* s2lsi(sb): sb->s_fs_info cast to struct lustre_sb_info *.
 * s2lsi_nocast(sb): the same member without the cast.
 * get_profile_name(sb): lmd_profile of the mount data reached through
 * s2lsi(sb)->lsi_lmd; the macro itself performs no NULL checks. */
308 #define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info))
309 #define s2lsi_nocast(sb) ((sb)->s_fs_info)
310 #define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile)
312 #endif /* __KERNEL__ */
314 /****************** mount lookup info *********************/
/* Mount lookup record; server_find_mount_locked() and server_get_mount()
 * declared in this header return pointers to it.
 * NOTE(review): the first member(s) and the closing brace are not visible
 * in this copy of the file. */
316 struct lustre_mount_info {
318 struct super_block *lmi_sb; /* presumably the mount's super block - confirm */
319 struct vfsmount *lmi_mnt; /* presumably the corresponding vfsmount - confirm */
320 struct list_head lmi_list_chain; /* list linkage; the list head is not declared in this header */
323 /****************** prototypes *********************/
326 #include <obd_class.h>
/* Declarations only; none of the functions below is defined in this
 * header, so their behaviour must be checked in the implementation. */
/* Each takes a callback that receives a struct super_block pointer;
 * NOTE(review): by name they register the client fill_super and
 * kill_super callbacks -- confirm. */
329 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb));
330 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
333 int lustre_common_put_super(struct super_block *sb);
/* Both take a super block, a log name and a config_llog_instance. */
334 int lustre_process_log(struct super_block *sb, char *logname,
335 struct config_llog_instance *cfg);
336 int lustre_end_log(struct super_block *sb, char *logname,
337 struct config_llog_instance *cfg);
/* Lookups by name returning a struct lustre_mount_info pointer.
 * NOTE(review): the _locked suffix suggests the caller must already hold
 * the relevant lock -- confirm which one. */
338 struct lustre_mount_info *server_find_mount_locked(char *name);
339 struct lustre_mount_info *server_get_mount(char *name);
340 int server_put_mount(char *name, struct vfsmount *mnt);
341 int server_register_target(struct super_block *sb);
/* Forward declaration so the prototype below can take a pointer to it. */
342 struct mgs_target_info;
343 int server_mti_print(char *title, struct mgs_target_info *mti);
346 int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id);
350 #endif // _LUSTRE_DISK_H