X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Flustre_disk.h;h=cb3835eac2312b4ab590ae6fc1cc665ea705bcf9;hb=09bf54263496fed984c4f150b0a3bafeb548d3f7;hp=df9fdddae6be0bc6a6af3d7753cdbf6fa7e24ebb;hpb=2c74bfcb7a06addb42e79c1f561afb0acfaaa7d0;p=fs%2Flustre-release.git diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index df9fddd..cb3835e 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Whamcloud, Inc. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -43,6 +43,11 @@ #ifndef _LUSTRE_DISK_H #define _LUSTRE_DISK_H +/** \defgroup disk disk + * + * @{ + */ + #include #include @@ -58,23 +63,28 @@ #define HEALTH_CHECK "health_check" #define CAPA_KEYS "capa_keys" #define CHANGELOG_USERS "changelog_users" - +#define MGS_NIDTBL_DIR "NIDTBL_VERSIONS" +#define QMT_DIR "quota_master" +#define QSD_DIR "quota_slave" /****************** persistent mount data *********************/ #define LDD_F_SV_TYPE_MDT 0x0001 #define LDD_F_SV_TYPE_OST 0x0002 #define LDD_F_SV_TYPE_MGS 0x0004 +#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT | \ + LDD_F_SV_TYPE_OST | \ + LDD_F_SV_TYPE_MGS) #define LDD_F_SV_ALL 0x0008 /** need an index assignment */ #define LDD_F_NEED_INDEX 0x0010 /** never registered */ #define LDD_F_VIRGIN 0x0020 -/** update the config logs for this server*/ +/** update the config logs for this server */ #define LDD_F_UPDATE 0x0040 /** rewrite the LDD */ #define LDD_F_REWRITE_LDD 0x0080 -/** regenerate all logs for this fs */ +/** regenerate config logs for this fs or server */ #define LDD_F_WRITECONF 0x0100 /** COMPAT_14 */ #define LDD_F_UPGRADE14 0x0200 @@ -82,14 +92,29 @@ #define LDD_F_PARAM 0x0400 /** backend fs make use of IAM directory format. */ #define LDD_F_IAM_DIR 0x0800 +/** all nodes are specified as service nodes */ +#define LDD_F_NO_PRIMNODE 0x1000 +/** IR enable flag */ +#define LDD_F_IR_CAPABLE 0x2000 +/** the MGS refused to register the target. */ +#define LDD_F_ERROR 0x4000 + +/* opc for target register */ +#define LDD_F_OPC_REG 0x10000000 +#define LDD_F_OPC_UNREG 0x20000000 +#define LDD_F_OPC_READY 0x40000000 +#define LDD_F_OPC_MASK 0xf0000000 + +#define LDD_F_ONDISK_MASK (LDD_F_SV_TYPE_MASK | LDD_F_IAM_DIR) enum ldd_mount_type { - LDD_MT_EXT3 = 0, - LDD_MT_LDISKFS, - LDD_MT_SMFS, - LDD_MT_REISERFS, - LDD_MT_LDISKFS2, - LDD_MT_LAST + LDD_MT_EXT3 = 0, + LDD_MT_LDISKFS, + LDD_MT_SMFS, + LDD_MT_REISERFS, + LDD_MT_LDISKFS2, + LDD_MT_ZFS, + LDD_MT_LAST }; static inline char *mt_str(enum ldd_mount_type mt) @@ -99,11 +124,25 @@ static inline char *mt_str(enum ldd_mount_type mt) "ldiskfs", "smfs", "reiserfs", - "ldiskfs2" + "ldiskfs2", + "zfs", }; return mount_type_string[mt]; } +static inline char *mt_type(enum ldd_mount_type mt) +{ + static char *mount_type_string[] = { + "osd-ldiskfs", + "osd-ldiskfs", + "osd-smfs", + "osd-reiserfs", + "osd-ldiskfs", + "osd-zfs", + }; + return mount_type_string[mt]; +} + #define LDD_INCOMPAT_SUPP 0 #define LDD_ROCOMPAT_SUPP 0 @@ -121,7 +160,8 @@ struct lustre_disk_data { __u32 ldd_svindex; /* server index (0001), must match svname */ __u32 ldd_mount_type; /* target fs type LDD_MT_* */ - char ldd_fsname[64]; /* filesystem this server is part of */ + char ldd_fsname[64]; /* filesystem this server is part of, + MTI_NAME_MAXLEN */ char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/ __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */ @@ -171,6 +211,8 @@ struct lustre_mount_data { __u32 lmd_flags; /* lustre mount flags */ int lmd_mgs_failnodes; /* mgs failover node count */ int lmd_exclude_count; + int lmd_recovery_time_soft; + int lmd_recovery_time_hard; char *lmd_dev; /* device name */ char *lmd_profile; /* client only */ char *lmd_mgssec; /* sptlrpc flavor to mgs */ @@ -186,12 +228,21 @@ struct lustre_mount_data { no other services */ #define LMD_FLG_NOMGS 0x0020 /* Only start target for servers, reusing existing MGS services */ +#define LMD_FLG_WRITECONF 0x0040 /* Rewrite config log */ +#define LMD_FLG_NOIR 0x0080 /* NO imperative recovery */ +#define LMD_FLG_NOSCRUB 0x0100 /* Do not trigger scrub automatically */ #define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) /****************** last_rcvd file *********************/ +/** version recovery epoch */ +#define LR_EPOCH_BITS 32 +#define lr_epoch(a) ((a) >> LR_EPOCH_BITS) +#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */ +#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */ + #define LR_SERVER_SIZE 512 #define LR_CLIENT_START 8192 #define LR_CLIENT_SIZE 128 @@ -215,6 +266,8 @@ struct lustre_mount_data { #define OBD_COMPAT_OST 0x00000002 /** COMPAT_146: this is an MDT (temporary) */ #define OBD_COMPAT_MDT 0x00000004 +/** 2.0 server, interop flag to show server version is changed */ +#define OBD_COMPAT_20 0x00000008 /** MDS handles LOV_OBJID file */ #define OBD_ROCOMPAT_LOVOBJID 0x00000001 @@ -229,11 +282,20 @@ struct lustre_mount_data { #define OBD_INCOMPAT_COMMON_LR 0x00000008 /** FID is enabled */ #define OBD_INCOMPAT_FID 0x00000010 -/** - * lustre disk using iam format to store directory entries - */ -#define OBD_INCOMPAT_IAM_DIR 0x00000020 - +/** Size-on-MDS is enabled */ +#define OBD_INCOMPAT_SOM 0x00000020 +/** filesystem using iam format to store directory entries */ +#define OBD_INCOMPAT_IAM_DIR 0x00000040 +/** LMA attribute contains per-inode incompatible flags */ +#define OBD_INCOMPAT_LMA 0x00000080 +/** lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining 16 + * bits are now used to store a generation. Once we start changing the layout + * and bumping the generation, old versions expecting a 32-bit lmm_stripe_count + * will be confused by interpreting stripe_count | gen << 16 as the actual + * stripe count */ +#define OBD_INCOMPAT_LMM_VER 0x00000100 +/** multiple OI files for MDT */ +#define OBD_INCOMPAT_MULTI_OI 0x00000200 /* Data stored per server at the head of the last_rcvd file. In le32 order. This should be common to filter_internal.h, lustre_mds.h */ @@ -254,7 +316,13 @@ struct lr_server_data { __u8 lsd_peeruuid[40]; /* UUID of MDS associated with this OST */ __u32 lsd_ost_index; /* index number of OST in LOV */ __u32 lsd_mdt_index; /* index number of MDT in LMV */ - __u8 lsd_padding[LR_SERVER_SIZE - 148]; + __u32 lsd_start_epoch; /* VBR: start epoch from last boot */ + /** transaction values since lsd_trans_table_time */ + __u64 lsd_trans_table[LR_EXPIRE_INTERVALS]; + /** start point of transno table below */ + __u32 lsd_trans_table_time; /* time of first slot in table above */ + __u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */ + __u8 lsd_padding[LR_SERVER_SIZE - 288]; }; /* Data stored per client in the last_rcvd file. In le32 order. */ @@ -269,9 +337,134 @@ struct lsd_client_data { __u64 lcd_last_close_xid; /* xid for the last transaction */ __u32 lcd_last_close_result; /* result from last RPC */ __u32 lcd_last_close_data; /* per-op data */ - __u8 lcd_padding[LR_CLIENT_SIZE - 88]; + /* VBR: last versions */ + __u64 lcd_pre_versions[4]; + __u32 lcd_last_epoch; + /** orphans handling for delayed export rely on that */ + __u32 lcd_first_epoch; + __u8 lcd_padding[LR_CLIENT_SIZE - 128]; }; +/* bug20354: the lcd_uuid for export of clients may be wrong */ +static inline void check_lcd(char *obd_name, int index, + struct lsd_client_data *lcd) +{ + int length = sizeof(lcd->lcd_uuid); + if (strnlen((char*)lcd->lcd_uuid, length) == length) { + lcd->lcd_uuid[length - 1] = '\0'; + + LCONSOLE_ERROR("the client UUID (%s) on %s for exports" + "stored in last_rcvd(index = %d) is bad!\n", + lcd->lcd_uuid, obd_name, index); + } +} + +/* last_rcvd handling */ +static inline void lsd_le_to_cpu(struct lr_server_data *buf, + struct lr_server_data *lsd) +{ + int i; + memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof (lsd->lsd_uuid)); + lsd->lsd_last_transno = le64_to_cpu(buf->lsd_last_transno); + lsd->lsd_compat14 = le64_to_cpu(buf->lsd_compat14); + lsd->lsd_mount_count = le64_to_cpu(buf->lsd_mount_count); + lsd->lsd_feature_compat = le32_to_cpu(buf->lsd_feature_compat); + lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat); + lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat); + lsd->lsd_server_size = le32_to_cpu(buf->lsd_server_size); + lsd->lsd_client_start = le32_to_cpu(buf->lsd_client_start); + lsd->lsd_client_size = le16_to_cpu(buf->lsd_client_size); + lsd->lsd_subdir_count = le16_to_cpu(buf->lsd_subdir_count); + lsd->lsd_catalog_oid = le64_to_cpu(buf->lsd_catalog_oid); + lsd->lsd_catalog_ogen = le32_to_cpu(buf->lsd_catalog_ogen); + memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid, sizeof(lsd->lsd_peeruuid)); + lsd->lsd_ost_index = le32_to_cpu(buf->lsd_ost_index); + lsd->lsd_mdt_index = le32_to_cpu(buf->lsd_mdt_index); + lsd->lsd_start_epoch = le32_to_cpu(buf->lsd_start_epoch); + for (i = 0; i < LR_EXPIRE_INTERVALS; i++) + lsd->lsd_trans_table[i] = le64_to_cpu(buf->lsd_trans_table[i]); + lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time); + lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals); +} + +static inline void lsd_cpu_to_le(struct lr_server_data *lsd, + struct lr_server_data *buf) +{ + int i; + memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof (buf->lsd_uuid)); + buf->lsd_last_transno = cpu_to_le64(lsd->lsd_last_transno); + buf->lsd_compat14 = cpu_to_le64(lsd->lsd_compat14); + buf->lsd_mount_count = cpu_to_le64(lsd->lsd_mount_count); + buf->lsd_feature_compat = cpu_to_le32(lsd->lsd_feature_compat); + buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat); + buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat); + buf->lsd_server_size = cpu_to_le32(lsd->lsd_server_size); + buf->lsd_client_start = cpu_to_le32(lsd->lsd_client_start); + buf->lsd_client_size = cpu_to_le16(lsd->lsd_client_size); + buf->lsd_subdir_count = cpu_to_le16(lsd->lsd_subdir_count); + buf->lsd_catalog_oid = cpu_to_le64(lsd->lsd_catalog_oid); + buf->lsd_catalog_ogen = cpu_to_le32(lsd->lsd_catalog_ogen); + memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid, sizeof(buf->lsd_peeruuid)); + buf->lsd_ost_index = cpu_to_le32(lsd->lsd_ost_index); + buf->lsd_mdt_index = cpu_to_le32(lsd->lsd_mdt_index); + buf->lsd_start_epoch = cpu_to_le32(lsd->lsd_start_epoch); + for (i = 0; i < LR_EXPIRE_INTERVALS; i++) + buf->lsd_trans_table[i] = cpu_to_le64(lsd->lsd_trans_table[i]); + buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time); + buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals); +} + +static inline void lcd_le_to_cpu(struct lsd_client_data *buf, + struct lsd_client_data *lcd) +{ + memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof (lcd->lcd_uuid)); + lcd->lcd_last_transno = le64_to_cpu(buf->lcd_last_transno); + lcd->lcd_last_xid = le64_to_cpu(buf->lcd_last_xid); + lcd->lcd_last_result = le32_to_cpu(buf->lcd_last_result); + lcd->lcd_last_data = le32_to_cpu(buf->lcd_last_data); + lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno); + lcd->lcd_last_close_xid = le64_to_cpu(buf->lcd_last_close_xid); + lcd->lcd_last_close_result = le32_to_cpu(buf->lcd_last_close_result); + lcd->lcd_last_close_data = le32_to_cpu(buf->lcd_last_close_data); + lcd->lcd_pre_versions[0] = le64_to_cpu(buf->lcd_pre_versions[0]); + lcd->lcd_pre_versions[1] = le64_to_cpu(buf->lcd_pre_versions[1]); + lcd->lcd_pre_versions[2] = le64_to_cpu(buf->lcd_pre_versions[2]); + lcd->lcd_pre_versions[3] = le64_to_cpu(buf->lcd_pre_versions[3]); + lcd->lcd_last_epoch = le32_to_cpu(buf->lcd_last_epoch); + lcd->lcd_first_epoch = le32_to_cpu(buf->lcd_first_epoch); +} + +static inline void lcd_cpu_to_le(struct lsd_client_data *lcd, + struct lsd_client_data *buf) +{ + memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof (lcd->lcd_uuid)); + buf->lcd_last_transno = cpu_to_le64(lcd->lcd_last_transno); + buf->lcd_last_xid = cpu_to_le64(lcd->lcd_last_xid); + buf->lcd_last_result = cpu_to_le32(lcd->lcd_last_result); + buf->lcd_last_data = cpu_to_le32(lcd->lcd_last_data); + buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno); + buf->lcd_last_close_xid = cpu_to_le64(lcd->lcd_last_close_xid); + buf->lcd_last_close_result = cpu_to_le32(lcd->lcd_last_close_result); + buf->lcd_last_close_data = cpu_to_le32(lcd->lcd_last_close_data); + buf->lcd_pre_versions[0] = cpu_to_le64(lcd->lcd_pre_versions[0]); + buf->lcd_pre_versions[1] = cpu_to_le64(lcd->lcd_pre_versions[1]); + buf->lcd_pre_versions[2] = cpu_to_le64(lcd->lcd_pre_versions[2]); + buf->lcd_pre_versions[3] = cpu_to_le64(lcd->lcd_pre_versions[3]); + buf->lcd_last_epoch = cpu_to_le32(lcd->lcd_last_epoch); + buf->lcd_first_epoch = cpu_to_le32(lcd->lcd_first_epoch); +} + +static inline __u64 lcd_last_transno(struct lsd_client_data *lcd) +{ + return (lcd->lcd_last_transno > lcd->lcd_last_close_transno ? + lcd->lcd_last_transno : lcd->lcd_last_close_transno); +} + +static inline __u64 lcd_last_xid(struct lsd_client_data *lcd) +{ + return (lcd->lcd_last_xid > lcd->lcd_last_close_xid ? + lcd->lcd_last_xid : lcd->lcd_last_close_xid); +} /****************** superblock additional info *********************/ #ifdef __KERNEL__ @@ -285,44 +478,46 @@ struct lustre_sb_info { struct lustre_disk_data *lsi_ldd; /* mount info on-disk */ struct ll_sb_info *lsi_llsbi; /* add'l client sbi info */ struct vfsmount *lsi_srv_mnt; /* the one server mount */ - atomic_t lsi_mounts; /* references to the srv_mnt */ + cfs_atomic_t lsi_mounts; /* references to the srv_mnt */ + struct backing_dev_info lsi_bdi; /* each client mountpoint needs + own backing_dev_info */ }; #define LSI_SERVER 0x00000001 #define LSI_UMOUNT_FORCE 0x00000010 #define LSI_UMOUNT_FAILOVER 0x00000020 +#define LSI_BDI_INITIALIZED 0x00000040 +#define LSI_IR_CAPABLE 0x00000080 #define s2lsi(sb) ((struct lustre_sb_info *)((sb)->s_fs_info)) #define s2lsi_nocast(sb) ((sb)->s_fs_info) #define get_profile_name(sb) (s2lsi(sb)->lsi_lmd->lmd_profile) +#define get_mount_flags(sb) (s2lsi(sb)->lsi_lmd->lmd_flags) #endif /* __KERNEL__ */ /****************** mount lookup info *********************/ struct lustre_mount_info { - char *lmi_name; - struct super_block *lmi_sb; - struct vfsmount *lmi_mnt; - struct list_head lmi_list_chain; + char *lmi_name; + struct super_block *lmi_sb; + struct vfsmount *lmi_mnt; + cfs_list_t lmi_list_chain; }; /****************** prototypes *********************/ #ifdef __KERNEL__ -#include /* obd_mount.c */ -void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb)); +void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb, + struct vfsmount *mnt)); void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb)); int lustre_common_put_super(struct super_block *sb); -int lustre_process_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg); -int lustre_end_log(struct super_block *sb, char *logname, - struct config_llog_instance *cfg); +struct lustre_mount_info *server_find_mount_locked(const char *name); struct lustre_mount_info *server_get_mount(const char *name); struct lustre_mount_info *server_get_mount_2(const char *name); int server_put_mount(const char *name, struct vfsmount *mnt); @@ -330,10 +525,13 @@ int server_put_mount_2(const char *name, struct vfsmount *mnt); int server_register_target(struct super_block *sb); struct mgs_target_info; int server_mti_print(char *title, struct mgs_target_info *mti); +void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd); /* mgc_request.c */ -int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id); +int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type); #endif +/** @} disk */ + #endif // _LUSTRE_DISK_H