Whamcloud - gitweb
LU-1182 quota: quota accounting library
[fs/lustre-release.git] / lustre / include / lustre_disk.h
index 6e0a0f6..b5bd305 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #ifndef _LUSTRE_DISK_H
 #define _LUSTRE_DISK_H
 
+/** \defgroup disk disk
+ *
+ * @{
+ */
+
 #include <libcfs/libcfs.h>
 #include <lnet/types.h>
 
 #define LOV_OBJID         "lov_objid"
 #define HEALTH_CHECK      "health_check"
 #define CAPA_KEYS         "capa_keys"
+#define CHANGELOG_USERS   "changelog_users"
+#define MGS_NIDTBL_DIR    "NIDTBL_VERSIONS"
+#define QMT_DIR           "quota_master"
+#define QSD_DIR           "quota_slave"
 
 /****************** persistent mount data *********************/
 
 #define LDD_F_SV_TYPE_MDT   0x0001
 #define LDD_F_SV_TYPE_OST   0x0002
 #define LDD_F_SV_TYPE_MGS   0x0004
+#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT  | \
+                            LDD_F_SV_TYPE_OST  | \
+                            LDD_F_SV_TYPE_MGS)
+#define LDD_F_SV_ALL        0x0008
 /** need an index assignment */
 #define LDD_F_NEED_INDEX    0x0010
 /** never registered */
 #define LDD_F_VIRGIN        0x0020
-/** update the config logs for this server*/
+/** update the config logs for this server */
 #define LDD_F_UPDATE        0x0040
 /** rewrite the LDD */
 #define LDD_F_REWRITE_LDD   0x0080
-/** regenerate all logs for this fs */
+/** regenerate config logs for this fs or server */
 #define LDD_F_WRITECONF     0x0100
 /** COMPAT_14 */
 #define LDD_F_UPGRADE14     0x0200
 #define LDD_F_PARAM         0x0400
 /** backend fs makes use of IAM directory format. */
 #define LDD_F_IAM_DIR       0x0800
+/** all nodes are specified as service nodes */
+#define LDD_F_NO_PRIMNODE   0x1000
+/** IR enable flag */
+#define LDD_F_IR_CAPABLE    0x2000
+/** the MGS refused to register the target. */
+#define LDD_F_ERROR         0x4000
+
+/* opcodes for target registration */
+#define LDD_F_OPC_REG   0x10000000
+#define LDD_F_OPC_UNREG 0x20000000
+#define LDD_F_OPC_READY 0x40000000
+#define LDD_F_OPC_MASK  0xf0000000
+
+#define LDD_F_ONDISK_MASK  (LDD_F_SV_TYPE_MASK | LDD_F_IAM_DIR)
 
 enum ldd_mount_type {
-        LDD_MT_EXT3 = 0,
-        LDD_MT_LDISKFS,
-        LDD_MT_SMFS,
-        LDD_MT_REISERFS,
-        LDD_MT_LDISKFS2,
-        LDD_MT_LAST
+       LDD_MT_EXT3 = 0,
+       LDD_MT_LDISKFS,
+       LDD_MT_SMFS,
+       LDD_MT_REISERFS,
+       LDD_MT_LDISKFS2,
+       LDD_MT_ZFS,
+       LDD_MT_LAST
 };
 
 static inline char *mt_str(enum ldd_mount_type mt)
@@ -115,10 +143,11 @@ struct lustre_disk_data {
 
         __u32      ldd_config_ver;      /* config rewrite count - not used */
         __u32      ldd_flags;           /* LDD_SV_TYPE */
-        __u32      ldd_svindex;         /* server index (0001), must match 
+        __u32      ldd_svindex;         /* server index (0001), must match
                                            svname */
         __u32      ldd_mount_type;      /* target fs type LDD_MT_* */
-        char       ldd_fsname[64];      /* filesystem this server is part of */
+        char       ldd_fsname[64];      /* filesystem this server is part of,
+                                           MTI_NAME_MAXLEN */
         char       ldd_svname[64];      /* this server's name (lustre-mdt0001)*/
         __u8       ldd_uuid[40];        /* server UUID (COMPAT_146) */
 
@@ -138,9 +167,10 @@ static inline int server_make_name(__u32 flags, __u16 index, char *fs,
                                    char *name)
 {
         if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
-                sprintf(name, "%.8s-%s%04x", fs,
-                        (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
-                        index);
+                if (!(flags & LDD_F_SV_ALL))
+                        sprintf(name, "%.8s-%s%04x", fs,
+                                (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
+                                index);
         } else if (flags & LDD_F_SV_TYPE_MGS) {
                 sprintf(name, "MGS");
         } else {
@@ -156,7 +186,7 @@ int server_name2index(char *svname, __u32 *idx, char **endptr);
 
 /****************** mount command *********************/
 
-/* The lmd is only used internally by Lustre; mount simply passes 
+/* The lmd is only used internally by Lustre; mount simply passes
    everything as string options */
 
 #define LMD_MAGIC    0xbdacbd03
@@ -167,9 +197,12 @@ struct lustre_mount_data {
         __u32      lmd_flags;         /* lustre mount flags */
         int        lmd_mgs_failnodes; /* mgs failover node count */
         int        lmd_exclude_count;
+        int        lmd_recovery_time_soft;
+        int        lmd_recovery_time_hard;
         char      *lmd_dev;           /* device name */
         char      *lmd_profile;       /* client only */
-        char      *lmd_opts;          /* lustre mount options (as opposed to 
+        char      *lmd_mgssec;        /* sptlrpc flavor to mgs */
+        char      *lmd_opts;          /* lustre mount options (as opposed to
                                          _device_ mount options) */
         __u32     *lmd_exclude;       /* array of OSTs to ignore */
 };
@@ -177,16 +210,25 @@ struct lustre_mount_data {
 #define LMD_FLG_SERVER       0x0001  /* Mounting a server */
 #define LMD_FLG_CLIENT       0x0002  /* Mounting a client */
 #define LMD_FLG_ABORT_RECOV  0x0008  /* Abort recovery */
-#define LMD_FLG_NOSVC        0x0010  /* Only start MGS/MGC for servers, 
+#define LMD_FLG_NOSVC        0x0010  /* Only start MGS/MGC for servers,
                                         no other services */
 #define LMD_FLG_NOMGS        0x0020  /* Only start target for servers, reusing
                                         existing MGS services */
+#define LMD_FLG_WRITECONF    0x0040  /* Rewrite config log */
+#define LMD_FLG_NOIR         0x0080  /* NO imperative recovery */
+#define LMD_FLG_NOSCRUB             0x0100  /* Do not trigger scrub automatically */
 
-#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) 
+#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
 
 
 /****************** last_rcvd file *********************/
 
+/** version recovery epoch */
+#define LR_EPOCH_BITS   32
+#define lr_epoch(a) ((a) >> LR_EPOCH_BITS)
+#define LR_EXPIRE_INTERVALS 16 /**< number of intervals to track transno */
+#define ENOENT_VERSION 1 /** 'virtual' version of non-existent object */
+
 #define LR_SERVER_SIZE   512
 #define LR_CLIENT_START 8192
 #define LR_CLIENT_SIZE   128
@@ -210,6 +252,8 @@ struct lustre_mount_data {
 #define OBD_COMPAT_OST          0x00000002
 /** COMPAT_146: this is an MDT (temporary) */
 #define OBD_COMPAT_MDT          0x00000004
+/** 2.0 server, interop flag to show server version is changed */
+#define OBD_COMPAT_20           0x00000008
 
 /** MDS handles LOV_OBJID file */
 #define OBD_ROCOMPAT_LOVOBJID   0x00000001
@@ -224,11 +268,20 @@ struct lustre_mount_data {
 #define OBD_INCOMPAT_COMMON_LR  0x00000008
 /** FID is enabled */
 #define OBD_INCOMPAT_FID        0x00000010
-/**
- * lustre disk using iam format to store directory entries
- */
-#define OBD_INCOMPAT_IAM_DIR    0x00000020
-
+/** Size-on-MDS is enabled */
+#define OBD_INCOMPAT_SOM        0x00000020
+/** filesystem using iam format to store directory entries */
+#define OBD_INCOMPAT_IAM_DIR    0x00000040
+/** LMA attribute contains per-inode incompatible flags */
+#define OBD_INCOMPAT_LMA        0x00000080
+/** lmm_stripe_count has been shrunk from __u32 to __u16 and the remaining 16
+ * bits are now used to store a generation. Once we start changing the layout
+ * and bumping the generation, old versions expecting a 32-bit lmm_stripe_count
+ * will be confused by interpreting stripe_count | gen << 16 as the actual
+ * stripe count */
+#define OBD_INCOMPAT_LMM_VER    0x00000100
+/** multiple OI files for MDT */
+#define OBD_INCOMPAT_MULTI_OI   0x00000200
 
 /* Data stored per server at the head of the last_rcvd file.  In le32 order.
    This should be common to filter_internal.h, lustre_mds.h */
@@ -249,7 +302,13 @@ struct lr_server_data {
         __u8  lsd_peeruuid[40];    /* UUID of MDS associated with this OST */
         __u32 lsd_ost_index;       /* index number of OST in LOV */
         __u32 lsd_mdt_index;       /* index number of MDT in LMV */
-        __u8  lsd_padding[LR_SERVER_SIZE - 148];
+        __u32 lsd_start_epoch;     /* VBR: start epoch from last boot */
+        /** transaction values since lsd_trans_table_time */
+        __u64 lsd_trans_table[LR_EXPIRE_INTERVALS];
+        /** start point of transno table below */
+        __u32 lsd_trans_table_time; /* time of first slot in table above */
+        __u32 lsd_expire_intervals; /* LR_EXPIRE_INTERVALS */
+        __u8  lsd_padding[LR_SERVER_SIZE - 288];
 };
 
 /* Data stored per client in the last_rcvd file.  In le32 order. */
@@ -264,12 +323,138 @@ struct lsd_client_data {
         __u64 lcd_last_close_xid;     /* xid for the last transaction */
         __u32 lcd_last_close_result;  /* result from last RPC */
         __u32 lcd_last_close_data;    /* per-op data */
-        __u8  lcd_padding[LR_CLIENT_SIZE - 88];
+        /* VBR: last versions */
+        __u64 lcd_pre_versions[4];
+        __u32 lcd_last_epoch;
+        /** orphan handling for delayed exports relies on this */
+        __u32 lcd_first_epoch;
+        __u8  lcd_padding[LR_CLIENT_SIZE - 128];
 };
 
+/*
+ * bug20354: the lcd_uuid for export of clients may be wrong.
+ *
+ * Check that the client UUID stored in \a lcd has a NUL terminator within
+ * the lcd_uuid buffer.  If none is found, overwrite the last byte of the
+ * buffer with '\0' (truncating the UUID) and report the record on the
+ * console.
+ *
+ * \param obd_name  name printed in the console message
+ * \param index     last_rcvd index printed in the console message
+ * \param lcd       client record to check; lcd_uuid may be modified
+ */
+static inline void check_lcd(char *obd_name, int index,
+                             struct lsd_client_data *lcd)
+{
+        /* size_t matches the return type of strnlen(), so the comparison
+         * below does not mix signed and unsigned operands */
+        size_t length = sizeof(lcd->lcd_uuid);
+
+        if (strnlen((char *)lcd->lcd_uuid, length) == length) {
+                lcd->lcd_uuid[length - 1] = '\0';
+
+                /* the trailing space after "exports" is required: adjacent
+                 * string literals are concatenated without a separator */
+                LCONSOLE_ERROR("the client UUID (%s) on %s for exports "
+                               "stored in last_rcvd(index = %d) is bad!\n",
+                               lcd->lcd_uuid, obd_name, index);
+        }
+}
+
+/* last_rcvd handling */
+/*
+ * Convert a last_rcvd server record from little-endian (\a buf) to CPU
+ * byte order (\a lsd).  The two UUID fields are copied with memcpy();
+ * every other field goes through the le{16,32,64}_to_cpu() helper that
+ * matches its width.  lsd_padding is not touched.
+ */
+static inline void lsd_le_to_cpu(struct lr_server_data *buf,
+                                 struct lr_server_data *lsd)
+{
+        int slot;
+
+        memcpy(lsd->lsd_uuid, buf->lsd_uuid, sizeof(lsd->lsd_uuid));
+        lsd->lsd_last_transno = le64_to_cpu(buf->lsd_last_transno);
+        lsd->lsd_compat14 = le64_to_cpu(buf->lsd_compat14);
+        lsd->lsd_mount_count = le64_to_cpu(buf->lsd_mount_count);
+
+        /* feature flag words */
+        lsd->lsd_feature_compat = le32_to_cpu(buf->lsd_feature_compat);
+        lsd->lsd_feature_rocompat = le32_to_cpu(buf->lsd_feature_rocompat);
+        lsd->lsd_feature_incompat = le32_to_cpu(buf->lsd_feature_incompat);
+
+        lsd->lsd_server_size = le32_to_cpu(buf->lsd_server_size);
+        lsd->lsd_client_start = le32_to_cpu(buf->lsd_client_start);
+        lsd->lsd_client_size = le16_to_cpu(buf->lsd_client_size);
+        lsd->lsd_subdir_count = le16_to_cpu(buf->lsd_subdir_count);
+        lsd->lsd_catalog_oid = le64_to_cpu(buf->lsd_catalog_oid);
+        lsd->lsd_catalog_ogen = le32_to_cpu(buf->lsd_catalog_ogen);
+
+        memcpy(lsd->lsd_peeruuid, buf->lsd_peeruuid,
+               sizeof(lsd->lsd_peeruuid));
+        lsd->lsd_ost_index = le32_to_cpu(buf->lsd_ost_index);
+        lsd->lsd_mdt_index = le32_to_cpu(buf->lsd_mdt_index);
+        lsd->lsd_start_epoch = le32_to_cpu(buf->lsd_start_epoch);
+
+        /* transno table: LR_EXPIRE_INTERVALS 64-bit entries */
+        for (slot = 0; slot < LR_EXPIRE_INTERVALS; slot++)
+                lsd->lsd_trans_table[slot] =
+                        le64_to_cpu(buf->lsd_trans_table[slot]);
+
+        lsd->lsd_trans_table_time = le32_to_cpu(buf->lsd_trans_table_time);
+        lsd->lsd_expire_intervals = le32_to_cpu(buf->lsd_expire_intervals);
+}
+
+/*
+ * Convert a last_rcvd server record from CPU byte order (\a lsd) to
+ * little-endian (\a buf).  The two UUID fields are copied with memcpy();
+ * every other field goes through the cpu_to_le{16,32,64}() helper that
+ * matches its width.  lsd_padding is not touched.
+ */
+static inline void lsd_cpu_to_le(struct lr_server_data *lsd,
+                                 struct lr_server_data *buf)
+{
+        int slot;
+
+        memcpy(buf->lsd_uuid, lsd->lsd_uuid, sizeof(buf->lsd_uuid));
+        buf->lsd_last_transno = cpu_to_le64(lsd->lsd_last_transno);
+        buf->lsd_compat14 = cpu_to_le64(lsd->lsd_compat14);
+        buf->lsd_mount_count = cpu_to_le64(lsd->lsd_mount_count);
+
+        /* feature flag words */
+        buf->lsd_feature_compat = cpu_to_le32(lsd->lsd_feature_compat);
+        buf->lsd_feature_rocompat = cpu_to_le32(lsd->lsd_feature_rocompat);
+        buf->lsd_feature_incompat = cpu_to_le32(lsd->lsd_feature_incompat);
+
+        buf->lsd_server_size = cpu_to_le32(lsd->lsd_server_size);
+        buf->lsd_client_start = cpu_to_le32(lsd->lsd_client_start);
+        buf->lsd_client_size = cpu_to_le16(lsd->lsd_client_size);
+        buf->lsd_subdir_count = cpu_to_le16(lsd->lsd_subdir_count);
+        buf->lsd_catalog_oid = cpu_to_le64(lsd->lsd_catalog_oid);
+        buf->lsd_catalog_ogen = cpu_to_le32(lsd->lsd_catalog_ogen);
+
+        memcpy(buf->lsd_peeruuid, lsd->lsd_peeruuid,
+               sizeof(buf->lsd_peeruuid));
+        buf->lsd_ost_index = cpu_to_le32(lsd->lsd_ost_index);
+        buf->lsd_mdt_index = cpu_to_le32(lsd->lsd_mdt_index);
+        buf->lsd_start_epoch = cpu_to_le32(lsd->lsd_start_epoch);
+
+        /* transno table: LR_EXPIRE_INTERVALS 64-bit entries */
+        for (slot = 0; slot < LR_EXPIRE_INTERVALS; slot++)
+                buf->lsd_trans_table[slot] =
+                        cpu_to_le64(lsd->lsd_trans_table[slot]);
+
+        buf->lsd_trans_table_time = cpu_to_le32(lsd->lsd_trans_table_time);
+        buf->lsd_expire_intervals = cpu_to_le32(lsd->lsd_expire_intervals);
+}
+
+/*
+ * Convert a last_rcvd per-client record from little-endian (\a buf) to
+ * CPU byte order (\a lcd).  lcd_uuid is copied with memcpy(); lcd_padding
+ * is not touched.
+ */
+static inline void lcd_le_to_cpu(struct lsd_client_data *buf,
+                                 struct lsd_client_data *lcd)
+{
+        unsigned int ver;
+
+        memcpy(lcd->lcd_uuid, buf->lcd_uuid, sizeof(lcd->lcd_uuid));
+
+        /* last regular request */
+        lcd->lcd_last_transno = le64_to_cpu(buf->lcd_last_transno);
+        lcd->lcd_last_xid = le64_to_cpu(buf->lcd_last_xid);
+        lcd->lcd_last_result = le32_to_cpu(buf->lcd_last_result);
+        lcd->lcd_last_data = le32_to_cpu(buf->lcd_last_data);
+
+        /* last close request */
+        lcd->lcd_last_close_transno = le64_to_cpu(buf->lcd_last_close_transno);
+        lcd->lcd_last_close_xid = le64_to_cpu(buf->lcd_last_close_xid);
+        lcd->lcd_last_close_result = le32_to_cpu(buf->lcd_last_close_result);
+        lcd->lcd_last_close_data = le32_to_cpu(buf->lcd_last_close_data);
+
+        /* VBR: walk every element of the lcd_pre_versions array */
+        for (ver = 0;
+             ver < sizeof(lcd->lcd_pre_versions) /
+                   sizeof(lcd->lcd_pre_versions[0]);
+             ver++)
+                lcd->lcd_pre_versions[ver] =
+                        le64_to_cpu(buf->lcd_pre_versions[ver]);
+
+        lcd->lcd_last_epoch = le32_to_cpu(buf->lcd_last_epoch);
+        lcd->lcd_first_epoch = le32_to_cpu(buf->lcd_first_epoch);
+}
+
+/*
+ * Convert a last_rcvd per-client record from CPU byte order (\a lcd) to
+ * little-endian (\a buf).  lcd_uuid is copied with memcpy(); lcd_padding
+ * is not touched.
+ */
+static inline void lcd_cpu_to_le(struct lsd_client_data *lcd,
+                                 struct lsd_client_data *buf)
+{
+        unsigned int ver;
+
+        memcpy(buf->lcd_uuid, lcd->lcd_uuid, sizeof(lcd->lcd_uuid));
+
+        /* last regular request */
+        buf->lcd_last_transno = cpu_to_le64(lcd->lcd_last_transno);
+        buf->lcd_last_xid = cpu_to_le64(lcd->lcd_last_xid);
+        buf->lcd_last_result = cpu_to_le32(lcd->lcd_last_result);
+        buf->lcd_last_data = cpu_to_le32(lcd->lcd_last_data);
+
+        /* last close request */
+        buf->lcd_last_close_transno = cpu_to_le64(lcd->lcd_last_close_transno);
+        buf->lcd_last_close_xid = cpu_to_le64(lcd->lcd_last_close_xid);
+        buf->lcd_last_close_result = cpu_to_le32(lcd->lcd_last_close_result);
+        buf->lcd_last_close_data = cpu_to_le32(lcd->lcd_last_close_data);
+
+        /* VBR: walk every element of the lcd_pre_versions array */
+        for (ver = 0;
+             ver < sizeof(lcd->lcd_pre_versions) /
+                   sizeof(lcd->lcd_pre_versions[0]);
+             ver++)
+                buf->lcd_pre_versions[ver] =
+                        cpu_to_le64(lcd->lcd_pre_versions[ver]);
+
+        buf->lcd_last_epoch = cpu_to_le32(lcd->lcd_last_epoch);
+        buf->lcd_first_epoch = cpu_to_le32(lcd->lcd_first_epoch);
+}
+
+/* Return the larger of lcd_last_transno and lcd_last_close_transno. */
+static inline __u64 lcd_last_transno(struct lsd_client_data *lcd)
+{
+        if (lcd->lcd_last_transno > lcd->lcd_last_close_transno)
+                return lcd->lcd_last_transno;
+
+        return lcd->lcd_last_close_transno;
+}
+
+/* Return the larger of lcd_last_xid and lcd_last_close_xid. */
+static inline __u64 lcd_last_xid(struct lsd_client_data *lcd)
+{
+        if (lcd->lcd_last_xid > lcd->lcd_last_close_xid)
+                return lcd->lcd_last_xid;
+
+        return lcd->lcd_last_close_xid;
+}
 
-#ifdef __KERNEL__
 /****************** superblock additional info *********************/
+#ifdef __KERNEL__
+
 struct ll_sb_info;
 
 struct lustre_sb_info {
@@ -279,44 +464,46 @@ struct lustre_sb_info {
         struct lustre_disk_data  *lsi_ldd;     /* mount info on-disk */
         struct ll_sb_info        *lsi_llsbi;   /* add'l client sbi info */
         struct vfsmount          *lsi_srv_mnt; /* the one server mount */
-        atomic_t                  lsi_mounts;  /* references to the srv_mnt */
+        cfs_atomic_t              lsi_mounts;  /* references to the srv_mnt */
+        struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
+                                                  own backing_dev_info */
 };
 
 #define LSI_SERVER                       0x00000001
 #define LSI_UMOUNT_FORCE                 0x00000010
 #define LSI_UMOUNT_FAILOVER              0x00000020
+#define LSI_BDI_INITIALIZED              0x00000040
+#define LSI_IR_CAPABLE                   0x00000080
 
 #define     s2lsi(sb)        ((struct lustre_sb_info *)((sb)->s_fs_info))
 #define     s2lsi_nocast(sb) ((sb)->s_fs_info)
 
 #define     get_profile_name(sb)   (s2lsi(sb)->lsi_lmd->lmd_profile)
+#define            get_mount_flags(sb)    (s2lsi(sb)->lsi_lmd->lmd_flags)
 
 #endif /* __KERNEL__ */
 
 /****************** mount lookup info *********************/
 
 struct lustre_mount_info {
-        char               *lmi_name;
-        struct super_block *lmi_sb;
-        struct vfsmount    *lmi_mnt;
-        struct list_head    lmi_list_chain;
+        char                 *lmi_name;
+        struct super_block   *lmi_sb;
+        struct vfsmount      *lmi_mnt;
+        cfs_list_t            lmi_list_chain;
 };
 
 /****************** prototypes *********************/
 
 #ifdef __KERNEL__
-#include <obd_class.h>
 
 /* obd_mount.c */
-void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb));
+void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
+                                                  struct vfsmount *mnt));
 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb));
 
 
 int lustre_common_put_super(struct super_block *sb);
-int lustre_process_log(struct super_block *sb, char *logname, 
-                     struct config_llog_instance *cfg);
-int lustre_end_log(struct super_block *sb, char *logname, 
-                       struct config_llog_instance *cfg);
+struct lustre_mount_info *server_find_mount_locked(const char *name);
 struct lustre_mount_info *server_get_mount(const char *name);
 struct lustre_mount_info *server_get_mount_2(const char *name);
 int server_put_mount(const char *name, struct vfsmount *mnt);
@@ -324,10 +511,13 @@ int server_put_mount_2(const char *name, struct vfsmount *mnt);
 int server_register_target(struct super_block *sb);
 struct mgs_target_info;
 int server_mti_print(char *title, struct mgs_target_info *mti);
+void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd);
 
 /* mgc_request.c */
-int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id);
+int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
 
 #endif
 
+/** @} disk */
+
 #endif // _LUSTRE_DISK_H