Whamcloud - gitweb
ORNL-10: Basic IR implementation
authorJinshan Xiong <jay@whamcloud.com>
Fri, 19 Aug 2011 23:44:50 +0000 (16:44 -0700)
committerOleg Drokin <green@whamcloud.com>
Thu, 13 Oct 2011 21:19:49 +0000 (17:19 -0400)
To support imperative recovery, a target status table is defined on the MGS
for each file system. When a target registers itself with the MGS, the MGS
updates this table accordingly.

One important field in the status table is the target NID. Clients use this
NID to locate the server on which the target lives. By transferring this NID
to clients, clients can learn about the restart of a target earlier. This is
the so-called imperative recovery - the MGS notifies clients to do recovery
imperatively instead of relying on the timeout-based standard recovery.

To implement imperative recovery, clients are asked to cache a NID table, which
contains the location information of all servers. Clients need to hold a read
mode ldlm plain lock - the recover lock - to cache this table. Whenever the MGS
wants to change this table, it enqueues an EXCL recover lock so that all
clients are notified of the change. Clients then request a new read recover
lock and query the MGS for NID table updates.

Change-Id: I3b38ba142b810df507805b71972feeb1bade1ac2
Signed-off-by: Jinshan Xiong <jay@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/1217
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
36 files changed:
lustre/include/lprocfs_status.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_disk.h
lustre/include/lustre_req_layout.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/include/obd_support.h
lustre/ldlm/ldlm_lib.c
lustre/mdc/lproc_mdc.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_lproc.c
lustre/mdt/mdt_recovery.c
lustre/mgc/mgc_internal.h
lustre/mgc/mgc_request.c
lustre/mgs/Makefile.in
lustre/mgs/lproc_mgs.c
lustre/mgs/mgs_fs.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_internal.h
lustre/mgs/mgs_llog.c
lustre/mgs/mgs_nids.c [new file with mode: 0644]
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_mount.c
lustre/obdfilter/filter.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/lproc_osc.c
lustre/ptlrpc/import.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/sec.c
lustre/ptlrpc/service.c
lustre/ptlrpc/wiretest.c
lustre/utils/req-layout.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 95ec094..771f226 100644 (file)
@@ -539,6 +539,8 @@ extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
                                    unsigned long count, void *data);
 extern int lprocfs_wr_ping(struct file *file, const char *buffer,
                            unsigned long count, void *data);
+extern int lprocfs_wr_import(struct file *file, const char *buffer,
+                             unsigned long count, void *data);
 
 /* Statfs helpers */
 extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
@@ -583,6 +585,12 @@ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
 int lprocfs_obd_rd_hash(char *page, char **start, off_t off,
                         int count, int *eof, void *data);
 
+/* lprocfs_status.c: IR factor */
+int lprocfs_obd_rd_ir_factor(char *page, char **start, off_t off,
+                             int count, int *eof, void *data);
+int lprocfs_obd_wr_ir_factor(struct file *file, const char *buffer,
+                             unsigned long count, void *data);
+
 extern int lprocfs_seq_release(cfs_inode_t *, struct file *);
 
 /* You must use these macros when you want to refer to
@@ -648,6 +656,9 @@ int lprocfs_obd_rd_max_pages_per_rpc(char *page, char **start, off_t off,
                                      int count, int *eof, void *data);
 int lprocfs_obd_wr_max_pages_per_rpc(struct file *file, const char *buffer,
                                      unsigned long count, void *data);
+int lprocfs_target_rd_instance(char *page, char **start, off_t off,
+                               int count, int *eof, void *data);
+
 /* all quota proc functions */
 extern int lprocfs_quota_rd_bunit(char *page, char **start,
                                   off_t off, int count,
@@ -864,7 +875,9 @@ static inline int lprocfs_wr_evict_client(struct file *file,
 static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
                                   unsigned long count, void *data)
 { return 0; }
-
+static inline int lprocfs_wr_import(struct file *file, const char *buffer,
+                                    unsigned long count, void *data)
+{ return 0; }
 
 /* Statfs helpers */
 static inline
index a9f6ba5..5e91b63 100644 (file)
 #define SEQ_METADATA_PORTAL            30
 #define SEQ_DATA_PORTAL                31
 #define SEQ_CONTROLLER_PORTAL          32
+#define MGS_BULK_PORTAL                33
 
 /* Portal 63 is reserved for the Cray Inc DVS - nic@cray.com, roe@cray.com, n8851@cray.com */
 
@@ -1102,6 +1103,11 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_64BITHASH    0x4000000000ULL /* client supports 64-bits
                                                   * directory hash */
 #define OBD_CONNECT_MAXBYTES     0x8000000000ULL /* max stripe size */
+#define OBD_CONNECT_IMP_RECOV   0x10000000000ULL /* imp recovery support */
+
+#define OCD_HAS_FLAG(ocd, flg)  \
+        (!!((ocd)->ocd_connect_flags & OBD_CONNECT_##flg))
+
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags()
  * and lustre/utils/wirecheck.c */
 
@@ -1138,7 +1144,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
                                 OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
-                                OBD_CONNECT_FULL20)
+                                OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV)
 
 /* Features required for this version of the client to work with server */
 #define CLIENT_CONNECT_MDT_REQD (OBD_CONNECT_IBITS | OBD_CONNECT_FID | \
@@ -1168,7 +1174,7 @@ struct obd_connect_data_v1 {
         __u32 ocd_group;         /* MDS group on OST */
         __u32 ocd_cksum_types;   /* supported checksum algorithms */
         __u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
-        __u32 padding;           /* also fix lustre_swab_connect */
+        __u32 ocd_instance;      /* also fix lustre_swab_connect */
         __u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
 };
 
@@ -1185,7 +1191,7 @@ struct obd_connect_data {
         __u32 ocd_group;         /* MDS group on OST */
         __u32 ocd_cksum_types;   /* supported checksum algorithms */
         __u32 ocd_max_easize;    /* How big LOV EA can be on MDS */
-        __u32 padding;           /* also fix lustre_swab_connect */
+        __u32 ocd_instance;      /* instance # of this target */
         __u64 ocd_maxbytes;      /* Maximum stripe size in bytes */
         /* Fields after ocd_maxbytes are only accessible by the receiver
          * if the corresponding flag in ocd_connect_flags is set. Accessing
@@ -2380,6 +2386,7 @@ typedef enum {
         MGS_TARGET_REG,        /* whenever target starts up */
         MGS_TARGET_DEL,
         MGS_SET_INFO,
+        MGS_CONFIG_READ,
         MGS_LAST_OPC
 } mgs_cmd_t;
 #define MGS_FIRST_OPC MGS_CONNECT
@@ -2392,25 +2399,55 @@ struct mgs_send_param {
 };
 
 /* We pass this info to the MGS so it can write config logs */
-#define MTI_NAME_MAXLEN 64
+#define MTI_NAME_MAXLEN  64
 #define MTI_PARAM_MAXLEN 4096
-#define MTI_NIDS_MAX 32
+#define MTI_NIDS_MAX     32
 struct mgs_target_info {
         __u32            mti_lustre_ver;
         __u32            mti_stripe_index;
         __u32            mti_config_ver;
         __u32            mti_flags;
         __u32            mti_nid_count;
-        __u32            padding;                    /* 64 bit align */
+        __u32            mti_instance; /* Running instance of target */
         char             mti_fsname[MTI_NAME_MAXLEN];
         char             mti_svname[MTI_NAME_MAXLEN];
         char             mti_uuid[sizeof(struct obd_uuid)];
         __u64            mti_nids[MTI_NIDS_MAX];     /* host nids (lnet_nid_t)*/
         char             mti_params[MTI_PARAM_MAXLEN];
 };
-
 extern void lustre_swab_mgs_target_info(struct mgs_target_info *oinfo);
 
+struct mgs_nidtbl_entry {
+        __u64           mne_version;    /* table version of this entry */
+        __u32           mne_instance;   /* target instance # */
+        __u32           mne_index;      /* target index */
+        __u32           mne_length;     /* length of this entry, in bytes */
+        __u8            mne_type;       /* target type LDD_F_SV_TYPE_OST/MDT */
+        __u8            mne_nid_type;   /* type of NID, must be zero for now;
+                                         * kept for future types, e.g. ipv6 */
+        __u8            mne_nid_size;   /* size of each NID, in bytes */
+        __u8            mne_nid_count;  /* # of NIDs in buffer */
+        union {
+                lnet_nid_t nids[0];     /* variable size buffer holding
+                                         * mne_nid_count NIDs */
+        } u;
+};
+extern void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *oinfo);
+
+struct mgs_config_body { /* NOTE(review): presumably the MGS_CONFIG_READ
+                          * request body - confirm against ptlrpc/layout.c */
+        char     mcb_name[MTI_NAME_MAXLEN]; /* logname */
+        __u64    mcb_offset;    /* next index of config log to request */
+        __u16    mcb_type;      /* type of log: CONFIG_T_[CONFIG|RECOVER] */
+        __u8     mcb_reserved;
+        __u8     mcb_bits;      /* unit size of config log, given in bits
+                                 * (presumably log2 of the byte size -
+                                 * TODO confirm) */
+        __u32    mcb_units;     /* # of units for bulk transfer */
+};
+extern void lustre_swab_mgs_config_body(struct mgs_config_body *body);
+
+struct mgs_config_res { /* NOTE(review): presumably the MGS_CONFIG_READ
+                         * reply body - confirm against ptlrpc/layout.c */
+        __u64    mcr_offset;    /* index of last config log */
+        __u64    mcr_size;      /* size of the log */
+};
+extern void lustre_swab_mgs_config_res(struct mgs_config_res *body);
+
 /* Config marker flags (in config log) */
 #define CM_START       0x01
 #define CM_END         0x02
index 6fb49d8..17e04e5 100644 (file)
@@ -66,6 +66,7 @@
 #define HEALTH_CHECK      "health_check"
 #define CAPA_KEYS         "capa_keys"
 #define CHANGELOG_USERS   "changelog_users"
+#define MGS_NIDTBL_DIR    "NIDTBL_VERSIONS"
 
 
 /****************** persistent mount data *********************/
@@ -73,6 +74,9 @@
 #define LDD_F_SV_TYPE_MDT   0x0001
 #define LDD_F_SV_TYPE_OST   0x0002
 #define LDD_F_SV_TYPE_MGS   0x0004
+#define LDD_F_SV_TYPE_MASK (LDD_F_SV_TYPE_MDT  | \
+                            LDD_F_SV_TYPE_OST  | \
+                            LDD_F_SV_TYPE_MGS)
 #define LDD_F_SV_ALL        0x0008
 /** need an index assignment */
 #define LDD_F_NEED_INDEX    0x0010
 #define LDD_F_IAM_DIR       0x0800
 /** all nodes are specified as service nodes */
 #define LDD_F_NO_PRIMNODE   0x1000
+/** IR enable flag */
+#define LDD_F_IR_CAPABLE    0x2000
+/** the MGS refused to register the target. */
+#define LDD_F_ERROR         0x4000
+
+/* opc for target register */
+#define LDD_F_OPC_REG   0x10000000
+#define LDD_F_OPC_UNREG 0x20000000
+#define LDD_F_OPC_READY 0x40000000
+#define LDD_F_OPC_MASK  0xf0000000
+
+#define LDD_F_ONDISK_MASK  (LDD_F_SV_TYPE_MASK | LDD_F_IAM_DIR)
 
 enum ldd_mount_type {
         LDD_MT_EXT3 = 0,
@@ -200,6 +216,7 @@ struct lustre_mount_data {
 #define LMD_FLG_NOMGS        0x0020  /* Only start target for servers, reusing
                                         existing MGS services */
 #define LMD_FLG_WRITECONF    0x0040  /* Rewrite config log */
+#define LMD_FLG_NOIR         0x0080  /* NO imperative recovery */
 
 #define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
 
@@ -441,13 +458,15 @@ struct lustre_sb_info {
         struct ll_sb_info        *lsi_llsbi;   /* add'l client sbi info */
         struct vfsmount          *lsi_srv_mnt; /* the one server mount */
         cfs_atomic_t              lsi_mounts;  /* references to the srv_mnt */
-        struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs own backing_dev_info */
+        struct backing_dev_info   lsi_bdi;     /* each client mountpoint needs
+                                                  own backing_dev_info */
 };
 
 #define LSI_SERVER                       0x00000001
 #define LSI_UMOUNT_FORCE                 0x00000010
 #define LSI_UMOUNT_FAILOVER              0x00000020
 #define LSI_BDI_INITIALIZED              0x00000040
+#define LSI_IR_CAPABLE                   0x00000080
 
 #define     s2lsi(sb)        ((struct lustre_sb_info *)((sb)->s_fs_info))
 #define     s2lsi_nocast(sb) ((sb)->s_fs_info)
@@ -484,6 +503,7 @@ int server_put_mount_2(const char *name, struct vfsmount *mnt);
 int server_register_target(struct super_block *sb);
 struct mgs_target_info;
 int server_mti_print(char *title, struct mgs_target_info *mti);
+void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd);
 
 /* mgc_request.c */
 int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
index 88a2ecf..0480263 100644 (file)
@@ -139,6 +139,7 @@ extern struct req_format RQF_SEC_CTX;
 /* MGS req_format */
 extern struct req_format RQF_MGS_TARGET_REG;
 extern struct req_format RQF_MGS_SET_INFO;
+extern struct req_format RQF_MGS_CONFIG_READ;
 /* fid/fld req_format */
 extern struct req_format RQF_SEQ_QUERY;
 extern struct req_format RQF_FLD_QUERY;
@@ -283,6 +284,13 @@ extern struct req_msg_field RMF_RCS;
 extern struct req_msg_field RMF_FIEMAP_KEY;
 extern struct req_msg_field RMF_FIEMAP_VAL;
 
+/* MGS config read message format */
+extern struct req_msg_field RMF_MGS_CONFIG_BODY;
+extern struct req_msg_field RMF_MGS_CONFIG_RES;
+
+/* generic uint32 */
+extern struct req_msg_field RMF_U32;
+
 /** @} req_layout */
 
 #endif /* _LUSTRE_REQ_LAYOUT_H__ */
index 9d7a61f..08d483f 100644 (file)
@@ -233,6 +233,7 @@ struct ost_server_data;
 /* hold common fields for "target" device */
 struct obd_device_target {
         __u32                     obt_magic;
+        __u32                     obt_instance;
         struct super_block       *obt_sb;
         /** last_rcvd file */
         struct file              *obt_rcvd_filp;
@@ -505,6 +506,7 @@ struct mgs_obd {
         cfs_list_t                       mgs_fs_db_list;
         cfs_semaphore_t                  mgs_sem;
         cfs_proc_dir_entry_t            *mgs_proc_live;
+        cfs_time_t                       mgs_start_time;
 };
 
 struct mds_obd {
@@ -1014,6 +1016,7 @@ struct obd_device {
                       obd_no_conn:1,       /* deny new connections */
                       obd_inactive:1,      /* device active/inactive
                                            * (for /proc/status only!!) */
+                      obd_no_ir:1,         /* no imperative recovery. */
                       obd_process_conf:1;  /* device is processing mgs config */
         /* use separate field as it is set in interrupt to don't mess with
          * protection of other bits using _bh lock */
@@ -1067,8 +1070,9 @@ struct obd_device {
         cfs_timer_t                      obd_recovery_timer;
         time_t                           obd_recovery_start; /* seconds */
         time_t                           obd_recovery_end; /* seconds, for lprocfs_status */
-        time_t                           obd_recovery_time_hard;
+        int                              obd_recovery_time_hard;
         int                              obd_recovery_timeout;
+        int                              obd_recovery_ir_factor;
 
         /* new recovery stuff from CMD2 */
         struct target_recovery_data      obd_recovery_data;
@@ -1167,7 +1171,6 @@ enum obd_cleanup_stage {
 #define KEY_CONNECT_FLAG        "connect_flags"
 #define KEY_SYNC_LOCK_CANCEL    "sync_lock_cancel"
 
-
 struct lu_context;
 
 /* /!\ must be coherent with include/linux/namei.h on patched kernel */
index 54d0a65..7a92a42 100644 (file)
@@ -160,7 +160,8 @@ int class_config_dump_llog(struct llog_ctxt *ctxt, char *name,
 enum {
         CONFIG_T_CONFIG  = 0,
         CONFIG_T_SPTLRPC = 1,
-        CONFIG_T_MAX     = 2
+        CONFIG_T_RECOVER = 2,
+        CONFIG_T_MAX     = 3
 };
 
 /* list of active configuration logs  */
@@ -170,6 +171,7 @@ struct config_llog_data {
         cfs_list_t                  cld_list_chain;
         cfs_atomic_t                cld_refcount;
         struct config_llog_data    *cld_sptlrpc;/* depended sptlrpc log */
+        struct config_llog_data    *cld_recover;    /* imperative recover log */
         struct obd_export          *cld_mgcexp;
         cfs_mutex_t                 cld_lock;
         int                         cld_type;
index 5cdebfd..46a2bd1 100644 (file)
@@ -147,6 +147,13 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 /* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */
 #define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \
                              INITIAL_CONNECT_TIMEOUT)
+/* The min time a target should wait for clients to reconnect in recovery */
+#define OBD_RECOVERY_TIME_MIN    (2*RECONNECT_DELAY_MAX)
+#define OBD_IR_FACTOR_MIN         1
+#define OBD_IR_FACTOR_MAX         10
+#define OBD_IR_FACTOR_DEFAULT    (OBD_IR_FACTOR_MAX/2)
+/* default timeout for the MGS to become IR_FULL */
+#define OBD_IR_MGS_TIMEOUT       (4*obd_timeout)
 #define LONG_UNLINK 300          /* Unlink should happen before now */
 
 /**
index bfde12a..4047407 100644 (file)
@@ -951,6 +951,10 @@ dont_check_exports:
         }
         if (rc)
                 GOTO(out, rc);
+
+        LASSERT(target->u.obt.obt_magic == OBT_MAGIC);
+        data->ocd_instance = target->u.obt.obt_instance;
+
         /* Return only the parts of obd_connect_data that we understand, so the
          * client knows that we don't understand the rest. */
         if (data) {
index aa6ed8c..1a84846 100644 (file)
@@ -163,7 +163,7 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
         { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight,
                                 mdc_wr_max_rpcs_in_flight, 0 },
         { "timeouts",        lprocfs_rd_timeouts,    0, 0 },
-        { "import",          lprocfs_rd_import,      0, 0 },
+        { "import",          lprocfs_rd_import,      lprocfs_wr_import, 0 },
         { "state",           lprocfs_rd_state,       0, 0 },
         { "hsm_nl",          0, mdc_wr_kuc,          0, 0, 0222 },
         { 0 }
index 7d0be2b..8143935 100644 (file)
@@ -4504,14 +4504,8 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
                         CERROR("CMD Operation not allowed in IOP mode\n");
                         GOTO(err_lmi, rc = -EINVAL);
                 }
-                /* Read recovery timeouts */
-                if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
-                        obd->obd_recovery_timeout =
-                                lsi->lsi_lmd->lmd_recovery_time_soft;
-
-                if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
-                        obd->obd_recovery_time_hard =
-                                lsi->lsi_lmd->lmd_recovery_time_hard;
+
+                obd->u.obt.obt_magic = OBT_MAGIC;
         }
 
         cfs_rwlock_init(&m->mdt_sptlrpc_lock);
index cebcbb7..053f626 100644 (file)
@@ -808,6 +808,9 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
         { "som",                        lprocfs_rd_mdt_som,
                                         lprocfs_wr_mdt_som, 0 },
         { "mdccomm",                    0, lprocfs_mdt_wr_mdc,              0 },
+        { "instance",                   lprocfs_target_rd_instance,         0 },
+        { "ir_factor",                  lprocfs_obd_rd_ir_factor,
+                                        lprocfs_obd_wr_ir_factor,           0 },
         { 0 }
 };
 
index 50c69eb..103fc7a 100644 (file)
@@ -476,6 +476,7 @@ static int mdt_server_data_init(const struct lu_env *env,
         cfs_spin_unlock(&mdt->mdt_lut.lut_translock);
 
         obd->u.obt.obt_mount_count = mount_count + 1;
+        obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count;
         lsd->lsd_mount_count = obd->u.obt.obt_mount_count;
 
         /* save it, so mount count and last_transno is current */
index eb5f403..0e5ad2c 100644 (file)
@@ -60,4 +60,9 @@ static inline int cld_is_sptlrpc(struct config_llog_data *cld)
         return cld->cld_type == CONFIG_T_SPTLRPC;
 }
 
+static inline int cld_is_recover(struct config_llog_data *cld)
+{       /* non-zero iff \a cld is an imperative recovery config log,
+         * i.e. its cld_type is CONFIG_T_RECOVER */
+        return cld->cld_type == CONFIG_T_RECOVER;
+}
+
 #endif  /* _MGC_INTERNAL_H */
index 295b6c7..57f69e8 100644 (file)
@@ -88,6 +88,9 @@ static int mgc_name2resid(char *name, int len, struct ldlm_res_id *res_id,
         case CONFIG_T_SPTLRPC:
                 resname = 0;
                 break;
+        case CONFIG_T_RECOVER:
+                resname = type;
+                break;
         default:
                 LBUG();
         }
@@ -149,6 +152,8 @@ static void config_log_put(struct config_llog_data *cld)
 
                 CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname);
 
+                if (cld->cld_recover)
+                        config_log_put(cld->cld_recover);
                 if (cld->cld_sptlrpc)
                         config_log_put(cld->cld_sptlrpc);
                 if (cld_is_sptlrpc(cld))
@@ -250,6 +255,37 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd,
         RETURN(cld);
 }
 
+/**
+ * Register the imperative recovery config log for file system \a fsname.
+ *
+ * The log is named "<fsname>-mdtir" when the mount is a server and
+ * "<fsname>-cliir" otherwise. A server mount that is an OST gets no
+ * recover log at all and NULL is returned for it.
+ *
+ * \retval NULL   server mount of an OST, nothing to add
+ * \retval other  whatever do_config_log_add() returns
+ */
+static struct config_llog_data *config_recover_log_add(struct obd_device *obd,
+        char *fsname,
+        struct config_llog_instance *cfg,
+        struct super_block *sb)
+{
+        struct lustre_sb_info *lsi = s2lsi(sb);
+        /* work on a private copy, cfg_instance may be overridden below */
+        struct config_llog_instance lcfg = *cfg;
+        int is_server = (lsi->lsi_flags & LSI_SERVER) != 0;
+        char logname[32];
+
+        if (is_server && IS_OST(lsi->lsi_ldd))
+                return NULL;
+
+        /* we have to use different llog for clients and mdts for cmd
+         * where only clients are notified if one of cmd server restarts */
+        LASSERT(strlen(fsname) < sizeof(logname) / 2);
+        strcpy(logname, fsname);
+
+        if (!is_server) {
+                LASSERT(lcfg.cfg_instance != NULL);
+                strcat(logname, "-cliir");
+        } else { /* mdt */
+                LASSERT(lcfg.cfg_instance == NULL);
+                lcfg.cfg_instance = sb;
+                strcat(logname, "-mdtir");
+        }
+
+        return do_config_log_add(obd, logname, CONFIG_T_RECOVER, &lcfg, sb);
+}
+
+
 /** Add this log to the list of active logs watched by an MGC.
  * Active means we're watching for updates.
  * We have one active log per "mount" - client instance or servername.
@@ -259,6 +295,7 @@ static int config_log_add(struct obd_device *obd, char *logname,
                           struct config_llog_instance *cfg,
                           struct super_block *sb)
 {
+        struct lustre_sb_info *lsi = s2lsi(sb);
         struct config_llog_data *cld;
         struct config_llog_data *sptlrpc_cld;
         char                     seclogname[32];
@@ -299,6 +336,18 @@ static int config_log_add(struct obd_device *obd, char *logname,
 
         cld->cld_sptlrpc = sptlrpc_cld;
 
+        LASSERT(lsi->lsi_lmd);
+        if (!(lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)) {
+                struct config_llog_data *recover_cld;
+                *strrchr(seclogname, '-') = 0;
+                recover_cld = config_recover_log_add(obd, seclogname, cfg, sb);
+                if (IS_ERR(recover_cld)) {
+                        config_log_put(cld);
+                        RETURN(PTR_ERR(recover_cld));
+                }
+                cld->cld_recover = recover_cld;
+        }
+
         RETURN(0);
 }
 
@@ -308,7 +357,9 @@ CFS_DECLARE_MUTEX(llog_process_lock);
  */
 static int config_log_end(char *logname, struct config_llog_instance *cfg)
 {
-        struct config_llog_data *cld, *cld_sptlrpc = NULL;
+        struct config_llog_data *cld;
+        struct config_llog_data *cld_sptlrpc = NULL;
+        struct config_llog_data *cld_recover = NULL;
         int rc = 0;
         ENTRY;
 
@@ -332,8 +383,18 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg)
         }
 
         cld->cld_stopping = 1;
+
+        cld_recover = cld->cld_recover;
+        cld->cld_recover = NULL;
         cfs_mutex_unlock(&cld->cld_lock);
 
+        if (cld_recover) {
+                cfs_mutex_lock(&cld_recover->cld_lock);
+                cld_recover->cld_stopping = 1;
+                cfs_mutex_unlock(&cld_recover->cld_lock);
+                config_log_put(cld_recover);
+        }
+
         cfs_spin_lock(&config_list_lock);
         cld_sptlrpc = cld->cld_sptlrpc;
         cld->cld_sptlrpc = NULL;
@@ -1026,6 +1087,24 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
         RETURN(rc);
 }
 
+/**
+ * obd get_info method of the MGC.
+ *
+ * Only KEY_CONN_DATA is handled: the connect data stored in the import of
+ * \a exp is copied into \a val, provided *\a vallen matches
+ * sizeof(struct obd_connect_data) exactly.
+ *
+ * \retval 0        connect data copied to \a val
+ * \retval -EINVAL  unknown key or wrong buffer size
+ */
+static int mgc_get_info(struct obd_export *exp, __u32 keylen, void *key,
+                        __u32 *vallen, void *val, struct lov_stripe_md *unused)
+{
+        struct obd_connect_data *ocd = val;
+        struct obd_import *import;
+
+        if (!KEY_IS(KEY_CONN_DATA))
+                return -EINVAL;
+
+        import = class_exp2cliimp(exp);
+        if (*vallen != sizeof(*ocd))
+                return -EINVAL;
+
+        *ocd = import->imp_connect_data;
+        return 0;
+}
+
 static int mgc_import_event(struct obd_device *obd,
                             struct obd_import *imp,
                             enum obd_import_event event)
@@ -1119,6 +1198,317 @@ static int mgc_llog_finish(struct obd_device *obd, int count)
         RETURN(rc);
 }
 
+enum {  /* number of pages allocated for the CONFIG_READ bulk buffer:
+         * the _INIT value is used for the first read, which fetches all
+         * logs at once (1 << 20 bytes worth of pages, assuming
+         * CFS_PAGE_SHIFT is log2 of the page size - TODO confirm);
+         * the plain value is used for later, incremental reads */
+        CONFIG_READ_NRPAGES_INIT = 1 << (20 - CFS_PAGE_SHIFT),
+        CONFIG_READ_NRPAGES      = 4
+};
+
+/**
+ * Apply a buffer of NID table entries to the local osc/mdc devices.
+ *
+ * \a data holds \a datalen bytes of back-to-back struct mgs_nidtbl_entry
+ * records. Every record is sanity checked and byte-swapped, then turned
+ * into an LCFG_PARAM command of the form
+ * "<osc|mdc>.import=connection=<uuid>::<instance>" which is processed for
+ * the matching device "<fsname>-<OST|MDT>xxxx-<osc|mdc>-<instance>".
+ *
+ * \param obd          MGC device, currently unused
+ * \param cld          recover config log these entries belong to
+ * \param max_version  highest table version an entry is allowed to carry
+ * \param data         buffer of entries, modified in place by the swab
+ * \param datalen      number of valid bytes in \a data
+ *
+ * \retval 0        buffer consumed; entries whose device does not exist
+ *                  locally are skipped silently
+ * \retval -ENOMEM  allocation failure
+ * \retval -EINVAL  malformed, oversized, unsorted or too new entry
+ * \retval other    error from client_import_find_conn(), or from
+ *                  class_process_config() on the last entry processed
+ */
+static int mgc_apply_recover_logs(struct obd_device *obd,
+                                  struct config_llog_data *cld,
+                                  __u64 max_version,
+                                  void *data, int datalen)
+{
+        struct config_llog_instance *cfg = &cld->cld_cfg;
+        struct lustre_sb_info       *lsi = s2lsi(cfg->cfg_sb);
+        struct mgs_nidtbl_entry *entry;
+        struct lustre_cfg       *lcfg;
+        struct lustre_cfg_bufs   bufs;
+        __u64 prev_version = 0;
+        char *inst;
+        char *buf;
+        int   bufsz = CFS_PAGE_SIZE;
+        int   pos;
+        int   rc  = 0;
+        int   off = 0;
+
+        OBD_ALLOC(buf, CFS_PAGE_SIZE);
+        if (buf == NULL)
+                return -ENOMEM;
+
+        LASSERT(cfg->cfg_instance != NULL);
+        LASSERT(cfg->cfg_sb == cfg->cfg_instance);
+
+        /* The head of the page keeps the instance string; the rest of it
+         * is scratch space for building obd names and parameters. \a inst
+         * also remembers the start of the allocation for OBD_FREE(). */
+        inst = buf;
+        if (!(lsi->lsi_flags & LSI_SERVER)) {
+                pos = sprintf(inst, "%p", cfg->cfg_instance);
+        } else {
+                LASSERT(IS_MDT(lsi->lsi_ldd));
+                pos = sprintf(inst, "MDT%04x", lsi->lsi_ldd->ldd_svindex);
+        }
+        buf   += pos + 1;
+        bufsz -= pos + 1;
+
+        while (datalen > 0) {
+                int   entry_len = sizeof(*entry);
+                int   is_ost;
+                struct obd_device *tgt_obd; /* do not shadow \a obd */
+                char *obdname;
+                char *cname;
+                char *params;
+                char *uuid;
+
+                /* any "break" below means a bad entry unless rc is reset */
+                rc = -EINVAL;
+                if (datalen < sizeof(*entry))
+                        break;
+
+                entry = (typeof(entry))(data + off);
+
+                /* sanity check; these are single byte fields so they can
+                 * be looked at before the entry is swabbed */
+                if (entry->mne_nid_type != 0) /* only support type 0 for ipv4 */
+                        break;
+                if (entry->mne_nid_count == 0) /* at least one nid entry */
+                        break;
+                if (entry->mne_nid_size != sizeof(lnet_nid_t))
+                        break;
+
+                entry_len += entry->mne_nid_count * entry->mne_nid_size;
+                if (datalen < entry_len) /* must have entry_len at least */
+                        break;
+
+                lustre_swab_mgs_nidtbl_entry(entry);
+                /* mne_length comes from the wire: reject a bogus value
+                 * instead of asserting on it */
+                if (entry->mne_length > CFS_PAGE_SIZE) {
+                        CERROR("mgc: nidtbl entry too large (%u)\n",
+                               entry->mne_length);
+                        break;
+                }
+                if (entry->mne_length < entry_len)
+                        break;
+
+                off     += entry->mne_length;
+                datalen -= entry->mne_length;
+                if (datalen < 0)
+                        break;
+
+                if (entry->mne_version > max_version) {
+                        CERROR("entry index("LPD64") is over max_index("
+                               LPD64")\n", entry->mne_version, max_version);
+                        break;
+                }
+
+                if (prev_version >= entry->mne_version) {
+                        CERROR("index unsorted, prev "LPD64", now "LPD64"\n",
+                               prev_version, entry->mne_version);
+                        break;
+                }
+                prev_version = entry->mne_version;
+
+                /*
+                 * Write a string with format "nid::instance" to
+                 * lustre/<osc|mdc>/<target>-<osc|mdc>-<instance>/import.
+                 */
+
+                is_ost = entry->mne_type == LDD_F_SV_TYPE_OST;
+                memset(buf, 0, bufsz);
+                obdname = buf;
+                pos = 0;
+
+                /* lustre-OST0001-osc-<instance #> */
+                strcpy(obdname, cld->cld_logname);
+                cname = strrchr(obdname, '-');
+                if (cname == NULL) {
+                        CERROR("mgc: invalid logname %s\n", obdname);
+                        break;
+                }
+
+                pos = cname - obdname;
+                obdname[pos] = 0;
+                pos += sprintf(obdname + pos, "-%s%04x",
+                                  is_ost ? "OST" : "MDT", entry->mne_index);
+
+                cname = is_ost ? "osc" : "mdc";
+                pos += sprintf(obdname + pos, "-%s-%s", cname, inst);
+                lustre_cfg_bufs_reset(&bufs, obdname);
+
+                /* find the obd by obdname */
+                tgt_obd = class_name2obd(obdname);
+                if (tgt_obd == NULL) {
+                        CDEBUG(D_INFO, "mgc: cannot find obdname %s\n",
+                               obdname);
+
+                        /* this is a safe race, when the ost is starting up...
+                         * so it must not be reported as a bad entry if it
+                         * happens to be the last one in the buffer */
+                        rc = 0;
+                        continue;
+                }
+
+                /* osc.import = "connection=<Conn UUID>::<target instance>" */
+                ++pos;
+                params = buf + pos;
+                pos += sprintf(params, "%s.import=%s", cname, "connection=");
+                uuid = buf + pos;
+
+                /* TODO: iterate all nids to find one */
+                /* find uuid by nid */
+                rc = client_import_find_conn(tgt_obd->u.cli.cl_import,
+                                             entry->u.nids[0],
+                                             (struct obd_uuid *)uuid);
+                if (rc < 0) {
+                        CERROR("mgc: cannot find uuid by nid %s\n",
+                               libcfs_nid2str(entry->u.nids[0]));
+                        break;
+                }
+
+                CDEBUG(D_INFO, "Find uuid %s by nid %s\n",
+                       uuid, libcfs_nid2str(entry->u.nids[0]));
+
+                pos += strlen(uuid);
+                pos += sprintf(buf + pos, "::%u", entry->mne_instance);
+                LASSERT(pos < bufsz);
+
+                lustre_cfg_bufs_set_string(&bufs, 1, params);
+
+                rc = -ENOMEM;
+                lcfg = lustre_cfg_new(LCFG_PARAM, &bufs);
+                if (lcfg == NULL) {
+                        CERROR("mgc: cannot allocate memory\n");
+                        break;
+                }
+
+                CDEBUG(D_INFO, "ir apply logs "LPD64"/"LPD64" for %s -> %s\n",
+                       prev_version, max_version, obdname, params);
+
+                rc = class_process_config(lcfg);
+                lustre_cfg_free(lcfg);
+                if (rc)
+                        CDEBUG(D_INFO, "process config for %s error %d\n",
+                               obdname, rc);
+
+                /* continue, even one with error */
+        }
+
+        /* free from the start of the allocation, \a buf was advanced */
+        OBD_FREE(inst, CFS_PAGE_SIZE);
+        return rc;
+}
+
+/**
+ * This function is called if this client was notified for target restarting
+ * by the MGS. A CONFIG_READ RPC is going to send to fetch recovery logs.
+ *
+ * The reply data arrives by bulk transfer into pages allocated here; every
+ * page that holds data is handed to mgc_apply_recover_logs(). The RPC is
+ * repeated until the offset returned by the MGS equals the size it reports.
+ *
+ * The caller must hold cld->cld_lock and \a cld must be a recover log (both
+ * are asserted).
+ *
+ * \param obd  passed through to mgc_apply_recover_logs()
+ * \param cld  recover log; cld_cfg.cfg_last_idx is advanced to the offset
+ *             returned by the MGS
+ *
+ * \retval 0         logs were fetched; errors from mgc_apply_recover_logs()
+ *                   are only logged and do not fail the call
+ * \retval negative  allocation failure, RPC failure or malformed reply
+ */
+static int mgc_process_recover_log(struct obd_device *obd,
+                                   struct config_llog_data *cld)
+{
+        struct ptlrpc_request *req = NULL;
+        struct config_llog_instance *cfg = &cld->cld_cfg;
+        struct mgs_config_body *body;
+        struct mgs_config_res  *res;
+        struct ptlrpc_bulk_desc *desc;
+        cfs_page_t **pages;
+        int nrpages;
+        bool eof = true;
+        int i;
+        int ealen;
+        int rc;
+        ENTRY;
+
+        /* allocate buffer for bulk transfer.
+         * if this is the first time for this mgs to read logs,
+         * CONFIG_READ_NRPAGES_INIT will be used since it will read all logs
+         * once; otherwise, it only reads increment of logs, this should be
+         * small and CONFIG_READ_NRPAGES will be used.
+         */
+        nrpages = CONFIG_READ_NRPAGES;
+        if (cfg->cfg_last_idx == 0) /* the first time */
+                nrpages = CONFIG_READ_NRPAGES_INIT;
+
+        OBD_ALLOC(pages, sizeof(*pages) * nrpages);
+        if (pages == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        for (i = 0; i < nrpages; i++) {
+                pages[i] = cfs_alloc_page(CFS_ALLOC_STD);
+                if (pages[i] == NULL)
+                        GOTO(out, rc = -ENOMEM);
+        }
+
+again:
+        LASSERT(cld_is_recover(cld));
+        LASSERT(cfs_mutex_is_locked(&cld->cld_lock));
+        req = ptlrpc_request_alloc(class_exp2cliimp(cld->cld_mgcexp),
+                                   &RQF_MGS_CONFIG_READ);
+        if (req == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        rc = ptlrpc_request_pack(req, LUSTRE_MGS_VERSION, MGS_CONFIG_READ);
+        if (rc)
+                GOTO(out, rc);
+
+        /* pack request */
+        body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        LASSERT(body != NULL);
+        /* the assertion guarantees that strncpy() leaves mcb_name
+         * NUL-terminated */
+        LASSERT(sizeof(body->mcb_name) > strlen(cld->cld_logname));
+        strncpy(body->mcb_name, cld->cld_logname, sizeof(body->mcb_name));
+        body->mcb_offset = cfg->cfg_last_idx + 1;
+        body->mcb_type   = cld->cld_type;
+        body->mcb_bits   = CFS_PAGE_SHIFT;
+        body->mcb_units  = nrpages;
+
+        /* allocate bulk transfer descriptor */
+        desc = ptlrpc_prep_bulk_imp(req, nrpages, BULK_PUT_SINK,
+                                    MGS_BULK_PORTAL);
+        if (desc == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        for (i = 0; i < nrpages; i++)
+                ptlrpc_prep_bulk_page(desc, pages[i], 0, CFS_PAGE_SIZE);
+
+        ptlrpc_request_set_replen(req);
+        rc = ptlrpc_queue_wait(req);
+        if (rc)
+                GOTO(out, rc);
+
+        res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
+        /* a reply without the result buffer must not be dereferenced */
+        if (res == NULL)
+                GOTO(out, rc = -EPROTO);
+
+        if (res->mcr_size < res->mcr_offset)
+                GOTO(out, rc = -EINVAL);
+
+        /* always update the index even though it might have errors with
+         * handling the recover logs */
+        cfg->cfg_last_idx = res->mcr_offset;
+        eof = res->mcr_offset == res->mcr_size;
+
+        CDEBUG(D_INFO, "Latest version "LPD64", more %d.\n",
+               res->mcr_offset, eof == false);
+
+        ealen = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk, 0);
+        if (ealen < 0)
+                GOTO(out, rc = ealen);
+
+        if (ealen > nrpages << CFS_PAGE_SHIFT)
+                GOTO(out, rc = -EINVAL);
+
+        if (ealen == 0) { /* no logs transferred */
+                if (!eof)
+                        rc = -EINVAL;
+                GOTO(out, rc);
+        }
+
+        /* hand the received data over page by page; ealen counts the bytes
+         * that are still unprocessed */
+        for (i = 0; i < nrpages && ealen > 0; i++) {
+                int rc2;
+                void *ptr;
+
+                ptr = cfs_kmap(pages[i]);
+                rc2 = mgc_apply_recover_logs(obd, cld, res->mcr_offset, ptr,
+                                             min_t(int, ealen, CFS_PAGE_SIZE));
+                cfs_kunmap(pages[i]);
+                if (rc2 < 0) {
+                        CWARN("Process recover log %s error %d\n",
+                              cld->cld_logname, rc2);
+                        break;
+                }
+
+                ealen -= CFS_PAGE_SIZE;
+        }
+
+out:
+        if (req)
+                ptlrpc_req_finished(req);
+
+        /* more logs are pending on the MGS: fetch the next batch with the
+         * same pages */
+        if (rc == 0 && !eof)
+                goto again;
+
+        if (pages) {
+                for (i = 0; i < nrpages; i++) {
+                        /* page allocation stopped at the first failure */
+                        if (pages[i] == NULL)
+                                break;
+                        cfs_free_page(pages[i]);
+                }
+                OBD_FREE(pages, sizeof(*pages) * nrpages);
+        }
+        return rc;
+}
+
 /* identical to mgs_log_is_empty */
 static int mgc_llog_is_empty(struct obd_device *obd, struct llog_ctxt *ctxt,
                             char *name)
@@ -1394,7 +1784,14 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld)
                 config_log_get(cld);
         }
 
-        rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
+
+        if (cld_is_recover(cld)) {
+                rc = 0; /* this is not a fatal error for recover log */
+                if (rcl == 0)
+                        rc = mgc_process_recover_log(mgc, cld);
+        } else {
+                rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
+        }
 
         CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n",
                mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc);
@@ -1476,6 +1873,11 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf)
                 cld->cld_cfg.cfg_flags |= CFG_F_COMPAT146;
 
                 rc = mgc_process_log(obd, cld);
+                if (rc == 0 && cld->cld_recover) {
+                        rc = mgc_process_log(obd, cld->cld_recover);
+                        if (rc)
+                                CERROR("Cannot process recover llog %d\n", rc);
+                }
                 config_log_put(cld);
 
                 break;
@@ -1512,6 +1914,7 @@ struct obd_ops mgc_obd_ops = {
         .o_cancel       = mgc_cancel,
         //.o_iocontrol    = mgc_iocontrol,
         .o_set_info_async = mgc_set_info_async,
+        .o_get_info       = mgc_get_info,
         .o_import_event = mgc_import_event,
         .o_llog_init    = mgc_llog_init,
         .o_llog_finish  = mgc_llog_finish,
index 413f381..d1cafa0 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := mgs
-mgs-objs := mgs_handler.o mgs_fs.o mgs_llog.o lproc_mgs.o
+mgs-objs := mgs_handler.o mgs_fs.o mgs_llog.o lproc_mgs.o mgs_nids.o
 
 EXTRA_DIST := $(mgs-objs:%.o=%.c) mgs_internal.h
 
index a16449f..b424d2b 100644 (file)
@@ -217,11 +217,27 @@ static int mgs_live_seq_show(struct seq_file *seq, void *v)
         }
         seq_show_srpc_rules(seq, fsdb->fsdb_name, &fsdb->fsdb_srpc_gen);
 
+        seq_printf(seq, "\nImperative Recovery Status:\n");
+
+        lprocfs_rd_ir_state(seq, fsdb);
+
         cfs_up(&fsdb->fsdb_sem);
         return 0;
 }
 
-LPROC_SEQ_FOPS_RO(mgs_live);
+/*
+ * Write handler of the per-filesystem "live" proc file: the user buffer is
+ * passed to lprocfs_wr_ir_state() together with the fs_db stored in the seq
+ * file. On success the whole input is reported as consumed.
+ */
+static ssize_t mgs_live_seq_write(struct file *file, const char *buf,
+                                  size_t len, loff_t *off)
+{
+        struct seq_file *sf = file->private_data;
+        struct fs_db    *db = sf->private;
+        ssize_t ret = lprocfs_wr_ir_state(file, buf, len, db);
+
+        if (ret < 0)
+                return ret;
+
+        return (ssize_t)len;
+}
+LPROC_SEQ_FOPS(mgs_live);
 
 int lproc_mgs_add_live(struct obd_device *obd, struct fs_db *fsdb)
 {
index 13b73e0..0efb7b2 100644 (file)
@@ -183,6 +183,18 @@ int mgs_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
         }
         mgs->mgs_configs_dir = dentry;
 
+        /* create directory to store nid table versions; only its existence
+         * matters here, so the dentry is released right away */
+        dentry = simple_mkdir(cfs_fs_pwd(current->fs), mnt, MGS_NIDTBL_DIR,
+                              0777, 1);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                /* report the directory that actually failed */
+                CERROR("cannot create %s directory: rc = %d\n",
+                       MGS_NIDTBL_DIR, rc);
+                GOTO(err_pop, rc);
+        } else {
+                dput(dentry);
+        }
+
 err_pop:
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         return rc;
index 4829c1f..45c74b2 100644 (file)
@@ -196,6 +196,9 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 GOTO(err_ops, rc = -EROFS);
         }
 
+        obd->u.obt.obt_magic = OBT_MAGIC;
+        obd->u.obt.obt_instance = 0;
+
         /* namespace for mgs llog */
         obd->obd_namespace = ldlm_namespace_new(obd ,"MGS",
                                                 LDLM_NAMESPACE_SERVER,
@@ -225,6 +228,7 @@ static int mgs_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         /* Internal mgs setup */
         mgs_init_fsdb_list(obd);
         cfs_sema_init(&mgs->mgs_sem, 1);
+        mgs->mgs_start_time = cfs_time_current_sec();
 
         /* Setup proc */
         lprocfs_mgs_init_vars(&lvars);
@@ -325,50 +329,55 @@ static int mgs_cleanup(struct obd_device *obd)
 }
 
 /* similar to filter_prepare_destroy */
-static int mgs_get_cfg_lock(struct obd_device *obd, char *fsname,
-                            struct lustre_handle *lockh)
+int mgs_get_lock(struct obd_device *obd, struct ldlm_res_id *res,
+                 struct lustre_handle *lockh)
 {
-        struct ldlm_res_id res_id;
         int rc, flags = 0;
         ENTRY;
 
-        rc = mgc_fsname2resid(fsname, &res_id, CONFIG_T_CONFIG);
-        if (!rc)
-                rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id,
-                                            LDLM_PLAIN, NULL, LCK_EX,
-                                            &flags, ldlm_blocking_ast,
-                                            ldlm_completion_ast, NULL,
-                                            fsname, 0, NULL, lockh);
+        rc = ldlm_cli_enqueue_local(obd->obd_namespace, res,
+                                    LDLM_PLAIN, NULL, LCK_EX,
+                                    &flags, ldlm_blocking_ast,
+                                    ldlm_completion_ast, NULL,
+                                    NULL, 0, NULL, lockh);
         if (rc)
-                CERROR("can't take cfg lock for %s (%d)\n", fsname, rc);
+                CERROR("can't take cfg lock for "LPX64"/"LPX64"(%d)\n",
+                       le64_to_cpu(res->name[0]), le64_to_cpu(res->name[1]),
+                       rc);
 
         RETURN(rc);
 }
 
-static int mgs_put_cfg_lock(struct lustre_handle *lockh)
+int mgs_put_lock(struct lustre_handle *lockh)
 {
         ENTRY;
-        ldlm_lock_decref(lockh, LCK_EX);
+        ldlm_lock_decref_and_cancel(lockh, LCK_EX);
         RETURN(0);
 }
 
 void mgs_revoke_lock(struct obd_device *obd, struct fs_db *fsdb)
 {
         struct lustre_handle lockh;
+        struct ldlm_res_id   res_id;
         int                  lockrc;
+        int                  bit;
+        int                  rc;
 
         LASSERT(fsdb->fsdb_name[0] != '\0');
+        rc = mgc_fsname2resid(fsdb->fsdb_name, &res_id, CONFIG_T_CONFIG);
+        LASSERT(rc == 0);
 
-        if (cfs_test_and_set_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags) == 0) {
-                lockrc = mgs_get_cfg_lock(obd, fsdb->fsdb_name, &lockh);
+        bit = FSDB_REVOKING_LOCK;
+        if (!rc && cfs_test_and_set_bit(bit, &fsdb->fsdb_flags) == 0) {
+                lockrc = mgs_get_lock(obd, &res_id, &lockh);
                 /* clear the bit before lock put */
-                cfs_clear_bit(FSDB_REVOKING_LOCK, &fsdb->fsdb_flags);
+                cfs_clear_bit(bit, &fsdb->fsdb_flags);
 
                 if (lockrc != ELDLM_OK)
                         CERROR("lock error %d for fs %s\n",
                                lockrc, fsdb->fsdb_name);
                 else
-                        mgs_put_cfg_lock(&lockh);
+                        mgs_put_lock(&lockh);
         }
 }
 
@@ -433,6 +442,7 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mgs_target_info *mti, *rep_mti;
         struct fs_db *fsdb;
+        int opc;
         int rc = 0;
         ENTRY;
 
@@ -440,6 +450,26 @@ static int mgs_handle_target_reg(struct ptlrpc_request *req)
 
         mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO);
 
+        opc = mti->mti_flags & LDD_F_OPC_MASK;
+        if (opc == LDD_F_OPC_READY) {
+                CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
+                       mti->mti_fsname, mti->mti_stripe_index);
+                rc = mgs_ir_update(obd, mti);
+                if (rc) {
+                        LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
+                        CERROR("Update IR return with %d(ignore and IR "
+                               "disabled)\n", rc);
+                }
+                GOTO(out_nolock, rc);
+        }
+
+        /* Do not support unregistering right now. */
+        if (opc != LDD_F_OPC_REG)
+                GOTO(out_nolock, rc = -EINVAL);
+
+        CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
+               mti->mti_fsname, mti->mti_stripe_index);
+
         if (mti->mti_flags & LDD_F_NEED_INDEX)
                 mti->mti_flags |= LDD_F_WRITECONF;
 
@@ -534,6 +564,11 @@ out_nolock:
         CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
                mti->mti_stripe_index, rc);
         req->rq_status = rc;
+        if (rc)
+                /* we need an error flag to tell the target what's going on,
+                 * instead of just doing it by error code only. */
+                mti->mti_flags |= LDD_F_ERROR;
+
         rc = req_capsule_server_pack(&req->rq_pill);
         if (rc)
                 RETURN(rc);
@@ -581,6 +616,33 @@ static int mgs_set_info_rpc(struct ptlrpc_request *req)
         RETURN(rc);
 }
 
+/*
+ * Handle an MGS_CONFIG_READ request by dispatching on the log type found
+ * in the request body. Only recover logs are served, by mgs_get_ir_logs();
+ * config logs yield -ENOTSUPP, a missing body or any other type -EINVAL.
+ */
+static int mgs_config_read(struct ptlrpc_request *req)
+{
+        struct mgs_config_body *mcb;
+        int rc = -EINVAL;
+        ENTRY;
+
+        mcb = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        if (mcb == NULL)
+                RETURN(-EINVAL);
+
+        if (mcb->mcb_type == CONFIG_T_RECOVER)
+                rc = mgs_get_ir_logs(req);
+        else if (mcb->mcb_type == CONFIG_T_CONFIG)
+                rc = -ENOTSUPP;
+
+        RETURN(rc);
+}
+
 /*
  * similar as in ost_connect_check_sptlrpc()
  */
@@ -721,7 +783,11 @@ int mgs_handle(struct ptlrpc_request *req)
                 req_capsule_set(&req->rq_pill, &RQF_MGS_SET_INFO);
                 rc = mgs_set_info_rpc(req);
                 break;
-
+        case MGS_CONFIG_READ:
+                DEBUG_REQ(D_MGS, req, "read config");
+                req_capsule_set(&req->rq_pill, &RQF_MGS_CONFIG_READ);
+                rc = mgs_config_read(req);
+                break;
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_MGS, req, "enqueue");
                 req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
index ae3c424..51ae0b1 100644 (file)
 #include <lustre_log.h>
 #include <lustre_export.h>
 
-/* mgs_llog.c */
-int class_dentry_readdir(struct obd_device *obd, struct dentry *dir,
-                         struct vfsmount *inmnt,
-                         cfs_list_t *dentry_list);
-
 #define MGSSELF_NAME    "_mgs"
 
+/* -- imperative recovery control data structures -- */
+/**
+ * restarting targets.
+ *
+ * One entry of a NID table, describing a single target. Entries are linked
+ * on the mn_targets list of struct mgs_nidtbl through mnt_list.
+ */
+struct mgs_nidtbl;
+struct mgs_nidtbl_target {
+        cfs_list_t              mnt_list;    /* link on mn_targets */
+        struct mgs_nidtbl      *mnt_fs;      /* table holding this entry */
+        u64                     mnt_version; /* 0 means invalid */
+        int                     mnt_type; /* OST or MDT */
+        cfs_time_t              mnt_last_active;
+        struct mgs_target_info  mnt_mti;     /* target info, incl. its NIDs */
+};
+
+enum {
+        IR_FULL = 0,    /* "full" in IR_STRINGS */
+        IR_STARTUP,     /* "startup" in IR_STRINGS */
+        IR_DISABLED,    /* "disabled" in IR_STRINGS */
+        IR_PARTIAL      /* "partial" in IR_STRINGS */
+};
+
+#define IR_STRINGS { "full", "startup", "disabled", "partial" } /* same order as the enum above */
+
+/**
+ * NID table of a file system; struct fs_db embeds one as fsdb_nidtbl.
+ * The code in mgs_nids.c reads and modifies mn_version and mn_targets
+ * with mn_lock held.
+ */
+struct fs_db;
+
+struct mgs_nidtbl {
+        struct fs_db *mn_fsdb;          /* fs_db this table is associated with */
+        struct file  *mn_version_file;
+        cfs_mutex_t   mn_lock;          /* held while the table is accessed */
+        u64           mn_version;       /* bumped for every target update */
+        int           mn_nr_targets;    /* number of entries on mn_targets */
+        cfs_list_t    mn_targets;       /* list of struct mgs_nidtbl_target */
+};
+
 struct mgs_tgt_srpc_conf {
         struct mgs_tgt_srpc_conf  *mtsc_next;
         char                      *mtsc_tgt;
@@ -69,7 +100,6 @@ struct mgs_tgt_srpc_conf {
 #define FSDB_OSCNAME18          (4)  /* old 1.8 style OSC naming */
 #define FSDB_UDESC              (5)  /* sptlrpc user desc, will be obsolete */
 
-
 struct fs_db {
         char              fsdb_name[9];
         cfs_list_t        fsdb_list;           /* list of databases */
@@ -87,15 +117,38 @@ struct fs_db {
         unsigned long     fsdb_flags;
         __u32             fsdb_gen;
 
-        /* in-memory copy of the srpc rules, guarded by fsdb_sem */
+        /* in-memory copy of the srpc rules, guarded by fsdb_lock */
         struct sptlrpc_rule_set   fsdb_srpc_gen;
         struct mgs_tgt_srpc_conf *fsdb_srpc_tgt;
+
+        int                  fsdb_ir_state;
+
+        /* Target NIDs Table */
+        struct mgs_nidtbl    fsdb_nidtbl;
+
+        /* async thread to notify clients */
+        struct obd_device   *fsdb_obd;
+        cfs_waitq_t          fsdb_notify_waitq;
+        cfs_completion_t     fsdb_notify_comp;
+        cfs_atomic_t         fsdb_notify_phase;
+        volatile int         fsdb_notify_async:1,
+                             fsdb_notify_stop:1;
+        /* statistic data */
+        unsigned int         fsdb_notify_total;
+        unsigned int         fsdb_notify_max;
+        unsigned int         fsdb_notify_count;
 };
 
+/* mgs_llog.c */
+int class_dentry_readdir(struct obd_device *obd, struct dentry *dir,
+                         struct vfsmount *inmnt,
+                         cfs_list_t *dentry_list);
+
 int mgs_init_fsdb_list(struct obd_device *obd);
 int mgs_cleanup_fsdb_list(struct obd_device *obd);
-int mgs_find_or_make_fsdb(struct obd_device *obd, char *name, 
+int mgs_find_or_make_fsdb(struct obd_device *obd, char *name,
                           struct fs_db **dbh);
+struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname);
 int mgs_get_fsdb_srpc_from_llog(struct obd_device *obd, struct fs_db *fsdb);
 int mgs_check_index(struct obd_device *obd, struct mgs_target_info *mti);
 int mgs_check_failnid(struct obd_device *obd, struct mgs_target_info *mti);
@@ -112,6 +165,22 @@ int mgs_pool_cmd(struct obd_device *obd, enum lcfg_command_type cmd,
 
 /* mgs_handler.c */
 void mgs_revoke_lock(struct obd_device *obd, struct fs_db *fsdb);
+int  mgs_get_lock(struct obd_device *obd, struct ldlm_res_id *res,
+                  struct lustre_handle *lockh);
+int  mgs_put_lock(struct lustre_handle *lockh);
+
+/* mgs_nids.c */
+int  mgs_ir_update(struct obd_device *obd, struct mgs_target_info *mti);
+int  mgs_ir_init_fs(struct obd_device *obd, struct fs_db *fsdb);
+void mgs_ir_fini_fs(struct obd_device *obd, struct fs_db *fsdb);
+int  mgs_get_ir_logs(struct ptlrpc_request *req);
+int  lprocfs_wr_ir_state(struct file *file, const char *buffer,
+                           unsigned long count, void *data);
+int  lprocfs_rd_ir_state(struct seq_file *seq, void *data);
+int  lprocfs_wr_ir_timeout(struct file *file, const char *buffer,
+                           unsigned long count, void *data);
+int  lprocfs_rd_ir_timeout(char *page, char **start, off_t off, int count,
+                           int *eof, void *data);
 
 /* mgs_fs.c */
 int mgs_export_stats_init(struct obd_device *obd, struct obd_export *exp,
index 4c64911..ad4a8a5 100644 (file)
@@ -324,7 +324,7 @@ static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
         sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
 }
 
-static struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname)
+struct fs_db *mgs_find_fsdb(struct obd_device *obd, char *fsname)
 {
         struct mgs_obd *mgs = &obd->u.mgs;
         struct fs_db *fsdb;
@@ -382,6 +382,9 @@ static struct fs_db *mgs_new_fsdb(struct obd_device *obd, char *fsname)
                 if (rc)
                         GOTO(err, rc);
 
+                /* initialise data for NID table */
+                mgs_ir_init_fs(obd, fsdb);
+
                 lproc_mgs_add_live(obd, fsdb);
         }
 
@@ -407,6 +410,10 @@ static void mgs_free_fsdb(struct obd_device *obd, struct fs_db *fsdb)
         cfs_down(&fsdb->fsdb_sem);
         lproc_mgs_del_live(obd, fsdb);
         cfs_list_del(&fsdb->fsdb_list);
+
+        /* deinitialize fsr */
+        mgs_ir_fini_fs(obd, fsdb);
+
         if (fsdb->fsdb_ost_index_map)
                 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
         if (fsdb->fsdb_mdt_index_map)
@@ -2894,7 +2901,7 @@ int mgs_erase_log(struct obd_device *obd, char *name)
 int mgs_erase_logs(struct obd_device *obd, char *fsname)
 {
         struct mgs_obd *mgs = &obd->u.mgs;
-        static struct fs_db *fsdb;
+        struct fs_db *fsdb;
         cfs_list_t dentry_list;
         struct l_linux_dirent *dirent, *n;
         int rc, len = strlen(fsname);
diff --git a/lustre/mgs/mgs_nids.c b/lustre/mgs/mgs_nids.c
new file mode 100644 (file)
index 0000000..359e321
--- /dev/null
@@ -0,0 +1,807 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mgs/mgs_nids.c
+ *
+ * NID table management for lustre.
+ *
+ * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
+ */
+
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MGS
+#define D_MGS D_CONFIG
+
+#ifdef __KERNEL__
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/fs.h>
+#endif
+
+#include <obd.h>
+#include <obd_lov.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <obd_ost.h>
+#include <libcfs/list.h>
+#include <linux/lvfs.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include "mgs_internal.h"
+
+static unsigned int ir_timeout;
+
+/*
+ * Consistency check of a NID table: walking mn_targets, the versions of the
+ * valid entries (mnt_version != 0) must be strictly increasing.
+ *
+ * The caller must hold tbl->mn_lock.
+ *
+ * Returns 1 if the table is consistent, 0 otherwise.
+ */
+static int nidtbl_is_sane(struct mgs_nidtbl *tbl)
+{
+        struct mgs_nidtbl_target *tgt;
+        /* same width as mnt_version, so that large versions are not
+         * truncated before they are compared */
+        u64 version = 0;
+
+        LASSERT(cfs_mutex_is_locked(&tbl->mn_lock));
+        cfs_list_for_each_entry(tgt, &tbl->mn_targets, mnt_list) {
+                /* version 0 marks an invalid entry, it is not ordered */
+                if (!tgt->mnt_version)
+                        continue;
+
+                if (version >= tgt->mnt_version)
+                        return 0;
+
+                version = tgt->mnt_version;
+        }
+        return 1;
+}
+
+/**
+ * Fetch nidtbl entries whose version are not less than @version
+ * nidtbl entries will be packed in @pages by @unit_size units - entries
+ * shouldn't cross unit boundaries.
+ *
+ * The requested version is taken from res->mcr_offset. Pages are allocated
+ * here on demand and stored in \a pages; they are not freed by this
+ * function, not even on error.
+ *
+ * \param unused       not used
+ * \param tbl          NID table to read, locked internally with mn_lock
+ * \param res          in: mcr_offset is the first version wanted;
+ *                     out: mcr_size is the current table version, mcr_offset
+ *                     is the version of the last packed entry if the units
+ *                     ran out, the current table version otherwise
+ * \param pages        array of \a nrpages page pointers to be filled
+ * \param nrpages      size of \a pages
+ * \param units_total  number of units that may be used
+ * \param unit_size    size of a unit in bytes, must be a power of 2
+ *
+ * \retval >= 0      number of bytes packed, including the padding added to
+ *                   the last entry of every completed unit
+ * \retval negative  -EOVERFLOW if an entry is larger than a unit, -ENOMEM
+ *                   if a page cannot be allocated
+ */
+static int mgs_nidtbl_read(struct obd_device *unused, struct mgs_nidtbl *tbl,
+                           struct mgs_config_res *res, cfs_page_t **pages,
+                           int nrpages, int units_total, int unit_size)
+{
+        struct mgs_nidtbl_target *tgt;
+        struct mgs_nidtbl_entry  *entry;
+        struct mgs_nidtbl_entry  *last_in_unit = NULL;
+        struct mgs_target_info   *mti;
+        __u64 version = res->mcr_offset;
+        bool nobuf = false;
+        void *buf = NULL;
+        int bytes_in_unit = 0;
+        int units_in_page = 0;
+        int index = 0;
+        int rc = 0;
+        ENTRY;
+
+        /* make sure unit_size is power 2 */
+        LASSERT((unit_size & (unit_size - 1)) == 0);
+        LASSERT(nrpages << CFS_PAGE_SHIFT >= units_total * unit_size);
+
+        cfs_mutex_lock(&tbl->mn_lock);
+        LASSERT(nidtbl_is_sane(tbl));
+
+        /* no more entries ? */
+        if (version > tbl->mn_version) {
+                version = tbl->mn_version;
+                goto out;
+        }
+
+        /* iterate over all targets and pack every entry whose version is
+         * not less than the requested one.
+         */
+        cfs_list_for_each_entry(tgt, &tbl->mn_targets, mnt_list) {
+                int entry_len = sizeof(*entry);
+
+                if (tgt->mnt_version < version)
+                        continue;
+
+                /* write target recover information */
+                mti  = &tgt->mnt_mti;
+                LASSERT(mti->mti_nid_count < MTI_NIDS_MAX);
+                entry_len += mti->mti_nid_count * sizeof(lnet_nid_t);
+
+                if (entry_len > unit_size) {
+                        CWARN("nidtbl: too large entry: entry length %d,"
+                              "unit size: %d\n", entry_len, unit_size);
+                        /* leave through the end of the loop instead of
+                         * jumping to "out", so that the page which is
+                         * currently mapped gets unmapped below */
+                        rc = -EOVERFLOW;
+                        break;
+                }
+
+                if (bytes_in_unit < entry_len) {
+                        if (units_total == 0) {
+                                nobuf = true;
+                                break;
+                        }
+
+                        /* check if we need to consume remaining bytes. */
+                        if (last_in_unit != NULL && bytes_in_unit) {
+                                /* entry has been swapped. */
+                                __swab32s(&last_in_unit->mne_length);
+                                last_in_unit->mne_length += bytes_in_unit;
+                                __swab32s(&last_in_unit->mne_length);
+                                rc  += bytes_in_unit;
+                                buf += bytes_in_unit;
+                                last_in_unit = NULL;
+                        }
+                        LASSERT((rc & (unit_size - 1)) == 0);
+
+                        if (units_in_page == 0) {
+                                /* allocate a new page */
+                                pages[index] = cfs_alloc_page(CFS_ALLOC_STD);
+                                if (pages[index] == NULL) {
+                                        rc = -ENOMEM;
+                                        break;
+                                }
+
+                                /* destroy previous map */
+                                if (index > 0)
+                                        cfs_kunmap(pages[index - 1]);
+
+                                /* reassign buffer */
+                                buf = cfs_kmap(pages[index]);
+                                ++index;
+
+                                units_in_page = CFS_PAGE_SIZE / unit_size;
+                                LASSERT(units_in_page > 0);
+                        }
+
+                        /* allocate an unit */
+                        LASSERT(((long)buf & (unit_size - 1)) == 0);
+                        bytes_in_unit = unit_size;
+                        --units_in_page;
+                        --units_total;
+                }
+
+                /* fill in entry. */
+                entry = (struct mgs_nidtbl_entry *)buf;
+                entry->mne_version   = tgt->mnt_version;
+                entry->mne_instance  = mti->mti_instance;
+                entry->mne_index     = mti->mti_stripe_index;
+                entry->mne_length    = entry_len;
+                entry->mne_type      = tgt->mnt_type;
+                entry->mne_nid_type  = 0;
+                entry->mne_nid_size  = sizeof(lnet_nid_t);
+                entry->mne_nid_count = mti->mti_nid_count;
+                memcpy(entry->u.nids, mti->mti_nids,
+                       mti->mti_nid_count * sizeof(lnet_nid_t));
+                lustre_swab_mgs_nidtbl_entry(entry);
+
+                version = tgt->mnt_version;
+                rc     += entry_len;
+                buf    += entry_len;
+
+                bytes_in_unit -= entry_len;
+                last_in_unit   = entry;
+
+                CDEBUG(D_MGS, "fsname %s, entry size %d, pages %d/%d/%d/%d.\n",
+                       tbl->mn_fsdb->fsdb_name, entry_len,
+                       bytes_in_unit, index, nrpages, units_total);
+        }
+        /* only the most recently allocated page is still mapped */
+        if (index > 0)
+                cfs_kunmap(pages[index - 1]);
+out:
+        LASSERT(version <= tbl->mn_version);
+        res->mcr_size = tbl->mn_version;
+        res->mcr_offset = nobuf ? version : tbl->mn_version;
+        cfs_mutex_unlock(&tbl->mn_lock);
+        LASSERT(ergo(version == 1, rc == 0)); /* get the log first time */
+
+        CDEBUG(D_MGS, "Read IR logs %s return with %d, version %llu\n",
+               tbl->mn_fsdb->fsdb_name, rc, version);
+        RETURN(rc);
+}
+
+/*
+ * Write tbl->mn_version, as a little-endian 64-bit value, to the start of
+ * the file MGS_NIDTBL_DIR/<fsname>, creating the file if needed, and sync
+ * the MGS file system afterwards.
+ *
+ * The caller must hold tbl->mn_lock.
+ *
+ * Returns 0 if the whole value was written; otherwise the error from the
+ * open, or the return value of lustre_fwrite().
+ */
+static int nidtbl_update_version(struct obd_device *obd, struct mgs_nidtbl *tbl)
+{
+        struct lvfs_run_ctxt ctxt;
+        struct file         *filp;
+        char                 filename[sizeof(MGS_NIDTBL_DIR) + 9];
+        u64                  ondisk;
+        loff_t               pos = 0;
+        int                  rc;
+        ENTRY;
+
+        LASSERT(cfs_mutex_is_locked(&tbl->mn_lock));
+        LASSERT(sizeof(filename) < 32);
+
+        sprintf(filename, "%s/%s", MGS_NIDTBL_DIR, tbl->mn_fsdb->fsdb_name);
+
+        push_ctxt(&ctxt, &obd->obd_lvfs_ctxt, NULL);
+
+        filp = l_filp_open(filename, O_RDWR|O_CREAT, 0660);
+        if (IS_ERR(filp)) {
+                rc = PTR_ERR(filp);
+        } else {
+                ondisk = cpu_to_le64(tbl->mn_version);
+                rc = lustre_fwrite(filp, &ondisk, sizeof(ondisk), &pos);
+                if (rc == sizeof(ondisk))
+                        rc = 0;
+                filp_close(filp, 0);
+                fsfilt_sync(obd, obd->u.mgs.mgs_sb);
+        }
+
+        pop_ctxt(&ctxt, &obd->obd_lvfs_ctxt, NULL);
+        RETURN(rc);
+}
+
+#define MGS_NIDTBL_VERSION_INIT 2
+
+/*
+ * Read the NID table version stored in the file MGS_NIDTBL_DIR/<fsname>.
+ *
+ * The caller must hold tbl->mn_lock.
+ *
+ * Returns the stored version, MGS_NIDTBL_VERSION_INIT if the file does not
+ * exist or is empty, or a negative errno; a short read is reported as -EIO.
+ *
+ * NOTE(review): the 64-bit on-disk version is returned through an int, so a
+ * version above INT_MAX cannot be represented by this interface.
+ */
+static int nidtbl_read_version(struct obd_device *obd, struct mgs_nidtbl *tbl)
+{
+        struct lvfs_run_ctxt saved;
+        struct file         *file = NULL;
+        char                 filename[sizeof(MGS_NIDTBL_DIR) + 9];
+        u64                  version;
+        loff_t               off = 0;
+        int                  rc;
+        ENTRY;
+
+        LASSERT(cfs_mutex_is_locked(&tbl->mn_lock));
+        LASSERT(sizeof(filename) < 32);
+
+        sprintf(filename, "%s/%s",
+                MGS_NIDTBL_DIR, tbl->mn_fsdb->fsdb_name);
+
+        push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+        file = l_filp_open(filename, O_RDONLY, 0);
+        if (!IS_ERR(file)) {
+                rc = lustre_fread(file, &version, sizeof(version), &off);
+                if (rc == sizeof(version)) {
+                        /* the value is stored little-endian on disk */
+                        rc = le64_to_cpu(version);
+                } else if (rc == 0) {
+                        /* empty file: no version was stored yet */
+                        rc = MGS_NIDTBL_VERSION_INIT;
+                } else {
+                        CERROR("read version file %s error %d\n", filename, rc);
+                        /* a short read must not be taken for a version */
+                        if (rc > 0)
+                                rc = -EIO;
+                }
+                filp_close(file, 0);
+        } else {
+                rc = PTR_ERR(file);
+                if (rc == -ENOENT)
+                        rc = MGS_NIDTBL_VERSION_INIT;
+        }
+
+        pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+        RETURN(rc);
+}
+
+/**
+ * Record the target described by \a mti in the NID table of \a fsdb.
+ *
+ * A target is identified by its server type (with the MGS bit masked off)
+ * and its stripe index.  An unknown target gets a new entry; in both cases
+ * the entry receives the next table version and a copy of \a mti, is moved
+ * to the tail of the target list, and the new version is written to disk.
+ *
+ * \retval 0        success
+ * \retval -ENOMEM  a new entry could not be allocated
+ * \retval other    error from nidtbl_update_version()
+ */
+static int mgs_nidtbl_write(struct fs_db *fsdb, struct mgs_target_info *mti)
+{
+        struct mgs_nidtbl        *tbl = &fsdb->fsdb_nidtbl;
+        struct mgs_nidtbl_target *tgt = NULL;
+        struct mgs_nidtbl_target *iter;
+        int svtype;
+        int rc = 0;
+        ENTRY;
+
+        svtype = (mti->mti_flags & LDD_F_SV_TYPE_MASK) & ~LDD_F_SV_TYPE_MGS;
+        LASSERT(svtype != 0);
+
+        cfs_mutex_lock(&tbl->mn_lock);
+
+        /* look for an existing entry of the same type and index */
+        cfs_list_for_each_entry(iter, &tbl->mn_targets, mnt_list) {
+                if (iter->mnt_type != svtype)
+                        continue;
+                if (iter->mnt_mti.mti_stripe_index != mti->mti_stripe_index)
+                        continue;
+                tgt = iter;
+                break;
+        }
+
+        if (tgt == NULL) {
+                /* first time we see this target */
+                OBD_ALLOC_PTR(tgt);
+                if (tgt == NULL)
+                        GOTO(out, rc = -ENOMEM);
+
+                CFS_INIT_LIST_HEAD(&tgt->mnt_list);
+                tgt->mnt_fs      = tbl;
+                tgt->mnt_version = 0;       /* 0 means invalid */
+                tgt->mnt_type    = svtype;
+
+                ++tbl->mn_nr_targets;
+        }
+
+        /* stamp the entry with a fresh version; newest entries go last */
+        tgt->mnt_version = ++tbl->mn_version;
+        tgt->mnt_mti     = *mti;
+        cfs_list_move_tail(&tgt->mnt_list, &tbl->mn_targets);
+
+        rc = nidtbl_update_version(fsdb->fsdb_obd, tbl);
+        EXIT;
+
+out:
+        cfs_mutex_unlock(&tbl->mn_lock);
+        if (rc != 0)
+                CERROR("Write NID table version for file system %s error %d\n",
+                       fsdb->fsdb_name, rc);
+        return rc;
+}
+
+/**
+ * Drop every target entry from the NID table of \a fsdb.
+ *
+ * The entries are unhooked from the table under mn_lock and freed after
+ * the lock has been released.
+ */
+static void mgs_nidtbl_fini_fs(struct fs_db *fsdb)
+{
+        struct mgs_nidtbl *tbl = &fsdb->fsdb_nidtbl;
+        CFS_LIST_HEAD(doomed);
+
+        cfs_mutex_lock(&tbl->mn_lock);
+        cfs_list_splice_init(&tbl->mn_targets, &doomed);
+        tbl->mn_nr_targets = 0;
+        cfs_mutex_unlock(&tbl->mn_lock);
+
+        for (;;) {
+                struct mgs_nidtbl_target *victim;
+
+                if (cfs_list_empty(&doomed))
+                        break;
+
+                victim = list_entry(doomed.next, struct mgs_nidtbl_target,
+                                    mnt_list);
+                cfs_list_del(&victim->mnt_list);
+                OBD_FREE_PTR(victim);
+        }
+}
+
+/**
+ * Initialize the NID table embedded in \a fsdb and load its version.
+ *
+ * nidtbl_read_version() reports failures as a negative errno through the
+ * same int that carries the version, so the result is checked before it is
+ * stored: an errno must never end up in the unsigned mn_version.  On a read
+ * failure the table starts from MGS_NIDTBL_VERSION_INIT, the value also
+ * used when no version file exists.
+ * NOTE(review): confirm this is the desired fallback version.
+ *
+ * \retval 0      the version was loaded
+ * \retval other  negative error from nidtbl_read_version()
+ */
+static int mgs_nidtbl_init_fs(struct fs_db *fsdb)
+{
+        struct mgs_nidtbl *tbl = &fsdb->fsdb_nidtbl;
+        int rc;
+
+        CFS_INIT_LIST_HEAD(&tbl->mn_targets);
+        cfs_mutex_init(&tbl->mn_lock);
+        tbl->mn_nr_targets = 0;
+        tbl->mn_fsdb = fsdb;
+        cfs_mutex_lock(&tbl->mn_lock);
+        rc = nidtbl_read_version(fsdb->fsdb_obd, tbl);
+        tbl->mn_version = rc < 0 ? MGS_NIDTBL_VERSION_INIT : rc;
+        cfs_mutex_unlock(&tbl->mn_lock);
+
+        if (rc < 0) {
+                CERROR("IR: read NID table version of %s error %d\n",
+                       fsdb->fsdb_name, rc);
+                return rc;
+        }
+
+        CDEBUG(D_MGS, "IR: current version is %llu\n", tbl->mn_version);
+        return 0;
+}
+
+/* --------- Imperative Recovery relies on nidtbl stuff ------- */
+/**
+ * Body of the per-filesystem notify thread used by imperative recovery.
+ *
+ * The thread sleeps until fsdb_notify_phase becomes non-zero or
+ * fsdb_notify_stop is set.  On each wakeup it takes the CONFIG_T_RECOVER
+ * lock of the file system with mgs_get_lock() and releases it right away
+ * with mgs_put_lock(), then updates the fsdb_notify_{count,total,max}
+ * statistics.  fsdb_notify_comp is completed once after startup and once
+ * more just before the thread returns.
+ *
+ * \param arg  the struct fs_db this thread serves
+ * \retval 0   always
+ */
+static int mgs_ir_notify(void *arg)
+{
+        struct fs_db      *fsdb   = arg;
+        struct ldlm_res_id resid;
+
+        char name[sizeof(fsdb->fsdb_name) + 20];
+
+        LASSERTF(sizeof(name) < 32, "name is too large to be in stack.\n");
+        sprintf(name, "mgs_%s_notify", fsdb->fsdb_name);
+        cfs_daemonize(name);
+
+        /* startup handshake: mgs_ir_init_fs() waits on this completion */
+        cfs_complete(&fsdb->fsdb_notify_comp);
+
+        mgc_fsname2resid(fsdb->fsdb_name, &resid, CONFIG_T_RECOVER);
+        while (1) {
+                struct l_wait_info   lwi = { 0 };
+                struct lustre_handle lockh;
+                cfs_time_t           curtime;
+                int                  lockrc;
+                int                  delta;
+
+                l_wait_event(fsdb->fsdb_notify_waitq,
+                             fsdb->fsdb_notify_stop ||
+                             cfs_atomic_read(&fsdb->fsdb_notify_phase),
+                             &lwi);
+                if (fsdb->fsdb_notify_stop)
+                        break;
+
+                CDEBUG(D_MGS, "%s woken up, phase is %d\n",
+                       name, cfs_atomic_read(&fsdb->fsdb_notify_phase));
+
+                curtime = cfs_time_current();
+                lockrc = mgs_get_lock(fsdb->fsdb_obd, &resid, &lockh);
+                if (lockrc == ELDLM_OK) {
+                        /* all pending requests are served by this one pass */
+                        cfs_atomic_set(&fsdb->fsdb_notify_phase, 0);
+                        mgs_put_lock(&lockh);
+
+                        /* do statistic */
+                        fsdb->fsdb_notify_count++;
+                        /* NOTE(review): the message below reports delta in
+                         * microseconds, but it is derived from a
+                         * cfs_time_current() difference divided by
+                         * NSEC_PER_USEC - confirm the units are right */
+                        delta = (cfs_time_current() - curtime) / NSEC_PER_USEC;
+                        fsdb->fsdb_notify_total += delta;
+                        if (delta > fsdb->fsdb_notify_max)
+                                fsdb->fsdb_notify_max = delta;
+                        CDEBUG(D_MGS, "Revoke recover lock of %s spent %dus\n",
+                               fsdb->fsdb_name, delta);
+                } else {
+                        /* NOTE(review): fsdb_notify_phase is not cleared
+                         * here, so the wait condition above stays true and
+                         * the loop retries at once - confirm intended */
+                        CERROR("Fatal error %d for fs %s\n",
+                               lockrc, fsdb->fsdb_name);
+                }
+        }
+
+        /* shutdown handshake: mgs_ir_fini_fs() waits on this completion */
+        cfs_complete(&fsdb->fsdb_notify_comp);
+        return 0;
+}
+
+/**
+ * Set up the imperative recovery state of \a fsdb.
+ *
+ * The initial state is IR_STARTUP while cfs_time_current_sec() is still
+ * before mgs_start_time + ir_timeout, and IR_FULL otherwise.  The function
+ * then starts the mgs_ir_notify() thread, waits for it to signal startup,
+ * and initializes the NID table.
+ *
+ * \retval 0  always; a thread start failure is only logged
+ */
+int mgs_ir_init_fs(struct obd_device *obd, struct fs_db *fsdb)
+{
+        struct mgs_obd *mgs = &obd->u.mgs;
+        int rc;
+
+        /* an unset (zero) timeout falls back to the built-in default */
+        if (!ir_timeout)
+                ir_timeout = OBD_IR_MGS_TIMEOUT;
+
+        fsdb->fsdb_ir_state = IR_FULL;
+        if (cfs_time_before(cfs_time_current_sec(),
+                            mgs->mgs_start_time + ir_timeout))
+                fsdb->fsdb_ir_state = IR_STARTUP;
+
+        /* start notify thread */
+        /* everything the thread reads must be set up before it is created */
+        fsdb->fsdb_obd = obd;
+        cfs_atomic_set(&fsdb->fsdb_notify_phase, 0);
+        cfs_waitq_init(&fsdb->fsdb_notify_waitq);
+        cfs_init_completion(&fsdb->fsdb_notify_comp);
+        rc = cfs_create_thread(mgs_ir_notify, fsdb, CFS_DAEMON_FLAGS);
+        if (rc > 0)
+                cfs_wait_for_completion(&fsdb->fsdb_notify_comp);
+        else
+                /* NOTE(review): mgs_ir_fini_fs() waits on fsdb_notify_comp
+                 * unconditionally; with no thread running that wait looks
+                 * like it could block - confirm */
+                CERROR("Start notify thread error %d\n", rc);
+
+        /* the return value of mgs_nidtbl_init_fs() is not checked */
+        mgs_nidtbl_init_fs(fsdb);
+        return 0;
+}
+
+/**
+ * Tear down the imperative recovery state of \a fsdb.
+ *
+ * Nothing is done for an fsdb flagged FSDB_MGS_SELF.  Otherwise the NID
+ * table is emptied, the notify thread is asked to stop, and the function
+ * waits until the thread has signalled fsdb_notify_comp.
+ */
+void mgs_ir_fini_fs(struct obd_device *obd, struct fs_db *fsdb)
+{
+        if (!cfs_test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
+                mgs_nidtbl_fini_fs(fsdb);
+
+                /* raise the stop flag first, then kick the thread */
+                fsdb->fsdb_notify_stop = 1;
+                cfs_waitq_signal(&fsdb->fsdb_notify_waitq);
+                cfs_wait_for_completion(&fsdb->fsdb_notify_comp);
+        }
+}
+
+/*
+ * Promote \a fsdb from IR_STARTUP to IR_FULL once the current time is past
+ * mgs_start_time + ir_timeout.  Any other state is left alone.
+ *
+ * caller must have held fsdb_sem
+ */
+static inline void ir_state_graduate(struct fs_db *fsdb)
+{
+        struct mgs_obd *mgs;
+
+        if (fsdb->fsdb_ir_state != IR_STARTUP)
+                return;
+
+        mgs = &fsdb->fsdb_obd->u.mgs;
+        if (cfs_time_before(mgs->mgs_start_time + ir_timeout,
+                            cfs_time_current_sec()))
+                fsdb->fsdb_ir_state = IR_FULL;
+}
+
+/**
+ * Handle a target update for imperative recovery.
+ *
+ * The target is written into the NID table of its file system.  Depending
+ * on the IR state, LDD_F_IR_CAPABLE is set in \a mti (IR_FULL only) and the
+ * notify thread is woken up (every state except IR_DISABLED).
+ *
+ * \retval 0        success
+ * \retval -EINVAL  \a mti carries no instance number
+ * \retval other    error from mgs_find_or_make_fsdb() or mgs_nidtbl_write()
+ */
+int mgs_ir_update(struct obd_device *obd, struct mgs_target_info *mti)
+{
+        struct fs_db *fsdb;
+        bool wakeup = true;
+        int rc;
+
+        if (mti->mti_instance == 0)
+                return -EINVAL;
+
+        rc = mgs_find_or_make_fsdb(obd, mti->mti_fsname, &fsdb);
+        if (rc != 0)
+                return rc;
+
+        rc = mgs_nidtbl_write(fsdb, mti);
+        if (rc != 0)
+                return rc;
+
+        /* the state is examined and possibly graduated under fsdb_sem */
+        cfs_down(&fsdb->fsdb_sem);
+        ir_state_graduate(fsdb);
+        switch (fsdb->fsdb_ir_state) {
+        case IR_FULL:
+                mti->mti_flags |= LDD_F_IR_CAPABLE;
+                break;
+        case IR_STARTUP:
+        case IR_PARTIAL:
+                break;
+        case IR_DISABLED:
+                wakeup = false;
+                break;
+        default:
+                LBUG();
+        }
+        cfs_up(&fsdb->fsdb_sem);
+
+        /* an IR capable reply implies that clients get notified */
+        LASSERT(ergo(mti->mti_flags & LDD_F_IR_CAPABLE, wakeup));
+        if (!wakeup)
+                return 0;
+
+        CDEBUG(D_MGS, "Try to revoke recover lock of %s\n",
+               fsdb->fsdb_name);
+        cfs_atomic_inc(&fsdb->fsdb_notify_phase);
+        cfs_waitq_signal(&fsdb->fsdb_notify_waitq);
+        return 0;
+}
+
+/* NID table can be cached by two entities: Clients and MDTs */
+/* delogname() reports one of these values, derived from the log name suffix */
+enum {
+        IR_CLIENT  = 1,         /* log name ends in "-cliir" */
+        IR_MDT     = 2          /* log name ends in "-mdtir" */
+};
+
+/**
+ * Split an IR log name into its file system name and requester type.
+ *
+ * The name has the form "<fsname>-<suffix>": everything before the last '-'
+ * is the file system name, and what follows must start with "mdtir" or
+ * "cliir".
+ *
+ * \param logname  NUL terminated log name (comes from the request)
+ * \param fsname   output buffer; mgs_get_ir_logs() passes 16 bytes
+ * \param typ      if not NULL, receives IR_MDT or IR_CLIENT
+ *
+ * \retval 0        success
+ * \retval -EINVAL  no '-' found, unknown suffix, or a file system name that
+ *                  is empty or does not fit into \a fsname
+ */
+static int delogname(char *logname, char *fsname, int *typ)
+{
+        /* capacity of the fsname[] buffer in mgs_get_ir_logs(); keep in sync */
+        enum { IR_FSNAME_BUFSZ = 16 };
+        char *ptr;
+        int   type;
+        int   len;
+
+        ptr = strrchr(logname, '-');
+        if (ptr == NULL)
+                return -EINVAL;
+
+        /* decouple file system name. The llog name is:
+         * - "fsname-suffix", suffix is "cliir" or "mdtir"
+         */
+        if (strncmp(ptr, "-mdtir", 6) == 0)
+                type = IR_MDT;
+        else if (strncmp(ptr, "-cliir", 6) == 0)
+                type = IR_CLIENT;
+        else
+                return -EINVAL;
+
+        /* the name is client supplied: refuse what would overflow fsname */
+        len = ptr - logname;
+        if (len == 0 || len >= IR_FSNAME_BUFSZ)
+                return -EINVAL;
+
+        memcpy(fsname, logname, len);
+        fsname[len] = 0;
+        if (typ)
+                *typ = type;
+        return 0;
+}
+
+/**
+ * Serve a CONFIG_T_RECOVER log read: send NID table entries to the peer.
+ *
+ * The request body names the log ("<fsname>-cliir" or "<fsname>-mdtir") and
+ * describes the peer's buffer as mcb_units units of (1 << mcb_bits) bytes.
+ * mgs_nidtbl_read() fills up to nrpages pages, which are then pushed to the
+ * peer with a bulk PUT.  Whatever pages[] holds is freed before returning.
+ *
+ * \retval 0        success
+ * \retval -EINVAL  malformed request: missing body, wrong type, bad log
+ *                  name, oversized shift count or oversized buffer
+ * \retval -ENOMEM  page array or bulk descriptor allocation failed
+ * \retval other    error from a helper (fsdb lookup, reply packing,
+ *                  mgs_nidtbl_read(), target_bulk_io())
+ */
+int mgs_get_ir_logs(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+        struct fs_db      *fsdb;
+        struct mgs_config_body  *body;
+        struct mgs_config_res   *res;
+        struct ptlrpc_bulk_desc *desc;
+        struct l_wait_info lwi = { 0 };
+        char               fsname[16];
+        long               bufsize;
+        int                unit_size;
+
+        int                type;
+        int                rc = 0;
+        int                i;
+        int                bytes;
+        int                page_count;
+        int                nrpages;
+        cfs_page_t       **pages = NULL;
+        ENTRY;
+
+        body = req_capsule_client_get(&req->rq_pill, &RMF_MGS_CONFIG_BODY);
+        if (body == NULL)
+                RETURN(-EINVAL);
+
+        if (body->mcb_type != CONFIG_T_RECOVER)
+                RETURN(-EINVAL);
+
+        /* mcb_bits comes off the wire and is used as a shift count for
+         * both bufsize and the int-typed "1 << mcb_bits" below; anything
+         * above 30 would overflow an int or be undefined behavior. */
+        if (body->mcb_bits > 30)
+                RETURN(-EINVAL);
+
+        rc = delogname(body->mcb_name, fsname, &type);
+        if (rc)
+                RETURN(rc);
+
+        rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb);
+        if (rc)
+                GOTO(out, rc);
+
+        bufsize = body->mcb_units << body->mcb_bits;
+        nrpages = (bufsize + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+        if (nrpages > PTLRPC_MAX_BRW_PAGES)
+                RETURN(-EINVAL);
+
+        CDEBUG(D_MGS, "Reading IR log %s bufsize %ld.\n",
+               body->mcb_name, bufsize);
+
+        OBD_ALLOC(pages, sizeof(*pages) * nrpages);
+        if (pages == NULL)
+                RETURN(-ENOMEM);
+
+        rc = req_capsule_server_pack(&req->rq_pill);
+        if (rc)
+                GOTO(out, rc);
+
+        res = req_capsule_server_get(&req->rq_pill, &RMF_MGS_CONFIG_RES);
+        if (res == NULL)
+                GOTO(out, rc = -EINVAL);
+
+        res->mcr_offset = body->mcb_offset;
+        /* a unit never spans more than one page */
+        unit_size = min_t(int, 1 << body->mcb_bits, CFS_PAGE_SIZE);
+        bytes = mgs_nidtbl_read(obd, &fsdb->fsdb_nidtbl, res, pages, nrpages,
+                                bufsize / unit_size, unit_size);
+        if (bytes < 0)
+                GOTO(out, rc = bytes);
+
+        /* start bulk transfer */
+        page_count = (bytes + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT;
+        LASSERT(page_count <= nrpages);
+        desc = ptlrpc_prep_bulk_exp(req, page_count,
+                                    BULK_PUT_SOURCE, MGS_BULK_PORTAL);
+        if (desc == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        /* full pages first, the last one carries the remainder */
+        for (i = 0; i < page_count && bytes > 0; i++) {
+                ptlrpc_prep_bulk_page(desc, pages[i], 0,
+                                min_t(int, bytes, CFS_PAGE_SIZE));
+                bytes -= CFS_PAGE_SIZE;
+        }
+
+        rc = target_bulk_io(req->rq_export, desc, &lwi);
+        ptlrpc_free_bulk(desc);
+
+out:
+        if (pages) {
+                /* the first NULL slot ends the populated part of pages[] */
+                for (i = 0; i < nrpages; i++) {
+                        if (pages[i] == NULL)
+                                break;
+                        cfs_free_page(pages[i]);
+                }
+                OBD_FREE(pages, sizeof(*pages) * nrpages);
+        }
+        RETURN(rc);
+}
+
+/**
+ * "state=" command: set the IR state of \a fsdb by name.
+ *
+ * \a buf must match one of the IR_STRINGS entries exactly; the index of the
+ * matching entry becomes the new fsdb_ir_state.
+ *
+ * \retval 0        the state was changed
+ * \retval -EINVAL  \a buf names no known state
+ */
+static int lprocfs_ir_set_state(struct fs_db *fsdb, const char *buf)
+{
+        const char *names[] = IR_STRINGS;
+        int         idx;
+
+        for (idx = 0; idx < ARRAY_SIZE(names); idx++)
+                if (strcmp(names[idx], buf) == 0)
+                        break;
+
+        /* ran off the end of the table: no such state */
+        if (idx == ARRAY_SIZE(names))
+                return -EINVAL;
+
+        CDEBUG(D_MGS, "change fsr state of %s from %s to %s\n",
+               fsdb->fsdb_name, names[fsdb->fsdb_ir_state], names[idx]);
+
+        cfs_down(&fsdb->fsdb_sem);
+        fsdb->fsdb_ir_state = idx;
+        cfs_up(&fsdb->fsdb_sem);
+
+        return 0;
+}
+
+/* "timeout=" command: a stub, every request is rejected with -EINVAL. */
+static int lprocfs_ir_set_timeout(struct fs_db *fsdb, const char *buf)
+{
+        return -EINVAL;
+}
+
+/**
+ * "0" command: reset the notify statistics of \a fsdb.
+ *
+ * \a buf is the text following the command and must be empty.
+ *
+ * \retval 0        the counters were cleared
+ * \retval -EINVAL  trailing text after the command
+ */
+static int lprocfs_ir_clear_stats(struct fs_db *fsdb, const char *buf)
+{
+        if (buf[0] != '\0')
+                return -EINVAL;
+
+        fsdb->fsdb_notify_count = 0;
+        fsdb->fsdb_notify_max   = 0;
+        fsdb->fsdb_notify_total = 0;
+        return 0;
+}
+
+/*
+ * Commands understood by lprocfs_wr_ir_state().  A command is selected by
+ * comparing the first namelen bytes of the input with name; the handler is
+ * then called with the text that follows the command name.
+ */
+static struct lproc_ir_cmd {
+        char *name;             /* command prefix */
+        int   namelen;          /* number of bytes of name to compare */
+        int (*handler)(struct fs_db *, const char *);
+} ir_cmds[] = {
+        { "state=",   6, lprocfs_ir_set_state },
+        { "timeout=", 8, lprocfs_ir_set_timeout },
+        { "0",        1, lprocfs_ir_clear_stats }
+};
+
+/**
+ * proc write handler: run one or more IR commands on a file system.
+ *
+ * The input is a ';' separated list of commands from ir_cmds[]; a single
+ * trailing newline is stripped.  Processing stops at the first command that
+ * is unknown or whose handler fails.
+ *
+ * \param buffer  user space buffer holding the command string
+ * \param count   number of bytes in \a buffer, 1..CFS_PAGE_SIZE
+ * \param data    the struct fs_db to operate on
+ *
+ * \retval count    every command succeeded
+ * \retval -EINVAL  empty or oversized input, or an unknown command
+ * \retval -ENOMEM  no memory for the kernel copy of the input
+ * \retval -EFAULT  the user buffer could not be read
+ * \retval other    error returned by a command handler
+ */
+int lprocfs_wr_ir_state(struct file *file, const char *buffer,
+                         unsigned long count, void *data)
+{
+        struct fs_db *fsdb = data;
+        char *kbuf;
+        char *ptr;
+        int rc = 0;
+
+        /* count == 0 would make kbuf[count - 1] below read out of bounds */
+        if (count == 0 || count > CFS_PAGE_SIZE)
+                return -EINVAL;
+
+        OBD_ALLOC(kbuf, count + 1);
+        if (kbuf == NULL)
+                return -ENOMEM;
+
+        if (copy_from_user(kbuf, buffer, count)) {
+                /* free with the same size that was allocated */
+                OBD_FREE(kbuf, count + 1);
+                return -EFAULT;
+        }
+
+        kbuf[count] = 0; /* buffer is supposed to end with 0 */
+        if (kbuf[count - 1] == '\n')
+                kbuf[count - 1] = 0;
+        ptr = kbuf;
+
+        /* walk the ';' separated commands, ptr is the current one */
+        while (ptr != NULL) {
+                char *next;
+                int i;
+
+                next = strchr(ptr, ';');
+                if (next)
+                        *next++ = 0;
+
+                rc = -EINVAL;
+                for (i = 0; i < ARRAY_SIZE(ir_cmds); i++) {
+                        struct lproc_ir_cmd *cmd = &ir_cmds[i];
+
+                        if (strncmp(cmd->name, ptr, cmd->namelen) == 0) {
+                                /* ptr is left alone so that the error
+                                 * message below shows the whole command */
+                                rc = cmd->handler(fsdb, ptr + cmd->namelen);
+                                break;
+                        }
+                }
+                if (rc)
+                        break;
+
+                ptr = next;
+        }
+        if (rc)
+                CERROR("Unable to process command: %s(%d)\n", ptr, rc);
+        OBD_FREE(kbuf, count + 1);
+        return rc ?: count;
+}
+
+/**
+ * seq_file show helper: print the IR state, the NID table version and the
+ * notify statistics of the file system passed in \a data.
+ *
+ * The state is graduated first, so a file system that has outlived the
+ * startup window is reported as IR_FULL.
+ *
+ * \retval 0  always
+ */
+int lprocfs_rd_ir_state(struct seq_file *seq, void *data)
+{
+        struct fs_db      *fsdb = data;
+        struct mgs_nidtbl *tbl  = &fsdb->fsdb_nidtbl;
+        const char        *ir_strings[] = IR_STRINGS;
+
+        /* mgs_live_seq_show() already holds fsdb_sem. */
+        ir_state_graduate(fsdb);
+
+        /* mn_version is unsigned; print it as %llu like the rest of the
+         * file does */
+        seq_printf(seq,
+                   "\tstate: %s, nidtbl version: %llu\n",
+                   ir_strings[fsdb->fsdb_ir_state], tbl->mn_version);
+        seq_printf(seq, "\tnotify total/max/count: %u/%u/%u\n",
+                   fsdb->fsdb_notify_total, fsdb->fsdb_notify_max,
+                   fsdb->fsdb_notify_count);
+        return 0;
+}
+
+/**
+ * proc read handler: print the value of ir_timeout followed by a newline.
+ *
+ * \retval the result of snprintf() on \a page
+ */
+int lprocfs_rd_ir_timeout(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
+{
+        int len;
+
+        len = snprintf(page, count, "%d\n", ir_timeout);
+        /* the whole value is produced in a single read */
+        *eof = 1;
+        return len;
+}
+
+/* proc write handler: hands the user buffer to lprocfs_wr_uint(), with
+ * ir_timeout as the variable to be updated. */
+int lprocfs_wr_ir_timeout(struct file *file, const char *buffer,
+                          unsigned long count, void *data)
+{
+        return lprocfs_wr_uint(file, buffer, count, &ir_timeout);
+}
+
index eed7097..7bdd31b 100644 (file)
@@ -810,6 +810,7 @@ static const char *obd_connect_names[] = {
         "layout_lock",
         "64bithash",
         "object_max_bytes",
+        "imp_recov",
         NULL
 };
 
@@ -850,10 +851,12 @@ int lprocfs_rd_import(char *page, char **start, off_t off, int count,
                      "    name: %s\n"
                      "    target: %s\n"
                      "    state: %s\n"
+                     "    instance: %u\n"
                      "    connect_flags: [",
                      obd->obd_name,
                      obd2cli_tgt(obd),
-                     ptlrpc_import_state_name(imp->imp_state));
+                     ptlrpc_import_state_name(imp->imp_state),
+                     imp->imp_connect_data.ocd_instance);
         i += obd_connect_flags2str(page + i, count - i,
                                    imp->imp_connect_data.ocd_connect_flags,
                                    ", ");
@@ -2267,6 +2270,9 @@ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
                 if (lprocfs_obd_snprintf(&page, size, &len, "VBR: %s\n",
                                          obd->obd_version_recov ? "ON" : "OFF")<=0)
                         goto out;
+                if (lprocfs_obd_snprintf(&page, size, &len, "IR: %s\n",
+                                         obd->obd_no_ir ? "OFF" : "ON") <= 0)
+                        goto out;
                 goto fclose;
         }
 
@@ -2318,6 +2324,36 @@ out:
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_recovery_status);
 
+/**
+ * proc read handler: print obd_recovery_ir_factor of the device in \a data.
+ *
+ * \retval the result of snprintf() on \a page
+ */
+int lprocfs_obd_rd_ir_factor(char *page, char **start, off_t off,
+                             int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+
+        LASSERT(obd != NULL);
+        return snprintf(page, count, "%d\n", obd->obd_recovery_ir_factor);
+}
+EXPORT_SYMBOL(lprocfs_obd_rd_ir_factor);
+
+/**
+ * proc write handler: set obd_recovery_ir_factor of the device in \a data.
+ *
+ * The value is parsed by lprocfs_write_helper() and must lie within
+ * [OBD_IR_FACTOR_MIN, OBD_IR_FACTOR_MAX].
+ *
+ * \retval count    the factor was stored
+ * \retval -EINVAL  the value is out of range
+ * \retval other    error from lprocfs_write_helper()
+ */
+int lprocfs_obd_wr_ir_factor(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int factor;
+        int rc;
+
+        LASSERT(obd != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &factor);
+        if (rc != 0)
+                return rc;
+
+        if (!(factor >= OBD_IR_FACTOR_MIN && factor <= OBD_IR_FACTOR_MAX))
+                return -EINVAL;
+
+        obd->obd_recovery_ir_factor = factor;
+        return count;
+}
+EXPORT_SYMBOL(lprocfs_obd_wr_ir_factor);
+
 int lprocfs_obd_rd_recovery_time_soft(char *page, char **start, off_t off,
                                       int count, int *eof, void *data)
 {
@@ -2351,7 +2387,7 @@ int lprocfs_obd_rd_recovery_time_hard(char *page, char **start, off_t off,
         struct obd_device *obd = data;
         LASSERT(obd != NULL);
 
-        return snprintf(page, count, "%lu\n", obd->obd_recovery_time_hard);
+        return snprintf(page, count, "%u\n", obd->obd_recovery_time_hard);
 }
 EXPORT_SYMBOL(lprocfs_obd_rd_recovery_time_hard);
 
@@ -2424,6 +2460,19 @@ int lprocfs_obd_wr_max_pages_per_rpc(struct file *file, const char *buffer,
 }
 EXPORT_SYMBOL(lprocfs_obd_wr_max_pages_per_rpc);
 
+/**
+ * proc read handler: print the instance number (obt_instance) of a target.
+ *
+ * \param data  the target's struct obd_device
+ *
+ * \retval the result of snprintf() on \a page
+ */
+int lprocfs_target_rd_instance(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        struct obd_device_target *target;
+
+        /* validate obd before forming any address from it */
+        LASSERT(obd != NULL);
+        target = &obd->u.obt;
+        LASSERT(target->obt_magic == OBT_MAGIC);
+
+        *eof = 1;
+        return snprintf(page, count, "%u\n", target->obt_instance);
+}
+EXPORT_SYMBOL(lprocfs_target_rd_instance);
+
 EXPORT_SYMBOL(lprocfs_register);
 EXPORT_SYMBOL(lprocfs_srch);
 EXPORT_SYMBOL(lprocfs_remove);
index 656281c..15f50c4 100644 (file)
@@ -598,7 +598,7 @@ static int lustre_start_mgc(struct super_block *sb)
         struct obd_uuid *uuid;
         class_uuid_t uuidc;
         lnet_nid_t nid;
-        char *mgcname, *niduuid, *mgssec;
+        char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
         char *ptr;
         int recov_bk;
         int rc = 0, i = 0, j, len;
@@ -634,6 +634,8 @@ static int lustre_start_mgc(struct super_block *sb)
                 RETURN(-EINVAL);
         }
 
+        cfs_mutex_down(&mgc_start_lock);
+
         len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
         OBD_ALLOC(mgcname, len);
         OBD_ALLOC(niduuid, len + 2);
@@ -643,7 +645,9 @@ static int lustre_start_mgc(struct super_block *sb)
 
         mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
 
-        cfs_mutex_down(&mgc_start_lock);
+        OBD_ALLOC_PTR(data);
+        if (data == NULL)
+                GOTO(out_free, rc = -ENOMEM);
 
         obd = class_name2obd(mgcname);
         if (obd && !obd->obd_stopping) {
@@ -656,6 +660,32 @@ static int lustre_start_mgc(struct super_block *sb)
                 /* Re-using an existing MGC */
                 cfs_atomic_inc(&obd->u.cli.cl_mgc_refcount);
 
+                /* IR compatibility check, only for clients */
+                if (lmd_is_client(lsi->lsi_lmd)) {
+                        int has_ir;
+                        int vallen = sizeof(*data);
+                        __u32 *flags = &lsi->lsi_lmd->lmd_flags;
+
+                        rc = obd_get_info(obd->obd_self_export,
+                                          strlen(KEY_CONN_DATA), KEY_CONN_DATA,
+                                          &vallen, data, NULL);
+                        LASSERT(rc == 0);
+                        has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
+                        if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
+                                /* LMD_FLG_NOIR is for test purpose only */
+                                LCONSOLE_WARN(
+                                    "Trying to mount a client with IR setting "
+                                    "not compatible with current mgc. "
+                                    "Force to use current mgc setting that is "
+                                    "IR %s.\n",
+                                    has_ir ? "enabled" : "disabled");
+                                if (has_ir)
+                                        *flags &= ~LMD_FLG_NOIR;
+                                else
+                                        *flags |= LMD_FLG_NOIR;
+                        }
+                }
+
                 recov_bk = 0;
                 /* If we are restarting the MGS, don't try to keep the MGC's
                    old connection, or registration will fail. */
@@ -785,14 +815,14 @@ static int lustre_start_mgc(struct super_block *sb)
                 /* nonfatal */
                 CWARN("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
         /* We connect to the MGS at setup, and don't disconnect until cleanup */
-        OBD_ALLOC_PTR(data);
-        if (data == NULL)
-                GOTO(out, rc = -ENOMEM);
         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_FID |
-                                  OBD_CONNECT_AT | OBD_CONNECT_FULL20;
+                                  OBD_CONNECT_AT | OBD_CONNECT_FULL20   |
+                                  OBD_CONNECT_IMP_RECOV;
+        if (lmd_is_client(lsi->lsi_lmd) &&
+            lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
+                data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
         data->ocd_version = LUSTRE_VERSION_CODE;
         rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
-        OBD_FREE_PTR(data);
         if (rc) {
                 CERROR("connect failed %d\n", rc);
                 GOTO(out, rc);
@@ -807,6 +837,8 @@ out:
 out_free:
         cfs_mutex_up(&mgc_start_lock);
 
+        if (data)
+                OBD_FREE_PTR(data);
         if (mgcname)
                 OBD_FREE(mgcname, len);
         if (niduuid)
@@ -1034,6 +1066,7 @@ int server_register_target(struct super_block *sb)
         struct obd_device *mgc = lsi->lsi_mgc;
         struct lustre_disk_data *ldd = lsi->lsi_ldd;
         struct mgs_target_info *mti = NULL;
+        bool writeconf;
         int rc;
         ENTRY;
 
@@ -1054,16 +1087,35 @@ int server_register_target(struct super_block *sb)
                libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
                mti->mti_flags);
 
+        /* if write_conf is true, the registration must succeed */
+        writeconf = !!(ldd->ldd_flags & (LDD_F_NEED_INDEX | LDD_F_UPDATE));
+        mti->mti_flags |= LDD_F_OPC_REG;
+
         /* Register the target */
         /* FIXME use mgc_process_config instead */
         rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
                                 sizeof(KEY_REGISTER_TARGET), KEY_REGISTER_TARGET,
                                 sizeof(*mti), mti, NULL);
-        if (rc)
+        if (rc) {
+                if (mti->mti_flags & LDD_F_ERROR) {
+                        LCONSOLE_ERROR_MSG(0x160,
+                                "The MGS is refusing to allow this "
+                                "server (%s) to start. Please see messages"
+                                " on the MGS node.\n", ldd->ldd_svname);
+                } else if (writeconf) {
+                        LCONSOLE_ERROR_MSG(0x15f,
+                                "Communication to the MGS return error %d. "
+                                "Is the MGS running?\n", rc);
+                } else {
+                        CERROR("Cannot talk to the MGS: %d, not fatal\n", rc);
+                        /* reset the error code for non-fatal error. */
+                        rc = 0;
+                }
                 GOTO(out, rc);
+        }
 
         /* Always update our flags */
-        ldd->ldd_flags = mti->mti_flags & ~LDD_F_REWRITE_LDD;
+        ldd->ldd_flags = mti->mti_flags & LDD_F_ONDISK_MASK;
 
         /* If this flag is set, it means the MGS wants us to change our
            on-disk data. (So far this means just the index.) */
@@ -1096,6 +1148,51 @@ out:
         RETURN(rc);
 }
 
+/**
+ * Tell the MGS that the target \a obd on \a sb is ready.
+ *
+ * A target info is built from the superblock, stamped with the target's
+ * instance number and LDD_F_OPC_READY, and sent with KEY_REGISTER_TARGET.
+ * Used by IR - if the MGS receives this message, it will notify clients.
+ * When the call succeeds and the returned flags carry LDD_F_IR_CAPABLE
+ * without LDD_F_ERROR, LSI_IR_CAPABLE is set on the superblock info.
+ *
+ * \retval 0        the MGS was notified
+ * \retval -EINVAL  \a sb is not a server mount
+ * \retval -ENOMEM  no memory for the target info
+ * \retval other    error from server_sb2mti() or obd_set_info_async()
+ */
+static int server_notify_target(struct super_block *sb, struct obd_device *obd)
+{
+        struct lustre_sb_info  *lsi = s2lsi(sb);
+        struct obd_device      *mgc = lsi->lsi_mgc;
+        struct mgs_target_info *mti = NULL;
+        int rc;
+        ENTRY;
+
+        LASSERT(mgc);
+
+        if ((lsi->lsi_flags & LSI_SERVER) == 0)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC_PTR(mti);
+        if (mti == NULL)
+                RETURN(-ENOMEM);
+
+        rc = server_sb2mti(sb, mti);
+        if (rc != 0)
+                GOTO(out_free, rc);
+
+        mti->mti_flags   |= LDD_F_OPC_READY;
+        mti->mti_instance = obd->u.obt.obt_instance;
+
+        /* FIXME use mgc_process_config instead */
+        rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
+                                sizeof(KEY_REGISTER_TARGET),
+                                KEY_REGISTER_TARGET,
+                                sizeof(*mti), mti, NULL);
+
+        /* Imperative recovery: if the mgs informs us to use IR? */
+        if (rc == 0 &&
+            (mti->mti_flags & LDD_F_ERROR) == 0 &&
+            (mti->mti_flags & LDD_F_IR_CAPABLE) != 0)
+                lsi->lsi_flags |= LSI_IR_CAPABLE;
+
+out_free:
+        /* mti is known to be allocated on every path that gets here */
+        OBD_FREE_PTR(mti);
+        RETURN(rc);
+}
+
 /** Start server targets: MDTs and OSTs
  */
 static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
@@ -1131,7 +1228,7 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
 #endif
 
         /* If we're an OST, make sure the global OSS is running */
-        if (lsi->lsi_ldd->ldd_flags & LDD_F_SV_TYPE_OST) {
+        if (IS_OST(lsi->lsi_ldd)) {
                 /* make sure OSS is started */
                 cfs_mutex_down(&server_start_lock);
                 obd = class_name2obd(LUSTRE_OSS_OBDNAME);
@@ -1157,26 +1254,8 @@ static int server_start_targets(struct super_block *sb, struct vfsmount *mnt)
 
         /* Register with MGS */
         rc = server_register_target(sb);
-        if (rc && (lsi->lsi_ldd->ldd_flags &
-                   (LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_UPGRADE14))){
-                CERROR("Required registration failed for %s: %d\n",
-                       lsi->lsi_ldd->ldd_svname, rc);
-                if (rc == -EIO) {
-                        LCONSOLE_ERROR_MSG(0x15f, "Communication error with "
-                                           "the MGS.  Is the MGS running?\n");
-                }
-                GOTO(out_mgc, rc);
-        }
-        if (rc == -EINVAL) {
-                LCONSOLE_ERROR_MSG(0x160, "The MGS is refusing to allow this "
-                                   "server (%s) to start. Please see messages"
-                                   " on the MGS node.\n",
-                                   lsi->lsi_ldd->ldd_svname);
-                GOTO(out_mgc, rc);
-        }
-        /* non-fatal error of registeration with MGS */
         if (rc)
-                CDEBUG(D_MOUNT, "Cannot register with MGS: %d\n", rc);
+                GOTO(out_mgc, rc);
 
         /* Let the target look up the mount using the target's name
            (we can't pass the sb or mnt through class_process_config.) */
@@ -1214,8 +1293,13 @@ out_mgc:
                                       obd->obd_self_export, 0, NULL, NULL);
                 }
 
+                server_notify_target(sb, obd);
+
                 /* log has been fully processed */
                 obd_notify(obd, NULL, OBD_NOTIFY_CONFIG, (void *)CONFIG_LOG);
+
+                /* calculate recovery timeout, do it after lustre_process_log */
+                server_calc_timeout(lsi, obd);
         }
 
         RETURN(rc);
@@ -1778,6 +1862,66 @@ int server_name2index(char *svname, __u32 *idx, char **endptr)
         return rc;
 }
 
+/*
+ * Calculate the recovery timeouts of a target.
+ *
+ * The soft and hard recovery times come from the mount data, or from
+ * OBD_RECOVERY_TIME_SOFT / OBD_RECOVERY_TIME_HARD where the mount data
+ * leaves them at zero.  When imperative recovery is in effect (the
+ * superblock is LSI_IR_CAPABLE and the mount did not ask for
+ * LMD_FLG_NOIR) both are scaled by factor / OBD_IR_FACTOR_MAX; the soft
+ * time is kept at or above OBD_RECOVERY_TIME_MIN and the hard time at or
+ * above the soft time.  The results are stored in \a obd.
+ */
+void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd)
+{
+        struct lustre_mount_data *lmd;
+        bool ir_on   = (lsi->lsi_flags & LSI_IR_CAPABLE) != 0;
+        int  floor   = OBD_RECOVERY_TIME_MIN;
+        int  soft    = 0;
+        int  hard    = 0;
+        int  factor;
+
+        LASSERT(lsi->lsi_flags & LSI_SERVER);
+
+        lmd = lsi->lsi_lmd;
+        if (lmd != NULL) {
+                soft = lmd->lmd_recovery_time_soft;
+                hard = lmd->lmd_recovery_time_hard;
+                /* the "noir" mount option overrides what the MGS said */
+                if (lmd->lmd_flags & LMD_FLG_NOIR)
+                        ir_on = false;
+                obd->obd_no_ir = !ir_on;
+        }
+
+        /* zero means "not configured": use the defaults */
+        soft = soft != 0 ? soft : OBD_RECOVERY_TIME_SOFT;
+        hard = hard != 0 ? hard : OBD_RECOVERY_TIME_HARD;
+
+        /* target may have ir_factor configured. */
+        factor = obd->obd_recovery_ir_factor;
+        if (factor == 0)
+                factor = OBD_IR_FACTOR_DEFAULT;
+
+        if (ir_on) {
+                /* adjust timeout value by imperative recovery */
+                int ir_soft = (soft * factor) / OBD_IR_FACTOR_MAX;
+                int ir_hard = (hard * factor) / OBD_IR_FACTOR_MAX;
+
+                /* make sure the timeout is not too short */
+                ir_soft = max(floor, ir_soft);
+                ir_hard = max(ir_soft, ir_hard);
+
+                LCONSOLE_INFO("%s: Imperative Recovery enabled, recovery "
+                              "window shrunk from %d-%d down to %d-%d\n",
+                              obd->obd_name, soft, hard, ir_soft, ir_hard);
+
+                soft = ir_soft;
+                hard = ir_hard;
+        }
+
+        /* we're done */
+        obd->obd_recovery_ir_factor = factor;
+        obd->obd_recovery_time_hard = hard;
+        obd->obd_recovery_timeout   = soft;
+}
+EXPORT_SYMBOL(server_calc_timeout);
+
 /*************** mount common betweeen server and client ***************/
 
 /* Common umount */
@@ -1970,8 +2114,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
         s1 = options;
         while (*s1) {
                 int clear = 0;
-                int time_min = 2 * (CONNECTION_SWITCH_MAX +
-                               2 * INITIAL_CONNECT_TIMEOUT);
+                int time_min = OBD_RECOVERY_TIME_MIN;
 
                 /* Skip whitespace and extra commas */
                 while (*s1 == ' ' || *s1 == ',')
@@ -1993,6 +2136,9 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
                         lmd->lmd_recovery_time_hard = max_t(int,
                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
                         clear++;
+                } else if (strncmp(s1, "noir", 4) == 0) {
+                        lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
+                        clear++;
                 } else if (strncmp(s1, "nosvc", 5) == 0) {
                         lmd->lmd_flags |= LMD_FLG_NOSVC;
                         clear++;
@@ -2047,7 +2193,7 @@ static int lmd_parse(char *options, struct lustre_mount_data *lmd)
         s1 = strstr(devname, ":/");
         if (s1) {
                 ++s1;
-                lmd->lmd_flags = LMD_FLG_CLIENT;
+                lmd->lmd_flags |= LMD_FLG_CLIENT;
                 /* Remove leading /s from fsname */
                 while (*++s1 == '/') ;
                 /* Freed in lustre_free_lsi */
index 54907db..aa8a8f6 100644 (file)
@@ -904,6 +904,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
         obd->obd_last_committed = le64_to_cpu(lsd->lsd_last_transno);
 out:
         obd->u.obt.obt_mount_count = mount_count + 1;
+        obd->u.obt.obt_instance = (__u32)obd->u.obt.obt_mount_count;
         lsd->lsd_mount_count = cpu_to_le64(obd->u.obt.obt_mount_count);
 
         /* save it, so mount count and last_transno is current */
@@ -1982,14 +1983,6 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                 struct lustre_sb_info *lsi = s2lsi(lmi->lmi_sb);
                 mnt = lmi->lmi_mnt;
                 obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
-
-                /* gets recovery timeouts from mount data */
-                if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_soft)
-                        obd->obd_recovery_timeout =
-                                lsi->lsi_lmd->lmd_recovery_time_soft;
-                if (lsi->lsi_lmd && lsi->lsi_lmd->lmd_recovery_time_hard)
-                        obd->obd_recovery_time_hard =
-                                lsi->lsi_lmd->lmd_recovery_time_hard;
         } else {
                 /* old path - used by lctl */
                 CERROR("Using old MDS mount method\n");
@@ -2029,9 +2022,9 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                 }
         }
 
+        obd->u.obt.obt_magic = OBT_MAGIC;
         obd->u.obt.obt_vfsmnt = mnt;
         obd->u.obt.obt_sb = mnt->mnt_sb;
-        obd->u.obt.obt_magic = OBT_MAGIC;
         filter->fo_fstype = mnt->mnt_sb->s_type->name;
         CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt);
 
index b13fa91..e8750a4 100644 (file)
@@ -477,6 +477,9 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = {
                           lprocfs_filter_wr_syncjournal, 0 },
         { "sync_on_lock_cancel", lprocfs_filter_rd_sync_lock_cancel,
                                  lprocfs_filter_wr_sync_lock_cancel, 0 },
+        { "instance",     lprocfs_target_rd_instance, 0 },
+        { "ir_factor",    lprocfs_obd_rd_ir_factor,
+                          lprocfs_obd_wr_ir_factor, 0},
         { 0 }
 };
 
index f59ab26..7da9621 100644 (file)
@@ -608,7 +608,7 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
                                 osc_wr_contention_seconds, 0 },
         { "lockless_truncate",  osc_rd_lockless_truncate,
                                 osc_wr_lockless_truncate, 0 },
-        { "import",          lprocfs_rd_import,        0, 0 },
+        { "import",          lprocfs_rd_import,        lprocfs_wr_import, 0 },
         { "state",           lprocfs_rd_state,         0, 0 },
         { 0 }
 };
index b0a5292..0715ec6 100644 (file)
@@ -948,6 +948,8 @@ finish:
                 }
 
                 imp->imp_connect_data = *ocd;
+                CDEBUG(D_HA, "obd %s to target with inst %u\n",
+                       imp->imp_obd->obd_name, ocd->ocd_instance);
 
                 exp = class_conn2export(&imp->imp_dlm_handle);
                 cfs_spin_unlock(&imp->imp_lock);
index 2d7eeea..f554e3f 100644 (file)
@@ -97,6 +97,16 @@ static const struct req_msg_field *mgs_set_info[] = {
         &RMF_MGS_SEND_PARAM
 };
 
+/* Field list for the client (request) side of MGS_CONFIG_READ: the ptlrpc
+ * body followed by RMF_MGS_CONFIG_BODY. */
+static const struct req_msg_field *mgs_config_read_client[] = {
+        &RMF_PTLRPC_BODY,
+        &RMF_MGS_CONFIG_BODY
+};
+
+/* Field list for the server (reply) side of MGS_CONFIG_READ: the ptlrpc
+ * body followed by RMF_MGS_CONFIG_RES. */
+static const struct req_msg_field *mgs_config_read_server[] = {
+        &RMF_PTLRPC_BODY,
+        &RMF_MGS_CONFIG_RES
+};
+
 static const struct req_msg_field *log_cancel_client[] = {
         &RMF_PTLRPC_BODY,
         &RMF_LOGCOOKIES
@@ -555,6 +565,7 @@ static struct req_format *req_formats[] = {
         &RQF_SEC_CTX,
         &RQF_MGS_TARGET_REG,
         &RQF_MGS_SET_INFO,
+        &RQF_MGS_CONFIG_READ,
         &RQF_SEQ_QUERY,
         &RQF_FLD_QUERY,
         &RQF_MDS_CONNECT,
@@ -690,6 +701,23 @@ struct req_msg_field RMF_MGS_SEND_PARAM =
                     NULL, NULL);
 EXPORT_SYMBOL(RMF_MGS_SEND_PARAM);
 
+/* Message field holding one struct mgs_config_body (fixed size), with
+ * lustre_swab_mgs_config_body supplied as its swab routine.
+ * NOTE(review): the literal 0 and the trailing NULL are presumably the
+ * field flags and an optional dump callback -- confirm against DEFINE_MSGF. */
+struct req_msg_field RMF_MGS_CONFIG_BODY =
+        DEFINE_MSGF("mgs_config_read request", 0,
+                    sizeof(struct mgs_config_body),
+                    lustre_swab_mgs_config_body, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_BODY);
+
+/* Message field holding one struct mgs_config_res (fixed size), with
+ * lustre_swab_mgs_config_res supplied as its swab routine.
+ * NOTE(review): the literal 0 and the trailing NULL are presumably the
+ * field flags and an optional dump callback -- confirm against DEFINE_MSGF. */
+struct req_msg_field RMF_MGS_CONFIG_RES =
+        DEFINE_MSGF("mgs_config_read reply ", 0,
+                    sizeof(struct mgs_config_res),
+                    lustre_swab_mgs_config_res, NULL);
+EXPORT_SYMBOL(RMF_MGS_CONFIG_RES);
+
+/* Generic message field carrying a single __u32, with
+ * lustre_swab_generic_32s supplied as its swab routine. */
+struct req_msg_field RMF_U32 =
+        DEFINE_MSGF("generic u32", 0,
+                    sizeof(__u32), lustre_swab_generic_32s, NULL);
+EXPORT_SYMBOL(RMF_U32);
+
 struct req_msg_field RMF_SETINFO_VAL =
         DEFINE_MSGF("setinfo_val", 0, -1, NULL, NULL);
 EXPORT_SYMBOL(RMF_SETINFO_VAL);
@@ -982,6 +1010,11 @@ struct req_format RQF_MGS_SET_INFO =
                          mgs_set_info);
 EXPORT_SYMBOL(RQF_MGS_SET_INFO);
 
+/* Request format of the MGS_CONFIG_READ RPC: pairs the client-side field
+ * list mgs_config_read_client with the server-side field list
+ * mgs_config_read_server. */
+struct req_format RQF_MGS_CONFIG_READ =
+        DEFINE_REQ_FMT0("MGS_CONFIG_READ", mgs_config_read_client,
+                         mgs_config_read_server);
+EXPORT_SYMBOL(RQF_MGS_CONFIG_READ);
+
 struct req_format RQF_SEQ_QUERY =
         DEFINE_REQ_FMT0("SEQ_QUERY", seq_query_client, seq_query_server);
 EXPORT_SYMBOL(RQF_SEQ_QUERY);
index b42c41b..94ec060 100644 (file)
@@ -107,6 +107,7 @@ struct ll_rpc_opcode {
         { MGS_TARGET_REG,   "mgs_target_reg" },
         { MGS_TARGET_DEL,   "mgs_target_del" },
         { MGS_SET_INFO,     "mgs_set_info" },
+        { MGS_CONFIG_READ,  "mgs_config_read" },
         { OBD_PING,         "obd_ping" },
         { OBD_LOG_CANCEL,   "llog_origin_handle_cancel" },
         { OBD_QC_CALLBACK,  "obd_quota_callback" },
@@ -799,4 +800,70 @@ int lprocfs_wr_ping(struct file *file, const char *buffer,
 }
 EXPORT_SYMBOL(lprocfs_wr_ping);
 
+/* Write the connection UUID to this file to attempt to connect to that node.
+ * The connection UUID is a node's primary NID. For example,
+ * "echo connection=192.168.0.1@tcp0::instance > .../import".
+ *
+ * Accepted input is "connection=<uuid>" or "connection=<uuid>::<instance>",
+ * optionally followed by a single newline.  Without an instance the import
+ * is always asked to recover towards <uuid>; with one, recovery is only
+ * triggered when <instance> differs from the instance recorded in the
+ * import's connect data.  A malformed instance is logged and otherwise
+ * ignored.
+ *
+ * \retval count   the input was consumed
+ * \retval -EINVAL input too long, too short, or missing the prefix
+ * \retval -ENOMEM the kernel copy of the input could not be allocated
+ * \retval -EFAULT the user buffer could not be read
+ */
+int lprocfs_wr_import(struct file *file, const char *buffer,
+                      unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        struct obd_import *imp = obd->u.cli.cl_import;
+        char *kbuf = NULL;
+        char *uuid;
+        char *ptr;
+        int do_reconn = 1;
+        int rc;
+        const char prefix[] = "connection=";
+        const int prefix_len = sizeof(prefix) - 1;
+
+        if (count > CFS_PAGE_SIZE - 1 || count <= prefix_len)
+                return -EINVAL;
+
+        /* count is below CFS_PAGE_SIZE here, so it fits in the int result.
+         * Keep the status in rc: count must stay intact because it is also
+         * the size handed to OBD_FREE() at the end. */
+        rc = count;
+
+        OBD_ALLOC(kbuf, count + 1);
+        if (kbuf == NULL)
+                return -ENOMEM;
+
+        if (cfs_copy_from_user(kbuf, buffer, count))
+                GOTO(out, rc = -EFAULT);
+
+        kbuf[count] = 0;
+        /* a plain "echo" appends a newline; do not let it leak into the
+         * uuid or be taken for a malformed instance number */
+        if (kbuf[count - 1] == '\n')
+                kbuf[count - 1] = 0;
+
+        /* only support connection=uuid::instance now */
+        if (strncmp(prefix, kbuf, prefix_len) != 0)
+                GOTO(out, rc = -EINVAL);
+
+        uuid = kbuf + prefix_len;
+        ptr = strstr(uuid, "::");
+        if (ptr) {
+                __u32 inst;
+                char *endptr;
+
+                /* cut the string so that uuid ends before "::" */
+                *ptr = 0;
+                do_reconn = 0;
+                ptr += strlen("::");
+                inst = simple_strtol(ptr, &endptr, 10);
+                if (*endptr) {
+                        CERROR("config: wrong instance # %s\n", ptr);
+                } else if (inst != imp->imp_connect_data.ocd_instance) {
+                        CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted "
+                               "target(%u/%u), reconnecting...\n",
+                               imp->imp_obd->obd_name,
+                               imp->imp_connect_data.ocd_instance, inst);
+                        do_reconn = 1;
+                } else {
+                        CDEBUG(D_INFO, "IR: %s has already been connecting to "
+                               "new target(%u)\n",
+                               imp->imp_obd->obd_name, inst);
+                }
+        }
+
+        if (do_reconn)
+                ptlrpc_recover_import(imp, uuid, 1);
+
+out:
+        OBD_FREE(kbuf, count + 1);
+        return rc;
+}
+EXPORT_SYMBOL(lprocfs_wr_import);
+
 #endif /* LPROCFS */
index 7b59b72..1429774 100644 (file)
@@ -1573,13 +1573,13 @@ void lustre_swab_connect(struct obd_connect_data *ocd)
         __swab64s(&ocd->ocd_transno);
         __swab32s(&ocd->ocd_group);
         __swab32s(&ocd->ocd_cksum_types);
+        __swab32s(&ocd->ocd_instance);
         /* Fields after ocd_cksum_types are only accessible by the receiver
          * if the corresponding flag in ocd_connect_flags is set. Accessing
          * any field after ocd_maxbytes on the receiver without a valid flag
          * may result in out-of-bound memory access and kernel oops. */
         if (ocd->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)
                 __swab32s(&ocd->ocd_max_easize);
-        CLASSERT(offsetof(typeof(*ocd), padding) != 0);
         if (ocd->ocd_connect_flags & OBD_CONNECT_MAXBYTES)
                 __swab64s(&ocd->ocd_maxbytes);
         CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
@@ -1781,12 +1781,51 @@ void lustre_swab_mgs_target_info(struct mgs_target_info *mti)
         __swab32s(&mti->mti_stripe_index);
         __swab32s(&mti->mti_config_ver);
         __swab32s(&mti->mti_flags);
+        __swab32s(&mti->mti_instance);
         __swab32s(&mti->mti_nid_count);
         CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
         for (i = 0; i < MTI_NIDS_MAX; i++)
                 __swab64s(&mti->mti_nids[i]);
 }
 
+/* Byte-swap one nidtbl entry in place: the fixed header words first, then
+ * each of the mne_nid_count NIDs stored in u.nids[].
+ * NOTE(review): mne_nid_count is not checked against mne_length or any
+ * buffer size here -- presumably the caller validates it; confirm. */
+void lustre_swab_mgs_nidtbl_entry(struct mgs_nidtbl_entry *entry)
+{
+        int idx = 0;
+
+        /* compile-time guards: mne_nid_(count|type) must be one byte size
+         * because they are read below w/o swapping, and every NID is
+         * swabbed as a 64-bit word */
+        CLASSERT(sizeof(entry->mne_nid_count) == sizeof(__u8));
+        CLASSERT(sizeof(entry->mne_nid_type) == sizeof(__u8));
+        CLASSERT(sizeof(lnet_nid_t) == sizeof(__u64));
+
+        __swab64s(&entry->mne_version);
+        __swab32s(&entry->mne_instance);
+        __swab32s(&entry->mne_index);
+        __swab32s(&entry->mne_length);
+
+        /* only NID type 0 is handled; remove this assertion if ipv6 is
+         * supported. */
+        LASSERT(entry->mne_nid_type == 0);
+
+        while (idx < entry->mne_nid_count) {
+                __swab64s(&entry->u.nids[idx]);
+                idx++;
+        }
+}
+EXPORT_SYMBOL(lustre_swab_mgs_nidtbl_entry);
+
+/* Byte-swap, in place, the multi-byte fields of an mgs_config_body:
+ * mcb_offset (64 bit), mcb_units (32 bit) and mcb_type (16 bit).
+ * NOTE(review): any other member of struct mgs_config_body is left
+ * untouched -- confirm against the struct definition that the remaining
+ * members are single bytes or byte arrays. */
+void lustre_swab_mgs_config_body(struct mgs_config_body *body)
+{
+        __swab64s(&body->mcb_offset);
+        __swab32s(&body->mcb_units);
+        __swab16s(&body->mcb_type);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_body);
+
+/* Byte-swap, in place, the two 64-bit fields of an mgs_config_res:
+ * mcr_offset and mcr_size. */
+void lustre_swab_mgs_config_res(struct mgs_config_res *body)
+{
+        __swab64s(&body->mcr_offset);
+        __swab64s(&body->mcr_size);
+}
+EXPORT_SYMBOL(lustre_swab_mgs_config_res);
+
 static void lustre_swab_obd_dqinfo (struct obd_dqinfo *i)
 {
         __swab64s (&i->dqi_bgrace);
index 0fbd562..62af35c 100644 (file)
@@ -822,6 +822,7 @@ void sptlrpc_req_set_flavor(struct ptlrpc_request *req, int opcode)
         switch (opcode) {
         case OST_READ:
         case MDS_READPAGE:
+        case MGS_CONFIG_READ:
                 req->rq_bulk_read = 1;
                 break;
         case OST_WRITE:
index bac757e..2c5d417 100644 (file)
@@ -1519,6 +1519,7 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service *svc)
                 break;
         case MDS_READPAGE:
         case OST_READ:
+        case MGS_CONFIG_READ:
                 req->rq_bulk_read = 1;
                 break;
         }
index 20134f0..458f1d8 100644 (file)
@@ -68,8 +68,8 @@ void lustre_assert_wire_constants(void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
          * (make -C lustre/utils newwiretest)
-         * running on Linux centos5.localhost 2.6.18-prep #3 SMP Mon Mar 22 08:28:01 EDT 2010 x86_64 
-         * with gcc version 4.1.2 20071124 (Red Hat 4.1.2-42) */
+         * running on Linux venus 2.6.32-131.6.1.el6_lustre.gad4c1d5.x86_64 #1 SMP Thu Jul 28 23:13:5
+         * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC)  */
 
 
         /* Constants... */
@@ -285,6 +285,12 @@ void lustre_assert_wire_constants(void)
                  (long long)MGS_TARGET_DEL);
         LASSERTF(MGS_SET_INFO == 255, " found %lld\n",
                  (long long)MGS_SET_INFO);
+        LASSERTF(LDF_EMPTY == 1, " found %lld\n",
+                 (long long)LDF_EMPTY);
+        LASSERTF(LDF_COLLIDE == 2, " found %lld\n",
+                 (long long)LDF_COLLIDE);
+        LASSERTF(LU_PAGE_SIZE == 4096, " found %lld\n",
+                 (long long)LU_PAGE_SIZE);
         /* Sizes and Offsets */
 
         /* Checks for struct obd_uuid */
@@ -463,10 +469,10 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
         LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
-        LASSERTF((int)offsetof(struct obd_connect_data, padding) == 60, " found %lld\n",
-                 (long long)(int)offsetof(struct obd_connect_data, padding));
-        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
         LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, " found %lld\n",
                  (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
         LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, " found %lld\n",
@@ -479,46 +485,47 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obd_connect_data, padding2));
         LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
-        CLASSERT(OBD_CONNECT_RDONLY ==                    0x1ULL);
-        CLASSERT(OBD_CONNECT_INDEX ==                     0x2ULL);
-        CLASSERT(OBD_CONNECT_MDS ==                       0x4ULL);
-        CLASSERT(OBD_CONNECT_GRANT ==                     0x8ULL);
-        CLASSERT(OBD_CONNECT_SRVLOCK ==                  0x10ULL);
-        CLASSERT(OBD_CONNECT_VERSION ==                  0x20ULL);
-        CLASSERT(OBD_CONNECT_REQPORTAL ==                0x40ULL);
-        CLASSERT(OBD_CONNECT_ACL ==                      0x80ULL);
-        CLASSERT(OBD_CONNECT_XATTR ==                   0x100ULL);
-        CLASSERT(OBD_CONNECT_CROW ==                    0x200ULL);
-        CLASSERT(OBD_CONNECT_TRUNCLOCK ==               0x400ULL);
-        CLASSERT(OBD_CONNECT_TRANSNO ==                 0x800ULL);
-        CLASSERT(OBD_CONNECT_IBITS ==                  0x1000ULL);
-        CLASSERT(OBD_CONNECT_JOIN ==                   0x2000ULL);
-        CLASSERT(OBD_CONNECT_ATTRFID ==                0x4000ULL);
-        CLASSERT(OBD_CONNECT_NODEVOH ==                0x8000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT ==            0x10000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE ==      0x20000ULL);
-        CLASSERT(OBD_CONNECT_BRW_SIZE ==              0x40000ULL);
-        CLASSERT(OBD_CONNECT_QUOTA64 ==               0x80000ULL);
-        CLASSERT(OBD_CONNECT_MDS_CAPA ==             0x100000ULL);
-        CLASSERT(OBD_CONNECT_OSS_CAPA ==             0x200000ULL);
-        CLASSERT(OBD_CONNECT_CANCELSET ==            0x400000ULL);
-        CLASSERT(OBD_CONNECT_SOM ==                  0x800000ULL);
-        CLASSERT(OBD_CONNECT_AT ==                  0x1000000ULL);
-        CLASSERT(OBD_CONNECT_LRU_RESIZE ==          0x2000000ULL);
-        CLASSERT(OBD_CONNECT_MDS_MDS ==             0x4000000ULL);
-        CLASSERT(OBD_CONNECT_REAL ==                0x8000000ULL);
-        CLASSERT(OBD_CONNECT_CHANGE_QS ==          0x10000000ULL);
-        CLASSERT(OBD_CONNECT_CKSUM ==              0x20000000ULL);
-        CLASSERT(OBD_CONNECT_FID ==                0x40000000ULL);
-        CLASSERT(OBD_CONNECT_VBR ==                0x80000000ULL);
-        CLASSERT(OBD_CONNECT_LOV_V3 ==            0x100000000ULL);
-        CLASSERT(OBD_CONNECT_GRANT_SHRINK ==      0x200000000ULL);
-        CLASSERT(OBD_CONNECT_SKIP_ORPHAN ==       0x400000000ULL);
-        CLASSERT(OBD_CONNECT_MAX_EASIZE ==        0x800000000ULL);
-        CLASSERT(OBD_CONNECT_FULL20 ==           0x1000000000ULL);
-        CLASSERT(OBD_CONNECT_LAYOUTLOCK ==       0x2000000000ULL);
-        CLASSERT(OBD_CONNECT_64BITHASH ==        0x4000000000ULL);
-        CLASSERT(OBD_CONNECT_MAXBYTES ==         0x8000000000ULL);
+        CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+        CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+        CLASSERT(OBD_CONNECT_MDS == 0x4ULL);
+        CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+        CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+        CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+        CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+        CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+        CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
+        CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
+        CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+        CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
+        CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+        CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+        CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+        CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL);
+        CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
+        CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
+        CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
+        CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
+        CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
+        CLASSERT(OBD_CONNECT_SOM == 0x800000ULL);
+        CLASSERT(OBD_CONNECT_AT == 0x1000000ULL);
+        CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL);
+        CLASSERT(OBD_CONNECT_MDS_MDS == 0x4000000ULL);
+        CLASSERT(OBD_CONNECT_REAL == 0x8000000ULL);
+        CLASSERT(OBD_CONNECT_CHANGE_QS == 0x10000000ULL);
+        CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
+        CLASSERT(OBD_CONNECT_FID == 0x40000000ULL);
+        CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL);
+        CLASSERT(OBD_CONNECT_LOV_V3 == 0x100000000ULL);
+        CLASSERT(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL);
+        CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL);
+        CLASSERT(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL);
+        CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL);
+        CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL);
+        CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL);
+        CLASSERT(OBD_CONNECT_MAXBYTES == 0x8000000000ULL);
+        CLASSERT(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL);
 
         /* Checks for struct obdo */
         LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",
@@ -688,8 +695,8 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_FL_CKSUM_ADLER == 8192);
         CLASSERT(OBD_FL_CKSUM_CRC32C == 16384);
         CLASSERT(OBD_FL_SHRINK_GRANT == 131072);
-        CLASSERT(OBD_FL_MMAP == (0x00040000));
-        CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000));
+        CLASSERT(OBD_FL_MMAP == 262144);
+        CLASSERT(OBD_FL_RECOV_RESEND == 524288);
         CLASSERT(OBD_CKSUM_CRC32 == 1);
         CLASSERT(OBD_CKSUM_ADLER == 2);
         CLASSERT(OBD_CKSUM_CRC32C == 4);
@@ -2722,4 +2729,61 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct link_ea_entry, lee_name));
         LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
                  (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+        /* Checks for struct hsm_user_item */
+        LASSERTF((int)sizeof(struct hsm_user_item) == 32, " found %lld\n",
+                 (long long)(int)sizeof(struct hsm_user_item));
+        LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+        LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+        LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+        LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+        /* Checks for struct hsm_user_request */
+        LASSERTF((int)sizeof(struct hsm_user_request) == 16, " found %lld\n",
+                 (long long)(int)sizeof(struct hsm_user_request));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_action) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_action));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_action) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_action));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_archive_num) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_archive_num));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_archive_num) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_archive_num));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_itemcount) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_itemcount));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_itemcount) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_itemcount));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_data_len) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_data_len));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_data_len) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_data_len));
+
+        /* Checks for struct hsm_user_state */
+        LASSERTF((int)sizeof(struct hsm_user_state) == 32, " found %lld\n",
+                 (long long)(int)sizeof(struct hsm_user_state));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_states));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_num) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_archive_num));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_num) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_num));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
 }
+
index 43ccac7..ec16547 100644 (file)
@@ -77,6 +77,8 @@
 #define lustre_swab_fiemap NULL
 #define lustre_swab_qdata NULL
 #define lustre_swab_ost_lvb NULL
+#define lustre_swab_mgs_config_body NULL
+#define lustre_swab_mgs_config_res NULL
 #define dump_rniobuf NULL
 #define dump_ioo NULL
 #define dump_obdo NULL
index 5ea06e7..353a18c 100644 (file)
@@ -194,7 +194,7 @@ static void check_obd_connect_data(void)
         CHECK_MEMBER(obd_connect_data, ocd_group);
         CHECK_MEMBER(obd_connect_data, ocd_cksum_types);
         CHECK_MEMBER(obd_connect_data, ocd_max_easize);
-        CHECK_MEMBER(obd_connect_data, padding);
+        CHECK_MEMBER(obd_connect_data, ocd_instance);
         CHECK_MEMBER(obd_connect_data, ocd_maxbytes);
         CHECK_MEMBER(obd_connect_data, padding1);
         CHECK_MEMBER(obd_connect_data, padding2);
@@ -239,6 +239,7 @@ static void check_obd_connect_data(void)
         CHECK_CDEFINE(OBD_CONNECT_LAYOUTLOCK);
         CHECK_CDEFINE(OBD_CONNECT_64BITHASH);
         CHECK_CDEFINE(OBD_CONNECT_MAXBYTES);
+        CHECK_CDEFINE(OBD_CONNECT_IMP_RECOV);
 }
 
 static void
index 79fca5c..1f553cf 100644 (file)
@@ -65,8 +65,8 @@ void lustre_assert_wire_constants(void)
 {
         /* Wire protocol assertions generated by 'wirecheck'
          * (make -C lustre/utils newwiretest)
-         * running on Linux centos5.localhost 2.6.18-prep #3 SMP Mon Mar 22 08:28:01 EDT 2010 x86_64 
-         * with gcc version 4.1.2 20071124 (Red Hat 4.1.2-42) */
+         * running on Linux venus 2.6.32-131.6.1.el6_lustre.gad4c1d5.x86_64 #1 SMP Thu Jul 28 23:13:5
+         * with gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC)  */
 
 
         /* Constants... */
@@ -282,6 +282,12 @@ void lustre_assert_wire_constants(void)
                  (long long)MGS_TARGET_DEL);
         LASSERTF(MGS_SET_INFO == 255, " found %lld\n",
                  (long long)MGS_SET_INFO);
+        LASSERTF(LDF_EMPTY == 1, " found %lld\n",
+                 (long long)LDF_EMPTY);
+        LASSERTF(LDF_COLLIDE == 2, " found %lld\n",
+                 (long long)LDF_COLLIDE);
+        LASSERTF(LU_PAGE_SIZE == 4096, " found %lld\n",
+                 (long long)LU_PAGE_SIZE);
         /* Sizes and Offsets */
 
         /* Checks for struct obd_uuid */
@@ -460,10 +466,10 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obd_connect_data, ocd_max_easize));
         LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_max_easize));
-        LASSERTF((int)offsetof(struct obd_connect_data, padding) == 60, " found %lld\n",
-                 (long long)(int)offsetof(struct obd_connect_data, padding));
-        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct obd_connect_data *)0)->padding));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_instance) == 60, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_instance));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_instance) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_instance));
         LASSERTF((int)offsetof(struct obd_connect_data, ocd_maxbytes) == 64, " found %lld\n",
                  (long long)(int)offsetof(struct obd_connect_data, ocd_maxbytes));
         LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_maxbytes) == 8, " found %lld\n",
@@ -476,46 +482,47 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct obd_connect_data, padding2));
         LASSERTF((int)sizeof(((struct obd_connect_data *)0)->padding2) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct obd_connect_data *)0)->padding2));
-        CLASSERT(OBD_CONNECT_RDONLY ==                    0x1ULL);
-        CLASSERT(OBD_CONNECT_INDEX ==                     0x2ULL);
-        CLASSERT(OBD_CONNECT_MDS ==                       0x4ULL);
-        CLASSERT(OBD_CONNECT_GRANT ==                     0x8ULL);
-        CLASSERT(OBD_CONNECT_SRVLOCK ==                  0x10ULL);
-        CLASSERT(OBD_CONNECT_VERSION ==                  0x20ULL);
-        CLASSERT(OBD_CONNECT_REQPORTAL ==                0x40ULL);
-        CLASSERT(OBD_CONNECT_ACL ==                      0x80ULL);
-        CLASSERT(OBD_CONNECT_XATTR ==                   0x100ULL);
-        CLASSERT(OBD_CONNECT_CROW ==                    0x200ULL);
-        CLASSERT(OBD_CONNECT_TRUNCLOCK ==               0x400ULL);
-        CLASSERT(OBD_CONNECT_TRANSNO ==                 0x800ULL);
-        CLASSERT(OBD_CONNECT_IBITS ==                  0x1000ULL);
-        CLASSERT(OBD_CONNECT_JOIN ==                   0x2000ULL);
-        CLASSERT(OBD_CONNECT_ATTRFID ==                0x4000ULL);
-        CLASSERT(OBD_CONNECT_NODEVOH ==                0x8000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT ==            0x10000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE ==      0x20000ULL);
-        CLASSERT(OBD_CONNECT_BRW_SIZE ==              0x40000ULL);
-        CLASSERT(OBD_CONNECT_QUOTA64 ==               0x80000ULL);
-        CLASSERT(OBD_CONNECT_MDS_CAPA ==             0x100000ULL);
-        CLASSERT(OBD_CONNECT_OSS_CAPA ==             0x200000ULL);
-        CLASSERT(OBD_CONNECT_CANCELSET ==            0x400000ULL);
-        CLASSERT(OBD_CONNECT_SOM ==                  0x800000ULL);
-        CLASSERT(OBD_CONNECT_AT ==                  0x1000000ULL);
-        CLASSERT(OBD_CONNECT_LRU_RESIZE ==          0x2000000ULL);
-        CLASSERT(OBD_CONNECT_MDS_MDS ==             0x4000000ULL);
-        CLASSERT(OBD_CONNECT_REAL ==                0x8000000ULL);
-        CLASSERT(OBD_CONNECT_CHANGE_QS ==          0x10000000ULL);
-        CLASSERT(OBD_CONNECT_CKSUM ==              0x20000000ULL);
-        CLASSERT(OBD_CONNECT_FID ==                0x40000000ULL);
-        CLASSERT(OBD_CONNECT_VBR ==                0x80000000ULL);
-        CLASSERT(OBD_CONNECT_LOV_V3 ==            0x100000000ULL);
-        CLASSERT(OBD_CONNECT_GRANT_SHRINK ==      0x200000000ULL);
-        CLASSERT(OBD_CONNECT_SKIP_ORPHAN ==       0x400000000ULL);
-        CLASSERT(OBD_CONNECT_MAX_EASIZE ==        0x800000000ULL);
-        CLASSERT(OBD_CONNECT_FULL20 ==           0x1000000000ULL);
-        CLASSERT(OBD_CONNECT_LAYOUTLOCK ==       0x2000000000ULL);
-        CLASSERT(OBD_CONNECT_64BITHASH ==        0x4000000000ULL);
-        CLASSERT(OBD_CONNECT_MAXBYTES ==         0x8000000000ULL);
+        CLASSERT(OBD_CONNECT_RDONLY == 0x1ULL);
+        CLASSERT(OBD_CONNECT_INDEX == 0x2ULL);
+        CLASSERT(OBD_CONNECT_MDS == 0x4ULL);
+        CLASSERT(OBD_CONNECT_GRANT == 0x8ULL);
+        CLASSERT(OBD_CONNECT_SRVLOCK == 0x10ULL);
+        CLASSERT(OBD_CONNECT_VERSION == 0x20ULL);
+        CLASSERT(OBD_CONNECT_REQPORTAL == 0x40ULL);
+        CLASSERT(OBD_CONNECT_ACL == 0x80ULL);
+        CLASSERT(OBD_CONNECT_XATTR == 0x100ULL);
+        CLASSERT(OBD_CONNECT_CROW == 0x200ULL);
+        CLASSERT(OBD_CONNECT_TRUNCLOCK == 0x400ULL);
+        CLASSERT(OBD_CONNECT_TRANSNO == 0x800ULL);
+        CLASSERT(OBD_CONNECT_IBITS == 0x1000ULL);
+        CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
+        CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
+        CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT_FORCE == 0x20000ULL);
+        CLASSERT(OBD_CONNECT_BRW_SIZE == 0x40000ULL);
+        CLASSERT(OBD_CONNECT_QUOTA64 == 0x80000ULL);
+        CLASSERT(OBD_CONNECT_MDS_CAPA == 0x100000ULL);
+        CLASSERT(OBD_CONNECT_OSS_CAPA == 0x200000ULL);
+        CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL);
+        CLASSERT(OBD_CONNECT_SOM == 0x800000ULL);
+        CLASSERT(OBD_CONNECT_AT == 0x1000000ULL);
+        CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x2000000ULL);
+        CLASSERT(OBD_CONNECT_MDS_MDS == 0x4000000ULL);
+        CLASSERT(OBD_CONNECT_REAL == 0x8000000ULL);
+        CLASSERT(OBD_CONNECT_CHANGE_QS == 0x10000000ULL);
+        CLASSERT(OBD_CONNECT_CKSUM == 0x20000000ULL);
+        CLASSERT(OBD_CONNECT_FID == 0x40000000ULL);
+        CLASSERT(OBD_CONNECT_VBR == 0x80000000ULL);
+        CLASSERT(OBD_CONNECT_LOV_V3 == 0x100000000ULL);
+        CLASSERT(OBD_CONNECT_GRANT_SHRINK == 0x200000000ULL);
+        CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL);
+        CLASSERT(OBD_CONNECT_MAX_EASIZE == 0x800000000ULL);
+        CLASSERT(OBD_CONNECT_FULL20 == 0x1000000000ULL);
+        CLASSERT(OBD_CONNECT_LAYOUTLOCK == 0x2000000000ULL);
+        CLASSERT(OBD_CONNECT_64BITHASH == 0x4000000000ULL);
+        CLASSERT(OBD_CONNECT_MAXBYTES == 0x8000000000ULL);
+        CLASSERT(OBD_CONNECT_IMP_RECOV == 0x10000000000ULL);
 
         /* Checks for struct obdo */
         LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",
@@ -685,8 +692,8 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_FL_CKSUM_ADLER == 8192);
         CLASSERT(OBD_FL_CKSUM_CRC32C == 16384);
         CLASSERT(OBD_FL_SHRINK_GRANT == 131072);
-        CLASSERT(OBD_FL_MMAP == (0x00040000));
-        CLASSERT(OBD_FL_RECOV_RESEND == (0x00080000));
+        CLASSERT(OBD_FL_MMAP == 262144);
+        CLASSERT(OBD_FL_RECOV_RESEND == 524288);
         CLASSERT(OBD_CKSUM_CRC32 == 1);
         CLASSERT(OBD_CKSUM_ADLER == 2);
         CLASSERT(OBD_CKSUM_CRC32C == 4);
@@ -2719,4 +2726,61 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct link_ea_entry, lee_name));
         LASSERTF((int)sizeof(((struct link_ea_entry *)0)->lee_name) == 0, " found %lld\n",
                  (long long)(int)sizeof(((struct link_ea_entry *)0)->lee_name));
+
+        /* Checks for struct hsm_user_item */
+        LASSERTF((int)sizeof(struct hsm_user_item) == 32, " found %lld\n",
+                 (long long)(int)sizeof(struct hsm_user_item));
+        LASSERTF((int)offsetof(struct hsm_user_item, hui_fid) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_item, hui_fid));
+        LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_fid) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_fid));
+        LASSERTF((int)offsetof(struct hsm_user_item, hui_extent) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_item, hui_extent));
+        LASSERTF((int)sizeof(((struct hsm_user_item *)0)->hui_extent) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_item *)0)->hui_extent));
+
+        /* Checks for struct hsm_user_request */
+        LASSERTF((int)sizeof(struct hsm_user_request) == 16, " found %lld\n",
+                 (long long)(int)sizeof(struct hsm_user_request));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_action) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_action));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_action) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_action));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_archive_num) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_archive_num));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_archive_num) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_archive_num));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_itemcount) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_itemcount));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_itemcount) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_itemcount));
+        LASSERTF((int)offsetof(struct hsm_user_request, hur_data_len) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_request, hur_data_len));
+        LASSERTF((int)sizeof(((struct hsm_user_request *)0)->hur_data_len) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_request *)0)->hur_data_len));
+
+        /* Checks for struct hsm_user_state */
+        LASSERTF((int)sizeof(struct hsm_user_state) == 32, " found %lld\n",
+                 (long long)(int)sizeof(struct hsm_user_state));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_states) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_states));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_states) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_states));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_archive_num) == 4, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_archive_num));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_archive_num) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_archive_num));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_state) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_state));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_state));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_action) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_action));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_action));
+        LASSERTF((int)offsetof(struct hsm_user_state, hus_in_progress_location) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct hsm_user_state, hus_in_progress_location));
+        LASSERTF((int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location) == 16, " found %lld\n",
+                 (long long)(int)sizeof(((struct hsm_user_state *)0)->hus_in_progress_location));
 }
+